Merge branch 'acpi-battery'

* acpi-battery:
  ACPI / battery: Correctly serialise with the pending async probe

diff --git a/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCU.svg b/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCU.svg
new file mode 100644
index 0000000..727e270
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCU.svg
@@ -0,0 +1,474 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
+
+<!-- CreationDate: Wed Dec  9 17:28:20 2015 -->
+
+<!-- Magnification: 3.000 -->
+
+<svg
+   xmlns:dc="http://purl.org/dc/elements/1.1/"
+   xmlns:cc="http://creativecommons.org/ns#"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   width="9.1in"
+   height="8.9in"
+   viewBox="-66 -66 10932 10707"
+   id="svg2"
+   version="1.1"
+   inkscape:version="0.48.4 r9939"
+   sodipodi:docname="BigTreeClassicRCU.fig">
+  <metadata
+     id="metadata106">
+    <rdf:RDF>
+      <cc:Work
+         rdf:about="">
+        <dc:format>image/svg+xml</dc:format>
+        <dc:type
+           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+        <dc:title></dc:title>
+      </cc:Work>
+    </rdf:RDF>
+  </metadata>
+  <defs
+     id="defs104">
+    <marker
+       inkscape:stockid="Arrow1Mend"
+       orient="auto"
+       refY="0.0"
+       refX="0.0"
+       id="Arrow1Mend"
+       style="overflow:visible;">
+      <path
+         id="path3864"
+         d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
+         style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;"
+         transform="scale(0.4) rotate(180) translate(10,0)" />
+    </marker>
+  </defs>
+  <sodipodi:namedview
+     pagecolor="#ffffff"
+     bordercolor="#666666"
+     borderopacity="1"
+     objecttolerance="10"
+     gridtolerance="10"
+     guidetolerance="10"
+     inkscape:pageopacity="0"
+     inkscape:pageshadow="2"
+     inkscape:window-width="973"
+     inkscape:window-height="1137"
+     id="namedview102"
+     showgrid="false"
+     inkscape:zoom="0.9743589"
+     inkscape:cx="409.50003"
+     inkscape:cy="400.49997"
+     inkscape:window-x="915"
+     inkscape:window-y="24"
+     inkscape:window-maximized="0"
+     inkscape:current-layer="g4" />
+  <g
+     style="stroke-width:.025in; fill:none"
+     id="g4">
+    <!-- Line: box -->
+    <rect
+       x="0"
+       y="0"
+       width="10800"
+       height="5625"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+       id="rect6" />
+    <!-- Line: box -->
+    <rect
+       x="1125"
+       y="3600"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect8" />
+    <!-- Line: box -->
+    <rect
+       x="3825"
+       y="900"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect10" />
+    <!-- Line: box -->
+    <rect
+       x="6525"
+       y="3600"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect12" />
+    <!-- Line -->
+    <polyline
+       points="3375,6525 3375,5046 "
+       style="stroke:#00d1d1;stroke-width:44.9934641;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline14" />
+    <!-- Arrowhead on XXXpoint 3375 6525 - 3375 4860-->
+    <!-- Circle -->
+    <circle
+       cx="7425"
+       cy="6075"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle18" />
+    <!-- Circle -->
+    <circle
+       cx="7875"
+       cy="6075"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle20" />
+    <!-- Circle -->
+    <circle
+       cx="8325"
+       cy="6075"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle22" />
+    <!-- Circle -->
+    <circle
+       cx="2025"
+       cy="6075"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle24" />
+    <!-- Circle -->
+    <circle
+       cx="2475"
+       cy="6075"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle26" />
+    <!-- Circle -->
+    <circle
+       cx="2925"
+       cy="6075"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle28" />
+    <!-- Circle -->
+    <circle
+       cx="4725"
+       cy="4275"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle30" />
+    <!-- Circle -->
+    <circle
+       cx="5175"
+       cy="4275"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle32" />
+    <!-- Circle -->
+    <circle
+       cx="5625"
+       cy="4275"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle34" />
+    <!-- Line: box -->
+    <rect
+       x="2025"
+       y="6525"
+       width="2700"
+       height="1800"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect36" />
+    <!-- Line -->
+    <polyline
+       points="2475,3600 3975,2310 "
+       style="stroke:#00d1d1;stroke-width:44.9934641;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline38" />
+    <!-- Arrowhead on XXXpoint 2475 3600 - 4116 2190-->
+    <!-- Line -->
+    <polyline
+       points="7875,3600 6372,2310 "
+       style="stroke:#00d1d1;stroke-width:44.9934641;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline42" />
+    <!-- Arrowhead on XXXpoint 7875 3600 - 6231 2190-->
+    <!-- Line -->
+    <polyline
+       points="6975,8775 6975,5046 "
+       style="stroke:#00d1d1;stroke-width:44.9934641;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline46" />
+    <!-- Arrowhead on XXXpoint 6975 8775 - 6975 4860-->
+    <!-- Line -->
+    <polyline
+       points="1575,8775 1575,5046 "
+       style="stroke:#00d1d1;stroke-width:44.9934641;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline50" />
+    <!-- Arrowhead on XXXpoint 1575 8775 - 1575 4860-->
+    <!-- Line -->
+    <polyline
+       points="8775,6525 8775,5046 "
+       style="stroke:#00d1d1;stroke-width:44.9934641;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline54" />
+    <!-- Arrowhead on XXXpoint 8775 6525 - 8775 4860-->
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1575"
+       y="9225"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text58">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1575"
+       y="9675"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text60">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1575"
+       y="10350"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text62">CPU 0</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="3375"
+       y="6975"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text64">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="3375"
+       y="7425"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text66">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="3375"
+       y="8100"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text68">CPU 15</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="6975"
+       y="9225"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text70">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="6975"
+       y="9675"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text72">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="6975"
+       y="10350"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text74">CPU 1007</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="8730"
+       y="6930"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text76">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="8730"
+       y="7380"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text78">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="8730"
+       y="8055"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text80">CPU 1023</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="225"
+       y="450"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="start"
+       id="text82">struct rcu_state</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2475"
+       y="4050"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text84">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2475"
+       y="4500"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text86">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="7875"
+       y="4500"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text88">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="7875"
+       y="4050"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text90">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5175"
+       y="1350"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text92">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5175"
+       y="1800"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text94">rcu_node</text>
+    <!-- Line: box -->
+    <rect
+       x="225"
+       y="8775"
+       width="2700"
+       height="1800"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect96" />
+    <!-- Line: box -->
+    <rect
+       x="5625"
+       y="8775"
+       width="2700"
+       height="1800"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect98" />
+    <!-- Line: box -->
+    <rect
+       x="7380"
+       y="6480"
+       width="2700"
+       height="1800"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect100" />
+  </g>
+</svg>
diff --git a/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCUBH.svg b/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCUBH.svg
new file mode 100644
index 0000000..9bbb194
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCUBH.svg
@@ -0,0 +1,499 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
+
+<!-- CreationDate: Wed Dec  9 17:26:09 2015 -->
+
+<!-- Magnification: 2.000 -->
+
+<svg
+   xmlns:dc="http://purl.org/dc/elements/1.1/"
+   xmlns:cc="http://creativecommons.org/ns#"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   width="5.7in"
+   height="6.6in"
+   viewBox="-44 -44 6838 7888"
+   id="svg2"
+   version="1.1"
+   inkscape:version="0.48.4 r9939"
+   sodipodi:docname="BigTreeClassicRCUBH.fig">
+  <metadata
+     id="metadata110">
+    <rdf:RDF>
+      <cc:Work
+         rdf:about="">
+        <dc:format>image/svg+xml</dc:format>
+        <dc:type
+           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+        <dc:title></dc:title>
+      </cc:Work>
+    </rdf:RDF>
+  </metadata>
+  <defs
+     id="defs108">
+    <marker
+       inkscape:stockid="Arrow1Mend"
+       orient="auto"
+       refY="0.0"
+       refX="0.0"
+       id="Arrow1Mend"
+       style="overflow:visible;">
+      <path
+         id="path3868"
+         d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
+         style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;"
+         transform="scale(0.4) rotate(180) translate(10,0)" />
+    </marker>
+    <marker
+       inkscape:stockid="Arrow2Mend"
+       orient="auto"
+       refY="0.0"
+       refX="0.0"
+       id="Arrow2Mend"
+       style="overflow:visible;">
+      <path
+         id="path3886"
+         style="fill-rule:evenodd;stroke-width:0.62500000;stroke-linejoin:round;"
+         d="M 8.7185878,4.0337352 L -2.2072895,0.016013256 L 8.7185884,-4.0017078 C 6.9730900,-1.6296469 6.9831476,1.6157441 8.7185878,4.0337352 z "
+         transform="scale(0.6) rotate(180) translate(0,0)" />
+    </marker>
+  </defs>
+  <sodipodi:namedview
+     pagecolor="#ffffff"
+     bordercolor="#666666"
+     borderopacity="1"
+     objecttolerance="10"
+     gridtolerance="10"
+     guidetolerance="10"
+     inkscape:pageopacity="0"
+     inkscape:pageshadow="2"
+     inkscape:window-width="878"
+     inkscape:window-height="1148"
+     id="namedview106"
+     showgrid="false"
+     inkscape:zoom="1.3547758"
+     inkscape:cx="256.5"
+     inkscape:cy="297"
+     inkscape:window-x="45"
+     inkscape:window-y="24"
+     inkscape:window-maximized="0"
+     inkscape:current-layer="g4" />
+  <g
+     style="stroke-width:.025in; fill:none"
+     id="g4">
+    <!-- Line: box -->
+    <rect
+       x="450"
+       y="0"
+       width="6300"
+       height="7350"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+       id="rect6" />
+    <!-- Line: box -->
+    <rect
+       x="4950"
+       y="4950"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect8" />
+    <!-- Line: box -->
+    <rect
+       x="750"
+       y="600"
+       width="5700"
+       height="3750"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+       id="rect10" />
+    <!-- Line: box -->
+    <rect
+       x="0"
+       y="450"
+       width="6300"
+       height="7350"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+       id="rect12" />
+    <!-- Line: box -->
+    <rect
+       x="300"
+       y="1050"
+       width="5700"
+       height="3750"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+       id="rect14" />
+    <!-- Circle -->
+    <circle
+       cx="2850"
+       cy="3900"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle16" />
+    <!-- Circle -->
+    <circle
+       cx="3150"
+       cy="3900"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle18" />
+    <!-- Circle -->
+    <circle
+       cx="3450"
+       cy="3900"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle20" />
+    <!-- Circle -->
+    <circle
+       cx="1350"
+       cy="5100"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle22" />
+    <!-- Circle -->
+    <circle
+       cx="1650"
+       cy="5100"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle24" />
+    <!-- Circle -->
+    <circle
+       cx="1950"
+       cy="5100"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle26" />
+    <!-- Circle -->
+    <circle
+       cx="4350"
+       cy="5100"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle28" />
+    <!-- Circle -->
+    <circle
+       cx="4650"
+       cy="5100"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle30" />
+    <!-- Circle -->
+    <circle
+       cx="4950"
+       cy="5100"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle32" />
+    <!-- Line -->
+    <polyline
+       points="1350,3450 2350,2590 "
+       style="stroke:#00d1d1;stroke-width:30.0045575;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline34" />
+    <!-- Arrowhead on XXXpoint 1350 3450 - 2444 2510-->
+    <!-- Line -->
+    <polyline
+       points="4950,3450 3948,2590 "
+       style="stroke:#00d1d1;stroke-width:30.0045575;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline38" />
+    <!-- Arrowhead on XXXpoint 4950 3450 - 3854 2510-->
+    <!-- Line: box -->
+    <rect
+       x="750"
+       y="3450"
+       width="1800"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect42" />
+    <!-- Line -->
+    <polyline
+       points="2250,5400 2250,4414 "
+       style="stroke:#00d1d1;stroke-width:30.0045575;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline44" />
+    <!-- Arrowhead on XXXpoint 2250 5400 - 2250 4290-->
+    <!-- Line: box -->
+    <rect
+       x="1500"
+       y="5400"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect48" />
+    <!-- Line: box -->
+    <rect
+       x="300"
+       y="6600"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect50" />
+    <!-- Line: box -->
+    <rect
+       x="3750"
+       y="3450"
+       width="1800"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect52" />
+    <!-- Line: box -->
+    <rect
+       x="4500"
+       y="5400"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect54" />
+    <!-- Line: box -->
+    <rect
+       x="3300"
+       y="6600"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect56" />
+    <!-- Line: box -->
+    <rect
+       x="2250"
+       y="1650"
+       width="1800"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect58" />
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="6450"
+       y="300"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="192"
+       text-anchor="end"
+       id="text60">rcu_bh</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="3150"
+       y="1950"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text62">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="3150"
+       y="2250"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text64">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1650"
+       y="3750"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text66">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1650"
+       y="4050"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text68">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4650"
+       y="4050"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text70">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4650"
+       y="3750"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text72">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2250"
+       y="5700"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text74">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2250"
+       y="6000"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text76">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1050"
+       y="6900"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text78">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1050"
+       y="7200"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text80">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5250"
+       y="5700"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text82">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5250"
+       y="6000"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text84">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4050"
+       y="6900"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text86">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4050"
+       y="7200"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text88">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="450"
+       y="1350"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="start"
+       id="text90">struct rcu_state</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="6000"
+       y="750"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="192"
+       text-anchor="end"
+       id="text92">rcu_sched</text>
+    <!-- Line -->
+    <polyline
+       points="5250,5400 5250,4414 "
+       style="stroke:#00d1d1;stroke-width:30.0045575;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline94" />
+    <!-- Arrowhead on XXXpoint 5250 5400 - 5250 4290-->
+    <!-- Line -->
+    <polyline
+       points="4050,6600 4050,4414 "
+       style="stroke:#00d1d1;stroke-width:30.0045575;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline98" />
+    <!-- Arrowhead on XXXpoint 4050 6600 - 4050 4290-->
+    <!-- Line -->
+    <polyline
+       points="1050,6600 1050,4414 "
+       style="stroke:#00d1d1;stroke-width:30.0045575;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline102" />
+    <!-- Arrowhead on XXXpoint 1050 6600 - 1050 4290-->
+  </g>
+</svg>
diff --git a/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCUBHdyntick.svg b/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCUBHdyntick.svg
new file mode 100644
index 0000000..21ba782
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCUBHdyntick.svg
@@ -0,0 +1,695 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
+
+<!-- CreationDate: Wed Dec  9 17:20:02 2015 -->
+
+<!-- Magnification: 2.000 -->
+
+<svg
+   xmlns:dc="http://purl.org/dc/elements/1.1/"
+   xmlns:cc="http://creativecommons.org/ns#"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   width="5.7in"
+   height="8.6in"
+   viewBox="-44 -44 6838 10288"
+   id="svg2"
+   version="1.1"
+   inkscape:version="0.48.4 r9939"
+   sodipodi:docname="BigTreeClassicRCUBHdyntick.fig">
+  <metadata
+     id="metadata166">
+    <rdf:RDF>
+      <cc:Work
+         rdf:about="">
+        <dc:format>image/svg+xml</dc:format>
+        <dc:type
+           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+        <dc:title></dc:title>
+      </cc:Work>
+    </rdf:RDF>
+  </metadata>
+  <defs
+     id="defs164">
+    <marker
+       inkscape:stockid="Arrow1Mend"
+       orient="auto"
+       refY="0.0"
+       refX="0.0"
+       id="Arrow1Mend"
+       style="overflow:visible;">
+      <path
+         id="path3924"
+         d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
+         style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;"
+         transform="scale(0.4) rotate(180) translate(10,0)" />
+    </marker>
+    <marker
+       inkscape:stockid="Arrow2Lend"
+       orient="auto"
+       refY="0.0"
+       refX="0.0"
+       id="Arrow2Lend"
+       style="overflow:visible;">
+      <path
+         id="path3936"
+         style="fill-rule:evenodd;stroke-width:0.62500000;stroke-linejoin:round;"
+         d="M 8.7185878,4.0337352 L -2.2072895,0.016013256 L 8.7185884,-4.0017078 C 6.9730900,-1.6296469 6.9831476,1.6157441 8.7185878,4.0337352 z "
+         transform="scale(1.1) rotate(180) translate(1,0)" />
+    </marker>
+  </defs>
+  <sodipodi:namedview
+     pagecolor="#ffffff"
+     bordercolor="#666666"
+     borderopacity="1"
+     objecttolerance="10"
+     gridtolerance="10"
+     guidetolerance="10"
+     inkscape:pageopacity="0"
+     inkscape:pageshadow="2"
+     inkscape:window-width="845"
+     inkscape:window-height="988"
+     id="namedview162"
+     showgrid="false"
+     inkscape:zoom="1.0452196"
+     inkscape:cx="256.5"
+     inkscape:cy="387.00003"
+     inkscape:window-x="356"
+     inkscape:window-y="61"
+     inkscape:window-maximized="0"
+     inkscape:current-layer="g4" />
+  <g
+     style="stroke-width:.025in; fill:none"
+     id="g4">
+    <!-- Line: box -->
+    <rect
+       x="450"
+       y="0"
+       width="6300"
+       height="7350"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+       id="rect6" />
+    <!-- Line: box -->
+    <rect
+       x="4950"
+       y="4950"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect8" />
+    <!-- Line: box -->
+    <rect
+       x="750"
+       y="600"
+       width="5700"
+       height="3750"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+       id="rect10" />
+    <!-- Line -->
+    <polyline
+       points="5250,8100 5688,5912 "
+       style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="polyline12" />
+    <!-- Arrowhead on XXXpoint 5250 8100 - 5710 5790-->
+    <polyline
+       points="5714 6068 5704 5822 5598 6044 "
+       style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+       id="polyline14" />
+    <!-- Line -->
+    <polyline
+       points="4050,9300 4486,7262 "
+       style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="polyline16" />
+    <!-- Arrowhead on XXXpoint 4050 9300 - 4512 7140-->
+    <polyline
+       points="4514 7418 4506 7172 4396 7394 "
+       style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+       id="polyline18" />
+    <!-- Line -->
+    <polyline
+       points="1040,9300 1476,7262 "
+       style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="polyline20" />
+    <!-- Arrowhead on XXXpoint 1040 9300 - 1502 7140-->
+    <polyline
+       points="1504 7418 1496 7172 1386 7394 "
+       style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+       id="polyline22" />
+    <!-- Line -->
+    <polyline
+       points="2240,8100 2676,6062 "
+       style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="polyline24" />
+    <!-- Arrowhead on XXXpoint 2240 8100 - 2702 5940-->
+    <polyline
+       points="2704 6218 2696 5972 2586 6194 "
+       style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+       id="polyline26" />
+    <!-- Line: box -->
+    <rect
+       x="0"
+       y="450"
+       width="6300"
+       height="7350"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+       id="rect28" />
+    <!-- Line: box -->
+    <rect
+       x="300"
+       y="1050"
+       width="5700"
+       height="3750"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+       id="rect30" />
+    <!-- Line -->
+    <polyline
+       points="1350,3450 2350,2590 "
+       style="stroke:#00d1d1;stroke-width:30.0045575;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline32" />
+    <!-- Arrowhead on XXXpoint 1350 3450 - 2444 2510-->
+    <!-- Line -->
+    <polyline
+       points="4950,3450 3948,2590 "
+       style="stroke:#00d1d1;stroke-width:30.0045575;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline36" />
+    <!-- Arrowhead on XXXpoint 4950 3450 - 3854 2510-->
+    <!-- Line -->
+    <polyline
+       points="4050,6600 4050,4414 "
+       style="stroke:#00d1d1;stroke-width:30.00455750000000066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline40" />
+    <!-- Arrowhead on XXXpoint 4050 6600 - 4050 4290-->
+    <!-- Line -->
+    <polyline
+       points="1050,6600 1050,4414 "
+       style="stroke:#00d1d1;stroke-width:30.00455750000000066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline44" />
+    <!-- Arrowhead on XXXpoint 1050 6600 - 1050 4290-->
+    <!-- Line -->
+    <polyline
+       points="2250,5400 2250,4414 "
+       style="stroke:#00d1d1;stroke-width:30.00455750000000066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline48" />
+    <!-- Arrowhead on XXXpoint 2250 5400 - 2250 4290-->
+    <!-- Line -->
+    <polyline
+       points="2250,8100 2250,6364 "
+       style="stroke:#00ff00;stroke-width:30;stroke-linejoin:miter;stroke-linecap:butt;marker-end:url(#Arrow1Mend)"
+       id="polyline52" />
+    <!-- Arrowhead on XXXpoint 2250 8100 - 2250 6240-->
+    <!-- Line -->
+    <polyline
+       points="1050,9300 1050,7564 "
+       style="stroke:#00ff00;stroke-width:30;stroke-linejoin:miter;stroke-linecap:butt;marker-end:url(#Arrow1Mend)"
+       id="polyline56" />
+    <!-- Arrowhead on XXXpoint 1050 9300 - 1050 7440-->
+    <!-- Line -->
+    <polyline
+       points="4050,9300 4050,7564 "
+       style="stroke:#00ff00;stroke-width:30;stroke-linejoin:miter;stroke-linecap:butt;marker-end:url(#Arrow1Mend)"
+       id="polyline60" />
+    <!-- Arrowhead on XXXpoint 4050 9300 - 4050 7440-->
+    <!-- Line -->
+    <polyline
+       points="5250,8100 5250,6364 "
+       style="stroke:#00ff00;stroke-width:30;stroke-linejoin:miter;stroke-linecap:butt;marker-end:url(#Arrow1Mend)"
+       id="polyline64" />
+    <!-- Arrowhead on XXXpoint 5250 8100 - 5250 6240-->
+    <!-- Circle -->
+    <circle
+       cx="2850"
+       cy="3900"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle68" />
+    <!-- Circle -->
+    <circle
+       cx="3150"
+       cy="3900"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle70" />
+    <!-- Circle -->
+    <circle
+       cx="3450"
+       cy="3900"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle72" />
+    <!-- Circle -->
+    <circle
+       cx="1350"
+       cy="5100"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle74" />
+    <!-- Circle -->
+    <circle
+       cx="1650"
+       cy="5100"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle76" />
+    <!-- Circle -->
+    <circle
+       cx="1950"
+       cy="5100"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle78" />
+    <!-- Circle -->
+    <circle
+       cx="4350"
+       cy="5100"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle80" />
+    <!-- Circle -->
+    <circle
+       cx="4650"
+       cy="5100"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle82" />
+    <!-- Circle -->
+    <circle
+       cx="4950"
+       cy="5100"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle84" />
+    <!-- Line: box -->
+    <rect
+       x="750"
+       y="3450"
+       width="1800"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect86" />
+    <!-- Line: box -->
+    <rect
+       x="300"
+       y="6600"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect88" />
+    <!-- Line: box -->
+    <rect
+       x="3750"
+       y="3450"
+       width="1800"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect90" />
+    <!-- Line: box -->
+    <rect
+       x="4500"
+       y="5400"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect92" />
+    <!-- Line: box -->
+    <rect
+       x="3300"
+       y="6600"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect94" />
+    <!-- Line: box -->
+    <rect
+       x="2250"
+       y="1650"
+       width="1800"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect96" />
+    <!-- Line: box -->
+    <rect
+       x="0"
+       y="9300"
+       width="2100"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+       id="rect98" />
+    <!-- Line: box -->
+    <rect
+       x="1350"
+       y="8100"
+       width="2100"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+       id="rect100" />
+    <!-- Line: box -->
+    <rect
+       x="3000"
+       y="9300"
+       width="2100"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+       id="rect102" />
+    <!-- Line: box -->
+    <rect
+       x="4350"
+       y="8100"
+       width="2100"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+       id="rect104" />
+    <!-- Line: box -->
+    <rect
+       x="1500"
+       y="5400"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect106" />
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="6450"
+       y="300"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="192"
+       text-anchor="end"
+       id="text108">rcu_bh</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="3150"
+       y="1950"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text110">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="3150"
+       y="2250"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text112">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1650"
+       y="3750"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text114">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1650"
+       y="4050"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text116">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4650"
+       y="4050"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text118">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4650"
+       y="3750"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text120">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2250"
+       y="5700"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text122">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2250"
+       y="6000"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text124">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1050"
+       y="6900"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text126">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1050"
+       y="7200"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text128">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5250"
+       y="5700"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text130">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5250"
+       y="6000"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text132">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4050"
+       y="6900"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text134">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4050"
+       y="7200"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text136">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="450"
+       y="1350"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="start"
+       id="text138">struct rcu_state</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1050"
+       y="9600"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text140">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1050"
+       y="9900"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text142">rcu_dynticks</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4050"
+       y="9600"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text144">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4050"
+       y="9900"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text146">rcu_dynticks</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2400"
+       y="8400"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text148">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2400"
+       y="8700"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text150">rcu_dynticks</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5400"
+       y="8400"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text152">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5400"
+       y="8700"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text154">rcu_dynticks</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="6000"
+       y="750"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="192"
+       text-anchor="end"
+       id="text156">rcu_sched</text>
+    <!-- Line -->
+    <polyline
+       points="5250,5400 5250,4414 "
+       style="stroke:#00d1d1;stroke-width:30.00455750000000066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline158" />
+    <!-- Arrowhead on XXXpoint 5250 5400 - 5250 4290-->
+  </g>
+</svg>
diff --git a/Documentation/RCU/Design/Data-Structures/BigTreePreemptRCUBHdyntick.svg b/Documentation/RCU/Design/Data-Structures/BigTreePreemptRCUBHdyntick.svg
new file mode 100644
index 0000000..15adcac
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/BigTreePreemptRCUBHdyntick.svg
@@ -0,0 +1,741 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
+
+<!-- CreationDate: Wed Dec  9 17:32:59 2015 -->
+
+<!-- Magnification: 2.000 -->
+
+<svg
+   xmlns:dc="http://purl.org/dc/elements/1.1/"
+   xmlns:cc="http://creativecommons.org/ns#"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   width="6.1in"
+   height="8.9in"
+   viewBox="-44 -44 7288 10738"
+   id="svg2"
+   version="1.1"
+   inkscape:version="0.48.4 r9939"
+   sodipodi:docname="BigTreePreemptRCUBHdyntick.fig">
+  <metadata
+     id="metadata182">
+    <rdf:RDF>
+      <cc:Work
+         rdf:about="">
+        <dc:format>image/svg+xml</dc:format>
+        <dc:type
+           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+        <dc:title></dc:title>
+      </cc:Work>
+    </rdf:RDF>
+  </metadata>
+  <defs
+     id="defs180">
+    <marker
+       inkscape:stockid="Arrow1Mend"
+       orient="auto"
+       refY="0.0"
+       refX="0.0"
+       id="Arrow1Mend"
+       style="overflow:visible;">
+      <path
+         id="path3940"
+         d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
+         style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;"
+         transform="scale(0.4) rotate(180) translate(10,0)" />
+    </marker>
+  </defs>
+  <sodipodi:namedview
+     pagecolor="#ffffff"
+     bordercolor="#666666"
+     borderopacity="1"
+     objecttolerance="10"
+     gridtolerance="10"
+     guidetolerance="10"
+     inkscape:pageopacity="0"
+     inkscape:pageshadow="2"
+     inkscape:window-width="874"
+     inkscape:window-height="1148"
+     id="namedview178"
+     showgrid="false"
+     inkscape:zoom="1.2097379"
+     inkscape:cx="274.5"
+     inkscape:cy="400.49997"
+     inkscape:window-x="946"
+     inkscape:window-y="24"
+     inkscape:window-maximized="0"
+     inkscape:current-layer="g4" />
+  <g
+     style="stroke-width:.025in; fill:none"
+     id="g4">
+    <!-- Line: box -->
+    <rect
+       x="900"
+       y="0"
+       width="6300"
+       height="7350"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+       id="rect6" />
+    <!-- Line: box -->
+    <rect
+       x="1200"
+       y="600"
+       width="5700"
+       height="3750"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+       id="rect8" />
+    <!-- Line: box -->
+    <rect
+       x="5400"
+       y="4950"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect10" />
+    <!-- Line: box -->
+    <rect
+       x="450"
+       y="450"
+       width="6300"
+       height="7350"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+       id="rect12" />
+    <!-- Line: box -->
+    <rect
+       x="750"
+       y="1050"
+       width="5700"
+       height="3750"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+       id="rect14" />
+    <!-- Line: box -->
+    <rect
+       x="4950"
+       y="5400"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect16" />
+    <!-- Line -->
+    <polyline
+       points="5250,8550 5688,6362 "
+       style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="polyline18" />
+    <!-- Arrowhead on XXXpoint 5250 8550 - 5710 6240-->
+    <polyline
+       points="5714 6518 5704 6272 5598 6494 "
+       style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+       id="polyline20" />
+    <!-- Line -->
+    <polyline
+       points="4050,9750 4486,7712 "
+       style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="polyline22" />
+    <!-- Arrowhead on XXXpoint 4050 9750 - 4512 7590-->
+    <polyline
+       points="4514 7868 4506 7622 4396 7844 "
+       style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+       id="polyline24" />
+    <!-- Line -->
+    <polyline
+       points="1040,9750 1476,7712 "
+       style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="polyline26" />
+    <!-- Arrowhead on XXXpoint 1040 9750 - 1502 7590-->
+    <polyline
+       points="1504 7868 1496 7622 1386 7844 "
+       style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+       id="polyline28" />
+    <!-- Line -->
+    <polyline
+       points="2240,8550 2676,6512 "
+       style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="polyline30" />
+    <!-- Arrowhead on XXXpoint 2240 8550 - 2702 6390-->
+    <polyline
+       points="2704 6668 2696 6422 2586 6644 "
+       style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+       id="polyline32" />
+    <!-- Line -->
+    <polyline
+       points="4050,9750 5682,6360 "
+       style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="polyline34" />
+    <!-- Arrowhead on XXXpoint 4050 9750 - 5736 6246-->
+    <polyline
+       points="5672 6518 5722 6276 5562 6466 "
+       style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+       id="polyline36" />
+    <!-- Line -->
+    <polyline
+       points="1010,9750 2642,6360 "
+       style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="polyline38" />
+    <!-- Arrowhead on XXXpoint 1010 9750 - 2696 6246-->
+    <polyline
+       points="2632 6518 2682 6276 2522 6466 "
+       style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+       id="polyline40" />
+    <!-- Line: box -->
+    <rect
+       x="0"
+       y="900"
+       width="6300"
+       height="7350"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+       id="rect42" />
+    <!-- Line: box -->
+    <rect
+       x="300"
+       y="1500"
+       width="5700"
+       height="3750"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+       id="rect44" />
+    <!-- Line -->
+    <polyline
+       points="1350,3900 2350,3040 "
+       style="stroke:#00d1d1;stroke-width:30.00205472;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline46" />
+    <!-- Arrowhead on XXXpoint 1350 3900 - 2444 2960-->
+    <!-- Line -->
+    <polyline
+       points="4950,3900 3948,3040 "
+       style="stroke:#00d1d1;stroke-width:30.00205472;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline50" />
+    <!-- Arrowhead on XXXpoint 4950 3900 - 3854 2960-->
+    <!-- Line -->
+    <polyline
+       points="4050,7050 4050,4864 "
+       style="stroke:#00d1d1;stroke-width:30.00205472;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline54" />
+    <!-- Arrowhead on XXXpoint 4050 7050 - 4050 4740-->
+    <!-- Line -->
+    <polyline
+       points="1050,7050 1050,4864 "
+       style="stroke:#00d1d1;stroke-width:30.00205472;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline58" />
+    <!-- Arrowhead on XXXpoint 1050 7050 - 1050 4740-->
+    <!-- Line -->
+    <polyline
+       points="2250,5850 2250,4864 "
+       style="stroke:#00d1d1;stroke-width:30.00205472;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline62" />
+    <!-- Arrowhead on XXXpoint 2250 5850 - 2250 4740-->
+    <!-- Line -->
+    <polyline
+       points="2250,8550 2250,6814 "
+       style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="polyline66" />
+    <!-- Arrowhead on XXXpoint 2250 8550 - 2250 6690-->
+    <!-- Line -->
+    <polyline
+       points="1050,9750 1050,8014 "
+       style="stroke:#00ff00;stroke-width:30.00205472;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline70" />
+    <!-- Arrowhead on XXXpoint 1050 9750 - 1050 7890-->
+    <!-- Line -->
+    <polyline
+       points="4050,9750 4050,8014 "
+       style="stroke:#00ff00;stroke-width:30.00205472;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline74" />
+    <!-- Arrowhead on XXXpoint 4050 9750 - 4050 7890-->
+    <!-- Line -->
+    <polyline
+       points="5250,8550 5250,6814 "
+       style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="polyline78" />
+    <!-- Arrowhead on XXXpoint 5250 8550 - 5250 6690-->
+    <!-- Circle -->
+    <circle
+       cx="2850"
+       cy="4350"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle82" />
+    <!-- Circle -->
+    <circle
+       cx="3150"
+       cy="4350"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle84" />
+    <!-- Circle -->
+    <circle
+       cx="3450"
+       cy="4350"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle86" />
+    <!-- Circle -->
+    <circle
+       cx="1350"
+       cy="5550"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle88" />
+    <!-- Circle -->
+    <circle
+       cx="1650"
+       cy="5550"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle90" />
+    <!-- Circle -->
+    <circle
+       cx="1950"
+       cy="5550"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle92" />
+    <!-- Circle -->
+    <circle
+       cx="4350"
+       cy="5550"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle94" />
+    <!-- Circle -->
+    <circle
+       cx="4650"
+       cy="5550"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle96" />
+    <!-- Circle -->
+    <circle
+       cx="4950"
+       cy="5550"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle98" />
+    <!-- Line: box -->
+    <rect
+       x="750"
+       y="3900"
+       width="1800"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect100" />
+    <!-- Line: box -->
+    <rect
+       x="300"
+       y="7050"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect102" />
+    <!-- Line: box -->
+    <rect
+       x="3750"
+       y="3900"
+       width="1800"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect104" />
+    <!-- Line: box -->
+    <rect
+       x="4500"
+       y="5850"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect106" />
+    <!-- Line: box -->
+    <rect
+       x="3300"
+       y="7050"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect108" />
+    <!-- Line: box -->
+    <rect
+       x="2250"
+       y="2100"
+       width="1800"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect110" />
+    <!-- Line: box -->
+    <rect
+       x="0"
+       y="9750"
+       width="2100"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+       id="rect112" />
+    <!-- Line: box -->
+    <rect
+       x="1350"
+       y="8550"
+       width="2100"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+       id="rect114" />
+    <!-- Line: box -->
+    <rect
+       x="3000"
+       y="9750"
+       width="2100"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+       id="rect116" />
+    <!-- Line: box -->
+    <rect
+       x="4350"
+       y="8550"
+       width="2100"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+       id="rect118" />
+    <!-- Line: box -->
+    <rect
+       x="1500"
+       y="5850"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect120" />
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="6450"
+       y="750"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="192"
+       text-anchor="end"
+       id="text122">rcu_bh</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="3150"
+       y="2400"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text124">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="3150"
+       y="2700"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text126">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1650"
+       y="4200"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text128">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1650"
+       y="4500"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text130">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4650"
+       y="4500"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text132">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4650"
+       y="4200"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text134">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2250"
+       y="6150"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text136">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2250"
+       y="6450"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text138">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1050"
+       y="7350"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text140">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1050"
+       y="7650"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text142">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5250"
+       y="6150"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text144">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5250"
+       y="6450"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text146">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4050"
+       y="7350"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text148">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4050"
+       y="7650"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text150">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="450"
+       y="1800"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="start"
+       id="text152">struct rcu_state</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1050"
+       y="10050"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text154">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1050"
+       y="10350"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text156">rcu_dynticks</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4050"
+       y="10050"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text158">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4050"
+       y="10350"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text160">rcu_dynticks</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2400"
+       y="8850"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text162">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2400"
+       y="9150"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text164">rcu_dynticks</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5400"
+       y="8850"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text166">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5400"
+       y="9150"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text168">rcu_dynticks</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="6900"
+       y="300"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="192"
+       text-anchor="end"
+       id="text170">rcu_preempt</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="6000"
+       y="1200"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="192"
+       text-anchor="end"
+       id="text172">rcu_sched</text>
+    <!-- Line -->
+    <polyline
+       points="5250,5850 5250,4864 "
+       style="stroke:#00d1d1;stroke-width:30.00205472;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline174" />
+    <!-- Arrowhead on XXXpoint 5250 5850 - 5250 4740-->
+  </g>
+</svg>
diff --git a/Documentation/RCU/Design/Data-Structures/BigTreePreemptRCUBHdyntickCB.svg b/Documentation/RCU/Design/Data-Structures/BigTreePreemptRCUBHdyntickCB.svg
new file mode 100644
index 0000000..bbc3801
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/BigTreePreemptRCUBHdyntickCB.svg
@@ -0,0 +1,858 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
+
+<!-- CreationDate: Wed Dec  9 17:29:48 2015 -->
+
+<!-- Magnification: 2.000 -->
+
+<svg
+   xmlns:dc="http://purl.org/dc/elements/1.1/"
+   xmlns:cc="http://creativecommons.org/ns#"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   width="7.4in"
+   height="9.9in"
+   viewBox="-44 -44 8938 11938"
+   id="svg2"
+   version="1.1"
+   inkscape:version="0.48.4 r9939"
+   sodipodi:docname="BigTreePreemptRCUBHdyntickCB.svg">
+  <metadata
+     id="metadata212">
+    <rdf:RDF>
+      <cc:Work
+         rdf:about="">
+        <dc:format>image/svg+xml</dc:format>
+        <dc:type
+           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+        <dc:title></dc:title>
+      </cc:Work>
+    </rdf:RDF>
+  </metadata>
+  <defs
+     id="defs210">
+    <marker
+       inkscape:stockid="Arrow1Mend"
+       orient="auto"
+       refY="0.0"
+       refX="0.0"
+       id="Arrow1Mend"
+       style="overflow:visible;">
+      <path
+         id="path3970"
+         d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
+         style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;"
+         transform="scale(0.4) rotate(180) translate(10,0)" />
+    </marker>
+  </defs>
+  <sodipodi:namedview
+     pagecolor="#ffffff"
+     bordercolor="#666666"
+     borderopacity="1"
+     objecttolerance="10"
+     gridtolerance="10"
+     guidetolerance="10"
+     inkscape:pageopacity="0"
+     inkscape:pageshadow="2"
+     inkscape:window-width="881"
+     inkscape:window-height="1128"
+     id="namedview208"
+     showgrid="false"
+     inkscape:zoom="1.0195195"
+     inkscape:cx="333"
+     inkscape:cy="445.49997"
+     inkscape:window-x="936"
+     inkscape:window-y="24"
+     inkscape:window-maximized="0"
+     inkscape:current-layer="g4" />
+  <g
+     style="stroke-width:.025in; fill:none"
+     id="g4">
+    <!-- Line: box -->
+    <rect
+       x="900"
+       y="0"
+       width="6300"
+       height="7350"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+       id="rect6" />
+    <!-- Line: box -->
+    <rect
+       x="1200"
+       y="600"
+       width="5700"
+       height="3750"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+       id="rect8" />
+    <!-- Line: box -->
+    <rect
+       x="5400"
+       y="4950"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect10" />
+    <!-- Line: box -->
+    <rect
+       x="450"
+       y="450"
+       width="6300"
+       height="7350"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+       id="rect12" />
+    <!-- Line: box -->
+    <rect
+       x="750"
+       y="1050"
+       width="5700"
+       height="3750"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+       id="rect14" />
+    <!-- Line: box -->
+    <rect
+       x="4950"
+       y="5400"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect16" />
+    <!-- Line -->
+    <polyline
+       points="5250,8550 5688,6362 "
+       style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="polyline18" />
+    <!-- Arrowhead on XXXpoint 5250 8550 - 5710 6240-->
+    <polyline
+       points="5714 6518 5704 6272 5598 6494 "
+       style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+       id="polyline20" />
+    <!-- Line -->
+    <polyline
+       points="4050,9750 4486,7712 "
+       style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="polyline22" />
+    <!-- Arrowhead on XXXpoint 4050 9750 - 4512 7590-->
+    <polyline
+       points="4514 7868 4506 7622 4396 7844 "
+       style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+       id="polyline24" />
+    <!-- Line -->
+    <polyline
+       points="1040,9750 1476,7712 "
+       style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="polyline26" />
+    <!-- Arrowhead on XXXpoint 1040 9750 - 1502 7590-->
+    <polyline
+       points="1504 7868 1496 7622 1386 7844 "
+       style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+       id="polyline28" />
+    <!-- Line -->
+    <polyline
+       points="2240,8550 2676,6512 "
+       style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="polyline30" />
+    <!-- Arrowhead on XXXpoint 2240 8550 - 2702 6390-->
+    <polyline
+       points="2704 6668 2696 6422 2586 6644 "
+       style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+       id="polyline32" />
+    <!-- Line -->
+    <polyline
+       points="4050,9600 5692,6062 "
+       style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="polyline34" />
+    <!-- Arrowhead on XXXpoint 4050 9600 - 5744 5948-->
+    <polyline
+       points="5682 6220 5730 5978 5574 6170 "
+       style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+       id="polyline36" />
+    <!-- Line -->
+    <polyline
+       points="1086,9600 2728,6062 "
+       style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="polyline38" />
+    <!-- Arrowhead on XXXpoint 1086 9600 - 2780 5948-->
+    <polyline
+       points="2718 6220 2766 5978 2610 6170 "
+       style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+       id="polyline40" />
+    <!-- Line: box -->
+    <rect
+       x="0"
+       y="900"
+       width="6300"
+       height="7350"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+       id="rect42" />
+    <!-- Line: box -->
+    <rect
+       x="300"
+       y="1500"
+       width="5700"
+       height="3750"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+       id="rect44" />
+    <!-- Line -->
+    <polyline
+       points="1350,3900 2350,3040 "
+       style="stroke:#00d1d1;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline46" />
+    <!-- Arrowhead on XXXpoint 1350 3900 - 2444 2960-->
+    <!-- Line -->
+    <polyline
+       points="4950,3900 3948,3040 "
+       style="stroke:#00d1d1;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline50" />
+    <!-- Arrowhead on XXXpoint 4950 3900 - 3854 2960-->
+    <!-- Line -->
+    <polyline
+       points="4050,7050 4050,4864 "
+       style="stroke:#00d1d1;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline54" />
+    <!-- Arrowhead on XXXpoint 4050 7050 - 4050 4740-->
+    <!-- Line -->
+    <polyline
+       points="1050,7050 1050,4864 "
+       style="stroke:#00d1d1;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline58" />
+    <!-- Arrowhead on XXXpoint 1050 7050 - 1050 4740-->
+    <!-- Line -->
+    <polyline
+       points="2250,5850 2250,4864 "
+       style="stroke:#00d1d1;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline62" />
+    <!-- Arrowhead on XXXpoint 2250 5850 - 2250 4740-->
+    <!-- Line -->
+    <polyline
+       points="2250,8550 2250,6814 "
+       style="stroke:#00ff00;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline66" />
+    <!-- Arrowhead on XXXpoint 2250 8550 - 2250 6690-->
+    <!-- Line -->
+    <polyline
+       points="1050,9750 1050,8014 "
+       style="stroke:#00ff00;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline70" />
+    <!-- Arrowhead on XXXpoint 1050 9750 - 1050 7890-->
+    <!-- Line -->
+    <polyline
+       points="4050,9750 4050,8014 "
+       style="stroke:#00ff00;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline74" />
+    <!-- Arrowhead on XXXpoint 4050 9750 - 4050 7890-->
+    <!-- Line -->
+    <polyline
+       points="5250,8550 5250,6814 "
+       style="stroke:#00ff00;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline78" />
+    <!-- Arrowhead on XXXpoint 5250 8550 - 5250 6690-->
+    <!-- Line -->
+    <polyline
+       points="6000,6300 8048,7910 "
+       style="stroke:#87cfff;stroke-width:30;stroke-linejoin:miter;stroke-linecap:butt;marker-end:url(#Arrow1Mend)"
+       id="polyline82" />
+    <!-- Arrowhead on XXXpoint 6000 6300 - 8146 7986-->
+    <!-- Circle -->
+    <circle
+       cx="2850"
+       cy="4350"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle86" />
+    <!-- Circle -->
+    <circle
+       cx="3150"
+       cy="4350"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle88" />
+    <!-- Circle -->
+    <circle
+       cx="3450"
+       cy="4350"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle90" />
+    <!-- Circle -->
+    <circle
+       cx="1350"
+       cy="5550"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle92" />
+    <!-- Circle -->
+    <circle
+       cx="1650"
+       cy="5550"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle94" />
+    <!-- Circle -->
+    <circle
+       cx="1950"
+       cy="5550"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle96" />
+    <!-- Circle -->
+    <circle
+       cx="4350"
+       cy="5550"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle98" />
+    <!-- Circle -->
+    <circle
+       cx="4650"
+       cy="5550"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle100" />
+    <!-- Circle -->
+    <circle
+       cx="4950"
+       cy="5550"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle102" />
+    <!-- Line: box -->
+    <rect
+       x="7350"
+       y="7950"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect104" />
+    <!-- Line: box -->
+    <rect
+       x="7350"
+       y="9450"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect106" />
+    <!-- Line -->
+    <polyline
+       points="8100,8850 8100,9384 "
+       style="stroke:#000000;stroke-width:30;stroke-linejoin:miter;stroke-linecap:butt;marker-end:url(#Arrow1Mend)"
+       id="polyline108" />
+    <!-- Arrowhead on XXXpoint 8100 8850 - 8100 9510-->
+    <!-- Line: box -->
+    <rect
+       x="7350"
+       y="10950"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect112" />
+    <!-- Line -->
+    <polyline
+       points="8100,10350 8100,10884 "
+       style="stroke:#000000;stroke-width:30;stroke-linejoin:miter;stroke-linecap:butt;marker-end:url(#Arrow1Mend)"
+       id="polyline114" />
+    <!-- Arrowhead on XXXpoint 8100 10350 - 8100 11010-->
+    <!-- Line: box -->
+    <rect
+       x="750"
+       y="3900"
+       width="1800"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect118" />
+    <!-- Line: box -->
+    <rect
+       x="300"
+       y="7050"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect120" />
+    <!-- Line: box -->
+    <rect
+       x="3750"
+       y="3900"
+       width="1800"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect122" />
+    <!-- Line: box -->
+    <rect
+       x="4500"
+       y="5850"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect124" />
+    <!-- Line: box -->
+    <rect
+       x="3300"
+       y="7050"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect126" />
+    <!-- Line: box -->
+    <rect
+       x="2250"
+       y="2100"
+       width="1800"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect128" />
+    <!-- Line: box -->
+    <rect
+       x="0"
+       y="9750"
+       width="2100"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+       id="rect130" />
+    <!-- Line: box -->
+    <rect
+       x="1350"
+       y="8550"
+       width="2100"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+       id="rect132" />
+    <!-- Line: box -->
+    <rect
+       x="3000"
+       y="9750"
+       width="2100"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+       id="rect134" />
+    <!-- Line: box -->
+    <rect
+       x="4350"
+       y="8550"
+       width="2100"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+       id="rect136" />
+    <!-- Line: box -->
+    <rect
+       x="1500"
+       y="5850"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect138" />
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="8100"
+       y="8250"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text140">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="8100"
+       y="8550"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text142">rcu_head</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="8100"
+       y="9750"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text144">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="8100"
+       y="10050"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text146">rcu_head</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="8100"
+       y="11250"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text148">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="8100"
+       y="11550"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text150">rcu_head</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="6000"
+       y="1200"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="192"
+       text-anchor="end"
+       id="text152">rcu_sched</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="6450"
+       y="750"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="192"
+       text-anchor="end"
+       id="text154">rcu_bh</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="3150"
+       y="2400"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text156">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="3150"
+       y="2700"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text158">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1650"
+       y="4200"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text160">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1650"
+       y="4500"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text162">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4650"
+       y="4500"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text164">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4650"
+       y="4200"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text166">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2250"
+       y="6150"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text168">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2250"
+       y="6450"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text170">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1050"
+       y="7350"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text172">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1050"
+       y="7650"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text174">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5250"
+       y="6150"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text176">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5250"
+       y="6450"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text178">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4050"
+       y="7350"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text180">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4050"
+       y="7650"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text182">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="450"
+       y="1800"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="start"
+       id="text184">struct rcu_state</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1050"
+       y="10050"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text186">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1050"
+       y="10350"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text188">rcu_dynticks</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4050"
+       y="10050"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text190">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4050"
+       y="10350"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text192">rcu_dynticks</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2400"
+       y="8850"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text194">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2400"
+       y="9150"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text196">rcu_dynticks</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5400"
+       y="8850"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text198">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5400"
+       y="9150"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text200">rcu_dynticks</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="6900"
+       y="300"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="192"
+       text-anchor="end"
+       id="text202">rcu_preempt</text>
+    <!-- Line -->
+    <polyline
+       points="5250,5850 5250,4864 "
+       style="stroke:#00d1d1;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline204" />
+    <!-- Arrowhead on XXXpoint 5250 5850 - 5250 4740-->
+  </g>
+</svg>
diff --git a/Documentation/RCU/Design/Data-Structures/Data-Structures.html b/Documentation/RCU/Design/Data-Structures/Data-Structures.html
new file mode 100644
index 0000000..7eb47ac
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/Data-Structures.html
@@ -0,0 +1,1333 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+        "http://www.w3.org/TR/html4/loose.dtd">
+        <html>
+        <head><title>A Tour Through TREE_RCU's Data Structures [LWN.net]</title>
+        <meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">
+        </head><body>
+
+           <p>January 27, 2016</p>
+           <p>This article was contributed by Paul E.&nbsp;McKenney</p>
+
+<h3>Introduction</h3>
+
+This document describes RCU's major data structures and their relationship
+to each other.
+
+<ol>
+<li>	<a href="#Data-Structure Relationships">
+	Data-Structure Relationships</a>
+<li>	<a href="#The rcu_state Structure">
+	The <tt>rcu_state</tt> Structure</a>
+<li>	<a href="#The rcu_node Structure">
+	The <tt>rcu_node</tt> Structure</a>
+<li>	<a href="#The rcu_data Structure">
+	The <tt>rcu_data</tt> Structure</a>
+<li>	<a href="#The rcu_dynticks Structure">
+	The <tt>rcu_dynticks</tt> Structure</a>
+<li>	<a href="#The rcu_head Structure">
+	The <tt>rcu_head</tt> Structure</a>
+<li>	<a href="#RCU-Specific Fields in the task_struct Structure">
+	RCU-Specific Fields in the <tt>task_struct</tt> Structure</a>
+<li>	<a href="#Accessor Functions">
+	Accessor Functions</a>
+</ol>
+
+At the end we have the
+<a href="#Answers to Quick Quizzes">answers to the quick quizzes</a>.
+
+<h3><a name="Data-Structure Relationships">Data-Structure Relationships</a></h3>
+
+<p>RCU is for all intents and purposes a large state machine, and its
+data structures maintain the state in such a way as to allow RCU readers
+to execute extremely quickly, while also processing the RCU grace periods
+requested by updaters in an efficient and extremely scalable fashion.
+The efficiency and scalability of RCU updaters is provided primarily
+by a combining tree, as shown below:
+
+</p><p><img src="BigTreeClassicRCU.svg" alt="BigTreeClassicRCU.svg" width="30%">
+
+</p><p>This diagram shows an enclosing <tt>rcu_state</tt> structure
+containing a tree of <tt>rcu_node</tt> structures.
+Each leaf node of the <tt>rcu_node</tt> tree has up to 16
+<tt>rcu_data</tt> structures associated with it, so that there
+are <tt>NR_CPUS</tt> <tt>rcu_data</tt> structures,
+one for each possible CPU.
+This structure is adjusted at boot time, if needed, to handle the
+common case where <tt>nr_cpu_ids</tt> is much less than
+<tt>NR_CPUS</tt>.
+For example, a number of Linux distributions set <tt>NR_CPUS=4096</tt>,
+which results in a three-level <tt>rcu_node</tt> tree.
+If the actual hardware has only 16 CPUs, RCU will adjust itself
+at boot time, resulting in an <tt>rcu_node</tt> tree with only a single node.
+
+</p><p>The purpose of this combining tree is to allow per-CPU events
+such as quiescent states, dyntick-idle transitions,
+and CPU hotplug operations to be processed efficiently
+and scalably.
+Quiescent states are recorded by the per-CPU <tt>rcu_data</tt> structures,
+and other events are recorded by the leaf-level <tt>rcu_node</tt>
+structures.
+All of these events are combined at each level of the tree until finally
+grace periods are completed at the tree's root <tt>rcu_node</tt>
+structure.
+A grace period can be completed at the root once every CPU
+(or, in the case of <tt>CONFIG_PREEMPT_RCU</tt>, task)
+has passed through a quiescent state.
+Once a grace period has completed, record of that fact is propagated
+back down the tree.
+
+</p><p>As can be seen from the diagram, on a 64-bit system
+a two-level tree with 64 leaves can accommodate 1,024 CPUs, with a fanout
+of 64 at the root and a fanout of 16 at the leaves.
+
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	Why isn't the fanout at the leaves also 64?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	Because there are more types of events that affect the leaf-level
+	<tt>rcu_node</tt> structures than further up the tree.
+	Therefore, if the leaf <tt>rcu_node</tt> structures have fanout of
+	64, the contention on these structures' <tt>-&gt;lock</tt> fields
+	becomes excessive.
+	Experimentation on a wide variety of systems has shown that a fanout
+	of 16 works well for the leaves of the <tt>rcu_node</tt> tree.
+	</font>
+
+	<p><font color="ffffff">Of course, further experience with
+	systems having hundreds or thousands of CPUs may demonstrate
+	that the fanout for the non-leaf <tt>rcu_node</tt> structures
+	must also be reduced.
+	Such reduction can be easily carried out when and if it proves
+	necessary.
+	In the meantime, if you are using such a system and running into
+	contention problems on the non-leaf <tt>rcu_node</tt> structures,
+	you may use the <tt>CONFIG_RCU_FANOUT</tt> kernel configuration
+	parameter to reduce the non-leaf fanout as needed.
+	</font>
+
+	<p><font color="ffffff">Kernels built for systems with
+	strong NUMA characteristics might also need to adjust
+	<tt>CONFIG_RCU_FANOUT</tt> so that the domains of the
+	<tt>rcu_node</tt> structures align with hardware boundaries.
+	However, there has thus far been no need for this.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
+
+<p>If your system has more than 1,024 CPUs (or more than 512 CPUs on
+a 32-bit system), then RCU will automatically add more levels to the
+tree.
+For example, if you are crazy enough to build a 64-bit system with 65,536
+CPUs, RCU would configure the <tt>rcu_node</tt> tree as follows:
+
+</p><p><img src="HugeTreeClassicRCU.svg" alt="HugeTreeClassicRCU.svg" width="50%">
+
+</p><p>RCU currently permits up to a four-level tree, which on a 64-bit system
+accommodates up to 4,194,304 CPUs, though only a mere 524,288 CPUs for
+32-bit systems.
+On the other hand, you can set <tt>CONFIG_RCU_FANOUT</tt> to be
+as small as 2 if you wish, which would permit only 16 CPUs, which
+is useful for testing.
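+
+</p><p>As a quick check on these numbers, note that each depth's capacity
+is simply the product of the per-level fanouts (64 at the non-leaf levels
+on 64-bit systems, 32 on 32-bit systems, and 16 at the leaves):
+
+<pre>
+  Two levels,   64-bit:  64 * 16           =     1,024 CPUs
+  Two levels,   32-bit:  32 * 16           =       512 CPUs
+  Three levels, 64-bit:  64 * 64 * 16      =    65,536 CPUs
+  Four levels,  64-bit:  64 * 64 * 64 * 16 = 4,194,304 CPUs
+  Four levels,  32-bit:  32 * 32 * 32 * 16 =   524,288 CPUs
+</pre>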
+
+</p><p>This multi-level combining tree allows us to get most of the
+performance and scalability
+benefits of partitioning, even though RCU grace-period detection is
+inherently a global operation.
+The trick here is that only the last CPU to report a quiescent state
+into a given <tt>rcu_node</tt> structure need advance to the <tt>rcu_node</tt>
+structure at the next level up the tree.
+This means that at the leaf-level <tt>rcu_node</tt> structure, only
+one access out of sixteen will progress up the tree.
+For the internal <tt>rcu_node</tt> structures, the situation is even
+more extreme:  Only one access out of sixty-four will progress up
+the tree.
+Because the vast majority of the CPUs do not progress up the tree,
+the lock contention remains roughly constant up the tree.
+No matter how many CPUs there are in the system, at most 64 quiescent-state
+reports per grace period will progress all the way to the root
+<tt>rcu_node</tt> structure, thus ensuring that the lock contention
+on that root <tt>rcu_node</tt> structure remains acceptably low.
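+
+</p><p>The following greatly simplified sketch shows the shape of this
+propagation.
+It is not the kernel's actual code:  locking, grace-period checks, and
+many other details are omitted, and the field names (<tt>-&gt;qsmask</tt>,
+the bitmask of children that have not yet reported a quiescent state,
+along with <tt>-&gt;parent</tt> and <tt>-&gt;grpmask</tt>) anticipate the
+<tt>rcu_node</tt> fields described later in this document.
+
+<pre>
+static void report_qs_sketch(struct rcu_node *rnp, unsigned long grpmask)
+{
+        for (;;) {
+                rnp-&gt;qsmask &amp;= ~grpmask; /* This child has reported in. */
+                if (rnp-&gt;qsmask != 0)
+                        return;  /* Other children still outstanding. */
+                if (rnp-&gt;parent == NULL)
+                        break;   /* Root reached: grace period can end. */
+                grpmask = rnp-&gt;grpmask;  /* Report to the parent in turn. */
+                rnp = rnp-&gt;parent;
+        }
+        /* All CPUs (and tasks) have reported: clean up the grace period. */
+}
+</pre>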
+
+</p><p>In effect, the combining tree acts like a big shock absorber,
+keeping lock contention under control at all tree levels regardless
+of the level of loading on the system.
+
+</p><p>The Linux kernel actually supports multiple flavors of RCU
+running concurrently, so RCU builds separate data structures for each
+flavor.
+For example, for <tt>CONFIG_TREE_RCU=y</tt> kernels, RCU provides
+rcu_sched and rcu_bh, as shown below:
+
+</p><p><img src="BigTreeClassicRCUBH.svg" alt="BigTreeClassicRCUBH.svg" width="33%">
+
+</p><p>Energy efficiency is increasingly important, and for that
+reason the Linux kernel provides <tt>CONFIG_NO_HZ_IDLE</tt>, which
+turns off the scheduling-clock interrupts on idle CPUs, which in
+turn allows those CPUs to attain deeper sleep states and to consume
+less energy.
+CPUs whose scheduling-clock interrupts have been turned off are
+said to be in <i>dyntick-idle mode</i>.
+RCU must handle dyntick-idle CPUs specially
+because RCU would otherwise wake up each CPU on every grace period,
+which would defeat the whole purpose of <tt>CONFIG_NO_HZ_IDLE</tt>.
+RCU uses the <tt>rcu_dynticks</tt> structure to track
+which CPUs are in dyntick idle mode, as shown below:
+
+</p><p><img src="BigTreeClassicRCUBHdyntick.svg" alt="BigTreeClassicRCUBHdyntick.svg" width="33%">
+
+</p><p>However, if a CPU is in dyntick-idle mode, it is in that mode
+for all flavors of RCU.
+Therefore, a single <tt>rcu_dynticks</tt> structure is allocated per
+CPU, and all of a given CPU's <tt>rcu_data</tt> structures share
+that <tt>rcu_dynticks</tt>, as shown in the figure.
+
+</p><p>Kernels built with <tt>CONFIG_PREEMPT_RCU</tt> support
+rcu_preempt in addition to rcu_sched and rcu_bh, as shown below:
+
+</p><p><img src="BigTreePreemptRCUBHdyntick.svg" alt="BigTreePreemptRCUBHdyntick.svg" width="35%">
+
+</p><p>RCU updaters wait for normal grace periods by registering
+RCU callbacks, either directly via <tt>call_rcu()</tt> and
+friends (namely <tt>call_rcu_bh()</tt> and <tt>call_rcu_sched()</tt>,
+there being a separate interface per flavor of RCU)
+or indirectly via <tt>synchronize_rcu()</tt> and friends.
+RCU callbacks are represented by <tt>rcu_head</tt> structures,
+which are queued on <tt>rcu_data</tt> structures while they are
+waiting for a grace period to elapse, as shown in the following figure:
+
+</p><p><img src="BigTreePreemptRCUBHdyntickCB.svg" alt="BigTreePreemptRCUBHdyntickCB.svg" width="40%">
+
+</p><p>This figure shows how <tt>TREE_RCU</tt>'s and
+<tt>PREEMPT_RCU</tt>'s major data structures are related.
+Lesser data structures will be introduced with the algorithms that
+make use of them.
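+
+</p><p>As a minimal usage sketch, an updater might embed an
+<tt>rcu_head</tt> in its own data structure and post a callback as
+follows, where <tt>struct foo</tt> and <tt>foo_reclaim()</tt> are
+invented for illustration:
+
+<pre>
+struct foo {
+        int a;
+        struct rcu_head rh;  /* Embedded rcu_head. */
+};
+
+static void foo_reclaim(struct rcu_head *rhp)
+{
+        kfree(container_of(rhp, struct foo, rh));
+}
+
+/* Given a struct foo *p already removed from all RCU-protected lists: */
+call_rcu(&amp;p-&gt;rh, foo_reclaim);
+</pre>
+
+</p><p>The grace period guarantees that any reader still holding a
+reference to <tt>*p</tt> completes before <tt>foo_reclaim()</tt> frees it.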
+
+</p><p>Note that each of the data structures in the above figure has
+its own synchronization:
+
+<p><ol>
+<li>	Each <tt>rcu_state</tt> structure has a lock and a mutex,
+	and some fields are protected by the corresponding root
+	<tt>rcu_node</tt> structure's lock.
+<li>	Each <tt>rcu_node</tt> structure has a spinlock.
+<li>	The fields in <tt>rcu_data</tt> are private to the corresponding
+	CPU, although a few can be read and written by other CPUs.
+<li>	Similarly, the fields in <tt>rcu_dynticks</tt> are private
+	to the corresponding CPU, although a few can be read by
+	other CPUs.
+</ol>
+
+<p>It is important to note that different data structures can have
+very different ideas about the state of RCU at any given time.
+For but one example, awareness of the start or end of a given RCU
+grace period propagates slowly through the data structures.
+This slow propagation is absolutely necessary for RCU to have good
+read-side performance.
+If this balkanized implementation seems foreign to you, one useful
+trick is to consider each instance of these data structures to be
+a different person, each having the usual slightly different
+view of reality.
+
+</p><p>The general role of each of these data structures is as
+follows:
+
+</p><ol>
+<li>	<tt>rcu_state</tt>:
+	This structure forms the interconnection between the
+	<tt>rcu_node</tt> and <tt>rcu_data</tt> structures,
+	tracks grace periods, serves as short-term repository
+	for callbacks orphaned by CPU-hotplug events,
+	maintains <tt>rcu_barrier()</tt> state,
+	tracks expedited grace-period state,
+	and maintains state used to force quiescent states when
+	grace periods extend too long.
+<li>	<tt>rcu_node</tt>: This structure forms the combining
+	tree that propagates quiescent-state
+	information from the leaves to the root, and also propagates
+	grace-period information from the root to the leaves.
+	It provides local copies of the grace-period state in order
+	to allow this information to be accessed in a synchronized
+	manner without suffering the scalability limitations that
+	would otherwise be imposed by global locking.
+	In <tt>CONFIG_PREEMPT_RCU</tt> kernels, it manages the lists
+	of tasks that have blocked while in their current
+	RCU read-side critical section.
+	In <tt>CONFIG_PREEMPT_RCU</tt> with
+	<tt>CONFIG_RCU_BOOST</tt>, it manages the
+	per-<tt>rcu_node</tt> priority-boosting
+	kernel threads (kthreads) and state.
+	Finally, it records CPU-hotplug state in order to determine
+	which CPUs should be ignored during a given grace period.
+<li>	<tt>rcu_data</tt>: This per-CPU structure is the
+	focus of quiescent-state detection and RCU callback queuing.
+	It also tracks its relationship to the corresponding leaf
+	<tt>rcu_node</tt> structure to allow more-efficient
+	propagation of quiescent states up the <tt>rcu_node</tt>
+	combining tree.
+	Like the <tt>rcu_node</tt> structure, it provides a local
+	copy of the grace-period information to allow synchronized
+	access to this information from the corresponding CPU at
+	essentially no cost.
+	Finally, this structure records past dyntick-idle state
+	for the corresponding CPU and also tracks statistics.
+<li>	<tt>rcu_dynticks</tt>:
+	This per-CPU structure tracks the current dyntick-idle
+	state for the corresponding CPU.
+	Unlike the other three structures, the <tt>rcu_dynticks</tt>
+	structure is not replicated per RCU flavor.
+<li>	<tt>rcu_head</tt>:
+	This structure represents RCU callbacks, and is the
+	only structure allocated and managed by RCU users.
+	The <tt>rcu_head</tt> structure is normally embedded
+	within the RCU-protected data structure.
+</ol>
+
+<p>If all you wanted from this article was a general notion of how
+RCU's data structures are related, you are done.
+Otherwise, each of the following sections gives more details on
+the <tt>rcu_state</tt>, <tt>rcu_node</tt>, <tt>rcu_data</tt>,
+and <tt>rcu_dynticks</tt> data structures.
+
+<h3><a name="The rcu_state Structure">
+The <tt>rcu_state</tt> Structure</a></h3>
+
+<p>The <tt>rcu_state</tt> structure is the base structure that
+represents a flavor of RCU.
+This structure forms the interconnection between the
+<tt>rcu_node</tt> and <tt>rcu_data</tt> structures,
+tracks grace periods, contains the lock used to
+synchronize with CPU-hotplug events,
+and maintains state used to force quiescent states when
+grace periods extend too long.
+
+</p><p>A few of the <tt>rcu_state</tt> structure's fields are discussed,
+singly and in groups, in the following sections.
+The more specialized fields are covered in the discussion of their
+use.
+
+<h5>Relationship to rcu_node and rcu_data Structures</h5>
+
+This portion of the <tt>rcu_state</tt> structure is declared
+as follows:
+
+<pre>
+  1   struct rcu_node node[NUM_RCU_NODES];
+  2   struct rcu_node *level[NUM_RCU_LVLS + 1];
+  3   struct rcu_data __percpu *rda;
+</pre>
+
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	Wait a minute!
+	You said that the <tt>rcu_node</tt> structures formed a tree,
+	but they are declared as a flat array!
+	What gives?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	The tree is laid out in the array.
+	The first node in the array is the head, the next set of nodes in the
+	array are children of the head node, and so on until the last set of
+	nodes in the array are the leaves.
+	</font>
+
+	<p><font color="ffffff">The following diagrams show how
+	this works.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
+
+<p>The <tt>rcu_node</tt> tree is embedded into the
+<tt>-&gt;node[]</tt> array as shown in the following figure:
+
+</p><p><img src="TreeMapping.svg" alt="TreeMapping.svg" width="40%">
+
+</p><p>One interesting consequence of this mapping is that a
+breadth-first traversal of the tree is implemented as a simple
+linear scan of the array, which is in fact what the
+<tt>rcu_for_each_node_breadth_first()</tt> macro does.
+This macro is used at the beginning and end of grace periods.
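+
+</p><p>Given this layout, the macro can be little more than a pointer
+sweep over the <tt>-&gt;node[]</tt> array, along the following lines
+(a sketch, not necessarily the exact kernel definition):
+
+<pre>
+#define rcu_for_each_node_breadth_first(rsp, rnp) \
+        for ((rnp) = &amp;(rsp)-&gt;node[0]; \
+             (rnp) &lt; &amp;(rsp)-&gt;node[NUM_RCU_NODES]; (rnp)++)
+</pre>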
+
+</p><p>Each entry of the <tt>-&gt;level</tt> array references
+the first <tt>rcu_node</tt> structure on the corresponding level
+of the tree, for example, as shown below:
+
+</p><p><img src="TreeMappingLevel.svg" alt="TreeMappingLevel.svg" width="40%">
+
+</p><p>The zero<sup>th</sup> element of the array references the root
+<tt>rcu_node</tt> structure, the first element references the
+first child of the root <tt>rcu_node</tt>, and finally the second
+element references the first leaf <tt>rcu_node</tt> structure.
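+
+</p><p>Because the leaves occupy the tail of the <tt>-&gt;node[]</tt>
+array, iterating over only the leaf <tt>rcu_node</tt> structures is
+equally cheap; a sketch (again, not necessarily the exact kernel
+definition) starts at the last entry of <tt>-&gt;level[]</tt>:
+
+<pre>
+#define rcu_for_each_leaf_node(rsp, rnp) \
+        for ((rnp) = (rsp)-&gt;level[NUM_RCU_LVLS - 1]; \
+             (rnp) &lt; &amp;(rsp)-&gt;node[NUM_RCU_NODES]; (rnp)++)
+</pre>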
+
+</p><p>For whatever it is worth, if you draw the tree to be tree-shaped
+rather than array-shaped, it is easy to draw a planar representation:
+
+</p><p><img src="TreeLevel.svg" alt="TreeLevel.svg" width="60%">
+
+</p><p>Finally, the <tt>-&gt;rda</tt> field references a per-CPU
+pointer to the corresponding CPU's <tt>rcu_data</tt> structure.
+
+</p><p>All of these fields are constant once initialization is complete,
+and therefore need no protection.
+
+<h5>Grace-Period Tracking</h5>
+
+<p>This portion of the <tt>rcu_state</tt> structure is declared
+as follows:
+
+<pre>
+  1   unsigned long gpnum;
+  2   unsigned long completed;
+</pre>
+
+<p>RCU grace periods are numbered, and
+the <tt>-&gt;gpnum</tt> field contains the number of the grace
+period that started most recently.
+The <tt>-&gt;completed</tt> field contains the number of the
+grace period that completed most recently.
+If the two fields are equal, the RCU grace period that most recently
+started has already completed, and therefore the corresponding
+flavor of RCU is idle.
+If <tt>-&gt;gpnum</tt> is one greater than <tt>-&gt;completed</tt>,
+then <tt>-&gt;gpnum</tt> gives the number of the current RCU
+grace period, which has not yet completed.
+Any other combination of values indicates that something is broken.
+These two fields are protected by the root <tt>rcu_node</tt>'s
+<tt>-&gt;lock</tt> field.
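+
+</p><p>For example, the idle check just described can be coded as
+follows (a sketch along the lines of the kernel's helper, not a
+verbatim quote):
+
+<pre>
+  static int rcu_gp_in_progress(struct rcu_state *rsp)
+  {
+          return READ_ONCE(rsp-&gt;completed) != READ_ONCE(rsp-&gt;gpnum);
+  }
+</pre>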
+
+</p><p>There are <tt>-&gt;gpnum</tt> and <tt>-&gt;completed</tt> fields
+in the <tt>rcu_node</tt> and <tt>rcu_data</tt> structures
+as well.
+The fields in the <tt>rcu_state</tt> structure represent the
+most current values, and those of the other structures are compared
+in order to detect the start of a new grace period in a distributed
+fashion.
+The values flow from <tt>rcu_state</tt> to <tt>rcu_node</tt>
+(down the tree from the root to the leaves) to <tt>rcu_data</tt>.
+
+<h5>Miscellaneous</h5>
+
+<p>This portion of the <tt>rcu_state</tt> structure is declared
+as follows:
+
+<pre>
+  1   unsigned long gp_max;
+  2   char abbr;
+  3   char *name;
+</pre>
+
+<p>The <tt>-&gt;gp_max</tt> field tracks the duration of the longest
+grace period in jiffies.
+It is protected by the root <tt>rcu_node</tt>'s <tt>-&gt;lock</tt>.
+
+<p>The <tt>-&gt;name</tt> field points to the name of the RCU flavor
+(for example, &ldquo;rcu_sched&rdquo;), and is constant.
+The <tt>-&gt;abbr</tt> field contains a one-character abbreviation,
+for example, &ldquo;s&rdquo; for RCU-sched.
+
+<h3><a name="The rcu_node Structure">
+The <tt>rcu_node</tt> Structure</a></h3>
+
+<p>The <tt>rcu_node</tt> structures form the combining
+tree that propagates quiescent-state
+information from the leaves to the root and also that propagates
+grace-period information from the root down to the leaves.
+They provide local copies of the grace-period state in order
+to allow this information to be accessed in a synchronized
+manner without suffering the scalability limitations that
+would otherwise be imposed by global locking.
+In <tt>CONFIG_PREEMPT_RCU</tt> kernels, they manage the lists
+of tasks that have blocked while in their current
+RCU read-side critical section.
+In <tt>CONFIG_PREEMPT_RCU</tt> with
+<tt>CONFIG_RCU_BOOST</tt>, they manage the
+per-<tt>rcu_node</tt> priority-boosting
+kernel threads (kthreads) and state.
+Finally, they record CPU-hotplug state in order to determine
+which CPUs should be ignored during a given grace period.
+
+</p><p>The <tt>rcu_node</tt> structure's fields are discussed,
+singly and in groups, in the following sections.
+
+<h5>Connection to Combining Tree</h5>
+
+<p>This portion of the <tt>rcu_node</tt> structure is declared
+as follows:
+
+<pre>
+  1   struct rcu_node *parent;
+  2   u8 level;
+  3   u8 grpnum;
+  4   unsigned long grpmask;
+  5   int grplo;
+  6   int grphi;
+</pre>
+
+<p>The <tt>-&gt;parent</tt> pointer references the <tt>rcu_node</tt>
+one level up in the tree, and is <tt>NULL</tt> for the root
+<tt>rcu_node</tt>.
+The RCU implementation makes heavy use of this field to push quiescent
+states up the tree.
+The <tt>-&gt;level</tt> field gives the level in the tree, with
+the root being at level zero, its children at level one, and so on.
+The <tt>-&gt;grpnum</tt> field gives this node's position within
+the children of its parent, so this number can range between 0 and 31
+on 32-bit systems and between 0 and 63 on 64-bit systems.
+The <tt>-&gt;level</tt> and <tt>-&gt;grpnum</tt> fields are
+used only during initialization and for tracing.
+The <tt>-&gt;grpmask</tt> field is the bitmask counterpart of
+<tt>-&gt;grpnum</tt>, and therefore always has exactly one bit set.
+This mask is used to clear the bit corresponding to this <tt>rcu_node</tt>
+structure in its parent's bitmasks, which are described later.
+Finally, the <tt>-&gt;grplo</tt> and <tt>-&gt;grphi</tt> fields
+contain the lowest and highest numbered CPU served by this
+<tt>rcu_node</tt> structure, respectively.
+
+</p><p>All of these fields are constant, and thus do not require any
+synchronization.
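+
+</p><p>As a sketch of how <tt>-&gt;grpmask</tt> is used (heavily
+simplified from the kernel's quiescent-state reporting, with locking
+omitted), once all of a given <tt>rcu_node</tt> structure's children
+have reported quiescent states, this structure's bit is cleared in its
+parent's <tt>-&gt;qsmask</tt>, which is described later:
+
+<pre>
+  if (!rnp-&gt;qsmask &amp;&amp; rnp-&gt;parent)
+          rnp-&gt;parent-&gt;qsmask &amp;= ~rnp-&gt;grpmask;
+</pre>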
+
+<h5>Synchronization</h5>
+
+<p>This field of the <tt>rcu_node</tt> structure is declared
+as follows:
+
+<pre>
+  1   raw_spinlock_t lock;
+</pre>
+
+<p>This field is used to protect the remaining fields in this structure,
+unless otherwise stated.
+That said, all of the fields in this structure can be accessed without
+locking for tracing purposes.
+Yes, this can result in confusing traces, but better some tracing confusion
+than to be heisenbugged out of existence.
+
+<h5>Grace-Period Tracking</h5>
+
+<p>This portion of the <tt>rcu_node</tt> structure is declared
+as follows:
+
+<pre>
+  1   unsigned long gpnum;
+  2   unsigned long completed;
+</pre>
+
+<p>These fields are the counterparts of the fields of the same name in
+the <tt>rcu_state</tt> structure.
+They each may lag up to one behind their <tt>rcu_state</tt>
+counterparts.
+If a given <tt>rcu_node</tt> structure's <tt>-&gt;gpnum</tt> and
+<tt>-&gt;completed</tt> fields are equal, then this <tt>rcu_node</tt>
+structure believes that RCU is idle.
+Otherwise, as with the <tt>rcu_state</tt> structure,
+the <tt>-&gt;gpnum</tt> field will be one greater than the
+<tt>-&gt;completed</tt> field, with <tt>-&gt;gpnum</tt>
+indicating which grace period this <tt>rcu_node</tt> believes
+is still being waited for.
+
+</p><p>The <tt>-&gt;gpnum</tt> field of each <tt>rcu_node</tt>
+structure is updated at the beginning
+of each grace period, and the <tt>-&gt;completed</tt> fields are
+updated at the end of each grace period.
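+
+</p><p>For example, the distributed detection described earlier can be
+sketched as the following comparison (illustrative only;
+<tt>note_new_gp()</tt> is a hypothetical helper standing in for the
+kernel's actual processing):
+
+<pre>
+  if (rdp-&gt;gpnum != rnp-&gt;gpnum)
+          note_new_gp(rdp, rnp);  /* Hypothetical helper. */
+</pre>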
+
+<h5>Quiescent-State Tracking</h5>
+
+<p>These fields manage the propagation of quiescent states up the
+combining tree.
+
+</p><p>This portion of the <tt>rcu_node</tt> structure has fields
+as follows:
+
+<pre>
+  1   unsigned long qsmask;
+  2   unsigned long expmask;
+  3   unsigned long qsmaskinit;
+  4   unsigned long expmaskinit;
+</pre>
+
+<p>The <tt>-&gt;qsmask</tt> field tracks which of this
+<tt>rcu_node</tt> structure's children still need to report
+quiescent states for the current normal grace period.
+Such children will have a value of 1 in their corresponding bit.
+Note that the leaf <tt>rcu_node</tt> structures should be
+thought of as having <tt>rcu_data</tt> structures as their
+children.
+Similarly, the <tt>-&gt;expmask</tt> field tracks which
+of this <tt>rcu_node</tt> structure's children still need to report
+quiescent states for the current expedited grace period.
+An expedited grace period has
+the same conceptual properties as a normal grace period, but the
+expedited implementation accepts extreme CPU overhead to obtain
+much lower grace-period latency, for example, consuming a few
+tens of microseconds worth of CPU time to reduce grace-period
+duration from milliseconds to tens of microseconds.
+The <tt>-&gt;qsmaskinit</tt> field tracks which of this
+<tt>rcu_node</tt> structure's children cover at least
+one online CPU.
+This mask is used to initialize <tt>-&gt;qsmask</tt>,
+and <tt>-&gt;expmaskinit</tt> is used to initialize
+<tt>-&gt;expmask</tt>, at the beginning of the
+normal and expedited grace periods, respectively.
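+
+</p><p>As an example, the following sketch of grace-period
+initialization (greatly simplified from the kernel's
+<tt>rcu_gp_init()</tt>, which must also deal with CPU hotplug and
+memory ordering) seeds each <tt>-&gt;qsmask</tt> from
+<tt>-&gt;qsmaskinit</tt>:
+
+<pre>
+  rcu_for_each_node_breadth_first(rsp, rnp) {
+          raw_spin_lock_irq(&amp;rnp-&gt;lock);
+          rnp-&gt;qsmask = rnp-&gt;qsmaskinit; /* Wait only on online children. */
+          rnp-&gt;gpnum = rsp-&gt;gpnum;       /* Propagate new GP number. */
+          raw_spin_unlock_irq(&amp;rnp-&gt;lock);
+  }
+</pre>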
+
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	Why are these bitmasks protected by locking?
+	Come on, haven't you heard of atomic instructions???
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	Lockless grace-period computation!  Such a tantalizing possibility!
+	</font>
+
+	<p><font color="ffffff">But consider the following sequence of events:
+	</font>
+
+	<ol>
+	<li>	<font color="ffffff">CPU&nbsp;0 has been in dyntick-idle
+		mode for quite some time.
+		When it wakes up, it notices that the current RCU
+		grace period needs it to report in, so it sets a
+		flag where the scheduling clock interrupt will find it.
+		</font><p>
+	<li>	<font color="ffffff">Meanwhile, CPU&nbsp;1 is running
+		<tt>force_quiescent_state()</tt>,
+		and notices that CPU&nbsp;0 has been in dyntick idle mode,
+		which qualifies as an extended quiescent state.
+		</font><p>
+	<li>	<font color="ffffff">CPU&nbsp;0's scheduling clock
+		interrupt fires in the
+		middle of an RCU read-side critical section, and notices
+		that the RCU core needs something, so commences RCU softirq
+		processing.
+		</font>
+		<p>
+	<li>	<font color="ffffff">CPU&nbsp;0's softirq handler
+		executes and is just about ready
+		to report its quiescent state up the <tt>rcu_node</tt>
+		tree.
+		</font><p>
+	<li>	<font color="ffffff">But CPU&nbsp;1 beats it to the punch,
+		completing the current
+		grace period and starting a new one.
+		</font><p>
+	<li>	<font color="ffffff">CPU&nbsp;0 now reports its quiescent
+		state for the wrong
+		grace period.
+		That grace period might now end before the RCU read-side
+		critical section.
+		If that happens, disaster will ensue.
+		</font>
+	</ol>
+
+	<p><font color="ffffff">So the locking is absolutely required in
+	order to coordinate
+	clearing of the bits with the grace-period numbers in
+	<tt>-&gt;gpnum</tt> and <tt>-&gt;completed</tt>.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
+
+<h5>Blocked-Task Management</h5>
+
+<p><tt>PREEMPT_RCU</tt> allows tasks to be preempted in the
+midst of their RCU read-side critical sections, and these tasks
+must be tracked explicitly.
+The details of exactly why and how they are tracked will be covered
+in a separate article on RCU read-side processing.
+For now, it is enough to know that the <tt>rcu_node</tt>
+structure tracks them.
+
+<pre>
+  1   struct list_head blkd_tasks;
+  2   struct list_head *gp_tasks;
+  3   struct list_head *exp_tasks;
+  4   bool wait_blkd_tasks;
+</pre>
+
+<p>The <tt>-&gt;blkd_tasks</tt> field is a list header for
+the list of blocked and preempted tasks.
+As tasks undergo context switches within RCU read-side critical
+sections, their <tt>task_struct</tt> structures are enqueued
+(via the <tt>task_struct</tt>'s <tt>-&gt;rcu_node_entry</tt>
+field) onto the head of the <tt>-&gt;blkd_tasks</tt> list for the
+leaf <tt>rcu_node</tt> structure corresponding to the CPU
+on which the outgoing context switch executed.
+As these tasks later exit their RCU read-side critical sections,
+they remove themselves from the list.
+This list is therefore in reverse time order, so that if one of the tasks
+is blocking the current grace period, all subsequent tasks must
+also be blocking that same grace period.
+Therefore, a single pointer into this list suffices to track
+all tasks blocking a given grace period.
+That pointer is stored in <tt>-&gt;gp_tasks</tt> for normal
+grace periods and in <tt>-&gt;exp_tasks</tt> for expedited
+grace periods.
+These last two fields are <tt>NULL</tt> if either there is
+no grace period in flight or if there are no blocked tasks
+preventing that grace period from completing.
+If either of these two pointers is referencing a task that
+removes itself from the <tt>-&gt;blkd_tasks</tt> list,
+then that task must advance the pointer to the next task on
+the list, or set the pointer to <tt>NULL</tt> if there
+are no subsequent tasks on the list.
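+
+</p><p>A sketch of that pointer-advancing rule follows (heavily
+simplified from the kernel's preemptible-RCU unlock path, with locking
+and priority boosting omitted):
+
+<pre>
+  struct list_head *np = t-&gt;rcu_node_entry.next;
+
+  if (np == &amp;rnp-&gt;blkd_tasks)
+          np = NULL;                /* No subsequent tasks. */
+  if (rnp-&gt;gp_tasks == &amp;t-&gt;rcu_node_entry)
+          rnp-&gt;gp_tasks = np;
+  if (rnp-&gt;exp_tasks == &amp;t-&gt;rcu_node_entry)
+          rnp-&gt;exp_tasks = np;
+  list_del_init(&amp;t-&gt;rcu_node_entry);
+</pre>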
+
+</p><p>For example, suppose that tasks&nbsp;T1, T2, and&nbsp;T3 are
+all hard-affinitied to the largest-numbered CPU in the system.
+Then if task&nbsp;T1 blocked in an RCU read-side
+critical section, then an expedited grace period started,
+then task&nbsp;T2 blocked in an RCU read-side critical section,
+then a normal grace period started, and finally task&nbsp;T3 blocked
+in an RCU read-side critical section, then the state of the
+last leaf <tt>rcu_node</tt> structure's blocked-task list
+would be as shown below:
+
+</p><p><img src="blkd_task.svg" alt="blkd_task.svg" width="60%">
+
+</p><p>Task&nbsp;T1 is blocking both grace periods, task&nbsp;T2 is
+blocking only the normal grace period, and task&nbsp;T3 is blocking
+neither grace period.
+Note that these tasks will not remove themselves from this list
+immediately upon resuming execution.
+They will instead remain on the list until they execute the outermost
+<tt>rcu_read_unlock()</tt> that ends their RCU read-side critical
+section.
+
+<p>
+The <tt>-&gt;wait_blkd_tasks</tt> field indicates whether or not
+the current grace period is waiting on a blocked task.
+
+<h5>Sizing the <tt>rcu_node</tt> Array</h5>
+
+<p>The <tt>rcu_node</tt> array is sized via a series of
+C-preprocessor expressions as follows:
+
+<pre>
+ 1 #ifdef CONFIG_RCU_FANOUT
+ 2 #define RCU_FANOUT CONFIG_RCU_FANOUT
+ 3 #else
+ 4 # ifdef CONFIG_64BIT
+ 5 # define RCU_FANOUT 64
+ 6 # else
+ 7 # define RCU_FANOUT 32
+ 8 # endif
+ 9 #endif
+10
+11 #ifdef CONFIG_RCU_FANOUT_LEAF
+12 #define RCU_FANOUT_LEAF CONFIG_RCU_FANOUT_LEAF
+13 #else
+14 # ifdef CONFIG_64BIT
+15 # define RCU_FANOUT_LEAF 64
+16 # else
+17 # define RCU_FANOUT_LEAF 32
+18 # endif
+19 #endif
+20
+21 #define RCU_FANOUT_1        (RCU_FANOUT_LEAF)
+22 #define RCU_FANOUT_2        (RCU_FANOUT_1 * RCU_FANOUT)
+23 #define RCU_FANOUT_3        (RCU_FANOUT_2 * RCU_FANOUT)
+24 #define RCU_FANOUT_4        (RCU_FANOUT_3 * RCU_FANOUT)
+25
+26 #if NR_CPUS &lt;= RCU_FANOUT_1
+27 #  define RCU_NUM_LVLS        1
+28 #  define NUM_RCU_LVL_0        1
+29 #  define NUM_RCU_NODES        NUM_RCU_LVL_0
+30 #  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0 }
+31 #  define RCU_NODE_NAME_INIT  { "rcu_node_0" }
+32 #  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0" }
+33 #  define RCU_EXP_NAME_INIT   { "rcu_node_exp_0" }
+34 #elif NR_CPUS &lt;= RCU_FANOUT_2
+35 #  define RCU_NUM_LVLS        2
+36 #  define NUM_RCU_LVL_0        1
+37 #  define NUM_RCU_LVL_1        DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
+38 #  define NUM_RCU_NODES        (NUM_RCU_LVL_0 + NUM_RCU_LVL_1)
+39 #  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0, NUM_RCU_LVL_1 }
+40 #  define RCU_NODE_NAME_INIT  { "rcu_node_0", "rcu_node_1" }
+41 #  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0", "rcu_node_fqs_1" }
+42 #  define RCU_EXP_NAME_INIT   { "rcu_node_exp_0", "rcu_node_exp_1" }
+43 #elif NR_CPUS &lt;= RCU_FANOUT_3
+44 #  define RCU_NUM_LVLS        3
+45 #  define NUM_RCU_LVL_0        1
+46 #  define NUM_RCU_LVL_1        DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
+47 #  define NUM_RCU_LVL_2        DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
+48 #  define NUM_RCU_NODES        (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2)
+49 #  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2 }
+50 #  define RCU_NODE_NAME_INIT  { "rcu_node_0", "rcu_node_1", "rcu_node_2" }
+51 #  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2" }
+52 #  define RCU_EXP_NAME_INIT   { "rcu_node_exp_0", "rcu_node_exp_1", "rcu_node_exp_2" }
+53 #elif NR_CPUS &lt;= RCU_FANOUT_4
+54 #  define RCU_NUM_LVLS        4
+55 #  define NUM_RCU_LVL_0        1
+56 #  define NUM_RCU_LVL_1        DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3)
+57 #  define NUM_RCU_LVL_2        DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
+58 #  define NUM_RCU_LVL_3        DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
+59 #  define NUM_RCU_NODES        (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3)
+60 #  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2, NUM_RCU_LVL_3 }
+61 #  define RCU_NODE_NAME_INIT  { "rcu_node_0", "rcu_node_1", "rcu_node_2", "rcu_node_3" }
+62 #  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2", "rcu_node_fqs_3" }
+63 #  define RCU_EXP_NAME_INIT   { "rcu_node_exp_0", "rcu_node_exp_1", "rcu_node_exp_2", "rcu_node_exp_3" }
+64 #else
+65 # error "CONFIG_RCU_FANOUT insufficient for NR_CPUS"
+66 #endif
+</pre>
+
+<p>The maximum number of levels in the <tt>rcu_node</tt> structure
+is currently limited to four, as specified by lines&nbsp;21-24
+and the structure of the subsequent &ldquo;if&rdquo; statement.
+For 32-bit systems, this allows 16*32*32*32=524,288 CPUs, which
+should be sufficient for the next few years at least.
+For 64-bit systems, 16*64*64*64=4,194,304 CPUs is allowed, which
+should see us through the next decade or so.
+This four-level tree also allows kernels built with
+<tt>CONFIG_RCU_FANOUT=8</tt> to support up to 4096 CPUs,
+which might be useful in very large systems having eight CPUs per
+socket (but please note that no one has yet shown any measurable
+performance degradation due to misaligned socket and <tt>rcu_node</tt>
+boundaries).
+In addition, building kernels with a full four levels of <tt>rcu_node</tt>
+tree permits better testing of RCU's combining-tree code.
+
+</p><p>The <tt>RCU_FANOUT</tt> symbol controls how many children
+are permitted at each non-leaf level of the <tt>rcu_node</tt> tree.
+If the <tt>CONFIG_RCU_FANOUT</tt> Kconfig option is not specified,
+it is set based on the word size of the system, which is also
+the Kconfig default.
+
+</p><p>The <tt>RCU_FANOUT_LEAF</tt> symbol controls how many CPUs are
+handled by each leaf <tt>rcu_node</tt> structure.
+Experience has shown that allowing a given leaf <tt>rcu_node</tt>
+structure to handle 64 CPUs, as permitted by the number of bits in
+the <tt>-&gt;qsmask</tt> field on a 64-bit system, results in
+excessive contention for the leaf <tt>rcu_node</tt> structures'
+<tt>-&gt;lock</tt> fields.
+The number of CPUs per leaf <tt>rcu_node</tt> structure is therefore
+limited to 16 given the default value of <tt>CONFIG_RCU_FANOUT_LEAF</tt>.
+If <tt>CONFIG_RCU_FANOUT_LEAF</tt> is unspecified, the value
+selected is based on the word size of the system, just as for
+<tt>CONFIG_RCU_FANOUT</tt>.
+Lines&nbsp;11-19 perform this computation.
+
+</p><p>Lines&nbsp;21-24 compute the maximum number of CPUs supported by
+a single-level (which contains a single <tt>rcu_node</tt> structure),
+two-level, three-level, and four-level <tt>rcu_node</tt> tree,
+respectively, given the fanout specified by <tt>RCU_FANOUT</tt>
+and <tt>RCU_FANOUT_LEAF</tt>.
+These numbers of CPUs are retained in the
+<tt>RCU_FANOUT_1</tt>,
+<tt>RCU_FANOUT_2</tt>,
+<tt>RCU_FANOUT_3</tt>, and
+<tt>RCU_FANOUT_4</tt>
+C-preprocessor variables, respectively.
+
+</p><p>These variables are used to control the C-preprocessor <tt>#if</tt>
+statement spanning lines&nbsp;26-66 that computes the number of
+<tt>rcu_node</tt> structures required for each level of the tree,
+as well as the number of levels required.
+The number of levels is placed in the <tt>RCU_NUM_LVLS</tt>
+C-preprocessor variable by lines&nbsp;27, 35, 44, and&nbsp;54.
+The number of <tt>rcu_node</tt> structures for the topmost level
+of the tree is always exactly one, and this value is unconditionally
+placed into <tt>NUM_RCU_LVL_0</tt> by lines&nbsp;28, 36, 45, and&nbsp;55.
+The rest of the levels (if any) of the <tt>rcu_node</tt> tree
+are computed by dividing the maximum number of CPUs by the
+fanout supported by the number of levels from the current level down,
+rounding up.  This computation is performed by lines&nbsp;37,
+46-47, and&nbsp;56-58.
+Lines&nbsp;31-33, 40-42, 50-52, and&nbsp;61-63 create initializers
+for lockdep lock-class names.
+Finally, lines&nbsp;64-66 produce an error if the maximum number of
+CPUs is too large for the specified fanout.
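+
+</p><p>For a concrete example (the numbers are chosen purely for
+illustration), consider a 64-bit kernel built with
+<tt>NR_CPUS=1000</tt>, <tt>RCU_FANOUT=64</tt>, and
+<tt>RCU_FANOUT_LEAF=16</tt>.
+Then <tt>RCU_FANOUT_1</tt> is 16 and <tt>RCU_FANOUT_2</tt> is
+16*64=1,024, so the two-level case is selected:
+
+<pre>
+  RCU_NUM_LVLS  = 2
+  NUM_RCU_LVL_0 = 1
+  NUM_RCU_LVL_1 = DIV_ROUND_UP(1000, 16) = 63
+  NUM_RCU_NODES = 1 + 63 = 64
+</pre>
+
+<p>In other words, a single root <tt>rcu_node</tt> structure fans out
+to 63 leaf <tt>rcu_node</tt> structures, each covering at most 16 CPUs.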
+
+<h3><a name="The rcu_data Structure">
+The <tt>rcu_data</tt> Structure</a></h3>
+
+<p>The <tt>rcu_data</tt> structure maintains the per-CPU state for the
+corresponding flavor of RCU.
+The fields in this structure may be accessed only from the corresponding
+CPU (and from tracing) unless otherwise stated.
+This structure is the
+focus of quiescent-state detection and RCU callback queuing.
+It also tracks its relationship to the corresponding leaf
+<tt>rcu_node</tt> structure to allow more-efficient
+propagation of quiescent states up the <tt>rcu_node</tt>
+combining tree.
+Like the <tt>rcu_node</tt> structure, it provides a local
+copy of the grace-period information to allow
+synchronization-free access to this information
+from the corresponding CPU.
+Finally, this structure records past dyntick-idle state
+for the corresponding CPU and also tracks statistics.
+
+</p><p>The <tt>rcu_data</tt> structure's fields are discussed,
+singly and in groups, in the following sections.
+
+<h5>Connection to Other Data Structures</h5>
+
+<p>This portion of the <tt>rcu_data</tt> structure is declared
+as follows:
+
+<pre>
+  1   int cpu;
+  2   struct rcu_state *rsp;
+  3   struct rcu_node *mynode;
+  4   struct rcu_dynticks *dynticks;
+  5   unsigned long grpmask;
+  6   bool beenonline;
+</pre>
+
+<p>The <tt>-&gt;cpu</tt> field contains the number of the
+corresponding CPU, the <tt>-&gt;rsp</tt> pointer references
+the corresponding <tt>rcu_state</tt> structure (and is most frequently
+used to locate the name of the corresponding flavor of RCU for tracing),
+and the <tt>-&gt;mynode</tt> field references the corresponding
+<tt>rcu_node</tt> structure.
+The <tt>-&gt;mynode</tt> field is used to propagate quiescent states
+up the combining tree.
+<p>The <tt>-&gt;dynticks</tt> pointer references the
+<tt>rcu_dynticks</tt> structure corresponding to this
+CPU.
+Recall that a single per-CPU instance of the <tt>rcu_dynticks</tt>
+structure is shared among all flavors of RCU.
+These first four fields are constant and therefore require no
+synchronization.
+
+</p><p>The <tt>-&gt;grpmask</tt> field indicates the bit in
+the <tt>-&gt;mynode-&gt;qsmask</tt> corresponding to this
+<tt>rcu_data</tt> structure, and is also used when propagating
+quiescent states.
+The <tt>-&gt;beenonline</tt> flag is set whenever the corresponding
+CPU comes online, which means that the debugfs tracing need not dump
+out any <tt>rcu_data</tt> structure for which this flag is not set.
+
+<h5>Quiescent-State and Grace-Period Tracking</h5>
+
+<p>This portion of the <tt>rcu_data</tt> structure is declared
+as follows:
+
+<pre>
+  1   unsigned long completed;
+  2   unsigned long gpnum;
+  3   bool cpu_no_qs;
+  4   bool core_needs_qs;
+  5   bool gpwrap;
+  6   unsigned long rcu_qs_ctr_snap;
+</pre>
+
+<p>The <tt>-&gt;completed</tt> and <tt>-&gt;gpnum</tt>
+fields are the counterparts of the fields of the same name
+in the <tt>rcu_state</tt> and <tt>rcu_node</tt> structures.
+They may each lag up to one behind their <tt>rcu_node</tt>
+counterparts, but in <tt>CONFIG_NO_HZ_IDLE</tt> and
+<tt>CONFIG_NO_HZ_FULL</tt> kernels can lag
+arbitrarily far behind for CPUs in dyntick-idle mode (but these counters
+will catch up upon exit from dyntick-idle mode).
+If a given <tt>rcu_data</tt> structure's <tt>-&gt;gpnum</tt> and
+<tt>-&gt;completed</tt> fields are equal, then this <tt>rcu_data</tt>
+structure believes that RCU is idle.
+Otherwise, as with the <tt>rcu_state</tt> and <tt>rcu_node</tt>
+structures,
+the <tt>-&gt;gpnum</tt> field will be one greater than the
+<tt>-&gt;completed</tt> field, with <tt>-&gt;gpnum</tt>
+indicating which grace period this <tt>rcu_data</tt> believes
+is still being waited for.
+
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	All this replication of the grace period numbers can only cause
+	massive confusion.
+	Why not just keep a global pair of counters and be done with it???
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	Because if there was only a single global pair of grace-period
+	numbers, there would need to be a single global lock to allow
+	safely accessing and updating them.
+	And if we are not going to have a single global lock, we need
+	to carefully manage the numbers on a per-node basis.
+	Recall from the answer to a previous Quick Quiz that the consequences
+	of applying a previously sampled quiescent state to the wrong
+	grace period are quite severe.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
+
+<p>The <tt>-&gt;cpu_no_qs</tt> flag indicates that the
+CPU has not yet passed through a quiescent state,
+while the <tt>-&gt;core_needs_qs</tt> flag indicates that the
+RCU core needs a quiescent state from the corresponding CPU.
+The <tt>-&gt;gpwrap</tt> field indicates that the corresponding
+CPU has remained idle for so long that the <tt>-&gt;completed</tt>
+and <tt>-&gt;gpnum</tt> counters are in danger of overflow, which
+will cause the CPU to disregard the values of its counters on
+its next exit from idle.
+Finally, the <tt>-&gt;rcu_qs_ctr_snap</tt> field is used to detect
+cases where a given operation has resulted in a quiescent state
+for all flavors of RCU, for example, <tt>cond_resched_rcu_qs()</tt>.
+
+<h5>RCU Callback Handling</h5>
+
+<p>In the absence of CPU-hotplug events, RCU callbacks are invoked by
+the same CPU that registered them.
+This is strictly a cache-locality optimization: callbacks can and
+do get invoked on CPUs other than the one that registered them.
+After all, if the CPU that registered a given callback has gone
+offline before the callback can be invoked, there really is no other
+choice.
+
+</p><p>This portion of the <tt>rcu_data</tt> structure is declared
+as follows:
+
+<pre>
+ 1 struct rcu_head *nxtlist;
+ 2 struct rcu_head **nxttail[RCU_NEXT_SIZE];
+ 3 unsigned long nxtcompleted[RCU_NEXT_SIZE];
+ 4 long qlen_lazy;
+ 5 long qlen;
+ 6 long qlen_last_fqs_check;
+ 7 unsigned long n_force_qs_snap;
+ 8 unsigned long n_cbs_invoked;
+ 9 unsigned long n_cbs_orphaned;
+10 unsigned long n_cbs_adopted;
+11 long blimit;
+</pre>
+
+<p>The <tt>-&gt;nxtlist</tt> pointer and the
+<tt>-&gt;nxttail[]</tt> array form a four-segment list with
+older callbacks near the head and newer ones near the tail.
+Each segment contains callbacks with the corresponding relationship
+to the current grace period.
+The pointer out of the end of each of the four segments is referenced
+by the element of the <tt>-&gt;nxttail[]</tt> array indexed by
+<tt>RCU_DONE_TAIL</tt> (for callbacks handled by a prior grace period),
+<tt>RCU_WAIT_TAIL</tt> (for callbacks waiting on the current grace period),
+<tt>RCU_NEXT_READY_TAIL</tt> (for callbacks that will wait on the next
+grace period), and
+<tt>RCU_NEXT_TAIL</tt> (for callbacks that are not yet associated
+with a specific grace period),
+respectively, as shown in the following figure.
+
+</p><p><img src="nxtlist.svg" alt="nxtlist.svg" width="40%">
+
+</p><p>In this figure, the <tt>-&gt;nxtlist</tt> pointer references the
+first
+RCU callback in the list.
+The <tt>-&gt;nxttail[RCU_DONE_TAIL]</tt> array element references
+the <tt>-&gt;nxtlist</tt> pointer itself, indicating that none
+of the callbacks is ready to invoke.
+The <tt>-&gt;nxttail[RCU_WAIT_TAIL]</tt> array element references callback
+CB&nbsp;2's <tt>-&gt;next</tt> pointer, which indicates that
+CB&nbsp;1 and CB&nbsp;2 are both waiting on the current grace period.
+The <tt>-&gt;nxttail[RCU_NEXT_READY_TAIL]</tt> array element
+references the same RCU callback that <tt>-&gt;nxttail[RCU_WAIT_TAIL]</tt>
+does, which indicates that there are no callbacks waiting on the next
+RCU grace period.
+The <tt>-&gt;nxttail[RCU_NEXT_TAIL]</tt> array element references
+CB&nbsp;4's <tt>-&gt;next</tt> pointer, indicating that all the
+remaining RCU callbacks have not yet been assigned to an RCU grace
+period.
+Note that the <tt>-&gt;nxttail[RCU_NEXT_TAIL]</tt> array element
+always references the last RCU callback's <tt>-&gt;next</tt> pointer
+unless the callback list is empty, in which case it references
+the <tt>-&gt;nxtlist</tt> pointer.
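+
+</p><p>As a sketch of how a new callback lands on the
+<tt>RCU_NEXT_TAIL</tt> segment (simplified from the kernel's
+<tt>__call_rcu()</tt>, with interrupt disabling and overload checks
+omitted):
+
+<pre>
+  head-&gt;func = func;
+  head-&gt;next = NULL;
+  *rdp-&gt;nxttail[RCU_NEXT_TAIL] = head;       /* Append the callback... */
+  rdp-&gt;nxttail[RCU_NEXT_TAIL] = &amp;head-&gt;next; /* ...and advance the tail. */
+  rdp-&gt;qlen++;
+</pre>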
+
+</p><p>CPUs advance their callbacks from the
+<tt>RCU_NEXT_TAIL</tt> to the <tt>RCU_NEXT_READY_TAIL</tt> to the
+<tt>RCU_WAIT_TAIL</tt> to the <tt>RCU_DONE_TAIL</tt> list segments
+as grace periods advance.
+The CPU advances the callbacks in its <tt>rcu_data</tt> structure
+whenever it notices that another RCU grace period has completed.
+The CPU detects the completion of an RCU grace period by noticing
+that the value of its <tt>rcu_data</tt> structure's
+<tt>-&gt;completed</tt> field differs from that of its leaf
+<tt>rcu_node</tt> structure.
+Recall that each <tt>rcu_node</tt> structure's
+<tt>-&gt;completed</tt> field is updated at the end of each
+grace period.
+
+</p><p>The <tt>-&gt;nxtcompleted[]</tt> array records grace-period
+numbers corresponding to the list segments.
+This allows CPUs that go idle for extended periods to determine
+which of their callbacks are ready to be invoked after reawakening.
+
+</p><p>The <tt>-&gt;qlen</tt> counter contains the number of
+callbacks in <tt>-&gt;nxtlist</tt>, and the
+<tt>-&gt;qlen_lazy</tt> field contains the number of those callbacks that
+are known to only free memory, and whose invocation can therefore
+be safely deferred.
+The <tt>-&gt;qlen_last_fqs_check</tt> and
+<tt>-&gt;n_force_qs_snap</tt> fields coordinate the forcing of quiescent
+states from <tt>call_rcu()</tt> and friends when callback
+lists grow excessively long.
+
+</p><p>The <tt>-&gt;n_cbs_invoked</tt>,
+<tt>-&gt;n_cbs_orphaned</tt>, and <tt>-&gt;n_cbs_adopted</tt>
+fields count the number of callbacks invoked,
+sent to other CPUs when this CPU goes offline,
+and received from other CPUs when those other CPUs go offline, respectively.
+Finally, the <tt>-&gt;blimit</tt> counter is the maximum number of
+RCU callbacks that may be invoked at a given time.
+
+<h5>Dyntick-Idle Handling</h5>
+
+<p>This portion of the <tt>rcu_data</tt> structure is declared
+as follows:
+
+<pre>
+  1   int dynticks_snap;
+  2   unsigned long dynticks_fqs;
+</pre>
+
+The <tt>-&gt;dynticks_snap</tt> field is used to take a snapshot
+of the corresponding CPU's dyntick-idle state when forcing
+quiescent states, and is therefore accessed from other CPUs.
+Finally, the <tt>-&gt;dynticks_fqs</tt> field is used to
+count the number of times this CPU is determined to be in
+dyntick-idle state, and is used for tracing and debugging purposes.
+
+<h3><a name="The rcu_dynticks Structure">
+The <tt>rcu_dynticks</tt> Structure</a></h3>
+
+<p>The <tt>rcu_dynticks</tt> structure maintains the per-CPU dyntick-idle state
+for the corresponding CPU.
+Unlike the other structures, <tt>rcu_dynticks</tt> is not
+replicated over the different flavors of RCU.
+The fields in this structure may be accessed only from the corresponding
+CPU (and from tracing) unless otherwise stated.
+Its fields are as follows:
+
+<pre>
+  1   int dynticks_nesting;
+  2   int dynticks_nmi_nesting;
+  3   atomic_t dynticks;
+</pre>
+
+<p>The <tt>-&gt;dynticks_nesting</tt> field counts the
+nesting depth of normal interrupts.
+In addition, this counter is incremented when exiting dyntick-idle
+mode and decremented when entering it.
+This counter can therefore be thought of as counting the number
+of reasons why this CPU cannot be permitted to enter dyntick-idle
+mode, aside from non-maskable interrupts (NMIs).
+NMIs are counted by the <tt>-&gt;dynticks_nmi_nesting</tt>
+field, except that NMIs that interrupt non-dyntick-idle execution
+are not counted.
+
+</p><p>Finally, the <tt>-&gt;dynticks</tt> field counts the corresponding
+CPU's transitions to and from dyntick-idle mode, so that this counter
+has an even value when the CPU is in dyntick-idle mode and an odd
+value otherwise.
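+
+</p><p>A sketch of the resulting idleness test (the kernel wraps this
+in helper functions; <tt>rdtp</tt> is assumed to point to the CPU's
+<tt>rcu_dynticks</tt> structure):
+
+<pre>
+  /* Even value of -&gt;dynticks means dyntick-idle. */
+  bool is_idle = !(atomic_read(&amp;rdtp-&gt;dynticks) &amp; 0x1);
+</pre>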
+
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	Why not just count all NMIs?
+	Wouldn't that be simpler and less error prone?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	It seems simpler only until you think hard about how to go about
+	updating the <tt>rcu_dynticks</tt> structure's
+	<tt>-&gt;dynticks</tt> field.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
+
+<p>Additional fields are present for some special-purpose
+builds, and are discussed separately.
+
+<h3><a name="The rcu_head Structure">
+The <tt>rcu_head</tt> Structure</a></h3>
+
+<p>Each <tt>rcu_head</tt> structure represents an RCU callback.
+These structures are normally embedded within RCU-protected data
+structures whose algorithms use asynchronous grace periods.
+In contrast, when using algorithms that block waiting for RCU grace periods,
+RCU users need not provide <tt>rcu_head</tt> structures.
+
+</p><p>The <tt>rcu_head</tt> structure has fields as follows:
+
+<pre>
+  1   struct rcu_head *next;
+  2   void (*func)(struct rcu_head *head);
+</pre>
+
+<p>The <tt>-&gt;next</tt> field is used
+to link the <tt>rcu_head</tt> structures together in the
+lists within the <tt>rcu_data</tt> structures.
+The <tt>-&gt;func</tt> field is a pointer to the function
+to be called when the callback is ready to be invoked, and
+this function is passed a pointer to the <tt>rcu_head</tt>
+structure.
+However, <tt>kfree_rcu()</tt> uses the <tt>-&gt;func</tt>
+field to record the offset of the <tt>rcu_head</tt>
+structure within the enclosing RCU-protected data structure.
+
+</p><p>Both of these fields are used internally by RCU.
+From the viewpoint of RCU users, this structure is an
+opaque &ldquo;cookie&rdquo;.
+
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	Given that the callback function <tt>-&gt;func</tt>
+	is passed a pointer to the <tt>rcu_head</tt> structure,
+	how is that function supposed to find the beginning of the
+	enclosing RCU-protected data structure?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	In actual practice, there is a separate callback function per
+	type of RCU-protected data structure.
+	The callback function can therefore use the <tt>container_of()</tt>
+	macro in the Linux kernel (or other pointer-manipulation facilities
+	in other software environments) to find the beginning of the
+	enclosing structure.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
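+
+<p>For example, here is a sketch of typical asynchronous use, in which
+<tt>struct foo</tt> and <tt>foo_reclaim()</tt> are hypothetical names:
+
+<pre>
+  struct foo {
+          int data;
+          struct rcu_head rh;
+  };
+
+  static void foo_reclaim(struct rcu_head *head)
+  {
+          struct foo *fp = container_of(head, struct foo, rh);
+
+          kfree(fp);
+  }
+
+  /* ... call_rcu(&amp;fp-&gt;rh, foo_reclaim); ... */
+</pre>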
+
+<h3><a name="RCU-Specific Fields in the task_struct Structure">
+RCU-Specific Fields in the <tt>task_struct</tt> Structure</a></h3>
+
+<p>The <tt>CONFIG_PREEMPT_RCU</tt> implementation uses some
+additional fields in the <tt>task_struct</tt> structure:
+
+<pre>
+ 1 #ifdef CONFIG_PREEMPT_RCU
+ 2   int rcu_read_lock_nesting;
+ 3   union rcu_special rcu_read_unlock_special;
+ 4   struct list_head rcu_node_entry;
+ 5   struct rcu_node *rcu_blocked_node;
+ 6 #endif /* #ifdef CONFIG_PREEMPT_RCU */
+ 7 #ifdef CONFIG_TASKS_RCU
+ 8   unsigned long rcu_tasks_nvcsw;
+ 9   bool rcu_tasks_holdout;
+10   struct list_head rcu_tasks_holdout_list;
+11   int rcu_tasks_idle_cpu;
+12 #endif /* #ifdef CONFIG_TASKS_RCU */
+</pre>
+
+<p>The <tt>-&gt;rcu_read_lock_nesting</tt> field records the
+nesting level for RCU read-side critical sections, and
+the <tt>-&gt;rcu_read_unlock_special</tt> field is a bitmask
+that records special conditions that require <tt>rcu_read_unlock()</tt>
+to do additional work.
+The <tt>-&gt;rcu_node_entry</tt> field is used to form lists of
+tasks that have blocked within preemptible-RCU read-side critical
+sections, and the <tt>-&gt;rcu_blocked_node</tt> field references
+the <tt>rcu_node</tt> structure whose list this task is a member of,
+or <tt>NULL</tt> if it is not blocked within a preemptible-RCU
+read-side critical section.
+
+<p>The <tt>-&gt;rcu_tasks_nvcsw</tt> field tracks the number of
+voluntary context switches that this task had undergone at the
+beginning of the current tasks-RCU grace period,
+<tt>-&gt;rcu_tasks_holdout</tt> is set if the current tasks-RCU
+grace period is waiting on this task, <tt>-&gt;rcu_tasks_holdout_list</tt>
+is a list element enqueuing this task on the holdout list,
+and <tt>-&gt;rcu_tasks_idle_cpu</tt> tracks which CPU this
+idle task is running on, but only if the task is currently running,
+that is, if the CPU is currently idle.
+
+<h3><a name="Accessor Functions">
+Accessor Functions</a></h3>
+
+<p>The following listing shows the
+<tt>rcu_get_root()</tt>, <tt>rcu_for_each_node_breadth_first()</tt>,
+<tt>rcu_for_each_nonleaf_node_breadth_first()</tt>, and
+<tt>rcu_for_each_leaf_node()</tt> function and macros:
+
+<pre>
+  1 static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
+  2 {
+  3   return &amp;rsp-&gt;node[0];
+  4 }
+  5
+  6 #define rcu_for_each_node_breadth_first(rsp, rnp) \
+  7   for ((rnp) = &amp;(rsp)-&gt;node[0]; \
+  8        (rnp) &lt; &amp;(rsp)-&gt;node[NUM_RCU_NODES]; (rnp)++)
+  9
+ 10 #define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \
+ 11   for ((rnp) = &amp;(rsp)-&gt;node[0]; \
+ 12        (rnp) &lt; (rsp)-&gt;level[NUM_RCU_LVLS - 1]; (rnp)++)
+ 13
+ 14 #define rcu_for_each_leaf_node(rsp, rnp) \
+ 15   for ((rnp) = (rsp)-&gt;level[NUM_RCU_LVLS - 1]; \
+ 16        (rnp) &lt; &amp;(rsp)-&gt;node[NUM_RCU_NODES]; (rnp)++)
+</pre>
+
+<p>The <tt>rcu_get_root()</tt> function simply returns a pointer to the
+first element of the specified <tt>rcu_state</tt> structure's
+<tt>-&gt;node[]</tt> array, which is the root <tt>rcu_node</tt>
+structure.
+
+</p><p>As noted earlier, the <tt>rcu_for_each_node_breadth_first()</tt>
+macro takes advantage of the layout of the <tt>rcu_node</tt>
+structures in the <tt>rcu_state</tt> structure's
+<tt>-&gt;node[]</tt> array, performing a breadth-first traversal by
+simply traversing the array in order.
+The <tt>rcu_for_each_nonleaf_node_breadth_first()</tt> macro operates
+similarly, but traverses only the first part of the array, thus excluding
+the leaf <tt>rcu_node</tt> structures.
+Finally, the <tt>rcu_for_each_leaf_node()</tt> macro traverses only
+the last part of the array, thus traversing only the leaf
+<tt>rcu_node</tt> structures.
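+
+</p><p>As a usage sketch (illustrative only), the leaf iterator is
+often combined with the <tt>-&gt;grplo</tt> and <tt>-&gt;grphi</tt>
+fields to visit each CPU covered by each leaf:
+
+<pre>
+  rcu_for_each_leaf_node(rsp, rnp)
+          for (cpu = rnp-&gt;grplo; cpu &lt;= rnp-&gt;grphi; cpu++)
+                  do_something_with(cpu); /* Hypothetical operation. */
+</pre>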
+
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	What do <tt>rcu_for_each_nonleaf_node_breadth_first()</tt> and
+	<tt>rcu_for_each_leaf_node()</tt> do if the <tt>rcu_node</tt> tree
+	contains only a single node?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	In the single-node case,
+	<tt>rcu_for_each_nonleaf_node_breadth_first()</tt> is a no-op
+	and <tt>rcu_for_each_leaf_node()</tt> traverses the single node.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
+
+<h3><a name="Summary">
+Summary</a></h3>
+
+<p>So each flavor of RCU is represented by an <tt>rcu_state</tt> structure,
+which contains a combining tree of <tt>rcu_node</tt> and
+<tt>rcu_data</tt> structures.
+Finally, in <tt>CONFIG_NO_HZ_IDLE</tt> kernels, each CPU's dyntick-idle
+state is tracked by an <tt>rcu_dynticks</tt> structure.
+
+</p><p>If you made it this far, you are well prepared to read the code
+walkthroughs in the other articles in this series.
+
+<h3><a name="Acknowledgments">
+Acknowledgments</a></h3>
+
+<p>I owe thanks to Cyrill Gorcunov, Mathieu Desnoyers, Dhaval Giani, Paul
+Turner, Abhishek Srivastava, Matt Kowalczyk, and Serge Hallyn
+for helping me get this document into a more human-readable state.
+
+<h3><a name="Legal Statement">
+Legal Statement</a></h3>
+
+<p>This work represents the view of the author and does not necessarily
+represent the view of IBM.
+
+</p><p>Linux is a registered trademark of Linus Torvalds.
+
+</p><p>Other company, product, and service names may be trademarks or
+service marks of others.
+
+</body></html>
diff --git a/Documentation/RCU/Design/Data-Structures/HugeTreeClassicRCU.svg b/Documentation/RCU/Design/Data-Structures/HugeTreeClassicRCU.svg
new file mode 100644
index 0000000..2bf12b4
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/HugeTreeClassicRCU.svg
@@ -0,0 +1,939 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
+
+<!-- CreationDate: Wed Dec  9 17:37:22 2015 -->
+
+<!-- Magnification: 3.000 -->
+
+<svg
+   xmlns:dc="http://purl.org/dc/elements/1.1/"
+   xmlns:cc="http://creativecommons.org/ns#"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   width="15.1in"
+   height="11.2in"
+   viewBox="-66 -66 18087 13407"
+   id="svg2"
+   version="1.1"
+   inkscape:version="0.48.4 r9939"
+   sodipodi:docname="HugeTreeClassicRCU.fig">
+  <metadata
+     id="metadata224">
+    <rdf:RDF>
+      <cc:Work
+         rdf:about="">
+        <dc:format>image/svg+xml</dc:format>
+        <dc:type
+           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+        <dc:title></dc:title>
+      </cc:Work>
+    </rdf:RDF>
+  </metadata>
+  <defs
+     id="defs222">
+    <marker
+       inkscape:stockid="Arrow1Mend"
+       orient="auto"
+       refY="0.0"
+       refX="0.0"
+       id="Arrow1Mend"
+       style="overflow:visible;">
+      <path
+         id="path3982"
+         d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
+         style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;"
+         transform="scale(0.4) rotate(180) translate(10,0)" />
+    </marker>
+  </defs>
+  <sodipodi:namedview
+     pagecolor="#ffffff"
+     bordercolor="#666666"
+     borderopacity="1"
+     objecttolerance="10"
+     gridtolerance="10"
+     guidetolerance="10"
+     inkscape:pageopacity="0"
+     inkscape:pageshadow="2"
+     inkscape:window-width="1134"
+     inkscape:window-height="789"
+     id="namedview220"
+     showgrid="false"
+     inkscape:zoom="0.60515873"
+     inkscape:cx="679.5"
+     inkscape:cy="504"
+     inkscape:window-x="786"
+     inkscape:window-y="24"
+     inkscape:window-maximized="0"
+     inkscape:current-layer="g4" />
+  <g
+     style="stroke-width:.025in; fill:none"
+     id="g4">
+    <!-- Line: box -->
+    <rect
+       x="450"
+       y="0"
+       width="17100"
+       height="8325"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+       id="rect6" />
+    <!-- Line: box -->
+    <rect
+       x="11025"
+       y="3600"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect8" />
+    <!-- Line: box -->
+    <rect
+       x="4275"
+       y="3600"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect10" />
+    <!-- Line: box -->
+    <rect
+       x="5400"
+       y="6300"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect12" />
+    <!-- Line: box -->
+    <rect
+       x="9900"
+       y="6300"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect14" />
+    <!-- Line: box -->
+    <rect
+       x="14400"
+       y="6300"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect16" />
+    <!-- Line: box -->
+    <rect
+       x="900"
+       y="6300"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect18" />
+    <!-- Line: box -->
+    <rect
+       x="7650"
+       y="900"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect20" />
+    <!-- Line -->
+    <polyline
+       points="3150,9225 3150,7746 "
+       style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline22" />
+    <!-- Arrowhead on XXXpoint 3150 9225 - 3150 7560-->
+    <!-- Circle -->
+    <circle
+       cx="8550"
+       cy="4275"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle26" />
+    <!-- Circle -->
+    <circle
+       cx="9000"
+       cy="4275"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle28" />
+    <!-- Circle -->
+    <circle
+       cx="9450"
+       cy="4275"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle30" />
+    <!-- Line -->
+    <polyline
+       points="6750,6300 8250,5010 "
+       style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline32" />
+    <!-- Arrowhead on XXXpoint 6750 6300 - 8391 4890-->
+    <!-- Line -->
+    <polyline
+       points="11250,6300 9747,5010 "
+       style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline36" />
+    <!-- Arrowhead on XXXpoint 11250 6300 - 9606 4890-->
+    <!-- Circle -->
+    <circle
+       cx="13950"
+       cy="6975"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle40" />
+    <!-- Circle -->
+    <circle
+       cx="13500"
+       cy="6975"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle42" />
+    <!-- Circle -->
+    <circle
+       cx="13050"
+       cy="6975"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle44" />
+    <!-- Circle -->
+    <circle
+       cx="9450"
+       cy="6975"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle46" />
+    <!-- Circle -->
+    <circle
+       cx="9000"
+       cy="6975"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle48" />
+    <!-- Circle -->
+    <circle
+       cx="8550"
+       cy="6975"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle50" />
+    <!-- Circle -->
+    <circle
+       cx="4950"
+       cy="6975"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle52" />
+    <!-- Circle -->
+    <circle
+       cx="4500"
+       cy="6975"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle54" />
+    <!-- Circle -->
+    <circle
+       cx="4050"
+       cy="6975"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle56" />
+    <!-- Circle -->
+    <circle
+       cx="1800"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle58" />
+    <!-- Circle -->
+    <circle
+       cx="2250"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle60" />
+    <!-- Circle -->
+    <circle
+       cx="2700"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle62" />
+    <!-- Circle -->
+    <circle
+       cx="15300"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle64" />
+    <!-- Circle -->
+    <circle
+       cx="15750"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle66" />
+    <!-- Circle -->
+    <circle
+       cx="16200"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle68" />
+    <!-- Circle -->
+    <circle
+       cx="10800"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle70" />
+    <!-- Circle -->
+    <circle
+       cx="11250"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle72" />
+    <!-- Circle -->
+    <circle
+       cx="11700"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle74" />
+    <!-- Circle -->
+    <circle
+       cx="6300"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle76" />
+    <!-- Circle -->
+    <circle
+       cx="6750"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle78" />
+    <!-- Circle -->
+    <circle
+       cx="7200"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle80" />
+    <!-- Line: box -->
+    <rect
+       x="0"
+       y="11475"
+       width="2700"
+       height="1800"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect82" />
+    <!-- Line: box -->
+    <rect
+       x="1800"
+       y="9225"
+       width="2700"
+       height="1800"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect84" />
+    <!-- Line: box -->
+    <rect
+       x="4500"
+       y="11475"
+       width="2700"
+       height="1800"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect86" />
+    <!-- Line: box -->
+    <rect
+       x="6300"
+       y="9270"
+       width="2700"
+       height="1800"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect88" />
+    <!-- Line: box -->
+    <rect
+       x="8955"
+       y="11475"
+       width="2700"
+       height="1800"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect90" />
+    <!-- Line: box -->
+    <rect
+       x="10755"
+       y="9270"
+       width="2700"
+       height="1800"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect92" />
+    <!-- Line: box -->
+    <rect
+       x="13455"
+       y="11475"
+       width="2700"
+       height="1800"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect94" />
+    <!-- Line: box -->
+    <rect
+       x="15255"
+       y="9270"
+       width="2700"
+       height="1800"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect96" />
+    <!-- Line -->
+    <polyline
+       points="11700,3600 10197,2310 "
+       style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline98" />
+    <!-- Arrowhead on XXXpoint 11700 3600 - 10056 2190-->
+    <!-- Line -->
+    <polyline
+       points="6300,3600 7800,2310 "
+       style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline102" />
+    <!-- Arrowhead on XXXpoint 6300 3600 - 7941 2190-->
+    <!-- Line -->
+    <polyline
+       points="3150,6300 4650,5010 "
+       style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline106" />
+    <!-- Arrowhead on XXXpoint 3150 6300 - 4791 4890-->
+    <!-- Line -->
+    <polyline
+       points="14850,6300 13347,5010 "
+       style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline110" />
+    <!-- Arrowhead on XXXpoint 14850 6300 - 13206 4890-->
+    <!-- Line -->
+    <polyline
+       points="1350,11475 1350,7746 "
+       style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline114" />
+    <!-- Arrowhead on XXXpoint 1350 11475 - 1350 7560-->
+    <!-- Line -->
+    <polyline
+       points="16650,9225 16650,7746 "
+       style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline118" />
+    <!-- Arrowhead on XXXpoint 16650 9225 - 16650 7560-->
+    <!-- Line -->
+    <polyline
+       points="14850,11475 14850,7746 "
+       style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline122" />
+    <!-- Arrowhead on XXXpoint 14850 11475 - 14850 7560-->
+    <!-- Line -->
+    <polyline
+       points="12150,9225 12150,7746 "
+       style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline126" />
+    <!-- Arrowhead on XXXpoint 12150 9225 - 12150 7560-->
+    <!-- Line -->
+    <polyline
+       points="10350,11475 10350,7746 "
+       style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline130" />
+    <!-- Arrowhead on XXXpoint 10350 11475 - 10350 7560-->
+    <!-- Line -->
+    <polyline
+       points="7650,9225 7650,7746 "
+       style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline134" />
+    <!-- Arrowhead on XXXpoint 7650 9225 - 7650 7560-->
+    <!-- Line -->
+    <polyline
+       points="5850,11475 5850,7746 "
+       style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline138" />
+    <!-- Arrowhead on XXXpoint 5850 11475 - 5850 7560-->
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="12375"
+       y="4500"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text142">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="12375"
+       y="4050"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text144">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5625"
+       y="4050"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text146">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5625"
+       y="4500"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text148">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="6750"
+       y="6750"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text150">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="6750"
+       y="7200"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text152">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="11250"
+       y="7200"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text154">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="11250"
+       y="6750"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text156">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="15750"
+       y="7200"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text158">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="15750"
+       y="6750"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text160">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2250"
+       y="6750"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text162">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2250"
+       y="7200"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text164">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1350"
+       y="13050"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text166">CPU 0</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1350"
+       y="11925"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text168">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1350"
+       y="12375"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text170">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="3150"
+       y="10800"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text172">CPU 15</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="3150"
+       y="9675"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text174">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="3150"
+       y="10125"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text176">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5850"
+       y="11925"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text178">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5850"
+       y="12375"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text180">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5850"
+       y="13050"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text182">CPU 21823</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="7650"
+       y="10845"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text184">CPU 21839</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="7650"
+       y="10170"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text186">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="7650"
+       y="9720"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text188">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="10305"
+       y="11925"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text190">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="10305"
+       y="12375"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text192">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="10305"
+       y="13050"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text194">CPU 43679</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="12105"
+       y="10845"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text196">CPU 43695</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="12105"
+       y="10170"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text198">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="12105"
+       y="9720"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text200">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="14805"
+       y="11925"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text202">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="14805"
+       y="12375"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text204">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="14805"
+       y="13050"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text206">CPU 65519</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="16605"
+       y="10845"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text208">CPU 65535</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="16605"
+       y="10170"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text210">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="16605"
+       y="9720"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text212">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="675"
+       y="450"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="start"
+       id="text214">struct rcu_state</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="9000"
+       y="1350"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text216">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="9000"
+       y="1800"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text218">rcu_node</text>
+  </g>
+</svg>
diff --git a/Documentation/RCU/Design/Data-Structures/TreeLevel.svg b/Documentation/RCU/Design/Data-Structures/TreeLevel.svg
new file mode 100644
index 0000000..7a7eb3b
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/TreeLevel.svg
@@ -0,0 +1,828 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
+
+<!-- CreationDate: Wed Dec  9 17:41:29 2015 -->
+
+<!-- Magnification: 3.000 -->
+
+<svg
+   xmlns:dc="http://purl.org/dc/elements/1.1/"
+   xmlns:cc="http://creativecommons.org/ns#"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   width="17.7in"
+   height="10.4in"
+   viewBox="-66 -66 21237 12507"
+   id="svg2"
+   version="1.1"
+   inkscape:version="0.48.4 r9939"
+   sodipodi:docname="TreeLevel.fig">
+  <metadata
+     id="metadata216">
+    <rdf:RDF>
+      <cc:Work
+         rdf:about="">
+        <dc:format>image/svg+xml</dc:format>
+        <dc:type
+           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+        <dc:title></dc:title>
+      </cc:Work>
+    </rdf:RDF>
+  </metadata>
+  <defs
+     id="defs214">
+    <marker
+       inkscape:stockid="Arrow1Mend"
+       orient="auto"
+       refY="0.0"
+       refX="0.0"
+       id="Arrow1Mend"
+       style="overflow:visible;">
+      <path
+         id="path3974"
+         d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
+         style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;"
+         transform="scale(0.4) rotate(180) translate(10,0)" />
+    </marker>
+  </defs>
+  <sodipodi:namedview
+     pagecolor="#ffffff"
+     bordercolor="#666666"
+     borderopacity="1"
+     objecttolerance="10"
+     gridtolerance="10"
+     guidetolerance="10"
+     inkscape:pageopacity="0"
+     inkscape:pageshadow="2"
+     inkscape:window-width="1023"
+     inkscape:window-height="1148"
+     id="namedview212"
+     showgrid="false"
+     inkscape:zoom="0.55869424"
+     inkscape:cx="796.50006"
+     inkscape:cy="467.99997"
+     inkscape:window-x="897"
+     inkscape:window-y="24"
+     inkscape:window-maximized="0"
+     inkscape:current-layer="g4" />
+  <g
+     style="stroke-width:.025in; fill:none"
+     id="g4">
+    <!-- Line: box -->
+    <rect
+       x="0"
+       y="0"
+       width="20655"
+       height="8325"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+       id="rect6" />
+    <!-- Line: box -->
+    <rect
+       x="14130"
+       y="3600"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect8" />
+    <!-- Line: box -->
+    <rect
+       x="7380"
+       y="3600"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect10" />
+    <!-- Line: box -->
+    <rect
+       x="8505"
+       y="6300"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect12" />
+    <!-- Line: box -->
+    <rect
+       x="13005"
+       y="6300"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect14" />
+    <!-- Line: box -->
+    <rect
+       x="17505"
+       y="6300"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect16" />
+    <!-- Line: box -->
+    <rect
+       x="4005"
+       y="6300"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect18" />
+    <!-- Line: box -->
+    <rect
+       x="10755"
+       y="900"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect20" />
+    <!-- Line -->
+    <polyline
+       points="6255,9225 6255,7746 "
+       style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline22" />
+    <!-- Arrowhead on XXXpoint 6255 9225 - 6255 7560-->
+    <!-- Circle -->
+    <circle
+       cx="11655"
+       cy="4275"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle26" />
+    <!-- Circle -->
+    <circle
+       cx="12105"
+       cy="4275"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle28" />
+    <!-- Circle -->
+    <circle
+       cx="12555"
+       cy="4275"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle30" />
+    <!-- Line -->
+    <polyline
+       points="9855,6300 11355,5010 "
+       style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline32" />
+    <!-- Arrowhead on XXXpoint 9855 6300 - 11496 4890-->
+    <!-- Line -->
+    <polyline
+       points="14355,6300 12852,5010 "
+       style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline36" />
+    <!-- Arrowhead on XXXpoint 14355 6300 - 12711 4890-->
+    <!-- Circle -->
+    <circle
+       cx="17055"
+       cy="6975"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle40" />
+    <!-- Circle -->
+    <circle
+       cx="16605"
+       cy="6975"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle42" />
+    <!-- Circle -->
+    <circle
+       cx="16155"
+       cy="6975"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle44" />
+    <!-- Circle -->
+    <circle
+       cx="12555"
+       cy="6975"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle46" />
+    <!-- Circle -->
+    <circle
+       cx="12105"
+       cy="6975"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle48" />
+    <!-- Circle -->
+    <circle
+       cx="11655"
+       cy="6975"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle50" />
+    <!-- Circle -->
+    <circle
+       cx="8055"
+       cy="6975"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle52" />
+    <!-- Circle -->
+    <circle
+       cx="7605"
+       cy="6975"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle54" />
+    <!-- Circle -->
+    <circle
+       cx="7155"
+       cy="6975"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle56" />
+    <!-- Circle -->
+    <circle
+       cx="4905"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle58" />
+    <!-- Circle -->
+    <circle
+       cx="5355"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle60" />
+    <!-- Circle -->
+    <circle
+       cx="5805"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle62" />
+    <!-- Circle -->
+    <circle
+       cx="18405"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle64" />
+    <!-- Circle -->
+    <circle
+       cx="18855"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle66" />
+    <!-- Circle -->
+    <circle
+       cx="19305"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle68" />
+    <!-- Circle -->
+    <circle
+       cx="13905"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle70" />
+    <!-- Circle -->
+    <circle
+       cx="14355"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle72" />
+    <!-- Circle -->
+    <circle
+       cx="14805"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle74" />
+    <!-- Circle -->
+    <circle
+       cx="9405"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle76" />
+    <!-- Circle -->
+    <circle
+       cx="9855"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle78" />
+    <!-- Circle -->
+    <circle
+       cx="10305"
+       cy="8775"
+       r="114"
+       style="fill:#000000;stroke:#000000;stroke-width:21;"
+       id="circle80" />
+    <!-- Line: box -->
+    <rect
+       x="225"
+       y="1125"
+       width="3150"
+       height="1125"
+       rx="0"
+       style="stroke:#000000;stroke-width:21; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+       id="rect82" />
+    <!-- Line: box -->
+    <rect
+       x="225"
+       y="2250"
+       width="3150"
+       height="1125"
+       rx="0"
+       style="stroke:#000000;stroke-width:21; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+       id="rect84" />
+    <!-- Line: box -->
+    <rect
+       x="225"
+       y="3375"
+       width="3150"
+       height="1125"
+       rx="0"
+       style="stroke:#000000;stroke-width:21; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+       id="rect86" />
+    <!-- Line -->
+    <polyline
+       points="14805,3600 13302,2310 "
+       style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline88" />
+    <!-- Arrowhead on XXXpoint 14805 3600 - 13161 2190-->
+    <!-- Line -->
+    <polyline
+       points="9405,3600 10905,2310 "
+       style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline92" />
+    <!-- Arrowhead on XXXpoint 9405 3600 - 11046 2190-->
+    <!-- Line -->
+    <polyline
+       points="6255,6300 7755,5010 "
+       style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline96" />
+    <!-- Arrowhead on XXXpoint 6255 6300 - 7896 4890-->
+    <!-- Line -->
+    <polyline
+       points="17955,6300 16452,5010 "
+       style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline100" />
+    <!-- Arrowhead on XXXpoint 17955 6300 - 16311 4890-->
+    <!-- Line -->
+    <polyline
+       points="4455,11025 4455,7746 "
+       style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline104" />
+    <!-- Arrowhead on XXXpoint 4455 11025 - 4455 7560-->
+    <!-- Line -->
+    <polyline
+       points="19755,9225 19755,7746 "
+       style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline108" />
+    <!-- Arrowhead on XXXpoint 19755 9225 - 19755 7560-->
+    <!-- Line -->
+    <polyline
+       points="17955,11025 17955,7746 "
+       style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline112" />
+    <!-- Arrowhead on XXXpoint 17955 11025 - 17955 7560-->
+    <!-- Line -->
+    <polyline
+       points="15255,9225 15255,7746 "
+       style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline116" />
+    <!-- Arrowhead on XXXpoint 15255 9225 - 15255 7560-->
+    <!-- Line -->
+    <polyline
+       points="13455,11025 13455,7746 "
+       style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline120" />
+    <!-- Arrowhead on XXXpoint 13455 11025 - 13455 7560-->
+    <!-- Line -->
+    <polyline
+       points="10755,9225 10755,7746 "
+       style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline124" />
+    <!-- Arrowhead on XXXpoint 10755 9225 - 10755 7560-->
+    <!-- Line -->
+    <polyline
+       points="8955,11025 8955,7746 "
+       style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline128" />
+    <!-- Arrowhead on XXXpoint 8955 11025 - 8955 7560-->
+    <!-- Line: box -->
+    <rect
+       x="12105"
+       y="11025"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect132" />
+    <!-- Line: box -->
+    <rect
+       x="13905"
+       y="9225"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect134" />
+    <!-- Line: box -->
+    <rect
+       x="16605"
+       y="11025"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect136" />
+    <!-- Line: box -->
+    <rect
+       x="18405"
+       y="9225"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect138" />
+    <!-- Line: box -->
+    <rect
+       x="9405"
+       y="9225"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect140" />
+    <!-- Line: box -->
+    <rect
+       x="7605"
+       y="11025"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect142" />
+    <!-- Line: box -->
+    <rect
+       x="4905"
+       y="9225"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect144" />
+    <!-- Line: box -->
+    <rect
+       x="3105"
+       y="11025"
+       width="2700"
+       height="1350"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect146" />
+    <!-- Line -->
+    <polyline
+       points="3375,1575 10701,1575 "
+       style="stroke:#000000;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline148" />
+    <!-- Arrowhead on XXXpoint 3375 1575 - 10890 1575-->
+    <!-- Line -->
+    <polyline
+       points="3375,3825 4050,3825 4050,5400 2700,5400 2700,6975 3951,6975 "
+       style="stroke:#000000;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline152" />
+    <!-- Arrowhead on XXXpoint 2700 6975 - 4140 6975-->
+    <!-- Line -->
+    <polyline
+       points="3375,2700 5175,2700 5175,4275 7326,4275 "
+       style="stroke:#000000;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline156" />
+    <!-- Arrowhead on XXXpoint 5175 4275 - 7515 4275-->
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="15480"
+       y="4500"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text160">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="15480"
+       y="4050"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text162">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="8730"
+       y="4050"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text164">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="8730"
+       y="4500"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text166">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="9855"
+       y="6750"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text168">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="9855"
+       y="7200"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text170">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="14355"
+       y="7200"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text172">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="14355"
+       y="6750"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text174">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="18855"
+       y="7200"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text176">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="18855"
+       y="6750"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text178">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5355"
+       y="6750"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text180">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5355"
+       y="7200"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text182">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="450"
+       y="1800"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="324"
+       text-anchor="start"
+       id="text184">-&gt;level[0]</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="450"
+       y="2925"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="324"
+       text-anchor="start"
+       id="text186">-&gt;level[1]</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="450"
+       y="4050"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="324"
+       text-anchor="start"
+       id="text188">-&gt;level[2]</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="12105"
+       y="1350"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text190">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="12105"
+       y="1800"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="middle"
+       id="text192">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="6255"
+       y="10125"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text194">CPU 15</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4455"
+       y="11925"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text196">CPU 0</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="19755"
+       y="10125"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text198">CPU 65535</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="17955"
+       y="11925"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text200">CPU 65519</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="15255"
+       y="10125"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text202">CPU 43695</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="13455"
+       y="11925"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text204">CPU 43679</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="10755"
+       y="10125"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text206">CPU 21839</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="8955"
+       y="11925"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text208">CPU 21823</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="225"
+       y="450"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="288"
+       text-anchor="start"
+       id="text210">struct rcu_state</text>
+  </g>
+</svg>
diff --git a/Documentation/RCU/Design/Data-Structures/TreeMapping.svg b/Documentation/RCU/Design/Data-Structures/TreeMapping.svg
new file mode 100644
index 0000000..729cfa9
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/TreeMapping.svg
@@ -0,0 +1,305 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
+
+<!-- CreationDate: Wed Dec  9 17:43:22 2015 -->
+
+<!-- Magnification: 1.000 -->
+
+<svg
+   xmlns:dc="http://purl.org/dc/elements/1.1/"
+   xmlns:cc="http://creativecommons.org/ns#"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   width="3.1in"
+   height="0.9in"
+   viewBox="-12 -12 3699 1074"
+   id="svg2"
+   version="1.1"
+   inkscape:version="0.48.4 r9939"
+   sodipodi:docname="TreeMapping.fig">
+  <metadata
+     id="metadata66">
+    <rdf:RDF>
+      <cc:Work
+         rdf:about="">
+        <dc:format>image/svg+xml</dc:format>
+        <dc:type
+           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+        <dc:title></dc:title>
+      </cc:Work>
+    </rdf:RDF>
+  </metadata>
+  <defs
+     id="defs64">
+    <marker
+       inkscape:stockid="Arrow2Lend"
+       orient="auto"
+       refY="0.0"
+       refX="0.0"
+       id="Arrow2Lend"
+       style="overflow:visible;">
+      <path
+         id="path3836"
+         style="fill-rule:evenodd;stroke-width:0.62500000;stroke-linejoin:round;"
+         d="M 8.7185878,4.0337352 L -2.2072895,0.016013256 L 8.7185884,-4.0017078 C 6.9730900,-1.6296469 6.9831476,1.6157441 8.7185878,4.0337352 z "
+         transform="scale(1.1) rotate(180) translate(1,0)" />
+    </marker>
+    <marker
+       inkscape:stockid="Arrow2Mend"
+       orient="auto"
+       refY="0.0"
+       refX="0.0"
+       id="Arrow2Mend"
+       style="overflow:visible;">
+      <path
+         id="path3842"
+         style="fill-rule:evenodd;stroke-width:0.62500000;stroke-linejoin:round;"
+         d="M 8.7185878,4.0337352 L -2.2072895,0.016013256 L 8.7185884,-4.0017078 C 6.9730900,-1.6296469 6.9831476,1.6157441 8.7185878,4.0337352 z "
+         transform="scale(0.6) rotate(180) translate(0,0)" />
+    </marker>
+    <marker
+       inkscape:stockid="Arrow1Mend"
+       orient="auto"
+       refY="0.0"
+       refX="0.0"
+       id="Arrow1Mend"
+       style="overflow:visible;">
+      <path
+         id="path3824"
+         d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
+         style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;"
+         transform="scale(0.4) rotate(180) translate(10,0)" />
+    </marker>
+  </defs>
+  <sodipodi:namedview
+     pagecolor="#ffffff"
+     bordercolor="#666666"
+     borderopacity="1"
+     objecttolerance="10"
+     gridtolerance="10"
+     guidetolerance="10"
+     inkscape:pageopacity="0"
+     inkscape:pageshadow="2"
+     inkscape:window-width="991"
+     inkscape:window-height="606"
+     id="namedview62"
+     showgrid="false"
+     inkscape:zoom="3.0752688"
+     inkscape:cx="139.5"
+     inkscape:cy="40.5"
+     inkscape:window-x="891"
+     inkscape:window-y="177"
+     inkscape:window-maximized="0"
+     inkscape:current-layer="g4" />
+  <g
+     style="stroke-width:.025in; fill:none"
+     id="g4">
+    <!-- Line: box -->
+    <rect
+       x="0"
+       y="0"
+       width="3675"
+       height="1050"
+       rx="0"
+       style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+       id="rect6" />
+    <!-- Line: box -->
+    <rect
+       x="75"
+       y="375"
+       width="375"
+       height="300"
+       rx="0"
+       style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect8" />
+    <!-- Line: box -->
+    <rect
+       x="600"
+       y="375"
+       width="375"
+       height="300"
+       rx="0"
+       style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect10" />
+    <!-- Line: box -->
+    <rect
+       x="1125"
+       y="375"
+       width="375"
+       height="300"
+       rx="0"
+       style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect12" />
+    <!-- Line: box -->
+    <rect
+       x="1650"
+       y="375"
+       width="375"
+       height="300"
+       rx="0"
+       style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect14" />
+    <!-- Line: box -->
+    <rect
+       x="2175"
+       y="375"
+       width="375"
+       height="300"
+       rx="0"
+       style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect16" />
+    <!-- Line: box -->
+    <rect
+       x="3225"
+       y="375"
+       width="375"
+       height="300"
+       rx="0"
+       style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect18" />
+    <!-- Line -->
+    <polyline
+       points="675,375 675,150 300,150 300,358 "
+       style="stroke:#000000;stroke-width:7.00088889;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+       id="polyline20" />
+    <!-- Arrowhead on XXXpoint 300 150 - 300 390-->
+    <!-- Line -->
+    <polyline
+       points="1200,675 1200,900 300,900 300,691 "
+       style="stroke:#000000;stroke-width:7.00088889;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+       id="polyline24" />
+    <!-- Arrowhead on XXXpoint 300 900 - 300 660-->
+    <!-- Line -->
+    <polyline
+       points="1725,375 1725,150 900,150 900,358 "
+       style="stroke:#000000;stroke-width:7.00088889;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+       id="polyline28" />
+    <!-- Arrowhead on XXXpoint 900 150 - 900 390-->
+    <!-- Line -->
+    <polyline
+       points="2250,375 2250,75 825,75 825,358 "
+       style="stroke:#000000;stroke-width:7.00088889;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+       id="polyline32" />
+    <!-- Arrowhead on XXXpoint 825 75 - 825 390-->
+    <!-- Line -->
+    <polyline
+       points="2775,675 2775,900 1425,900 1425,691 "
+       style="stroke:#000000;stroke-width:7.00088889;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+       id="polyline36" />
+    <!-- Arrowhead on XXXpoint 1425 900 - 1425 660-->
+    <!-- Line -->
+    <polyline
+       points="3300,675 3300,975 1350,975 1350,691 "
+       style="stroke:#000000;stroke-width:7.00088889;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+       id="polyline40" />
+    <!-- Arrowhead on XXXpoint 1350 975 - 1350 660-->
+    <!-- Line: box -->
+    <rect
+       x="2700"
+       y="375"
+       width="375"
+       height="300"
+       rx="0"
+       style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect44" />
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="300"
+       y="525"
+       fill="#000000"
+       font-family="Times"
+       font-style="normal"
+       font-weight="normal"
+       font-size="96"
+       text-anchor="middle"
+       id="text46">0:7  </text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1350"
+       y="525"
+       fill="#000000"
+       font-family="Times"
+       font-style="normal"
+       font-weight="normal"
+       font-size="96"
+       text-anchor="middle"
+       id="text48">4:7  </text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1875"
+       y="525"
+       fill="#000000"
+       font-family="Times"
+       font-style="normal"
+       font-weight="normal"
+       font-size="96"
+       text-anchor="middle"
+       id="text50">0:1  </text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2400"
+       y="525"
+       fill="#000000"
+       font-family="Times"
+       font-style="normal"
+       font-weight="normal"
+       font-size="96"
+       text-anchor="middle"
+       id="text52">2:3  </text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2925"
+       y="525"
+       fill="#000000"
+       font-family="Times"
+       font-style="normal"
+       font-weight="normal"
+       font-size="96"
+       text-anchor="middle"
+       id="text54">4:5  </text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="3450"
+       y="525"
+       fill="#000000"
+       font-family="Times"
+       font-style="normal"
+       font-weight="normal"
+       font-size="96"
+       text-anchor="middle"
+       id="text56">6:7  </text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="825"
+       y="525"
+       fill="#000000"
+       font-family="Times"
+       font-style="normal"
+       font-weight="normal"
+       font-size="96"
+       text-anchor="middle"
+       id="text58">0:3  </text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="3600"
+       y="150"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="normal"
+       font-size="96"
+       text-anchor="end"
+       id="text60">struct rcu_state</text>
+  </g>
+</svg>
diff --git a/Documentation/RCU/Design/Data-Structures/TreeMappingLevel.svg b/Documentation/RCU/Design/Data-Structures/TreeMappingLevel.svg
new file mode 100644
index 0000000..5b416a4
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/TreeMappingLevel.svg
@@ -0,0 +1,380 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
+
+<!-- CreationDate: Wed Dec  9 17:45:19 2015 -->
+
+<!-- Magnification: 1.000 -->
+
+<svg
+   xmlns:dc="http://purl.org/dc/elements/1.1/"
+   xmlns:cc="http://creativecommons.org/ns#"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   width="3.1in"
+   height="1.8in"
+   viewBox="-12 -12 3699 2124"
+   id="svg2"
+   version="1.1"
+   inkscape:version="0.48.4 r9939"
+   sodipodi:docname="TreeMappingLevel.svg">
+  <metadata
+     id="metadata98">
+    <rdf:RDF>
+      <cc:Work
+         rdf:about="">
+        <dc:format>image/svg+xml</dc:format>
+        <dc:type
+           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+        <dc:title />
+      </cc:Work>
+    </rdf:RDF>
+  </metadata>
+  <defs
+     id="defs96">
+    <marker
+       inkscape:stockid="Arrow2Lend"
+       orient="auto"
+       refY="0.0"
+       refX="0.0"
+       id="Arrow2Lend"
+       style="overflow:visible;">
+      <path
+         id="path3868"
+         style="fill-rule:evenodd;stroke-width:0.62500000;stroke-linejoin:round;"
+         d="M 8.7185878,4.0337352 L -2.2072895,0.016013256 L 8.7185884,-4.0017078 C 6.9730900,-1.6296469 6.9831476,1.6157441 8.7185878,4.0337352 z "
+         transform="scale(1.1) rotate(180) translate(1,0)" />
+    </marker>
+  </defs>
+  <sodipodi:namedview
+     pagecolor="#ffffff"
+     bordercolor="#666666"
+     borderopacity="1"
+     objecttolerance="10"
+     gridtolerance="10"
+     guidetolerance="10"
+     inkscape:pageopacity="0"
+     inkscape:pageshadow="2"
+     inkscape:window-width="1598"
+     inkscape:window-height="1211"
+     id="namedview94"
+     showgrid="false"
+     inkscape:zoom="5.2508961"
+     inkscape:cx="139.5"
+     inkscape:cy="81"
+     inkscape:window-x="840"
+     inkscape:window-y="122"
+     inkscape:window-maximized="0"
+     inkscape:current-layer="g4" />
+  <g
+     style="stroke-width:.025in; fill:none"
+     id="g4">
+    <!-- Line: box -->
+    <rect
+       x="0"
+       y="0"
+       width="3675"
+       height="2100"
+       rx="0"
+       style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+       id="rect6" />
+    <!-- Line: box -->
+    <rect
+       x="75"
+       y="1350"
+       width="750"
+       height="225"
+       rx="0"
+       style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect8" />
+    <!-- Line: box -->
+    <rect
+       x="75"
+       y="1575"
+       width="750"
+       height="225"
+       rx="0"
+       style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect10" />
+    <!-- Line: box -->
+    <rect
+       x="75"
+       y="1800"
+       width="750"
+       height="225"
+       rx="0"
+       style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect12" />
+    <!-- Arc -->
+    <path
+       style="stroke:#000000;stroke-width:7;stroke-linecap:butt;"
+       d="M 1800,900 A 118 118  0  0  0  1800  1125 "
+       id="path14" />
+    <!-- Arc -->
+    <path
+       style="stroke:#000000;stroke-width:7;stroke-linecap:butt;"
+       d="M 750,900 A 75 75  0  0  0  750  1050 "
+       id="path16" />
+    <!-- Line -->
+    <polyline
+       points="750,900 750,691 "
+       style="stroke:#000000;stroke-width:7.00025806;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+       id="polyline18" />
+    <!-- Arrowhead on XXXpoint 750 900 - 750 660-->
+    <!-- Line: box -->
+    <rect
+       x="75"
+       y="375"
+       width="375"
+       height="300"
+       rx="0"
+       style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect22" />
+    <!-- Line: box -->
+    <rect
+       x="600"
+       y="375"
+       width="375"
+       height="300"
+       rx="0"
+       style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect24" />
+    <!-- Line: box -->
+    <rect
+       x="1650"
+       y="375"
+       width="375"
+       height="300"
+       rx="0"
+       style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect26" />
+    <!-- Line: box -->
+    <rect
+       x="2175"
+       y="375"
+       width="375"
+       height="300"
+       rx="0"
+       style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect28" />
+    <!-- Line: box -->
+    <rect
+       x="3225"
+       y="375"
+       width="375"
+       height="300"
+       rx="0"
+       style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect30" />
+    <!-- Line -->
+    <polyline
+       points="675,375 675,150 300,150 300,358 "
+       style="stroke:#000000;stroke-width:7.00025806;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+       id="polyline32" />
+    <!-- Arrowhead on XXXpoint 300 150 - 300 390-->
+    <!-- Line -->
+    <polyline
+       points="1725,375 1725,150 900,150 900,358 "
+       style="stroke:#000000;stroke-width:7.00025806;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+       id="polyline36" />
+    <!-- Arrowhead on XXXpoint 900 150 - 900 390-->
+    <!-- Line -->
+    <polyline
+       points="2250,375 2250,75 825,75 825,358 "
+       style="stroke:#000000;stroke-width:7.00025806;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+       id="polyline40" />
+    <!-- Arrowhead on XXXpoint 825 75 - 825 390-->
+    <!-- Line -->
+    <polyline
+       points="2775,675 2775,975 1425,975 1425,691 "
+       style="stroke:#000000;stroke-width:7.00025806;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+       id="polyline44" />
+    <!-- Arrowhead on XXXpoint 1425 975 - 1425 660-->
+    <!-- Line: box -->
+    <rect
+       x="2700"
+       y="375"
+       width="375"
+       height="300"
+       rx="0"
+       style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect48" />
+    <!-- Line: box -->
+    <rect
+       x="1125"
+       y="375"
+       width="375"
+       height="300"
+       rx="0"
+       style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect50" />
+    <!-- Line -->
+    <polyline
+       points="3300,675 3300,1050 1350,1050 1350,691 "
+       style="stroke:#000000;stroke-width:7.00025806;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+       id="polyline52" />
+    <!-- Arrowhead on XXXpoint 1350 1050 - 1350 660-->
+    <!-- Line -->
+    <polyline
+       points="825,1425 975,1425 975,1200 225,1200 225,691 "
+       style="stroke:#000000;stroke-width:7.00025806;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+       id="polyline56" />
+    <!-- Arrowhead on XXXpoint 225 1200 - 225 660-->
+    <!-- Line -->
+    <polyline
+       points="1200,675 1200,975 300,975 300,691 "
+       style="stroke:#000000;stroke-width:7.00025806;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+       id="polyline60" />
+    <!-- Arrowhead on XXXpoint 300 975 - 300 660-->
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="150"
+       y="1500"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="108"
+       text-anchor="start"
+       id="text64">-&gt;level[0]</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="150"
+       y="1725"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="108"
+       text-anchor="start"
+       id="text66">-&gt;level[1]</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="150"
+       y="1950"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="108"
+       text-anchor="start"
+       id="text68">-&gt;level[2]</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="300"
+       y="525"
+       fill="#000000"
+       font-family="Times"
+       font-style="normal"
+       font-weight="normal"
+       font-size="96"
+       text-anchor="middle"
+       id="text70">0:7  </text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1350"
+       y="525"
+       fill="#000000"
+       font-family="Times"
+       font-style="normal"
+       font-weight="normal"
+       font-size="96"
+       text-anchor="middle"
+       id="text72">4:7  </text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1875"
+       y="525"
+       fill="#000000"
+       font-family="Times"
+       font-style="normal"
+       font-weight="normal"
+       font-size="96"
+       text-anchor="middle"
+       id="text74">0:1  </text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2400"
+       y="525"
+       fill="#000000"
+       font-family="Times"
+       font-style="normal"
+       font-weight="normal"
+       font-size="96"
+       text-anchor="middle"
+       id="text76">2:3  </text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2925"
+       y="525"
+       fill="#000000"
+       font-family="Times"
+       font-style="normal"
+       font-weight="normal"
+       font-size="96"
+       text-anchor="middle"
+       id="text78">4:5  </text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="3450"
+       y="525"
+       fill="#000000"
+       font-family="Times"
+       font-style="normal"
+       font-weight="normal"
+       font-size="96"
+       text-anchor="middle"
+       id="text80">6:7  </text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="825"
+       y="525"
+       fill="#000000"
+       font-family="Times"
+       font-style="normal"
+       font-weight="normal"
+       font-size="96"
+       text-anchor="middle"
+       id="text82">0:3  </text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="3600"
+       y="150"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="normal"
+       font-size="96"
+       text-anchor="end"
+       id="text84">struct rcu_state</text>
+    <!-- Line -->
+    <polyline
+       points="825,1875 1800,1875 1800,1125 "
+       style="stroke:#000000;stroke-width:7.00025806;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:none"
+       id="polyline86" />
+    <!-- Line -->
+    <polyline
+       points="1800,900 1800,691 "
+       style="stroke:#000000;stroke-width:7.00025806;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+       id="polyline88" />
+    <!-- Arrowhead on XXXpoint 1800 900 - 1800 660-->
+    <!-- Line -->
+    <polyline
+       points="825,1650 1200,1650 1200,1125 750,1125 750,1050 "
+       style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="polyline92" />
+  </g>
+</svg>
diff --git a/Documentation/RCU/Design/Data-Structures/blkd_task.svg b/Documentation/RCU/Design/Data-Structures/blkd_task.svg
new file mode 100644
index 0000000..00e810b
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/blkd_task.svg
@@ -0,0 +1,843 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
+
+<!-- CreationDate: Wed Dec  9 17:35:03 2015 -->
+
+<!-- Magnification: 2.000 -->
+
+<svg
+   xmlns:dc="http://purl.org/dc/elements/1.1/"
+   xmlns:cc="http://creativecommons.org/ns#"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   width="10.1in"
+   height="8.6in"
+   viewBox="-44 -44 12088 10288"
+   id="svg2"
+   version="1.1"
+   inkscape:version="0.48.4 r9939"
+   sodipodi:docname="blkd_task.fig">
+  <metadata
+     id="metadata212">
+    <rdf:RDF>
+      <cc:Work
+         rdf:about="">
+        <dc:format>image/svg+xml</dc:format>
+        <dc:type
+           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+        <dc:title></dc:title>
+      </cc:Work>
+    </rdf:RDF>
+  </metadata>
+  <defs
+     id="defs210">
+    <marker
+       inkscape:stockid="Arrow1Mend"
+       orient="auto"
+       refY="0.0"
+       refX="0.0"
+       id="Arrow1Mend"
+       style="overflow:visible;">
+      <path
+         id="path3970"
+         d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
+         style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;"
+         transform="scale(0.4) rotate(180) translate(10,0)" />
+    </marker>
+  </defs>
+  <sodipodi:namedview
+     pagecolor="#ffffff"
+     bordercolor="#666666"
+     borderopacity="1"
+     objecttolerance="10"
+     gridtolerance="10"
+     guidetolerance="10"
+     inkscape:pageopacity="0"
+     inkscape:pageshadow="2"
+     inkscape:window-width="1087"
+     inkscape:window-height="1144"
+     id="namedview208"
+     showgrid="false"
+     inkscape:zoom="1.0495049"
+     inkscape:cx="454.50003"
+     inkscape:cy="387.00003"
+     inkscape:window-x="833"
+     inkscape:window-y="28"
+     inkscape:window-maximized="0"
+     inkscape:current-layer="g4" />
+  <g
+     style="stroke-width:.025in; fill:none"
+     id="g4">
+    <!-- Line: box -->
+    <rect
+       x="450"
+       y="0"
+       width="6300"
+       height="7350"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+       id="rect6" />
+    <!-- Line: box -->
+    <rect
+       x="4950"
+       y="4950"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect8" />
+    <!-- Line: box -->
+    <rect
+       x="750"
+       y="600"
+       width="5700"
+       height="3750"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+       id="rect10" />
+    <!-- Line -->
+    <polyline
+       points="5250,8100 5688,5912 "
+       style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="polyline12" />
+    <!-- Arrowhead on XXXpoint 5250 8100 - 5710 5790-->
+    <polyline
+       points="5714 6068 5704 5822 5598 6044 "
+       style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+       id="polyline14" />
+    <!-- Line -->
+    <polyline
+       points="4050,9300 4486,7262 "
+       style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="polyline16" />
+    <!-- Arrowhead on XXXpoint 4050 9300 - 4512 7140-->
+    <polyline
+       points="4514 7418 4506 7172 4396 7394 "
+       style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+       id="polyline18" />
+    <!-- Line -->
+    <polyline
+       points="1040,9300 1476,7262 "
+       style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="polyline20" />
+    <!-- Arrowhead on XXXpoint 1040 9300 - 1502 7140-->
+    <polyline
+       points="1504 7418 1496 7172 1386 7394 "
+       style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+       id="polyline22" />
+    <!-- Line -->
+    <polyline
+       points="2240,8100 2676,6062 "
+       style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="polyline24" />
+    <!-- Arrowhead on XXXpoint 2240 8100 - 2702 5940-->
+    <polyline
+       points="2704 6218 2696 5972 2586 6194 "
+       style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+       id="polyline26" />
+    <!-- Line: box -->
+    <rect
+       x="0"
+       y="450"
+       width="6300"
+       height="7350"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+       id="rect28" />
+    <!-- Line: box -->
+    <rect
+       x="300"
+       y="1050"
+       width="5700"
+       height="3750"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+       id="rect30" />
+    <!-- Line -->
+    <polyline
+       points="1350,3450 2350,2590 "
+       style="stroke:#00d1d1;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline32" />
+    <!-- Arrowhead on XXXpoint 1350 3450 - 2444 2510-->
+    <!-- Line -->
+    <polyline
+       points="4950,3450 3948,2590 "
+       style="stroke:#00d1d1;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline36" />
+    <!-- Arrowhead on XXXpoint 4950 3450 - 3854 2510-->
+    <!-- Line -->
+    <polyline
+       points="4050,6600 4050,4414 "
+       style="stroke:#00d1d1;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline40" />
+    <!-- Arrowhead on XXXpoint 4050 6600 - 4050 4290-->
+    <!-- Line -->
+    <polyline
+       points="1050,6600 1050,4414 "
+       style="stroke:#00d1d1;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline44" />
+    <!-- Arrowhead on XXXpoint 1050 6600 - 1050 4290-->
+    <!-- Line -->
+    <polyline
+       points="2250,5400 2250,4414 "
+       style="stroke:#00d1d1;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline48" />
+    <!-- Arrowhead on XXXpoint 2250 5400 - 2250 4290-->
+    <!-- Line -->
+    <polyline
+       points="2250,8100 2250,6364 "
+       style="stroke:#00ff00;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline52" />
+    <!-- Arrowhead on XXXpoint 2250 8100 - 2250 6240-->
+    <!-- Line -->
+    <polyline
+       points="1050,9300 1050,7564 "
+       style="stroke:#00ff00;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline56" />
+    <!-- Arrowhead on XXXpoint 1050 9300 - 1050 7440-->
+    <!-- Line -->
+    <polyline
+       points="4050,9300 4050,7564 "
+       style="stroke:#00ff00;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline60" />
+    <!-- Arrowhead on XXXpoint 4050 9300 - 4050 7440-->
+    <!-- Line -->
+    <polyline
+       points="5250,8100 5250,6364 "
+       style="stroke:#00ff00;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline64" />
+    <!-- Arrowhead on XXXpoint 5250 8100 - 5250 6240-->
+    <!-- Circle -->
+    <circle
+       cx="2850"
+       cy="3900"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle68" />
+    <!-- Circle -->
+    <circle
+       cx="3150"
+       cy="3900"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle70" />
+    <!-- Circle -->
+    <circle
+       cx="3450"
+       cy="3900"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle72" />
+    <!-- Circle -->
+    <circle
+       cx="1350"
+       cy="5100"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle74" />
+    <!-- Circle -->
+    <circle
+       cx="1650"
+       cy="5100"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle76" />
+    <!-- Circle -->
+    <circle
+       cx="1950"
+       cy="5100"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle78" />
+    <!-- Circle -->
+    <circle
+       cx="4350"
+       cy="5100"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle80" />
+    <!-- Circle -->
+    <circle
+       cx="4650"
+       cy="5100"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle82" />
+    <!-- Circle -->
+    <circle
+       cx="4950"
+       cy="5100"
+       r="76"
+       style="fill:#000000;stroke:#000000;stroke-width:14;"
+       id="circle84" />
+    <!-- Line: box -->
+    <rect
+       x="750"
+       y="3450"
+       width="1800"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect86" />
+    <!-- Line: box -->
+    <rect
+       x="300"
+       y="6600"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect88" />
+    <!-- Line: box -->
+    <rect
+       x="4500"
+       y="5400"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect90" />
+    <!-- Line: box -->
+    <rect
+       x="3300"
+       y="6600"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect92" />
+    <!-- Line: box -->
+    <rect
+       x="2250"
+       y="1650"
+       width="1800"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect94" />
+    <!-- Line: box -->
+    <rect
+       x="0"
+       y="9300"
+       width="2100"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+       id="rect96" />
+    <!-- Line: box -->
+    <rect
+       x="1350"
+       y="8100"
+       width="2100"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+       id="rect98" />
+    <!-- Line: box -->
+    <rect
+       x="3000"
+       y="9300"
+       width="2100"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+       id="rect100" />
+    <!-- Line: box -->
+    <rect
+       x="4350"
+       y="8100"
+       width="2100"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+       id="rect102" />
+    <!-- Line: box -->
+    <rect
+       x="1500"
+       y="5400"
+       width="1500"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect104" />
+    <!-- Line -->
+    <polygon
+       points="5550,3450 7350,2850 7350,5100 5550,4350 5550,3450 "
+       style="stroke:#000000;stroke-width:14; stroke-linejoin:miter; stroke-linecap:butt; stroke-dasharray:120 120;fill:#ffbfbf; "
+       id="polygon106" />
+    <!-- Line -->
+    <polyline
+       points="9300,3150 10734,3150 "
+       style="stroke:#000000;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline108" />
+    <!-- Arrowhead on XXXpoint 9300 3150 - 10860 3150-->
+    <!-- Line: box -->
+    <rect
+       x="10800"
+       y="2850"
+       width="1200"
+       height="750"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect112" />
+    <!-- Line -->
+    <polyline
+       points="11400,3600 11400,4284 "
+       style="stroke:#000000;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline114" />
+    <!-- Arrowhead on XXXpoint 11400 3600 - 11400 4410-->
+    <!-- Line: box -->
+    <rect
+       x="10800"
+       y="4350"
+       width="1200"
+       height="750"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect118" />
+    <!-- Line -->
+    <polyline
+       points="11400,5100 11400,5784 "
+       style="stroke:#000000;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline120" />
+    <!-- Arrowhead on XXXpoint 11400 5100 - 11400 5910-->
+    <!-- Line: box -->
+    <rect
+       x="10800"
+       y="5850"
+       width="1200"
+       height="750"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect124" />
+    <!-- Line -->
+    <polyline
+       points="9300,3900 9900,3900 9900,4650 10734,4650 "
+       style="stroke:#000000;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline126" />
+    <!-- Arrowhead on XXXpoint 9900 4650 - 10860 4650-->
+    <!-- Line -->
+    <polyline
+       points="9300,4650 9600,4650 9600,6150 10734,6150 "
+       style="stroke:#000000;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline130" />
+    <!-- Arrowhead on XXXpoint 9600 6150 - 10860 6150-->
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="6450"
+       y="300"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="192"
+       text-anchor="end"
+       id="text134">rcu_bh</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="3150"
+       y="1950"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text136">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="3150"
+       y="2250"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text138">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1650"
+       y="3750"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text140">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1650"
+       y="4050"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text142">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2250"
+       y="5700"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text144">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2250"
+       y="6000"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text146">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1050"
+       y="6900"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text148">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1050"
+       y="7200"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text150">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5250"
+       y="5700"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text152">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5250"
+       y="6000"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text154">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4050"
+       y="6900"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text156">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4050"
+       y="7200"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text158">rcu_data</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="450"
+       y="1350"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="start"
+       id="text160">struct rcu_state</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1050"
+       y="9600"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text162">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="1050"
+       y="9900"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text164">rcu_dynticks</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4050"
+       y="9600"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text166">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4050"
+       y="9900"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text168">rcu_dynticks</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2400"
+       y="8400"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text170">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="2400"
+       y="8700"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text172">rcu_dynticks</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5400"
+       y="8400"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text174">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="5400"
+       y="8700"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text176">rcu_dynticks</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="6000"
+       y="750"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="192"
+       text-anchor="end"
+       id="text178">rcu_sched</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="11400"
+       y="3300"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="216"
+       text-anchor="middle"
+       id="text180">T3</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="11400"
+       y="4800"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="216"
+       text-anchor="middle"
+       id="text182">T2</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="11400"
+       y="6300"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="216"
+       text-anchor="middle"
+       id="text184">T1</text>
+    <!-- Line -->
+    <polyline
+       points="5250,5400 5250,4414 "
+       style="stroke:#00d1d1;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline186" />
+    <!-- Arrowhead on XXXpoint 5250 5400 - 5250 4290-->
+    <!-- Line: box -->
+    <rect
+       x="3750"
+       y="3450"
+       width="1800"
+       height="900"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect190" />
+    <!-- Line: box -->
+    <rect
+       x="7350"
+       y="2850"
+       width="1950"
+       height="750"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect192" />
+    <!-- Line: box -->
+    <rect
+       x="7350"
+       y="3600"
+       width="1950"
+       height="750"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect194" />
+    <!-- Line: box -->
+    <rect
+       x="7350"
+       y="4350"
+       width="1950"
+       height="750"
+       rx="0"
+       style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+       id="rect196" />
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4650"
+       y="4050"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text198">rcu_node</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="4650"
+       y="3750"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="middle"
+       id="text200">struct</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="7500"
+       y="3300"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="start"
+       id="text202">blkd_tasks</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="7500"
+       y="4050"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="start"
+       id="text204">gp_tasks</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="7500"
+       y="4800"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="192"
+       text-anchor="start"
+       id="text206">exp_tasks</text>
+  </g>
+</svg>
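
The blkd_task diagram above shows how preemptible RCU tracks tasks that
were preempted while inside an RCU read-side critical section: each
rcu_node carries a ->blkd_tasks list threading through the blocked
tasks (T1, T2, and T3 in the figure), with ->gp_tasks and ->exp_tasks
pointing at the first entry blocking the current normal and expedited
grace periods, respectively.  A minimal sketch of the fields involved,
assuming a CONFIG_PREEMPT_RCU kernel and simplified from the real
declarations:

	#include <linux/types.h>	/* struct list_head */

	struct rcu_node {
		/* Tasks preempted within an RCU read-side critical section. */
		struct list_head blkd_tasks;
		/* First task blocking the current grace period, or NULL. */
		struct list_head *gp_tasks;
		/* First task blocking the current expedited grace period, or NULL. */
		struct list_head *exp_tasks;
	};

Each task links into its leaf rcu_node's ->blkd_tasks list via the
rcu_node_entry field of its task_struct, so ending a grace period is a
matter of waiting for ->gp_tasks to advance to NULL.
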
diff --git a/Documentation/RCU/Design/Data-Structures/nxtlist.svg b/Documentation/RCU/Design/Data-Structures/nxtlist.svg
new file mode 100644
index 0000000..abc4cc7
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/nxtlist.svg
@@ -0,0 +1,396 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
+
+<!-- CreationDate: Wed Dec  9 17:39:46 2015 -->
+
+<!-- Magnification: 3.000 -->
+
+<svg
+   xmlns:dc="http://purl.org/dc/elements/1.1/"
+   xmlns:cc="http://creativecommons.org/ns#"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   width="10.4in"
+   height="10.4in"
+   viewBox="-66 -66 12507 12507"
+   id="svg2"
+   version="1.1"
+   inkscape:version="0.48.4 r9939"
+   sodipodi:docname="nxtlist.fig">
+  <metadata
+     id="metadata94">
+    <rdf:RDF>
+      <cc:Work
+         rdf:about="">
+        <dc:format>image/svg+xml</dc:format>
+        <dc:type
+           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+        <dc:title></dc:title>
+      </cc:Work>
+    </rdf:RDF>
+  </metadata>
+  <defs
+     id="defs92">
+    <marker
+       inkscape:stockid="Arrow1Mend"
+       orient="auto"
+       refY="0.0"
+       refX="0.0"
+       id="Arrow1Mend"
+       style="overflow:visible;">
+      <path
+         id="path3852"
+         d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
+         style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;"
+         transform="scale(0.4) rotate(180) translate(10,0)" />
+    </marker>
+  </defs>
+  <sodipodi:namedview
+     pagecolor="#ffffff"
+     bordercolor="#666666"
+     borderopacity="1"
+     objecttolerance="10"
+     gridtolerance="10"
+     guidetolerance="10"
+     inkscape:pageopacity="0"
+     inkscape:pageshadow="2"
+     inkscape:window-width="925"
+     inkscape:window-height="928"
+     id="namedview90"
+     showgrid="false"
+     inkscape:zoom="0.80021373"
+     inkscape:cx="467.99997"
+     inkscape:cy="467.99997"
+     inkscape:window-x="948"
+     inkscape:window-y="73"
+     inkscape:window-maximized="0"
+     inkscape:current-layer="g4" />
+  <g
+     style="stroke-width:.025in; fill:none"
+     id="g4">
+    <!-- Line: box -->
+    <rect
+       x="0"
+       y="0"
+       width="7875"
+       height="1125"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect6" />
+    <!-- Line: box -->
+    <rect
+       x="0"
+       y="1125"
+       width="7875"
+       height="1125"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect8" />
+    <!-- Line: box -->
+    <rect
+       x="0"
+       y="2250"
+       width="7875"
+       height="1125"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect10" />
+    <!-- Line: box -->
+    <rect
+       x="0"
+       y="3375"
+       width="7875"
+       height="1125"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect12" />
+    <!-- Line: box -->
+    <rect
+       x="0"
+       y="4500"
+       width="7875"
+       height="1125"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+       id="rect14" />
+    <!-- Line: box -->
+    <rect
+       x="10575"
+       y="0"
+       width="1800"
+       height="1125"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect16" />
+    <!-- Line: box -->
+    <rect
+       x="10575"
+       y="1125"
+       width="1800"
+       height="1125"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect18" />
+    <!-- Line -->
+    <polyline
+       points="11475,2250 11475,3276 "
+       style="stroke:#000000;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline20" />
+    <!-- Arrowhead on XXXpoint 11475 2250 - 11475 3465-->
+    <!-- Line: box -->
+    <rect
+       x="10575"
+       y="6750"
+       width="1800"
+       height="1125"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect24" />
+    <!-- Line: box -->
+    <rect
+       x="10575"
+       y="7875"
+       width="1800"
+       height="1125"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect26" />
+    <!-- Line: box -->
+    <rect
+       x="10575"
+       y="10125"
+       width="1800"
+       height="1125"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect28" />
+    <!-- Line: box -->
+    <rect
+       x="10575"
+       y="11250"
+       width="1800"
+       height="1125"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect30" />
+    <!-- Line: box -->
+    <rect
+       x="10575"
+       y="3375"
+       width="1800"
+       height="1125"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect32" />
+    <!-- Line -->
+    <polyline
+       points="11475,5625 11475,6651 "
+       style="stroke:#000000;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline34" />
+    <!-- Arrowhead on XXXpoint 11475 5625 - 11475 6840-->
+    <!-- Line -->
+    <polyline
+       points="7875,225 10476,225 "
+       style="stroke:#000000;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline38" />
+    <!-- Arrowhead on XXXpoint 7875 225 - 10665 225-->
+    <!-- Line -->
+    <polyline
+       points="7875,1350 9675,1350 9675,675 7971,675 "
+       style="stroke:#000000;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline42" />
+    <!-- Arrowhead on XXXpoint 9675 675 - 7785 675-->
+    <!-- Line -->
+    <polyline
+       points="7875,2475 9675,2475 9675,4725 10476,4725 "
+       style="stroke:#000000;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline46" />
+    <!-- Arrowhead on XXXpoint 9675 4725 - 10665 4725-->
+    <!-- Line -->
+    <polyline
+       points="7875,3600 9225,3600 9225,5175 10476,5175 "
+       style="stroke:#000000;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline50" />
+    <!-- Arrowhead on XXXpoint 9225 5175 - 10665 5175-->
+    <!-- Line -->
+    <polyline
+       points="7875,4725 8775,4725 8775,11475 10476,11475 "
+       style="stroke:#000000;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline54" />
+    <!-- Arrowhead on XXXpoint 8775 11475 - 10665 11475-->
+    <!-- Line: box -->
+    <rect
+       x="10575"
+       y="4500"
+       width="1800"
+       height="1125"
+       rx="0"
+       style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+       id="rect58" />
+    <!-- Line -->
+    <polyline
+       points="11475,9000 11475,10026 "
+       style="stroke:#000000;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+       id="polyline60" />
+    <!-- Arrowhead on XXXpoint 11475 9000 - 11475 10215-->
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="225"
+       y="675"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="324"
+       text-anchor="start"
+       id="text64">nxtlist</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="225"
+       y="1800"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="324"
+       text-anchor="start"
+       id="text66">nxttail[RCU_DONE_TAIL]</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="225"
+       y="2925"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="324"
+       text-anchor="start"
+       id="text68">nxttail[RCU_WAIT_TAIL]</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="225"
+       y="4050"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="324"
+       text-anchor="start"
+       id="text70">nxttail[RCU_NEXT_READY_TAIL]</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="225"
+       y="5175"
+       fill="#000000"
+       font-family="Courier"
+       font-style="normal"
+       font-weight="bold"
+       font-size="324"
+       text-anchor="start"
+       id="text72">nxttail[RCU_NEXT_TAIL]</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="11475"
+       y="675"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text74">CB 1</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="11475"
+       y="1800"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text76">next</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="11475"
+       y="7425"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text78">CB 3</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="11475"
+       y="8550"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text80">next</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="11475"
+       y="10800"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text82">CB 4</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="11475"
+       y="11925"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text84">next</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="11475"
+       y="4050"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text86">CB 2</text>
+    <!-- Text -->
+    <text
+       xml:space="preserve"
+       x="11475"
+       y="5175"
+       fill="#000000"
+       font-family="Helvetica"
+       font-style="normal"
+       font-weight="normal"
+       font-size="324"
+       text-anchor="middle"
+       id="text88">next</text>
+  </g>
+</svg>
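
The nxtlist diagram above shows the segmented callback list in struct
rcu_data: a single NULL-terminated, singly linked list of rcu_head
structures, partitioned into four segments by the ->nxttail[] array of
pointers to ->next pointers.  A minimal sketch, simplified from the
real per-CPU declarations:

	#define RCU_DONE_TAIL		0  /* Grace period done; ready to invoke. */
	#define RCU_WAIT_TAIL		1  /* Waiting for the current grace period. */
	#define RCU_NEXT_READY_TAIL	2  /* Ready to wait for the next grace period. */
	#define RCU_NEXT_TAIL		3  /* Newly enqueued, not yet assigned. */
	#define RCU_NEXT_SIZE		4

	struct rcu_head {
		struct rcu_head *next;
		void (*func)(struct rcu_head *head);
	};

	struct rcu_data {
		struct rcu_head *nxtlist;		   /* Head: CB 1 in the figure. */
		struct rcu_head **nxttail[RCU_NEXT_SIZE];  /* One boundary per segment. */
	};

Each ->nxttail[] entry points at the ->next pointer of the last
callback in its segment (or at ->nxtlist itself when the segment is
empty), so advancing an entire segment after a grace period takes a
few pointer assignments rather than a list walk.
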
diff --git a/Documentation/RCU/Design/Requirements/2013-08-is-it-dead.png b/Documentation/RCU/Design/Requirements/2013-08-is-it-dead.png
deleted file mode 100644
index 7496a55..0000000
--- a/Documentation/RCU/Design/Requirements/2013-08-is-it-dead.png
+++ /dev/null
Binary files differ
diff --git a/Documentation/RCU/Design/Requirements/RCUApplicability.svg b/Documentation/RCU/Design/Requirements/RCUApplicability.svg
deleted file mode 100644
index ebcbeee..0000000
--- a/Documentation/RCU/Design/Requirements/RCUApplicability.svg
+++ /dev/null
@@ -1,237 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<!-- Creator: fig2dev Version 3.2 Patchlevel 5d -->
-
-<!-- CreationDate: Tue Mar  4 18:34:25 2014 -->
-
-<!-- Magnification: 3.000 -->
-
-<svg
-   xmlns:dc="http://purl.org/dc/elements/1.1/"
-   xmlns:cc="http://creativecommons.org/ns#"
-   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
-   xmlns:svg="http://www.w3.org/2000/svg"
-   xmlns="http://www.w3.org/2000/svg"
-   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
-   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
-   width="1089.1382"
-   height="668.21368"
-   viewBox="-2121 -36 14554.634 8876.4061"
-   id="svg2"
-   version="1.1"
-   inkscape:version="0.48.3.1 r9886"
-   sodipodi:docname="RCUApplicability.svg">
-  <metadata
-     id="metadata40">
-    <rdf:RDF>
-      <cc:Work
-         rdf:about="">
-        <dc:format>image/svg+xml</dc:format>
-        <dc:type
-           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
-        <dc:title />
-      </cc:Work>
-    </rdf:RDF>
-  </metadata>
-  <defs
-     id="defs38" />
-  <sodipodi:namedview
-     pagecolor="#ffffff"
-     bordercolor="#666666"
-     borderopacity="1"
-     objecttolerance="10"
-     gridtolerance="10"
-     guidetolerance="10"
-     inkscape:pageopacity="0"
-     inkscape:pageshadow="2"
-     inkscape:window-width="849"
-     inkscape:window-height="639"
-     id="namedview36"
-     showgrid="false"
-     inkscape:zoom="0.51326165"
-     inkscape:cx="544.56912"
-     inkscape:cy="334.10686"
-     inkscape:window-x="149"
-     inkscape:window-y="448"
-     inkscape:window-maximized="0"
-     inkscape:current-layer="g4"
-     fit-margin-top="5"
-     fit-margin-left="5"
-     fit-margin-right="5"
-     fit-margin-bottom="5" />
-  <g
-     style="fill:none;stroke-width:0.025in"
-     id="g4"
-     transform="translate(-2043.6828,14.791398)">
-    <!-- Line: box -->
-    <rect
-       x="0"
-       y="0"
-       width="14400"
-       height="8775"
-       rx="0"
-       style="fill:#ffa1a1;stroke:#000000;stroke-width:21;stroke-linecap:butt;stroke-linejoin:miter"
-       id="rect6" />
-    <!-- Line: box -->
-    <rect
-       x="1350"
-       y="0"
-       width="11700"
-       height="6075"
-       rx="0"
-       style="fill:#ffff00;stroke:#000000;stroke-width:21;stroke-linecap:butt;stroke-linejoin:miter"
-       id="rect8" />
-    <!-- Line: box -->
-    <rect
-       x="2700"
-       y="0"
-       width="9000"
-       height="4275"
-       rx="0"
-       style="fill:#00ff00;stroke:#000000;stroke-width:21;stroke-linecap:butt;stroke-linejoin:miter"
-       id="rect10" />
-    <!-- Line: box -->
-    <rect
-       x="4050"
-       y="0"
-       width="6300"
-       height="2475"
-       rx="0"
-       style="fill:#87cfff;stroke:#000000;stroke-width:21;stroke-linecap:butt;stroke-linejoin:miter"
-       id="rect12" />
-    <!-- Text -->
-    <text
-       xml:space="preserve"
-       x="7200"
-       y="900"
-       font-style="normal"
-       font-weight="normal"
-       font-size="324"
-       id="text14"
-       sodipodi:linespacing="125%"
-       style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"><tspan
-         style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
-         id="tspan3017">Read-Mostly, Stale &amp;</tspan></text>
-    <!-- Text -->
-    <text
-       xml:space="preserve"
-       x="7200"
-       y="1350"
-       font-style="normal"
-       font-weight="normal"
-       font-size="324"
-       id="text16"
-       sodipodi:linespacing="125%"
-       style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"><tspan
-         style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
-         id="tspan3019">Inconsistent Data OK</tspan></text>
-    <!-- Text -->
-    <text
-       xml:space="preserve"
-       x="7200"
-       y="1800"
-       font-style="normal"
-       font-weight="normal"
-       font-size="324"
-       id="text18"
-       sodipodi:linespacing="125%"
-       style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"><tspan
-         style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
-         id="tspan3021">(RCU Works Great!!!)</tspan></text>
-    <!-- Text -->
-    <text
-       xml:space="preserve"
-       x="7200"
-       y="3825"
-       font-style="normal"
-       font-weight="normal"
-       font-size="324"
-       id="text20"
-       sodipodi:linespacing="125%"
-       style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"><tspan
-         style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
-         id="tspan3023">(RCU Works Well)</tspan></text>
-    <!-- Text -->
-    <text
-       xml:space="preserve"
-       x="7200"
-       y="3375"
-       font-style="normal"
-       font-weight="normal"
-       font-size="324"
-       id="text22"
-       sodipodi:linespacing="125%"
-       style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"><tspan
-         style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
-         id="tspan3025">Read-Mostly, Need Consistent Data</tspan></text>
-    <!-- Text -->
-    <text
-       xml:space="preserve"
-       x="7200"
-       y="5175"
-       font-style="normal"
-       font-weight="normal"
-       font-size="324"
-       id="text24"
-       sodipodi:linespacing="125%"
-       style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"><tspan
-         style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
-         id="tspan3027">Read-Write, Need Consistent Data</tspan></text>
-    <!-- Text -->
-    <text
-       xml:space="preserve"
-       x="7200"
-       y="6975"
-       font-style="normal"
-       font-weight="normal"
-       font-size="324"
-       id="text26"
-       style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
-       sodipodi:linespacing="125%">Update-Mostly, Need Consistent Data</text>
-    <!-- Text -->
-    <text
-       xml:space="preserve"
-       x="7200"
-       y="5625"
-       font-style="normal"
-       font-weight="normal"
-       font-size="324"
-       id="text28"
-       sodipodi:linespacing="125%"
-       style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"><tspan
-         style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
-         id="tspan3029">(RCU Might Be OK...)</tspan></text>
-    <!-- Text -->
-    <text
-       xml:space="preserve"
-       x="7200"
-       y="7875"
-       font-style="normal"
-       font-weight="normal"
-       font-size="324"
-       id="text30"
-       style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
-       sodipodi:linespacing="125%">(1) Provide Existence Guarantees For Update-Friendly Mechanisms</text>
-    <!-- Text -->
-    <text
-       xml:space="preserve"
-       x="7200"
-       y="8325"
-       font-style="normal"
-       font-weight="normal"
-       font-size="324"
-       id="text32"
-       style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
-       sodipodi:linespacing="125%">(2) Provide Wait-Free Read-Side Primitives for Real-Time Use)</text>
-    <!-- Text -->
-    <text
-       xml:space="preserve"
-       x="7200"
-       y="7425"
-       font-style="normal"
-       font-weight="normal"
-       font-size="324"
-       id="text34"
-       style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
-       sodipodi:linespacing="125%">(RCU is Very Unlikely to be the Right Tool For The Job, But it Can:</text>
-  </g>
-</svg>
diff --git a/Documentation/RCU/Design/Requirements/Requirements.html b/Documentation/RCU/Design/Requirements/Requirements.html
index a725f99..e7e24b3 100644
--- a/Documentation/RCU/Design/Requirements/Requirements.html
+++ b/Documentation/RCU/Design/Requirements/Requirements.html
@@ -1,5 +1,3 @@
-<!-- DO NOT HAND EDIT. -->
-<!-- Instead, edit Documentation/RCU/Design/Requirements/Requirements.htmlx and run 'sh htmlqqz.sh Documentation/RCU/Design/Requirements/Requirements' -->
 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
         "http://www.w3.org/TR/html4/loose.dtd">
         <html>
@@ -65,8 +63,8 @@
 
 <p>
 This is followed by a <a href="#Summary">summary</a>,
-which is in turn followed by the inevitable
-<a href="#Answers to Quick Quizzes">answers to the quick quizzes</a>.
+though the answer to each quick quiz now immediately follows the quiz.
+Select the big white space with your mouse to see the answer.
 
 <h2><a name="Fundamental Requirements">Fundamental Requirements</a></h2>
 
@@ -153,13 +151,27 @@
 </blockquote>
 cannot happen.
 
-<p><a name="Quick Quiz 1"><b>Quick Quiz 1</b>:</a>
-Wait a minute!
-You said that updaters can make useful forward progress concurrently
-with readers, but pre-existing readers will block
-<tt>synchronize_rcu()</tt>!!!
-Just who are you trying to fool???
-<br><a href="#qq1answer">Answer</a>
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	Wait a minute!
+	You said that updaters can make useful forward progress concurrently
+	with readers, but pre-existing readers will block
+	<tt>synchronize_rcu()</tt>!!!
+	Just who are you trying to fool???
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	First, if updaters do not wish to be blocked by readers, they can use
+	<tt>call_rcu()</tt> or <tt>kfree_rcu()</tt>, which will
+	be discussed later.
+	Second, even when using <tt>synchronize_rcu()</tt>, the other
+	update-side code does run concurrently with readers, whether
+	pre-existing or not.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
 
 <p>
 This scenario resembles one of the first uses of RCU in
@@ -210,9 +222,20 @@
 with <tt>recovery()</tt>, but with little or no synchronization
 overhead in <tt>do_something_dlm()</tt>.
 
-<p><a name="Quick Quiz 2"><b>Quick Quiz 2</b>:</a>
-Why is the <tt>synchronize_rcu()</tt> on line&nbsp;28 needed?
-<br><a href="#qq2answer">Answer</a>
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	Why is the <tt>synchronize_rcu()</tt> on line&nbsp;28 needed?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	Without that extra grace period, memory reordering could result in
+	<tt>do_something_dlm()</tt> executing <tt>do_something()</tt>
+	concurrently with the last bits of <tt>recovery()</tt>.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
 
 <p>
 In order to avoid fatal problems such as deadlocks,
@@ -332,12 +355,27 @@
 optimizations, for example, the use of <tt>gp</tt> as a scratch
 location immediately preceding the assignment.
 
-<p><a name="Quick Quiz 3"><b>Quick Quiz 3</b>:</a>
-But <tt>rcu_assign_pointer()</tt> does nothing to prevent the
-two assignments to <tt>p-&gt;a</tt> and <tt>p-&gt;b</tt>
-from being reordered.
-Can't that also cause problems?
-<br><a href="#qq3answer">Answer</a>
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	But <tt>rcu_assign_pointer()</tt> does nothing to prevent the
+	two assignments to <tt>p-&gt;a</tt> and <tt>p-&gt;b</tt>
+	from being reordered.
+	Can't that also cause problems?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	No, it cannot.
+	The readers cannot see either of these two fields until
+	the assignment to <tt>gp</tt>, by which time both fields are
+	fully initialized.
+	So reordering the assignments
+	to <tt>p-&gt;a</tt> and <tt>p-&gt;b</tt> cannot possibly
+	cause any problems.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
 
 <p>
 It is tempting to assume that the reader need not do anything special
@@ -494,11 +532,42 @@
 	code protected by the corresponding update-side lock.
 </ol>
 
-<p><a name="Quick Quiz 4"><b>Quick Quiz 4</b>:</a>
-Without the <tt>rcu_dereference()</tt> or the
-<tt>rcu_access_pointer()</tt>, what destructive optimizations
-might the compiler make use of?
-<br><a href="#qq4answer">Answer</a>
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	Without the <tt>rcu_dereference()</tt> or the
+	<tt>rcu_access_pointer()</tt>, what destructive optimizations
+	might the compiler make use of?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	Let's start with what happens to <tt>do_something_gp()</tt>
+	if it fails to use <tt>rcu_dereference()</tt>.
+	It could reuse a value formerly fetched from this same pointer.
+	It could also fetch the pointer from <tt>gp</tt> in a byte-at-a-time
+	manner, resulting in <i>load tearing</i>, in turn resulting in a
+	bytewise mash-up of two distinct pointer values.
+	It might even use value-speculation optimizations, where it makes
+	a wrong guess, but by the time it gets around to checking the
+	value, an update has changed the pointer to match the wrong guess.
+	Too bad about any dereferences that returned pre-initialization garbage
+	in the meantime!
+	</font>
+
+	<p><font color="ffffff">
+	For <tt>remove_gp_synchronous()</tt>, as long as all modifications
+	to <tt>gp</tt> are carried out while holding <tt>gp_lock</tt>,
+	the above optimizations are harmless.
+	However,
+	with <tt>CONFIG_SPARSE_RCU_POINTER=y</tt>,
+	<tt>sparse</tt> will complain if you
+	define <tt>gp</tt> with <tt>__rcu</tt> and then
+	access it without using
+	either <tt>rcu_access_pointer()</tt> or <tt>rcu_dereference()</tt>.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
 
 <p>
 In short, RCU's publish-subscribe guarantee is provided by the combination
@@ -571,17 +640,156 @@
 	<tt>synchronize_rcu()</tt> migrates in the meantime.
 </ol>
 
-<p><a name="Quick Quiz 5"><b>Quick Quiz 5</b>:</a>
-Given that multiple CPUs can start RCU read-side critical sections
-at any time without any ordering whatsoever, how can RCU possibly tell whether
-or not a given RCU read-side critical section starts before a
-given instance of <tt>synchronize_rcu()</tt>?
-<br><a href="#qq5answer">Answer</a>
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	Given that multiple CPUs can start RCU read-side critical sections
+	at any time without any ordering whatsoever, how can RCU possibly
+	tell whether or not a given RCU read-side critical section starts
+	before a given instance of <tt>synchronize_rcu()</tt>?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	If RCU cannot tell whether or not a given
+	RCU read-side critical section starts before a
+	given instance of <tt>synchronize_rcu()</tt>,
+	then it must assume that the RCU read-side critical section
+	started first.
+	In other words, a given instance of <tt>synchronize_rcu()</tt>
+	can avoid waiting on a given RCU read-side critical section only
+	if it can prove that <tt>synchronize_rcu()</tt> started first.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
 
-<p><a name="Quick Quiz 6"><b>Quick Quiz 6</b>:</a>
-The first and second guarantees require unbelievably strict ordering!
-Are all these memory barriers <i> really</i> required?
-<br><a href="#qq6answer">Answer</a>
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	The first and second guarantees require unbelievably strict ordering!
+	Are all these memory barriers <i>really</i> required?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	Yes, they really are required.
+	To see why the first guarantee is required, consider the following
+	sequence of events:
+	</font>
+
+	<ol>
+	<li>	<font color="ffffff">
+		CPU 1: <tt>rcu_read_lock()</tt>
+		</font>
+	<li>	<font color="ffffff">
+		CPU 1: <tt>q = rcu_dereference(gp);
+		/* Very likely to return p. */</tt>
+		</font>
+	<li>	<font color="ffffff">
+		CPU 0: <tt>list_del_rcu(p);</tt>
+		</font>
+	<li>	<font color="ffffff">
+		CPU 0: <tt>synchronize_rcu()</tt> starts.
+		</font>
+	<li>	<font color="ffffff">
+		CPU 1: <tt>do_something_with(q-&gt;a);
+		/* No smp_mb(), so might happen after kfree(). */</tt>
+		</font>
+	<li>	<font color="ffffff">
+		CPU 1: <tt>rcu_read_unlock()</tt>
+		</font>
+	<li>	<font color="ffffff">
+		CPU 0: <tt>synchronize_rcu()</tt> returns.
+		</font>
+	<li>	<font color="ffffff">
+		CPU 0: <tt>kfree(p);</tt>
+		</font>
+	</ol>
+
+	<p><font color="ffffff">
+	Therefore, there absolutely must be a full memory barrier between the
+	end of the RCU read-side critical section and the end of the
+	grace period.
+	</font>
+
+	<p><font color="ffffff">
+	The sequence of events demonstrating the necessity of the second rule
+	is roughly similar:
+	</font>
+
+	<ol>
+	<li>	<font color="ffffff">CPU 0: <tt>list_del_rcu(p);</tt>
+		</font>
+	<li>	<font color="ffffff">CPU 0: <tt>synchronize_rcu()</tt> starts.
+		</font>
+	<li>	<font color="ffffff">CPU 1: <tt>rcu_read_lock()</tt>
+		</font>
+	<li>	<font color="ffffff">CPU 1: <tt>q = rcu_dereference(gp);
+		/* Might return p if no memory barrier. */</tt>
+		</font>
+	<li>	<font color="ffffff">CPU 0: <tt>synchronize_rcu()</tt> returns.
+		</font>
+	<li>	<font color="ffffff">CPU 0: <tt>kfree(p);</tt>
+		</font>
+	<li>	<font color="ffffff">
+		CPU 1: <tt>do_something_with(q-&gt;a); /* Boom!!! */</tt>
+		</font>
+	<li>	<font color="ffffff">CPU 1: <tt>rcu_read_unlock()</tt>
+		</font>
+	</ol>
+
+	<p><font color="ffffff">
+	And similarly, without a memory barrier between the beginning of the
+	grace period and the beginning of the RCU read-side critical section,
+	CPU&nbsp;1 might end up accessing the freelist.
+	</font>
+
+	<p><font color="ffffff">
+	The &ldquo;as if&rdquo; rule of course applies, so that any
+	implementation that acts as if the appropriate memory barriers
+	were in place is a correct implementation.
+	That said, it is much easier to fool yourself into believing
+	that you have adhered to the as-if rule than it is to actually
+	adhere to it!
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
+
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	You claim that <tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>
+	generate absolutely no code in some kernel builds.
+	This means that the compiler might arbitrarily rearrange consecutive
+	RCU read-side critical sections.
+	Given such rearrangement, if a given RCU read-side critical section
+	is done, how can you be sure that all prior RCU read-side critical
+	sections are done?
+	Won't the compiler rearrangements make that impossible to determine?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	In cases where <tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>
+	generate absolutely no code, RCU infers quiescent states only at
+	special locations, for example, within the scheduler.
+	Because calls to <tt>schedule()</tt> had better prevent calling-code
+	accesses to shared variables from being rearranged across the call to
+	<tt>schedule()</tt>, if RCU detects the end of a given RCU read-side
+	critical section, it will necessarily detect the end of all prior
+	RCU read-side critical sections, no matter how aggressively the
+	compiler scrambles the code.
+	</font>
+
+	<p><font color="ffffff">
+	Again, this all assumes that the compiler cannot scramble code across
+	calls to the scheduler, out of interrupt handlers, into the idle loop,
+	into user-mode code, and so on.
+	But if your kernel build allows that sort of scrambling, you have broken
+	far more than just RCU!
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
 
 <p>
 Note that these memory-barrier requirements do not replace the fundamental
@@ -626,9 +834,19 @@
 <tt>call_rcu()</tt> and <tt>kfree_rcu()</tt> API members
 described later in this document.
 
-<p><a name="Quick Quiz 7"><b>Quick Quiz 7</b>:</a>
-But how does the upgrade-to-write operation exclude other readers?
-<br><a href="#qq7answer">Answer</a>
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	But how does the upgrade-to-write operation exclude other readers?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	It doesn't, just like normal RCU updates, which also do not exclude
+	RCU readers.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
 
 <p>
 This guarantee allows lookup code to be shared between read-side
@@ -714,9 +932,20 @@
 This is by design:  Any significant ordering constraints would slow down
 these fast-path APIs.
 
-<p><a name="Quick Quiz 8"><b>Quick Quiz 8</b>:</a>
-Can't the compiler also reorder this code?
-<br><a href="#qq8answer">Answer</a>
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	Can't the compiler also reorder this code?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	No, the volatile casts in <tt>READ_ONCE()</tt> and
+	<tt>WRITE_ONCE()</tt> prevent the compiler from reordering in
+	this particular case.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
 
 <h3><a name="Readers Do Not Exclude Updaters">Readers Do Not Exclude Updaters</a></h3>
 
@@ -769,10 +998,28 @@
 starts, and <tt>synchronize_rcu()</tt> is under no
 obligation to wait for these new readers.
 
-<p><a name="Quick Quiz 9"><b>Quick Quiz 9</b>:</a>
-Suppose that synchronize_rcu() did wait until all readers had completed.
-Would the updater be able to rely on this?
-<br><a href="#qq9answer">Answer</a>
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	Suppose that <tt>synchronize_rcu()</tt> did wait until <i>all</i>
+	readers had completed instead of waiting only on
+	pre-existing readers.
+	For how long would the updater be able to rely on there
+	being no readers?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	For no time at all.
+	Even if <tt>synchronize_rcu()</tt> were to wait until
+	all readers had completed, a new reader might start immediately after
+	<tt>synchronize_rcu()</tt> completed.
+	Therefore, the code following
+	<tt>synchronize_rcu()</tt> can <i>never</i> rely on there being
+	no readers.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
 
 <h3><a name="Grace Periods Don't Partition Read-Side Critical Sections">
 Grace Periods Don't Partition Read-Side Critical Sections</a></h3>
@@ -969,11 +1216,24 @@
 As a result, an RCU read-side critical section cannot partition a pair
 of RCU grace periods.
 
-<p><a name="Quick Quiz 10"><b>Quick Quiz 10</b>:</a>
-How long a sequence of grace periods, each separated by an RCU read-side
-critical section, would be required to partition the RCU read-side
-critical sections at the beginning and end of the chain?
-<br><a href="#qq10answer">Answer</a>
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	How long a sequence of grace periods, each separated by an RCU
+	read-side critical section, would be required to partition the RCU
+	read-side critical sections at the beginning and end of the chain?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	In theory, an infinite number.
+	In practice, an unknown number that is sensitive to both implementation
+	details and timing considerations.
+	Therefore, even in practice, RCU users must abide by the
+	theoretical rather than the practical answer.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
 
 <h3><a name="Disabling Preemption Does Not Block Grace Periods">
 Disabling Preemption Does Not Block Grace Periods</a></h3>
@@ -1109,12 +1369,27 @@
 <h3><a name="Specialization">Specialization</a></h3>
 
 <p>
-RCU is and always has been intended primarily for read-mostly situations, as
-illustrated by the following figure.
-This means that RCU's read-side primitives are optimized, often at the
+RCU is and always has been intended primarily for read-mostly situations,
+which means that RCU's read-side primitives are optimized, often at the
 expense of its update-side primitives.
+Experience thus far is captured by the following list of situations:
 
-<p><img src="RCUApplicability.svg" alt="RCUApplicability.svg" width="70%"></p>
+<ol>
+<li>	Read-mostly data, where stale and inconsistent data is not
+	a problem: RCU works great!
+<li>	Read-mostly data, where data must be consistent:
+	RCU works well.
+<li>	Read-write data, where data must be consistent:
+	RCU <i>might</i> work OK.
+	Or not.
+<li>	Write-mostly data, where data must be consistent:
+	RCU is very unlikely to be the right tool for the job,
+	with the following exceptions, where RCU can provide:
+	<ol type=a>
+	<li>	Existence guarantees for update-friendly mechanisms.
+	<li>	Wait-free read-side primitives for real-time use.
+	</ol>
+</ol>
 
 <p>
 This focus on read-mostly situations means that RCU must interoperate
@@ -1127,9 +1402,43 @@
 including spinlocks, sequence locks, atomic operations, reference
 counters, and memory barriers.
 
-<p><a name="Quick Quiz 11"><b>Quick Quiz 11</b>:</a>
-What about sleeping locks?
-<br><a href="#qq11answer">Answer</a>
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	What about sleeping locks?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	These are forbidden within Linux-kernel RCU read-side critical
+	sections because it is not legal to place a quiescent state
+	(in this case, a voluntary context switch) within an RCU
+	read-side critical section.
+	However, sleeping locks may be used within userspace RCU read-side
+	critical sections, and also within Linux-kernel sleepable RCU
+	<a href="#Sleepable RCU"><font color="ffffff">(SRCU)</font></a>
+	read-side critical sections.
+	In addition, the -rt patchset turns spinlocks into
+	sleeping locks so that the corresponding critical sections
+	can be preempted, which also means that these sleeplockified
+	spinlocks (but not other sleeping locks!) may be acquired
+	within -rt-Linux-kernel RCU read-side critical sections.
+	</font>
+
+	<p><font color="ffffff">
+	Note that it <i>is</i> legal for a normal RCU read-side
+	critical section to conditionally acquire a sleeping lock
+	(as in <tt>mutex_trylock()</tt>), but only as long as it does
+	not loop indefinitely attempting to conditionally acquire that
+	sleeping lock.
+	The key point is that things like <tt>mutex_trylock()</tt>
+	either return with the mutex held, or return an error indication if
+	the mutex was not immediately available.
+	Either way, <tt>mutex_trylock()</tt> returns immediately without
+	sleeping.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
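+
+<p>
+As a minimal sketch of conditional acquisition (assuming a hypothetical
+RCU-protected pointer <tt>gp</tt> referencing a structure that contains
+a mutex), an RCU reader might do the following, relying on the fact
+that <tt>mutex_trylock()</tt> never sleeps:
+
+<blockquote>
+<pre>
+ 1 rcu_read_lock();
+ 2 p = rcu_dereference(gp);
+ 3 if (p &amp;&amp; mutex_trylock(&amp;p-&gt;lock)) {
+ 4   do_something_with(p); /* Must not sleep. */
+ 5   mutex_unlock(&amp;p-&gt;lock);
+ 6 }
+ 7 rcu_read_unlock();
+</pre>
+</blockquote>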
 
 <p>
 It often comes as a surprise that many algorithms do not require a
@@ -1160,10 +1469,7 @@
 One of our pair of veterinarians might wait 30 seconds before pronouncing
 the cat dead, while the other might insist on waiting a full minute.
 The two veterinarians would then disagree on the state of the cat during
-the final 30 seconds of the minute following the last heartbeat, as
-fancifully illustrated below:
-
-<p><img src="2013-08-is-it-dead.png" alt="2013-08-is-it-dead.png" width="431"></p>
+the final 30 seconds of the minute following the last heartbeat.
 
 <p>
 Interestingly enough, this same situation applies to hardware.
@@ -1343,7 +1649,8 @@
 <tt>synchronize_rcu_expedited()</tt> would be legal,
 including within preempt-disable code, <tt>local_bh_disable()</tt> code,
 interrupt-disable code, and interrupt handlers.
-However, even <tt>call_rcu()</tt> is illegal within NMI handlers.
+However, even <tt>call_rcu()</tt> is illegal within NMI handlers
+and from idle and offline CPUs.
 The callback function (<tt>remove_gp_cb()</tt> in this case) will be
 executed within softirq (software interrupt) environment within the
 Linux kernel,
@@ -1354,12 +1661,27 @@
 Long-running operations should be relegated to separate threads or
 (in the Linux kernel) workqueues.
 
-<p><a name="Quick Quiz 12"><b>Quick Quiz 12</b>:</a>
-Why does line&nbsp;19 use <tt>rcu_access_pointer()</tt>?
-After all, <tt>call_rcu()</tt> on line&nbsp;25 stores into the
-structure, which would interact badly with concurrent insertions.
-Doesn't this mean that <tt>rcu_dereference()</tt> is required?
-<br><a href="#qq12answer">Answer</a>
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	Why does line&nbsp;19 use <tt>rcu_access_pointer()</tt>?
+	After all, <tt>call_rcu()</tt> on line&nbsp;25 stores into the
+	structure, which would interact badly with concurrent insertions.
+	Doesn't this mean that <tt>rcu_dereference()</tt> is required?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	Presumably the <tt>-&gt;gp_lock</tt> acquired on line&nbsp;18 excludes
+	any changes, including any insertions that <tt>rcu_dereference()</tt>
+	would protect against.
+	Therefore, any insertions will be delayed until after
+	<tt>-&gt;gp_lock</tt>
+	is released on line&nbsp;25, which in turn means that
+	<tt>rcu_access_pointer()</tt> suffices.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
 
 <p>
 However, all that <tt>remove_gp_cb()</tt> is doing is
@@ -1406,14 +1728,31 @@
 so the very few places that needed something like
 <tt>synchronize_rcu()</tt> simply open-coded it.
 
-<p><a name="Quick Quiz 13"><b>Quick Quiz 13</b>:</a>
-Earlier it was claimed that <tt>call_rcu()</tt> and
-<tt>kfree_rcu()</tt> allowed updaters to avoid being blocked
-by readers.
-But how can that be correct, given that the invocation of the callback
-and the freeing of the memory (respectively) must still wait for
-a grace period to elapse?
-<br><a href="#qq13answer">Answer</a>
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	Earlier it was claimed that <tt>call_rcu()</tt> and
+	<tt>kfree_rcu()</tt> allowed updaters to avoid being blocked
+	by readers.
+	But how can that be correct, given that the invocation of the callback
+	and the freeing of the memory (respectively) must still wait for
+	a grace period to elapse?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	We could define things this way, but keep in mind that this sort of
+	definition would say that updates in garbage-collected languages
+	cannot complete until the next time the garbage collector runs,
+	which does not seem at all reasonable.
+	The key point is that in most cases, an updater using either
+	<tt>call_rcu()</tt> or <tt>kfree_rcu()</tt> can proceed to the
+	next update as soon as it has invoked <tt>call_rcu()</tt> or
+	<tt>kfree_rcu()</tt>, without having to wait for a subsequent
+	grace period.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
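+
+<p>
+As a rough sketch of this point (the structure, field, and helper names
+below are hypothetical), an updater might remove an element, hand it to
+<tt>call_rcu()</tt>, and immediately move on to its next update:
+
+<blockquote>
+<pre>
+ 1 static void free_foo_cb(struct rcu_head *rhp)
+ 2 {
+ 3   struct foo *p = container_of(rhp, struct foo, rh);
+ 4
+ 5   kfree(p);
+ 6 }
+ 7
+ 8 p = remove_foo(); /* Unpublish the element. */
+ 9 call_rcu(&amp;p-&gt;rh, free_foo_cb); /* Returns immediately. */
+10 /* The kfree() happens only after a later grace period. */
+</pre>
+</blockquote>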
 
 <p>
 But what if the updater must wait for the completion of code to be
@@ -1838,11 +2177,26 @@
 Therefore, invoking <tt>synchronize_rcu()</tt> during scheduler
 initialization can result in deadlock.
 
-<p><a name="Quick Quiz 14"><b>Quick Quiz 14</b>:</a>
-So what happens with <tt>synchronize_rcu()</tt> during
-scheduler initialization for <tt>CONFIG_PREEMPT=n</tt>
-kernels?
-<br><a href="#qq14answer">Answer</a>
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	So what happens with <tt>synchronize_rcu()</tt> during
+	scheduler initialization for <tt>CONFIG_PREEMPT=n</tt>
+	kernels?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	In <tt>CONFIG_PREEMPT=n</tt> kernels, <tt>synchronize_rcu()</tt>
+	maps directly to <tt>synchronize_sched()</tt>.
+	Therefore, <tt>synchronize_rcu()</tt> works normally throughout
+	boot in <tt>CONFIG_PREEMPT=n</tt> kernels.
+	However, your code must also work in <tt>CONFIG_PREEMPT=y</tt> kernels,
+	so it is still necessary to avoid invoking <tt>synchronize_rcu()</tt>
+	during scheduler initialization.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
 
 <p>
 I learned of these boot-time requirements as a result of a series of
@@ -2171,6 +2525,14 @@
 also simplified handling of a number of race conditions.
 
 <p>
+RCU must avoid degrading real-time response for CPU-bound threads, whether
+executing in usermode (which is one use case for
+<tt>CONFIG_NO_HZ_FULL=y</tt>) or in the kernel.
+That said, CPU-bound loops in the kernel must execute
+<tt>cond_resched_rcu_qs()</tt> at least once every few tens of milliseconds
+in order to avoid receiving an IPI from RCU.
+
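+<p>
+For example, a CPU-bound loop in the kernel might be structured as in
+the following minimal sketch, where <tt>more_work_to_do()</tt> and
+<tt>do_unit_of_work()</tt> are hypothetical placeholders:
+
+<blockquote>
+<pre>
+ 1 while (more_work_to_do()) {
+ 2   do_unit_of_work();
+ 3   cond_resched_rcu_qs(); /* Report a quiescent state to RCU. */
+ 4 }
+</pre>
+</blockquote>
+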
+<p>
 Finally, RCU's status as a synchronization primitive means that
 any RCU failure can result in arbitrary memory corruption that can be
 extremely difficult to debug.
@@ -2223,6 +2585,8 @@
 <li>	<a href="#Sched Flavor">Sched Flavor</a>
 <li>	<a href="#Sleepable RCU">Sleepable RCU</a>
 <li>	<a href="#Tasks RCU">Tasks RCU</a>
+<li>	<a href="#Waiting for Multiple Grace Periods">
+	Waiting for Multiple Grace Periods</a>
 </ol>
 
 <h3><a name="Bottom-Half Flavor">Bottom-Half Flavor</a></h3>
@@ -2472,6 +2836,94 @@
 <tt>synchronize_rcu_tasks()</tt>, and
 <tt>rcu_barrier_tasks()</tt>.
 
+<h3><a name="Waiting for Multiple Grace Periods">
+Waiting for Multiple Grace Periods</a></h3>
+
+<p>
+Perhaps you have an RCU-protected data structure that is accessed from
+RCU read-side critical sections, from softirq handlers, and from
+hardware interrupt handlers.
+That is three flavors of RCU: the normal flavor, the bottom-half flavor,
+and the sched flavor.
+How can you wait for a compound grace period?
+
+<p>
+The best approach is usually to &ldquo;just say no!&rdquo; and
+insert <tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>
+around each RCU read-side critical section, regardless of what
+environment it happens to be in.
+But suppose that some of the RCU read-side critical sections are
+on extremely hot code paths, and that use of <tt>CONFIG_PREEMPT=n</tt>
+is not a viable option, so that <tt>rcu_read_lock()</tt> and
+<tt>rcu_read_unlock()</tt> are not free.
+What then?
+
+<p>
+You <i>could</i> wait on all three grace periods in succession, as follows:
+
+<blockquote>
+<pre>
+ 1 synchronize_rcu();
+ 2 synchronize_rcu_bh();
+ 3 synchronize_sched();
+</pre>
+</blockquote>
+
+<p>
+This works, but triples the update-side latency penalty.
+In cases where this is not acceptable, <tt>synchronize_rcu_mult()</tt>
+may be used to wait on all three flavors of grace period concurrently:
+
+<blockquote>
+<pre>
+ 1 synchronize_rcu_mult(call_rcu, call_rcu_bh, call_rcu_sched);
+</pre>
+</blockquote>
+
+<p>
+But what if it is necessary to also wait on SRCU?
+This can be done as follows:
+
+<blockquote>
+<pre>
+ 1 static void call_my_srcu(struct rcu_head *head,
+ 2        void (*func)(struct rcu_head *head))
+ 3 {
+ 4   call_srcu(&amp;my_srcu, head, func);
+ 5 }
+ 6
+ 7 synchronize_rcu_mult(call_rcu, call_rcu_bh, call_rcu_sched, call_my_srcu);
+</pre>
+</blockquote>
+
+<p>
+If you needed to wait on multiple different flavors of SRCU
+(but why???), you would need to create a wrapper function resembling
+<tt>call_my_srcu()</tt> for each SRCU flavor.
+
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+	But what if I need to wait for multiple RCU flavors, but I also need
+	the grace periods to be expedited?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	If you are using expedited grace periods, there should be less penalty
+	for waiting on them in succession.
+	But if that is nevertheless a problem, you can use workqueues
+	or multiple kthreads to wait on the various expedited grace
+	periods concurrently.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
+
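+<p>
+If needed, the expedited waits can be overlapped along the following
+lines (a rough workqueue-based sketch, with hypothetical function and
+work-item names):
+
+<blockquote>
+<pre>
+ 1 static void wait_rcu_exp(struct work_struct *unused)
+ 2 {
+ 3   synchronize_rcu_expedited();
+ 4 }
+ 5
+ 6 static void wait_rcu_bh_exp(struct work_struct *unused)
+ 7 {
+ 8   synchronize_rcu_bh_expedited();
+ 9 }
+10
+11 static DECLARE_WORK(rcu_exp_work, wait_rcu_exp);
+12 static DECLARE_WORK(rcu_bh_exp_work, wait_rcu_bh_exp);
+13
+14 schedule_work(&amp;rcu_exp_work);
+15 schedule_work(&amp;rcu_bh_exp_work);
+16 synchronize_sched_expedited(); /* Wait for sched flavor here. */
+17 flush_work(&amp;rcu_exp_work);    /* Wait for the other two. */
+18 flush_work(&amp;rcu_bh_exp_work);
+</pre>
+</blockquote>
+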
+<p>
+Again, it is usually better to adjust the RCU read-side critical sections
+to use a single flavor of RCU, but when this is not feasible, you can use
+<tt>synchronize_rcu_mult()</tt>.
+
 <h2><a name="Possible Future Changes">Possible Future Changes</a></h2>
 
 <p>
@@ -2569,329 +3021,4 @@
 under the terms of the Creative Commons Attribution-Share Alike 3.0
 United States license.
 
-<h3><a name="Answers to Quick Quizzes">
-Answers to Quick Quizzes</a></h3>
-
-<a name="qq1answer"></a>
-<p><b>Quick Quiz 1</b>:
-Wait a minute!
-You said that updaters can make useful forward progress concurrently
-with readers, but pre-existing readers will block
-<tt>synchronize_rcu()</tt>!!!
-Just who are you trying to fool???
-
-
-</p><p><b>Answer</b>:
-First, if updaters do not wish to be blocked by readers, they can use
-<tt>call_rcu()</tt> or <tt>kfree_rcu()</tt>, which will
-be discussed later.
-Second, even when using <tt>synchronize_rcu()</tt>, the other
-update-side code does run concurrently with readers, whether pre-existing
-or not.
-
-
-</p><p><a href="#Quick%20Quiz%201"><b>Back to Quick Quiz 1</b>.</a>
-
-<a name="qq2answer"></a>
-<p><b>Quick Quiz 2</b>:
-Why is the <tt>synchronize_rcu()</tt> on line&nbsp;28 needed?
-
-
-</p><p><b>Answer</b>:
-Without that extra grace period, memory reordering could result in
-<tt>do_something_dlm()</tt> executing <tt>do_something()</tt>
-concurrently with the last bits of <tt>recovery()</tt>.
-
-
-</p><p><a href="#Quick%20Quiz%202"><b>Back to Quick Quiz 2</b>.</a>
-
-<a name="qq3answer"></a>
-<p><b>Quick Quiz 3</b>:
-But <tt>rcu_assign_pointer()</tt> does nothing to prevent the
-two assignments to <tt>p-&gt;a</tt> and <tt>p-&gt;b</tt>
-from being reordered.
-Can't that also cause problems?
-
-
-</p><p><b>Answer</b>:
-No, it cannot.
-The readers cannot see either of these two fields until
-the assignment to <tt>gp</tt>, by which time both fields are
-fully initialized.
-So reordering the assignments
-to <tt>p-&gt;a</tt> and <tt>p-&gt;b</tt> cannot possibly
-cause any problems.
-
-
-</p><p><a href="#Quick%20Quiz%203"><b>Back to Quick Quiz 3</b>.</a>
-
-<a name="qq4answer"></a>
-<p><b>Quick Quiz 4</b>:
-Without the <tt>rcu_dereference()</tt> or the
-<tt>rcu_access_pointer()</tt>, what destructive optimizations
-might the compiler make use of?
-
-
-</p><p><b>Answer</b>:
-Let's start with what happens to <tt>do_something_gp()</tt>
-if it fails to use <tt>rcu_dereference()</tt>.
-It could reuse a value formerly fetched from this same pointer.
-It could also fetch the pointer from <tt>gp</tt> in a byte-at-a-time
-manner, resulting in <i>load tearing</i>, in turn resulting a bytewise
-mash-up of two distince pointer values.
-It might even use value-speculation optimizations, where it makes a wrong
-guess, but by the time it gets around to checking the value, an update
-has changed the pointer to match the wrong guess.
-Too bad about any dereferences that returned pre-initialization garbage
-in the meantime!
-
-<p>
-For <tt>remove_gp_synchronous()</tt>, as long as all modifications
-to <tt>gp</tt> are carried out while holding <tt>gp_lock</tt>,
-the above optimizations are harmless.
-However,
-with <tt>CONFIG_SPARSE_RCU_POINTER=y</tt>,
-<tt>sparse</tt> will complain if you
-define <tt>gp</tt> with <tt>__rcu</tt> and then
-access it without using
-either <tt>rcu_access_pointer()</tt> or <tt>rcu_dereference()</tt>.
-
-
-</p><p><a href="#Quick%20Quiz%204"><b>Back to Quick Quiz 4</b>.</a>
-
-<a name="qq5answer"></a>
-<p><b>Quick Quiz 5</b>:
-Given that multiple CPUs can start RCU read-side critical sections
-at any time without any ordering whatsoever, how can RCU possibly tell whether
-or not a given RCU read-side critical section starts before a
-given instance of <tt>synchronize_rcu()</tt>?
-
-
-</p><p><b>Answer</b>:
-If RCU cannot tell whether or not a given
-RCU read-side critical section starts before a
-given instance of <tt>synchronize_rcu()</tt>,
-then it must assume that the RCU read-side critical section
-started first.
-In other words, a given instance of <tt>synchronize_rcu()</tt>
-can avoid waiting on a given RCU read-side critical section only
-if it can prove that <tt>synchronize_rcu()</tt> started first.
-
-
-</p><p><a href="#Quick%20Quiz%205"><b>Back to Quick Quiz 5</b>.</a>
-
-<a name="qq6answer"></a>
-<p><b>Quick Quiz 6</b>:
-The first and second guarantees require unbelievably strict ordering!
-Are all these memory barriers <i> really</i> required?
-
-
-</p><p><b>Answer</b>:
-Yes, they really are required.
-To see why the first guarantee is required, consider the following
-sequence of events:
-
-<ol>
-<li>	CPU 1: <tt>rcu_read_lock()</tt>
-<li>	CPU 1: <tt>q = rcu_dereference(gp);
-	/* Very likely to return p. */</tt>
-<li>	CPU 0: <tt>list_del_rcu(p);</tt>
-<li>	CPU 0: <tt>synchronize_rcu()</tt> starts.
-<li>	CPU 1: <tt>do_something_with(q-&gt;a);
-	/* No smp_mb(), so might happen after kfree(). */</tt>
-<li>	CPU 1: <tt>rcu_read_unlock()</tt>
-<li>	CPU 0: <tt>synchronize_rcu()</tt> returns.
-<li>	CPU 0: <tt>kfree(p);</tt>
-</ol>
-
-<p>
-Therefore, there absolutely must be a full memory barrier between the
-end of the RCU read-side critical section and the end of the
-grace period.
-
-<p>
-The sequence of events demonstrating the necessity of the second rule
-is roughly similar:
-
-<ol>
-<li>	CPU 0: <tt>list_del_rcu(p);</tt>
-<li>	CPU 0: <tt>synchronize_rcu()</tt> starts.
-<li>	CPU 1: <tt>rcu_read_lock()</tt>
-<li>	CPU 1: <tt>q = rcu_dereference(gp);
-	/* Might return p if no memory barrier. */</tt>
-<li>	CPU 0: <tt>synchronize_rcu()</tt> returns.
-<li>	CPU 0: <tt>kfree(p);</tt>
-<li>	CPU 1: <tt>do_something_with(q-&gt;a); /* Boom!!! */</tt>
-<li>	CPU 1: <tt>rcu_read_unlock()</tt>
-</ol>
-
-<p>
-And similarly, without a memory barrier between the beginning of the
-grace period and the beginning of the RCU read-side critical section,
-CPU&nbsp;1 might end up accessing the freelist.
-
-<p>
-The &ldquo;as if&rdquo; rule of course applies, so that any implementation
-that acts as if the appropriate memory barriers were in place is a
-correct implementation.
-That said, it is much easier to fool yourself into believing that you have
-adhered to the as-if rule than it is to actually adhere to it!
-
-
-</p><p><a href="#Quick%20Quiz%206"><b>Back to Quick Quiz 6</b>.</a>
-
-<a name="qq7answer"></a>
-<p><b>Quick Quiz 7</b>:
-But how does the upgrade-to-write operation exclude other readers?
-
-
-</p><p><b>Answer</b>:
-It doesn't, just like normal RCU updates, which also do not exclude
-RCU readers.
-
-
-</p><p><a href="#Quick%20Quiz%207"><b>Back to Quick Quiz 7</b>.</a>
-
-<a name="qq8answer"></a>
-<p><b>Quick Quiz 8</b>:
-Can't the compiler also reorder this code?
-
-
-</p><p><b>Answer</b>:
-No, the volatile casts in <tt>READ_ONCE()</tt> and
-<tt>WRITE_ONCE()</tt> prevent the compiler from reordering in
-this particular case.
-
-
-</p><p><a href="#Quick%20Quiz%208"><b>Back to Quick Quiz 8</b>.</a>
-
-<a name="qq9answer"></a>
-<p><b>Quick Quiz 9</b>:
-Suppose that synchronize_rcu() did wait until all readers had completed.
-Would the updater be able to rely on this?
-
-
-</p><p><b>Answer</b>:
-No.
-Even if <tt>synchronize_rcu()</tt> were to wait until
-all readers had completed, a new reader might start immediately after
-<tt>synchronize_rcu()</tt> completed.
-Therefore, the code following
-<tt>synchronize_rcu()</tt> cannot rely on there being no readers
-in any case.
-
-
-</p><p><a href="#Quick%20Quiz%209"><b>Back to Quick Quiz 9</b>.</a>
-
-<a name="qq10answer"></a>
-<p><b>Quick Quiz 10</b>:
-How long a sequence of grace periods, each separated by an RCU read-side
-critical section, would be required to partition the RCU read-side
-critical sections at the beginning and end of the chain?
-
-
-</p><p><b>Answer</b>:
-In theory, an infinite number.
-In practice, an unknown number that is sensitive to both implementation
-details and timing considerations.
-Therefore, even in practice, RCU users must abide by the theoretical rather
-than the practical answer.
-
-
-</p><p><a href="#Quick%20Quiz%2010"><b>Back to Quick Quiz 10</b>.</a>
-
-<a name="qq11answer"></a>
-<p><b>Quick Quiz 11</b>:
-What about sleeping locks?
-
-
-</p><p><b>Answer</b>:
-These are forbidden within Linux-kernel RCU read-side critical sections
-because it is not legal to place a quiescent state (in this case,
-voluntary context switch) within an RCU read-side critical section.
-However, sleeping locks may be used within userspace RCU read-side critical
-sections, and also within Linux-kernel sleepable RCU
-<a href="#Sleepable RCU">(SRCU)</a>
-read-side critical sections.
-In addition, the -rt patchset turns spinlocks into a sleeping locks so
-that the corresponding critical sections can be preempted, which
-also means that these sleeplockified spinlocks (but not other sleeping locks!)
-may be acquire within -rt-Linux-kernel RCU read-side critical sections.
-
-<p>
-Note that it <i>is</i> legal for a normal RCU read-side critical section
-to conditionally acquire a sleeping locks (as in <tt>mutex_trylock()</tt>),
-but only as long as it does not loop indefinitely attempting to
-conditionally acquire that sleeping locks.
-The key point is that things like <tt>mutex_trylock()</tt>
-either return with the mutex held, or return an error indication if
-the mutex was not immediately available.
-Either way, <tt>mutex_trylock()</tt> returns immediately without sleeping.
-
-
-</p><p><a href="#Quick%20Quiz%2011"><b>Back to Quick Quiz 11</b>.</a>
-
-<a name="qq12answer"></a>
-<p><b>Quick Quiz 12</b>:
-Why does line&nbsp;19 use <tt>rcu_access_pointer()</tt>?
-After all, <tt>call_rcu()</tt> on line&nbsp;25 stores into the
-structure, which would interact badly with concurrent insertions.
-Doesn't this mean that <tt>rcu_dereference()</tt> is required?
-
-
-</p><p><b>Answer</b>:
-Presumably the <tt>-&gt;gp_lock</tt> acquired on line&nbsp;18 excludes
-any changes, including any insertions that <tt>rcu_dereference()</tt>
-would protect against.
-Therefore, any insertions will be delayed until after <tt>-&gt;gp_lock</tt>
-is released on line&nbsp;25, which in turn means that
-<tt>rcu_access_pointer()</tt> suffices.
-
-
-</p><p><a href="#Quick%20Quiz%2012"><b>Back to Quick Quiz 12</b>.</a>
-
-<a name="qq13answer"></a>
-<p><b>Quick Quiz 13</b>:
-Earlier it was claimed that <tt>call_rcu()</tt> and
-<tt>kfree_rcu()</tt> allowed updaters to avoid being blocked
-by readers.
-But how can that be correct, given that the invocation of the callback
-and the freeing of the memory (respectively) must still wait for
-a grace period to elapse?
-
-
-</p><p><b>Answer</b>:
-We could define things this way, but keep in mind that this sort of
-definition would say that updates in garbage-collected languages
-cannot complete until the next time the garbage collector runs,
-which does not seem at all reasonable.
-The key point is that in most cases, an updater using either
-<tt>call_rcu()</tt> or <tt>kfree_rcu()</tt> can proceed to the
-next update as soon as it has invoked <tt>call_rcu()</tt> or
-<tt>kfree_rcu()</tt>, without having to wait for a subsequent
-grace period.
-
-
-</p><p><a href="#Quick%20Quiz%2013"><b>Back to Quick Quiz 13</b>.</a>
-
-<a name="qq14answer"></a>
-<p><b>Quick Quiz 14</b>:
-So what happens with <tt>synchronize_rcu()</tt> during
-scheduler initialization for <tt>CONFIG_PREEMPT=n</tt>
-kernels?
-
-
-</p><p><b>Answer</b>:
-In <tt>CONFIG_PREEMPT=n</tt> kernel, <tt>synchronize_rcu()</tt>
-maps directly to <tt>synchronize_sched()</tt>.
-Therefore, <tt>synchronize_rcu()</tt> works normally throughout
-boot in <tt>CONFIG_PREEMPT=n</tt> kernels.
-However, your code must also work in <tt>CONFIG_PREEMPT=y</tt> kernels,
-so it is still necessary to avoid invoking <tt>synchronize_rcu()</tt>
-during scheduler initialization.
-
-
-</p><p><a href="#Quick%20Quiz%2014"><b>Back to Quick Quiz 14</b>.</a>
-
-
 </body></html>
diff --git a/Documentation/RCU/Design/Requirements/Requirements.htmlx b/Documentation/RCU/Design/Requirements/Requirements.htmlx
deleted file mode 100644
index 3a97ba4..0000000
--- a/Documentation/RCU/Design/Requirements/Requirements.htmlx
+++ /dev/null
@@ -1,2741 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
-        "http://www.w3.org/TR/html4/loose.dtd">
-        <html>
-        <head><title>A Tour Through RCU's Requirements [LWN.net]</title>
-        <meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8">
-
-<h1>A Tour Through RCU's Requirements</h1>
-
-<p>Copyright IBM Corporation, 2015</p>
-<p>Author: Paul E.&nbsp;McKenney</p>
-<p><i>The initial version of this document appeared in the
-<a href="https://lwn.net/">LWN</a> articles
-<a href="https://lwn.net/Articles/652156/">here</a>,
-<a href="https://lwn.net/Articles/652677/">here</a>, and
-<a href="https://lwn.net/Articles/653326/">here</a>.</i></p>
-
-<h2>Introduction</h2>
-
-<p>
-Read-copy update (RCU) is a synchronization mechanism that is often
-used as a replacement for reader-writer locking.
-RCU is unusual in that updaters do not block readers,
-which means that RCU's read-side primitives can be exceedingly fast
-and scalable.
-In addition, updaters can make useful forward progress concurrently
-with readers.
-However, all this concurrency between RCU readers and updaters does raise
-the question of exactly what RCU readers are doing, which in turn
-raises the question of exactly what RCU's requirements are.
-
-<p>
-This document therefore summarizes RCU's requirements, and can be thought
-of as an informal, high-level specification for RCU.
-It is important to understand that RCU's specification is primarily
-empirical in nature;
-in fact, I learned about many of these requirements the hard way.
-This situation might cause some consternation, however, not only
-has this learning process been a lot of fun, but it has also been
-a great privilege to work with so many people willing to apply
-technologies in interesting new ways.
-
-<p>
-All that aside, here are the categories of currently known RCU requirements:
-</p>
-
-<ol>
-<li>	<a href="#Fundamental Requirements">
-	Fundamental Requirements</a>
-<li>	<a href="#Fundamental Non-Requirements">Fundamental Non-Requirements</a>
-<li>	<a href="#Parallelism Facts of Life">
-	Parallelism Facts of Life</a>
-<li>	<a href="#Quality-of-Implementation Requirements">
-	Quality-of-Implementation Requirements</a>
-<li>	<a href="#Linux Kernel Complications">
-	Linux Kernel Complications</a>
-<li>	<a href="#Software-Engineering Requirements">
-	Software-Engineering Requirements</a>
-<li>	<a href="#Other RCU Flavors">
-	Other RCU Flavors</a>
-<li>	<a href="#Possible Future Changes">
-	Possible Future Changes</a>
-</ol>
-
-<p>
-This is followed by a <a href="#Summary">summary</a>,
-which is in turn followed by the inevitable
-<a href="#Answers to Quick Quizzes">answers to the quick quizzes</a>.
-
-<h2><a name="Fundamental Requirements">Fundamental Requirements</a></h2>
-
-<p>
-RCU's fundamental requirements are the closest thing RCU has to hard
-mathematical requirements.
-These are:
-
-<ol>
-<li>	<a href="#Grace-Period Guarantee">
-	Grace-Period Guarantee</a>
-<li>	<a href="#Publish-Subscribe Guarantee">
-	Publish-Subscribe Guarantee</a>
-<li>	<a href="#Memory-Barrier Guarantees">
-	Memory-Barrier Guarantees</a>
-<li>	<a href="#RCU Primitives Guaranteed to Execute Unconditionally">
-	RCU Primitives Guaranteed to Execute Unconditionally</a>
-<li>	<a href="#Guaranteed Read-to-Write Upgrade">
-	Guaranteed Read-to-Write Upgrade</a>
-</ol>
-
-<h3><a name="Grace-Period Guarantee">Grace-Period Guarantee</a></h3>
-
-<p>
-RCU's grace-period guarantee is unusual in being premeditated:
-Jack Slingwine and I had this guarantee firmly in mind when we started
-work on RCU (then called &ldquo;rclock&rdquo;) in the early 1990s.
-That said, the past two decades of experience with RCU have produced
-a much more detailed understanding of this guarantee.
-
-<p>
-RCU's grace-period guarantee allows updaters to wait for the completion
-of all pre-existing RCU read-side critical sections.
-An RCU read-side critical section
-begins with the marker <tt>rcu_read_lock()</tt> and ends with
-the marker <tt>rcu_read_unlock()</tt>.
-These markers may be nested, and RCU treats a nested set as one
-big RCU read-side critical section.
-Production-quality implementations of <tt>rcu_read_lock()</tt> and
-<tt>rcu_read_unlock()</tt> are extremely lightweight, and in
-fact have exactly zero overhead in Linux kernels built for production
-use with <tt>CONFIG_PREEMPT=n</tt>.
-
-<p>
-This guarantee allows ordering to be enforced with extremely low
-overhead to readers, for example:
-
-<blockquote>
-<pre>
- 1 int x, y;
- 2
- 3 void thread0(void)
- 4 {
- 5   rcu_read_lock();
- 6   r1 = READ_ONCE(x);
- 7   r2 = READ_ONCE(y);
- 8   rcu_read_unlock();
- 9 }
-10
-11 void thread1(void)
-12 {
-13   WRITE_ONCE(x, 1);
-14   synchronize_rcu();
-15   WRITE_ONCE(y, 1);
-16 }
-</pre>
-</blockquote>
-
-<p>
-Because the <tt>synchronize_rcu()</tt> on line&nbsp;14 waits for
-all pre-existing readers, any instance of <tt>thread0()</tt> that
-loads a value of zero from <tt>x</tt> must complete before
-<tt>thread1()</tt> stores to <tt>y</tt>, so that instance must
-also load a value of zero from <tt>y</tt>.
-Similarly, any instance of <tt>thread0()</tt> that loads a value of
-one from <tt>y</tt> must have started after the
-<tt>synchronize_rcu()</tt> started, and must therefore also load
-a value of one from <tt>x</tt>.
-Therefore, the outcome:
-<blockquote>
-<pre>
-(r1 == 0 &amp;&amp; r2 == 1)
-</pre>
-</blockquote>
-cannot happen.
-
-<p>@@QQ@@
-Wait a minute!
-You said that updaters can make useful forward progress concurrently
-with readers, but pre-existing readers will block
-<tt>synchronize_rcu()</tt>!!!
-Just who are you trying to fool???
-<p>@@QQA@@
-First, if updaters do not wish to be blocked by readers, they can use
-<tt>call_rcu()</tt> or <tt>kfree_rcu()</tt>, which will
-be discussed later.
-Second, even when using <tt>synchronize_rcu()</tt>, the other
-update-side code does run concurrently with readers, whether pre-existing
-or not.
-<p>@@QQE@@
-
-<p>
-This scenario resembles one of the first uses of RCU in
-<a href="https://en.wikipedia.org/wiki/DYNIX">DYNIX/ptx</a>,
-which managed a distributed lock manager's transition into
-a state suitable for handling recovery from node failure,
-more or less as follows:
-
-<blockquote>
-<pre>
- 1 #define STATE_NORMAL        0
- 2 #define STATE_WANT_RECOVERY 1
- 3 #define STATE_RECOVERING    2
- 4 #define STATE_WANT_NORMAL   3
- 5
- 6 int state = STATE_NORMAL;
- 7
- 8 void do_something_dlm(void)
- 9 {
-10   int state_snap;
-11
-12   rcu_read_lock();
-13   state_snap = READ_ONCE(state);
-14   if (state_snap == STATE_NORMAL)
-15     do_something();
-16   else
-17     do_something_carefully();
-18   rcu_read_unlock();
-19 }
-20
-21 void start_recovery(void)
-22 {
-23   WRITE_ONCE(state, STATE_WANT_RECOVERY);
-24   synchronize_rcu();
-25   WRITE_ONCE(state, STATE_RECOVERING);
-26   recovery();
-27   WRITE_ONCE(state, STATE_WANT_NORMAL);
-28   synchronize_rcu();
-29   WRITE_ONCE(state, STATE_NORMAL);
-30 }
-</pre>
-</blockquote>
-
-<p>
-The RCU read-side critical section in <tt>do_something_dlm()</tt>
-works with the <tt>synchronize_rcu()</tt> in <tt>start_recovery()</tt>
-to guarantee that <tt>do_something()</tt> never runs concurrently
-with <tt>recovery()</tt>, but with little or no synchronization
-overhead in <tt>do_something_dlm()</tt>.
-
-<p>@@QQ@@
-Why is the <tt>synchronize_rcu()</tt> on line&nbsp;28 needed?
-<p>@@QQA@@
-Without that extra grace period, memory reordering could result in
-<tt>do_something_dlm()</tt> executing <tt>do_something()</tt>
-concurrently with the last bits of <tt>recovery()</tt>.
-<p>@@QQE@@
-
-<p>
-In order to avoid fatal problems such as deadlocks,
-an RCU read-side critical section must not contain calls to
-<tt>synchronize_rcu()</tt>.
-Similarly, an RCU read-side critical section must not
-contain anything that waits, directly or indirectly, on completion of
-an invocation of <tt>synchronize_rcu()</tt>.
-
-<p>
-Although RCU's grace-period guarantee is useful in and of itself, with
-<a href="https://lwn.net/Articles/573497/">quite a few use cases</a>,
-it would be good to be able to use RCU to coordinate read-side
-access to linked data structures.
-For this, the grace-period guarantee is not sufficient, as can
-be seen in function <tt>add_gp_buggy()</tt> below.
-We will look at the reader's code later, but in the meantime, just think of
-the reader as locklessly picking up the <tt>gp</tt> pointer,
-and, if the value loaded is non-<tt>NULL</tt>, locklessly accessing the
-<tt>-&gt;a</tt> and <tt>-&gt;b</tt> fields.
-
-<blockquote>
-<pre>
- 1 bool add_gp_buggy(int a, int b)
- 2 {
- 3   p = kmalloc(sizeof(*p), GFP_KERNEL);
- 4   if (!p)
- 5     return -ENOMEM;
- 6   spin_lock(&amp;gp_lock);
- 7   if (rcu_access_pointer(gp)) {
- 8     spin_unlock(&amp;gp_lock);
- 9     return false;
-10   }
-11   p-&gt;a = a;
-12   p-&gt;b = a;
-13   gp = p; /* ORDERING BUG */
-14   spin_unlock(&amp;gp_lock);
-15   return true;
-16 }
-</pre>
-</blockquote>
-
-<p>
-The problem is that both the compiler and weakly ordered CPUs are within
-their rights to reorder this code as follows:
-
-<blockquote>
-<pre>
- 1 bool add_gp_buggy_optimized(int a, int b)
- 2 {
- 3   p = kmalloc(sizeof(*p), GFP_KERNEL);
- 4   if (!p)
- 5     return -ENOMEM;
- 6   spin_lock(&amp;gp_lock);
- 7   if (rcu_access_pointer(gp)) {
- 8     spin_unlock(&amp;gp_lock);
- 9     return false;
-10   }
-<b>11   gp = p; /* ORDERING BUG */
-12   p-&gt;a = a;
-13   p-&gt;b = a;</b>
-14   spin_unlock(&amp;gp_lock);
-15   return true;
-16 }
-</pre>
-</blockquote>
-
-<p>
-If an RCU reader fetches <tt>gp</tt> just after
-<tt>add_gp_buggy_optimized</tt> executes line&nbsp;11,
-it will see garbage in the <tt>-&gt;a</tt> and <tt>-&gt;b</tt>
-fields.
-And this is but one of many ways in which compiler and hardware optimizations
-could cause trouble.
-Therefore, we clearly need some way to prevent the compiler and the CPU from
-reordering in this manner, which brings us to the publish-subscribe
-guarantee discussed in the next section.
-
-<h3><a name="Publish-Subscribe Guarantee">Publish/Subscribe Guarantee</a></h3>
-
-<p>
-RCU's publish-subscribe guarantee allows data to be inserted
-into a linked data structure without disrupting RCU readers.
-The updater uses <tt>rcu_assign_pointer()</tt> to insert the
-new data, and readers use <tt>rcu_dereference()</tt> to
-access data, whether new or old.
-The following shows an example of insertion:
-
-<blockquote>
-<pre>
- 1 bool add_gp(int a, int b)
- 2 {
- 3   p = kmalloc(sizeof(*p), GFP_KERNEL);
- 4   if (!p)
- 5     return -ENOMEM;
- 6   spin_lock(&amp;gp_lock);
- 7   if (rcu_access_pointer(gp)) {
- 8     spin_unlock(&amp;gp_lock);
- 9     return false;
-10   }
-11   p-&gt;a = a;
-12   p-&gt;b = a;
-13   rcu_assign_pointer(gp, p);
-14   spin_unlock(&amp;gp_lock);
-15   return true;
-16 }
-</pre>
-</blockquote>
-
-<p>
-The <tt>rcu_assign_pointer()</tt> on line&nbsp;13 is conceptually
-equivalent to a simple assignment statement, but also guarantees
-that its assignment will
-happen after the two assignments in lines&nbsp;11 and&nbsp;12,
-similar to the C11 <tt>memory_order_release</tt> store operation.
-It also prevents any number of &ldquo;interesting&rdquo; compiler
-optimizations, for example, the use of <tt>gp</tt> as a scratch
-location immediately preceding the assignment.
-
-<p>@@QQ@@
-But <tt>rcu_assign_pointer()</tt> does nothing to prevent the
-two assignments to <tt>p-&gt;a</tt> and <tt>p-&gt;b</tt>
-from being reordered.
-Can't that also cause problems?
-<p>@@QQA@@
-No, it cannot.
-The readers cannot see either of these two fields until
-the assignment to <tt>gp</tt>, by which time both fields are
-fully initialized.
-So reordering the assignments
-to <tt>p-&gt;a</tt> and <tt>p-&gt;b</tt> cannot possibly
-cause any problems.
-<p>@@QQE@@
-
-<p>
-It is tempting to assume that the reader need not do anything special
-to control its accesses to the RCU-protected data,
-as shown in <tt>do_something_gp_buggy()</tt> below:
-
-<blockquote>
-<pre>
- 1 bool do_something_gp_buggy(void)
- 2 {
- 3   rcu_read_lock();
- 4   p = gp;  /* OPTIMIZATIONS GALORE!!! */
- 5   if (p) {
- 6     do_something(p-&gt;a, p-&gt;b);
- 7     rcu_read_unlock();
- 8     return true;
- 9   }
-10   rcu_read_unlock();
-11   return false;
-12 }
-</pre>
-</blockquote>
-
-<p>
-However, this temptation must be resisted because there are a
-surprisingly large number of ways that the compiler
-(to say nothing of
-<a href="https://h71000.www7.hp.com/wizard/wiz_2637.html">DEC Alpha CPUs</a>)
-can trip this code up.
-For but one example, if the compiler were short of registers, it
-might choose to refetch from <tt>gp</tt> rather than keeping
-a separate copy in <tt>p</tt> as follows:
-
-<blockquote>
-<pre>
- 1 bool do_something_gp_buggy_optimized(void)
- 2 {
- 3   rcu_read_lock();
- 4   if (gp) { /* OPTIMIZATIONS GALORE!!! */
-<b> 5     do_something(gp-&gt;a, gp-&gt;b);</b>
- 6     rcu_read_unlock();
- 7     return true;
- 8   }
- 9   rcu_read_unlock();
-10   return false;
-11 }
-</pre>
-</blockquote>
-
-<p>
-If this function ran concurrently with a series of updates that
-replaced the current structure with a new one,
-the fetches of <tt>gp-&gt;a</tt>
-and <tt>gp-&gt;b</tt> might well come from two different structures,
-which could cause serious confusion.
-To prevent this (and much else besides), <tt>do_something_gp()</tt> uses
-<tt>rcu_dereference()</tt> to fetch from <tt>gp</tt>:
-
-<blockquote>
-<pre>
- 1 bool do_something_gp(void)
- 2 {
- 3   rcu_read_lock();
- 4   p = rcu_dereference(gp);
- 5   if (p) {
- 6     do_something(p-&gt;a, p-&gt;b);
- 7     rcu_read_unlock();
- 8     return true;
- 9   }
-10   rcu_read_unlock();
-11   return false;
-12 }
-</pre>
-</blockquote>
-
-<p>
-The <tt>rcu_dereference()</tt> uses volatile casts and (for DEC Alpha)
-memory barriers in the Linux kernel.
-Should a
-<a href="http://www.rdrop.com/users/paulmck/RCU/consume.2015.07.13a.pdf">high-quality implementation of C11 <tt>memory_order_consume</tt> [PDF]</a>
-ever appear, then <tt>rcu_dereference()</tt> could be implemented
-as a <tt>memory_order_consume</tt> load.
-Regardless of the exact implementation, a pointer fetched by
-<tt>rcu_dereference()</tt> may not be used outside of the
-outermost RCU read-side critical section containing that
-<tt>rcu_dereference()</tt>, unless protection of
-the corresponding data element has been passed from RCU to some
-other synchronization mechanism, most commonly locking or
-<a href="https://www.kernel.org/doc/Documentation/RCU/rcuref.txt">reference counting</a>.
-
-<p>
-In short, updaters use <tt>rcu_assign_pointer()</tt> and readers
-use <tt>rcu_dereference()</tt>, and these two RCU API elements
-work together to ensure that readers have a consistent view of
-newly added data elements.
-
-<p>
-Of course, it is also necessary to remove elements from RCU-protected
-data structures, for example, using the following process:
-
-<ol>
-<li>	Remove the data element from the enclosing structure.
-<li>	Wait for all pre-existing RCU read-side critical sections
-	to complete (because only pre-existing readers can possibly have
-	a reference to the newly removed data element).
-<li>	At this point, only the updater has a reference to the
-	newly removed data element, so it can safely reclaim
-	the data element, for example, by passing it to <tt>kfree()</tt>.
-</ol>
-
-This process is implemented by <tt>remove_gp_synchronous()</tt>:
-
-<blockquote>
-<pre>
- 1 bool remove_gp_synchronous(void)
- 2 {
- 3   struct foo *p;
- 4
- 5   spin_lock(&amp;gp_lock);
- 6   p = rcu_access_pointer(gp);
- 7   if (!p) {
- 8     spin_unlock(&amp;gp_lock);
- 9     return false;
-10   }
-11   rcu_assign_pointer(gp, NULL);
-12   spin_unlock(&amp;gp_lock);
-13   synchronize_rcu();
-14   kfree(p);
-15   return true;
-16 }
-</pre>
-</blockquote>
-
-<p>
-This function is straightforward, with line&nbsp;13 waiting for a grace
-period before line&nbsp;14 frees the old data element.
-This waiting ensures that readers will reach line&nbsp;7 of
-<tt>do_something_gp()</tt> before the data element referenced by
-<tt>p</tt> is freed.
-The <tt>rcu_access_pointer()</tt> on line&nbsp;6 is similar to
-<tt>rcu_dereference()</tt>, except that:
-
-<ol>
-<li>	The value returned by <tt>rcu_access_pointer()</tt>
-	cannot be dereferenced.
-	If you want to access the value pointed to as well as
-	the pointer itself, use <tt>rcu_dereference()</tt>
-	instead of <tt>rcu_access_pointer()</tt>.
-<li>	The call to <tt>rcu_access_pointer()</tt> need not be
-	protected.
-	In contrast, <tt>rcu_dereference()</tt> must either be
-	within an RCU read-side critical section or in a code
-	segment where the pointer cannot change, for example, in
-	code protected by the corresponding update-side lock.
-</ol>
-
-<p>@@QQ@@
-Without the <tt>rcu_dereference()</tt> or the
-<tt>rcu_access_pointer()</tt>, what destructive optimizations
-might the compiler make use of?
-<p>@@QQA@@
-Let's start with what happens to <tt>do_something_gp()</tt>
-if it fails to use <tt>rcu_dereference()</tt>.
-It could reuse a value formerly fetched from this same pointer.
-It could also fetch the pointer from <tt>gp</tt> in a byte-at-a-time
-manner, resulting in <i>load tearing</i>, in turn resulting a bytewise
-mash-up of two distince pointer values.
-It might even use value-speculation optimizations, where it makes a wrong
-guess, but by the time it gets around to checking the value, an update
-has changed the pointer to match the wrong guess.
-Too bad about any dereferences that returned pre-initialization garbage
-in the meantime!
-
-<p>
-For <tt>remove_gp_synchronous()</tt>, as long as all modifications
-to <tt>gp</tt> are carried out while holding <tt>gp_lock</tt>,
-the above optimizations are harmless.
-However,
-with <tt>CONFIG_SPARSE_RCU_POINTER=y</tt>,
-<tt>sparse</tt> will complain if you
-define <tt>gp</tt> with <tt>__rcu</tt> and then
-access it without using
-either <tt>rcu_access_pointer()</tt> or <tt>rcu_dereference()</tt>.
-<p>@@QQE@@
-
-<p>
-In short, RCU's publish-subscribe guarantee is provided by the combination
-of <tt>rcu_assign_pointer()</tt> and <tt>rcu_dereference()</tt>.
-This guarantee allows data elements to be safely added to RCU-protected
-linked data structures without disrupting RCU readers.
-This guarantee can be used in combination with the grace-period
-guarantee to also allow data elements to be removed from RCU-protected
-linked data structures, again without disrupting RCU readers.
-
-<p>
-This guarantee was only partially premeditated.
-DYNIX/ptx used an explicit memory barrier for publication, but had nothing
-resembling <tt>rcu_dereference()</tt> for subscription, nor did it
-have anything resembling the <tt>smp_read_barrier_depends()</tt>
-that was later subsumed into <tt>rcu_dereference()</tt>.
-The need for these operations made itself known quite suddenly at a
-late-1990s meeting with the DEC Alpha architects, back in the days when
-DEC was still a free-standing company.
-It took the Alpha architects a good hour to convince me that any sort
-of barrier would ever be needed, and it then took me a good <i>two</i> hours
-to convince them that their documentation did not make this point clear.
-More recent work with the C and C++ standards committees have provided
-much education on tricks and traps from the compiler.
-In short, compilers were much less tricky in the early 1990s, but in
-2015, don't even think about omitting <tt>rcu_dereference()</tt>!
-
-<h3><a name="Memory-Barrier Guarantees">Memory-Barrier Guarantees</a></h3>
-
-<p>
-The previous section's simple linked-data-structure scenario clearly
-demonstrates the need for RCU's stringent memory-ordering guarantees on
-systems with more than one CPU:
-
-<ol>
-<li>	Each CPU that has an RCU read-side critical section that
-	begins before <tt>synchronize_rcu()</tt> starts is
-	guaranteed to execute a full memory barrier between the time
-	that the RCU read-side critical section ends and the time that
-	<tt>synchronize_rcu()</tt> returns.
-	Without this guarantee, a pre-existing RCU read-side critical section
-	might hold a reference to the newly removed <tt>struct foo</tt>
-	after the <tt>kfree()</tt> on line&nbsp;14 of
-	<tt>remove_gp_synchronous()</tt>.
-<li>	Each CPU that has an RCU read-side critical section that ends
-	after <tt>synchronize_rcu()</tt> returns is guaranteed
-	to execute a full memory barrier between the time that
-	<tt>synchronize_rcu()</tt> begins and the time that the RCU
-	read-side critical section begins.
-	Without this guarantee, a later RCU read-side critical section
-	running after the <tt>kfree()</tt> on line&nbsp;14 of
-	<tt>remove_gp_synchronous()</tt> might
-	later run <tt>do_something_gp()</tt> and find the
-	newly deleted <tt>struct foo</tt>.
-<li>	If the task invoking <tt>synchronize_rcu()</tt> remains
-	on a given CPU, then that CPU is guaranteed to execute a full
-	memory barrier sometime during the execution of
-	<tt>synchronize_rcu()</tt>.
-	This guarantee ensures that the <tt>kfree()</tt> on
-	line&nbsp;14 of <tt>remove_gp_synchronous()</tt> really does
-	execute after the removal on line&nbsp;11.
-<li>	If the task invoking <tt>synchronize_rcu()</tt> migrates
-	among a group of CPUs during that invocation, then each of the
-	CPUs in that group is guaranteed to execute a full memory barrier
-	sometime during the execution of <tt>synchronize_rcu()</tt>.
-	This guarantee also ensures that the <tt>kfree()</tt> on
-	line&nbsp;14 of <tt>remove_gp_synchronous()</tt> really does
-	execute after the removal on
-	line&nbsp;11, but also in the case where the thread executing the
-	<tt>synchronize_rcu()</tt> migrates in the meantime.
-</ol>
-
-<p>@@QQ@@
-Given that multiple CPUs can start RCU read-side critical sections
-at any time without any ordering whatsoever, how can RCU possibly tell whether
-or not a given RCU read-side critical section starts before a
-given instance of <tt>synchronize_rcu()</tt>?
-<p>@@QQA@@
-If RCU cannot tell whether or not a given
-RCU read-side critical section starts before a
-given instance of <tt>synchronize_rcu()</tt>,
-then it must assume that the RCU read-side critical section
-started first.
-In other words, a given instance of <tt>synchronize_rcu()</tt>
-can avoid waiting on a given RCU read-side critical section only
-if it can prove that <tt>synchronize_rcu()</tt> started first.
-<p>@@QQE@@
-
-<p>@@QQ@@
-The first and second guarantees require unbelievably strict ordering!
-Are all these memory barriers <i> really</i> required?
-<p>@@QQA@@
-Yes, they really are required.
-To see why the first guarantee is required, consider the following
-sequence of events:
-
-<ol>
-<li>	CPU 1: <tt>rcu_read_lock()</tt>
-<li>	CPU 1: <tt>q = rcu_dereference(gp);
-	/* Very likely to return p. */</tt>
-<li>	CPU 0: <tt>list_del_rcu(p);</tt>
-<li>	CPU 0: <tt>synchronize_rcu()</tt> starts.
-<li>	CPU 1: <tt>do_something_with(q-&gt;a);
-	/* No smp_mb(), so might happen after kfree(). */</tt>
-<li>	CPU 1: <tt>rcu_read_unlock()</tt>
-<li>	CPU 0: <tt>synchronize_rcu()</tt> returns.
-<li>	CPU 0: <tt>kfree(p);</tt>
-</ol>
-
-<p>
-Therefore, there absolutely must be a full memory barrier between the
-end of the RCU read-side critical section and the end of the
-grace period.
-
-<p>
-The sequence of events demonstrating the necessity of the second rule
-is roughly similar:
-
-<ol>
-<li>	CPU 0: <tt>list_del_rcu(p);</tt>
-<li>	CPU 0: <tt>synchronize_rcu()</tt> starts.
-<li>	CPU 1: <tt>rcu_read_lock()</tt>
-<li>	CPU 1: <tt>q = rcu_dereference(gp);
-	/* Might return p if no memory barrier. */</tt>
-<li>	CPU 0: <tt>synchronize_rcu()</tt> returns.
-<li>	CPU 0: <tt>kfree(p);</tt>
-<li>	CPU 1: <tt>do_something_with(q-&gt;a); /* Boom!!! */</tt>
-<li>	CPU 1: <tt>rcu_read_unlock()</tt>
-</ol>
-
-<p>
-And similarly, without a memory barrier between the beginning of the
-grace period and the beginning of the RCU read-side critical section,
-CPU&nbsp;1 might end up accessing the freelist.
-
-<p>
-The &ldquo;as if&rdquo; rule of course applies, so that any implementation
-that acts as if the appropriate memory barriers were in place is a
-correct implementation.
-That said, it is much easier to fool yourself into believing that you have
-adhered to the as-if rule than it is to actually adhere to it!
-<p>@@QQE@@
-
-<p>
-Note that these memory-barrier requirements do not replace the fundamental
-RCU requirement that a grace period wait for all pre-existing readers.
-On the contrary, the memory barriers called out in this section must operate in
-such a way as to <i>enforce</i> this fundamental requirement.
-Of course, different implementations enforce this requirement in different
-ways, but enforce it they must.
-
-<h3><a name="RCU Primitives Guaranteed to Execute Unconditionally">RCU Primitives Guaranteed to Execute Unconditionally</a></h3>
-
-<p>
-The common-case RCU primitives are unconditional.
-They are invoked, they do their job, and they return, with no possibility
-of error, and no need to retry.
-This is a key RCU design philosophy.
-
-<p>
-However, this philosophy is pragmatic rather than pigheaded.
-If someone comes up with a good justification for a particular conditional
-RCU primitive, it might well be implemented and added.
-After all, this guarantee was reverse-engineered, not premeditated.
-The unconditional nature of the RCU primitives was initially an
-accident of implementation, and later experience with synchronization
-primitives with conditional primitives caused me to elevate this
-accident to a guarantee.
-Therefore, the justification for adding a conditional primitive to
-RCU would need to be based on detailed and compelling use cases.
-
-<h3><a name="Guaranteed Read-to-Write Upgrade">Guaranteed Read-to-Write Upgrade</a></h3>
-
-<p>
-As far as RCU is concerned, it is always possible to carry out an
-update within an RCU read-side critical section.
-For example, that RCU read-side critical section might search for
-a given data element, and then might acquire the update-side
-spinlock in order to update that element, all while remaining
-in that RCU read-side critical section.
-Of course, it is necessary to exit the RCU read-side critical section
-before invoking <tt>synchronize_rcu()</tt>; however, this
-inconvenience can be avoided through use of the
-<tt>call_rcu()</tt> and <tt>kfree_rcu()</tt> API members
-described later in this document.
-
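-<p>
-For example, the following minimal sketch searches a list for an
-element and then upgrades to a write, all without leaving the RCU
-read-side critical section.
-The <tt>myelem_head</tt> list, the <tt>myelem_lock</tt> spinlock,
-and the fields of <tt>struct myelem</tt> are hypothetical stand-ins
-for a real data structure:
-
-<blockquote>
-<pre>
- 1 struct myelem *p;
- 2
- 3 rcu_read_lock();
- 4 list_for_each_entry_rcu(p, &amp;myelem_head, list) {
- 5   if (p-&gt;key == key) {
- 6     spin_lock(&amp;myelem_lock);  /* Upgrade from read to write. */
- 7     p-&gt;data = newval;         /* Still within the RCU reader. */
- 8     spin_unlock(&amp;myelem_lock);
- 9     break;
-10   }
-11 }
-12 rcu_read_unlock();
-</pre>
-</blockquote>
-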
-<p>@@QQ@@
-But how does the upgrade-to-write operation exclude other readers?
-<p>@@QQA@@
-It doesn't, just like normal RCU updates, which also do not exclude
-RCU readers.
-<p>@@QQE@@
-
-<p>
-This guarantee allows lookup code to be shared between read-side
-and update-side code, and was premeditated, appearing in the earliest
-DYNIX/ptx RCU documentation.
-
-<h2><a name="Fundamental Non-Requirements">Fundamental Non-Requirements</a></h2>
-
-<p>
-RCU provides extremely lightweight readers, and its read-side guarantees,
-though quite useful, are correspondingly lightweight.
-It is therefore all too easy to assume that RCU is guaranteeing more
-than it really is.
-Of course, the list of things that RCU does not guarantee is infinitely
-long; however, the following sections list a few non-guarantees that
-have caused confusion.
-Except where otherwise noted, these non-guarantees were premeditated.
-
-<ol>
-<li>	<a href="#Readers Impose Minimal Ordering">
-	Readers Impose Minimal Ordering</a>
-<li>	<a href="#Readers Do Not Exclude Updaters">
-	Readers Do Not Exclude Updaters</a>
-<li>	<a href="#Updaters Only Wait For Old Readers">
-	Updaters Only Wait For Old Readers</a>
-<li>	<a href="#Grace Periods Don't Partition Read-Side Critical Sections">
-	Grace Periods Don't Partition Read-Side Critical Sections</a>
-<li>	<a href="#Read-Side Critical Sections Don't Partition Grace Periods">
-	Read-Side Critical Sections Don't Partition Grace Periods</a>
-<li>	<a href="#Disabling Preemption Does Not Block Grace Periods">
-	Disabling Preemption Does Not Block Grace Periods</a>
-</ol>
-
-<h3><a name="Readers Impose Minimal Ordering">Readers Impose Minimal Ordering</a></h3>
-
-<p>
-Reader-side markers such as <tt>rcu_read_lock()</tt> and
-<tt>rcu_read_unlock()</tt> provide absolutely no ordering guarantees
-except through their interaction with the grace-period APIs such as
-<tt>synchronize_rcu()</tt>.
-To see this, consider the following pair of threads:
-
-<blockquote>
-<pre>
- 1 void thread0(void)
- 2 {
- 3   rcu_read_lock();
- 4   WRITE_ONCE(x, 1);
- 5   rcu_read_unlock();
- 6   rcu_read_lock();
- 7   WRITE_ONCE(y, 1);
- 8   rcu_read_unlock();
- 9 }
-10
-11 void thread1(void)
-12 {
-13   rcu_read_lock();
-14   r1 = READ_ONCE(y);
-15   rcu_read_unlock();
-16   rcu_read_lock();
-17   r2 = READ_ONCE(x);
-18   rcu_read_unlock();
-19 }
-</pre>
-</blockquote>
-
-<p>
-After <tt>thread0()</tt> and <tt>thread1()</tt> execute
-concurrently, it is quite possible to have
-
-<blockquote>
-<pre>
-(r1 == 1 &amp;&amp; r2 == 0)
-</pre>
-</blockquote>
-
-(that is, <tt>y</tt> appears to have been assigned before <tt>x</tt>),
-which would not be possible if <tt>rcu_read_lock()</tt> and
-<tt>rcu_read_unlock()</tt> had much in the way of ordering
-properties.
-But they do not, so the CPU is within its rights
-to do significant reordering.
-This is by design:  Any significant ordering constraints would slow down
-these fast-path APIs.
-
-<p>@@QQ@@
-Can't the compiler also reorder this code?
-<p>@@QQA@@
-No, the volatile casts in <tt>READ_ONCE()</tt> and
-<tt>WRITE_ONCE()</tt> prevent the compiler from reordering in
-this particular case.
-<p>@@QQE@@
-
-<h3><a name="Readers Do Not Exclude Updaters">Readers Do Not Exclude Updaters</a></h3>
-
-<p>
-Neither <tt>rcu_read_lock()</tt> nor <tt>rcu_read_unlock()</tt>
-exclude updates.
-All they do is prevent grace periods from ending.
-The following example illustrates this:
-
-<blockquote>
-<pre>
- 1 void thread0(void)
- 2 {
- 3   rcu_read_lock();
- 4   r1 = READ_ONCE(y);
- 5   if (r1) {
- 6     do_something_with_nonzero_x();
- 7     r2 = READ_ONCE(x);
- 8     WARN_ON(!r2); /* BUG!!! */
- 9   }
-10   rcu_read_unlock();
-11 }
-12
-13 void thread1(void)
-14 {
-15   spin_lock(&amp;my_lock);
-16   WRITE_ONCE(x, 1);
-17   WRITE_ONCE(y, 1);
-18   spin_unlock(&amp;my_lock);
-19 }
-</pre>
-</blockquote>
-
-<p>
-If the <tt>thread0()</tt> function's <tt>rcu_read_lock()</tt>
-excluded the <tt>thread1()</tt> function's update,
-the <tt>WARN_ON()</tt> could never fire.
-But the fact is that <tt>rcu_read_lock()</tt> does not exclude
-much of anything aside from subsequent grace periods, of which
-<tt>thread1()</tt> has none, so the
-<tt>WARN_ON()</tt> can and does fire.
-
-<h3><a name="Updaters Only Wait For Old Readers">Updaters Only Wait For Old Readers</a></h3>
-
-<p>
-It might be tempting to assume that after <tt>synchronize_rcu()</tt>
-completes, there are no readers executing.
-This temptation must be avoided because
-new readers can start immediately after <tt>synchronize_rcu()</tt>
-starts, and <tt>synchronize_rcu()</tt> is under no
-obligation to wait for these new readers.
-
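-<p>
-The following sketch illustrates this, where <tt>gp</tt> points to a
-hypothetical RCU-protected structure, <tt>p</tt> references the
-element being removed, and <tt>q</tt> is a local pointer:
-
-<blockquote>
-<pre>
- 1 void thread0(void)
- 2 {
- 3   rcu_assign_pointer(gp, NULL);
- 4   synchronize_rcu(); /* Waits only for pre-existing readers. */
- 5   kfree(p);
- 6 }
- 7
- 8 void thread1(void)
- 9 {
-10   rcu_read_lock(); /* Might start after synchronize_rcu() does. */
-11   q = rcu_dereference(gp);
-12   if (q)
-13     do_something_with(q);
-14   rcu_read_unlock();
-15 }
-</pre>
-</blockquote>
-
-<p>
-If <tt>thread1()</tt>'s critical section begins after
-<tt>synchronize_rcu()</tt> starts, <tt>synchronize_rcu()</tt> is free
-to return without waiting for it.
-This does no harm:  Either RCU happens to wait for the late reader
-anyway, or the memory-barrier guarantees described earlier ensure
-that the late reader sees the <tt>NULL</tt> stored on line&nbsp;3,
-so that it cannot gain a reference to the newly freed element.
-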
-<p>@@QQ@@
-Suppose that synchronize_rcu() did wait until all readers had completed.
-Would the updater be able to rely on this?
-<p>@@QQA@@
-No.
-Even if <tt>synchronize_rcu()</tt> were to wait until
-all readers had completed, a new reader might start immediately after
-<tt>synchronize_rcu()</tt> completed.
-Therefore, the code following
-<tt>synchronize_rcu()</tt> cannot rely on there being no readers
-in any case.
-<p>@@QQE@@
-
-<h3><a name="Grace Periods Don't Partition Read-Side Critical Sections">
-Grace Periods Don't Partition Read-Side Critical Sections</a></h3>
-
-<p>
-It is tempting to assume that if any part of one RCU read-side critical
-section precedes a given grace period, and if any part of another RCU
-read-side critical section follows that same grace period, then all of
-the first RCU read-side critical section must precede all of the second.
-However, this just isn't the case: A single grace period does not
-partition the set of RCU read-side critical sections.
-An example of this situation can be illustrated as follows, where
-<tt>a</tt>, <tt>b</tt>, and <tt>c</tt> are initially all zero:
-
-<blockquote>
-<pre>
- 1 void thread0(void)
- 2 {
- 3   rcu_read_lock();
- 4   WRITE_ONCE(a, 1);
- 5   WRITE_ONCE(b, 1);
- 6   rcu_read_unlock();
- 7 }
- 8
- 9 void thread1(void)
-10 {
-11   r1 = READ_ONCE(a);
-12   synchronize_rcu();
-13   WRITE_ONCE(c, 1);
-14 }
-15
-16 void thread2(void)
-17 {
-18   rcu_read_lock();
-19   r2 = READ_ONCE(b);
-20   r3 = READ_ONCE(c);
-21   rcu_read_unlock();
-22 }
-</pre>
-</blockquote>
-
-<p>
-It turns out that the outcome:
-
-<blockquote>
-<pre>
-(r1 == 1 &amp;&amp; r2 == 0 &amp;&amp; r3 == 1)
-</pre>
-</blockquote>
-
-is entirely possible.
-The following figure shows how this can happen, with each circled
-<tt>QS</tt> indicating the point at which RCU recorded a
-<i>quiescent state</i> for each thread, that is, a state in which
-RCU knows that the thread cannot be in the midst of an RCU read-side
-critical section that started before the current grace period:
-
-<p><img src="GPpartitionReaders1.svg" alt="GPpartitionReaders1.svg" width="60%"></p>
-
-<p>
-If it is necessary to partition RCU read-side critical sections in this
-manner, it is necessary to use two grace periods, where the first
-grace period is known to end before the second grace period starts:
-
-<blockquote>
-<pre>
- 1 void thread0(void)
- 2 {
- 3   rcu_read_lock();
- 4   WRITE_ONCE(a, 1);
- 5   WRITE_ONCE(b, 1);
- 6   rcu_read_unlock();
- 7 }
- 8
- 9 void thread1(void)
-10 {
-11   r1 = READ_ONCE(a);
-12   synchronize_rcu();
-13   WRITE_ONCE(c, 1);
-14 }
-15
-16 void thread2(void)
-17 {
-18   r2 = READ_ONCE(c);
-19   synchronize_rcu();
-20   WRITE_ONCE(d, 1);
-21 }
-22
-23 void thread3(void)
-24 {
-25   rcu_read_lock();
-26   r3 = READ_ONCE(b);
-27   r4 = READ_ONCE(d);
-28   rcu_read_unlock();
-29 }
-</pre>
-</blockquote>
-
-<p>
-Here, if <tt>(r1 == 1)</tt>, then
-<tt>thread0()</tt>'s write to <tt>b</tt> must happen
-before the end of <tt>thread1()</tt>'s grace period.
-If in addition <tt>(r4 == 1)</tt>, then
-<tt>thread3()</tt>'s read from <tt>b</tt> must happen
-after the beginning of <tt>thread2()</tt>'s grace period.
-If it is also the case that <tt>(r2 == 1)</tt>, then the
-end of <tt>thread1()</tt>'s grace period must precede the
-beginning of <tt>thread2()</tt>'s grace period.
-This means that the two RCU read-side critical sections cannot overlap,
-guaranteeing that <tt>(r3 == 1)</tt>.
-As a result, the outcome:
-
-<blockquote>
-<pre>
-(r1 == 1 &amp;&amp; r2 == 1 &amp;&amp; r3 == 0 &amp;&amp; r4 == 1)
-</pre>
-</blockquote>
-
-cannot happen.
-
-<p>
-This non-requirement was also non-premeditated, but became apparent
-when studying RCU's interaction with memory ordering.
-
-<h3><a name="Read-Side Critical Sections Don't Partition Grace Periods">
-Read-Side Critical Sections Don't Partition Grace Periods</a></h3>
-
-<p>
-It is also tempting to assume that if an RCU read-side critical section
-happens between a pair of grace periods, then those grace periods cannot
-overlap.
-However, this temptation leads nowhere good, as can be illustrated by
-the following, with all variables initially zero:
-
-<blockquote>
-<pre>
- 1 void thread0(void)
- 2 {
- 3   rcu_read_lock();
- 4   WRITE_ONCE(a, 1);
- 5   WRITE_ONCE(b, 1);
- 6   rcu_read_unlock();
- 7 }
- 8
- 9 void thread1(void)
-10 {
-11   r1 = READ_ONCE(a);
-12   synchronize_rcu();
-13   WRITE_ONCE(c, 1);
-14 }
-15
-16 void thread2(void)
-17 {
-18   rcu_read_lock();
-19   WRITE_ONCE(d, 1);
-20   r2 = READ_ONCE(c);
-21   rcu_read_unlock();
-22 }
-23
-24 void thread3(void)
-25 {
-26   r3 = READ_ONCE(d);
-27   synchronize_rcu();
-28   WRITE_ONCE(e, 1);
-29 }
-30
-31 void thread4(void)
-32 {
-33   rcu_read_lock();
-34   r4 = READ_ONCE(b);
-35   r5 = READ_ONCE(e);
-36   rcu_read_unlock();
-37 }
-</pre>
-</blockquote>
-
-<p>
-In this case, the outcome:
-
-<blockquote>
-<pre>
-(r1 == 1 &amp;&amp; r2 == 1 &amp;&amp; r3 == 1 &amp;&amp; r4 == 0 &amp;&amp; r5 == 1)
-</pre>
-</blockquote>
-
-is entirely possible, as illustrated below:
-
-<p><img src="ReadersPartitionGP1.svg" alt="ReadersPartitionGP1.svg" width="100%"></p>
-
-<p>
-Again, an RCU read-side critical section can overlap almost all of a
-given grace period, just so long as it does not overlap the entire
-grace period.
-As a result, an RCU read-side critical section cannot partition a pair
-of RCU grace periods.
-
-<p>@@QQ@@
-How long a sequence of grace periods, each separated by an RCU read-side
-critical section, would be required to partition the RCU read-side
-critical sections at the beginning and end of the chain?
-<p>@@QQA@@
-In theory, an infinite number.
-In practice, an unknown number that is sensitive to both implementation
-details and timing considerations.
-Therefore, even in practice, RCU users must abide by the theoretical rather
-than the practical answer.
-<p>@@QQE@@
-
-<h3><a name="Disabling Preemption Does Not Block Grace Periods">
-Disabling Preemption Does Not Block Grace Periods</a></h3>
-
-<p>
-There was a time when disabling preemption on any given CPU would block
-subsequent grace periods.
-However, this was an accident of implementation and is not a requirement.
-And in the current Linux-kernel implementation, disabling preemption
-on a given CPU in fact does not block grace periods, as Oleg Nesterov
-<a href="https://lkml.kernel.org/g/20150614193825.GA19582@redhat.com">demonstrated</a>.
-
-<p>
-If you need a preempt-disable region to block grace periods, you need to add
-<tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>, for example
-as follows:
-
-<blockquote>
-<pre>
- 1 preempt_disable();
- 2 rcu_read_lock();
- 3 do_something();
- 4 rcu_read_unlock();
- 5 preempt_enable();
- 6
- 7 /* Spinlocks implicitly disable preemption. */
- 8 spin_lock(&amp;mylock);
- 9 rcu_read_lock();
-10 do_something();
-11 rcu_read_unlock();
-12 spin_unlock(&amp;mylock);
-</pre>
-</blockquote>
-
-<p>
-In theory, you could enter the RCU read-side critical section first,
-but it is more efficient to keep the entire RCU read-side critical
-section contained in the preempt-disable region as shown above.
-Of course, RCU read-side critical sections that extend outside of
-preempt-disable regions will work correctly, but such critical sections
-can be preempted, which forces <tt>rcu_read_unlock()</tt> to do
-more work.
-And no, this is <i>not</i> an invitation to enclose all of your RCU
-read-side critical sections within preempt-disable regions, because
-doing so would degrade real-time response.
-
-<p>
-This non-requirement appeared with preemptible RCU.
-If you need a grace period that waits on non-preemptible code regions, use
-<a href="#Sched Flavor">RCU-sched</a>.
-
-<h2><a name="Parallelism Facts of Life">Parallelism Facts of Life</a></h2>
-
-<p>
-These parallelism facts of life are by no means specific to RCU, but
-the RCU implementation must abide by them.
-They therefore bear repeating:
-
-<ol>
-<li>	Any CPU or task may be delayed at any time,
-	and any attempts to avoid these delays by disabling
-	preemption, interrupts, or whatever are completely futile.
-	This is most obvious in preemptible user-level
-	environments and in virtualized environments (where
-	a given guest OS's VCPUs can be preempted at any time by
-	the underlying hypervisor), but can also happen in bare-metal
-	environments due to ECC errors, NMIs, and other hardware
-	events.
-	Although a delay of more than about 20 seconds can result
-	in splats, the RCU implementation is obligated to use
-	algorithms that can tolerate extremely long delays, but where
-	&ldquo;extremely long&rdquo; is not long enough to allow
-	wrap-around when incrementing a 64-bit counter.
-<li>	Both the compiler and the CPU can reorder memory accesses.
-	Where it matters, RCU must use compiler directives and
-	memory-barrier instructions to preserve ordering.
-<li>	Conflicting writes to memory locations in any given cache line
-	will result in expensive cache misses.
-	Greater numbers of concurrent writes and more-frequent
-	concurrent writes will result in more dramatic slowdowns.
-	RCU is therefore obligated to use algorithms that have
-	sufficient locality to avoid significant performance and
-	scalability problems.
-<li>	As a rough rule of thumb, only one CPU's worth of processing
-	may be carried out under the protection of any given exclusive
-	lock.
-	RCU must therefore use scalable locking designs.
-<li>	Counters are finite, especially on 32-bit systems.
-	RCU's use of counters must therefore tolerate counter wrap,
-	or be designed such that counter wrap would take way more
-	time than a single system is likely to run.
-	An uptime of ten years is quite possible, a runtime
-	of a century much less so.
-	As an example of the latter, RCU's dyntick-idle nesting counter
-	allows 54 bits for interrupt nesting level (this counter
-	is 64 bits even on a 32-bit system).
-	Overflowing this counter requires 2<sup>54</sup>
-	half-interrupts on a given CPU without that CPU ever going idle.
-	If a half-interrupt happened every microsecond, it would take
-	570 years of runtime to overflow this counter, which is currently
-	believed to be an acceptably long time (see the arithmetic sketch
-	following this list).
-<li>	Linux systems can have thousands of CPUs running a single
-	Linux kernel in a single shared-memory environment.
-	RCU must therefore pay close attention to high-end scalability.
-</ol>
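-
-<p>
-As a quick check of the 570-year figure in the fifth item above:
-
-<blockquote>
-<pre>
-2^54 half-interrupts x 1 microsecond each = 1.8 x 10^16 microseconds
-                                          = 1.8 x 10^10 seconds
-                                          = roughly 571 years
-</pre>
-</blockquote>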
-
-<p>
-This last parallelism fact of life means that RCU must pay special
-attention to the preceding facts of life.
-The idea that Linux might scale to systems with thousands of CPUs would
-have been met with some skepticism in the 1990s, but these requirements
-would otherwise have been unsurprising, even in the early 1990s.
-
-<h2><a name="Quality-of-Implementation Requirements">Quality-of-Implementation Requirements</a></h2>
-
-<p>
-These sections list quality-of-implementation requirements.
-Although an RCU implementation that ignores these requirements could
-still be used, it would likely be subject to limitations that would
-make it inappropriate for industrial-strength production use.
-Classes of quality-of-implementation requirements are as follows:
-
-<ol>
-<li>	<a href="#Specialization">Specialization</a>
-<li>	<a href="#Performance and Scalability">Performance and Scalability</a>
-<li>	<a href="#Composability">Composability</a>
-<li>	<a href="#Corner Cases">Corner Cases</a>
-</ol>
-
-<p>
-These classes are covered in the following sections.
-
-<h3><a name="Specialization">Specialization</a></h3>
-
-<p>
-RCU is and always has been intended primarily for read-mostly situations, as
-illustrated by the following figure.
-This means that RCU's read-side primitives are optimized, often at the
-expense of its update-side primitives.
-
-<p><img src="RCUApplicability.svg" alt="RCUApplicability.svg" width="70%"></p>
-
-<p>
-This focus on read-mostly situations means that RCU must interoperate
-with other synchronization primitives.
-For example, the <tt>add_gp()</tt> and <tt>remove_gp_synchronous()</tt>
-examples discussed earlier use RCU to protect readers and locking to
-coordinate updaters.
-However, the need extends much farther, requiring that a variety of
-synchronization primitives be legal within RCU read-side critical sections,
-including spinlocks, sequence locks, atomic operations, reference
-counters, and memory barriers.
-
-<p>@@QQ@@
-What about sleeping locks?
-<p>@@QQA@@
-These are forbidden within Linux-kernel RCU read-side critical sections
-because it is not legal to place a quiescent state (in this case,
-voluntary context switch) within an RCU read-side critical section.
-However, sleeping locks may be used within userspace RCU read-side critical
-sections, and also within Linux-kernel sleepable RCU
-<a href="#Sleepable RCU">(SRCU)</a>
-read-side critical sections.
-In addition, the -rt patchset turns spinlocks into sleeping locks so
-that the corresponding critical sections can be preempted, which
-also means that these sleeplockified spinlocks (but not other sleeping locks!)
-may be acquired within -rt-Linux-kernel RCU read-side critical sections.
-
-<p>
-Note that it <i>is</i> legal for a normal RCU read-side critical section
-to conditionally acquire a sleeping lock (as in <tt>mutex_trylock()</tt>),
-but only as long as it does not loop indefinitely attempting to
-conditionally acquire that sleeping lock.
-The key point is that things like <tt>mutex_trylock()</tt>
-either return with the mutex held, or return an error indication if
-the mutex was not immediately available.
-Either way, <tt>mutex_trylock()</tt> returns immediately without sleeping.
-<p>@@QQE@@
-
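-<p>
-A minimal sketch of this conditional-acquisition pattern, with
-<tt>my_mutex</tt> and <tt>do_update()</tt> as hypothetical
-stand-ins, might look as follows:
-
-<blockquote>
-<pre>
- 1 struct foo *p;
- 2
- 3 rcu_read_lock();
- 4 p = rcu_dereference(gp);
- 5 if (p &amp;&amp; mutex_trylock(&amp;my_mutex)) {
- 6   do_update(p);            /* mutex_trylock() did not sleep. */
- 7   mutex_unlock(&amp;my_mutex); /* mutex_unlock() does not sleep either. */
- 8 }
- 9 rcu_read_unlock();
-</pre>
-</blockquote>
-
-<p>
-If <tt>mutex_trylock()</tt> fails, this code simply forgoes the
-update rather than looping, thus avoiding indefinitely delaying the
-current grace period.
-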
-<p>
-It often comes as a surprise that many algorithms do not require a
-consistent view of data, but can instead function quite well on
-inconsistent data, with network routing being the poster child.
-Internet routing algorithms take significant time to propagate
-updates, so that by the time an update arrives at a given system,
-that system has been sending network traffic the wrong way for
-a considerable length of time.
-Having a few threads continue to send traffic the wrong way for a
-few more milliseconds is clearly not a problem:  In the worst case,
-TCP retransmissions will eventually get the data where it needs to go.
-In general, when tracking the state of the universe outside of the
-computer, some level of inconsistency must be tolerated due to
-speed-of-light delays if nothing else.
-
-<p>
-Furthermore, uncertainty about external state is inherent in many cases.
-For example, a pair of veterinarians might use heartbeat to determine
-whether or not a given cat was alive.
-But how long should they wait after the last heartbeat to decide that
-the cat is in fact dead?
-Waiting less than 400 milliseconds makes no sense because this would
-mean that a relaxed cat would be considered to cycle between death
-and life more than 100 times per minute.
-Moreover, just as with human beings, a cat's heart might stop for
-some period of time, so the exact wait period is a judgment call.
-One of our pair of veterinarians might wait 30 seconds before pronouncing
-the cat dead, while the other might insist on waiting a full minute.
-The two veterinarians would then disagree on the state of the cat during
-the final 30 seconds of the minute following the last heartbeat, as
-fancifully illustrated below:
-
-<p><img src="2013-08-is-it-dead.png" alt="2013-08-is-it-dead.png" width="431"></p>
-
-<p>
-Interestingly enough, this same situation applies to hardware.
-When push comes to shove, how do we tell whether or not some
-external server has failed?
-We send messages to it periodically, and declare it failed if we
-don't receive a response within a given period of time.
-Policy decisions can usually tolerate short
-periods of inconsistency.
-The policy was decided some time ago, and is only now being put into
-effect, so a few milliseconds of delay is normally inconsequential.
-
-<p>
-However, there are algorithms that absolutely must see consistent data.
-For example, the translation from a user-level SystemV semaphore
-ID to the corresponding in-kernel data structure is protected by RCU,
-but it is absolutely forbidden to update a semaphore that has just been
-removed.
-In the Linux kernel, this need for consistency is accommodated by acquiring
-spinlocks located in the in-kernel data structure from within
-the RCU read-side critical section, and this is indicated by the
-green box in the figure above.
-Many other techniques may be used, and are in fact used within the
-Linux kernel.
-
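-<p>
-A minimal sketch of this pattern, using hypothetical
-<tt>sem_lookup()</tt> and <tt>sem_update()</tt> functions along with
-a per-semaphore <tt>lock</tt> and <tt>deleted</tt> flag, is as
-follows:
-
-<blockquote>
-<pre>
- 1 struct sem_array *sma;
- 2
- 3 rcu_read_lock();
- 4 sma = sem_lookup(id);     /* RCU-protected lookup. */
- 5 if (sma) {
- 6   spin_lock(&amp;sma-&gt;lock);  /* Per-object lock supplies consistency. */
- 7   if (!sma-&gt;deleted)
- 8     sem_update(sma);      /* Cannot be concurrently removed. */
- 9   spin_unlock(&amp;sma-&gt;lock);
-10 }
-11 rcu_read_unlock();
-</pre>
-</blockquote>
-
-<p>
-Here the removal path sets <tt>-&gt;deleted</tt> while holding
-<tt>-&gt;lock</tt>, so the update on line&nbsp;8 never touches a
-semaphore that has already been removed.
-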
-<p>
-In short, RCU is not required to maintain consistency, and other
-mechanisms may be used in concert with RCU when consistency is required.
-RCU's specialization allows it to do its job extremely well, and its
-ability to interoperate with other synchronization mechanisms allows
-the right mix of synchronization tools to be used for a given job.
-
-<h3><a name="Performance and Scalability">Performance and Scalability</a></h3>
-
-<p>
-Energy efficiency is a critical component of performance today,
-and Linux-kernel RCU implementations must therefore avoid unnecessarily
-awakening idle CPUs.
-I cannot claim that this requirement was premeditated.
-In fact, I learned of it during a telephone conversation in which I
-was given &ldquo;frank and open&rdquo; feedback on the importance
-of energy efficiency in battery-powered systems and on specific
-energy-efficiency shortcomings of the Linux-kernel RCU implementation.
-In my experience, the battery-powered embedded community will consider
-any unnecessary wakeups to be extremely unfriendly acts.
-So much so that mere Linux-kernel-mailing-list posts are
-insufficient to vent their ire.
-
-<p>
-Memory consumption is not particularly important in most
-situations, and has become decreasingly
-so as memory sizes have expanded and memory
-costs have plummeted.
-However, as I learned from Matt Mackall's
-<a href="http://elinux.org/Linux_Tiny-FAQ">bloatwatch</a>
-efforts, memory footprint is critically important on single-CPU systems with
-non-preemptible (<tt>CONFIG_PREEMPT=n</tt>) kernels, and thus
-<a href="https://lkml.kernel.org/g/20090113221724.GA15307@linux.vnet.ibm.com">tiny RCU</a>
-was born.
-Josh Triplett has since taken over the small-memory banner with his
-<a href="https://tiny.wiki.kernel.org/">Linux kernel tinification</a>
-project, which resulted in
-<a href="#Sleepable RCU">SRCU</a>
-becoming optional for those kernels not needing it.
-
-<p>
-The remaining performance requirements are, for the most part,
-unsurprising.
-For example, in keeping with RCU's read-side specialization,
-<tt>rcu_dereference()</tt> should have negligible overhead (for
-example, suppression of a few minor compiler optimizations).
-Similarly, in non-preemptible environments, <tt>rcu_read_lock()</tt> and
-<tt>rcu_read_unlock()</tt> should have exactly zero overhead.
-
-<p>
-In preemptible environments, in the case where the RCU read-side
-critical section was not preempted (as will be the case for the
-highest-priority real-time process), <tt>rcu_read_lock()</tt> and
-<tt>rcu_read_unlock()</tt> should have minimal overhead.
-In particular, they should not contain atomic read-modify-write
-operations, memory-barrier instructions, preemption disabling,
-interrupt disabling, or backwards branches.
-However, in the case where the RCU read-side critical section was preempted,
-<tt>rcu_read_unlock()</tt> may acquire spinlocks and disable interrupts.
-This is why it is better to nest an RCU read-side critical section
-within a preempt-disable region than vice versa, at least in cases
-where that critical section is short enough to avoid unduly degrading
-real-time latencies.
-
-<p>
-The <tt>synchronize_rcu()</tt> grace-period-wait primitive is
-optimized for throughput.
-It may therefore incur several milliseconds of latency in addition to
-the duration of the longest RCU read-side critical section.
-On the other hand, multiple concurrent invocations of
-<tt>synchronize_rcu()</tt> are required to use batching optimizations
-so that they can be satisfied by a single underlying grace-period-wait
-operation.
-For example, in the Linux kernel, it is not unusual for a single
-grace-period-wait operation to serve more than
-<a href="https://www.usenix.org/conference/2004-usenix-annual-technical-conference/making-rcu-safe-deep-sub-millisecond-response">1,000 separate invocations</a>
-of <tt>synchronize_rcu()</tt>, thus amortizing the per-invocation
-overhead down to nearly zero.
-However, the grace-period optimization is also required to avoid
-measurable degradation of real-time scheduling and interrupt latencies.
-
-<p>
-In some cases, the multi-millisecond <tt>synchronize_rcu()</tt>
-latencies are unacceptable.
-In these cases, <tt>synchronize_rcu_expedited()</tt> may be used
-instead, reducing the grace-period latency down to a few tens of
-microseconds on small systems, at least in cases where the RCU read-side
-critical sections are short.
-There are currently no special latency requirements for
-<tt>synchronize_rcu_expedited()</tt> on large systems, but,
-consistent with the empirical nature of the RCU specification,
-that is subject to change.
-However, there most definitely are scalability requirements:
-A storm of <tt>synchronize_rcu_expedited()</tt> invocations on 4096
-CPUs should at least make reasonable forward progress.
-In return for its shorter latencies, <tt>synchronize_rcu_expedited()</tt>
-is permitted to impose modest degradation of real-time latency
-on non-idle online CPUs.
-That said, it will likely be necessary to take further steps to reduce this
-degradation, hopefully to roughly that of a scheduling-clock interrupt.
-
-<p>
-There are a number of situations where even
-<tt>synchronize_rcu_expedited()</tt>'s reduced grace-period
-latency is unacceptable.
-In these situations, the asynchronous <tt>call_rcu()</tt> can be
-used in place of <tt>synchronize_rcu()</tt> as follows:
-
-<blockquote>
-<pre>
- 1 struct foo {
- 2   int a;
- 3   int b;
- 4   struct rcu_head rh;
- 5 };
- 6
- 7 static void remove_gp_cb(struct rcu_head *rhp)
- 8 {
- 9   struct foo *p = container_of(rhp, struct foo, rh);
-10
-11   kfree(p);
-12 }
-13
-14 bool remove_gp_asynchronous(void)
-15 {
-16   struct foo *p;
-17
-18   spin_lock(&amp;gp_lock);
-19   p = rcu_access_pointer(gp);
-20   if (!p) {
-21     spin_unlock(&amp;gp_lock);
-22     return false;
-23   }
-24   rcu_assign_pointer(gp, NULL);
-25   call_rcu(&amp;p-&gt;rh, remove_gp_cb);
-26   spin_unlock(&amp;gp_lock);
-27   return true;
-28 }
-</pre>
-</blockquote>
-
-<p>
-A definition of <tt>struct foo</tt> is finally needed, and appears
-on lines&nbsp;1-5.
-The function <tt>remove_gp_cb()</tt> is passed to <tt>call_rcu()</tt>
-on line&nbsp;25, and will be invoked after the end of a subsequent
-grace period.
-This gets the same effect as <tt>remove_gp_synchronous()</tt>,
-but without forcing the updater to wait for a grace period to elapse.
-The <tt>call_rcu()</tt> function may be used in a number of
-situations where neither <tt>synchronize_rcu()</tt> nor
-<tt>synchronize_rcu_expedited()</tt> would be legal,
-including within preempt-disable code, <tt>local_bh_disable()</tt> code,
-interrupt-disable code, and interrupt handlers.
-However, even <tt>call_rcu()</tt> is illegal within NMI handlers.
-The callback function (<tt>remove_gp_cb()</tt> in this case) will be
-executed within a softirq (software interrupt) environment within the
-Linux kernel,
-either within a real softirq handler or under the protection
-of <tt>local_bh_disable()</tt>.
-In both the Linux kernel and in userspace, it is bad practice to
-write an RCU callback function that takes too long.
-Long-running operations should be relegated to separate threads or
-(in the Linux kernel) workqueues.
-
-<p>@@QQ@@
-Why does line&nbsp;19 use <tt>rcu_access_pointer()</tt>?
-After all, <tt>call_rcu()</tt> on line&nbsp;25 stores into the
-structure, which would interact badly with concurrent insertions.
-Doesn't this mean that <tt>rcu_dereference()</tt> is required?
-<p>@@QQA@@
-Presumably the <tt>-&gt;gp_lock</tt> acquired on line&nbsp;18 excludes
-any changes, including any insertions that <tt>rcu_dereference()</tt>
-would protect against.
-Therefore, any insertions will be delayed until after <tt>-&gt;gp_lock</tt>
-is released on line&nbsp;25, which in turn means that
-<tt>rcu_access_pointer()</tt> suffices.
-<p>@@QQE@@
-
-<p>
-However, all that <tt>remove_gp_cb()</tt> is doing is
-invoking <tt>kfree()</tt> on the data element.
-This is a common idiom, and is supported by <tt>kfree_rcu()</tt>,
-which allows &ldquo;fire and forget&rdquo; operation as shown below:
-
-<blockquote>
-<pre>
- 1 struct foo {
- 2   int a;
- 3   int b;
- 4   struct rcu_head rh;
- 5 };
- 6
- 7 bool remove_gp_faf(void)
- 8 {
- 9   struct foo *p;
-10
-11   spin_lock(&amp;gp_lock);
-12   p = rcu_dereference(gp);
-13   if (!p) {
-14     spin_unlock(&amp;gp_lock);
-15     return false;
-16   }
-17   rcu_assign_pointer(gp, NULL);
-18   kfree_rcu(p, rh);
-19   spin_unlock(&amp;gp_lock);
-20   return true;
-21 }
-</pre>
-</blockquote>
-
-<p>
-Note that <tt>remove_gp_faf()</tt> simply invokes
-<tt>kfree_rcu()</tt> and proceeds, without any need to pay any
-further attention to the subsequent grace period and <tt>kfree()</tt>.
-It is permissible to invoke <tt>kfree_rcu()</tt> from the same
-environments as for <tt>call_rcu()</tt>.
-Interestingly enough, DYNIX/ptx had the equivalents of
-<tt>call_rcu()</tt> and <tt>kfree_rcu()</tt>, but not
-<tt>synchronize_rcu()</tt>.
-This was due to the fact that RCU was not heavily used within DYNIX/ptx,
-so the very few places that needed something like
-<tt>synchronize_rcu()</tt> simply open-coded it.
-
-<p>@@QQ@@
-Earlier it was claimed that <tt>call_rcu()</tt> and
-<tt>kfree_rcu()</tt> allowed updaters to avoid being blocked
-by readers.
-But how can that be correct, given that the invocation of the callback
-and the freeing of the memory (respectively) must still wait for
-a grace period to elapse?
-<p>@@QQA@@
-We could define things this way, but keep in mind that this sort of
-definition would say that updates in garbage-collected languages
-cannot complete until the next time the garbage collector runs,
-which does not seem at all reasonable.
-The key point is that in most cases, an updater using either
-<tt>call_rcu()</tt> or <tt>kfree_rcu()</tt> can proceed to the
-next update as soon as it has invoked <tt>call_rcu()</tt> or
-<tt>kfree_rcu()</tt>, without having to wait for a subsequent
-grace period.
-<p>@@QQE@@
-
-<p>
-But what if the updater must wait for the completion of code to be
-executed after the end of the grace period, but has other tasks
-that can be carried out in the meantime?
-The polling-style <tt>get_state_synchronize_rcu()</tt> and
-<tt>cond_synchronize_rcu()</tt> functions may be used for this
-purpose, as shown below:
-
-<blockquote>
-<pre>
- 1 bool remove_gp_poll(void)
- 2 {
- 3   struct foo *p;
- 4   unsigned long s;
- 5
- 6   spin_lock(&amp;gp_lock);
- 7   p = rcu_access_pointer(gp);
- 8   if (!p) {
- 9     spin_unlock(&amp;gp_lock);
-10     return false;
-11   }
-12   rcu_assign_pointer(gp, NULL);
-13   spin_unlock(&amp;gp_lock);
-14   s = get_state_synchronize_rcu();
-15   do_something_while_waiting();
-16   cond_synchronize_rcu(s);
-17   kfree(p);
-18   return true;
-19 }
-</pre>
-</blockquote>
-
-<p>
-On line&nbsp;14, <tt>get_state_synchronize_rcu()</tt> obtains a
-&ldquo;cookie&rdquo; from RCU,
-then line&nbsp;15 carries out other tasks,
-and finally, line&nbsp;16 returns immediately if a grace period has
-elapsed in the meantime, but otherwise waits as required.
-The need for <tt>get_state_synchronize_rcu()</tt> and
-<tt>cond_synchronize_rcu()</tt> has appeared quite recently,
-so it is too early to tell whether they will stand the test of time.
-
-<p>
-RCU thus provides a range of tools to allow updaters to strike the
-required tradeoff between latency, flexibility, and CPU overhead.
-
-<h3><a name="Composability">Composability</a></h3>
-
-<p>
-Composability has received much attention in recent years, perhaps in part
-due to the collision of multicore hardware with object-oriented techniques
-designed in single-threaded environments for single-threaded use.
-And in theory, RCU read-side critical sections may be composed, and in
-fact may be nested arbitrarily deeply.
-In practice, as with all real-world implementations of composable
-constructs, there are limitations.
-
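-<p>
-For example, a function containing an RCU read-side critical section
-may call another function that contains its own, yielding properly
-nested critical sections, as in this sketch (both functions
-hypothetical):
-
-<blockquote>
-<pre>
- 1 void helper(void)
- 2 {
- 3   rcu_read_lock();   /* Nested within reader()'s critical section. */
- 4   do_something();
- 5   rcu_read_unlock(); /* Does not end the enclosing critical section. */
- 6 }
- 7
- 8 void reader(void)
- 9 {
-10   rcu_read_lock();
-11   helper();
-12   rcu_read_unlock();
-13 }
-</pre>
-</blockquote>
-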
-<p>
-Implementations of RCU for which <tt>rcu_read_lock()</tt>
-and <tt>rcu_read_unlock()</tt> generate no code, such as
-Linux-kernel RCU when <tt>CONFIG_PREEMPT=n</tt>, can be
-nested arbitrarily deeply.
-After all, there is no overhead.
-Except that if all these instances of <tt>rcu_read_lock()</tt>
-and <tt>rcu_read_unlock()</tt> are visible to the compiler,
-compilation will eventually fail due to exhausting memory,
-mass storage, or user patience, whichever comes first.
-If the nesting is not visible to the compiler, as is the case with
-mutually recursive functions each in its own translation unit,
-stack overflow will result.
-If the nesting takes the form of loops, either the control variable
-will overflow or (in the Linux kernel) you will get an RCU CPU stall warning.
-Nevertheless, this class of RCU implementations is one
-of the most composable constructs in existence.
-
-<p>
-RCU implementations that explicitly track nesting depth
-are limited by the nesting-depth counter.
-For example, the Linux kernel's preemptible RCU limits nesting to
-<tt>INT_MAX</tt>.
-This should suffice for almost all practical purposes.
-That said, a consecutive pair of RCU read-side critical sections
-between which there is an operation that waits for a grace period
-cannot be enclosed in another RCU read-side critical section.
-This is because it is not legal to wait for a grace period within
-an RCU read-side critical section:  To do so would result either
-in deadlock or
-in RCU implicitly splitting the enclosing RCU read-side critical
-section, neither of which is conducive to a long-lived and prosperous
-kernel.
-
-<p>
-It is worth noting that RCU is not alone in limiting composability.
-For example, many transactional-memory implementations prohibit
-composing a pair of transactions separated by an irrevocable
-operation (for example, a network receive operation).
-For another example, lock-based critical sections can be composed
-surprisingly freely, but only if deadlock is avoided.
-
-<p>
-In short, although RCU read-side critical sections are highly composable,
-care is required in some situations, just as is the case for any other
-composable synchronization mechanism.
-
-<h3><a name="Corner Cases">Corner Cases</a></h3>
-
-<p>
-A given RCU workload might have an endless and intense stream of
-RCU read-side critical sections, perhaps even so intense that there
-was never a point in time during which there was not at least one
-RCU read-side critical section in flight.
-RCU cannot allow this situation to block grace periods:  As long as
-all the RCU read-side critical sections are finite, grace periods
-must also be finite.
-
-<p>
-That said, preemptible RCU implementations could potentially result
-in RCU read-side critical sections being preempted for long durations,
-which has the effect of creating a long-duration RCU read-side
-critical section.
-This situation can arise only in heavily loaded systems, but systems using
-real-time priorities are of course more vulnerable.
-Therefore, RCU priority boosting is provided to help deal with this
-case.
-That said, the exact requirements on RCU priority boosting will likely
-evolve as more experience accumulates.
-
-<p>
-Other workloads might have very high update rates.
-Although one can argue that such workloads should instead use
-something other than RCU, the fact remains that RCU must
-handle such workloads gracefully.
-This requirement is another factor driving batching of grace periods,
-but it is also the driving force behind the checks for large numbers
-of queued RCU callbacks in the <tt>call_rcu()</tt> code path.
-Finally, high update rates should not delay RCU read-side critical
-sections, although some read-side delays can occur when using
-<tt>synchronize_rcu_expedited()</tt>, courtesy of this function's use
-of <tt>try_stop_cpus()</tt>.
-(In the future, <tt>synchronize_rcu_expedited()</tt> will be
-converted to use lighter-weight inter-processor interrupts (IPIs),
-but this will still disturb readers, though to a much smaller degree.)
-
-<p>
-Although all three of these corner cases were understood in the early
-1990s, a simple user-level test consisting of <tt>close(open(path))</tt>
-in a tight loop
-in the early 2000s suddenly provided a much deeper appreciation of the
-high-update-rate corner case.
-This test also motivated addition of some RCU code to react to high update
-rates, for example, if a given CPU finds itself with more than 10,000
-RCU callbacks queued, it will cause RCU to take evasive action by
-more aggressively starting grace periods and more aggressively forcing
-completion of grace-period processing.
-This evasive action causes the grace period to complete more quickly,
-but at the cost of restricting RCU's batching optimizations, thus
-increasing the CPU overhead incurred by that grace period.
-
-<h2><a name="Software-Engineering Requirements">
-Software-Engineering Requirements</a></h2>
-
-<p>
-Between Murphy's Law and &ldquo;To err is human&rdquo;, it is necessary to
-guard against mishaps and misuse:
-
-<ol>
-<li>	It is all too easy to forget to use <tt>rcu_read_lock()</tt>
-	everywhere that it is needed, so kernels built with
-	<tt>CONFIG_PROVE_RCU=y</tt> will splat if
-	<tt>rcu_dereference()</tt> is used outside of an
-	RCU read-side critical section.
-	Update-side code can use <tt>rcu_dereference_protected()</tt>,
-	which takes a
-	<a href="https://lwn.net/Articles/371986/">lockdep expression</a>
-	to indicate what is providing the protection.
-	If the indicated protection is not provided, a lockdep splat
-	is emitted.
-
-	<p>
-	Code shared between readers and updaters can use
-	<tt>rcu_dereference_check()</tt>, which also takes a
-	lockdep expression, and emits a lockdep splat if neither
-	<tt>rcu_read_lock()</tt> nor the indicated protection
-	is in place (see the sketch following this list).
-	In addition, <tt>rcu_dereference_raw()</tt> is used in those
-	(hopefully rare) cases where the required protection cannot
-	be easily described.
-	Finally, <tt>rcu_read_lock_held()</tt> is provided to
-	allow a function to verify that it has been invoked within
-	an RCU read-side critical section.
-	I was made aware of this set of requirements shortly after Thomas
-	Gleixner audited a number of RCU uses.
-<li>	A given function might wish to check for RCU-related preconditions
-	upon entry, before using any other RCU API.
-	The <tt>rcu_lockdep_assert()</tt> macro does this job,
-	asserting the expression in kernels having lockdep enabled
-	and doing nothing otherwise.
-<li>	It is also easy to forget to use <tt>rcu_assign_pointer()</tt>
-	and <tt>rcu_dereference()</tt>, perhaps (incorrectly)
-	substituting a simple assignment.
-	To catch this sort of error, a given RCU-protected pointer may be
-	tagged with <tt>__rcu</tt>, after which running sparse
-	with <tt>CONFIG_SPARSE_RCU_POINTER=y</tt> will complain
-	about simple-assignment accesses to that pointer.
-	Arnd Bergmann made me aware of this requirement, and also
-	supplied the needed
-	<a href="https://lwn.net/Articles/376011/">patch series</a>.
-<li>	Kernels built with <tt>CONFIG_DEBUG_OBJECTS_RCU_HEAD=y</tt>
-	will splat if a data element is passed to <tt>call_rcu()</tt>
-	twice in a row, without a grace period in between.
-	(This error is similar to a double free.)
-	The corresponding <tt>rcu_head</tt> structures that are
-	dynamically allocated are automatically tracked, but
-	<tt>rcu_head</tt> structures allocated on the stack
-	must be initialized with <tt>init_rcu_head_on_stack()</tt>
-	and cleaned up with <tt>destroy_rcu_head_on_stack()</tt>.
-	Similarly, statically allocated non-stack <tt>rcu_head</tt>
-	structures must be initialized with <tt>init_rcu_head()</tt>
-	and cleaned up with <tt>destroy_rcu_head()</tt>.
-	Mathieu Desnoyers made me aware of this requirement, and also
-	supplied the needed
-	<a href="https://lkml.kernel.org/g/20100319013024.GA28456@Krystal">patch</a>.
-<li>	An infinite loop in an RCU read-side critical section will
-	eventually trigger an RCU CPU stall warning splat, with
-	the duration of &ldquo;eventually&rdquo; being controlled by the
-	<tt>RCU_CPU_STALL_TIMEOUT</tt> <tt>Kconfig</tt> option, or,
-	alternatively, by the
-	<tt>rcupdate.rcu_cpu_stall_timeout</tt> boot/sysfs
-	parameter.
-	However, RCU is not obligated to produce this splat
-	unless there is a grace period waiting on that particular
-	RCU read-side critical section.
-	<p>
-	Some extreme workloads might intentionally delay
-	RCU grace periods, and systems running those workloads can
-	be booted with <tt>rcupdate.rcu_cpu_stall_suppress</tt>
-	to suppress the splats.
-	This kernel parameter may also be set via <tt>sysfs</tt>.
-	Furthermore, RCU CPU stall warnings are counter-productive
-	during sysrq dumps and during panics.
-	RCU therefore supplies the <tt>rcu_sysrq_start()</tt> and
-	<tt>rcu_sysrq_end()</tt> API members to be called before
-	and after long sysrq dumps.
-	RCU also supplies the <tt>rcu_panic()</tt> notifier that is
-	automatically invoked at the beginning of a panic to suppress
-	further RCU CPU stall warnings.
-
-	<p>
-	This requirement made itself known in the early 1990s, pretty
-	much the first time that it was necessary to debug a CPU stall.
-	That said, the initial implementation in DYNIX/ptx was quite
-	generic in comparison with that of Linux.
-<li>	Although it would be very good to detect pointers leaking out
-	of RCU read-side critical sections, there is currently no
-	good way of doing this.
-	One complication is the need to distinguish between pointers
-	leaking and pointers that have been handed off from RCU to
-	some other synchronization mechanism, for example, reference
-	counting.
-<li>	In kernels built with <tt>CONFIG_RCU_TRACE=y</tt>, RCU-related
-	information is provided via both debugfs and event tracing.
-<li>	Open-coded use of <tt>rcu_assign_pointer()</tt> and
-	<tt>rcu_dereference()</tt> to create typical linked
-	data structures can be surprisingly error-prone.
-	Therefore, RCU-protected
-	<a href="https://lwn.net/Articles/609973/#RCU List APIs">linked lists</a>
-	and, more recently, RCU-protected
-	<a href="https://lwn.net/Articles/612100/">hash tables</a>
-	are available.
-	Many other special-purpose RCU-protected data structures are
-	available in the Linux kernel and the userspace RCU library.
-<li>	Some linked structures are created at compile time, but still
-	require <tt>__rcu</tt> checking.
-	The <tt>RCU_POINTER_INITIALIZER()</tt> macro serves this
-	purpose.
-<li>	It is not necessary to use <tt>rcu_assign_pointer()</tt>
-	when creating linked structures that are to be published via
-	a single external pointer.
-	The <tt>RCU_INIT_POINTER()</tt> macro is provided for
-	this task and also for assigning <tt>NULL</tt> pointers
-	at runtime.
-</ol>
-
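-<p>
-The following sketch pulls the first few of these items together,
-reusing <tt>struct foo</tt>, <tt>gp</tt>, and <tt>gp_lock</tt> from
-the earlier examples, with both functions hypothetical:
-
-<blockquote>
-<pre>
- 1 struct foo __rcu *gp; /* Tagged for sparse checking. */
- 2
- 3 void updater(int newval)
- 4 {
- 5   struct foo *p;
- 6
- 7   spin_lock(&amp;gp_lock);
- 8   p = rcu_dereference_protected(gp, lockdep_is_held(&amp;gp_lock));
- 9   if (p)
-10     p-&gt;a = newval;
-11   spin_unlock(&amp;gp_lock);
-12 }
-13
-14 int shared_accessor(void)
-15 {
-16   struct foo *p;
-17
-18   p = rcu_dereference_check(gp, lockdep_is_held(&amp;gp_lock));
-19   return p ? p-&gt;a : 0;
-20 }
-</pre>
-</blockquote>
-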
-<p>
-This is not a hard-and-fast list:  RCU's diagnostic capabilities will
-continue to be guided by the number and type of usage bugs found
-in real-world RCU usage.
-
-<h2><a name="Linux Kernel Complications">Linux Kernel Complications</a></h2>
-
-<p>
-The Linux kernel provides an interesting environment for all kinds of
-software, including RCU.
-Some of the relevant points of interest are as follows:
-
-<ol>
-<li>	<a href="#Configuration">Configuration</a>.
-<li>	<a href="#Firmware Interface">Firmware Interface</a>.
-<li>	<a href="#Early Boot">Early Boot</a>.
-<li>	<a href="#Interrupts and NMIs">
-	Interrupts and non-maskable interrupts (NMIs)</a>.
-<li>	<a href="#Loadable Modules">Loadable Modules</a>.
-<li>	<a href="#Hotplug CPU">Hotplug CPU</a>.
-<li>	<a href="#Scheduler and RCU">Scheduler and RCU</a>.
-<li>	<a href="#Tracing and RCU">Tracing and RCU</a>.
-<li>	<a href="#Energy Efficiency">Energy Efficiency</a>.
-<li>	<a href="#Memory Efficiency">Memory Efficiency</a>.
-<li>	<a href="#Performance, Scalability, Response Time, and Reliability">
-	Performance, Scalability, Response Time, and Reliability</a>.
-</ol>
-
-<p>
-This list is probably incomplete, but it does give a feel for the
-most notable Linux-kernel complications.
-Each of the following sections covers one of the above topics.
-
-<h3><a name="Configuration">Configuration</a></h3>
-
-<p>
-RCU's goal is automatic configuration, so that almost nobody
-needs to worry about RCU's <tt>Kconfig</tt> options.
-And for almost all users, RCU does in fact work well
-&ldquo;out of the box.&rdquo;
-
-<p>
-However, there are specialized use cases that are handled by
-kernel boot parameters and <tt>Kconfig</tt> options.
-Unfortunately, the <tt>Kconfig</tt> system will explicitly ask users
-about new <tt>Kconfig</tt> options, which requires almost all of them
-to be hidden behind a <tt>CONFIG_RCU_EXPERT</tt> <tt>Kconfig</tt> option.
-
-<p>
-This all should be quite obvious, but the fact remains that
-Linus Torvalds recently had to
-<a href="https://lkml.kernel.org/g/CA+55aFy4wcCwaL4okTs8wXhGZ5h-ibecy_Meg9C4MNQrUnwMcg@mail.gmail.com">remind</a>
-me of this requirement.
-
-<h3><a name="Firmware Interface">Firmware Interface</a></h3>
-
-<p>
-In many cases, the kernel obtains information about the system from the
-firmware, and sometimes things are lost in translation.
-Or the translation is accurate, but the original message is bogus.
-
-<p>
-For example, some systems' firmware overreports the number of CPUs,
-sometimes by a large factor.
-If RCU naively believed the firmware, as it used to do,
-it would create too many per-CPU kthreads.
-Although the resulting system will still run correctly, the extra
-kthreads needlessly consume memory and can cause confusion
-when they show up in <tt>ps</tt> listings.
-
-<p>
-RCU must therefore wait for a given CPU to actually come online before
-it can allow itself to believe that the CPU actually exists.
-The resulting &ldquo;ghost CPUs&rdquo; (which are never going to
-come online) cause a number of
-<a href="https://paulmck.livejournal.com/37494.html">interesting complications</a>.
-
-<h3><a name="Early Boot">Early Boot</a></h3>
-
-<p>
-The Linux kernel's boot sequence is an interesting process,
-and RCU is used early, even before <tt>rcu_init()</tt>
-is invoked.
-In fact, a number of RCU's primitives can be used as soon as the
-initial task's <tt>task_struct</tt> is available and the
-boot CPU's per-CPU variables are set up.
-The read-side primitives (<tt>rcu_read_lock()</tt>,
-<tt>rcu_read_unlock()</tt>, <tt>rcu_dereference()</tt>,
-and <tt>rcu_access_pointer()</tt>) will operate normally very early on,
-as will <tt>rcu_assign_pointer()</tt>.
-
-<p>
-Although <tt>call_rcu()</tt> may be invoked at any
-time during boot, callbacks are not guaranteed to be invoked until after
-the scheduler is fully up and running.
-This delay in callback invocation is due to the fact that RCU does not
-invoke callbacks until it is fully initialized, and this full initialization
-cannot occur until after the scheduler has initialized itself to the
-point where RCU can spawn and run its kthreads.
-In theory, it would be possible to invoke callbacks earlier;
-however, this is not a panacea because there would be severe restrictions
-on what operations those callbacks could invoke.
-
-<p>
-Perhaps surprisingly, <tt>synchronize_rcu()</tt>,
-<a href="#Bottom-Half Flavor"><tt>synchronize_rcu_bh()</tt></a>
-(<a href="#Bottom-Half Flavor">discussed below</a>),
-and
-<a href="#Sched Flavor"><tt>synchronize_sched()</tt></a>
-will all operate normally
-during very early boot, the reason being that there is only one CPU
-and preemption is disabled.
-This means that a call to <tt>synchronize_rcu()</tt> (or friends)
-is itself a quiescent
-state and thus a grace period, so the early-boot implementation can
-be a no-op.
-
-<p>
-Both <tt>synchronize_rcu_bh()</tt> and <tt>synchronize_sched()</tt>
-continue to operate normally through the remainder of boot, courtesy
-of the fact that preemption is disabled across their RCU read-side
-critical sections and also courtesy of the fact that there is still
-only one CPU.
-However, once the scheduler starts initializing, preemption is enabled.
-There is still only a single CPU, but the fact that preemption is enabled
-means that the no-op implementation of <tt>synchronize_rcu()</tt> no
-longer works in <tt>CONFIG_PREEMPT=y</tt> kernels.
-Therefore, as soon as the scheduler starts initializing, the early-boot
-fastpath is disabled.
-This means that <tt>synchronize_rcu()</tt> switches to its runtime
-mode of operation where it posts callbacks, which in turn means that
-any call to <tt>synchronize_rcu()</tt> will block until the corresponding
-callback is invoked.
-Unfortunately, the callback cannot be invoked until RCU's runtime
-grace-period machinery is up and running, which cannot happen until
-the scheduler has initialized itself sufficiently to allow RCU's
-kthreads to be spawned.
-Therefore, invoking <tt>synchronize_rcu()</tt> during scheduler
-initialization can result in deadlock.
-
-<p>@@QQ@@
-So what happens with <tt>synchronize_rcu()</tt> during
-scheduler initialization for <tt>CONFIG_PREEMPT=n</tt>
-kernels?
-<p>@@QQA@@
-In <tt>CONFIG_PREEMPT=n</tt> kernels, <tt>synchronize_rcu()</tt>
-maps directly to <tt>synchronize_sched()</tt>.
-Therefore, <tt>synchronize_rcu()</tt> works normally throughout
-boot in <tt>CONFIG_PREEMPT=n</tt> kernels.
-However, your code must also work in <tt>CONFIG_PREEMPT=y</tt> kernels,
-so it is still necessary to avoid invoking <tt>synchronize_rcu()</tt>
-during scheduler initialization.
-<p>@@QQE@@
-
-<p>
-I learned of these boot-time requirements as a result of a series of
-system hangs.
-
-<h3><a name="Interrupts and NMIs">Interrupts and NMIs</a></h3>
-
-<p>
-The Linux kernel has interrupts, and RCU read-side critical sections are
-legal within interrupt handlers and within interrupt-disabled regions
-of code, as are invocations of <tt>call_rcu()</tt>.
-
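-<p>
-For example, an interrupt handler might do something like the
-following sketch, in which the device-specific names are
-hypothetical:
-
-<blockquote>
-<pre>
- 1 struct my_config __rcu *my_config_ptr;
- 2
- 3 static irqreturn_t my_isr(int irq, void *dev_id)
- 4 {
- 5   struct my_config *cfg;
- 6
- 7   rcu_read_lock(); /* Legal in interrupt context. */
- 8   cfg = rcu_dereference(my_config_ptr);
- 9   if (cfg)
-10     handle_device(dev_id, cfg);
-11   rcu_read_unlock();
-12   return IRQ_HANDLED;
-13 }
-</pre>
-</blockquote>
-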
-<p>
-Some Linux-kernel architectures can enter an interrupt handler from
-non-idle process context, and then just never leave it, instead stealthily
-transitioning back to process context.
-This trick is sometimes used to invoke system calls from inside the kernel.
-These &ldquo;half-interrupts&rdquo; mean that RCU has to be very careful
-about how it counts interrupt nesting levels.
-I learned of this requirement the hard way during a rewrite
-of RCU's dyntick-idle code.
-
-<p>
-The Linux kernel has non-maskable interrupts (NMIs), and
-RCU read-side critical sections are legal within NMI handlers.
-Thankfully, RCU update-side primitives, including
-<tt>call_rcu()</tt>, are prohibited within NMI handlers.
-
-<p>
-The name notwithstanding, some Linux-kernel architectures
-can have nested NMIs, which RCU must handle correctly.
-Andy Lutomirski
-<a href="https://lkml.kernel.org/g/CALCETrXLq1y7e_dKFPgou-FKHB6Pu-r8+t-6Ds+8=va7anBWDA@mail.gmail.com">surprised me</a>
-with this requirement;
-he also kindly surprised me with
-<a href="https://lkml.kernel.org/g/CALCETrXSY9JpW3uE6H8WYk81sg56qasA2aqmjMPsq5dOtzso=g@mail.gmail.com">an algorithm</a>
-that meets this requirement.
-
-<h3><a name="Loadable Modules">Loadable Modules</a></h3>
-
-<p>
-The Linux kernel has loadable modules, and these modules can
-also be unloaded.
-After a given module has been unloaded, any attempt to call
-one of its functions results in a segmentation fault.
-The module-unload functions must therefore cancel any
-delayed calls to loadable-module functions, for example,
-any outstanding <tt>mod_timer()</tt> must be dealt with
-via <tt>del_timer_sync()</tt> or similar.
-
-<p>
-Unfortunately, there is no way to cancel an RCU callback;
-once you invoke <tt>call_rcu()</tt>, the callback function is
-going to eventually be invoked, unless the system goes down first.
-Because it is normally considered socially irresponsible to crash the system
-in response to a module unload request, we need some other way
-to deal with in-flight RCU callbacks.
-
-<p>
-RCU therefore provides
-<tt><a href="https://lwn.net/Articles/217484/">rcu_barrier()</a></tt>,
-which waits until all in-flight RCU callbacks have been invoked.
-If a module uses <tt>call_rcu()</tt>, its exit function should therefore
-prevent any future invocation of <tt>call_rcu()</tt>, then invoke
-<tt>rcu_barrier()</tt>.
-In theory, the underlying module-unload code could invoke
-<tt>rcu_barrier()</tt> unconditionally, but in practice this would
-incur unacceptable latencies.
-
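-<p>
-A sketch of the resulting module-exit idiom, with the unregistration
-and cleanup functions hypothetical, is as follows:
-
-<blockquote>
-<pre>
- 1 static void __exit my_module_exit(void)
- 2 {
- 3   unregister_my_hooks(); /* No new call_rcu() invocations after this. */
- 4   rcu_barrier();         /* Wait for all in-flight RCU callbacks. */
- 5   free_module_data();    /* Now safe to tear everything down. */
- 6 }
-</pre>
-</blockquote>
-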
-<p>
-Nikita Danilov noted this requirement for an analogous filesystem-unmount
-situation, and Dipankar Sarma incorporated <tt>rcu_barrier()</tt> into RCU.
-The need for <tt>rcu_barrier()</tt> for module unloading became
-apparent later.
-
-<h3><a name="Hotplug CPU">Hotplug CPU</a></h3>
-
-<p>
-The Linux kernel supports CPU hotplug, which means that CPUs
-can come and go.
-It is of course illegal to use any RCU API member from an offline CPU.
-This requirement was present from day one in DYNIX/ptx, but
-on the other hand, the Linux kernel's CPU-hotplug implementation
-is &ldquo;interesting.&rdquo;
-
-<p>
-The Linux-kernel CPU-hotplug implementation has notifiers that
-are used to allow the various kernel subsystems (including RCU)
-to respond appropriately to a given CPU-hotplug operation.
-Most RCU operations may be invoked from CPU-hotplug notifiers,
-including even normal synchronous grace-period operations
-such as <tt>synchronize_rcu()</tt>.
-However, expedited grace-period operations such as
-<tt>synchronize_rcu_expedited()</tt> are not supported,
-due to the fact that current implementations block CPU-hotplug
-operations, which could result in deadlock.
-
-<p>
-In addition, all-callback-wait operations such as
-<tt>rcu_barrier()</tt> are also not supported, due to the
-fact that there are phases of CPU-hotplug operations where
-the outgoing CPU's callbacks will not be invoked until after
-the CPU-hotplug operation ends, which could also result in deadlock.
-
-<h3><a name="Scheduler and RCU">Scheduler and RCU</a></h3>
-
-<p>
-RCU depends on the scheduler, and the scheduler uses RCU to
-protect some of its data structures.
-This means the scheduler is forbidden from acquiring
-the runqueue locks and the priority-inheritance locks
-in the middle of an outermost RCU read-side critical section unless either
-(1)&nbsp;it releases them before exiting that same
-RCU read-side critical section, or
-(2)&nbsp;interrupts are disabled across
-that entire RCU read-side critical section.
-This same prohibition also applies (recursively!) to any lock that is acquired
-while holding any lock to which this prohibition applies.
-Adhering to this rule prevents preemptible RCU from invoking
-<tt>rcu_read_unlock_special()</tt> while either runqueue or
-priority-inheritance locks are held, thus avoiding deadlock.
-
-<p>
-Prior to v4.4, it was only necessary to disable preemption across
-RCU read-side critical sections that acquired scheduler locks.
-In v4.4, expedited grace periods started using IPIs, and these
-IPIs could force a <tt>rcu_read_unlock()</tt> to take the slowpath.
-Therefore, this expedited-grace-period change required disabling of
-interrupts, not just preemption.
-
-<p>
-For RCU's part, the preemptible-RCU <tt>rcu_read_unlock()</tt>
-implementation must be written carefully to avoid similar deadlocks.
-In particular, <tt>rcu_read_unlock()</tt> must tolerate an
-interrupt where the interrupt handler invokes both
-<tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>.
-This possibility requires <tt>rcu_read_unlock()</tt> to use
-negative nesting levels to avoid destructive recursion via the
-interrupt handler's use of RCU.
-
-<p>
-This pair of mutual scheduler-RCU requirements came as a
-<a href="https://lwn.net/Articles/453002/">complete surprise</a>.
-
-<p>
-As noted above, RCU makes use of kthreads, and it is necessary to
-avoid excessive CPU-time accumulation by these kthreads.
-This requirement was no surprise, but RCU's violation of it
-when running context-switch-heavy workloads on kernels built with
-<tt>CONFIG_NO_HZ_FULL=y</tt>
-<a href="http://www.rdrop.com/users/paulmck/scalability/paper/BareMetal.2015.01.15b.pdf">did come as a surprise [PDF]</a>.
-RCU has made good progress towards meeting this requirement, even
-for context-switch-heavy <tt>CONFIG_NO_HZ_FULL=y</tt> workloads,
-but there is room for further improvement.
-
-<h3><a name="Tracing and RCU">Tracing and RCU</a></h3>
-
-<p>
-It is possible to use tracing on RCU code, but tracing itself
-uses RCU.
-For this reason, <tt>rcu_dereference_raw_notrace()</tt>
-is provided for use by tracing, which avoids the destructive
-recursion that could otherwise ensue.
-This API is also used by virtualization in some architectures,
-where RCU readers execute in environments in which tracing
-cannot be used.
-The tracing folks both located the requirement and provided the
-needed fix, so this surprise requirement was relatively painless.
-
-<h3><a name="Energy Efficiency">Energy Efficiency</a></h3>
-
-<p>
-Interrupting idle CPUs is considered socially unacceptable,
-especially by people with battery-powered embedded systems.
-RCU therefore conserves energy by detecting which CPUs are
-idle, including tracking CPUs that have been interrupted from idle.
-This is a large part of the energy-efficiency requirement,
-so I learned of this via an irate phone call.
-
-<p>
-Because RCU avoids interrupting idle CPUs, it is illegal to
-execute an RCU read-side critical section on an idle CPU.
-(Kernels built with <tt>CONFIG_PROVE_RCU=y</tt> will splat
-if you try it.)
-The <tt>RCU_NONIDLE()</tt> macro and <tt>_rcuidle</tt>
-event tracing are provided to work around this restriction.
-In addition, <tt>rcu_is_watching()</tt> may be used to
-test whether or not it is currently legal to run RCU read-side
-critical sections on this CPU.
-I learned of the need for diagnostics on the one hand
-and <tt>RCU_NONIDLE()</tt> on the other while inspecting
-idle-loop code.
-Steven Rostedt supplied <tt>_rcuidle</tt> event tracing,
-which is used quite heavily in the idle loop.
-
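-<p>
-For example, code that might run on an idle CPU can proceed as follows.
-This is a minimal sketch: <tt>do_rcu_read()</tt> is hypothetical.
-
-<blockquote>
-<pre>
- 1 if (rcu_is_watching())
- 2   do_rcu_read();              /* RCU readers are legal here. */
- 3 else
- 4   RCU_NONIDLE(do_rcu_read()); /* Make RCU briefly pay attention. */
-</pre>
-</blockquote>
-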
-<p>
-It is similarly socially unacceptable to interrupt a
-<tt>nohz_full</tt> CPU running in userspace.
-RCU must therefore track <tt>nohz_full</tt> userspace
-execution.
-And in
-<a href="https://lwn.net/Articles/558284/"><tt>CONFIG_NO_HZ_FULL_SYSIDLE=y</tt></a>
-kernels, RCU must separately track idle CPUs on the one hand and
-CPUs that are either idle or executing in userspace on the other.
-In both cases, RCU must be able to sample state at two points in
-time, and be able to determine whether or not some other CPU spent
-any time idle and/or executing in userspace.
-
-<p>
-These energy-efficiency requirements have proven quite difficult to
-understand and to meet; for example, there have been more than five
-clean-sheet rewrites of RCU's energy-efficiency code, the last of
-which was finally able to demonstrate
-<a href="http://www.rdrop.com/users/paulmck/realtime/paper/AMPenergy.2013.04.19a.pdf">real energy savings running on real hardware [PDF]</a>.
-As noted earlier,
-I learned of many of these requirements via angry phone calls:
-Flaming me on the Linux-kernel mailing list was apparently not
-sufficient to fully vent their ire at RCU's energy-efficiency bugs!
-
-<h3><a name="Memory Efficiency">Memory Efficiency</a></h3>
-
-<p>
-Although small-memory non-realtime systems can simply use Tiny RCU,
-code size is only one aspect of memory efficiency.
-Another aspect is the size of the <tt>rcu_head</tt> structure
-used by <tt>call_rcu()</tt> and <tt>kfree_rcu()</tt>.
-Although this structure contains nothing more than a pair of pointers,
-it does appear in many RCU-protected data structures, including
-some that are size critical.
-The <tt>page</tt> structure is a case in point, as evidenced by
-the many occurrences of the <tt>union</tt> keyword within that structure.
-
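-<p>
-As of this writing, the <tt>rcu_head</tt> structure consists of
-essentially the following pair of pointers:
-
-<blockquote>
-<pre>
- 1 struct rcu_head {
- 2   struct rcu_head *next;              /* Next callback in list. */
- 3   void (*func)(struct rcu_head *rh);  /* Post-grace-period function. */
- 4 };
-</pre>
-</blockquote>
-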
-<p>
-This need for memory efficiency is one reason that RCU uses hand-crafted
-singly linked lists to track the <tt>rcu_head</tt> structures that
-are waiting for a grace period to elapse.
-It is also the reason why <tt>rcu_head</tt> structures do not contain
-debug information, such as fields tracking the file and line of the
-<tt>call_rcu()</tt> or <tt>kfree_rcu()</tt> that posted them.
-Although this information might appear in debug-only kernel builds at some
-point, in the meantime, the <tt>-&gt;func</tt> field will often provide
-the needed debug information.
-
-<p>
-However, in some cases, the need for memory efficiency leads to even
-more extreme measures.
-Returning to the <tt>page</tt> structure, the <tt>rcu_head</tt> field
-shares storage with a great many other structures that are used at
-various points in the corresponding page's lifetime.
-In order to correctly resolve certain
-<a href="https://lkml.kernel.org/g/1439976106-137226-1-git-send-email-kirill.shutemov@linux.intel.com">race conditions</a>,
-the Linux kernel's memory-management subsystem needs a particular bit
-to remain zero during all phases of grace-period processing,
-and that bit happens to map to the bottom bit of the
-<tt>rcu_head</tt> structure's <tt>-&gt;next</tt> field.
-RCU makes this guarantee as long as <tt>call_rcu()</tt>
-is used to post the callback, as opposed to <tt>kfree_rcu()</tt>
-or some future &ldquo;lazy&rdquo;
-variant of <tt>call_rcu()</tt> that might one day be created for
-energy-efficiency purposes.
-
-<h3><a name="Performance, Scalability, Response Time, and Reliability">
-Performance, Scalability, Response Time, and Reliability</a></h3>
-
-<p>
-Expanding on the
-<a href="#Performance and Scalability">earlier discussion</a>,
-RCU is used heavily by hot code paths in performance-critical
-portions of the Linux kernel's networking, security, virtualization,
-and scheduling code paths.
-RCU must therefore use efficient implementations, especially in its
-read-side primitives.
-To that end, it would be good if preemptible RCU's implementation
-of <tt>rcu_read_lock()</tt> could be inlined; however, doing
-this requires resolving <tt>#include</tt> issues with the
-<tt>task_struct</tt> structure.
-
-<p>
-The Linux kernel supports hardware configurations with up to
-4096 CPUs, which means that RCU must be extremely scalable.
-Algorithms that involve frequent acquisitions of global locks or
-frequent atomic operations on global variables simply cannot be
-tolerated within the RCU implementation.
-RCU therefore makes heavy use of a combining tree based on the
-<tt>rcu_node</tt> structure.
-RCU is required to tolerate all CPUs continuously invoking any
-combination of RCU's runtime primitives with minimal per-operation
-overhead.
-In fact, in many cases, increasing load must <i>decrease</i> the
-per-operation overhead; witness the batching optimizations for
-<tt>synchronize_rcu()</tt>, <tt>call_rcu()</tt>,
-<tt>synchronize_rcu_expedited()</tt>, and <tt>rcu_barrier()</tt>.
-As a general rule, RCU must cheerfully accept whatever the
-rest of the Linux kernel decides to throw at it.
-
-<p>
-The Linux kernel is used for real-time workloads, especially
-in conjunction with the
-<a href="https://rt.wiki.kernel.org/index.php/Main_Page">-rt patchset</a>.
-The real-time-latency response requirements are such that the
-traditional approach of disabling preemption across RCU
-read-side critical sections is inappropriate.
-Kernels built with <tt>CONFIG_PREEMPT=y</tt> therefore
-use an RCU implementation that allows RCU read-side critical
-sections to be preempted.
-This requirement made its presence known after users made it
-clear that an earlier
-<a href="https://lwn.net/Articles/107930/">real-time patch</a>
-did not meet their needs, in conjunction with some
-<a href="https://lkml.kernel.org/g/20050318002026.GA2693@us.ibm.com">RCU issues</a>
-encountered by a very early version of the -rt patchset.
-
-<p>
-In addition, RCU must make do with a sub-100-microsecond real-time latency
-budget.
-In fact, on smaller systems with the -rt patchset, the Linux kernel
-provides sub-20-microsecond real-time latencies for the whole kernel,
-including RCU.
-RCU's scalability and latency must therefore be sufficient for
-these sorts of configurations.
-To my surprise, the sub-100-microsecond real-time latency budget
-<a href="http://www.rdrop.com/users/paulmck/realtime/paper/bigrt.2013.01.31a.LCA.pdf">
-applies to even the largest systems [PDF]</a>,
-up to and including systems with 4096 CPUs.
-This real-time requirement motivated the grace-period kthread, which
-also simplified handling of a number of race conditions.
-
-<p>
-Finally, RCU's status as a synchronization primitive means that
-any RCU failure can result in arbitrary memory corruption that can be
-extremely difficult to debug.
-This means that RCU must be extremely reliable, which in
-practice also means that RCU must have an aggressive stress-test
-suite.
-This stress-test suite is called <tt>rcutorture</tt>.
-
-<p>
-Although the need for <tt>rcutorture</tt> was no surprise,
-the current immense popularity of the Linux kernel is posing
-interesting&mdash;and perhaps unprecedented&mdash;validation
-challenges.
-To see this, keep in mind that there are well over one billion
-instances of the Linux kernel running today, given Android
-smartphones, Linux-powered televisions, and servers.
-This number can be expected to increase sharply with the advent of
-the celebrated Internet of Things.
-
-<p>
-Suppose that RCU contains a race condition that manifests on average
-once per million years of runtime.
-Because the installed base exceeds one billion instances, it accumulates
-more than one billion instance-years of runtime each calendar year,
-so this bug will be occurring upwards of 1,000 times per year,
-or about three times per <i>day</i>, across the installed base.
-RCU could simply hide behind hardware error rates, given that no one
-should really expect their smartphone to last for a million years.
-However, anyone taking too much comfort from this thought should
-consider the fact that in most jurisdictions, a successful multi-year
-test of a given mechanism, which might include a Linux kernel,
-suffices for a number of types of safety-critical certifications.
-In fact, rumor has it that the Linux kernel is already being used
-in production for safety-critical applications.
-I don't know about you, but I would feel quite bad if a bug in RCU
-killed someone.
-Which might explain my recent focus on validation and verification.
-
-<h2><a name="Other RCU Flavors">Other RCU Flavors</a></h2>
-
-<p>
-One of the more surprising things about RCU is that there are now
-no fewer than five <i>flavors</i>, or API families.
-In addition, the primary flavor that has been the sole focus up to
-this point has two different implementations, non-preemptible and
-preemptible.
-The other four flavors are listed below, with requirements for each
-described in a separate section.
-
-<ol>
-<li>	<a href="#Bottom-Half Flavor">Bottom-Half Flavor</a>
-<li>	<a href="#Sched Flavor">Sched Flavor</a>
-<li>	<a href="#Sleepable RCU">Sleepable RCU</a>
-<li>	<a href="#Tasks RCU">Tasks RCU</a>
-</ol>
-
-<h3><a name="Bottom-Half Flavor">Bottom-Half Flavor</a></h3>
-
-<p>
-The softirq-disable (AKA &ldquo;bottom-half&rdquo;,
-hence the &ldquo;_bh&rdquo; abbreviations)
-flavor of RCU, or <i>RCU-bh</i>, was developed by
-Dipankar Sarma to provide a flavor of RCU that could withstand the
-network-based denial-of-service attacks researched by Robert
-Olsson.
-These attacks placed so much networking load on the system
-that some of the CPUs never exited softirq execution,
-which in turn prevented those CPUs from ever executing a context switch,
-which, in the RCU implementation of that time, prevented grace periods
-from ever ending.
-The result was an out-of-memory condition and a system hang.
-
-<p>
-The solution was the creation of RCU-bh, which does
-<tt>local_bh_disable()</tt>
-across its read-side critical sections, and which uses the transition
-from one type of softirq processing to another as a quiescent state
-in addition to context switch, idle, user mode, and offline.
-This means that RCU-bh grace periods can complete even when some of
-the CPUs execute in softirq indefinitely, thus allowing algorithms
-based on RCU-bh to withstand network-based denial-of-service attacks.
-
-<p>
-Because
-<tt>rcu_read_lock_bh()</tt> and <tt>rcu_read_unlock_bh()</tt>
-disable and re-enable softirq handlers, any attempt to start a softirq
-handler during the
-RCU-bh read-side critical section will be deferred.
-In this case, <tt>rcu_read_unlock_bh()</tt>
-will invoke softirq processing, which can take considerable time.
-One can of course argue that this softirq overhead should be associated
-with the code following the RCU-bh read-side critical section rather
-than <tt>rcu_read_unlock_bh()</tt>, but the fact
-is that most profiling tools cannot be expected to make this sort
-of fine distinction.
-For example, suppose that a three-millisecond-long RCU-bh read-side
-critical section executes during a time of heavy networking load.
-There will very likely be an attempt to invoke at least one softirq
-handler during that three milliseconds, but any such invocation will
-be delayed until the time of the <tt>rcu_read_unlock_bh()</tt>.
-This can of course make it appear at first glance as if
-<tt>rcu_read_unlock_bh()</tt> was executing very slowly.
-
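-<p>
-A typical RCU-bh reader therefore looks as follows.
-This is a minimal sketch: <tt>p</tt>, <tt>gp</tt>, and
-<tt>do_something_with()</tt> are hypothetical.
-
-<blockquote>
-<pre>
- 1 rcu_read_lock_bh();
- 2 p = rcu_dereference_bh(gp);
- 3 do_something_with(p);
- 4 rcu_read_unlock_bh();  /* May run deferred softirq handlers. */
-</pre>
-</blockquote>
-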
-<p>
-The
-<a href="https://lwn.net/Articles/609973/#RCU Per-Flavor API Table">RCU-bh API</a>
-includes
-<tt>rcu_read_lock_bh()</tt>,
-<tt>rcu_read_unlock_bh()</tt>,
-<tt>rcu_dereference_bh()</tt>,
-<tt>rcu_dereference_bh_check()</tt>,
-<tt>synchronize_rcu_bh()</tt>,
-<tt>synchronize_rcu_bh_expedited()</tt>,
-<tt>call_rcu_bh()</tt>,
-<tt>rcu_barrier_bh()</tt>, and
-<tt>rcu_read_lock_bh_held()</tt>.
-
-<h3><a name="Sched Flavor">Sched Flavor</a></h3>
-
-<p>
-Before preemptible RCU, waiting for an RCU grace period had the
-side effect of also waiting for all pre-existing interrupt
-and NMI handlers.
-However, there are legitimate preemptible-RCU implementations that
-do not have this property, given that any point in the code outside
-of an RCU read-side critical section can be a quiescent state.
-Therefore, <i>RCU-sched</i> was created, which follows &ldquo;classic&rdquo;
-RCU in that an RCU-sched grace period waits for pre-existing
-interrupt and NMI handlers.
-In kernels built with <tt>CONFIG_PREEMPT=n</tt>, the RCU and RCU-sched
-APIs have identical implementations, while kernels built with
-<tt>CONFIG_PREEMPT=y</tt> provide a separate implementation for each.
-
-<p>
-Note well that in <tt>CONFIG_PREEMPT=y</tt> kernels,
-<tt>rcu_read_lock_sched()</tt> and <tt>rcu_read_unlock_sched()</tt>
-disable and re-enable preemption, respectively.
-This means that if there was a preemption attempt during the
-RCU-sched read-side critical section, <tt>rcu_read_unlock_sched()</tt>
-will enter the scheduler, with all the latency and overhead entailed.
-Just as with <tt>rcu_read_unlock_bh()</tt>, this can make it look
-as if <tt>rcu_read_unlock_sched()</tt> was executing very slowly.
-However, the highest-priority task won't be preempted, so that task
-will enjoy low-overhead <tt>rcu_read_unlock_sched()</tt> invocations.
-
-<p>
-The
-<a href="https://lwn.net/Articles/609973/#RCU Per-Flavor API Table">RCU-sched API</a>
-includes
-<tt>rcu_read_lock_sched()</tt>,
-<tt>rcu_read_unlock_sched()</tt>,
-<tt>rcu_read_lock_sched_notrace()</tt>,
-<tt>rcu_read_unlock_sched_notrace()</tt>,
-<tt>rcu_dereference_sched()</tt>,
-<tt>rcu_dereference_sched_check()</tt>,
-<tt>synchronize_sched()</tt>,
-<tt>synchronize_rcu_sched_expedited()</tt>,
-<tt>call_rcu_sched()</tt>,
-<tt>rcu_barrier_sched()</tt>, and
-<tt>rcu_read_lock_sched_held()</tt>.
-However, anything that disables preemption also marks an RCU-sched
-read-side critical section, including
-<tt>preempt_disable()</tt> and <tt>preempt_enable()</tt>,
-<tt>local_irq_save()</tt> and <tt>local_irq_restore()</tt>,
-and so on.
-
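-<p>
-For example, the following preemption-disabled region is every bit
-as much an RCU-sched reader as one marked with
-<tt>rcu_read_lock_sched()</tt>.
-This is a minimal sketch: <tt>p</tt>, <tt>gp</tt>, and
-<tt>do_something_with()</tt> are hypothetical.
-
-<blockquote>
-<pre>
- 1 preempt_disable();  /* Begin RCU-sched reader. */
- 2 p = rcu_dereference_sched(gp);
- 3 do_something_with(p);
- 4 preempt_enable();   /* End RCU-sched reader; may enter scheduler. */
-</pre>
-</blockquote>
-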
-<h3><a name="Sleepable RCU">Sleepable RCU</a></h3>
-
-<p>
-For well over a decade, someone saying &ldquo;I need to block within
-an RCU read-side critical section&rdquo; was a reliable indication
-that this someone did not understand RCU.
-After all, if you are always blocking in an RCU read-side critical
-section, you can probably afford to use a higher-overhead synchronization
-mechanism.
-However, that changed with the advent of the Linux kernel's notifiers,
-whose RCU read-side critical
-sections almost never sleep, but sometimes need to.
-This resulted in the introduction of
-<a href="https://lwn.net/Articles/202847/">sleepable RCU</a>,
-or <i>SRCU</i>.
-
-<p>
-SRCU allows different domains to be defined, with each such domain
-defined by an instance of an <tt>srcu_struct</tt> structure.
-A pointer to this structure must be passed in to each SRCU function,
-for example, <tt>synchronize_srcu(&amp;ss)</tt>, where
-<tt>ss</tt> is the <tt>srcu_struct</tt> structure.
-The key benefit of these domains is that a slow SRCU reader in one
-domain does not delay an SRCU grace period in some other domain.
-That said, one consequence of these domains is that read-side code
-must pass a &ldquo;cookie&rdquo; from <tt>srcu_read_lock()</tt>
-to <tt>srcu_read_unlock()</tt>, for example, as follows:
-
-<blockquote>
-<pre>
- 1 int idx;
- 2
- 3 idx = srcu_read_lock(&amp;ss);
- 4 do_something();
- 5 srcu_read_unlock(&amp;ss, idx);
-</pre>
-</blockquote>
-
-<p>
-As noted above, it is legal to block within SRCU read-side critical sections;
-however, with great power comes great responsibility.
-If you block forever in one of a given domain's SRCU read-side critical
-sections, then that domain's grace periods will also be blocked forever.
-Of course, one good way to block forever is to deadlock, which can
-happen if any operation in a given domain's SRCU read-side critical
-section can block waiting, either directly or indirectly, for that domain's
-grace period to elapse.
-For example, this results in a self-deadlock:
-
-<blockquote>
-<pre>
- 1 int idx;
- 2
- 3 idx = srcu_read_lock(&amp;ss);
- 4 do_something();
- 5 synchronize_srcu(&amp;ss);
- 6 srcu_read_unlock(&amp;ss, idx);
-</pre>
-</blockquote>
-
-<p>
-However, if line&nbsp;5 acquired a mutex that was held across
-a <tt>synchronize_srcu()</tt> for domain <tt>ss</tt>,
-deadlock would still be possible.
-Furthermore, if line&nbsp;5 acquired a mutex that was held across
-a <tt>synchronize_srcu()</tt> for some other domain <tt>ss1</tt>,
-and if an <tt>ss1</tt>-domain SRCU read-side critical section
-acquired another mutex that was held across an <tt>ss</tt>-domain
-<tt>synchronize_srcu()</tt>,
-deadlock would again be possible.
-Such a deadlock cycle could extend across an arbitrarily large number
-of different SRCU domains.
-Again, with great power comes great responsibility.
-
-<p>
-Unlike the other RCU flavors, SRCU read-side critical sections can
-run on idle and even offline CPUs.
-This ability requires that <tt>srcu_read_lock()</tt> and
-<tt>srcu_read_unlock()</tt> contain memory barriers, which means
-that SRCU readers will run a bit slower than would RCU readers.
-It also motivates the <tt>smp_mb__after_srcu_read_unlock()</tt>
-API, which, in combination with <tt>srcu_read_unlock()</tt>,
-guarantees a full memory barrier.
-
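-<p>
-For example, the following obtains a full memory barrier after the
-SRCU read-side critical section, reusing <tt>ss</tt> and <tt>idx</tt>
-from the earlier examples:
-
-<blockquote>
-<pre>
- 1 idx = srcu_read_lock(&amp;ss);
- 2 do_something();
- 3 srcu_read_unlock(&amp;ss, idx);
- 4 smp_mb__after_srcu_read_unlock();  /* Full barrier with unlock. */
-</pre>
-</blockquote>
-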
-<p>
-The
-<a href="https://lwn.net/Articles/609973/#RCU Per-Flavor API Table">SRCU API</a>
-includes
-<tt>srcu_read_lock()</tt>,
-<tt>srcu_read_unlock()</tt>,
-<tt>srcu_dereference()</tt>,
-<tt>srcu_dereference_check()</tt>,
-<tt>synchronize_srcu()</tt>,
-<tt>synchronize_srcu_expedited()</tt>,
-<tt>call_srcu()</tt>,
-<tt>srcu_barrier()</tt>, and
-<tt>srcu_read_lock_held()</tt>.
-It also includes
-<tt>DEFINE_SRCU()</tt>,
-<tt>DEFINE_STATIC_SRCU()</tt>, and
-<tt>init_srcu_struct()</tt>
-APIs for defining and initializing <tt>srcu_struct</tt> structures.
-
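-<p>
-For example, an SRCU domain may be created either at compile time
-or at runtime, as follows (sketch):
-
-<blockquote>
-<pre>
- 1 DEFINE_STATIC_SRCU(ss);  /* Compile-time creation. */
- 2
- 3 struct srcu_struct ss1;  /* Run-time creation... */
- 4 init_srcu_struct(&amp;ss1);  /* ...and initialization. */
-</pre>
-</blockquote>
-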
-<h3><a name="Tasks RCU">Tasks RCU</a></h3>
-
-<p>
-Some forms of tracing use &ldquo;trampolines&rdquo; to handle the
-binary rewriting required to install different types of probes.
-It would be good to be able to free old trampolines, which sounds
-like a job for some form of RCU.
-However, because it is necessary to be able to install a trace
-anywhere in the code, it is not possible to use read-side markers
-such as <tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>.
-In addition, it does not work to have these markers in the trampoline
-itself, because there would need to be instructions following
-<tt>rcu_read_unlock()</tt>.
-Although <tt>synchronize_rcu()</tt> would guarantee that execution
-reached the <tt>rcu_read_unlock()</tt>, it would not be able to
-guarantee that execution had completely left the trampoline.
-
-<p>
-The solution, in the form of
-<a href="https://lwn.net/Articles/607117/"><i>Tasks RCU</i></a>,
-is to have implicit
-read-side critical sections that are delimited by voluntary context
-switches, that is, calls to <tt>schedule()</tt>,
-<tt>cond_resched_rcu_qs()</tt>, and
-<tt>synchronize_rcu_tasks()</tt>.
-In addition, transitions to and from userspace execution also delimit
-tasks-RCU read-side critical sections.
-
-<p>
-The tasks-RCU API is quite compact, consisting only of
-<tt>call_rcu_tasks()</tt>,
-<tt>synchronize_rcu_tasks()</tt>, and
-<tt>rcu_barrier_tasks()</tt>.
-
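-<p>
-For example, a tracer might free an old trampoline as follows.
-This is a minimal sketch: <tt>remove_references_to()</tt> and
-<tt>free_trampoline()</tt> are hypothetical.
-
-<blockquote>
-<pre>
- 1 remove_references_to(tramp);
- 2 synchronize_rcu_tasks();  /* Wait for each task to voluntarily */
- 3                           /* context-switch or visit userspace. */
- 4 free_trampoline(tramp);
-</pre>
-</blockquote>
-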
-<h2><a name="Possible Future Changes">Possible Future Changes</a></h2>
-
-<p>
-One of the tricks that RCU uses to attain update-side scalability is
-to increase grace-period latency with increasing numbers of CPUs.
-If this becomes a serious problem, it will be necessary to rework the
-grace-period state machine so as to avoid the need for the additional
-latency.
-
-<p>
-Expedited grace periods scan the CPUs, so their latency and overhead
-increases with increasing numbers of CPUs.
-If this becomes a serious problem on large systems, it will be necessary
-to do some redesign to avoid this scalability problem.
-
-<p>
-RCU disables CPU hotplug in a few places, perhaps most notably in the
-expedited grace-period and <tt>rcu_barrier()</tt> operations.
-If there is a strong reason to use expedited grace periods in CPU-hotplug
-notifiers, it will be necessary to avoid disabling CPU hotplug.
-This would introduce some complexity, so there had better be a <i>very</i>
-good reason.
-
-<p>
-The tradeoff between grace-period latency on the one hand and interruptions
-of other CPUs on the other hand may need to be re-examined.
-The desire is of course for zero grace-period latency as well as zero
-interprocessor interrupts undertaken during an expedited grace period
-operation.
-While this ideal is unlikely to be achievable, it is quite possible that
-further improvements can be made.
-
-<p>
-The multiprocessor implementations of RCU use a combining tree that
-groups CPUs so as to reduce lock contention and increase cache locality.
-However, this combining tree does not spread its memory across NUMA
-nodes nor does it align the CPU groups with hardware features such
-as sockets or cores.
-Such spreading and alignment is currently believed to be unnecessary
-because the hotpath read-side primitives do not access the combining
-tree, nor does <tt>call_rcu()</tt> in the common case.
-If you believe that your architecture needs such spreading and alignment,
-then your architecture should also benefit from the
-<tt>rcutree.rcu_fanout_leaf</tt> boot parameter, which can be set
-to the number of CPUs in a socket, NUMA node, or whatever.
-If the number of CPUs is too large, use a fraction of the number of
-CPUs.
-If the number of CPUs is a large prime number, well, that certainly
-is an &ldquo;interesting&rdquo; architectural choice!
-More flexible arrangements might be considered, but only if
-<tt>rcutree.rcu_fanout_leaf</tt> has proven inadequate, and only
-if the inadequacy has been demonstrated by a carefully run and
-realistic system-level workload.
-
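-<p>
-For example, on a hypothetical system having 16 CPUs per socket, the
-leaf <tt>rcu_node</tt> structures could be aligned with the sockets
-using the following boot parameter:
-
-<blockquote>
-<pre>
-rcutree.rcu_fanout_leaf=16
-</pre>
-</blockquote>
-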
-<p>
-Please note that arrangements that require RCU to remap CPU numbers will
-require extremely good demonstration of need and full exploration of
-alternatives.
-
-<p>
-There is an embarrassingly large number of flavors of RCU, and this
-number has been increasing over time.
-Perhaps it will be possible to combine some at some future date.
-
-<p>
-RCU's various kthreads are reasonably recent additions.
-It is quite likely that adjustments will be required to more gracefully
-handle extreme loads.
-It might also be necessary to be able to relate CPU utilization by
-RCU's kthreads and softirq handlers to the code that instigated this
-CPU utilization.
-For example, RCU callback overhead might be charged back to the
-originating <tt>call_rcu()</tt> instance, though probably not
-in production kernels.
-
-<h2><a name="Summary">Summary</a></h2>
-
-<p>
-This document has presented more than two decades' worth of RCU
-requirements.
-Given that the requirements keep changing, this will not be the last
-word on this subject, but at least it serves to get an important
-subset of the requirements set forth.
-
-<h2><a name="Acknowledgments">Acknowledgments</a></h2>
-
-I am grateful to Steven Rostedt, Lai Jiangshan, Ingo Molnar,
-Oleg Nesterov, Borislav Petkov, Peter Zijlstra, Boqun Feng, and
-Andy Lutomirski for their help in rendering
-this article human readable, and to Michelle Rankin for her support
-of this effort.
-Other contributions are acknowledged in the Linux kernel's git archive.
-The cartoon is copyright (c) 2013 by Melissa Broussard,
-and is provided
-under the terms of the Creative Commons Attribution-Share Alike 3.0
-United States license.
-
-<p>@@QQAL@@
-
-</body></html>
diff --git a/Documentation/RCU/Design/htmlqqz.sh b/Documentation/RCU/Design/htmlqqz.sh
deleted file mode 100755
index d354f06..0000000
--- a/Documentation/RCU/Design/htmlqqz.sh
+++ /dev/null
@@ -1,108 +0,0 @@
-#!/bin/sh
-#
-# Usage: sh htmlqqz.sh file
-#
-# Extracts and converts quick quizzes in a proto-HTML document file.htmlx.
-# Commands, all of which must be on a line by themselves:
-#
-#	"<p>@@QQ@@": Start of a quick quiz.
-#	"<p>@@QQA@@": Start of a quick-quiz answer.
-#	"<p>@@QQE@@": End of a quick-quiz answer, and thus of the quick quiz.
-#	"<p>@@QQAL@@": Place to put quick-quiz answer list.
-#
-# Places the result in file.html.
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, you can access it online at
-# http://www.gnu.org/licenses/gpl-2.0.html.
-#
-# Copyright (c) 2013 Paul E. McKenney, IBM Corporation.
-
-fn=$1
-if test ! -r $fn.htmlx
-then
-	echo "Error: $fn.htmlx unreadable."
-	exit 1
-fi
-
-echo "<!-- DO NOT HAND EDIT. -->" > $fn.html
-echo "<!-- Instead, edit $fn.htmlx and run 'sh htmlqqz.sh $fn' -->" >> $fn.html
-awk < $fn.htmlx >> $fn.html '
-
-state == "" && $1 != "<p>@@QQ@@" && $1 != "<p>@@QQAL@@" {
-	print $0;
-	if ($0 ~ /^<p>@@QQ/)
-		print "Bad Quick Quiz command: " NR " (expected <p>@@QQ@@ or <p>@@QQAL@@)." > "/dev/stderr"
-	next;
-}
-
-state == "" && $1 == "<p>@@QQ@@" {
-	qqn++;
-	qqlineno = NR;
-	haveqq = 1;
-	state = "qq";
-	print "<p><a name=\"Quick Quiz " qqn "\"><b>Quick Quiz " qqn "</b>:</a>"
-	next;
-}
-
-state == "qq" && $1 != "<p>@@QQA@@" {
-	qq[qqn] = qq[qqn] $0 "\n";
-	print $0
-	if ($0 ~ /^<p>@@QQ/)
-		print "Bad Quick Quiz command: " NR ". (expected <p>@@QQA@@)" > "/dev/stderr"
-	next;
-}
-
-state == "qq" && $1 == "<p>@@QQA@@" {
-	state = "qqa";
-	print "<br><a href=\"#qq" qqn "answer\">Answer</a>"
-	next;
-}
-
-state == "qqa" && $1 != "<p>@@QQE@@" {
-	qqa[qqn] = qqa[qqn] $0 "\n";
-	if ($0 ~ /^<p>@@QQ/)
-		print "Bad Quick Quiz command: " NR " (expected <p>@@QQE@@)." > "/dev/stderr"
-	next;
-}
-
-state == "qqa" && $1 == "<p>@@QQE@@" {
-	state = "";
-	next;
-}
-
-state == "" && $1 == "<p>@@QQAL@@" {
-	haveqq = "";
-	print "<h3><a name=\"Answers to Quick Quizzes\">"
-	print "Answers to Quick Quizzes</a></h3>"
-	print "";
-	for (i = 1; i <= qqn; i++) {
-		print "<a name=\"qq" i "answer\"></a>"
-		print "<p><b>Quick Quiz " i "</b>:"
-		print qq[i];
-		print "";
-		print "</p><p><b>Answer</b>:"
-		print qqa[i];
-		print "";
-		print "</p><p><a href=\"#Quick%20Quiz%20" i "\"><b>Back to Quick Quiz " i "</b>.</a>"
-		print "";
-	}
-	next;
-}
-
-END {
-	if (state != "")
-		print "Unterminated Quick Quiz: " qqlineno "." > "/dev/stderr"
-	else if (haveqq)
-		print "Missing \"<p>@@QQAL@@\", no Quick Quiz." > "/dev/stderr"
-}'
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt
index ec6998b..00a3a38 100644
--- a/Documentation/RCU/trace.txt
+++ b/Documentation/RCU/trace.txt
@@ -237,17 +237,17 @@
 
 The output of "cat rcu/rcu_preempt/rcuexp" looks as follows:
 
-s=21872 wd0=0 wd1=0 wd2=0 wd3=5 n=0 enq=0 sc=21872
+s=21872 wd1=0 wd2=0 wd3=5 n=0 enq=0 sc=21872
 
 These fields are as follows:
 
 o	"s" is the sequence number, with an odd number indicating that
 	an expedited grace period is in progress.
 
-o	"wd0", "wd1", "wd2", and "wd3" are the number of times that an
-	attempt to start an expedited grace period found that someone
-	else had completed an expedited grace period that satisfies the
-	attempted request.  "Our work is done."
+o	"wd1", "wd2", and "wd3" are the number of times that an attempt
+	to start an expedited grace period found that someone else had
+	completed an expedited grace period that satisfies the attempted
+	request.  "Our work is done."
 
 o	"n" is number of times that a concurrent CPU-hotplug operation
 	forced a fallback to a normal grace period.
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index dc49c67..111770f 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -681,22 +681,30 @@
 RCU is analogous to reader-writer locking.  The following unified
 diff shows how closely related RCU and reader-writer locking can be.
 
+	@@ -5,5 +5,5 @@ struct el {
+	 	int data;
+	 	/* Other data fields */
+	 };
+	-rwlock_t listmutex;
+	+spinlock_t listmutex;
+	 struct el head;
+
 	@@ -13,15 +14,15 @@
 		struct list_head *lp;
 		struct el *p;
 
-	-	read_lock();
+	-	read_lock(&listmutex);
 	-	list_for_each_entry(p, head, lp) {
 	+	rcu_read_lock();
 	+	list_for_each_entry_rcu(p, head, lp) {
 			if (p->key == key) {
 				*result = p->data;
-	-			read_unlock();
+	-			read_unlock(&listmutex);
 	+			rcu_read_unlock();
 				return 1;
 			}
 		}
-	-	read_unlock();
+	-	read_unlock(&listmutex);
 	+	rcu_read_unlock();
 		return 0;
 	 }
@@ -732,7 +740,7 @@
  5   int data;                          5   int data;
  6   /* Other data fields */            6   /* Other data fields */
  7 };                                   7 };
- 8 spinlock_t listmutex;                8 spinlock_t listmutex;
+ 8 rwlock_t listmutex;                  8 spinlock_t listmutex;
  9 struct el head;                      9 struct el head;
 
  1 int search(long key, int *result)    1 int search(long key, int *result)
@@ -740,15 +748,15 @@
  3   struct list_head *lp;              3   struct list_head *lp;
  4   struct el *p;                      4   struct el *p;
  5                                      5
- 6   read_lock();                       6   rcu_read_lock();
+ 6   read_lock(&listmutex);             6   rcu_read_lock();
  7   list_for_each_entry(p, head, lp) { 7   list_for_each_entry_rcu(p, head, lp) {
  8     if (p->key == key) {             8     if (p->key == key) {
  9       *result = p->data;             9       *result = p->data;
-10       read_unlock();                10       rcu_read_unlock();
+10       read_unlock(&listmutex);      10       rcu_read_unlock();
 11       return 1;                     11       return 1;
 12     }                               12     }
 13   }                                 13   }
-14   read_unlock();                    14   rcu_read_unlock();
+14   read_unlock(&listmutex);          14   rcu_read_unlock();
 15   return 0;                         15   return 0;
 16 }                                   16 }
 
diff --git a/Documentation/acpi/initrd_table_override.txt b/Documentation/acpi/initrd_table_override.txt
index 35c3f54..eb651a6 100644
--- a/Documentation/acpi/initrd_table_override.txt
+++ b/Documentation/acpi/initrd_table_override.txt
@@ -1,5 +1,5 @@
-Overriding ACPI tables via initrd
-=================================
+Upgrading ACPI tables via initrd
+================================
 
 1) Introduction (What is this about)
 2) What is this for
@@ -9,12 +9,14 @@
 1) What is this about
 ---------------------
 
-If the ACPI_INITRD_TABLE_OVERRIDE compile option is true, it is possible to
-override nearly any ACPI table provided by the BIOS with an instrumented,
-modified one.
+If the ACPI_TABLE_UPGRADE compile option is true, it is possible to
+upgrade the ACPI execution environment that is defined by the ACPI tables
+by replacing the BIOS-provided ACPI tables with instrumented, modified,
+more recent versions, or by installing brand new ACPI tables.
 
-For a full list of ACPI tables that can be overridden, take a look at
-the char *table_sigs[MAX_ACPI_SIGNATURE]; definition in drivers/acpi/osl.c
+For a full list of ACPI tables that can be upgraded/installed, take a look
+at the char *table_sigs[MAX_ACPI_SIGNATURE]; definition in
+drivers/acpi/tables.c.
 All ACPI tables iasl (Intel's ACPI compiler and disassembler) knows should
 be overridable, except:
    - ACPI_SIG_RSDP (has a signature of 6 bytes)
@@ -25,17 +27,20 @@
 2) What is this for
 -------------------
 
-Please keep in mind that this is a debug option.
-ACPI tables should not get overridden for productive use.
-If BIOS ACPI tables are overridden the kernel will get tainted with the
-TAINT_OVERRIDDEN_ACPI_TABLE flag.
-Complain to your platform/BIOS vendor if you find a bug which is so sever
-that a workaround is not accepted in the Linux kernel.
+Complain to your platform/BIOS vendor if you find a bug which is so severe
+that a workaround is not accepted in the Linux kernel. In the meantime,
+this facility allows you to upgrade the buggy tables before your
+platform/BIOS vendor releases an upgraded BIOS binary.
 
-Still, it can and should be enabled in any kernel, because:
-  - There is no functional change with not instrumented initrds
-  - It provides a powerful feature to easily debug and test ACPI BIOS table
-    compatibility with the Linux kernel.
+This facility can be used by platform/BIOS vendors to provide a Linux
+compatible environment without modifying the underlying platform firmware.
+
+This facility also provides a powerful feature to easily debug and test
+ACPI BIOS table compatibility with the Linux kernel by modifying old
+platform provided ACPI tables or inserting new ACPI tables.
+
+It can and should be enabled in any kernel because there is no functional
+change with uninstrumented initrds.
 
 
 3) How does it work
@@ -50,23 +55,31 @@
 # For example add this statement into a _PRT (PCI Routing Table) function
 # of the DSDT:
 Store("HELLO WORLD", debug)
+# And increase the OEM Revision. For example, before modification:
+DefinitionBlock ("DSDT.aml", "DSDT", 2, "INTEL ", "TEMPLATE", 0x00000000)
+# After modification:
+DefinitionBlock ("DSDT.aml", "DSDT", 2, "INTEL ", "TEMPLATE", 0x00000001)
 iasl -sa dsdt.dsl
 # Add the raw ACPI tables to an uncompressed cpio archive.
-# They must be put into a /kernel/firmware/acpi directory inside the
-# cpio archive.
-# The uncompressed cpio archive must be the first.
-# Other, typically compressed cpio archives, must be
-# concatenated on top of the uncompressed one.
+# They must be put into a /kernel/firmware/acpi directory inside the cpio
+# archive. Note that if the table put here matches a platform table
+# (same Table Signature, same OEMID, and same OEM Table ID) with a more
+# recent OEM Revision, the platform table will be upgraded by this
+# table. If the table put here doesn't match any platform table (different
+# Table Signature, OEMID, or OEM Table ID), this table will be appended.
 mkdir -p kernel/firmware/acpi
 cp dsdt.aml kernel/firmware/acpi
-# A maximum of: #define ACPI_OVERRIDE_TABLES 10
-# tables are  currently allowed (see osl.c):
+# A maximum of "NR_ACPI_INITRD_TABLES (64)" tables are currently allowed
+# (see tables.c):
 iasl -sa facp.dsl
 iasl -sa ssdt1.dsl
 cp facp.aml kernel/firmware/acpi
 cp ssdt1.aml kernel/firmware/acpi
-# Create the uncompressed cpio archive and concatenate the original initrd
-# on top:
+# The uncompressed cpio archive must come first. Other cpio archives,
+# typically compressed, must be concatenated on top of the uncompressed
+# one. The following command creates the uncompressed cpio archive and
+# concatenates the original initrd on top:
 find kernel | cpio -H newc --create > /boot/instrumented_initrd
 cat /boot/initrd >>/boot/instrumented_initrd
 # reboot with increased acpi debug level, e.g. boot params:
diff --git a/Documentation/arm64/booting.txt b/Documentation/arm64/booting.txt
index 56d6d8b..8d0df62 100644
--- a/Documentation/arm64/booting.txt
+++ b/Documentation/arm64/booting.txt
@@ -132,6 +132,10 @@
 physical offset of the Image so it is recommended that the Image be
 placed as close as possible to the start of system RAM.
 
+If an initrd/initramfs is passed to the kernel at boot, it must reside
+entirely within a 1 GB aligned physical memory window of up to 32 GB in
+size that fully covers the kernel Image as well.
+
 Any memory described to the kernel (even that below the start of the
 image) which is not marked as reserved from the kernel (e.g., with a
 memreserve region in the device tree) will be considered as available to
diff --git a/Documentation/devicetree/bindings/arm/altera/socfpga-eccmgr.txt b/Documentation/devicetree/bindings/arm/altera/socfpga-eccmgr.txt
index 885f93d..5a6b160 100644
--- a/Documentation/devicetree/bindings/arm/altera/socfpga-eccmgr.txt
+++ b/Documentation/devicetree/bindings/arm/altera/socfpga-eccmgr.txt
@@ -3,6 +3,7 @@
 The ECC Manager counts and corrects single bit errors and counts/handles
 double bit errors which are uncorrectable.
 
+Cyclone5 and Arria5 ECC Manager
 Required Properties:
 - compatible : Should be "altr,socfpga-ecc-manager"
 - #address-cells: must be 1
@@ -47,3 +48,52 @@
 			interrupts = <0 178 1>, <0 179 1>;
 		};
 	};
+
+Arria10 SoCFPGA ECC Manager
+The Arria10 SoC ECC Manager handles the IRQs for each peripheral
+in a shared register instead of individual IRQs like the Cyclone5
+and Arria5. Therefore the device tree is different as well.
+
+Required Properties:
+- compatible : Should be "altr,socfpga-a10-ecc-manager"
+- altr,sysmgr-syscon : phandle to Arria10 System Manager Block
+	containing the ECC manager registers.
+- #address-cells: must be 1
+- #size-cells: must be 1
+- interrupts : Should be the single-bit error interrupt, then the double-bit
+	error interrupt. Note the rising edge type.
+- ranges : standard definition, should translate from local addresses
+
+Subcomponents:
+
+L2 Cache ECC
+Required Properties:
+- compatible : Should be "altr,socfpga-a10-l2-ecc"
+- reg : Address and size for ECC error interrupt clear registers.
+
+On-Chip RAM ECC
+Required Properties:
+- compatible : Should be "altr,socfpga-a10-ocram-ecc"
+- reg        : Address and size for ECC block registers.
+
+Example:
+
+	eccmgr: eccmgr@ffd06000 {
+		compatible = "altr,socfpga-a10-ecc-manager";
+		altr,sysmgr-syscon = <&sysmgr>;
+		#address-cells = <1>;
+		#size-cells = <1>;
+		interrupts = <0 2 IRQ_TYPE_LEVEL_HIGH>,
+			     <0 0 IRQ_TYPE_LEVEL_HIGH>;
+		ranges;
+
+		l2-ecc@ffd06010 {
+			compatible = "altr,socfpga-a10-l2-ecc";
+			reg = <0xffd06010 0x4>;
+		};
+
+		ocram-ecc@ff8c3000 {
+			compatible = "altr,socfpga-a10-ocram-ecc";
+			reg = <0xff8c3000 0x90>;
+		};
+	};
diff --git a/Documentation/devicetree/bindings/arm/pmu.txt b/Documentation/devicetree/bindings/arm/pmu.txt
index 6eb73be..74d5417 100644
--- a/Documentation/devicetree/bindings/arm/pmu.txt
+++ b/Documentation/devicetree/bindings/arm/pmu.txt
@@ -22,10 +22,11 @@
 	"arm,arm11mpcore-pmu"
 	"arm,arm1176-pmu"
 	"arm,arm1136-pmu"
+	"brcm,vulcan-pmu"
+	"cavium,thunder-pmu"
 	"qcom,scorpion-pmu"
 	"qcom,scorpion-mp-pmu"
 	"qcom,krait-pmu"
-	"cavium,thunder-pmu"
 - interrupts : 1 combined interrupt or 1 per core. If the interrupt is a per-cpu
                interrupt (PPI) then 1 interrupt should be specified.
 
diff --git a/Documentation/devicetree/bindings/devfreq/event/exynos-nocp.txt b/Documentation/devicetree/bindings/devfreq/event/exynos-nocp.txt
new file mode 100644
index 0000000..fd459f0
--- /dev/null
+++ b/Documentation/devicetree/bindings/devfreq/event/exynos-nocp.txt
@@ -0,0 +1,26 @@
+
+* Samsung Exynos NoC (Network on Chip) Probe device
+
+The Samsung Exynos542x SoC has NoC (Network on Chip) Probes for the NoC
+bus. The NoC provides the primitive values used to derive performance
+data. The packets that the NoC probes detect are transported over the
+network infrastructure to observer units. You can configure probes to
+capture packets with header or data on the data request response network,
+or as traffic debug or statistic collectors. The Exynos542x bus has
+multiple NoC probes that provide bandwidth information about the behavior
+of the SoC, which you can use while analyzing system performance.
+
+Required properties:
+- compatible: Should be "samsung,exynos5420-nocp"
+- reg: physical base address of each NoC Probe and length of memory mapped region.
+
+Optional properties:
+- clock-names : the name of clock used by the NoC Probe, "nocp"
+- clocks : phandles for clock specified in "clock-names" property
+
+Example : NoC Probe nodes in Device Tree are listed below.
+
+	nocp_mem0_0: nocp@10CA1000 {
+		compatible = "samsung,exynos5420-nocp";
+		reg = <0x10CA1000 0x200>;
+	};
diff --git a/Documentation/devicetree/bindings/devfreq/exynos-bus.txt b/Documentation/devicetree/bindings/devfreq/exynos-bus.txt
new file mode 100644
index 0000000..d3ec8e6
--- /dev/null
+++ b/Documentation/devicetree/bindings/devfreq/exynos-bus.txt
@@ -0,0 +1,409 @@
+* Generic Exynos Bus frequency device
+
+The Samsung Exynos SoC has many buses for data transfer between DRAM
+and sub-blocks in the SoC. Most Exynos SoCs share a common bus
+architecture. Generally, each bus of an Exynos SoC includes a source
+clock and a power line, which make it possible to change the clock
+frequency of the bus at runtime. To monitor the usage of each bus at
+runtime, the driver uses the PPMU (Platform Performance Monitoring
+Unit), which is able to measure the current load of sub-blocks.
+
+The Exynos SoC includes various sub-blocks, each of which has its own
+AXI bus. Each AXI bus has its own source clock, but it does not
+necessarily have its own power line; a power line might be shared among
+two or more sub-blocks. So, based on the role of each sub-block, the
+bus devices can be divided into two types:
+- parent bus device
+- passive bus device
+
+Basically, parent and passive bus devices share the same power line.
+Only the parent bus device can change the voltage of the shared power
+line; the remaining (passive) bus devices depend on the decision of the
+parent bus device. If three blocks share the VDD_xxx power line, only
+one block should be the parent device, and the remaining blocks should
+depend on the parent device as passive devices.
+
+	VDD_xxx |--- A block (parent)
+		|--- B block (passive)
+		|--- C block (passive)
+
+The composition differs somewhat among Exynos SoCs because each Exynos
+SoC has different sub-blocks. Therefore, such differences should be
+specified in the devicetree file instead of in each device driver. As a
+result, this driver is able to support bus frequency scaling for all
+Exynos SoCs.
+
+Required properties for all bus devices:
+- compatible: Should be "samsung,exynos-bus".
+- clock-names : the name of clock used by the bus, "bus".
+- clocks : phandles for clock specified in "clock-names" property.
+- operating-points-v2: the OPP table including frequency/voltage information
+  to support DVFS (Dynamic Voltage/Frequency Scaling) feature.
+
+Required properties only for parent bus device:
+- vdd-supply: the regulator to provide the buses with the voltage.
+- devfreq-events: the devfreq-event device to monitor the current utilization
+  of buses.
+
+Required properties only for passive bus device:
+- devfreq: the parent bus device.
+
+Optional properties only for parent bus device:
+- exynos,saturation-ratio: the percentage value which is used to calibrate
+			the performance count against total cycle count.
+- exynos,voltage-tolerance: the percentage value for bus voltage tolerance
+			which is used to calculate the max voltage.
+
+Detailed correlation between sub-blocks and power line according to Exynos SoC:
+- In case of Exynos3250, there are two power lines, as follows:
+	VDD_MIF |--- DMC
+
+	VDD_INT |--- LEFTBUS (parent device)
+		|--- PERIL
+		|--- MFC
+		|--- G3D
+		|--- RIGHTBUS
+		|--- PERIR
+		|--- FSYS
+		|--- LCD0
+		|--- PERIR
+		|--- ISP
+		|--- CAM
+
+- In case of Exynos4210, there is one power line, as follows:
+	VDD_INT |--- DMC (parent device)
+		|--- LEFTBUS
+		|--- PERIL
+		|--- MFC(L)
+		|--- G3D
+		|--- TV
+		|--- LCD0
+		|--- RIGHTBUS
+		|--- PERIR
+		|--- MFC(R)
+		|--- CAM
+		|--- FSYS
+		|--- GPS
+		|--- LCD0
+		|--- LCD1
+
+- In case of Exynos4x12, there are two power lines, as follows:
+	VDD_MIF |--- DMC
+
+	VDD_INT |--- LEFTBUS (parent device)
+		|--- PERIL
+		|--- MFC(L)
+		|--- G3D
+		|--- TV
+		|--- IMAGE
+		|--- RIGHTBUS
+		|--- PERIR
+		|--- MFC(R)
+		|--- CAM
+		|--- FSYS
+		|--- GPS
+		|--- LCD0
+		|--- ISP
+
+- In case of Exynos5422, there are two power lines, as follows:
+	VDD_MIF |--- DREX 0 (parent device, DRAM EXpress controller)
+	        |--- DREX 1
+
+	VDD_INT |--- NoC_Core (parent device)
+		|--- G2D
+		|--- G3D
+		|--- DISP1
+		|--- NoC_WCORE
+		|--- GSCL
+		|--- MSCL
+		|--- ISP
+		|--- MFC
+		|--- GEN
+		|--- PERIS
+		|--- PERIC
+		|--- FSYS
+		|--- FSYS2
+
+Example1:
+	Show the AXI buses of the Exynos3250 SoC. Exynos3250 divides the buses
+	by power line (regulator). The MIF (Memory Interface) AXI bus is used
+	to transfer data between DRAM and CPU and uses the VDD_MIF regulator.
+
+	- MIF (Memory Interface) block
+	: VDD_MIF |--- DMC (Dynamic Memory Controller)
+
+	- INT (Internal) block
+	: VDD_INT |--- LEFTBUS (parent device)
+		  |--- PERIL
+		  |--- MFC
+		  |--- G3D
+		  |--- RIGHTBUS
+		  |--- FSYS
+		  |--- LCD0
+		  |--- PERIR
+		  |--- ISP
+		  |--- CAM
+
+	- MIF bus's frequency/voltage table
+	-----------------------
+	|Lv| Freq   | Voltage |
+	-----------------------
+	|L1| 50000  |800000   |
+	|L2| 100000 |800000   |
+	|L3| 134000 |800000   |
+	|L4| 200000 |825000   |
+	|L5| 400000 |875000   |
+	-----------------------
+
+	- INT bus's frequency/voltage table
+	----------------------------------------------------------
+	|Block|LEFTBUS|RIGHTBUS|MCUISP |ISP    |PERIL  ||VDD_INT |
+	| name|       |LCD0    |       |       |       ||        |
+	|     |       |FSYS    |       |       |       ||        |
+	|     |       |MFC     |       |       |       ||        |
+	----------------------------------------------------------
+	|Mode |*parent|passive |passive|passive|passive||        |
+	----------------------------------------------------------
+	|Lv   |Frequency                               ||Voltage |
+	----------------------------------------------------------
+	|L1   |50000  |50000   |50000  |50000  |50000  ||900000  |
+	|L2   |80000  |80000   |80000  |80000  |80000  ||900000  |
+	|L3   |100000 |100000  |100000 |100000 |100000 ||1000000 |
+	|L4   |134000 |134000  |200000 |200000 |       ||1000000 |
+	|L5   |200000 |200000  |400000 |300000 |       ||1000000 |
+	----------------------------------------------------------
+
+Example2 :
+	The bus of DMC (Dynamic Memory Controller) block in exynos3250.dtsi
+	is listed below:
+
+	bus_dmc: bus_dmc {
+		compatible = "samsung,exynos-bus";
+		clocks = <&cmu_dmc CLK_DIV_DMC>;
+		clock-names = "bus";
+		operating-points-v2 = <&bus_dmc_opp_table>;
+		status = "disabled";
+	};
+
+	bus_dmc_opp_table: opp_table1 {
+		compatible = "operating-points-v2";
+		opp-shared;
+
+		opp@50000000 {
+			opp-hz = /bits/ 64 <50000000>;
+			opp-microvolt = <800000>;
+		};
+		opp@100000000 {
+			opp-hz = /bits/ 64 <100000000>;
+			opp-microvolt = <800000>;
+		};
+		opp@134000000 {
+			opp-hz = /bits/ 64 <134000000>;
+			opp-microvolt = <800000>;
+		};
+		opp@200000000 {
+			opp-hz = /bits/ 64 <200000000>;
+			opp-microvolt = <825000>;
+		};
+		opp@400000000 {
+			opp-hz = /bits/ 64 <400000000>;
+			opp-microvolt = <875000>;
+		};
+	};
+
+	bus_leftbus: bus_leftbus {
+		compatible = "samsung,exynos-bus";
+		clocks = <&cmu CLK_DIV_GDL>;
+		clock-names = "bus";
+		operating-points-v2 = <&bus_leftbus_opp_table>;
+		status = "disabled";
+	};
+
+	bus_rightbus: bus_rightbus {
+		compatible = "samsung,exynos-bus";
+		clocks = <&cmu CLK_DIV_GDR>;
+		clock-names = "bus";
+		operating-points-v2 = <&bus_leftbus_opp_table>;
+		status = "disabled";
+	};
+
+	bus_lcd0: bus_lcd0 {
+		compatible = "samsung,exynos-bus";
+		clocks = <&cmu CLK_DIV_ACLK_160>;
+		clock-names = "bus";
+		operating-points-v2 = <&bus_leftbus_opp_table>;
+		status = "disabled";
+	};
+
+	bus_fsys: bus_fsys {
+		compatible = "samsung,exynos-bus";
+		clocks = <&cmu CLK_DIV_ACLK_200>;
+		clock-names = "bus";
+		operating-points-v2 = <&bus_leftbus_opp_table>;
+		status = "disabled";
+	};
+
+	bus_mcuisp: bus_mcuisp {
+		compatible = "samsung,exynos-bus";
+		clocks = <&cmu CLK_DIV_ACLK_400_MCUISP>;
+		clock-names = "bus";
+		operating-points-v2 = <&bus_mcuisp_opp_table>;
+		status = "disabled";
+	};
+
+	bus_isp: bus_isp {
+		compatible = "samsung,exynos-bus";
+		clocks = <&cmu CLK_DIV_ACLK_266>;
+		clock-names = "bus";
+		operating-points-v2 = <&bus_isp_opp_table>;
+		status = "disabled";
+	};
+
+	bus_peril: bus_peril {
+		compatible = "samsung,exynos-bus";
+		clocks = <&cmu CLK_DIV_ACLK_100>;
+		clock-names = "bus";
+		operating-points-v2 = <&bus_peril_opp_table>;
+		status = "disabled";
+	};
+
+	bus_mfc: bus_mfc {
+		compatible = "samsung,exynos-bus";
+		clocks = <&cmu CLK_SCLK_MFC>;
+		clock-names = "bus";
+		operating-points-v2 = <&bus_leftbus_opp_table>;
+		status = "disabled";
+	};
+
+	bus_leftbus_opp_table: opp_table1 {
+		compatible = "operating-points-v2";
+		opp-shared;
+
+		opp@50000000 {
+			opp-hz = /bits/ 64 <50000000>;
+			opp-microvolt = <900000>;
+		};
+		opp@80000000 {
+			opp-hz = /bits/ 64 <80000000>;
+			opp-microvolt = <900000>;
+		};
+		opp@100000000 {
+			opp-hz = /bits/ 64 <100000000>;
+			opp-microvolt = <1000000>;
+		};
+		opp@134000000 {
+			opp-hz = /bits/ 64 <134000000>;
+			opp-microvolt = <1000000>;
+		};
+		opp@200000000 {
+			opp-hz = /bits/ 64 <200000000>;
+			opp-microvolt = <1000000>;
+		};
+	};
+
+	bus_mcuisp_opp_table: opp_table2 {
+		compatible = "operating-points-v2";
+		opp-shared;
+
+		opp@50000000 {
+			opp-hz = /bits/ 64 <50000000>;
+		};
+		opp@80000000 {
+			opp-hz = /bits/ 64 <80000000>;
+		};
+		opp@100000000 {
+			opp-hz = /bits/ 64 <100000000>;
+		};
+		opp@200000000 {
+			opp-hz = /bits/ 64 <200000000>;
+		};
+		opp@400000000 {
+			opp-hz = /bits/ 64 <400000000>;
+		};
+	};
+
+	bus_isp_opp_table: opp_table3 {
+		compatible = "operating-points-v2";
+		opp-shared;
+
+		opp@50000000 {
+			opp-hz = /bits/ 64 <50000000>;
+		};
+		opp@80000000 {
+			opp-hz = /bits/ 64 <80000000>;
+		};
+		opp@100000000 {
+			opp-hz = /bits/ 64 <100000000>;
+		};
+		opp@200000000 {
+			opp-hz = /bits/ 64 <200000000>;
+		};
+		opp@300000000 {
+			opp-hz = /bits/ 64 <300000000>;
+		};
+	};
+
+	bus_peril_opp_table: opp_table4 {
+		compatible = "operating-points-v2";
+		opp-shared;
+
+		opp@50000000 {
+			opp-hz = /bits/ 64 <50000000>;
+		};
+		opp@80000000 {
+			opp-hz = /bits/ 64 <80000000>;
+		};
+		opp@100000000 {
+			opp-hz = /bits/ 64 <100000000>;
+		};
+	};
+
+
+	A usage example that handles the bus frequency and voltage at runtime
+	in exynos3250-rinato.dts is listed below:
+
+	&bus_dmc {
+		devfreq-events = <&ppmu_dmc0_3>, <&ppmu_dmc1_3>;
+		vdd-supply = <&buck1_reg>;	/* VDD_MIF */
+		status = "okay";
+	};
+
+	&bus_leftbus {
+		devfreq-events = <&ppmu_leftbus_3>, <&ppmu_rightbus_3>;
+		vdd-supply = <&buck3_reg>;
+		status = "okay";
+	};
+
+	&bus_rightbus {
+		devfreq = <&bus_leftbus>;
+		status = "okay";
+	};
+
+	&bus_lcd0 {
+		devfreq = <&bus_leftbus>;
+		status = "okay";
+	};
+
+	&bus_fsys {
+		devfreq = <&bus_leftbus>;
+		status = "okay";
+	};
+
+	&bus_mcuisp {
+		devfreq = <&bus_leftbus>;
+		status = "okay";
+	};
+
+	&bus_isp {
+		devfreq = <&bus_leftbus>;
+		status = "okay";
+	};
+
+	&bus_peril {
+		devfreq = <&bus_leftbus>;
+		status = "okay";
+	};
+
+	&bus_mfc {
+		devfreq = <&bus_leftbus>;
+		status = "okay";
+	};
diff --git a/Documentation/devicetree/bindings/hwmon/ltc2978.txt b/Documentation/devicetree/bindings/hwmon/ltc2978.txt
index a7afbf6..bf2a47b 100644
--- a/Documentation/devicetree/bindings/hwmon/ltc2978.txt
+++ b/Documentation/devicetree/bindings/hwmon/ltc2978.txt
@@ -13,6 +13,7 @@
   * "lltc,ltc3886"
   * "lltc,ltc3887"
   * "lltc,ltm2987"
+  * "lltc,ltm4675"
   * "lltc,ltm4676"
 - reg: I2C slave address
 
diff --git a/Documentation/devicetree/bindings/leds/common.txt b/Documentation/devicetree/bindings/leds/common.txt
index 6841984..af10678 100644
--- a/Documentation/devicetree/bindings/leds/common.txt
+++ b/Documentation/devicetree/bindings/leds/common.txt
@@ -37,6 +37,9 @@
                      property is mandatory for the LEDs in the non-flash modes
                      (e.g. torch or indicator).
 
+- panic-indicator : This property specifies that the LED should be used,
+		    if at all possible, as a panic indicator.
+
 Required properties for flash LED child nodes:
 - flash-max-microamp : Maximum flash LED supply current in microamperes.
 - flash-max-timeout-us : Maximum timeout in microseconds after which the flash
diff --git a/Documentation/devicetree/bindings/leds/leds-gpio.txt b/Documentation/devicetree/bindings/leds/leds-gpio.txt
index fea1ebf..cbbeb18 100644
--- a/Documentation/devicetree/bindings/leds/leds-gpio.txt
+++ b/Documentation/devicetree/bindings/leds/leds-gpio.txt
@@ -23,6 +23,8 @@
   property is not present.
 - retain-state-suspended: (optional) The suspend state can be retained.Such
   as charge-led gpio.
+- panic-indicator : (optional)
+  see Documentation/devicetree/bindings/leds/common.txt
 
 Examples:
 
diff --git a/Documentation/devicetree/bindings/mmc/sdhci-st.txt b/Documentation/devicetree/bindings/mmc/sdhci-st.txt
index 18d950d..88faa91 100644
--- a/Documentation/devicetree/bindings/mmc/sdhci-st.txt
+++ b/Documentation/devicetree/bindings/mmc/sdhci-st.txt
@@ -38,7 +38,7 @@
 - bus-width:		Number of data lines.
 			See:  Documentation/devicetree/bindings/mmc/mmc.txt.
 
-- max-frequency: 	Can be 200MHz, 100Mz or 50MHz (default) and used for
+- max-frequency:	Can be 200MHz, 100MHz or 50MHz (default) and used for
 			configuring the CCONFIG3 in the mmcss.
 			See:  Documentation/devicetree/bindings/mmc/mmc.txt.
 
@@ -48,7 +48,7 @@
 - vqmmc-supply:		Phandle to the regulator dt node, mentioned as the vcc/vdd
 			supply in eMMC/SD specs.
 
-- sd-uhs--sdr50:	To enable the SDR50 in the mmcss.
+- sd-uhs-sdr50:	To enable the SDR50 in the mmcss.
 			See:  Documentation/devicetree/bindings/mmc/mmc.txt.
 
 - sd-uhs-sdr104:	To enable the SDR104 in the mmcss.
diff --git a/Documentation/devicetree/bindings/mmc/tmio_mmc.txt b/Documentation/devicetree/bindings/mmc/tmio_mmc.txt
index 7fb746d..0f610d4 100644
--- a/Documentation/devicetree/bindings/mmc/tmio_mmc.txt
+++ b/Documentation/devicetree/bindings/mmc/tmio_mmc.txt
@@ -26,3 +26,6 @@
 
 Optional properties:
 - toshiba,mmc-wrprotect-disable: write-protect detection is unavailable
+- pinctrl-names: should be "default", "state_uhs"
+- pinctrl-0: should contain default/high speed pin ctrl
+- pinctrl-1: should contain uhs mode pin ctrl
diff --git a/Documentation/devicetree/bindings/mmc/usdhi6rol0.txt b/Documentation/devicetree/bindings/mmc/usdhi6rol0.txt
index 8babdaa..6d1b797 100644
--- a/Documentation/devicetree/bindings/mmc/usdhi6rol0.txt
+++ b/Documentation/devicetree/bindings/mmc/usdhi6rol0.txt
@@ -12,6 +12,12 @@
 
 - vmmc-supply:	a phandle of a regulator, supplying Vcc to the card
 - vqmmc-supply:	a phandle of a regulator, supplying VccQ to the card
+- pinctrl-names: Can contain a "default" entry and a "state_uhs"
+                 entry. The state_uhs entry is used together with the default
+                 entry when the board requires distinct settings for UHS speeds.
+
+- pinctrl-N: One property for each name listed in pinctrl-names, see
+             ../pinctrl/pinctrl-bindings.txt.
 
 Additionally any standard mmc bindings from mmc.txt can be used.
 
diff --git a/Documentation/devicetree/bindings/numa.txt b/Documentation/devicetree/bindings/numa.txt
new file mode 100644
index 0000000..21b3505
--- /dev/null
+++ b/Documentation/devicetree/bindings/numa.txt
@@ -0,0 +1,275 @@
+==============================================================================
+NUMA binding description.
+==============================================================================
+
+==============================================================================
+1 - Introduction
+==============================================================================
+
+Systems employing a Non-Uniform Memory Access (NUMA) architecture contain
+collections of hardware resources including processors, memory, and I/O
+buses, which together comprise what is commonly known as a NUMA node.
+Processor accesses to memory within the local NUMA node are generally
+faster than processor accesses to memory outside of the local NUMA node.
+DT defines interfaces that allow the platform to convey NUMA node
+topology information to the OS.
+
+==============================================================================
+2 - numa-node-id
+==============================================================================
+
+For the purpose of identification, each NUMA node is associated with a unique
+token known as a node id. For the purposes of this binding,
+a node id is a 32-bit integer.
+
+A device node is associated with a NUMA node by the presence of a
+numa-node-id property which contains the node id of the device.
+
+Example:
+	/* numa node 0 */
+	numa-node-id = <0>;
+
+	/* numa node 1 */
+	numa-node-id = <1>;
+
+==============================================================================
+3 - distance-map
+==============================================================================
+
+The optional device tree node distance-map describes the relative
+distance (memory latency) between all numa nodes.
+
+- compatible : Should at least contain "numa-distance-map-v1".
+
+- distance-matrix
+  This property defines a matrix to describe the relative distances
+  between all numa nodes.
+  It is represented as a list of node pairs and their relative distance.
+
+  Note:
+	1. Each entry represents the distance from the first node to the
+	second node. The distances are equal in either direction.
+	2. The distance from a node to itself (the local distance) is
+	represented with the value 10, and all internode distances should
+	be represented with values greater than 10.
+	3. distance-matrix should have entries in lexicographical ascending
+	order of nodes.
+	4. There must be only one distance-map device node, and it must
+	reside in the root node.
+	5. If the distance-map node is not present, a default
+	distance-matrix is used.
+
+Example:
+	4 nodes connected in mesh/ring topology as below,
+
+		0_______20______1
+		|               |
+		|               |
+		20             20
+		|               |
+		|               |
+		|_______________|
+		3       20      2
+
+	If the relative distance for each hop is 20,
+	then the internode distances would be:
+	      0 -> 1 = 20
+	      1 -> 2 = 20
+	      2 -> 3 = 20
+	      3 -> 0 = 20
+	      0 -> 2 = 40
+	      1 -> 3 = 40
+
+     and the DT representation of this distance matrix is:
+
+		distance-map {
+			 compatible = "numa-distance-map-v1";
+			 distance-matrix = <0 0  10>,
+					   <0 1  20>,
+					   <0 2  40>,
+					   <0 3  20>,
+					   <1 0  20>,
+					   <1 1  10>,
+					   <1 2  20>,
+					   <1 3  40>,
+					   <2 0  40>,
+					   <2 1  20>,
+					   <2 2  10>,
+					   <2 3  20>,
+					   <3 0  20>,
+					   <3 1  40>,
+					   <3 2  20>,
+					   <3 3  10>;
+		};
+
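+As a minimal C sketch (for illustration only; the sink function
+numa_set_distance() is a placeholder for whatever the OS provides), an OS
+might walk this property as follows:
+
+	struct device_node *map = of_find_node_by_path("/distance-map");
+	int i, count = of_property_count_u32_elems(map, "distance-matrix");
+
+	/* Entries come in <from to distance> triplets. */
+	for (i = 0; i + 2 < count; i += 3) {
+		u32 from, to, distance;
+
+		of_property_read_u32_index(map, "distance-matrix", i, &from);
+		of_property_read_u32_index(map, "distance-matrix", i + 1, &to);
+		of_property_read_u32_index(map, "distance-matrix", i + 2,
+					   &distance);
+		numa_set_distance(from, to, distance);
+	}
+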
+==============================================================================
+4 - Example dts
+==============================================================================
+
+A dual-socket system consists of 2 boards connected through a CCN bus,
+with each board having one socket/SoC of 8 CPUs, memory and a PCI bus.
+
+	memory@c00000 {
+		device_type = "memory";
+		reg = <0x0 0xc00000 0x0 0x80000000>;
+		/* node 0 */
+		numa-node-id = <0>;
+	};
+
+	memory@10000000000 {
+		device_type = "memory";
+		reg = <0x100 0x0 0x0 0x80000000>;
+		/* node 1 */
+		numa-node-id = <1>;
+	};
+
+	cpus {
+		#address-cells = <2>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			compatible =  "arm,armv8";
+			reg = <0x0 0x0>;
+			enable-method = "psci";
+			/* node 0 */
+			numa-node-id = <0>;
+		};
+		cpu@1 {
+			device_type = "cpu";
+			compatible =  "arm,armv8";
+			reg = <0x0 0x1>;
+			enable-method = "psci";
+			numa-node-id = <0>;
+		};
+		cpu@2 {
+			device_type = "cpu";
+			compatible =  "arm,armv8";
+			reg = <0x0 0x2>;
+			enable-method = "psci";
+			numa-node-id = <0>;
+		};
+		cpu@3 {
+			device_type = "cpu";
+			compatible =  "arm,armv8";
+			reg = <0x0 0x3>;
+			enable-method = "psci";
+			numa-node-id = <0>;
+		};
+		cpu@4 {
+			device_type = "cpu";
+			compatible =  "arm,armv8";
+			reg = <0x0 0x4>;
+			enable-method = "psci";
+			numa-node-id = <0>;
+		};
+		cpu@5 {
+			device_type = "cpu";
+			compatible =  "arm,armv8";
+			reg = <0x0 0x5>;
+			enable-method = "psci";
+			numa-node-id = <0>;
+		};
+		cpu@6 {
+			device_type = "cpu";
+			compatible =  "arm,armv8";
+			reg = <0x0 0x6>;
+			enable-method = "psci";
+			numa-node-id = <0>;
+		};
+		cpu@7 {
+			device_type = "cpu";
+			compatible =  "arm,armv8";
+			reg = <0x0 0x7>;
+			enable-method = "psci";
+			numa-node-id = <0>;
+		};
+		cpu@8 {
+			device_type = "cpu";
+			compatible =  "arm,armv8";
+			reg = <0x0 0x8>;
+			enable-method = "psci";
+			/* node 1 */
+			numa-node-id = <1>;
+		};
+		cpu@9 {
+			device_type = "cpu";
+			compatible =  "arm,armv8";
+			reg = <0x0 0x9>;
+			enable-method = "psci";
+			numa-node-id = <1>;
+		};
+		cpu@a {
+			device_type = "cpu";
+			compatible =  "arm,armv8";
+			reg = <0x0 0xa>;
+			enable-method = "psci";
+			numa-node-id = <1>;
+		};
+		cpu@b {
+			device_type = "cpu";
+			compatible =  "arm,armv8";
+			reg = <0x0 0xb>;
+			enable-method = "psci";
+			numa-node-id = <1>;
+		};
+		cpu@c {
+			device_type = "cpu";
+			compatible =  "arm,armv8";
+			reg = <0x0 0xc>;
+			enable-method = "psci";
+			numa-node-id = <1>;
+		};
+		cpu@d {
+			device_type = "cpu";
+			compatible =  "arm,armv8";
+			reg = <0x0 0xd>;
+			enable-method = "psci";
+			numa-node-id = <1>;
+		};
+		cpu@e {
+			device_type = "cpu";
+			compatible =  "arm,armv8";
+			reg = <0x0 0xe>;
+			enable-method = "psci";
+			numa-node-id = <1>;
+		};
+		cpu@f {
+			device_type = "cpu";
+			compatible =  "arm,armv8";
+			reg = <0x0 0xf>;
+			enable-method = "psci";
+			numa-node-id = <1>;
+		};
+	};
+
+	pcie0: pcie0@848000000000 {
+		compatible = "arm,armv8";
+		device_type = "pci";
+		bus-range = <0 255>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0x8480 0x00000000 0 0x10000000>;  /* Configuration space */
+		ranges = <0x03000000 0x8010 0x00000000 0x8010 0x00000000 0x70 0x00000000>;
+		/* node 0 */
+		numa-node-id = <0>;
+	};
+
+	pcie1: pcie1@948000000000 {
+		compatible = "arm,armv8";
+		device_type = "pci";
+		bus-range = <0 255>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		reg = <0x9480 0x00000000 0 0x10000000>;  /* Configuration space */
+		ranges = <0x03000000 0x9010 0x00000000 0x9010 0x00000000 0x70 0x00000000>;
+		/* node 1 */
+		numa-node-id = <1>;
+	};
+
+	distance-map {
+		compatible = "numa-distance-map-v1";
+		distance-matrix = <0 0 10>,
+				  <0 1 20>,
+				  <1 1 10>;
+	};
diff --git a/Documentation/devicetree/bindings/power/rockchip-io-domain.txt b/Documentation/devicetree/bindings/power/rockchip-io-domain.txt
index c84fb47..d23dc00 100644
--- a/Documentation/devicetree/bindings/power/rockchip-io-domain.txt
+++ b/Documentation/devicetree/bindings/power/rockchip-io-domain.txt
@@ -37,8 +37,10 @@
   - "rockchip,rk3368-pmu-io-voltage-domain" for rk3368 pmu-domains
   - "rockchip,rk3399-io-voltage-domain" for rk3399
   - "rockchip,rk3399-pmu-io-voltage-domain" for rk3399 pmu-domains
-- rockchip,grf: phandle to the syscon managing the "general register files"
 
+Deprecated properties:
+- rockchip,grf: phandle to the syscon managing the "general register files"
+    Systems should move the io-domains to a sub-node of the grf simple-mfd.
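+
+    For example, the preferred layout looks roughly like this (a sketch;
+    the node names and the rk3288 compatibles are illustrative):
+
+	grf: syscon@ff770000 {
+		compatible = "rockchip,rk3288-grf", "syscon", "simple-mfd";
+
+		io_domains: io-domains {
+			compatible = "rockchip,rk3288-io-voltage-domain";
+		};
+	};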
 
 You specify supplies using the standard regulator bindings by including
 a phandle the relevant regulator.  All specified supplies must be able
diff --git a/Documentation/devicetree/bindings/regulator/max8973-regulator.txt b/Documentation/devicetree/bindings/regulator/max8973-regulator.txt
index f80ea2f..c2c68fc 100644
--- a/Documentation/devicetree/bindings/regulator/max8973-regulator.txt
+++ b/Documentation/devicetree/bindings/regulator/max8973-regulator.txt
@@ -32,6 +32,13 @@
 
 Enhanced transient response (ETR) will affect the configuration of CKADV.
 
+-junction-warn-millicelsius: u32, junction warning temperature threshold
+		in millicelsius. If the die temperature crosses this level,
+		the device generates warning interrupts.
+
+Please note that thermal functionality is only supported on MAX77621. The
+supported warning temperature thresholds for MAX77621 are 120 degC and 140 degC.
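+
+For instance, to request a warning interrupt at 120 degC:
+
+	junction-warn-millicelsius = <120000>;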
+
 Example:
 
 	max8973@1b {
diff --git a/Documentation/devicetree/bindings/regulator/pv88080.txt b/Documentation/devicetree/bindings/regulator/pv88080.txt
new file mode 100644
index 0000000..38a6142
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/pv88080.txt
@@ -0,0 +1,49 @@
+* Powerventure Semiconductor PV88080 Voltage Regulator
+
+Required properties:
+- compatible: "pvs,pv88080".
+- reg: I2C slave address, usually 0x49.
+- interrupts: the interrupt outputs of the controller
+- regulators: A node that houses a sub-node for each regulator within the
+  device. Each sub-node is identified using the node's name, with valid
+  values being BUCK1, BUCK2, and BUCK3. The content of each sub-node is
+  defined by the standard binding for regulators; see regulator.txt.
+
+Optional properties:
+- Any optional property defined in regulator.txt
+
+Example
+
+	pmic: pv88080@49 {
+		compatible = "pvs,pv88080";
+		reg = <0x49>;
+		interrupt-parent = <&gpio>;
+		interrupts = <24 24>;
+
+		regulators {
+			BUCK1 {
+				regulator-name = "buck1";
+				regulator-min-microvolt = < 600000>;
+				regulator-max-microvolt = <1393750>;
+				regulator-min-microamp 	= < 220000>;
+				regulator-max-microamp 	= <7040000>;
+			};
+
+			BUCK2 {
+				regulator-name = "buck2";
+				regulator-min-microvolt = < 600000>;
+				regulator-max-microvolt = <1393750>;
+				regulator-min-microamp 	= <1496000>;
+				regulator-max-microamp 	= <4189000>;
+			};
+
+			BUCK3 {
+				regulator-name = "buck3";
+				regulator-min-microvolt = <1400000>;
+				regulator-max-microvolt = <2193750>;
+				regulator-min-microamp 	= <1496000>;
+				regulator-max-microamp 	= <4189000>;
+			};
+		};
+	};
diff --git a/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt b/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt
index d00bfd8..46c6f3e 100644
--- a/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt
+++ b/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt
@@ -7,6 +7,7 @@
 			"qcom,pm8841-regulators"
 			"qcom,pm8916-regulators"
 			"qcom,pm8941-regulators"
+			"qcom,pm8994-regulators"
 
 - interrupts:
 	Usage: optional
@@ -68,6 +69,37 @@
 	Definition: Reference to regulator supplying the input pin, as
 		    described in the data sheet.
 
+- vdd_s1-supply:
+- vdd_s2-supply:
+- vdd_s3-supply:
+- vdd_s4-supply:
+- vdd_s5-supply:
+- vdd_s6-supply:
+- vdd_s7-supply:
+- vdd_s8-supply:
+- vdd_s9-supply:
+- vdd_s10-supply:
+- vdd_s11-supply:
+- vdd_s12-supply:
+- vdd_l1-supply:
+- vdd_l2_l26_l28-supply:
+- vdd_l3_l11-supply:
+- vdd_l4_l27_l31-supply:
+- vdd_l5_l7-supply:
+- vdd_l6_l12_l32-supply:
+- vdd_l8_l16_l30-supply:
+- vdd_l9_l10_l18_l22-supply:
+- vdd_l13_l19_l23_l24-supply:
+- vdd_l14_l15-supply:
+- vdd_l17_l29-supply:
+- vdd_l20_l21-supply:
+- vdd_l25-supply:
+- vdd_lvs_1_2-supply:
+	Usage: optional (pm8994 only)
+	Value type: <phandle>
+	Definition: Reference to regulator supplying the input pin, as
+		    described in the data sheet.
+
 
 The regulator node houses sub-nodes for each regulator within the device. Each
 sub-node is identified using the node's name, with valid values listed for each
@@ -85,6 +117,11 @@
 	l15, l16, l17, l18, l19, l20, l21, l22, l23, l24, lvs1, lvs2, lvs3,
 	mvs1, mvs2
 
+pm8994:
+	s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, l1, l2, l3, l4, l5,
+	l6, l7, l8, l9, l10, l11, l12, l13, l14, l15, l16, l17, l18, l19, l20,
+	l21, l22, l23, l24, l25, l26, l27, l28, l29, l30, l31, l32, lvs1, lvs2
+
 The content of each sub-node is defined by the standard binding for regulators -
 see regulator.txt - with additional custom properties described below:
 
diff --git a/Documentation/devicetree/bindings/regulator/regulator-max77620.txt b/Documentation/devicetree/bindings/regulator/regulator-max77620.txt
index b3c8ca6..1c4bfe7 100644
--- a/Documentation/devicetree/bindings/regulator/regulator-max77620.txt
+++ b/Documentation/devicetree/bindings/regulator/regulator-max77620.txt
@@ -94,6 +94,28 @@
 					This is applicable if suspend state
 					FPS source is selected as FPS0, FPS1 or
 					FPS2.
+- maxim,ramp-rate-setting:		integer, ramp rate (uV/us) setting to be
+					configured in the device.
+					The platform may have a different ramp
+					rate than the advertised one if its
+					design deviates from Maxim's
+					recommendations. In this case, the
+					platform-specific ramp rate is used for
+					the ramp time calculation and this
+					property is used for the device
+					register configuration.
+					The measured ramp rate of the platform
+					is provided by regulator-ramp-delay
+					as described in <devicetree/bindings/
+					regulator/regulator.txt>.
+					The Maxim MAX77620 supports the
+					following ramp delays:
+					  SD: 13.75mV/us, 27.5mV/us, 55mV/us
+					  LDOs: 5mV/us, 100mV/us
+
+Note: If the measured ramp delay is the same as the advertised ramp delay, there
+is no need to provide the ramp delay via the property "maxim,ramp-rate-setting".
+The ramp rate can then be provided by regulator-ramp-delay, which will be used
+both for the ramp time calculation on voltage changes and for the device
+configuration.
 
 Example:
 --------
diff --git a/Documentation/devicetree/bindings/regulator/ti-abb-regulator.txt b/Documentation/devicetree/bindings/regulator/ti-abb-regulator.txt
index c58db75..c3f6546 100644
--- a/Documentation/devicetree/bindings/regulator/ti-abb-regulator.txt
+++ b/Documentation/devicetree/bindings/regulator/ti-abb-regulator.txt
@@ -14,8 +14,8 @@
   - "setup-address"	- contains setup register address of ABB module (ti,abb-v3)
   - "int-address"	- contains address of interrupt register for ABB module
   (also see Optional properties)
-- #address-cell: should be 0
-- #size-cell: should be 0
+- #address-cells: should be 0
+- #size-cells: should be 0
 - clocks: should point to the clock node used by ABB module
 - ti,settling-time: Settling time in uSecs from SoC documentation for ABB module
 	to settle down(target time for SR2_WTCNT_VALUE).
@@ -69,7 +69,7 @@
 abb_x: regulator-abb-x {
 	compatible = "ti,abb-v1";
 	regulator-name = "abb_x";
-	#address-cell = <0>;
+	#address-cells = <0>;
 	#size-cells = <0>;
 	reg = <0x483072f0 0x8>, <0x48306818 0x4>;
 	reg-names = "base-address", "int-address";
@@ -89,7 +89,7 @@
 abb_y: regulator-abb-y {
 	compatible = "ti,abb-v2";
 	regulator-name = "abb_y";
-	#address-cell = <0>;
+	#address-cells = <0>;
 	#size-cells = <0>;
 	reg = <0x4a307bd0 0x8>, <0x4a306014 0x4>, <0x4A002268 0x8>;
 	reg-names = "base-address", "int-address", "efuse-address";
@@ -110,7 +110,7 @@
 abb_z: regulator-abb-z {
 	compatible = "ti,abb-v2";
 	regulator-name = "abb_z";
-	#address-cell = <0>;
+	#address-cells = <0>;
 	#size-cells = <0>;
 	reg = <0x4ae07ce4 0x8>, <0x4ae06010 0x4>,
 	      <0x4a002194 0x8>, <0x4ae0C314 0x4>;
diff --git a/Documentation/devicetree/bindings/regulator/twl-regulator.txt b/Documentation/devicetree/bindings/regulator/twl-regulator.txt
index 75b0c16..74a91c4 100644
--- a/Documentation/devicetree/bindings/regulator/twl-regulator.txt
+++ b/Documentation/devicetree/bindings/regulator/twl-regulator.txt
@@ -57,6 +57,12 @@
 
 Optional properties:
 - Any optional property defined in bindings/regulator/regulator.txt
+For twl4030 regulators/LDOs:
+ - regulator-initial-mode:
+  - 0x08 - Sleep mode, the nominal output voltage is maintained with low power
+           consumption with low load current capability.
+  - 0x0e - Active mode, the regulator can deliver its nominal output voltage
+           with full-load current capability.
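+
+  For instance, to start a twl4030 LDO in active mode:
+
+	regulator-initial-mode = <0x0e>;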
 
 Example:
 
diff --git a/Documentation/hwmon/fam15h_power b/Documentation/hwmon/fam15h_power
index e2b1b69..fb594c2 100644
--- a/Documentation/hwmon/fam15h_power
+++ b/Documentation/hwmon/fam15h_power
@@ -10,14 +10,22 @@
   Datasheets:
   BIOS and Kernel Developer's Guide (BKDG) For AMD Family 15h Processors
   BIOS and Kernel Developer's Guide (BKDG) For AMD Family 16h Processors
+  AMD64 Architecture Programmer's Manual Volume 2: System Programming
 
 Author: Andreas Herrmann <herrmann.der.user@googlemail.com>
 
 Description
 -----------
 
+1) Processor TDP (Thermal design power)
+
+Given a fixed frequency and voltage, the power consumption of a
+processor varies based on the workload being executed. Derated power
+is the power consumed when running a specific application. Thermal
+design power (TDP) is an example of derated power.
+
 This driver permits reading of registers providing power information
-of AMD Family 15h and 16h processors.
+of AMD Family 15h and 16h processors via the TDP algorithm.
 
 For AMD Family 15h and 16h processors the following power values can
 be calculated using different processor northbridge function
@@ -37,3 +45,58 @@
 On multi-node processors the calculated value is for the entire
 package and not for a single node. Thus the driver creates sysfs
 attributes only for internal node0 of a multi-node processor.
+
+2) Accumulated Power Mechanism
+
+This driver also implements an algorithm to calculate the average
+power consumed by a processor during a measurement interval Tm. The
+availability of the accumulated power mechanism is indicated by
+CPUID Fn8000_0007_EDX[12].
+
+* Tsample: compute unit power accumulator sample period
+* Tref: the PTSC counter period
+* PTSC: performance timestamp counter
+* N: the ratio of compute unit power accumulator sample period to the
+  PTSC period
+* Jmax: max compute unit accumulated power which is indicated by
+  MaxCpuSwPwrAcc MSR C001007b
+* Jx/Jy: compute unit accumulated power which is indicated by
+  CpuSwPwrAcc MSR C001007a
+* Tx/Ty: the value of performance timestamp counter which is indicated
+  by CU_PTSC MSR C0010280
+* PwrCPUave: CPU average power
+
+i. Determine the ratio of Tsample to Tref by executing CPUID Fn8000_0007.
+	N = value of CPUID Fn8000_0007_ECX[CpuPwrSampleTimeRatio[15:0]].
+
+ii. Read the full range of the cumulative energy value from the new
+MSR MaxCpuSwPwrAcc.
+	Jmax = value returned.
+
+iii. At time x, SW reads CpuSwPwrAcc MSR and samples the PTSC.
+	Jx = value read from CpuSwPwrAcc and Tx = value read from PTSC.
+
+iv. At time y, SW reads CpuSwPwrAcc MSR and samples the PTSC.
+	Jy = value read from CpuSwPwrAcc and Ty = value read from PTSC.
+
+v. Calculate the average power consumption for a compute unit over
+time period (y-x). Unit of result is uWatt.
+	if (Jy < Jx) // Rollover has occurred
+		Jdelta = (Jy + Jmax) - Jx
+	else
+		Jdelta = Jy - Jx
+	PwrCPUave = N * Jdelta * 1000 / (Ty - Tx)
+
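+In C, the computation in step v could be sketched as follows (an
+illustrative helper only, not the driver's actual code; the driver's
+implementation lives in drivers/hwmon/fam15h_power.c):
+
+	/* Average power in uWatt over (ty - tx), handling rollover. */
+	static u64 avg_power_uw(u64 jx, u64 jy, u64 jmax,
+				u64 tx, u64 ty, u64 n)
+	{
+		u64 jdelta = (jy < jx) ? (jy + jmax) - jx : jy - jx;
+
+		return div64_u64(n * jdelta * 1000, ty - tx);
+	}
+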
+This driver provides PwrCPUave and the interval (default is 10 milliseconds
+and maximum is 1 second):
+* power1_average (PwrCPUave)
+* power1_average_interval (Interval)
+
+The power1_average_interval can be updated in the /etc/sensors3.conf file
+as below:
+
+chip "fam15h_power-*"
+	set power1_average_interval 0.01
+
+Then save it with "sensors -s".
diff --git a/Documentation/hwmon/it87 b/Documentation/hwmon/it87
index 733296d..fff6f6b 100644
--- a/Documentation/hwmon/it87
+++ b/Documentation/hwmon/it87
@@ -9,6 +9,9 @@
   * IT8620E
     Prefix: 'it8620'
     Addresses scanned: from Super I/O config space (8 I/O ports)
+  * IT8628E
+    Prefix: 'it8628'
+    Addresses scanned: from Super I/O config space (8 I/O ports)
     Datasheet: Not publicly available
   * IT8705F
     Prefix: 'it87'
@@ -114,8 +117,8 @@
 Description
 -----------
 
-This driver implements support for the IT8603E, IT8620E, IT8623E, IT8705F,
-IT8712F, IT8716F, IT8718F, IT8720F, IT8721F, IT8726F, IT8728F, IT8732F,
+This driver implements support for the IT8603E, IT8620E, IT8623E, IT8628E,
+IT8705F, IT8712F, IT8716F, IT8718F, IT8720F, IT8721F, IT8726F, IT8728F, IT8732F,
 IT8758E, IT8771E, IT8772E, IT8781F, IT8782F, IT8783E/F, IT8786E, IT8790E, and
 SiS950 chips.
 
@@ -158,8 +161,8 @@
 IT8728F. It only supports 3 fans, 16-bit fan mode, and the full speed mode
 of the fan is not supported (value 0 of pwmX_enable).
 
-The IT8620E is another custom design, hardware monitoring part is similar to
-IT8728F. It only supports 16-bit fan mode.
+The IT8620E and IT8628E are custom designs; the hardware monitoring part is
+similar to the IT8728F, and only 16-bit fan mode is supported. Both chips
+support up to 6 fans.
 
 The IT8790E supports up to 3 fans. 16-bit fan mode is always enabled.
 
@@ -187,8 +190,8 @@
 2.8 volts with a resolution of 0.0109 volt.  The battery voltage in8 does not
 have limit registers.
 
-On the IT8603E, IT8721F/IT8758E, IT8732F, IT8781F, IT8782F, and IT8783E/F, some
-voltage inputs are internal and scaled inside the chip:
+On the IT8603E, IT8620E, IT8628E, IT8721F/IT8758E, IT8732F, IT8781F, IT8782F,
+and IT8783E/F, some voltage inputs are internal and scaled inside the chip:
 * in3 (optional)
 * in7 (optional for IT8781F, IT8782F, and IT8783E/F)
 * in8 (always)
diff --git a/Documentation/hwmon/max31722 b/Documentation/hwmon/max31722
new file mode 100644
index 0000000..090da845
--- /dev/null
+++ b/Documentation/hwmon/max31722
@@ -0,0 +1,34 @@
+Kernel driver max31722
+======================
+
+Supported chips:
+  * Maxim Integrated MAX31722
+    Prefix: 'max31722'
+    ACPI ID: MAX31722
+    Addresses scanned: -
+    Datasheet: https://datasheets.maximintegrated.com/en/ds/MAX31722-MAX31723.pdf
+  * Maxim Integrated MAX31723
+    Prefix: 'max31723'
+    ACPI ID: MAX31723
+    Addresses scanned: -
+    Datasheet: https://datasheets.maximintegrated.com/en/ds/MAX31722-MAX31723.pdf
+
+Author: Tiberiu Breana <tiberiu.a.breana@intel.com>
+
+Description
+-----------
+
+This driver adds support for the Maxim Integrated MAX31722/MAX31723 thermometers
+and thermostats running over an SPI interface.
+
+Usage Notes
+-----------
+
+This driver uses ACPI to auto-detect devices. See ACPI IDs in the above section.
+
+Sysfs entries
+-------------
+
+The following attribute is supported:
+
+temp1_input		Measured temperature. Read-only.
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 0b3de80..d3240fe 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -131,6 +131,7 @@
 			More X86-64 boot options can be found in
 			Documentation/x86/x86_64/boot-options.txt .
 	X86	Either 32-bit or 64-bit x86 (same as X86-32+X86-64)
+	X86_UV	SGI UV support is enabled.
 	XEN	Xen support is enabled
 
 In addition, the following text indicates that the option:
@@ -167,16 +168,18 @@
 
 	acpi=		[HW,ACPI,X86,ARM64]
 			Advanced Configuration and Power Interface
-			Format: { force | off | strict | noirq | rsdt |
+			Format: { force | on | off | strict | noirq | rsdt |
 				  copy_dsdt }
 			force -- enable ACPI if default was off
+			on -- enable ACPI but allow fallback to DT [arm64]
 			off -- disable ACPI if default was on
 			noirq -- do not use ACPI for IRQ routing
 			strict -- Be less tolerant of platforms that are not
 				strictly ACPI specification compliant.
 			rsdt -- prefer RSDT over (default) XSDT
 			copy_dsdt -- copy DSDT to memory
-			For ARM64, ONLY "acpi=off" or "acpi=force" are available
+			For ARM64, ONLY "acpi=off", "acpi=on" or "acpi=force"
+			are available
 
 			See also Documentation/power/runtime_pm.txt, pci=noacpi
 
@@ -312,6 +315,8 @@
 			acpi_osi=!*		# remove all strings
 			acpi_osi=!		# disable all built-in OS vendor
 						  strings
+			acpi_osi=!!		# enable all built-in OS vendor
+						  strings
 			acpi_osi=		# disable all strings
 
 			'acpi_osi=!' can be used in combination with single or
@@ -542,6 +547,13 @@
 			Format: <int> (must be >=0)
 			Default: 64
 
+	bau=		[X86_UV] Enable the BAU on SGI UV.  The default
+			behavior is to disable the BAU (i.e. bau=0).
+			Format: { "0" | "1" }
+			0 - Disable the BAU.
+			1 - Enable the BAU.
+			unset - Disable the BAU.
+
 	baycom_epp=	[HW,AX25]
 			Format: <io>,<mode>
 
@@ -1661,6 +1673,11 @@
 		hwp_only
 			Only load intel_pstate on systems which support
 			hardware P state control (HWP) if available.
+		support_acpi_ppc
+			Enforce ACPI _PPC performance limits. If the Fixed ACPI
+			Description Table specifies the preferred power
+			management profile as "Enterprise Server" or
+			"Performance Server", then this feature is turned on
+			by default.
 
 	intremap=	[X86-64, Intel-IOMMU]
 			on	enable Interrupt Remapping (default)
@@ -3284,6 +3301,44 @@
 			Lazy RCU callbacks are those which RCU can
 			prove do nothing more than free memory.
 
+	rcuperf.gp_exp= [KNL]
+			Measure performance of expedited synchronous
+			grace-period primitives.
+
+	rcuperf.holdoff= [KNL]
+			Set test-start holdoff period.  The purpose of
+			this parameter is to delay the start of the
+			test until boot completes in order to avoid
+			interference.
+
+	rcuperf.nreaders= [KNL]
+			Set number of RCU readers.  The value -1 selects
+			N, where N is the number of CPUs.  A value
+			"n" less than -1 selects N-n+1, where N is again
+			the number of CPUs.  For example, -2 selects N
+			(the number of CPUs), -3 selects N+1, and so on.
+			A value of "n" less than or equal to -N selects
+			a single reader.
+
+	rcuperf.nwriters= [KNL]
+			Set number of RCU writers.  The values operate
+			the same as for rcuperf.nreaders, so the value -1
+			selects N, where N is the number of CPUs.
+
+	rcuperf.perf_runnable= [BOOT]
+			Start rcuperf running at boot time.
+
+	rcuperf.shutdown= [KNL]
+			Shut the system down after performance tests
+			complete.  This is useful for hands-off automated
+			testing.
+
+	rcuperf.perf_type= [KNL]
+			Specify the RCU implementation to test.
+
+	rcuperf.verbose= [KNL]
+			Enable additional printk() statements.
+
 	rcutorture.cbflood_inter_holdoff= [KNL]
 			Set holdoff time (jiffies) between successive
 			callback-flood tests.
diff --git a/Documentation/locking/lockdep-design.txt b/Documentation/locking/lockdep-design.txt
index 5001280..9de1c15 100644
--- a/Documentation/locking/lockdep-design.txt
+++ b/Documentation/locking/lockdep-design.txt
@@ -97,7 +97,7 @@
    <hardirq-safe>   ->  <hardirq-unsafe>
    <softirq-safe>   ->  <softirq-unsafe>
 
-The first rule comes from the fact the a hardirq-safe lock could be
+The first rule comes from the fact that a hardirq-safe lock could be
 taken by a hardirq context, interrupting a hardirq-unsafe lock - and
 thus could result in a lock inversion deadlock. Likewise, a softirq-safe
 lock could be taken by an softirq context, interrupting a softirq-unsafe
@@ -220,7 +220,7 @@
 when the chain is validated for the first time, is then put into a hash
 table, which hash-table can be checked in a lockfree manner. If the
 locking chain occurs again later on, the hash table tells us that we
-dont have to validate the chain again.
+don't have to validate the chain again.
 
 Troubleshooting:
 ----------------
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index 3729cbe..147ae8e 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -4,8 +4,40 @@
 
 By: David Howells <dhowells@redhat.com>
     Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+    Will Deacon <will.deacon@arm.com>
+    Peter Zijlstra <peterz@infradead.org>
 
-Contents:
+==========
+DISCLAIMER
+==========
+
+This document is not a specification; it is intentionally (for the sake of
+brevity) and unintentionally (due to being human) incomplete. This document is
+meant as a guide to using the various memory barriers provided by Linux, but
+in case of any doubt (and there are many) please ask.
+
+To repeat, this document is not a specification of what Linux expects from
+hardware.
+
+The purpose of this document is twofold:
+
+ (1) to specify the minimum functionality that one can rely on for any
+     particular barrier, and
+
+ (2) to provide a guide as to how to use the barriers that are available.
+
+Note that an architecture can provide more than the minimum requirement
+for any particular barrier, but if the architecture provides less than
+that, that architecture is incorrect.
+
+Note also that it is possible that a barrier may be a no-op for an
+architecture because the way that arch works renders an explicit barrier
+unnecessary in that case.
+
+
+========
+CONTENTS
+========
 
  (*) Abstract memory access model.
 
@@ -31,15 +63,15 @@
 
  (*) Implicit kernel memory barriers.
 
-     - Locking functions.
+     - Lock acquisition functions.
      - Interrupt disabling functions.
      - Sleep and wake-up functions.
      - Miscellaneous functions.
 
- (*) Inter-CPU locking barrier effects.
+ (*) Inter-CPU acquiring barrier effects.
 
-     - Locks vs memory accesses.
-     - Locks vs I/O accesses.
+     - Acquires vs memory accesses.
+     - Acquires vs I/O accesses.
 
  (*) Where are memory barriers needed?
 
@@ -61,6 +93,7 @@
  (*) The things CPUs get up to.
 
      - And then there's the Alpha.
+     - Virtual Machine Guests.
 
  (*) Example uses.
 
@@ -148,7 +181,7 @@
 
 	CPU 1		CPU 2
 	===============	===============
-	{ A == 1, B == 2, C = 3, P == &A, Q == &C }
+	{ A == 1, B == 2, C == 3, P == &A, Q == &C }
 	B = 4;		Q = P;
 	P = &B		D = *Q;
 
@@ -430,8 +463,9 @@
      This acts as a one-way permeable barrier.  It guarantees that all memory
      operations after the ACQUIRE operation will appear to happen after the
      ACQUIRE operation with respect to the other components of the system.
-     ACQUIRE operations include LOCK operations and smp_load_acquire()
-     operations.
+     ACQUIRE operations include LOCK operations and both smp_load_acquire()
+     and smp_cond_acquire() operations. The latter builds the necessary ACQUIRE
+     semantics by relying on a control dependency and smp_rmb().
 
      Memory operations that occur before an ACQUIRE operation may appear to
      happen after it completes.
@@ -464,6 +498,11 @@
      This means that ACQUIRE acts as a minimal "acquire" operation and
      RELEASE acts as a minimal "release" operation.
 
+A subset of the atomic operations described in atomic_ops.txt have ACQUIRE
+and RELEASE variants in addition to fully-ordered and relaxed (no barrier
+semantics) definitions.  For compound atomics performing both a load and a
+store, ACQUIRE semantics apply only to the load and RELEASE semantics apply
+only to the store portion of the operation.
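+
+For example (a sketch; the guarantee shown follows from the ACQUIRE and
+RELEASE semantics described above):
+
+	CPU 1				CPU 2
+	===============================	===============================
+	WRITE_ONCE(X, 1);		r1 = atomic_inc_return_acquire(&V);
+	atomic_inc_return_release(&V);	r2 = READ_ONCE(X);
+
+If CPU 2's atomic operation reads the value stored by CPU 1's atomic
+operation, then r2 is guaranteed to be 1.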
 
 Memory barriers are only required where there's a possibility of interaction
 between two CPUs or between a CPU and a device.  If it can be guaranteed that
@@ -517,7 +556,7 @@
 
 	CPU 1		      CPU 2
 	===============	      ===============
-	{ A == 1, B == 2, C = 3, P == &A, Q == &C }
+	{ A == 1, B == 2, C == 3, P == &A, Q == &C }
 	B = 4;
 	<write barrier>
 	WRITE_ONCE(P, &B)
@@ -544,7 +583,7 @@
 
 	CPU 1		      CPU 2
 	===============	      ===============
-	{ A == 1, B == 2, C = 3, P == &A, Q == &C }
+	{ A == 1, B == 2, C == 3, P == &A, Q == &C }
 	B = 4;
 	<write barrier>
 	WRITE_ONCE(P, &B);
@@ -813,9 +852,10 @@
       the same variable, then those stores must be ordered, either by
       preceding both of them with smp_mb() or by using smp_store_release()
       to carry out the stores.  Please note that it is -not- sufficient
-      to use barrier() at beginning of each leg of the "if" statement,
-      as optimizing compilers do not necessarily respect barrier()
-      in this case.
+      to use barrier() at beginning of each leg of the "if" statement
+      because, as shown by the example above, optimizing compilers can
+      destroy the control dependency while respecting the letter of the
+      barrier() law.
 
   (*) Control dependencies require at least one run-time conditional
       between the prior load and the subsequent store, and this
@@ -1731,15 +1771,15 @@
 
 
 All memory barriers except the data dependency barriers imply a compiler
-barrier. Data dependencies do not impose any additional compiler ordering.
+barrier.  Data dependencies do not impose any additional compiler ordering.
 
 Aside: In the case of data dependencies, the compiler would be expected
 to issue the loads in the correct order (eg. `a[b]` would have to load
 the value of b before loading a[b]), however there is no guarantee in
 the C specification that the compiler may not speculate the value of b
 (eg. is equal to 1) and load a before b (eg. tmp = a[1]; if (b != 1)
-tmp = a[b]; ). There is also the problem of a compiler reloading b after
-having loaded a[b], thus having a newer copy of b than a[b]. A consensus
+tmp = a[b]; ).  There is also the problem of a compiler reloading b after
+having loaded a[b], thus having a newer copy of b than a[b].  A consensus
 has not yet been reached about these problems, however the READ_ONCE()
 macro is a good place to start looking.
 
@@ -1794,6 +1834,7 @@
 
 
  (*) lockless_dereference();
+
      This can be thought of as a pointer-fetch wrapper around the
      smp_read_barrier_depends() data-dependency barrier.
 
@@ -1858,7 +1899,7 @@
 ordered I/O regions to be partially ordered.  Its effects may go beyond the
 CPU->Hardware interface and actually affect the hardware at some level.
 
-See the subsection "Locks vs I/O accesses" for more information.
+See the subsection "Acquires vs I/O accesses" for more information.
 
 
 ===============================
@@ -1873,8 +1914,8 @@
 of arch specific code.
 
 
-ACQUIRING FUNCTIONS
--------------------
+LOCK ACQUISITION FUNCTIONS
+--------------------------
 
 The Linux kernel has a number of locking constructs:
 
@@ -1895,7 +1936,7 @@
      Memory operations issued before the ACQUIRE may be completed after
      the ACQUIRE operation has completed.  An smp_mb__before_spinlock(),
      combined with a following ACQUIRE, orders prior stores against
-     subsequent loads and stores. Note that this is weaker than smp_mb()!
+     subsequent loads and stores.  Note that this is weaker than smp_mb()!
      The smp_mb__before_spinlock() primitive is free on many architectures.
 
  (2) RELEASE operation implication:
@@ -2090,9 +2131,9 @@
 	event_indicated = 1;
 	wake_up_process(event_daemon);
 
-A write memory barrier is implied by wake_up() and co. if and only if they wake
-something up.  The barrier occurs before the task state is cleared, and so sits
-between the STORE to indicate the event and the STORE to set TASK_RUNNING:
+A write memory barrier is implied by wake_up() and co.  if and only if they
+wake something up.  The barrier occurs before the task state is cleared, and so
+sits between the STORE to indicate the event and the STORE to set TASK_RUNNING:
 
 	CPU 1				CPU 2
 	===============================	===============================
@@ -2206,7 +2247,7 @@
 
 Then there is no guarantee as to what order CPU 3 will see the accesses to *A
 through *H occur in, other than the constraints imposed by the separate locks
-on the separate CPUs. It might, for example, see:
+on the separate CPUs.  It might, for example, see:
 
 	*E, ACQUIRE M, ACQUIRE Q, *G, *C, *F, *A, *B, RELEASE Q, *D, *H, RELEASE M
 
@@ -2486,9 +2527,9 @@
 	clear_bit_unlock();
 	__clear_bit_unlock();
 
-These implement ACQUIRE-class and RELEASE-class operations. These should be used in
-preference to other operations when implementing locking primitives, because
-their implementations can be optimised on many architectures.
+These implement ACQUIRE-class and RELEASE-class operations.  These should be
+used in preference to other operations when implementing locking primitives,
+because their implementations can be optimised on many architectures.
 
 [!] Note that special memory barrier primitives are available for these
 situations because on some CPUs the atomic instructions used imply full memory
@@ -2568,12 +2609,12 @@
 
 Normally this won't be a problem because the I/O accesses done inside such
 sections will include synchronous load operations on strictly ordered I/O
-registers that form implicit I/O barriers. If this isn't sufficient then an
+registers that form implicit I/O barriers.  If this isn't sufficient then an
 mmiowb() may need to be used explicitly.
 
 
 A similar situation may occur between an interrupt routine and two routines
-running on separate CPUs that communicate with each other. If such a case is
+running on separate CPUs that communicate with each other.  If such a case is
 likely, then interrupt-disabling locks should be used to guarantee ordering.
 
 
@@ -2587,8 +2628,8 @@
  (*) inX(), outX():
 
      These are intended to talk to I/O space rather than memory space, but
-     that's primarily a CPU-specific concept. The i386 and x86_64 processors do
-     indeed have special I/O space access cycles and instructions, but many
+     that's primarily a CPU-specific concept.  The i386 and x86_64 processors
+     do indeed have special I/O space access cycles and instructions, but many
      CPUs don't have such a concept.
 
      The PCI bus, amongst others, defines an I/O space concept which - on such
@@ -2610,7 +2651,7 @@
 
      Whether these are guaranteed to be fully ordered and uncombined with
      respect to each other on the issuing CPU depends on the characteristics
-     defined for the memory window through which they're accessing. On later
+     defined for the memory window through which they're accessing.  On later
      i386 architecture machines, for example, this is controlled by way of the
      MTRR registers.
 
@@ -2635,10 +2676,10 @@
  (*) readX_relaxed(), writeX_relaxed()
 
      These are similar to readX() and writeX(), but provide weaker memory
-     ordering guarantees. Specifically, they do not guarantee ordering with
+     ordering guarantees.  Specifically, they do not guarantee ordering with
      respect to normal memory accesses (e.g. DMA buffers) nor do they guarantee
-     ordering with respect to LOCK or UNLOCK operations. If the latter is
-     required, an mmiowb() barrier can be used. Note that relaxed accesses to
+     ordering with respect to LOCK or UNLOCK operations.  If the latter is
+     required, an mmiowb() barrier can be used.  Note that relaxed accesses to
      the same peripheral are guaranteed to be ordered with respect to each
      other.
 
@@ -3040,8 +3081,9 @@
 
 See the subsection on "Cache Coherency" above.
 
+
 VIRTUAL MACHINE GUESTS
--------------------
+----------------------
 
 Guests running within virtual machines might be affected by SMP effects even if
 the guest itself is compiled without SMP support.  This is an artifact of
@@ -3050,7 +3092,7 @@
 
 To handle this case optimally, low-level virt_mb() etc macros are available.
 These have the same effect as smp_mb() etc when SMP is enabled, but generate
-identical code for SMP and non-SMP systems. For example, virtual machine guests
+identical code for SMP and non-SMP systems.  For example, virtual machine guests
 should use virt_mb() rather than smp_mb() when synchronizing against a
 (possibly SMP) host.
 
@@ -3058,6 +3100,7 @@
 in particular, they do not control MMIO effects: to control
 MMIO effects, use mandatory barriers.
 
+
 ============
 EXAMPLE USES
 ============
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index fcddfd5..daabdd7 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -60,6 +60,7 @@
 - panic_on_warn
 - perf_cpu_time_max_percent
 - perf_event_paranoid
+- perf_event_max_stack
 - pid_max
 - powersave-nap               [ PPC only ]
 - printk
@@ -654,6 +655,19 @@
 
 ==============================================================
 
+perf_event_max_stack:
+
+Controls the maximum number of stack frames to copy for events configured
+with (attr.sample_type & PERF_SAMPLE_CALLCHAIN), for instance when using
+'perf record -g' or 'perf trace --call-graph fp'.
+
+The value can only be changed while no events with callchains enabled are
+in use; otherwise writing to this file will return -EBUSY.
+
+The default value is 127.
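+
+For example, to allow deeper callchains to be copied (512 is an
+illustrative value):
+
+	# echo 512 > /proc/sys/kernel/perf_event_max_stack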
+
+==============================================================
+
 pid_max:
 
 PID allocation wrap value.  When the kernel's next PID value
diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index f52f297..9857606 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -1562,12 +1562,12 @@
   <idle>-0       3dN.1   12us : menu_hrtimer_cancel <-tick_nohz_idle_exit
   <idle>-0       3dN.1   12us : ktime_get <-tick_nohz_idle_exit
   <idle>-0       3dN.1   12us : tick_do_update_jiffies64 <-tick_nohz_idle_exit
-  <idle>-0       3dN.1   13us : update_cpu_load_nohz <-tick_nohz_idle_exit
-  <idle>-0       3dN.1   13us : _raw_spin_lock <-update_cpu_load_nohz
+  <idle>-0       3dN.1   13us : cpu_load_update_nohz <-tick_nohz_idle_exit
+  <idle>-0       3dN.1   13us : _raw_spin_lock <-cpu_load_update_nohz
   <idle>-0       3dN.1   13us : add_preempt_count <-_raw_spin_lock
-  <idle>-0       3dN.2   13us : __update_cpu_load <-update_cpu_load_nohz
-  <idle>-0       3dN.2   14us : sched_avg_update <-__update_cpu_load
-  <idle>-0       3dN.2   14us : _raw_spin_unlock <-update_cpu_load_nohz
+  <idle>-0       3dN.2   13us : __cpu_load_update <-cpu_load_update_nohz
+  <idle>-0       3dN.2   14us : sched_avg_update <-__cpu_load_update
+  <idle>-0       3dN.2   14us : _raw_spin_unlock <-cpu_load_update_nohz
   <idle>-0       3dN.2   14us : sub_preempt_count <-_raw_spin_unlock
   <idle>-0       3dN.1   15us : calc_load_exit_idle <-tick_nohz_idle_exit
   <idle>-0       3dN.1   15us : touch_softlockup_watchdog <-tick_nohz_idle_exit
diff --git a/Documentation/x86/pat.txt b/Documentation/x86/pat.txt
index 54944c7..2a4ee63 100644
--- a/Documentation/x86/pat.txt
+++ b/Documentation/x86/pat.txt
@@ -196,3 +196,35 @@
 "debugpat" boot parameter. With this parameter, various debug messages are
 printed to dmesg log.
 
+PAT Initialization
+------------------
+
+The following table describes how PAT is initialized under various
+configurations. The PAT MSR must be updated by Linux in order to support WC
+and WT attributes. Otherwise, the PAT MSR has the value programmed in it
+by the firmware. Note that Xen enables the WC attribute in the PAT MSR for
+guests.
+
+ MTRR PAT   Call Sequence               PAT State  PAT MSR
+ =========================================================
+ E    E     MTRR -> PAT init            Enabled    OS
+ E    D     MTRR -> PAT init            Disabled    -
+ D    E     MTRR -> PAT disable         Disabled   BIOS
+ D    D     MTRR -> PAT disable         Disabled    -
+ -    np/E  PAT  -> PAT disable         Disabled   BIOS
+ -    np/D  PAT  -> PAT disable         Disabled    -
+ E    !P/E  MTRR -> PAT init            Disabled   BIOS
+ D    !P/E  MTRR -> PAT disable         Disabled   BIOS
+ !M   !P/E  MTRR stub -> PAT disable    Disabled   BIOS
+
+ Legend
+ ------------------------------------------------
+ E         Feature enabled in CPU
+ D         Feature disabled/unsupported in CPU
+ np        "nopat" boot option specified
+ !P        CONFIG_X86_PAT option unset
+ !M        CONFIG_MTRR option unset
+ Enabled   PAT state set to enabled
+ Disabled  PAT state set to disabled
+ OS        PAT initializes PAT MSR with OS setting
+ BIOS      PAT keeps PAT MSR with BIOS setting
+
diff --git a/MAINTAINERS b/MAINTAINERS
index 9c567a4..f40d40d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1322,6 +1322,7 @@
 F:	arch/arm/boot/dts/armada*
 F:	arch/arm/boot/dts/kirkwood*
 F:	arch/arm64/boot/dts/marvell/armada*
+F:	drivers/cpufreq/mvebu-cpufreq.c
 
 
 ARM/Marvell Berlin SoC support
@@ -3539,6 +3540,15 @@
 F:	include/linux/devfreq-event.h
 F:	Documentation/devicetree/bindings/devfreq/event/
 
+BUS FREQUENCY DRIVER FOR SAMSUNG EXYNOS
+M:	Chanwoo Choi <cw00.choi@samsung.com>
+L:	linux-pm@vger.kernel.org
+L:	linux-samsung-soc@vger.kernel.org
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mzx/devfreq.git
+S:	Maintained
+F:	drivers/devfreq/exynos-bus.c
+F:	Documentation/devicetree/bindings/devfreq/exynos-bus.txt
+
 DEVICE NUMBER REGISTRY
 M:	Torben Mathiasen <device@lanana.org>
 W:	http://lanana.org/docs/device-list/index.html
@@ -7020,9 +7030,9 @@
 M:	Krzysztof Kozlowski <k.kozlowski@samsung.com>
 L:	linux-kernel@vger.kernel.org
 S:	Supported
-F:	drivers/*/max14577.c
+F:	drivers/*/max14577*.c
 F:	drivers/*/max77686*.c
-F:	drivers/*/max77693.c
+F:	drivers/*/max77693*.c
 F:	drivers/extcon/extcon-max14577.c
 F:	drivers/extcon/extcon-max77693.c
 F:	drivers/rtc/rtc-max77686.c
@@ -11246,14 +11256,13 @@
 F:	drivers/media/i2c/tc358743*
 F:	include/media/i2c/tc358743.h
 
-TMIO MMC DRIVER
-M:	Ian Molton <ian@mnementh.co.uk>
+TMIO/SDHI MMC DRIVER
+M:	Wolfram Sang <wsa+renesas@sang-engineering.com>
 L:	linux-mmc@vger.kernel.org
-S:	Maintained
+S:	Supported
 F:	drivers/mmc/host/tmio_mmc*
 F:	drivers/mmc/host/sh_mobile_sdhi.c
-F:	include/linux/mmc/tmio.h
-F:	include/linux/mmc/sh_mobile_sdhi.h
+F:	include/linux/mfd/tmio.h
 
 TMP401 HARDWARE MONITOR DRIVER
 M:	Guenter Roeck <linux@roeck-us.net>
@@ -12011,7 +12020,9 @@
 W:	http://www.slimlogic.co.uk/?p=48
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator.git
 S:	Supported
+F:	Documentation/devicetree/bindings/regulator/
 F:	drivers/regulator/
+F:	include/dt-bindings/regulator/
 F:	include/linux/regulator/
 
 VRF
diff --git a/arch/alpha/include/asm/rwsem.h b/arch/alpha/include/asm/rwsem.h
index a83bbea6..0131a70 100644
--- a/arch/alpha/include/asm/rwsem.h
+++ b/arch/alpha/include/asm/rwsem.h
@@ -63,7 +63,7 @@
 	return res >= 0 ? 1 : 0;
 }
 
-static inline void __down_write(struct rw_semaphore *sem)
+static inline long ___down_write(struct rw_semaphore *sem)
 {
 	long oldcount;
 #ifndef	CONFIG_SMP
@@ -83,10 +83,24 @@
 	:"=&r" (oldcount), "=m" (sem->count), "=&r" (temp)
 	:"Ir" (RWSEM_ACTIVE_WRITE_BIAS), "m" (sem->count) : "memory");
 #endif
-	if (unlikely(oldcount))
+	return oldcount;
+}
+
+static inline void __down_write(struct rw_semaphore *sem)
+{
+	if (unlikely(___down_write(sem)))
 		rwsem_down_write_failed(sem);
 }
 
+static inline int __down_write_killable(struct rw_semaphore *sem)
+{
+	if (unlikely(___down_write(sem)))
+		if (IS_ERR(rwsem_down_write_failed_killable(sem)))
+			return -EINTR;
+
+	return 0;
+}
+
 /*
  * trylock for writing -- returns 1 if successful, 0 if contention
  */
diff --git a/arch/arm/include/asm/cpuidle.h b/arch/arm/include/asm/cpuidle.h
index 3848259..baefe1d 100644
--- a/arch/arm/include/asm/cpuidle.h
+++ b/arch/arm/include/asm/cpuidle.h
@@ -36,7 +36,7 @@
 
 struct of_cpuidle_method {
 	const char *method;
-	struct cpuidle_ops *ops;
+	const struct cpuidle_ops *ops;
 };
 
 #define CPUIDLE_METHOD_OF_DECLARE(name, _method, _ops)			\
diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h
index e0eea72..a708fa1 100644
--- a/arch/arm/include/asm/efi.h
+++ b/arch/arm/include/asm/efi.h
@@ -17,33 +17,27 @@
 #include <asm/mach/map.h>
 #include <asm/mmu_context.h>
 #include <asm/pgtable.h>
+#include <asm/ptrace.h>
 
 #ifdef CONFIG_EFI
 void efi_init(void);
 
 int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md);
+int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
 
-#define efi_call_virt(f, ...)						\
+#define arch_efi_call_virt_setup()	efi_virtmap_load()
+#define arch_efi_call_virt_teardown()	efi_virtmap_unload()
+
+#define arch_efi_call_virt(f, args...)					\
 ({									\
 	efi_##f##_t *__f;						\
-	efi_status_t __s;						\
-									\
-	efi_virtmap_load();						\
 	__f = efi.systab->runtime->f;					\
-	__s = __f(__VA_ARGS__);						\
-	efi_virtmap_unload();						\
-	__s;								\
+	__f(args);							\
 })
 
-#define __efi_call_virt(f, ...)						\
-({									\
-	efi_##f##_t *__f;						\
-									\
-	efi_virtmap_load();						\
-	__f = efi.systab->runtime->f;					\
-	__f(__VA_ARGS__);						\
-	efi_virtmap_unload();						\
-})
+#define ARCH_EFI_IRQ_FLAGS_MASK \
+	(PSR_J_BIT | PSR_E_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | \
+	 PSR_T_BIT | MODE_MASK)
 
 static inline void efi_set_pgd(struct mm_struct *mm)
 {
@@ -59,7 +53,16 @@
 
 /* arch specific definitions used by the stub code */
 
-#define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__)
+#define efi_call_early(f, ...)		sys_table_arg->boottime->f(__VA_ARGS__)
+#define __efi_call_early(f, ...)	f(__VA_ARGS__)
+#define efi_is_64bit()			(false)
+
+struct screen_info *alloc_screen_info(efi_system_table_t *sys_table_arg);
+void free_screen_info(efi_system_table_t *sys_table, struct screen_info *si);
+
+static inline void efifb_setup_from_dmi(struct screen_info *si, const char *opt)
+{
+}
 
 /*
  * A reasonable upper bound for the uncompressed kernel size is 32 MBytes,
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 3850701..738d5ee 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -265,6 +265,15 @@
 	kvm_call_hyp(__init_stage2_translation);
 }
 
+static inline void __cpu_reset_hyp_mode(phys_addr_t boot_pgd_ptr,
+					phys_addr_t phys_idmap_start)
+{
+	/*
+	 * TODO
+	 * kvm_call_reset(boot_pgd_ptr, phys_idmap_start);
+	 */
+}
+
 static inline int kvm_arch_dev_ioctl_check_extension(long ext)
 {
 	return 0;
@@ -277,7 +286,6 @@
 
 struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
 
-static inline void kvm_arch_hardware_disable(void) {}
 static inline void kvm_arch_hardware_unsetup(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index da44be9..f17a8d4 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -66,6 +66,7 @@
 phys_addr_t kvm_mmu_get_httbr(void);
 phys_addr_t kvm_mmu_get_boot_httbr(void);
 phys_addr_t kvm_get_idmap_vector(void);
+phys_addr_t kvm_get_idmap_start(void);
 int kvm_mmu_init(void);
 void kvm_clear_hyp_idmap(void);
 
diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h
index fa5b42d..3cc14dd 100644
--- a/arch/arm/include/asm/mmu_context.h
+++ b/arch/arm/include/asm/mmu_context.h
@@ -15,6 +15,7 @@
 
 #include <linux/compiler.h>
 #include <linux/sched.h>
+#include <linux/preempt.h>
 #include <asm/cacheflush.h>
 #include <asm/cachetype.h>
 #include <asm/proc-fns.h>
@@ -66,6 +67,7 @@
 		cpu_switch_mm(mm->pgd, mm);
 }
 
+#ifndef MODULE
 #define finish_arch_post_lock_switch \
 	finish_arch_post_lock_switch
 static inline void finish_arch_post_lock_switch(void)
@@ -87,6 +89,7 @@
 		preempt_enable_no_resched();
 	}
 }
+#endif /* !MODULE */
 
 #endif	/* CONFIG_MMU */
 
diff --git a/arch/arm/kernel/cpuidle.c b/arch/arm/kernel/cpuidle.c
index 703926e..a44b268e 100644
--- a/arch/arm/kernel/cpuidle.c
+++ b/arch/arm/kernel/cpuidle.c
@@ -70,7 +70,7 @@
  *
  * Returns a struct cpuidle_ops pointer, NULL if not found.
  */
-static struct cpuidle_ops *__init arm_cpuidle_get_ops(const char *method)
+static const struct cpuidle_ops *__init arm_cpuidle_get_ops(const char *method)
 {
 	struct of_cpuidle_method *m = __cpuidle_method_of_table;
 
@@ -88,7 +88,7 @@
  *
  * Get the method name defined in the 'enable-method' property, retrieve the
  * associated cpuidle_ops and do a struct copy. This copy is needed because all
- * cpuidle_ops are tagged __initdata and will be unloaded after the init
+ * cpuidle_ops are tagged __initconst and will be unloaded after the init
  * process.
  *
  * Return 0 on sucess, -ENOENT if no 'enable-method' is defined, -EOPNOTSUPP if
@@ -97,7 +97,7 @@
 static int __init arm_cpuidle_read_ops(struct device_node *dn, int cpu)
 {
 	const char *enable_method;
-	struct cpuidle_ops *ops;
+	const struct cpuidle_ops *ops;
 
 	enable_method = of_get_property(dn, "enable-method", NULL);
 	if (!enable_method)
diff --git a/arch/arm/kernel/efi.c b/arch/arm/kernel/efi.c
index ff8a9d8..9f43ba0 100644
--- a/arch/arm/kernel/efi.c
+++ b/arch/arm/kernel/efi.c
@@ -11,6 +11,41 @@
 #include <asm/mach/map.h>
 #include <asm/mmu_context.h>
 
+static int __init set_permissions(pte_t *ptep, pgtable_t token,
+				  unsigned long addr, void *data)
+{
+	efi_memory_desc_t *md = data;
+	pte_t pte = *ptep;
+
+	if (md->attribute & EFI_MEMORY_RO)
+		pte = set_pte_bit(pte, __pgprot(L_PTE_RDONLY));
+	if (md->attribute & EFI_MEMORY_XP)
+		pte = set_pte_bit(pte, __pgprot(L_PTE_XN));
+	set_pte_ext(ptep, pte, PTE_EXT_NG);
+	return 0;
+}
+
+int __init efi_set_mapping_permissions(struct mm_struct *mm,
+				       efi_memory_desc_t *md)
+{
+	unsigned long base, size;
+
+	base = md->virt_addr;
+	size = md->num_pages << EFI_PAGE_SHIFT;
+
+	/*
+	 * We can only use apply_to_page_range() if we can guarantee that the
+	 * entire region was mapped using pages. This should be the case if the
+	 * region does not cover any naturally aligned SECTION_SIZE sized
+	 * blocks.
+	 */
+	if (round_down(base + size, SECTION_SIZE) <
+	    round_up(base, SECTION_SIZE) + SECTION_SIZE)
+		return apply_to_page_range(mm, base, size, set_permissions, md);
+
+	return 0;
+}
+
 int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md)
 {
 	struct map_desc desc = {
@@ -34,5 +69,11 @@
 		desc.type = MT_DEVICE;
 
 	create_mapping_late(mm, &desc, true);
+
+	/*
+	 * If stricter permissions were specified, apply them now.
+	 */
+	if (md->attribute & (EFI_MEMORY_RO | EFI_MEMORY_XP))
+		return efi_set_mapping_permissions(mm, md);
 	return 0;
 }
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index 6284779..b8df458 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -631,7 +631,7 @@
 	info->address &= ~alignment_mask;
 	info->ctrl.len <<= offset;
 
-	if (!bp->overflow_handler) {
+	if (is_default_overflow_handler(bp)) {
 		/*
 		 * Mismatch breakpoints are required for single-stepping
 		 * breakpoints.
@@ -754,7 +754,7 @@
 		 * mismatch breakpoint so we can single-step over the
 		 * watchpoint trigger.
 		 */
-		if (!wp->overflow_handler)
+		if (is_default_overflow_handler(wp))
 			enable_single_step(wp, instruction_pointer(regs));
 
 unlock:
diff --git a/arch/arm/kernel/perf_callchain.c b/arch/arm/kernel/perf_callchain.c
index 4e02ae5..27563be 100644
--- a/arch/arm/kernel/perf_callchain.c
+++ b/arch/arm/kernel/perf_callchain.c
@@ -75,7 +75,7 @@
 
 	tail = (struct frame_tail __user *)regs->ARM_fp - 1;
 
-	while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
+	while ((entry->nr < sysctl_perf_event_max_stack) &&
 	       tail && !((unsigned long)tail & 0x3))
 		tail = user_backtrace(tail, entry);
 }
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 2c4bea3..7d4e285 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -883,7 +883,8 @@
 		request_resource(&ioport_resource, &lp2);
 }
 
-#if defined(CONFIG_VGA_CONSOLE) || defined(CONFIG_DUMMY_CONSOLE)
+#if defined(CONFIG_VGA_CONSOLE) || defined(CONFIG_DUMMY_CONSOLE) || \
+    defined(CONFIG_EFI)
 struct screen_info screen_info = {
  .orig_video_lines	= 30,
  .orig_video_cols	= 80,
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index dded1b7..9ef013d 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -16,7 +16,6 @@
  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  */
 
-#include <linux/cpu.h>
 #include <linux/cpu_pm.h>
 #include <linux/errno.h>
 #include <linux/err.h>
@@ -66,6 +65,8 @@
 
 static bool vgic_present;
 
+static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled);
+
 static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)
 {
 	BUG_ON(preemptible());
@@ -90,11 +91,6 @@
 	return &kvm_arm_running_vcpu;
 }
 
-int kvm_arch_hardware_enable(void)
-{
-	return 0;
-}
-
 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
 {
 	return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
@@ -1033,11 +1029,6 @@
 	}
 }
 
-static void cpu_init_stage2(void *dummy)
-{
-	__cpu_init_stage2();
-}
-
 static void cpu_init_hyp_mode(void *dummy)
 {
 	phys_addr_t boot_pgd_ptr;
@@ -1065,43 +1056,87 @@
 {
 	if (is_kernel_in_hyp_mode()) {
 		/*
-		 * cpu_init_stage2() is safe to call even if the PM
+		 * __cpu_init_stage2() is safe to call even if the PM
 		 * event was cancelled before the CPU was reset.
 		 */
-		cpu_init_stage2(NULL);
+		__cpu_init_stage2();
 	} else {
 		if (__hyp_get_vectors() == hyp_default_vectors)
 			cpu_init_hyp_mode(NULL);
 	}
 }
 
-static int hyp_init_cpu_notify(struct notifier_block *self,
-			       unsigned long action, void *cpu)
+static void cpu_hyp_reset(void)
 {
-	switch (action) {
-	case CPU_STARTING:
-	case CPU_STARTING_FROZEN:
-		cpu_hyp_reinit();
-	}
+	phys_addr_t boot_pgd_ptr;
+	phys_addr_t phys_idmap_start;
 
-	return NOTIFY_OK;
+	if (!is_kernel_in_hyp_mode()) {
+		boot_pgd_ptr = kvm_mmu_get_boot_httbr();
+		phys_idmap_start = kvm_get_idmap_start();
+
+		__cpu_reset_hyp_mode(boot_pgd_ptr, phys_idmap_start);
+	}
 }
 
-static struct notifier_block hyp_init_cpu_nb = {
-	.notifier_call = hyp_init_cpu_notify,
-};
+static void _kvm_arch_hardware_enable(void *discard)
+{
+	if (!__this_cpu_read(kvm_arm_hardware_enabled)) {
+		cpu_hyp_reinit();
+		__this_cpu_write(kvm_arm_hardware_enabled, 1);
+	}
+}
+
+int kvm_arch_hardware_enable(void)
+{
+	_kvm_arch_hardware_enable(NULL);
+	return 0;
+}
+
+static void _kvm_arch_hardware_disable(void *discard)
+{
+	if (__this_cpu_read(kvm_arm_hardware_enabled)) {
+		cpu_hyp_reset();
+		__this_cpu_write(kvm_arm_hardware_enabled, 0);
+	}
+}
+
+void kvm_arch_hardware_disable(void)
+{
+	_kvm_arch_hardware_disable(NULL);
+}
 
 #ifdef CONFIG_CPU_PM
 static int hyp_init_cpu_pm_notifier(struct notifier_block *self,
 				    unsigned long cmd,
 				    void *v)
 {
-	if (cmd == CPU_PM_EXIT) {
-		cpu_hyp_reinit();
-		return NOTIFY_OK;
-	}
+	/*
+	 * kvm_arm_hardware_enabled is left with its old value over
+	 * PM_ENTER->PM_EXIT. It is used to indicate PM_EXIT should
+	 * re-enable hyp.
+	 */
+	switch (cmd) {
+	case CPU_PM_ENTER:
+		if (__this_cpu_read(kvm_arm_hardware_enabled))
+			/*
+			 * don't update kvm_arm_hardware_enabled here
+			 * so that the hardware will be re-enabled
+			 * when we resume. See below.
+			 */
+			cpu_hyp_reset();
 
-	return NOTIFY_DONE;
+		return NOTIFY_OK;
+	case CPU_PM_EXIT:
+		if (__this_cpu_read(kvm_arm_hardware_enabled))
+			/* The hardware was enabled before suspend. */
+			cpu_hyp_reinit();
+
+		return NOTIFY_OK;
+
+	default:
+		return NOTIFY_DONE;
+	}
 }
 
 static struct notifier_block hyp_init_cpu_pm_nb = {
@@ -1143,16 +1178,12 @@
 
 static int init_subsystems(void)
 {
-	int err;
+	int err = 0;
 
 	/*
-	 * Register CPU Hotplug notifier
+	 * Enable hardware so that subsystem initialisation can access EL2.
 	 */
-	err = register_cpu_notifier(&hyp_init_cpu_nb);
-	if (err) {
-		kvm_err("Cannot register KVM init CPU notifier (%d)\n", err);
-		return err;
-	}
+	on_each_cpu(_kvm_arch_hardware_enable, NULL, 1);
 
 	/*
 	 * Register CPU lower-power notifier
@@ -1170,9 +1201,10 @@
 	case -ENODEV:
 	case -ENXIO:
 		vgic_present = false;
+		err = 0;
 		break;
 	default:
-		return err;
+		goto out;
 	}
 
 	/*
@@ -1180,12 +1212,15 @@
 	 */
 	err = kvm_timer_hyp_init();
 	if (err)
-		return err;
+		goto out;
 
 	kvm_perf_init();
 	kvm_coproc_table_init();
 
-	return 0;
+out:
+	on_each_cpu(_kvm_arch_hardware_disable, NULL, 1);
+
+	return err;
 }
 
 static void teardown_hyp_mode(void)
@@ -1198,17 +1233,11 @@
 	free_hyp_pgds();
 	for_each_possible_cpu(cpu)
 		free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
-	unregister_cpu_notifier(&hyp_init_cpu_nb);
 	hyp_cpu_pm_exit();
 }
 
 static int init_vhe_mode(void)
 {
-	/*
-	 * Execute the init code on each CPU.
-	 */
-	on_each_cpu(cpu_init_stage2, NULL, 1);
-
 	/* set size of VMID supported by CPU */
 	kvm_vmid_bits = kvm_get_vmid_bits();
 	kvm_info("%d-bit VMID\n", kvm_vmid_bits);
@@ -1295,11 +1324,6 @@
 		}
 	}
 
-	/*
-	 * Execute the init code on each CPU.
-	 */
-	on_each_cpu(cpu_init_hyp_mode, NULL, 1);
-
 #ifndef CONFIG_HOTPLUG_CPU
 	free_boot_hyp_pgd();
 #endif
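
The hotplug notifier can go away because the common KVM code now calls kvm_arch_hardware_enable()/kvm_arch_hardware_disable() at the right points, and the new per-CPU kvm_arm_hardware_enabled flag makes both paths idempotent. The flag's lifecycle across hotplug and CPU PM, summarised as a sketch (names from the hunks above; the table itself is illustrative):

/*
 *   CPU online    -> _kvm_arch_hardware_enable()   : init hyp,  flag := 1
 *   CPU_PM_ENTER  -> cpu_hyp_reset()               : reset hyp, flag kept
 *   CPU_PM_EXIT   -> cpu_hyp_reinit() if flag set  : re-init on resume
 *   CPU offline   -> _kvm_arch_hardware_disable()  : reset hyp, flag := 0
 *
 * Leaving the flag set across CPU_PM_ENTER is deliberate: it records
 * "hyp was enabled before suspend", so CPU_PM_EXIT re-enables only the
 * CPUs that actually need it.
 */
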
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index d6d4191..be30212 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -1666,6 +1666,11 @@
 	return hyp_idmap_vector;
 }
 
+phys_addr_t kvm_get_idmap_start(void)
+{
+	return hyp_idmap_start;
+}
+
 int kvm_mmu_init(void)
 {
 	int err;
diff --git a/arch/arm/mach-berlin/berlin.c b/arch/arm/mach-berlin/berlin.c
index 25d7387..ac181c6 100644
--- a/arch/arm/mach-berlin/berlin.c
+++ b/arch/arm/mach-berlin/berlin.c
@@ -18,11 +18,6 @@
 #include <asm/hardware/cache-l2x0.h>
 #include <asm/mach/arch.h>
 
-static void __init berlin_init_late(void)
-{
-	platform_device_register_simple("cpufreq-dt", -1, NULL, 0);
-}
-
 static const char * const berlin_dt_compat[] = {
 	"marvell,berlin",
 	NULL,
@@ -30,7 +25,6 @@
 
 DT_MACHINE_START(BERLIN_DT, "Marvell Berlin")
 	.dt_compat	= berlin_dt_compat,
-	.init_late	= berlin_init_late,
 	/*
 	 * with DT probing for L2CCs, berlin_init_machine can be removed.
 	 * Note: 88DE3005 (Armada 1500-mini) uses pl310 l2cc
diff --git a/arch/arm/mach-davinci/devices-da8xx.c b/arch/arm/mach-davinci/devices-da8xx.c
index 725e693..add3771 100644
--- a/arch/arm/mach-davinci/devices-da8xx.c
+++ b/arch/arm/mach-davinci/devices-da8xx.c
@@ -751,16 +751,6 @@
 		.end	= IRQ_DA8XX_MMCSDINT0,
 		.flags	= IORESOURCE_IRQ,
 	},
-	{		/* DMA RX */
-		.start	= DA8XX_DMA_MMCSD0_RX,
-		.end	= DA8XX_DMA_MMCSD0_RX,
-		.flags	= IORESOURCE_DMA,
-	},
-	{		/* DMA TX */
-		.start	= DA8XX_DMA_MMCSD0_TX,
-		.end	= DA8XX_DMA_MMCSD0_TX,
-		.flags	= IORESOURCE_DMA,
-	},
 };
 
 static struct platform_device da8xx_mmcsd0_device = {
@@ -788,16 +778,6 @@
 		.end	= IRQ_DA850_MMCSDINT0_1,
 		.flags	= IORESOURCE_IRQ,
 	},
-	{		/* DMA RX */
-		.start	= DA850_DMA_MMCSD1_RX,
-		.end	= DA850_DMA_MMCSD1_RX,
-		.flags	= IORESOURCE_DMA,
-	},
-	{		/* DMA TX */
-		.start	= DA850_DMA_MMCSD1_TX,
-		.end	= DA850_DMA_MMCSD1_TX,
-		.flags	= IORESOURCE_DMA,
-	},
 };
 
 static struct platform_device da850_mmcsd1_device = {
diff --git a/arch/arm/mach-davinci/devices.c b/arch/arm/mach-davinci/devices.c
index 6257aa4..67d26c5 100644
--- a/arch/arm/mach-davinci/devices.c
+++ b/arch/arm/mach-davinci/devices.c
@@ -144,14 +144,6 @@
 		.start = IRQ_SDIOINT,
 		.flags = IORESOURCE_IRQ,
 	},
-	/* DMA channels: RX, then TX */
-	{
-		.start = EDMA_CTLR_CHAN(0, DAVINCI_DMA_MMCRXEVT),
-		.flags = IORESOURCE_DMA,
-	}, {
-		.start = EDMA_CTLR_CHAN(0, DAVINCI_DMA_MMCTXEVT),
-		.flags = IORESOURCE_DMA,
-	},
 };
 
 static struct platform_device davinci_mmcsd0_device = {
@@ -181,14 +173,6 @@
 		.start = IRQ_DM355_SDIOINT1,
 		.flags = IORESOURCE_IRQ,
 	},
-	/* DMA channels: RX, then TX */
-	{
-		.start = EDMA_CTLR_CHAN(0, 30),	/* rx */
-		.flags = IORESOURCE_DMA,
-	}, {
-		.start = EDMA_CTLR_CHAN(0, 31),	/* tx */
-		.flags = IORESOURCE_DMA,
-	},
 };
 
 static struct platform_device davinci_mmcsd1_device = {
diff --git a/arch/arm/mach-exynos/exynos.c b/arch/arm/mach-exynos/exynos.c
index bbf51a4..4d3b056 100644
--- a/arch/arm/mach-exynos/exynos.c
+++ b/arch/arm/mach-exynos/exynos.c
@@ -213,33 +213,6 @@
 	exynos_map_pmu();
 }
 
-static const struct of_device_id exynos_cpufreq_matches[] = {
-	{ .compatible = "samsung,exynos3250", .data = "cpufreq-dt" },
-	{ .compatible = "samsung,exynos4210", .data = "cpufreq-dt" },
-	{ .compatible = "samsung,exynos4212", .data = "cpufreq-dt" },
-	{ .compatible = "samsung,exynos4412", .data = "cpufreq-dt" },
-	{ .compatible = "samsung,exynos5250", .data = "cpufreq-dt" },
-#ifndef CONFIG_BL_SWITCHER
-	{ .compatible = "samsung,exynos5420", .data = "cpufreq-dt" },
-	{ .compatible = "samsung,exynos5800", .data = "cpufreq-dt" },
-#endif
-	{ /* sentinel */ }
-};
-
-static void __init exynos_cpufreq_init(void)
-{
-	struct device_node *root = of_find_node_by_path("/");
-	const struct of_device_id *match;
-
-	match = of_match_node(exynos_cpufreq_matches, root);
-	if (!match) {
-		platform_device_register_simple("exynos-cpufreq", -1, NULL, 0);
-		return;
-	}
-
-	platform_device_register_simple(match->data, -1, NULL, 0);
-}
-
 static void __init exynos_dt_machine_init(void)
 {
 	/*
@@ -262,8 +235,6 @@
 	    of_machine_is_compatible("samsung,exynos5250"))
 		platform_device_register(&exynos_cpuidle);
 
-	exynos_cpufreq_init();
-
 	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
 }
 
diff --git a/arch/arm/mach-imx/imx27-dt.c b/arch/arm/mach-imx/imx27-dt.c
index bd42d1b..530a728 100644
--- a/arch/arm/mach-imx/imx27-dt.c
+++ b/arch/arm/mach-imx/imx27-dt.c
@@ -18,15 +18,6 @@
 #include "common.h"
 #include "mx27.h"
 
-static void __init imx27_dt_init(void)
-{
-	struct platform_device_info devinfo = { .name = "cpufreq-dt", };
-
-	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
-
-	platform_device_register_full(&devinfo);
-}
-
 static const char * const imx27_dt_board_compat[] __initconst = {
 	"fsl,imx27",
 	NULL
@@ -36,6 +27,5 @@
 	.map_io		= mx27_map_io,
 	.init_early	= imx27_init_early,
 	.init_irq	= mx27_init_irq,
-	.init_machine	= imx27_dt_init,
 	.dt_compat	= imx27_dt_board_compat,
 MACHINE_END
diff --git a/arch/arm/mach-imx/mach-imx51.c b/arch/arm/mach-imx/mach-imx51.c
index 6883fba..10a82a4 100644
--- a/arch/arm/mach-imx/mach-imx51.c
+++ b/arch/arm/mach-imx/mach-imx51.c
@@ -50,13 +50,10 @@
 
 static void __init imx51_dt_init(void)
 {
-	struct platform_device_info devinfo = { .name = "cpufreq-dt", };
-
 	imx51_ipu_mipi_setup();
 	imx_src_init();
 
 	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
-	platform_device_register_full(&devinfo);
 }
 
 static void __init imx51_init_late(void)
diff --git a/arch/arm/mach-imx/mach-imx53.c b/arch/arm/mach-imx/mach-imx53.c
index 86316a9..18b5c5c13 100644
--- a/arch/arm/mach-imx/mach-imx53.c
+++ b/arch/arm/mach-imx/mach-imx53.c
@@ -40,8 +40,6 @@
 static void __init imx53_init_late(void)
 {
 	imx53_pm_init();
-
-	platform_device_register_simple("cpufreq-dt", -1, NULL, 0);
 }
 
 static const char * const imx53_dt_board_compat[] __initconst = {
diff --git a/arch/arm/mach-imx/mach-imx7d.c b/arch/arm/mach-imx/mach-imx7d.c
index 5a27f20..b450f52 100644
--- a/arch/arm/mach-imx/mach-imx7d.c
+++ b/arch/arm/mach-imx/mach-imx7d.c
@@ -105,11 +105,6 @@
 	irqchip_init();
 }
 
-static void __init imx7d_init_late(void)
-{
-	platform_device_register_simple("cpufreq-dt", -1, NULL, 0);
-}
-
 static const char *const imx7d_dt_compat[] __initconst = {
 	"fsl,imx7d",
 	NULL,
@@ -117,7 +112,6 @@
 
 DT_MACHINE_START(IMX7D, "Freescale i.MX7 Dual (Device Tree)")
 	.init_irq	= imx7d_init_irq,
-	.init_late	= imx7d_init_late,
 	.init_machine	= imx7d_init_machine,
 	.dt_compat	= imx7d_dt_compat,
 MACHINE_END
diff --git a/arch/arm/mach-mvebu/pmsu.c b/arch/arm/mach-mvebu/pmsu.c
index ed8fda4..b444423 100644
--- a/arch/arm/mach-mvebu/pmsu.c
+++ b/arch/arm/mach-mvebu/pmsu.c
@@ -20,7 +20,6 @@
 
 #include <linux/clk.h>
 #include <linux/cpu_pm.h>
-#include <linux/cpufreq-dt.h>
 #include <linux/delay.h>
 #include <linux/init.h>
 #include <linux/io.h>
@@ -29,7 +28,6 @@
 #include <linux/of_address.h>
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
-#include <linux/pm_opp.h>
 #include <linux/resource.h>
 #include <linux/slab.h>
 #include <linux/smp.h>
@@ -608,86 +606,3 @@
 
 	return 0;
 }
-
-struct cpufreq_dt_platform_data cpufreq_dt_pd = {
-	.independent_clocks = true,
-};
-
-static int __init armada_xp_pmsu_cpufreq_init(void)
-{
-	struct device_node *np;
-	struct resource res;
-	int ret, cpu;
-
-	if (!of_machine_is_compatible("marvell,armadaxp"))
-		return 0;
-
-	/*
-	 * In order to have proper cpufreq handling, we need to ensure
-	 * that the Device Tree description of the CPU clock includes
-	 * the definition of the PMU DFS registers. If not, we do not
-	 * register the clock notifier and the cpufreq driver. This
-	 * piece of code is only for compatibility with old Device
-	 * Trees.
-	 */
-	np = of_find_compatible_node(NULL, NULL, "marvell,armada-xp-cpu-clock");
-	if (!np)
-		return 0;
-
-	ret = of_address_to_resource(np, 1, &res);
-	if (ret) {
-		pr_warn(FW_WARN "not enabling cpufreq, deprecated armada-xp-cpu-clock binding\n");
-		of_node_put(np);
-		return 0;
-	}
-
-	of_node_put(np);
-
-	/*
-	 * For each CPU, this loop registers the operating points
-	 * supported (which are the nominal CPU frequency and half of
-	 * it), and registers the clock notifier that will take care
-	 * of doing the PMSU part of a frequency transition.
-	 */
-	for_each_possible_cpu(cpu) {
-		struct device *cpu_dev;
-		struct clk *clk;
-		int ret;
-
-		cpu_dev = get_cpu_device(cpu);
-		if (!cpu_dev) {
-			pr_err("Cannot get CPU %d\n", cpu);
-			continue;
-		}
-
-		clk = clk_get(cpu_dev, 0);
-		if (IS_ERR(clk)) {
-			pr_err("Cannot get clock for CPU %d\n", cpu);
-			return PTR_ERR(clk);
-		}
-
-		/*
-		 * In case of a failure of dev_pm_opp_add(), we don't
-		 * bother with cleaning up the registered OPP (there's
-		 * no function to do so), and simply cancel the
-		 * registration of the cpufreq device.
-		 */
-		ret = dev_pm_opp_add(cpu_dev, clk_get_rate(clk), 0);
-		if (ret) {
-			clk_put(clk);
-			return ret;
-		}
-
-		ret = dev_pm_opp_add(cpu_dev, clk_get_rate(clk) / 2, 0);
-		if (ret) {
-			clk_put(clk);
-			return ret;
-		}
-	}
-
-	platform_device_register_data(NULL, "cpufreq-dt", -1,
-				      &cpufreq_dt_pd, sizeof(cpufreq_dt_pd));
-	return 0;
-}
-
-device_initcall(armada_xp_pmsu_cpufreq_init);
diff --git a/arch/arm/mach-omap2/pm.c b/arch/arm/mach-omap2/pm.c
index 58920bc..2f7b11d 100644
--- a/arch/arm/mach-omap2/pm.c
+++ b/arch/arm/mach-omap2/pm.c
@@ -277,13 +277,10 @@
 
 static inline void omap_init_cpufreq(void)
 {
-	struct platform_device_info devinfo = { };
+	struct platform_device_info devinfo = { .name = "omap-cpufreq" };
 
 	if (!of_have_populated_dt())
-		devinfo.name = "omap-cpufreq";
-	else
-		devinfo.name = "cpufreq-dt";
-	platform_device_register_full(&devinfo);
+		platform_device_register_full(&devinfo);
 }
 
 static int __init omap2_common_pm_init(void)
diff --git a/arch/arm/mach-rockchip/rockchip.c b/arch/arm/mach-rockchip/rockchip.c
index 3f07cc5..beb71da 100644
--- a/arch/arm/mach-rockchip/rockchip.c
+++ b/arch/arm/mach-rockchip/rockchip.c
@@ -74,7 +74,6 @@
 {
 	rockchip_suspend_init();
 	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
-	platform_device_register_simple("cpufreq-dt", 0, NULL, 0);
 }
 
 static const char * const rockchip_board_dt_compat[] = {
diff --git a/arch/arm/mach-shmobile/Makefile b/arch/arm/mach-shmobile/Makefile
index a65c80ac..c9ea0e6 100644
--- a/arch/arm/mach-shmobile/Makefile
+++ b/arch/arm/mach-shmobile/Makefile
@@ -38,7 +38,6 @@
 
 # PM objects
 obj-$(CONFIG_SUSPEND)		+= suspend.o
-obj-$(CONFIG_CPU_FREQ)		+= cpufreq.o
 obj-$(CONFIG_PM_RCAR)		+= pm-rcar.o
 obj-$(CONFIG_PM_RMOBILE)	+= pm-rmobile.o
 obj-$(CONFIG_ARCH_RCAR_GEN2)	+= pm-rcar-gen2.o
diff --git a/arch/arm/mach-shmobile/common.h b/arch/arm/mach-shmobile/common.h
index 5464b7a..3b562d8 100644
--- a/arch/arm/mach-shmobile/common.h
+++ b/arch/arm/mach-shmobile/common.h
@@ -25,16 +25,9 @@
 static inline void shmobile_smp_apmu_suspend_init(void) { }
 #endif
 
-#ifdef CONFIG_CPU_FREQ
-int shmobile_cpufreq_init(void);
-#else
-static inline int shmobile_cpufreq_init(void) { return 0; }
-#endif
-
 static inline void __init shmobile_init_late(void)
 {
 	shmobile_suspend_init();
-	shmobile_cpufreq_init();
 }
 
 #endif /* __ARCH_MACH_COMMON_H */
diff --git a/arch/arm/mach-shmobile/cpufreq.c b/arch/arm/mach-shmobile/cpufreq.c
deleted file mode 100644
index 634d701..0000000
--- a/arch/arm/mach-shmobile/cpufreq.c
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
- * CPUFreq support code for SH-Mobile ARM
- *
- *  Copyright (C) 2014 Gaku Inami
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-
-#include <linux/platform_device.h>
-
-#include "common.h"
-
-int __init shmobile_cpufreq_init(void)
-{
-	platform_device_register_simple("cpufreq-dt", -1, NULL, 0);
-	return 0;
-}
diff --git a/arch/arm/mach-socfpga/core.h b/arch/arm/mach-socfpga/core.h
index 575195b..65e1817 100644
--- a/arch/arm/mach-socfpga/core.h
+++ b/arch/arm/mach-socfpga/core.h
@@ -38,6 +38,8 @@
 extern void socfpga_sysmgr_init(void);
 void socfpga_init_l2_ecc(void);
 void socfpga_init_ocram_ecc(void);
+void socfpga_init_arria10_l2_ecc(void);
+void socfpga_init_arria10_ocram_ecc(void);
 
 extern void __iomem *sys_manager_base_addr;
 extern void __iomem *rst_manager_base_addr;
diff --git a/arch/arm/mach-socfpga/l2_cache.c b/arch/arm/mach-socfpga/l2_cache.c
index e3907ab..4267c95f 100644
--- a/arch/arm/mach-socfpga/l2_cache.c
+++ b/arch/arm/mach-socfpga/l2_cache.c
@@ -17,6 +17,20 @@
 #include <linux/of_platform.h>
 #include <linux/of_address.h>
 
+#include "core.h"
+
+/* A10 System Manager L2 ECC Control register */
+#define A10_MPU_CTRL_L2_ECC_OFST          0x0
+#define A10_MPU_CTRL_L2_ECC_EN            BIT(0)
+
+/* A10 System Manager Global IRQ Mask register */
+#define A10_SYSMGR_ECC_INTMASK_CLR_OFST   0x98
+#define A10_SYSMGR_ECC_INTMASK_CLR_L2     BIT(0)
+
+/* A10 System Manager L2 ECC IRQ Clear register */
+#define A10_SYSMGR_MPU_CLEAR_L2_ECC_OFST  0xA8
+#define A10_SYSMGR_MPU_CLEAR_L2_ECC       (BIT(31) | BIT(15))
+
 void socfpga_init_l2_ecc(void)
 {
 	struct device_node *np;
@@ -39,3 +53,38 @@
 	writel(0x01, mapped_l2_edac_addr);
 	iounmap(mapped_l2_edac_addr);
 }
+
+void socfpga_init_arria10_l2_ecc(void)
+{
+	struct device_node *np;
+	void __iomem *mapped_l2_edac_addr;
+
+	/* Find the L2 EDAC device tree node */
+	np = of_find_compatible_node(NULL, NULL, "altr,socfpga-a10-l2-ecc");
+	if (!np) {
+		pr_err("Unable to find socfpga-a10-l2-ecc in dtb\n");
+		return;
+	}
+
+	mapped_l2_edac_addr = of_iomap(np, 0);
+	of_node_put(np);
+	if (!mapped_l2_edac_addr) {
+		pr_err("Unable to find L2 ECC mapping in dtb\n");
+		return;
+	}
+
+	if (!sys_manager_base_addr) {
+		pr_err("System Manager not mapped for L2 ECC\n");
+		goto exit;
+	}
+	/* Clear any pending IRQs */
+	writel(A10_SYSMGR_MPU_CLEAR_L2_ECC, (sys_manager_base_addr +
+	       A10_SYSMGR_MPU_CLEAR_L2_ECC_OFST));
+	/* Enable ECC */
+	writel(A10_SYSMGR_ECC_INTMASK_CLR_L2, sys_manager_base_addr +
+	       A10_SYSMGR_ECC_INTMASK_CLR_OFST);
+	writel(A10_MPU_CTRL_L2_ECC_EN, mapped_l2_edac_addr +
+	       A10_MPU_CTRL_L2_ECC_OFST);
+exit:
+	iounmap(mapped_l2_edac_addr);
+}
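
The enable sequence in socfpga_init_arria10_l2_ecc() is order-sensitive; a sketch of the intent behind the three writes:

/*
 * 1. clear stale status  (A10_SYSMGR_MPU_CLEAR_L2_ECC)   -- drop any error
 *    latched while ECC was off
 * 2. unmask the IRQ      (A10_SYSMGR_ECC_INTMASK_CLR_L2) -- at the system
 *    manager
 * 3. enable ECC          (A10_MPU_CTRL_L2_ECC_EN)        -- at the L2
 *    controller itself
 *
 * Clearing before unmasking avoids taking a spurious interrupt the
 * moment the mask is lifted.
 */
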
diff --git a/arch/arm/mach-socfpga/ocram.c b/arch/arm/mach-socfpga/ocram.c
index 60ec643..10d6732 100644
--- a/arch/arm/mach-socfpga/ocram.c
+++ b/arch/arm/mach-socfpga/ocram.c
@@ -13,12 +13,15 @@
  * You should have received a copy of the GNU General Public License along with
  * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
+#include <linux/delay.h>
 #include <linux/io.h>
 #include <linux/genalloc.h>
 #include <linux/module.h>
 #include <linux/of_address.h>
 #include <linux/of_platform.h>
 
+#include "core.h"
+
 #define ALTR_OCRAM_CLEAR_ECC          0x00000018
 #define ALTR_OCRAM_ECC_EN             0x00000019
 
@@ -47,3 +50,133 @@
 
 	iounmap(mapped_ocr_edac_addr);
 }
+
+/* Arria10 OCRAM Section */
+#define ALTR_A10_ECC_CTRL_OFST          0x08
+#define ALTR_A10_OCRAM_ECC_EN_CTL       (BIT(1) | BIT(0))
+#define ALTR_A10_ECC_INITA              BIT(16)
+
+#define ALTR_A10_ECC_INITSTAT_OFST      0x0C
+#define ALTR_A10_ECC_INITCOMPLETEA      BIT(0)
+#define ALTR_A10_ECC_INITCOMPLETEB      BIT(8)
+
+#define ALTR_A10_ECC_ERRINTEN_OFST      0x10
+#define ALTR_A10_ECC_SERRINTEN          BIT(0)
+
+#define ALTR_A10_ECC_INTSTAT_OFST       0x20
+#define ALTR_A10_ECC_SERRPENA           BIT(0)
+#define ALTR_A10_ECC_DERRPENA           BIT(8)
+#define ALTR_A10_ECC_ERRPENA_MASK       (ALTR_A10_ECC_SERRPENA | \
+					 ALTR_A10_ECC_DERRPENA)
+/* ECC Manager Defines */
+#define A10_SYSMGR_ECC_INTMASK_SET_OFST   0x94
+#define A10_SYSMGR_ECC_INTMASK_CLR_OFST   0x98
+#define A10_SYSMGR_ECC_INTMASK_OCRAM      BIT(1)
+
+#define ALTR_A10_ECC_INIT_WATCHDOG_10US   10000
+
+static inline void ecc_set_bits(u32 bit_mask, void __iomem *ioaddr)
+{
+	u32 value = readl(ioaddr);
+
+	value |= bit_mask;
+	writel(value, ioaddr);
+}
+
+static inline void ecc_clear_bits(u32 bit_mask, void __iomem *ioaddr)
+{
+	u32 value = readl(ioaddr);
+
+	value &= ~bit_mask;
+	writel(value, ioaddr);
+}
+
+static inline int ecc_test_bits(u32 bit_mask, void __iomem *ioaddr)
+{
+	u32 value = readl(ioaddr);
+
+	return (value & bit_mask) ? 1 : 0;
+}
+
+/*
+ * This function uses the memory initialization block in the Arria10 ECC
+ * controller to initialize/clear the entire memory data and ECC data.
+ */
+static int altr_init_memory_port(void __iomem *ioaddr)
+{
+	int limit = ALTR_A10_ECC_INIT_WATCHDOG_10US;
+
+	ecc_set_bits(ALTR_A10_ECC_INITA, (ioaddr + ALTR_A10_ECC_CTRL_OFST));
+	while (limit--) {
+		if (ecc_test_bits(ALTR_A10_ECC_INITCOMPLETEA,
+				  (ioaddr + ALTR_A10_ECC_INITSTAT_OFST)))
+			break;
+		udelay(1);
+	}
+	if (limit < 0)
+		return -EBUSY;
+
+	/* Clear any pending ECC interrupts */
+	writel(ALTR_A10_ECC_ERRPENA_MASK,
+	       (ioaddr + ALTR_A10_ECC_INTSTAT_OFST));
+
+	return 0;
+}
+
+void socfpga_init_arria10_ocram_ecc(void)
+{
+	struct device_node *np;
+	int ret = 0;
+	void __iomem *ecc_block_base;
+
+	if (!sys_manager_base_addr) {
+		pr_err("SOCFPGA: sys-mgr is not initialized\n");
+		return;
+	}
+
+	/* Find the OCRAM EDAC device tree node */
+	np = of_find_compatible_node(NULL, NULL, "altr,socfpga-a10-ocram-ecc");
+	if (!np) {
+		pr_err("Unable to find socfpga-a10-ocram-ecc\n");
+		return;
+	}
+
+	/* Map the ECC Block */
+	ecc_block_base = of_iomap(np, 0);
+	of_node_put(np);
+	if (!ecc_block_base) {
+		pr_err("Unable to map OCRAM ECC block\n");
+		return;
+	}
+
+	/* Disable ECC */
+	writel(ALTR_A10_OCRAM_ECC_EN_CTL,
+	       sys_manager_base_addr + A10_SYSMGR_ECC_INTMASK_SET_OFST);
+	ecc_clear_bits(ALTR_A10_ECC_SERRINTEN,
+		       (ecc_block_base + ALTR_A10_ECC_ERRINTEN_OFST));
+	ecc_clear_bits(ALTR_A10_OCRAM_ECC_EN_CTL,
+		       (ecc_block_base + ALTR_A10_ECC_CTRL_OFST));
+
+	/* Ensure all writes complete */
+	wmb();
+
+	/* Use HW initialization block to initialize memory for ECC */
+	ret = altr_init_memory_port(ecc_block_base);
+	if (ret) {
+		pr_err("ECC: cannot init OCRAM PORTA memory\n");
+		goto exit;
+	}
+
+	/* Enable ECC */
+	ecc_set_bits(ALTR_A10_OCRAM_ECC_EN_CTL,
+		     (ecc_block_base + ALTR_A10_ECC_CTRL_OFST));
+	ecc_set_bits(ALTR_A10_ECC_SERRINTEN,
+		     (ecc_block_base + ALTR_A10_ECC_ERRINTEN_OFST));
+	writel(ALTR_A10_OCRAM_ECC_EN_CTL,
+	       sys_manager_base_addr + A10_SYSMGR_ECC_INTMASK_CLR_OFST);
+
+	/* Ensure all writes complete */
+	wmb();
+exit:
+	iounmap(ecc_block_base);
+}
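
altr_init_memory_port() relies on a small countdown idiom: after while (limit--) runs dry, limit ends at -1, so limit < 0 distinguishes a timeout from a break on the very last iteration (which leaves limit == 0). The idiom in isolation (standalone sketch, not kernel code):

static int wait_for_bit(volatile const unsigned int *reg, unsigned int mask)
{
	int limit = 10000;		/* ~10 ms budget at one poll per us */

	while (limit--) {
		if (*reg & mask)
			break;
		/* udelay(1) here in the kernel version */
	}
	return (limit < 0) ? -1 : 0;	/* -EBUSY in the kernel version */
}
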
diff --git a/arch/arm/mach-socfpga/socfpga.c b/arch/arm/mach-socfpga/socfpga.c
index 7e0aad2..dde14f7 100644
--- a/arch/arm/mach-socfpga/socfpga.c
+++ b/arch/arm/mach-socfpga/socfpga.c
@@ -66,6 +66,16 @@
 		socfpga_init_ocram_ecc();
 }
 
+static void __init socfpga_arria10_init_irq(void)
+{
+	irqchip_init();
+	socfpga_sysmgr_init();
+	if (IS_ENABLED(CONFIG_EDAC_ALTERA_L2C))
+		socfpga_init_arria10_l2_ecc();
+	if (IS_ENABLED(CONFIG_EDAC_ALTERA_OCRAM))
+		socfpga_init_arria10_ocram_ecc();
+}
+
 static void socfpga_cyclone5_restart(enum reboot_mode mode, const char *cmd)
 {
 	u32 temp;
@@ -113,7 +123,7 @@
 DT_MACHINE_START(SOCFPGA_A10, "Altera SOCFPGA Arria10")
 	.l2c_aux_val	= 0,
 	.l2c_aux_mask	= ~0,
-	.init_irq	= socfpga_init_irq,
+	.init_irq	= socfpga_arria10_init_irq,
 	.restart	= socfpga_arria10_restart,
 	.dt_compat	= altera_a10_dt_match,
 MACHINE_END
diff --git a/arch/arm/mach-sunxi/sunxi.c b/arch/arm/mach-sunxi/sunxi.c
index 3c15619..95dca8c 100644
--- a/arch/arm/mach-sunxi/sunxi.c
+++ b/arch/arm/mach-sunxi/sunxi.c
@@ -17,11 +17,6 @@
 
 #include <asm/mach/arch.h>
 
-static void __init sunxi_dt_cpufreq_init(void)
-{
-	platform_device_register_simple("cpufreq-dt", -1, NULL, 0);
-}
-
 static const char * const sunxi_board_dt_compat[] = {
 	"allwinner,sun4i-a10",
 	"allwinner,sun5i-a10s",
@@ -32,7 +27,6 @@
 
 DT_MACHINE_START(SUNXI_DT, "Allwinner sun4i/sun5i Families")
 	.dt_compat	= sunxi_board_dt_compat,
-	.init_late	= sunxi_dt_cpufreq_init,
 MACHINE_END
 
 static const char * const sun6i_board_dt_compat[] = {
@@ -53,7 +47,6 @@
 DT_MACHINE_START(SUN6I_DT, "Allwinner sun6i (A31) Family")
 	.init_time	= sun6i_timer_init,
 	.dt_compat	= sun6i_board_dt_compat,
-	.init_late	= sunxi_dt_cpufreq_init,
 MACHINE_END
 
 static const char * const sun7i_board_dt_compat[] = {
@@ -63,7 +56,6 @@
 
 DT_MACHINE_START(SUN7I_DT, "Allwinner sun7i (A20) Family")
 	.dt_compat	= sun7i_board_dt_compat,
-	.init_late	= sunxi_dt_cpufreq_init,
 MACHINE_END
 
 static const char * const sun8i_board_dt_compat[] = {
@@ -77,7 +69,6 @@
 DT_MACHINE_START(SUN8I_DT, "Allwinner sun8i Family")
 	.init_time	= sun6i_timer_init,
 	.dt_compat	= sun8i_board_dt_compat,
-	.init_late	= sunxi_dt_cpufreq_init,
 MACHINE_END
 
 static const char * const sun9i_board_dt_compat[] = {
diff --git a/arch/arm/mach-zynq/common.c b/arch/arm/mach-zynq/common.c
index 860ffb6..da876d2 100644
--- a/arch/arm/mach-zynq/common.c
+++ b/arch/arm/mach-zynq/common.c
@@ -110,7 +110,6 @@
  */
 static void __init zynq_init_machine(void)
 {
-	struct platform_device_info devinfo = { .name = "cpufreq-dt", };
 	struct soc_device_attribute *soc_dev_attr;
 	struct soc_device *soc_dev;
 	struct device *parent = NULL;
@@ -145,7 +144,6 @@
 	of_platform_populate(NULL, of_default_bus_match_table, NULL, parent);
 
 	platform_device_register(&zynq_cpuidle_device);
-	platform_device_register_full(&devinfo);
 }
 
 static void __init zynq_timer_init(void)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 4f43622..8845c0d 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -11,6 +11,7 @@
 	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
 	select ARCH_USE_CMPXCHG_LOCKREF
 	select ARCH_SUPPORTS_ATOMIC_RMW
+	select ARCH_SUPPORTS_NUMA_BALANCING
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
 	select ARCH_WANT_FRAME_POINTERS
@@ -58,11 +59,14 @@
 	select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_TRACEHOOK
+	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+	select HAVE_ARM_SMCCC
 	select HAVE_BPF_JIT
 	select HAVE_C_RECORDMCOUNT
 	select HAVE_CC_STACKPROTECTOR
 	select HAVE_CMPXCHG_DOUBLE
 	select HAVE_CMPXCHG_LOCAL
+	select HAVE_CONTEXT_TRACKING
 	select HAVE_DEBUG_BUGVERBOSE
 	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_DMA_API_DEBUG
@@ -76,6 +80,7 @@
 	select HAVE_HW_BREAKPOINT if PERF_EVENTS
 	select HAVE_IRQ_TIME_ACCOUNTING
 	select HAVE_MEMBLOCK
+	select HAVE_MEMBLOCK_NODE_MAP if NUMA
 	select HAVE_PATA_PLATFORM
 	select HAVE_PERF_EVENTS
 	select HAVE_PERF_REGS
@@ -89,15 +94,13 @@
 	select NO_BOOTMEM
 	select OF
 	select OF_EARLY_FLATTREE
+	select OF_NUMA if NUMA && OF
 	select OF_RESERVED_MEM
 	select PERF_USE_VMALLOC
 	select POWER_RESET
 	select POWER_SUPPLY
-	select RTC_LIB
 	select SPARSE_IRQ
 	select SYSCTL_EXCEPTION_TRACE
-	select HAVE_CONTEXT_TRACKING
-	select HAVE_ARM_SMCCC
 	help
 	  ARM 64-bit (AArch64) Linux support.
 
@@ -546,10 +549,35 @@
 	  Say Y here to experiment with turning CPUs off and on.  CPUs
 	  can be controlled through /sys/devices/system/cpu.
 
+# Common NUMA Features
+config NUMA
+	bool "NUMA Memory Allocation and Scheduler Support"
+	depends on SMP
+	help
+	  Enable NUMA (Non Uniform Memory Access) support.
+
+	  The kernel will try to allocate memory used by a CPU on that
+	  CPU's local memory, adding NUMA awareness to memory allocation
+	  and scheduling.
+
+config NODES_SHIFT
+	int "Maximum NUMA Nodes (as a power of 2)"
+	range 1 10
+	default "2"
+	depends on NEED_MULTIPLE_NODES
+	help
+	  Specify the maximum number of NUMA Nodes available on the target
+	  system.  Increases memory reserved to accommodate various tables.
+
+config USE_PERCPU_NUMA_NODE_ID
+	def_bool y
+	depends on NUMA
+
 source kernel/Kconfig.preempt
 source kernel/Kconfig.hz
 
 config ARCH_SUPPORTS_DEBUG_PAGEALLOC
+	depends on !HIBERNATION
 	def_bool y
 
 config ARCH_HAS_HOLES_MEMORYMODEL
@@ -578,9 +606,6 @@
 config ARCH_WANT_HUGE_PMD_SHARE
 	def_bool y if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36)
 
-config HAVE_ARCH_TRANSPARENT_HUGEPAGE
-	def_bool y
-
 config ARCH_HAS_CACHE_LINE_SIZE
 	def_bool y
 
@@ -953,6 +978,14 @@
 
 source "kernel/power/Kconfig"
 
+config ARCH_HIBERNATION_POSSIBLE
+	def_bool y
+	depends on CPU_PM
+
+config ARCH_HIBERNATION_HEADER
+	def_bool y
+	depends on HIBERNATION
+
 config ARCH_SUSPEND_POSSIBLE
 	def_bool y
 
diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug
index 7e76845..710fde4 100644
--- a/arch/arm64/Kconfig.debug
+++ b/arch/arm64/Kconfig.debug
@@ -59,7 +59,7 @@
 	  If in doubt, say Y
 
 config DEBUG_ALIGN_RODATA
-	depends on DEBUG_RODATA && ARM64_4K_PAGES
+	depends on DEBUG_RODATA
 	bool "Align linker sections up to SECTION_SIZE"
 	help
 	  If this option is enabled, sections that may potentially be marked as
diff --git a/arch/arm64/boot/dts/broadcom/vulcan.dtsi b/arch/arm64/boot/dts/broadcom/vulcan.dtsi
index 85820e2..34e11a9 100644
--- a/arch/arm64/boot/dts/broadcom/vulcan.dtsi
+++ b/arch/arm64/boot/dts/broadcom/vulcan.dtsi
@@ -86,7 +86,7 @@
 	};
 
 	pmu {
-		compatible = "arm,armv8-pmuv3";
+		compatible = "brcm,vulcan-pmu", "arm,armv8-pmuv3";
 		interrupts = <GIC_PPI 7 IRQ_TYPE_LEVEL_HIGH>; /* PMU overflow */
 	};
 
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index a44ef99..c5e0132 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -264,6 +264,7 @@
 CONFIG_PHY_HI6220_USB=y
 CONFIG_PHY_XGENE=y
 CONFIG_ARM_SCPI_PROTOCOL=y
+CONFIG_ACPI=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT3_FS=y
 CONFIG_FANOTIFY=y
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 70f7b9e..10b017c 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -1,5 +1,5 @@
 /*
- * Based on arch/arm/include/asm/assembler.h
+ * Based on arch/arm/include/asm/assembler.h, arch/arm/mm/proc-macros.S
  *
  * Copyright (C) 1996-2000 Russell King
  * Copyright (C) 2012 ARM Ltd.
@@ -23,22 +23,13 @@
 #ifndef __ASM_ASSEMBLER_H
 #define __ASM_ASSEMBLER_H
 
+#include <asm/asm-offsets.h>
+#include <asm/page.h>
+#include <asm/pgtable-hwdef.h>
 #include <asm/ptrace.h>
 #include <asm/thread_info.h>
 
 /*
- * Stack pushing/popping (register pairs only). Equivalent to store decrement
- * before, load increment after.
- */
-	.macro	push, xreg1, xreg2
-	stp	\xreg1, \xreg2, [sp, #-16]!
-	.endm
-
-	.macro	pop, xreg1, xreg2
-	ldp	\xreg1, \xreg2, [sp], #16
-	.endm
-
-/*
  * Enable and disable interrupts.
  */
 	.macro	disable_irq
@@ -212,6 +203,102 @@
 	.endm
 
 /*
+ * vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm)
+ */
+	.macro	vma_vm_mm, rd, rn
+	ldr	\rd, [\rn, #VMA_VM_MM]
+	.endm
+
+/*
+ * mmid - get context id from mm pointer (mm->context.id)
+ */
+	.macro	mmid, rd, rn
+	ldr	\rd, [\rn, #MM_CONTEXT_ID]
+	.endm
+
+/*
+ * dcache_line_size - get the minimum D-cache line size from the CTR register.
+ */
+	.macro	dcache_line_size, reg, tmp
+	mrs	\tmp, ctr_el0			// read CTR
+	ubfm	\tmp, \tmp, #16, #19		// cache line size encoding
+	mov	\reg, #4			// bytes per word
+	lsl	\reg, \reg, \tmp		// actual cache line size
+	.endm
+
+/*
+ * icache_line_size - get the minimum I-cache line size from the CTR register.
+ */
+	.macro	icache_line_size, reg, tmp
+	mrs	\tmp, ctr_el0			// read CTR
+	and	\tmp, \tmp, #0xf		// cache line size encoding
+	mov	\reg, #4			// bytes per word
+	lsl	\reg, \reg, \tmp		// actual cache line size
+	.endm
+
+/*
+ * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map
+ */
+	.macro	tcr_set_idmap_t0sz, valreg, tmpreg
+#ifndef CONFIG_ARM64_VA_BITS_48
+	ldr_l	\tmpreg, idmap_t0sz
+	bfi	\valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH
+#endif
+	.endm
+
+/*
+ * Macro to perform a data cache maintenance operation for the interval
+ * [kaddr, kaddr + size)
+ *
+ * 	op:		operation passed to dc instruction
+ * 	domain:		domain used in dsb instruction
+ * 	kaddr:		starting virtual address of the region
+ * 	size:		size of the region
+ * 	Corrupts:	kaddr, size, tmp1, tmp2
+ */
+	.macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2
+	dcache_line_size \tmp1, \tmp2
+	add	\size, \kaddr, \size
+	sub	\tmp2, \tmp1, #1
+	bic	\kaddr, \kaddr, \tmp2
+9998:	dc	\op, \kaddr
+	add	\kaddr, \kaddr, \tmp1
+	cmp	\kaddr, \size
+	b.lo	9998b
+	dsb	\domain
+	.endm
+
+/*
+ * reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present
+ */
+	.macro	reset_pmuserenr_el0, tmpreg
+	mrs	\tmpreg, id_aa64dfr0_el1	// Check ID_AA64DFR0_EL1 PMUVer
+	sbfx	\tmpreg, \tmpreg, #8, #4
+	cmp	\tmpreg, #1			// Skip if no PMU present
+	b.lt	9000f
+	msr	pmuserenr_el0, xzr		// Disable PMU access from EL0
+9000:
+	.endm
+
+/*
+ * copy_page - copy src to dest using temp registers t1-t8
+ */
+	.macro copy_page dest:req src:req t1:req t2:req t3:req t4:req t5:req t6:req t7:req t8:req
+9998:	ldp	\t1, \t2, [\src]
+	ldp	\t3, \t4, [\src, #16]
+	ldp	\t5, \t6, [\src, #32]
+	ldp	\t7, \t8, [\src, #48]
+	add	\src, \src, #64
+	stnp	\t1, \t2, [\dest]
+	stnp	\t3, \t4, [\dest, #16]
+	stnp	\t5, \t6, [\dest, #32]
+	stnp	\t7, \t8, [\dest, #48]
+	add	\dest, \dest, #64
+	tst	\src, #(PAGE_SIZE - 1)
+	b.ne	9998b
+	.endm
+
+/*
  * Annotate a function as position independent, i.e., safe to be called before
  * the kernel virtual mapping is activated.
  */
@@ -233,4 +320,24 @@
 	.long	\sym\()_hi32
 	.endm
 
+	/*
+	 * mov_q - move an immediate constant into a 64-bit register using
+	 *         between 2 and 4 movz/movk instructions (depending on the
+	 *         magnitude and sign of the operand)
+	 */
+	.macro	mov_q, reg, val
+	.if (((\val) >> 31) == 0 || ((\val) >> 31) == 0x1ffffffff)
+	movz	\reg, :abs_g1_s:\val
+	.else
+	.if (((\val) >> 47) == 0 || ((\val) >> 47) == 0x1ffff)
+	movz	\reg, :abs_g2_s:\val
+	.else
+	movz	\reg, :abs_g3:\val
+	movk	\reg, :abs_g2_nc:\val
+	.endif
+	movk	\reg, :abs_g1_nc:\val
+	.endif
+	movk	\reg, :abs_g0_nc:\val
+	.endm
+
 #endif	/* __ASM_ASSEMBLER_H */
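
mov_q picks its sequence length by testing whether everything above bit 30 (or bit 46) of the constant is all-zeros or all-ones, i.e. whether a sign-extending move of the g1 (or g2) halfword already yields the correct upper bits; the :abs_gN_s: relocations additionally let the assembler flip MOVZ to MOVN for negative values, which the model below ignores. A C model of the plain MOVZ/MOVK semantics the macro builds on (illustrative only):

#include <stdint.h>

/* MOVZ: halfword g (0..3) set, all other bits zeroed */
static uint64_t movz(unsigned int g, uint16_t imm)
{
	return (uint64_t)imm << (16 * g);
}

/* MOVK: halfword g replaced, all other bits kept */
static uint64_t movk(uint64_t reg, unsigned int g, uint16_t imm)
{
	reg &= ~((uint64_t)0xffff << (16 * g));
	return reg | ((uint64_t)imm << (16 * g));
}

/*
 * mov_q x0, 0x12345 takes the short path ((val >> 31) == 0):
 *   movz(1, 0x0001)      -> 0x0000000000010000
 *   movk(_, 0, 0x2345)   -> 0x0000000000012345
 * i.e. two instructions instead of the worst-case four.
 */
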
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index b9b6494..224efe7 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -35,8 +35,9 @@
 #define ARM64_ALT_PAN_NOT_UAO			10
 #define ARM64_HAS_VIRT_HOST_EXTN		11
 #define ARM64_WORKAROUND_CAVIUM_27456		12
+#define ARM64_HAS_32BIT_EL0			13
 
-#define ARM64_NCAPS				13
+#define ARM64_NCAPS				14
 
 #ifndef __ASSEMBLY__
 
@@ -77,10 +78,17 @@
 	struct arm64_ftr_bits	*ftr_bits;
 };
 
+/* scope of capability check */
+enum {
+	SCOPE_SYSTEM,
+	SCOPE_LOCAL_CPU,
+};
+
 struct arm64_cpu_capabilities {
 	const char *desc;
 	u16 capability;
-	bool (*matches)(const struct arm64_cpu_capabilities *);
+	int def_scope;			/* default scope */
+	bool (*matches)(const struct arm64_cpu_capabilities *caps, int scope);
 	void (*enable)(void *);		/* Called on all active CPUs */
 	union {
 		struct {	/* To be used for erratum handling only */
@@ -101,6 +109,8 @@
 
 extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
 
+bool this_cpu_has_cap(unsigned int cap);
+
 static inline bool cpu_have_feature(unsigned int num)
 {
 	return elf_hwcap & (1UL << num);
@@ -170,12 +180,20 @@
 		cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_BIGENDEL0_SHIFT) == 0x1;
 }
 
+static inline bool id_aa64pfr0_32bit_el0(u64 pfr0)
+{
+	u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL0_SHIFT);
+
+	return val == ID_AA64PFR0_EL0_32BIT_64BIT;
+}
+
 void __init setup_cpu_features(void);
 
 void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
 			    const char *info);
 void check_local_cpu_errata(void);
 
+void verify_local_cpu_errata(void);
 void verify_local_cpu_capabilities(void);
 
 u64 read_system_reg(u32 id);
@@ -185,6 +203,11 @@
 	return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1));
 }
 
+static inline bool system_supports_32bit_el0(void)
+{
+	return cpus_have_cap(ARM64_HAS_32BIT_EL0);
+}
+
 static inline bool system_supports_mixed_endian_el0(void)
 {
 	return id_aa64mmfr0_mixed_endian_el0(read_system_reg(SYS_ID_AA64MMFR0_EL1));
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index 8e88a69..622db3c 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -4,6 +4,7 @@
 #include <asm/io.h>
 #include <asm/mmu_context.h>
 #include <asm/neon.h>
+#include <asm/ptrace.h>
 #include <asm/tlbflush.h>
 
 #ifdef CONFIG_EFI
@@ -14,32 +15,29 @@
 
 int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md);
 
-#define efi_call_virt(f, ...)						\
+#define efi_set_mapping_permissions	efi_create_mapping
+
+#define arch_efi_call_virt_setup()					\
 ({									\
-	efi_##f##_t *__f;						\
-	efi_status_t __s;						\
-									\
 	kernel_neon_begin();						\
 	efi_virtmap_load();						\
-	__f = efi.systab->runtime->f;					\
-	__s = __f(__VA_ARGS__);						\
-	efi_virtmap_unload();						\
-	kernel_neon_end();						\
-	__s;								\
 })
 
-#define __efi_call_virt(f, ...)						\
+#define arch_efi_call_virt(f, args...)					\
 ({									\
 	efi_##f##_t *__f;						\
-									\
-	kernel_neon_begin();						\
-	efi_virtmap_load();						\
 	__f = efi.systab->runtime->f;					\
-	__f(__VA_ARGS__);						\
+	__f(args);							\
+})
+
+#define arch_efi_call_virt_teardown()					\
+({									\
 	efi_virtmap_unload();						\
 	kernel_neon_end();						\
 })
 
+#define ARCH_EFI_IRQ_FLAGS_MASK (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT)
+
 /* arch specific definitions used by the stub code */
 
 /*
@@ -50,7 +48,16 @@
 #define EFI_FDT_ALIGN	SZ_2M   /* used by allocate_new_fdt_and_exit_boot() */
 #define MAX_FDT_OFFSET	SZ_512M
 
-#define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__)
+#define efi_call_early(f, ...)		sys_table_arg->boottime->f(__VA_ARGS__)
+#define __efi_call_early(f, ...)	f(__VA_ARGS__)
+#define efi_is_64bit()			(true)
+
+#define alloc_screen_info(x...)		&screen_info
+#define free_screen_info(x...)
+
+static inline void efifb_setup_from_dmi(struct screen_info *si, const char *opt)
+{
+}
 
 #define EFI_ALLOC_ALIGN		SZ_64K
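
Splitting the old all-in-one efi_call_virt() into setup/call/teardown hooks lets the generic EFI layer wrap every runtime-service call uniformly (serialisation, plus IRQ-flag corruption checks against ARCH_EFI_IRQ_FLAGS_MASK) around the arch-specific pieces. A hedged sketch of how the generic wrapper in drivers/firmware/efi is expected to compose the three hooks:

#define efi_call_virt(f, args...)					\
({									\
	efi_status_t __s;						\
	arch_efi_call_virt_setup();					\
	__s = arch_efi_call_virt(f, args);				\
	arch_efi_call_virt_teardown();					\
	__s;								\
})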
 
diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index 24ed037..7a09c48 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -177,7 +177,8 @@
 
 /* AArch32 EABI. */
 #define EF_ARM_EABI_MASK		0xff000000
-#define compat_elf_check_arch(x)	(((x)->e_machine == EM_ARM) && \
+#define compat_elf_check_arch(x)	(system_supports_32bit_el0() && \
+					 ((x)->e_machine == EM_ARM) && \
 					 ((x)->e_flags & EF_ARM_EABI_MASK))
 
 #define compat_start_thread		compat_start_thread
diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
index 5c6375d..7e51d1b 100644
--- a/arch/arm64/include/asm/kernel-pgtable.h
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -19,6 +19,7 @@
 #ifndef __ASM_KERNEL_PGTABLE_H
 #define __ASM_KERNEL_PGTABLE_H
 
+#include <asm/sparsemem.h>
 
 /*
  * The linear mapping and the start of memory are both 2M aligned (per
@@ -86,10 +87,24 @@
  * (64k granule), or a multiple that can be mapped using contiguous bits
  * in the page tables: 32 * PMD_SIZE (16k granule)
  */
-#ifdef CONFIG_ARM64_64K_PAGES
-#define ARM64_MEMSTART_ALIGN	SZ_512M
+#if defined(CONFIG_ARM64_4K_PAGES)
+#define ARM64_MEMSTART_SHIFT		PUD_SHIFT
+#elif defined(CONFIG_ARM64_16K_PAGES)
+#define ARM64_MEMSTART_SHIFT		(PMD_SHIFT + 5)
 #else
-#define ARM64_MEMSTART_ALIGN	SZ_1G
+#define ARM64_MEMSTART_SHIFT		PMD_SHIFT
+#endif
+
+/*
+ * sparsemem vmemmap imposes an additional requirement on the alignment of
+ * memstart_addr, due to the fact that the base of the vmemmap region
+ * has a direct correspondence, and needs to appear sufficiently aligned
+ * in the virtual address space.
+ */
+#if defined(CONFIG_SPARSEMEM_VMEMMAP) && ARM64_MEMSTART_SHIFT < SECTION_SIZE_BITS
+#define ARM64_MEMSTART_ALIGN	(1UL << SECTION_SIZE_BITS)
+#else
+#define ARM64_MEMSTART_ALIGN	(1UL << ARM64_MEMSTART_SHIFT)
 #endif
 
 #endif	/* __ASM_KERNEL_PGTABLE_H */
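
A quick check that the shift-based definition reproduces the constants it replaces, assuming the usual arm64 page-table shifts and SECTION_SIZE_BITS = 30:

/*
 *   4K  pages: PUD_SHIFT     = 30          -> 1 GiB   (old: SZ_1G)
 *   16K pages: PMD_SHIFT + 5 = 25 + 5 = 30 -> 1 GiB   (32 * PMD_SIZE)
 *   64K pages: PMD_SHIFT     = 29          -> 512 MiB (old: SZ_512M)
 *
 * With SPARSEMEM_VMEMMAP, the 64K case (29 < SECTION_SIZE_BITS) is then
 * bumped to a 1 GiB alignment by the #if above.
 */
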
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 3f29887..1b3dc9df 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -84,17 +84,6 @@
 #define HCR_INT_OVERRIDE   (HCR_FMO | HCR_IMO)
 #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
 
-/* Hyp System Control Register (SCTLR_EL2) bits */
-#define SCTLR_EL2_EE	(1 << 25)
-#define SCTLR_EL2_WXN	(1 << 19)
-#define SCTLR_EL2_I	(1 << 12)
-#define SCTLR_EL2_SA	(1 << 3)
-#define SCTLR_EL2_C	(1 << 2)
-#define SCTLR_EL2_A	(1 << 1)
-#define SCTLR_EL2_M	1
-#define SCTLR_EL2_FLAGS	(SCTLR_EL2_M | SCTLR_EL2_A | SCTLR_EL2_C |	\
-			 SCTLR_EL2_SA | SCTLR_EL2_I)
-
 /* TCR_EL2 Registers bits */
 #define TCR_EL2_RES1	((1 << 31) | (1 << 23))
 #define TCR_EL2_TBI	(1 << 20)
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 40a0a24..7561f63 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -22,6 +22,8 @@
 
 #define ARM_EXCEPTION_IRQ	  0
 #define ARM_EXCEPTION_TRAP	  1
+/* The hyp-stub will return this for any kvm_call_hyp() call */
+#define ARM_EXCEPTION_HYP_GONE	  2
 
 #define KVM_ARM64_DEBUG_DIRTY_SHIFT	0
 #define KVM_ARM64_DEBUG_DIRTY		(1 << KVM_ARM64_DEBUG_DIRTY_SHIFT)
@@ -40,6 +42,7 @@
 
 extern char __kvm_hyp_init[];
 extern char __kvm_hyp_init_end[];
+extern char __kvm_hyp_reset[];
 
 extern char __kvm_hyp_vector[];
 
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index f5c6bd2..90a8d23 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -46,6 +46,8 @@
 int __attribute_const__ kvm_target_cpu(void);
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
 int kvm_arch_dev_ioctl_check_extension(long ext);
+unsigned long kvm_hyp_reset_entry(void);
+void __extended_idmap_trampoline(phys_addr_t boot_pgd, phys_addr_t idmap_start);
 
 struct kvm_arch {
 	/* The VMID generation used for the virt. memory system */
@@ -352,7 +354,17 @@
 		       hyp_stack_ptr, vector_ptr);
 }
 
-static inline void kvm_arch_hardware_disable(void) {}
+static inline void __cpu_reset_hyp_mode(phys_addr_t boot_pgd_ptr,
+					phys_addr_t phys_idmap_start)
+{
+	/*
+	 * Call reset code, and switch back to stub hyp vectors.
+	 * Uses __kvm_call_hyp() to avoid kaslr's kvm_ksym_ref() translation.
+	 */
+	__kvm_call_hyp((void *)kvm_hyp_reset_entry(),
+		       boot_pgd_ptr, phys_idmap_start);
+}
+
 static inline void kvm_arch_hardware_unsetup(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 22732a5..e8d39d4 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -109,6 +109,7 @@
 phys_addr_t kvm_mmu_get_httbr(void);
 phys_addr_t kvm_mmu_get_boot_httbr(void);
 phys_addr_t kvm_get_idmap_vector(void);
+phys_addr_t kvm_get_idmap_start(void);
 int kvm_mmu_init(void);
 void kvm_clear_hyp_idmap(void);
 
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 12f8a00..72a3025 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -40,6 +40,21 @@
 #define PCI_IO_SIZE		SZ_16M
 
 /*
+ * Log2 of the upper bound of the size of a struct page. Used for sizing
+ * the vmemmap region only; it does not affect the actual memory footprint.
+ * We don't use sizeof(struct page) directly since taking its size here
+ * requires its definition to be available at this point in the inclusion
+ * chain, and it may not be a power of 2 in the first place.
+ */
+#define STRUCT_PAGE_MAX_SHIFT	6
+
+/*
+ * VMEMMAP_SIZE - allows the whole linear region to be covered by
+ *                a struct page array
+ */
+#define VMEMMAP_SIZE (UL(1) << (VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT))
+
+/*
  * PAGE_OFFSET - the virtual address of the start of the kernel image (top
  *		 (VA_BITS - 1))
  * VA_BITS - the maximum number of bits for virtual addresses.
@@ -54,7 +69,8 @@
 #define MODULES_END		(MODULES_VADDR + MODULES_VSIZE)
 #define MODULES_VADDR		(VA_START + KASAN_SHADOW_SIZE)
 #define MODULES_VSIZE		(SZ_128M)
-#define PCI_IO_END		(PAGE_OFFSET - SZ_2M)
+#define VMEMMAP_START		(PAGE_OFFSET - VMEMMAP_SIZE)
+#define PCI_IO_END		(VMEMMAP_START - SZ_2M)
 #define PCI_IO_START		(PCI_IO_END - PCI_IO_SIZE)
 #define FIXADDR_TOP		(PCI_IO_START - SZ_2M)
 #define TASK_SIZE_64		(UL(1) << VA_BITS)
@@ -71,6 +87,9 @@
 
 #define TASK_UNMAPPED_BASE	(PAGE_ALIGN(TASK_SIZE / 4))
 
+#define KERNEL_START      _text
+#define KERNEL_END        _end
+
 /*
  * The size of the KASAN shadow region. This should be 1/8th of the
  * size of the entire kernel virtual address space.
@@ -192,9 +211,19 @@
  */
 #define ARCH_PFN_OFFSET		((unsigned long)PHYS_PFN_OFFSET)
 
+#ifndef CONFIG_SPARSEMEM_VMEMMAP
 #define virt_to_page(kaddr)	pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
-#define	virt_addr_valid(kaddr)	pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
+#define virt_addr_valid(kaddr)	pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
+#else
+#define __virt_to_pgoff(kaddr)	(((u64)(kaddr) & ~PAGE_OFFSET) / PAGE_SIZE * sizeof(struct page))
+#define __page_to_voff(page)	(((u64)(page) & ~VMEMMAP_START) * PAGE_SIZE / sizeof(struct page))
 
+#define page_to_virt(page)	((void *)((__page_to_voff(page)) | PAGE_OFFSET))
+#define virt_to_page(vaddr)	((struct page *)((__virt_to_pgoff(vaddr)) | VMEMMAP_START))
+
+#define virt_addr_valid(kaddr)	pfn_valid((((u64)(kaddr) & ~PAGE_OFFSET) \
+					   + PHYS_OFFSET) >> PAGE_SHIFT)
+#endif
 #endif
 
 #include <asm-generic/memory_model.h>
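
The new constants are easiest to sanity-check with concrete numbers. The linear region is half of the kernel VA space, so it holds 2^(VA_BITS - 1 - PAGE_SHIFT) pages, each needing at most 2^STRUCT_PAGE_MAX_SHIFT = 64 bytes of struct page. Worked example under an assumed 4K-page, VA_BITS = 48 configuration:

/*
 * VMEMMAP_SIZE = 1UL << (VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT)
 *              = 1UL << (48 - 12 - 1 + 6)
 *              = 1UL << 41                 -> 2 TiB of vmemmap space,
 * enough for the 2^35 pages of a 2^47-byte linear region at 64 bytes of
 * struct page each.
 */
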
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index 990124a..97b1d8f 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -29,6 +29,7 @@
 #define ASID(mm)	((mm)->context.id.counter & 0xffff)
 
 extern void paging_init(void);
+extern void bootmem_init(void);
 extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt);
 extern void init_mem_pgprot(void);
 extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
diff --git a/arch/arm64/include/asm/mmzone.h b/arch/arm64/include/asm/mmzone.h
new file mode 100644
index 0000000..a0de9e6
--- /dev/null
+++ b/arch/arm64/include/asm/mmzone.h
@@ -0,0 +1,12 @@
+#ifndef __ASM_MMZONE_H
+#define __ASM_MMZONE_H
+
+#ifdef CONFIG_NUMA
+
+#include <asm/numa.h>
+
+extern struct pglist_data *node_data[];
+#define NODE_DATA(nid)		(node_data[(nid)])
+
+#endif /* CONFIG_NUMA */
+#endif /* __ASM_MMZONE_H */
diff --git a/arch/arm64/include/asm/numa.h b/arch/arm64/include/asm/numa.h
new file mode 100644
index 0000000..e9b4f29
--- /dev/null
+++ b/arch/arm64/include/asm/numa.h
@@ -0,0 +1,45 @@
+#ifndef __ASM_NUMA_H
+#define __ASM_NUMA_H
+
+#include <asm/topology.h>
+
+#ifdef CONFIG_NUMA
+
+/* currently, arm64 implements flat NUMA topology */
+#define parent_node(node)	(node)
+
+int __node_distance(int from, int to);
+#define node_distance(a, b) __node_distance(a, b)
+
+extern nodemask_t numa_nodes_parsed __initdata;
+
+/* Mappings between node number and cpus on that node. */
+extern cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
+void numa_clear_node(unsigned int cpu);
+
+#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+const struct cpumask *cpumask_of_node(int node);
+#else
+/* Returns a pointer to the cpumask of CPUs on Node 'node'. */
+static inline const struct cpumask *cpumask_of_node(int node)
+{
+	return node_to_cpumask_map[node];
+}
+#endif
+
+void __init arm64_numa_init(void);
+int __init numa_add_memblk(int nodeid, u64 start, u64 end);
+void __init numa_set_distance(int from, int to, int distance);
+void __init numa_free_distance(void);
+void __init early_map_cpu_to_node(unsigned int cpu, int nid);
+void numa_store_cpu_info(unsigned int cpu);
+
+#else	/* CONFIG_NUMA */
+
+static inline void numa_store_cpu_info(unsigned int cpu) { }
+static inline void arm64_numa_init(void) { }
+static inline void early_map_cpu_to_node(unsigned int cpu, int nid) { }
+
+#endif	/* CONFIG_NUMA */
+
+#endif	/* __ASM_NUMA_H */
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index ae615b9..17b45f7 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -19,6 +19,8 @@
 #ifndef __ASM_PAGE_H
 #define __ASM_PAGE_H
 
+#include <linux/const.h>
+
 /* PAGE_SHIFT determines the page size */
 /* CONT_SHIFT determines the number of pages which can be tracked together  */
 #ifdef CONFIG_ARM64_64K_PAGES
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 5c25b83..9786f77 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -133,7 +133,6 @@
  * Section
  */
 #define PMD_SECT_VALID		(_AT(pmdval_t, 1) << 0)
-#define PMD_SECT_PROT_NONE	(_AT(pmdval_t, 1) << 58)
 #define PMD_SECT_USER		(_AT(pmdval_t, 1) << 6)		/* AP[1] */
 #define PMD_SECT_RDONLY		(_AT(pmdval_t, 1) << 7)		/* AP[2] */
 #define PMD_SECT_S		(_AT(pmdval_t, 3) << 8)
diff --git a/arch/arm64/include/asm/pgtable-types.h b/arch/arm64/include/asm/pgtable-types.h
index 2b1bd7e..69b2fd4 100644
--- a/arch/arm64/include/asm/pgtable-types.h
+++ b/arch/arm64/include/asm/pgtable-types.h
@@ -27,10 +27,6 @@
 typedef u64 pudval_t;
 typedef u64 pgdval_t;
 
-#undef STRICT_MM_TYPECHECKS
-
-#ifdef STRICT_MM_TYPECHECKS
-
 /*
  * These are used to make use of C type-checking..
  */
@@ -58,34 +54,6 @@
 #define pgprot_val(x)	((x).pgprot)
 #define __pgprot(x)	((pgprot_t) { (x) } )
 
-#else	/* !STRICT_MM_TYPECHECKS */
-
-typedef pteval_t pte_t;
-#define pte_val(x)	(x)
-#define __pte(x)	(x)
-
-#if CONFIG_PGTABLE_LEVELS > 2
-typedef pmdval_t pmd_t;
-#define pmd_val(x)	(x)
-#define __pmd(x)	(x)
-#endif
-
-#if CONFIG_PGTABLE_LEVELS > 3
-typedef pudval_t pud_t;
-#define pud_val(x)	(x)
-#define __pud(x)	(x)
-#endif
-
-typedef pgdval_t pgd_t;
-#define pgd_val(x)	(x)
-#define __pgd(x)	(x)
-
-typedef pteval_t pgprot_t;
-#define pgprot_val(x)	(x)
-#define __pgprot(x)	(x)
-
-#endif /* STRICT_MM_TYPECHECKS */
-
 #if CONFIG_PGTABLE_LEVELS == 2
 #include <asm-generic/pgtable-nopmd.h>
 #elif CONFIG_PGTABLE_LEVELS == 3
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 989fef1..2da46ae 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -24,22 +24,16 @@
 #include <asm/pgtable-prot.h>
 
 /*
- * VMALLOC and SPARSEMEM_VMEMMAP ranges.
+ * VMALLOC range.
  *
- * VMEMAP_SIZE: allows the whole linear region to be covered by a struct page array
- *	(rounded up to PUD_SIZE).
  * VMALLOC_START: beginning of the kernel vmalloc space
- * VMALLOC_END: extends to the available space below vmmemmap, PCI I/O space,
- *	fixed mappings and modules
+ * VMALLOC_END: extends to the available space below vmemmap, PCI I/O space
+ *	and fixed mappings
  */
-#define VMEMMAP_SIZE		ALIGN((1UL << (VA_BITS - PAGE_SHIFT)) * sizeof(struct page), PUD_SIZE)
-
 #define VMALLOC_START		(MODULES_END)
 #define VMALLOC_END		(PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K)
 
-#define VMEMMAP_START		(VMALLOC_END + SZ_64K)
-#define vmemmap			((struct page *)VMEMMAP_START - \
-				 SECTION_ALIGN_DOWN(memstart_addr >> PAGE_SHIFT))
+#define vmemmap			((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT))
 
 #define FIRST_USER_ADDRESS	0UL
 
@@ -58,7 +52,7 @@
  * for zero-mapped memory areas etc..
  */
 extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
-#define ZERO_PAGE(vaddr)	virt_to_page(empty_zero_page)
+#define ZERO_PAGE(vaddr)	pfn_to_page(PHYS_PFN(__pa(empty_zero_page)))
 
 #define pte_ERROR(pte)		__pte_error(__FILE__, __LINE__, pte_val(pte))
 
@@ -272,6 +266,21 @@
 	return __pgprot(pgprot_val(prot) & ~PTE_TABLE_BIT);
 }
 
+#ifdef CONFIG_NUMA_BALANCING
+/*
+ * See the comment in include/asm-generic/pgtable.h
+ */
+static inline int pte_protnone(pte_t pte)
+{
+	return (pte_val(pte) & (PTE_VALID | PTE_PROT_NONE)) == PTE_PROT_NONE;
+}
+
+static inline int pmd_protnone(pmd_t pmd)
+{
+	return pte_protnone(pmd_pte(pmd));
+}
+#endif
+
 /*
  * THP definitions.
  */
@@ -280,15 +289,16 @@
 #define pmd_trans_huge(pmd)	(pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT))
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
+#define pmd_present(pmd)	pte_present(pmd_pte(pmd))
 #define pmd_dirty(pmd)		pte_dirty(pmd_pte(pmd))
 #define pmd_young(pmd)		pte_young(pmd_pte(pmd))
 #define pmd_wrprotect(pmd)	pte_pmd(pte_wrprotect(pmd_pte(pmd)))
 #define pmd_mkold(pmd)		pte_pmd(pte_mkold(pmd_pte(pmd)))
 #define pmd_mkwrite(pmd)	pte_pmd(pte_mkwrite(pmd_pte(pmd)))
-#define pmd_mkclean(pmd)       pte_pmd(pte_mkclean(pmd_pte(pmd)))
+#define pmd_mkclean(pmd)	pte_pmd(pte_mkclean(pmd_pte(pmd)))
 #define pmd_mkdirty(pmd)	pte_pmd(pte_mkdirty(pmd_pte(pmd)))
 #define pmd_mkyoung(pmd)	pte_pmd(pte_mkyoung(pmd_pte(pmd)))
-#define pmd_mknotpresent(pmd)	(__pmd(pmd_val(pmd) & ~PMD_TYPE_MASK))
+#define pmd_mknotpresent(pmd)	(__pmd(pmd_val(pmd) & ~PMD_SECT_VALID))
 
 #define __HAVE_ARCH_PMD_WRITE
 #define pmd_write(pmd)		pte_write(pmd_pte(pmd))
@@ -327,9 +337,8 @@
 				     unsigned long size, pgprot_t vma_prot);
 
 #define pmd_none(pmd)		(!pmd_val(pmd))
-#define pmd_present(pmd)	(pmd_val(pmd))
 
-#define pmd_bad(pmd)		(!(pmd_val(pmd) & 2))
+#define pmd_bad(pmd)		(!(pmd_val(pmd) & PMD_TABLE_BIT))
 
 #define pmd_table(pmd)		((pmd_val(pmd) & PMD_TYPE_MASK) == \
 				 PMD_TYPE_TABLE)
@@ -394,7 +403,7 @@
 #define pmd_ERROR(pmd)		__pmd_error(__FILE__, __LINE__, pmd_val(pmd))
 
 #define pud_none(pud)		(!pud_val(pud))
-#define pud_bad(pud)		(!(pud_val(pud) & 2))
+#define pud_bad(pud)		(!(pud_val(pud) & PUD_TABLE_BIT))
 #define pud_present(pud)	(pud_val(pud))
 
 static inline void set_pud(pud_t *pudp, pud_t pud)
@@ -526,6 +535,21 @@
 }
 
 #ifdef CONFIG_ARM64_HW_AFDBM
+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+extern int ptep_set_access_flags(struct vm_area_struct *vma,
+				 unsigned long address, pte_t *ptep,
+				 pte_t entry, int dirty);
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
+static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
+					unsigned long address, pmd_t *pmdp,
+					pmd_t entry, int dirty)
+{
+	return ptep_set_access_flags(vma, address, (pte_t *)pmdp, pmd_pte(entry), dirty);
+}
+#endif
+
 /*
  * Atomic pte/pmd modifications.
  */
@@ -578,9 +602,9 @@
 }
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define __HAVE_ARCH_PMDP_GET_AND_CLEAR
-static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
-				       unsigned long address, pmd_t *pmdp)
+#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
+static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
+					    unsigned long address, pmd_t *pmdp)
 {
 	return pte_pmd(ptep_get_and_clear(mm, address, (pte_t *)pmdp));
 }
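
Several of the pmd changes above are one mechanism: pmd_present() now routes through pte_present(), which accepts either the hardware valid bit or the software PROT_NONE bit, and pmd_mknotpresent() clears only PMD_SECT_VALID instead of the whole type field. That is the encoding NUMA balancing relies on; bit positions below are the arm64 ones (VALID is descriptor bit 0, PROT_NONE is software bit 58):

/*
 *   VALID=1              -> present, visible to the hardware walker
 *   VALID=0, PROT_NONE=1 -> present to software, faults on access
 *                           (the NUMA-balancing hinting state)
 *   VALID=0, PROT_NONE=0 -> not present
 *
 * Clearing only the valid bit in pmd_mknotpresent() keeps PROT_NONE and
 * the rest of the descriptor intact, so a huge page can round-trip
 * through the hinting state without losing information.
 */
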
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index 817a067..433e504 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -113,6 +113,17 @@
 	dsb(ishst);
 }
 
+/*
+ * The calling secondary CPU has detected a serious configuration mismatch,
+ * which calls for a kernel panic. Update the boot status and park the calling
+ * CPU.
+ */
+static inline void cpu_panic_kernel(void)
+{
+	update_cpu_boot_status(CPU_PANIC_KERNEL);
+	cpu_park_loop();
+}
+
 #endif /* ifndef __ASSEMBLY__ */
 
 #endif /* ifndef __ASM_SMP_H */
diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h
index 59a5b0f1..024d623 100644
--- a/arch/arm64/include/asm/suspend.h
+++ b/arch/arm64/include/asm/suspend.h
@@ -1,7 +1,8 @@
 #ifndef __ASM_SUSPEND_H
 #define __ASM_SUSPEND_H
 
-#define NR_CTX_REGS 11
+#define NR_CTX_REGS 10
+#define NR_CALLEE_SAVED_REGS 12
 
 /*
  * struct cpu_suspend_ctx must be 16-byte aligned since it is allocated on
@@ -16,11 +17,34 @@
 	u64 sp;
 } __aligned(16);
 
-struct sleep_save_sp {
-	phys_addr_t *save_ptr_stash;
-	phys_addr_t save_ptr_stash_phys;
+/*
+ * Memory to save the cpu state is allocated on the stack by
+ * __cpu_suspend_enter()'s caller, and populated by __cpu_suspend_enter().
+ * This data must survive until cpu_resume() is called.
+ *
+ * This struct describes the size and layout of the saved cpu state.
+ * The layout of the callee_saved_regs is defined by the implementation
+ * of __cpu_suspend_enter(), and cpu_resume(). This struct must be passed
+ * in by the caller as __cpu_suspend_enter()'s stack-frame is gone once it
+ * returns, and the data would be subsequently corrupted by the call to the
+ * finisher.
+ */
+struct sleep_stack_data {
+	struct cpu_suspend_ctx	system_regs;
+	unsigned long		callee_saved_regs[NR_CALLEE_SAVED_REGS];
 };
 
+extern unsigned long *sleep_save_stash;
+
 extern int cpu_suspend(unsigned long arg, int (*fn)(unsigned long));
 extern void cpu_resume(void);
+int __cpu_suspend_enter(struct sleep_stack_data *state);
+void __cpu_suspend_exit(void);
+void _cpu_resume(void);
+
+int swsusp_arch_suspend(void);
+int swsusp_arch_resume(void);
+int arch_hibernation_header_save(void *addr, unsigned int max_size);
+int arch_hibernation_header_restore(void *addr);
+
 #endif
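
A sketch of the setjmp-style calling pattern this struct implies;
suspend_finisher() is a placeholder, and the real caller is cpu_suspend()
in arch/arm64/kernel/suspend.c:

	struct sleep_stack_data state;	/* caller's frame, survives the call */

	if (__cpu_suspend_enter(&state)) {
		/* First return: context saved, power down via the finisher. */
		suspend_finisher();		/* hypothetical */
	} else {
		/* Woken via cpu_resume(): undo anything enter() changed. */
		__cpu_suspend_exit();
	}
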
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 1287416..751e901 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -86,10 +86,21 @@
 #define SET_PSTATE_UAO(x) __inst_arm(0xd5000000 | REG_PSTATE_UAO_IMM |\
 				     (!!x)<<8 | 0x1f)
 
-/* SCTLR_EL1 */
-#define SCTLR_EL1_CP15BEN	(0x1 << 5)
-#define SCTLR_EL1_SED		(0x1 << 8)
-#define SCTLR_EL1_SPAN		(0x1 << 23)
+/* Common SCTLR_ELx flags. */
+#define SCTLR_ELx_EE    (1 << 25)
+#define SCTLR_ELx_I	(1 << 12)
+#define SCTLR_ELx_SA	(1 << 3)
+#define SCTLR_ELx_C	(1 << 2)
+#define SCTLR_ELx_A	(1 << 1)
+#define SCTLR_ELx_M	1
+
+#define SCTLR_ELx_FLAGS	(SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \
+			 SCTLR_ELx_SA | SCTLR_ELx_I)
+
+/* SCTLR_EL1 specific flags. */
+#define SCTLR_EL1_SPAN		(1 << 23)
+#define SCTLR_EL1_SED		(1 << 8)
+#define SCTLR_EL1_CP15BEN	(1 << 5)
 
 
 /* id_aa64isar0 */
@@ -115,6 +126,7 @@
 #define ID_AA64PFR0_ASIMD_SUPPORTED	0x0
 #define ID_AA64PFR0_EL1_64BIT_ONLY	0x1
 #define ID_AA64PFR0_EL0_64BIT_ONLY	0x1
+#define ID_AA64PFR0_EL0_32BIT_64BIT	0x2
 
 /* id_aa64mmfr0 */
 #define ID_AA64MMFR0_TGRAN4_SHIFT	28
@@ -145,7 +157,11 @@
 #define ID_AA64MMFR1_VMIDBITS_16	2
 
 /* id_aa64mmfr2 */
+#define ID_AA64MMFR2_LVA_SHIFT		16
+#define ID_AA64MMFR2_IESB_SHIFT		12
+#define ID_AA64MMFR2_LSM_SHIFT		8
 #define ID_AA64MMFR2_UAO_SHIFT		4
+#define ID_AA64MMFR2_CNP_SHIFT		0
 
 /* id_aa64dfr0 */
 #define ID_AA64DFR0_CTX_CMPS_SHIFT	28
diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
index a3e9d6f..8b57339 100644
--- a/arch/arm64/include/asm/topology.h
+++ b/arch/arm64/include/asm/topology.h
@@ -22,6 +22,16 @@
 void store_cpu_topology(unsigned int cpuid);
 const struct cpumask *cpu_coregroup_mask(int cpu);
 
+#ifdef CONFIG_NUMA
+
+struct pci_bus;
+int pcibus_to_node(struct pci_bus *bus);
+#define cpumask_of_pcibus(bus)	(pcibus_to_node(bus) == -1 ?		\
+				 cpu_all_mask :				\
+				 cpumask_of_node(pcibus_to_node(bus)))
+
+#endif /* CONFIG_NUMA */
+
 #include <asm-generic/topology.h>
 
 #endif /* _ASM_ARM_TOPOLOGY_H */
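
A driver-side sketch of what pcibus_to_node()/cpumask_of_pcibus() enable;
pdev and size are placeholders. pcibus_to_node() returning -1 (node unknown)
makes kmalloc_node() fall back to any node:

	int node = pcibus_to_node(pdev->bus);
	void *buf = kmalloc_node(size, GFP_KERNEL, node);	/* NUMA-local */
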
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 9f22dd6..dcbcf8d 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -18,6 +18,22 @@
 #ifndef __ASM__VIRT_H
 #define __ASM__VIRT_H
 
+/*
+ * The arm64 hcall implementation uses x0 to specify the hcall type. A value
+ * less than 0xfff indicates a special hcall, such as get/set vector.
+ * Any other value is used as a pointer to the function to call.
+ */
+
+/* HVC_GET_VECTORS - Return the value of the vbar_el2 register. */
+#define HVC_GET_VECTORS 0
+
+/*
+ * HVC_SET_VECTORS - Set the value of the vbar_el2 register.
+ *
+ * @x1: Physical address of the new vector table.
+ */
+#define HVC_SET_VECTORS 1
+
 #define BOOT_CPU_MODE_EL1	(0xe11)
 #define BOOT_CPU_MODE_EL2	(0xe12)
 
@@ -60,6 +76,12 @@
 	return el == CurrentEL_EL2;
 }
 
+#ifdef CONFIG_ARM64_VHE
+extern void verify_cpu_run_el(void);
+#else
+static inline void verify_cpu_run_el(void) {}
+#endif
+
 /* The section containing the hypervisor text */
 extern char __hyp_text_start[];
 extern char __hyp_text_end[];
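
A sketch of the calling side of this hcall convention; the wrapper name is
invented, and the real callers are the hyp-stub users:

	static void set_el2_vectors(phys_addr_t vectors)
	{
		register unsigned long x0 asm("x0") = HVC_SET_VECTORS;
		register unsigned long x1 asm("x1") = vectors;

		/* x0 selects the operation, x1 carries its argument */
		asm volatile("hvc #0" : "+r" (x0) : "r" (x1) : "memory");
	}
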
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 3793003..2173149 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -45,6 +45,7 @@
 arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL)	+= acpi_parking_protocol.o
 arm64-obj-$(CONFIG_PARAVIRT)		+= paravirt.o
 arm64-obj-$(CONFIG_RANDOMIZE_BASE)	+= kaslr.o
+arm64-obj-$(CONFIG_HIBERNATION)		+= hibernate.o hibernate-asm.o
 
 obj-y					+= $(arm64-obj-y) vdso/
 obj-m					+= $(arm64-obj-m)
diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c
index d1ce8e2..3e4f1a4 100644
--- a/arch/arm64/kernel/acpi.c
+++ b/arch/arm64/kernel/acpi.c
@@ -42,6 +42,7 @@
 EXPORT_SYMBOL(acpi_pci_disabled);
 
 static bool param_acpi_off __initdata;
+static bool param_acpi_on __initdata;
 static bool param_acpi_force __initdata;
 
 static int __init parse_acpi(char *arg)
@@ -52,6 +53,8 @@
 	/* "acpi=off" disables both ACPI table parsing and interpreter */
 	if (strcmp(arg, "off") == 0)
 		param_acpi_off = true;
+	else if (strcmp(arg, "on") == 0) /* prefer ACPI over DT */
+		param_acpi_on = true;
 	else if (strcmp(arg, "force") == 0) /* force ACPI to be enabled */
 		param_acpi_force = true;
 	else
@@ -66,12 +69,24 @@
 				       void *data)
 {
 	/*
-	 * Return 1 as soon as we encounter a node at depth 1 that is
-	 * not the /chosen node.
+	 * Ignore anything not directly under the root node; we'll
+	 * catch its parent instead.
 	 */
-	if (depth == 1 && (strcmp(uname, "chosen") != 0))
-		return 1;
-	return 0;
+	if (depth != 1)
+		return 0;
+
+	if (strcmp(uname, "chosen") == 0)
+		return 0;
+
+	if (strcmp(uname, "hypervisor") == 0 &&
+	    of_flat_dt_is_compatible(node, "xen,xen"))
+		return 0;
+
+	/*
+	 * This node at depth 1 is neither a chosen node nor a xen node,
+	 * which we do not expect.
+	 */
+	return 1;
 }
 
 /*
@@ -184,11 +199,13 @@
 	/*
 	 * Enable ACPI instead of device tree unless
 	 * - ACPI has been disabled explicitly (acpi=off), or
-	 * - the device tree is not empty (it has more than just a /chosen node)
-	 *   and ACPI has not been force enabled (acpi=force)
+	 * - the device tree is not empty (it has more than just a /chosen node,
+	 *   and a /hypervisor node when running on Xen)
+	 *   and ACPI has not been [force] enabled (acpi=on|force)
 	 */
 	if (param_acpi_off ||
-	    (!param_acpi_force && of_scan_flat_dt(dt_scan_depth1_nodes, NULL)))
+	    (!param_acpi_on && !param_acpi_force &&
+	     of_scan_flat_dt(dt_scan_depth1_nodes, NULL)))
 		return;
 
 	/*
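
Condensed, the policy this check implements (a summary sketch, not
additional code):

	/*
	 * acpi=off            -> always use the device tree
	 * acpi=on, acpi=force -> prefer ACPI even with a non-trivial DT
	 * (default)           -> use ACPI only if the DT is effectively
	 *                        empty (just /chosen, plus /hypervisor on Xen)
	 */
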
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 3ae6b31..f8e5d47 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -22,6 +22,7 @@
 #include <linux/mm.h>
 #include <linux/dma-mapping.h>
 #include <linux/kvm_host.h>
+#include <linux/suspend.h>
 #include <asm/thread_info.h>
 #include <asm/memory.h>
 #include <asm/smp_plat.h>
@@ -119,11 +120,14 @@
   DEFINE(CPU_CTX_SP,		offsetof(struct cpu_suspend_ctx, sp));
   DEFINE(MPIDR_HASH_MASK,	offsetof(struct mpidr_hash, mask));
   DEFINE(MPIDR_HASH_SHIFTS,	offsetof(struct mpidr_hash, shift_aff));
-  DEFINE(SLEEP_SAVE_SP_SZ,	sizeof(struct sleep_save_sp));
-  DEFINE(SLEEP_SAVE_SP_PHYS,	offsetof(struct sleep_save_sp, save_ptr_stash_phys));
-  DEFINE(SLEEP_SAVE_SP_VIRT,	offsetof(struct sleep_save_sp, save_ptr_stash));
+  DEFINE(SLEEP_STACK_DATA_SYSTEM_REGS,	offsetof(struct sleep_stack_data, system_regs));
+  DEFINE(SLEEP_STACK_DATA_CALLEE_REGS,	offsetof(struct sleep_stack_data, callee_saved_regs));
 #endif
   DEFINE(ARM_SMCCC_RES_X0_OFFS,	offsetof(struct arm_smccc_res, a0));
   DEFINE(ARM_SMCCC_RES_X2_OFFS,	offsetof(struct arm_smccc_res, a2));
+  BLANK();
+  DEFINE(HIBERN_PBE_ORIG,	offsetof(struct pbe, orig_address));
+  DEFINE(HIBERN_PBE_ADDR,	offsetof(struct pbe, address));
+  DEFINE(HIBERN_PBE_NEXT,	offsetof(struct pbe, next));
   return 0;
 }
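
The HIBERN_PBE_* offsets above let assembly walk the restore_pblist built by
kernel/power; in C, the walk performed later by swsusp_arch_suspend_exit() is
roughly the following sketch, ignoring the interleaved cache maintenance:

	struct pbe *p;		/* struct pbe from <linux/suspend.h> */

	for (p = restore_pblist; p; p = p->next)
		memcpy(p->orig_address, p->address, PAGE_SIZE);
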
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 06afd04..d427894 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -22,14 +22,16 @@
 #include <asm/cpufeature.h>
 
 static bool __maybe_unused
-is_affected_midr_range(const struct arm64_cpu_capabilities *entry)
+is_affected_midr_range(const struct arm64_cpu_capabilities *entry, int scope)
 {
+	WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
 	return MIDR_IS_CPU_MODEL_RANGE(read_cpuid_id(), entry->midr_model,
 				       entry->midr_range_min,
 				       entry->midr_range_max);
 }
 
 #define MIDR_RANGE(model, min, max) \
+	.def_scope = SCOPE_LOCAL_CPU, \
 	.matches = is_affected_midr_range, \
 	.midr_model = model, \
 	.midr_range_min = min, \
@@ -101,6 +103,26 @@
 	}
 };
 
+/*
+ * The CPU errata workarounds are detected and applied at boot time, and
+ * the related information is freed soon after. If a new CPU requires a
+ * workaround that was not detected at boot, fail this CPU.
+ */
+void verify_local_cpu_errata(void)
+{
+	const struct arm64_cpu_capabilities *caps = arm64_errata;
+
+	for (; caps->matches; caps++)
+		if (!cpus_have_cap(caps->capability) &&
+			caps->matches(caps, SCOPE_LOCAL_CPU)) {
+			pr_crit("CPU%d: Requires workaround for %s, not detected"
+					" at boot time\n",
+				smp_processor_id(),
+				caps->desc ? : "an erratum");
+			cpu_die_early();
+		}
+}
+
 void check_local_cpu_errata(void)
 {
 	update_cpu_capabilities(arm64_errata, "enabling workaround for");
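
For reference, a sketch of what an entry built with MIDR_RANGE() looks like
after expansion with the new .def_scope field; the capability name and the
revision range are made up:

	{
		.desc = "example erratum workaround",
		.capability = ARM64_WORKAROUND_EXAMPLE,	/* hypothetical */
		.def_scope = SCOPE_LOCAL_CPU,
		.matches = is_affected_midr_range,
		.midr_model = MIDR_THUNDERX,
		.midr_range_min = 0x00,
		.midr_range_max = 0x01,
	},
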
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 943f514..811773d 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -71,7 +71,8 @@
 
 /* meta feature for alternatives */
 static bool __maybe_unused
-cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry);
+cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused);
 
 static struct arm64_ftr_bits ftr_id_aa64isar0[] = {
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
@@ -130,7 +131,11 @@
 };
 
 static struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_LVA_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_IESB_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_LSM_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_UAO_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_CNP_SHIFT, 4, 0),
 	ARM64_FTR_END,
 };
 
@@ -435,22 +440,26 @@
 	init_cpu_ftr_reg(SYS_ID_AA64MMFR2_EL1, info->reg_id_aa64mmfr2);
 	init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0);
 	init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1);
-	init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0);
-	init_cpu_ftr_reg(SYS_ID_ISAR0_EL1, info->reg_id_isar0);
-	init_cpu_ftr_reg(SYS_ID_ISAR1_EL1, info->reg_id_isar1);
-	init_cpu_ftr_reg(SYS_ID_ISAR2_EL1, info->reg_id_isar2);
-	init_cpu_ftr_reg(SYS_ID_ISAR3_EL1, info->reg_id_isar3);
-	init_cpu_ftr_reg(SYS_ID_ISAR4_EL1, info->reg_id_isar4);
-	init_cpu_ftr_reg(SYS_ID_ISAR5_EL1, info->reg_id_isar5);
-	init_cpu_ftr_reg(SYS_ID_MMFR0_EL1, info->reg_id_mmfr0);
-	init_cpu_ftr_reg(SYS_ID_MMFR1_EL1, info->reg_id_mmfr1);
-	init_cpu_ftr_reg(SYS_ID_MMFR2_EL1, info->reg_id_mmfr2);
-	init_cpu_ftr_reg(SYS_ID_MMFR3_EL1, info->reg_id_mmfr3);
-	init_cpu_ftr_reg(SYS_ID_PFR0_EL1, info->reg_id_pfr0);
-	init_cpu_ftr_reg(SYS_ID_PFR1_EL1, info->reg_id_pfr1);
-	init_cpu_ftr_reg(SYS_MVFR0_EL1, info->reg_mvfr0);
-	init_cpu_ftr_reg(SYS_MVFR1_EL1, info->reg_mvfr1);
-	init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2);
+
+	if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) {
+		init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0);
+		init_cpu_ftr_reg(SYS_ID_ISAR0_EL1, info->reg_id_isar0);
+		init_cpu_ftr_reg(SYS_ID_ISAR1_EL1, info->reg_id_isar1);
+		init_cpu_ftr_reg(SYS_ID_ISAR2_EL1, info->reg_id_isar2);
+		init_cpu_ftr_reg(SYS_ID_ISAR3_EL1, info->reg_id_isar3);
+		init_cpu_ftr_reg(SYS_ID_ISAR4_EL1, info->reg_id_isar4);
+		init_cpu_ftr_reg(SYS_ID_ISAR5_EL1, info->reg_id_isar5);
+		init_cpu_ftr_reg(SYS_ID_MMFR0_EL1, info->reg_id_mmfr0);
+		init_cpu_ftr_reg(SYS_ID_MMFR1_EL1, info->reg_id_mmfr1);
+		init_cpu_ftr_reg(SYS_ID_MMFR2_EL1, info->reg_id_mmfr2);
+		init_cpu_ftr_reg(SYS_ID_MMFR3_EL1, info->reg_id_mmfr3);
+		init_cpu_ftr_reg(SYS_ID_PFR0_EL1, info->reg_id_pfr0);
+		init_cpu_ftr_reg(SYS_ID_PFR1_EL1, info->reg_id_pfr1);
+		init_cpu_ftr_reg(SYS_MVFR0_EL1, info->reg_mvfr0);
+		init_cpu_ftr_reg(SYS_MVFR1_EL1, info->reg_mvfr1);
+		init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2);
+	}
 }
 
 static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new)
@@ -555,47 +564,51 @@
 				      info->reg_id_aa64pfr1, boot->reg_id_aa64pfr1);
 
 	/*
-	 * If we have AArch32, we care about 32-bit features for compat. These
-	 * registers should be RES0 otherwise.
+	 * If we have AArch32, we care about 32-bit features for compat.
+	 * If the system doesn't support AArch32, don't update them.
 	 */
-	taint |= check_update_ftr_reg(SYS_ID_DFR0_EL1, cpu,
+	if (id_aa64pfr0_32bit_el0(read_system_reg(SYS_ID_AA64PFR0_EL1)) &&
+		id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) {
+
+		taint |= check_update_ftr_reg(SYS_ID_DFR0_EL1, cpu,
 					info->reg_id_dfr0, boot->reg_id_dfr0);
-	taint |= check_update_ftr_reg(SYS_ID_ISAR0_EL1, cpu,
+		taint |= check_update_ftr_reg(SYS_ID_ISAR0_EL1, cpu,
 					info->reg_id_isar0, boot->reg_id_isar0);
-	taint |= check_update_ftr_reg(SYS_ID_ISAR1_EL1, cpu,
+		taint |= check_update_ftr_reg(SYS_ID_ISAR1_EL1, cpu,
 					info->reg_id_isar1, boot->reg_id_isar1);
-	taint |= check_update_ftr_reg(SYS_ID_ISAR2_EL1, cpu,
+		taint |= check_update_ftr_reg(SYS_ID_ISAR2_EL1, cpu,
 					info->reg_id_isar2, boot->reg_id_isar2);
-	taint |= check_update_ftr_reg(SYS_ID_ISAR3_EL1, cpu,
+		taint |= check_update_ftr_reg(SYS_ID_ISAR3_EL1, cpu,
 					info->reg_id_isar3, boot->reg_id_isar3);
-	taint |= check_update_ftr_reg(SYS_ID_ISAR4_EL1, cpu,
+		taint |= check_update_ftr_reg(SYS_ID_ISAR4_EL1, cpu,
 					info->reg_id_isar4, boot->reg_id_isar4);
-	taint |= check_update_ftr_reg(SYS_ID_ISAR5_EL1, cpu,
+		taint |= check_update_ftr_reg(SYS_ID_ISAR5_EL1, cpu,
 					info->reg_id_isar5, boot->reg_id_isar5);
 
-	/*
-	 * Regardless of the value of the AuxReg field, the AIFSR, ADFSR, and
-	 * ACTLR formats could differ across CPUs and therefore would have to
-	 * be trapped for virtualization anyway.
-	 */
-	taint |= check_update_ftr_reg(SYS_ID_MMFR0_EL1, cpu,
+		/*
+		 * Regardless of the value of the AuxReg field, the AIFSR, ADFSR, and
+		 * ACTLR formats could differ across CPUs and therefore would have to
+		 * be trapped for virtualization anyway.
+		 */
+		taint |= check_update_ftr_reg(SYS_ID_MMFR0_EL1, cpu,
 					info->reg_id_mmfr0, boot->reg_id_mmfr0);
-	taint |= check_update_ftr_reg(SYS_ID_MMFR1_EL1, cpu,
+		taint |= check_update_ftr_reg(SYS_ID_MMFR1_EL1, cpu,
 					info->reg_id_mmfr1, boot->reg_id_mmfr1);
-	taint |= check_update_ftr_reg(SYS_ID_MMFR2_EL1, cpu,
+		taint |= check_update_ftr_reg(SYS_ID_MMFR2_EL1, cpu,
 					info->reg_id_mmfr2, boot->reg_id_mmfr2);
-	taint |= check_update_ftr_reg(SYS_ID_MMFR3_EL1, cpu,
+		taint |= check_update_ftr_reg(SYS_ID_MMFR3_EL1, cpu,
 					info->reg_id_mmfr3, boot->reg_id_mmfr3);
-	taint |= check_update_ftr_reg(SYS_ID_PFR0_EL1, cpu,
+		taint |= check_update_ftr_reg(SYS_ID_PFR0_EL1, cpu,
 					info->reg_id_pfr0, boot->reg_id_pfr0);
-	taint |= check_update_ftr_reg(SYS_ID_PFR1_EL1, cpu,
+		taint |= check_update_ftr_reg(SYS_ID_PFR1_EL1, cpu,
 					info->reg_id_pfr1, boot->reg_id_pfr1);
-	taint |= check_update_ftr_reg(SYS_MVFR0_EL1, cpu,
+		taint |= check_update_ftr_reg(SYS_MVFR0_EL1, cpu,
 					info->reg_mvfr0, boot->reg_mvfr0);
-	taint |= check_update_ftr_reg(SYS_MVFR1_EL1, cpu,
+		taint |= check_update_ftr_reg(SYS_MVFR1_EL1, cpu,
 					info->reg_mvfr1, boot->reg_mvfr1);
-	taint |= check_update_ftr_reg(SYS_MVFR2_EL1, cpu,
+		taint |= check_update_ftr_reg(SYS_MVFR2_EL1, cpu,
 					info->reg_mvfr2, boot->reg_mvfr2);
+	}
 
 	/*
 	 * Mismatched CPU features are a recipe for disaster. Don't even
@@ -614,254 +627,9 @@
 	return regp->sys_val;
 }
 
-#include <linux/irqchip/arm-gic-v3.h>
-
-static bool
-feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry)
-{
-	int val = cpuid_feature_extract_field(reg, entry->field_pos, entry->sign);
-
-	return val >= entry->min_field_value;
-}
-
-static bool
-has_cpuid_feature(const struct arm64_cpu_capabilities *entry)
-{
-	u64 val;
-
-	val = read_system_reg(entry->sys_reg);
-	return feature_matches(val, entry);
-}
-
-static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry)
-{
-	bool has_sre;
-
-	if (!has_cpuid_feature(entry))
-		return false;
-
-	has_sre = gic_enable_sre();
-	if (!has_sre)
-		pr_warn_once("%s present but disabled by higher exception level\n",
-			     entry->desc);
-
-	return has_sre;
-}
-
-static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry)
-{
-	u32 midr = read_cpuid_id();
-	u32 rv_min, rv_max;
-
-	/* Cavium ThunderX pass 1.x and 2.x */
-	rv_min = 0;
-	rv_max = (1 << MIDR_VARIANT_SHIFT) | MIDR_REVISION_MASK;
-
-	return MIDR_IS_CPU_MODEL_RANGE(midr, MIDR_THUNDERX, rv_min, rv_max);
-}
-
-static bool runs_at_el2(const struct arm64_cpu_capabilities *entry)
-{
-	return is_kernel_in_hyp_mode();
-}
-
-static const struct arm64_cpu_capabilities arm64_features[] = {
-	{
-		.desc = "GIC system register CPU interface",
-		.capability = ARM64_HAS_SYSREG_GIC_CPUIF,
-		.matches = has_useable_gicv3_cpuif,
-		.sys_reg = SYS_ID_AA64PFR0_EL1,
-		.field_pos = ID_AA64PFR0_GIC_SHIFT,
-		.sign = FTR_UNSIGNED,
-		.min_field_value = 1,
-	},
-#ifdef CONFIG_ARM64_PAN
-	{
-		.desc = "Privileged Access Never",
-		.capability = ARM64_HAS_PAN,
-		.matches = has_cpuid_feature,
-		.sys_reg = SYS_ID_AA64MMFR1_EL1,
-		.field_pos = ID_AA64MMFR1_PAN_SHIFT,
-		.sign = FTR_UNSIGNED,
-		.min_field_value = 1,
-		.enable = cpu_enable_pan,
-	},
-#endif /* CONFIG_ARM64_PAN */
-#if defined(CONFIG_AS_LSE) && defined(CONFIG_ARM64_LSE_ATOMICS)
-	{
-		.desc = "LSE atomic instructions",
-		.capability = ARM64_HAS_LSE_ATOMICS,
-		.matches = has_cpuid_feature,
-		.sys_reg = SYS_ID_AA64ISAR0_EL1,
-		.field_pos = ID_AA64ISAR0_ATOMICS_SHIFT,
-		.sign = FTR_UNSIGNED,
-		.min_field_value = 2,
-	},
-#endif /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */
-	{
-		.desc = "Software prefetching using PRFM",
-		.capability = ARM64_HAS_NO_HW_PREFETCH,
-		.matches = has_no_hw_prefetch,
-	},
-#ifdef CONFIG_ARM64_UAO
-	{
-		.desc = "User Access Override",
-		.capability = ARM64_HAS_UAO,
-		.matches = has_cpuid_feature,
-		.sys_reg = SYS_ID_AA64MMFR2_EL1,
-		.field_pos = ID_AA64MMFR2_UAO_SHIFT,
-		.min_field_value = 1,
-		.enable = cpu_enable_uao,
-	},
-#endif /* CONFIG_ARM64_UAO */
-#ifdef CONFIG_ARM64_PAN
-	{
-		.capability = ARM64_ALT_PAN_NOT_UAO,
-		.matches = cpufeature_pan_not_uao,
-	},
-#endif /* CONFIG_ARM64_PAN */
-	{
-		.desc = "Virtualization Host Extensions",
-		.capability = ARM64_HAS_VIRT_HOST_EXTN,
-		.matches = runs_at_el2,
-	},
-	{},
-};
-
-#define HWCAP_CAP(reg, field, s, min_value, type, cap)	\
-	{							\
-		.desc = #cap,					\
-		.matches = has_cpuid_feature,			\
-		.sys_reg = reg,					\
-		.field_pos = field,				\
-		.sign = s,					\
-		.min_field_value = min_value,			\
-		.hwcap_type = type,				\
-		.hwcap = cap,					\
-	}
-
-static const struct arm64_cpu_capabilities arm64_hwcaps[] = {
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_PMULL),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_AES),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA1),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA2),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_CRC32),
-	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_ATOMICS),
-	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_FP),
-	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP),
-	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD),
-	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_ASIMDHP),
-#ifdef CONFIG_COMPAT
-	HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL),
-	HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES),
-	HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1),
-	HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA2),
-	HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_CRC32),
-#endif
-	{},
-};
-
-static void __init cap_set_hwcap(const struct arm64_cpu_capabilities *cap)
-{
-	switch (cap->hwcap_type) {
-	case CAP_HWCAP:
-		elf_hwcap |= cap->hwcap;
-		break;
-#ifdef CONFIG_COMPAT
-	case CAP_COMPAT_HWCAP:
-		compat_elf_hwcap |= (u32)cap->hwcap;
-		break;
-	case CAP_COMPAT_HWCAP2:
-		compat_elf_hwcap2 |= (u32)cap->hwcap;
-		break;
-#endif
-	default:
-		WARN_ON(1);
-		break;
-	}
-}
-
-/* Check if we have a particular HWCAP enabled */
-static bool __maybe_unused cpus_have_hwcap(const struct arm64_cpu_capabilities *cap)
-{
-	bool rc;
-
-	switch (cap->hwcap_type) {
-	case CAP_HWCAP:
-		rc = (elf_hwcap & cap->hwcap) != 0;
-		break;
-#ifdef CONFIG_COMPAT
-	case CAP_COMPAT_HWCAP:
-		rc = (compat_elf_hwcap & (u32)cap->hwcap) != 0;
-		break;
-	case CAP_COMPAT_HWCAP2:
-		rc = (compat_elf_hwcap2 & (u32)cap->hwcap) != 0;
-		break;
-#endif
-	default:
-		WARN_ON(1);
-		rc = false;
-	}
-
-	return rc;
-}
-
-static void __init setup_cpu_hwcaps(void)
-{
-	int i;
-	const struct arm64_cpu_capabilities *hwcaps = arm64_hwcaps;
-
-	for (i = 0; hwcaps[i].matches; i++)
-		if (hwcaps[i].matches(&hwcaps[i]))
-			cap_set_hwcap(&hwcaps[i]);
-}
-
-void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
-			    const char *info)
-{
-	int i;
-
-	for (i = 0; caps[i].matches; i++) {
-		if (!caps[i].matches(&caps[i]))
-			continue;
-
-		if (!cpus_have_cap(caps[i].capability) && caps[i].desc)
-			pr_info("%s %s\n", info, caps[i].desc);
-		cpus_set_cap(caps[i].capability);
-	}
-}
-
-/*
- * Run through the enabled capabilities and enable() it on all active
- * CPUs
- */
-static void __init
-enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
-{
-	int i;
-
-	for (i = 0; caps[i].matches; i++)
-		if (caps[i].enable && cpus_have_cap(caps[i].capability))
-			on_each_cpu(caps[i].enable, NULL, true);
-}
-
-/*
- * Flag to indicate if we have computed the system wide
- * capabilities based on the boot time active CPUs. This
- * will be used to determine if a new booting CPU should
- * go through the verification process to make sure that it
- * supports the system capabilities, without using a hotplug
- * notifier.
- */
-static bool sys_caps_initialised;
-
-static inline void set_sys_caps_initialised(void)
-{
-	sys_caps_initialised = true;
-}
-
 /*
  * __raw_read_system_reg() - Used by a STARTING cpu before cpuinfo is populated.
+ * Read the system register on the current CPU
  */
 static u64 __raw_read_system_reg(u32 sys_id)
 {
@@ -902,15 +670,314 @@
 	}
 }
 
+#include <linux/irqchip/arm-gic-v3.h>
+
+static bool
+feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry)
+{
+	int val = cpuid_feature_extract_field(reg, entry->field_pos, entry->sign);
+
+	return val >= entry->min_field_value;
+}
+
+static bool
+has_cpuid_feature(const struct arm64_cpu_capabilities *entry, int scope)
+{
+	u64 val;
+
+	WARN_ON(scope == SCOPE_LOCAL_CPU && preemptible());
+	if (scope == SCOPE_SYSTEM)
+		val = read_system_reg(entry->sys_reg);
+	else
+		val = __raw_read_system_reg(entry->sys_reg);
+
+	return feature_matches(val, entry);
+}
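
Sketching the two call sites this scope argument serves (both appear later
in this patch):

	bool matched;

	/* Boot-time detection: check the sanitised system-wide value. */
	matched = caps->matches(caps, SCOPE_SYSTEM);

	/* Late-CPU verification: runs on the new CPU with preemption
	 * disabled and reads that CPU's raw register instead. */
	matched = caps->matches(caps, SCOPE_LOCAL_CPU);
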
+
+static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry, int scope)
+{
+	bool has_sre;
+
+	if (!has_cpuid_feature(entry, scope))
+		return false;
+
+	has_sre = gic_enable_sre();
+	if (!has_sre)
+		pr_warn_once("%s present but disabled by higher exception level\n",
+			     entry->desc);
+
+	return has_sre;
+}
+
+static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry, int __unused)
+{
+	u32 midr = read_cpuid_id();
+	u32 rv_min, rv_max;
+
+	/* Cavium ThunderX pass 1.x and 2.x */
+	rv_min = 0;
+	rv_max = (1 << MIDR_VARIANT_SHIFT) | MIDR_REVISION_MASK;
+
+	return MIDR_IS_CPU_MODEL_RANGE(midr, MIDR_THUNDERX, rv_min, rv_max);
+}
+
+static bool runs_at_el2(const struct arm64_cpu_capabilities *entry, int __unused)
+{
+	return is_kernel_in_hyp_mode();
+}
+
+static const struct arm64_cpu_capabilities arm64_features[] = {
+	{
+		.desc = "GIC system register CPU interface",
+		.capability = ARM64_HAS_SYSREG_GIC_CPUIF,
+		.def_scope = SCOPE_SYSTEM,
+		.matches = has_useable_gicv3_cpuif,
+		.sys_reg = SYS_ID_AA64PFR0_EL1,
+		.field_pos = ID_AA64PFR0_GIC_SHIFT,
+		.sign = FTR_UNSIGNED,
+		.min_field_value = 1,
+	},
+#ifdef CONFIG_ARM64_PAN
+	{
+		.desc = "Privileged Access Never",
+		.capability = ARM64_HAS_PAN,
+		.def_scope = SCOPE_SYSTEM,
+		.matches = has_cpuid_feature,
+		.sys_reg = SYS_ID_AA64MMFR1_EL1,
+		.field_pos = ID_AA64MMFR1_PAN_SHIFT,
+		.sign = FTR_UNSIGNED,
+		.min_field_value = 1,
+		.enable = cpu_enable_pan,
+	},
+#endif /* CONFIG_ARM64_PAN */
+#if defined(CONFIG_AS_LSE) && defined(CONFIG_ARM64_LSE_ATOMICS)
+	{
+		.desc = "LSE atomic instructions",
+		.capability = ARM64_HAS_LSE_ATOMICS,
+		.def_scope = SCOPE_SYSTEM,
+		.matches = has_cpuid_feature,
+		.sys_reg = SYS_ID_AA64ISAR0_EL1,
+		.field_pos = ID_AA64ISAR0_ATOMICS_SHIFT,
+		.sign = FTR_UNSIGNED,
+		.min_field_value = 2,
+	},
+#endif /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */
+	{
+		.desc = "Software prefetching using PRFM",
+		.capability = ARM64_HAS_NO_HW_PREFETCH,
+		.def_scope = SCOPE_SYSTEM,
+		.matches = has_no_hw_prefetch,
+	},
+#ifdef CONFIG_ARM64_UAO
+	{
+		.desc = "User Access Override",
+		.capability = ARM64_HAS_UAO,
+		.def_scope = SCOPE_SYSTEM,
+		.matches = has_cpuid_feature,
+		.sys_reg = SYS_ID_AA64MMFR2_EL1,
+		.field_pos = ID_AA64MMFR2_UAO_SHIFT,
+		.min_field_value = 1,
+		.enable = cpu_enable_uao,
+	},
+#endif /* CONFIG_ARM64_UAO */
+#ifdef CONFIG_ARM64_PAN
+	{
+		.capability = ARM64_ALT_PAN_NOT_UAO,
+		.def_scope = SCOPE_SYSTEM,
+		.matches = cpufeature_pan_not_uao,
+	},
+#endif /* CONFIG_ARM64_PAN */
+	{
+		.desc = "Virtualization Host Extensions",
+		.capability = ARM64_HAS_VIRT_HOST_EXTN,
+		.def_scope = SCOPE_SYSTEM,
+		.matches = runs_at_el2,
+	},
+	{
+		.desc = "32-bit EL0 Support",
+		.capability = ARM64_HAS_32BIT_EL0,
+		.def_scope = SCOPE_SYSTEM,
+		.matches = has_cpuid_feature,
+		.sys_reg = SYS_ID_AA64PFR0_EL1,
+		.sign = FTR_UNSIGNED,
+		.field_pos = ID_AA64PFR0_EL0_SHIFT,
+		.min_field_value = ID_AA64PFR0_EL0_32BIT_64BIT,
+	},
+	{},
+};
+
+#define HWCAP_CAP(reg, field, s, min_value, type, cap)	\
+	{							\
+		.desc = #cap,					\
+		.def_scope = SCOPE_SYSTEM,			\
+		.matches = has_cpuid_feature,			\
+		.sys_reg = reg,					\
+		.field_pos = field,				\
+		.sign = s,					\
+		.min_field_value = min_value,			\
+		.hwcap_type = type,				\
+		.hwcap = cap,					\
+	}
+
+static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_PMULL),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_AES),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA1),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA2),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_CRC32),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_ATOMICS),
+	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_FP),
+	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP),
+	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD),
+	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_ASIMDHP),
+	{},
+};
+
+static const struct arm64_cpu_capabilities compat_elf_hwcaps[] = {
+#ifdef CONFIG_COMPAT
+	HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL),
+	HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES),
+	HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1),
+	HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA2),
+	HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_CRC32),
+#endif
+	{},
+};
+
+static void __init cap_set_elf_hwcap(const struct arm64_cpu_capabilities *cap)
+{
+	switch (cap->hwcap_type) {
+	case CAP_HWCAP:
+		elf_hwcap |= cap->hwcap;
+		break;
+#ifdef CONFIG_COMPAT
+	case CAP_COMPAT_HWCAP:
+		compat_elf_hwcap |= (u32)cap->hwcap;
+		break;
+	case CAP_COMPAT_HWCAP2:
+		compat_elf_hwcap2 |= (u32)cap->hwcap;
+		break;
+#endif
+	default:
+		WARN_ON(1);
+		break;
+	}
+}
+
+/* Check if we have a particular HWCAP enabled */
+static bool cpus_have_elf_hwcap(const struct arm64_cpu_capabilities *cap)
+{
+	bool rc;
+
+	switch (cap->hwcap_type) {
+	case CAP_HWCAP:
+		rc = (elf_hwcap & cap->hwcap) != 0;
+		break;
+#ifdef CONFIG_COMPAT
+	case CAP_COMPAT_HWCAP:
+		rc = (compat_elf_hwcap & (u32)cap->hwcap) != 0;
+		break;
+	case CAP_COMPAT_HWCAP2:
+		rc = (compat_elf_hwcap2 & (u32)cap->hwcap) != 0;
+		break;
+#endif
+	default:
+		WARN_ON(1);
+		rc = false;
+	}
+
+	return rc;
+}
+
+static void __init setup_elf_hwcaps(const struct arm64_cpu_capabilities *hwcaps)
+{
+	for (; hwcaps->matches; hwcaps++)
+		if (hwcaps->matches(hwcaps, hwcaps->def_scope))
+			cap_set_elf_hwcap(hwcaps);
+}
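
The hwcaps set here surface to userspace through the auxiliary vector; a
user-side probe (standard glibc API, not part of this patch) looks like:

	#include <sys/auxv.h>
	#include <asm/hwcap.h>	/* HWCAP_ATOMICS and friends */

	if (getauxval(AT_HWCAP) & HWCAP_ATOMICS) {
		/* LSE atomic instructions are safe to use on every CPU */
	}
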
+
+void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
+			    const char *info)
+{
+	for (; caps->matches; caps++) {
+		if (!caps->matches(caps, caps->def_scope))
+			continue;
+
+		if (!cpus_have_cap(caps->capability) && caps->desc)
+			pr_info("%s %s\n", info, caps->desc);
+		cpus_set_cap(caps->capability);
+	}
+}
+
+/*
+ * Run through the enabled capabilities and call the enable() method on
+ * all active CPUs.
+ */
+static void __init
+enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
+{
+	for (; caps->matches; caps++)
+		if (caps->enable && cpus_have_cap(caps->capability))
+			on_each_cpu(caps->enable, NULL, true);
+}
+
+/*
+ * Flag to indicate if we have computed the system wide
+ * capabilities based on the boot time active CPUs. This
+ * will be used to determine if a new booting CPU should
+ * go through the verification process to make sure that it
+ * supports the system capabilities, without using a hotplug
+ * notifier.
+ */
+static bool sys_caps_initialised;
+
+static inline void set_sys_caps_initialised(void)
+{
+	sys_caps_initialised = true;
+}
+
 /*
  * Check for CPU features that are used in early boot
  * based on the Boot CPU value.
  */
 static void check_early_cpu_features(void)
 {
+	verify_cpu_run_el();
 	verify_cpu_asid_bits();
 }
 
+static void
+verify_local_elf_hwcaps(const struct arm64_cpu_capabilities *caps)
+{
+	for (; caps->matches; caps++)
+		if (cpus_have_elf_hwcap(caps) && !caps->matches(caps, SCOPE_LOCAL_CPU)) {
+			pr_crit("CPU%d: missing HWCAP: %s\n",
+					smp_processor_id(), caps->desc);
+			cpu_die_early();
+		}
+}
+
+static void
+verify_local_cpu_features(const struct arm64_cpu_capabilities *caps)
+{
+	for (; caps->matches; caps++) {
+		if (!cpus_have_cap(caps->capability))
+			continue;
+		/*
+		 * If the new CPU misses an advertised feature, we cannot
+		 * proceed any further; park the CPU.
+		 */
+		if (!caps->matches(caps, SCOPE_LOCAL_CPU)) {
+			pr_crit("CPU%d: missing feature: %s\n",
+					smp_processor_id(), caps->desc);
+			cpu_die_early();
+		}
+		if (caps->enable)
+			caps->enable(NULL);
+	}
+}
+
 /*
  * Run through the enabled system capabilities and call enable() on this CPU.
  * The capabilities were decided based on the CPUs available at boot time.
@@ -921,8 +988,6 @@
  */
 void verify_local_cpu_capabilities(void)
 {
-	int i;
-	const struct arm64_cpu_capabilities *caps;
 
 	check_early_cpu_features();
 
@@ -933,32 +998,11 @@
 	if (!sys_caps_initialised)
 		return;
 
-	caps = arm64_features;
-	for (i = 0; caps[i].matches; i++) {
-		if (!cpus_have_cap(caps[i].capability) || !caps[i].sys_reg)
-			continue;
-		/*
-		 * If the new CPU misses an advertised feature, we cannot proceed
-		 * further, park the cpu.
-		 */
-		if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i])) {
-			pr_crit("CPU%d: missing feature: %s\n",
-					smp_processor_id(), caps[i].desc);
-			cpu_die_early();
-		}
-		if (caps[i].enable)
-			caps[i].enable(NULL);
-	}
-
-	for (i = 0, caps = arm64_hwcaps; caps[i].matches; i++) {
-		if (!cpus_have_hwcap(&caps[i]))
-			continue;
-		if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i])) {
-			pr_crit("CPU%d: missing HWCAP: %s\n",
-					smp_processor_id(), caps[i].desc);
-			cpu_die_early();
-		}
-	}
+	verify_local_cpu_errata();
+	verify_local_cpu_features(arm64_features);
+	verify_local_elf_hwcaps(arm64_elf_hwcaps);
+	if (system_supports_32bit_el0())
+		verify_local_elf_hwcaps(compat_elf_hwcaps);
 }
 
 static void __init setup_feature_capabilities(void)
@@ -967,6 +1011,24 @@
 	enable_cpu_capabilities(arm64_features);
 }
 
+/*
+ * Check if the current CPU has a given feature capability.
+ * Should be called from non-preemptible context.
+ */
+bool this_cpu_has_cap(unsigned int cap)
+{
+	const struct arm64_cpu_capabilities *caps;
+
+	if (WARN_ON(preemptible()))
+		return false;
+
+	for (caps = arm64_features; caps->desc; caps++)
+		if (caps->capability == cap && caps->matches)
+			return caps->matches(caps, SCOPE_LOCAL_CPU);
+
+	return false;
+}
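
A usage sketch for this_cpu_has_cap(); the caller must disable preemption so
"this CPU" stays meaningful (the function WARNs otherwise):

	preempt_disable();
	if (this_cpu_has_cap(ARM64_HAS_PAN))
		pr_info("CPU%d implements PAN\n", smp_processor_id());
	preempt_enable();
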
+
 void __init setup_cpu_features(void)
 {
 	u32 cwg;
@@ -974,7 +1036,10 @@
 
 	/* Set the CPU feature capabilities */
 	setup_feature_capabilities();
-	setup_cpu_hwcaps();
+	setup_elf_hwcaps(arm64_elf_hwcaps);
+
+	if (system_supports_32bit_el0())
+		setup_elf_hwcaps(compat_elf_hwcaps);
 
 	/* Advertise that we have computed the system capabilities */
 	set_sys_caps_initialised();
@@ -993,7 +1058,7 @@
 }
 
 static bool __maybe_unused
-cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry)
+cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused)
 {
 	return (cpus_have_cap(ARM64_HAS_PAN) && !cpus_have_cap(ARM64_HAS_UAO));
 }
diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
index 9047cab6..e11857f 100644
--- a/arch/arm64/kernel/cpuidle.c
+++ b/arch/arm64/kernel/cpuidle.c
@@ -19,7 +19,8 @@
 {
 	int ret = -EOPNOTSUPP;
 
-	if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_init_idle)
+	if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_suspend &&
+			cpu_ops[cpu]->cpu_init_idle)
 		ret = cpu_ops[cpu]->cpu_init_idle(cpu);
 
 	return ret;
@@ -36,11 +37,5 @@
 {
 	int cpu = smp_processor_id();
 
-	/*
-	 * If cpu_ops have not been registered or suspend
-	 * has not been initialized, cpu_suspend call fails early.
-	 */
-	if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_suspend)
-		return -EOPNOTSUPP;
 	return cpu_ops[cpu]->cpu_suspend(index);
 }
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 84c8684..3808470 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -87,7 +87,8 @@
 	"idivt",
 	"vfpd32",
 	"lpae",
-	"evtstrm"
+	"evtstrm",
+	NULL
 };
 
 static const char *const compat_hwcap2_str[] = {
@@ -216,23 +217,26 @@
 	info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1);
 	info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1);
 
-	info->reg_id_dfr0 = read_cpuid(ID_DFR0_EL1);
-	info->reg_id_isar0 = read_cpuid(ID_ISAR0_EL1);
-	info->reg_id_isar1 = read_cpuid(ID_ISAR1_EL1);
-	info->reg_id_isar2 = read_cpuid(ID_ISAR2_EL1);
-	info->reg_id_isar3 = read_cpuid(ID_ISAR3_EL1);
-	info->reg_id_isar4 = read_cpuid(ID_ISAR4_EL1);
-	info->reg_id_isar5 = read_cpuid(ID_ISAR5_EL1);
-	info->reg_id_mmfr0 = read_cpuid(ID_MMFR0_EL1);
-	info->reg_id_mmfr1 = read_cpuid(ID_MMFR1_EL1);
-	info->reg_id_mmfr2 = read_cpuid(ID_MMFR2_EL1);
-	info->reg_id_mmfr3 = read_cpuid(ID_MMFR3_EL1);
-	info->reg_id_pfr0 = read_cpuid(ID_PFR0_EL1);
-	info->reg_id_pfr1 = read_cpuid(ID_PFR1_EL1);
+	/* Update the 32bit ID registers only if AArch32 is implemented */
+	if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) {
+		info->reg_id_dfr0 = read_cpuid(ID_DFR0_EL1);
+		info->reg_id_isar0 = read_cpuid(ID_ISAR0_EL1);
+		info->reg_id_isar1 = read_cpuid(ID_ISAR1_EL1);
+		info->reg_id_isar2 = read_cpuid(ID_ISAR2_EL1);
+		info->reg_id_isar3 = read_cpuid(ID_ISAR3_EL1);
+		info->reg_id_isar4 = read_cpuid(ID_ISAR4_EL1);
+		info->reg_id_isar5 = read_cpuid(ID_ISAR5_EL1);
+		info->reg_id_mmfr0 = read_cpuid(ID_MMFR0_EL1);
+		info->reg_id_mmfr1 = read_cpuid(ID_MMFR1_EL1);
+		info->reg_id_mmfr2 = read_cpuid(ID_MMFR2_EL1);
+		info->reg_id_mmfr3 = read_cpuid(ID_MMFR3_EL1);
+		info->reg_id_pfr0 = read_cpuid(ID_PFR0_EL1);
+		info->reg_id_pfr1 = read_cpuid(ID_PFR1_EL1);
 
-	info->reg_mvfr0 = read_cpuid(MVFR0_EL1);
-	info->reg_mvfr1 = read_cpuid(MVFR1_EL1);
-	info->reg_mvfr2 = read_cpuid(MVFR2_EL1);
+		info->reg_mvfr0 = read_cpuid(MVFR0_EL1);
+		info->reg_mvfr1 = read_cpuid(MVFR1_EL1);
+		info->reg_mvfr2 = read_cpuid(MVFR2_EL1);
+	}
 
 	cpuinfo_detect_icache_policy(info);
 
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index c45f296..4fbf3c5 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -135,9 +135,8 @@
 static int os_lock_notify(struct notifier_block *self,
 				    unsigned long action, void *data)
 {
-	int cpu = (unsigned long)data;
 	if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE)
-		smp_call_function_single(cpu, clear_os_lock, NULL, 1);
+		clear_os_lock(NULL);
 	return NOTIFY_OK;
 }
 
diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S
index cae3112..e88c064 100644
--- a/arch/arm64/kernel/efi-entry.S
+++ b/arch/arm64/kernel/efi-entry.S
@@ -62,7 +62,7 @@
 	 */
 	mov	x20, x0		// DTB address
 	ldr	x0, [sp, #16]	// relocated _text address
-	movz	x21, #:abs_g0:stext_offset
+	ldr	w21, =stext_offset
 	add	x21, x0, x21
 
 	/*
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index b6abc85..78f5248 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -17,22 +17,51 @@
 
 #include <asm/efi.h>
 
+/*
+ * Only regions of type EFI_RUNTIME_SERVICES_CODE need to be
+ * executable; everything else can be mapped with the XN bits
+ * set. Also take the new (optional) RO/XP bits into account.
+ */
+static __init pteval_t create_mapping_protection(efi_memory_desc_t *md)
+{
+	u64 attr = md->attribute;
+	u32 type = md->type;
+
+	if (type == EFI_MEMORY_MAPPED_IO)
+		return PROT_DEVICE_nGnRE;
+
+	if (WARN_ONCE(!PAGE_ALIGNED(md->phys_addr),
+		      "UEFI Runtime regions are not aligned to 64 KB -- buggy firmware?"))
+		/*
+		 * If the region is not aligned to the page size of the OS, we
+		 * can not use strict permissions, since that would also affect
+		 * the mapping attributes of the adjacent regions.
+		 */
+		return pgprot_val(PAGE_KERNEL_EXEC);
+
+	/* R-- */
+	if ((attr & (EFI_MEMORY_XP | EFI_MEMORY_RO)) ==
+	    (EFI_MEMORY_XP | EFI_MEMORY_RO))
+		return pgprot_val(PAGE_KERNEL_RO);
+
+	/* R-X */
+	if (attr & EFI_MEMORY_RO)
+		return pgprot_val(PAGE_KERNEL_ROX);
+
+	/* RW- */
+	if (attr & EFI_MEMORY_XP || type != EFI_RUNTIME_SERVICES_CODE)
+		return pgprot_val(PAGE_KERNEL);
+
+	/* RWX */
+	return pgprot_val(PAGE_KERNEL_EXEC);
+}
+
+/* we will fill this structure from the stub, so don't put it in .bss */
+struct screen_info screen_info __section(.data);
+
 int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md)
 {
-	pteval_t prot_val;
-
-	/*
-	 * Only regions of type EFI_RUNTIME_SERVICES_CODE need to be
-	 * executable, everything else can be mapped with the XN bits
-	 * set.
-	 */
-	if ((md->attribute & EFI_MEMORY_WB) == 0)
-		prot_val = PROT_DEVICE_nGnRE;
-	else if (md->type == EFI_RUNTIME_SERVICES_CODE ||
-		 !PAGE_ALIGNED(md->phys_addr))
-		prot_val = pgprot_val(PAGE_KERNEL_EXEC);
-	else
-		prot_val = pgprot_val(PAGE_KERNEL);
+	pteval_t prot_val = create_mapping_protection(md);
 
 	create_pgd_mapping(mm, md->phys_addr, md->virt_addr,
 			   md->num_pages << EFI_PAGE_SHIFT,
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 85da0f5..2c6e598 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -25,6 +25,7 @@
 #include <linux/irqchip/arm-gic-v3.h>
 
 #include <asm/assembler.h>
+#include <asm/boot.h>
 #include <asm/ptrace.h>
 #include <asm/asm-offsets.h>
 #include <asm/cache.h>
@@ -51,9 +52,6 @@
 #error TEXT_OFFSET must be less than 2MB
 #endif
 
-#define KERNEL_START	_text
-#define KERNEL_END	_end
-
 /*
  * Kernel startup entry point.
  * ---------------------------
@@ -102,8 +100,6 @@
 #endif
 
 #ifdef CONFIG_EFI
-	.globl	__efistub_stext_offset
-	.set	__efistub_stext_offset, stext - _head
 	.align 3
 pe_header:
 	.ascii	"PE"
@@ -123,11 +119,11 @@
 	.short	0x20b				// PE32+ format
 	.byte	0x02				// MajorLinkerVersion
 	.byte	0x14				// MinorLinkerVersion
-	.long	_end - stext			// SizeOfCode
+	.long	_end - efi_header_end		// SizeOfCode
 	.long	0				// SizeOfInitializedData
 	.long	0				// SizeOfUninitializedData
 	.long	__efistub_entry - _head		// AddressOfEntryPoint
-	.long	__efistub_stext_offset		// BaseOfCode
+	.long	efi_header_end - _head		// BaseOfCode
 
 extra_header_fields:
 	.quad	0				// ImageBase
@@ -144,7 +140,7 @@
 	.long	_end - _head			// SizeOfImage
 
 	// Everything before the kernel image is considered part of the header
-	.long	__efistub_stext_offset		// SizeOfHeaders
+	.long	efi_header_end - _head		// SizeOfHeaders
 	.long	0				// CheckSum
 	.short	0xa				// Subsystem (EFI application)
 	.short	0				// DllCharacteristics
@@ -188,10 +184,10 @@
 	.byte	0
 	.byte	0
 	.byte	0        		// end of 0 padding of section name
-	.long	_end - stext		// VirtualSize
-	.long	__efistub_stext_offset	// VirtualAddress
-	.long	_edata - stext		// SizeOfRawData
-	.long	__efistub_stext_offset	// PointerToRawData
+	.long	_end - efi_header_end	// VirtualSize
+	.long	efi_header_end - _head	// VirtualAddress
+	.long	_edata - efi_header_end	// SizeOfRawData
+	.long	efi_header_end - _head	// PointerToRawData
 
 	.long	0		// PointerToRelocations (0 for executables)
 	.long	0		// PointerToLineNumbers (0 for executables)
@@ -200,20 +196,23 @@
 	.long	0xe0500020	// Characteristics (section flags)
 
 	/*
-	 * EFI will load stext onwards at the 4k section alignment
+	 * EFI will load .text onwards at the 4k section alignment
 	 * described in the PE/COFF header. To ensure that instruction
 	 * sequences using an adrp and a :lo12: immediate will function
-	 * correctly at this alignment, we must ensure that stext is
+	 * correctly at this alignment, we must ensure that .text is
 	 * placed at a 4k boundary in the Image to begin with.
 	 */
 	.align 12
+efi_header_end:
 #endif
 
+	__INIT
+
 ENTRY(stext)
 	bl	preserve_boot_args
 	bl	el2_setup			// Drop to EL1, w20=cpu_boot_mode
-	mov	x23, xzr			// KASLR offset, defaults to 0
 	adrp	x24, __PHYS_OFFSET
+	and	x23, x24, MIN_KIMG_ALIGN - 1	// KASLR offset, defaults to 0
 	bl	set_cpu_boot_mode_flag
 	bl	__create_page_tables		// x25=TTBR0, x26=TTBR1
 	/*
@@ -222,13 +221,11 @@
 	 * On return, the CPU will be ready for the MMU to be turned on and
 	 * the TCR will have been set.
 	 */
-	ldr	x27, 0f				// address to jump to after
+	bl	__cpu_setup			// initialise processor
+	adr_l	x27, __primary_switch		// address to jump to after
 						// MMU has been enabled
-	adr_l	lr, __enable_mmu		// return (PIC) address
-	b	__cpu_setup			// initialise processor
+	b	__enable_mmu
 ENDPROC(stext)
-	.align	3
-0:	.quad	__mmap_switched - (_head - TEXT_OFFSET) + KIMAGE_VADDR
 
 /*
  * Preserve the arguments passed by the bootloader in x0 .. x3
@@ -338,7 +335,7 @@
 	cmp	x0, x6
 	b.lo	1b
 
-	ldr	x7, =SWAPPER_MM_MMUFLAGS
+	mov	x7, SWAPPER_MM_MMUFLAGS
 
 	/*
 	 * Create the identity mapping.
@@ -394,12 +391,13 @@
 	 * Map the kernel image (starting with PHYS_OFFSET).
 	 */
 	mov	x0, x26				// swapper_pg_dir
-	ldr	x5, =KIMAGE_VADDR
+	mov_q	x5, KIMAGE_VADDR + TEXT_OFFSET	// compile time __va(_text)
 	add	x5, x5, x23			// add KASLR displacement
 	create_pgd_entry x0, x5, x3, x6
-	ldr	w6, kernel_img_size
-	add	x6, x6, x5
-	mov	x3, x24				// phys offset
+	adrp	x6, _end			// runtime __pa(_end)
+	adrp	x3, _text			// runtime __pa(_text)
+	sub	x6, x6, x3			// _end - _text
+	add	x6, x6, x5			// runtime __va(_end)
 	create_block_map x0, x7, x3, x5, x6
 
 	/*
@@ -414,16 +412,13 @@
 
 	ret	x28
 ENDPROC(__create_page_tables)
-
-kernel_img_size:
-	.long	_end - (_head - TEXT_OFFSET)
 	.ltorg
 
 /*
  * The following fragment of code is executed with the MMU enabled.
  */
 	.set	initial_sp, init_thread_union + THREAD_START_SP
-__mmap_switched:
+__primary_switched:
 	mov	x28, lr				// preserve LR
 	adr_l	x8, vectors			// load VBAR_EL1 with virtual
 	msr	vbar_el1, x8			// vector table address
@@ -437,44 +432,6 @@
 	bl	__pi_memset
 	dsb	ishst				// Make zero page visible to PTW
 
-#ifdef CONFIG_RELOCATABLE
-
-	/*
-	 * Iterate over each entry in the relocation table, and apply the
-	 * relocations in place.
-	 */
-	adr_l	x8, __dynsym_start		// start of symbol table
-	adr_l	x9, __reloc_start		// start of reloc table
-	adr_l	x10, __reloc_end		// end of reloc table
-
-0:	cmp	x9, x10
-	b.hs	2f
-	ldp	x11, x12, [x9], #24
-	ldr	x13, [x9, #-8]
-	cmp	w12, #R_AARCH64_RELATIVE
-	b.ne	1f
-	add	x13, x13, x23			// relocate
-	str	x13, [x11, x23]
-	b	0b
-
-1:	cmp	w12, #R_AARCH64_ABS64
-	b.ne	0b
-	add	x12, x12, x12, lsl #1		// symtab offset: 24x top word
-	add	x12, x8, x12, lsr #(32 - 3)	// ... shifted into bottom word
-	ldrsh	w14, [x12, #6]			// Elf64_Sym::st_shndx
-	ldr	x15, [x12, #8]			// Elf64_Sym::st_value
-	cmp	w14, #-0xf			// SHN_ABS (0xfff1) ?
-	add	x14, x15, x23			// relocate
-	csel	x15, x14, x15, ne
-	add	x15, x13, x15
-	str	x15, [x11, x23]
-	b	0b
-
-2:	adr_l	x8, kimage_vaddr		// make relocated kimage_vaddr
-	dc	cvac, x8			// value visible to secondaries
-	dsb	sy				// with MMU off
-#endif
-
 	adr_l	sp, initial_sp, x4
 	mov	x4, sp
 	and	x4, x4, #~(THREAD_SIZE - 1)
@@ -490,17 +447,19 @@
 	bl	kasan_early_init
 #endif
 #ifdef CONFIG_RANDOMIZE_BASE
-	cbnz	x23, 0f				// already running randomized?
+	tst	x23, ~(MIN_KIMG_ALIGN - 1)	// already running randomized?
+	b.ne	0f
 	mov	x0, x21				// pass FDT address in x0
+	mov	x1, x23				// pass modulo offset in x1
 	bl	kaslr_early_init		// parse FDT for KASLR options
 	cbz	x0, 0f				// KASLR disabled? just proceed
-	mov	x23, x0				// record KASLR offset
+	orr	x23, x23, x0			// record KASLR offset
 	ret	x28				// we must enable KASLR, return
 						// to __enable_mmu()
 0:
 #endif
 	b	start_kernel
-ENDPROC(__mmap_switched)
+ENDPROC(__primary_switched)
 
 /*
  * end early head section, begin head code that is also used for
@@ -650,7 +609,7 @@
  * Sets the __boot_cpu_mode flag depending on the CPU boot mode passed
  * in x20. See arch/arm64/include/asm/virt.h for more info.
  */
-ENTRY(set_cpu_boot_mode_flag)
+set_cpu_boot_mode_flag:
 	adr_l	x1, __boot_cpu_mode
 	cmp	w20, #BOOT_CPU_MODE_EL2
 	b.ne	1f
@@ -683,7 +642,7 @@
 	bl	el2_setup			// Drop to EL1, w20=cpu_boot_mode
 	bl	set_cpu_boot_mode_flag
 	mrs	x0, mpidr_el1
-	ldr     x1, =MPIDR_HWID_BITMASK
+	mov_q	x1, MPIDR_HWID_BITMASK
 	and	x0, x0, x1
 	adr_l	x3, secondary_holding_pen_release
 pen:	ldr	x4, [x3]
@@ -703,7 +662,7 @@
 	b	secondary_startup
 ENDPROC(secondary_entry)
 
-ENTRY(secondary_startup)
+secondary_startup:
 	/*
 	 * Common entry point for secondary CPUs.
 	 */
@@ -711,14 +670,11 @@
 	adrp	x26, swapper_pg_dir
 	bl	__cpu_setup			// initialise processor
 
-	ldr	x8, kimage_vaddr
-	ldr	w9, 0f
-	sub	x27, x8, w9, sxtw		// address to jump to after enabling the MMU
+	adr_l	x27, __secondary_switch		// address to jump to after enabling the MMU
 	b	__enable_mmu
 ENDPROC(secondary_startup)
-0:	.long	(_text - TEXT_OFFSET) - __secondary_switched
 
-ENTRY(__secondary_switched)
+__secondary_switched:
 	adr_l	x5, vectors
 	msr	vbar_el1, x5
 	isb
@@ -768,7 +724,7 @@
  * If it isn't, park the CPU
  */
 	.section	".idmap.text", "ax"
-__enable_mmu:
+ENTRY(__enable_mmu)
 	mrs	x22, sctlr_el1			// preserve old SCTLR_EL1 value
 	mrs	x1, ID_AA64MMFR0_EL1
 	ubfx	x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4
@@ -806,7 +762,6 @@
 	ic	iallu				// flush instructions fetched
 	dsb	nsh				// via old mapping
 	isb
-	add	x27, x27, x23			// relocated __mmap_switched
 #endif
 	br	x27
 ENDPROC(__enable_mmu)
@@ -819,3 +774,53 @@
 	wfi
 	b 1b
 ENDPROC(__no_granule_support)
+
+__primary_switch:
+#ifdef CONFIG_RELOCATABLE
+	/*
+	 * Iterate over each entry in the relocation table, and apply the
+	 * relocations in place.
+	 */
+	ldr	w8, =__dynsym_offset		// offset to symbol table
+	ldr	w9, =__rela_offset		// offset to reloc table
+	ldr	w10, =__rela_size		// size of reloc table
+
+	mov_q	x11, KIMAGE_VADDR		// default virtual offset
+	add	x11, x11, x23			// actual virtual offset
+	add	x8, x8, x11			// __va(.dynsym)
+	add	x9, x9, x11			// __va(.rela)
+	add	x10, x9, x10			// __va(.rela) + sizeof(.rela)
+
+0:	cmp	x9, x10
+	b.hs	2f
+	ldp	x11, x12, [x9], #24
+	ldr	x13, [x9, #-8]
+	cmp	w12, #R_AARCH64_RELATIVE
+	b.ne	1f
+	add	x13, x13, x23			// relocate
+	str	x13, [x11, x23]
+	b	0b
+
+1:	cmp	w12, #R_AARCH64_ABS64
+	b.ne	0b
+	add	x12, x12, x12, lsl #1		// symtab offset: 24x top word
+	add	x12, x8, x12, lsr #(32 - 3)	// ... shifted into bottom word
+	ldrsh	w14, [x12, #6]			// Elf64_Sym::st_shndx
+	ldr	x15, [x12, #8]			// Elf64_Sym::st_value
+	cmp	w14, #-0xf			// SHN_ABS (0xfff1) ?
+	add	x14, x15, x23			// relocate
+	csel	x15, x14, x15, ne
+	add	x15, x13, x15
+	str	x15, [x11, x23]
+	b	0b
+
+2:
+#endif
+	ldr	x8, =__primary_switched
+	br	x8
+ENDPROC(__primary_switch)
+
+__secondary_switch:
+	ldr	x8, =__secondary_switched
+	br	x8
+ENDPROC(__secondary_switch)
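
In C, the R_AARCH64_RELATIVE half of the relocation loop in __primary_switch
is roughly the following sketch (standard ELF types; the assembly
additionally resolves R_AARCH64_ABS64 entries against the symbol table):

	#include <elf.h>

	static void apply_relative_relocs(Elf64_Rela *rela, Elf64_Rela *end,
					  unsigned long offset)
	{
		for (; rela < end; rela++) {
			if (ELF64_R_TYPE(rela->r_info) != R_AARCH64_RELATIVE)
				continue;
			/* *(u64 *)(r_offset + offset) = r_addend + offset */
			*(unsigned long *)(rela->r_offset + offset) =
						rela->r_addend + offset;
		}
	}
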
diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S
new file mode 100644
index 0000000..46f29b6
--- /dev/null
+++ b/arch/arm64/kernel/hibernate-asm.S
@@ -0,0 +1,176 @@
+/*
+ * Hibernate low-level support
+ *
+ * Copyright (C) 2016 ARM Ltd.
+ * Author:	James Morse <james.morse@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/linkage.h>
+#include <linux/errno.h>
+
+#include <asm/asm-offsets.h>
+#include <asm/assembler.h>
+#include <asm/cputype.h>
+#include <asm/memory.h>
+#include <asm/page.h>
+#include <asm/virt.h>
+
+/*
+ * To prevent the possibility of old and new partial table walks being visible
+ * in the TLB, switch the TTBR to a zero page when we invalidate the old
+ * records; see D4.7.1 'General TLB maintenance requirements' in ARM DDI 0487A.i.
+ * Even switching to our copied tables will cause a changed output address at
+ * each stage of the walk.
+ */
+.macro break_before_make_ttbr_switch zero_page, page_table
+	msr	ttbr1_el1, \zero_page
+	isb
+	tlbi	vmalle1is
+	dsb	ish
+	msr	ttbr1_el1, \page_table
+	isb
+.endm
+
+
+/*
+ * Resume from hibernate
+ *
+ * Loads temporary page tables then restores the memory image.
+ * Finally branches to cpu_resume() to restore the state saved by
+ * swsusp_arch_suspend().
+ *
+ * Because this code has to be copied to a 'safe' page, it can't call out to
+ * other functions by PC-relative address. Also remember that it may be
+ * midway through overwriting other functions. For this reason it contains
+ * code from flush_icache_range() and uses the copy_page() macro.
+ *
+ * This 'safe' page is mapped via ttbr0, and executed from there. This function
+ * switches to a copy of the linear map in ttbr1, performs the restore, then
+ * switches ttbr1 to the original kernel's swapper_pg_dir.
+ *
+ * All of memory gets written to, including code. We need to clean the kernel
+ * text to the Point of Coherence (PoC) before secondary cores can be booted.
+ * Because the kernel modules and executable pages mapped to user space are
+ * also written as data, we clean all pages we touch to the Point of
+ * Unification (PoU).
+ *
+ * x0: physical address of temporary page tables
+ * x1: physical address of swapper page tables
+ * x2: address of cpu_resume
+ * x3: linear map address of restore_pblist in the current kernel
+ * x4: physical address of __hyp_stub_vectors, or 0
+ * x5: physical address of a zero page that remains zero after resume
+ */
+.pushsection    ".hibernate_exit.text", "ax"
+ENTRY(swsusp_arch_suspend_exit)
+	/*
+	 * We execute from ttbr0, change ttbr1 to our copied linear map tables
+	 * with a break-before-make via the zero page
+	 */
+	break_before_make_ttbr_switch	x5, x0
+
+	mov	x21, x1
+	mov	x30, x2
+	mov	x24, x4
+	mov	x25, x5
+
+	/* walk the restore_pblist and use copy_page() to over-write memory */
+	mov	x19, x3
+
+1:	ldr	x10, [x19, #HIBERN_PBE_ORIG]
+	mov	x0, x10
+	ldr	x1, [x19, #HIBERN_PBE_ADDR]
+
+	copy_page	x0, x1, x2, x3, x4, x5, x6, x7, x8, x9
+
+	add	x1, x10, #PAGE_SIZE
+	/* Clean the copied page to PoU - based on flush_icache_range() */
+	dcache_line_size x2, x3
+	sub	x3, x2, #1
+	bic	x4, x10, x3
+2:	dc	cvau, x4	/* clean D line / unified line */
+	add	x4, x4, x2
+	cmp	x4, x1
+	b.lo	2b
+
+	ldr	x19, [x19, #HIBERN_PBE_NEXT]
+	cbnz	x19, 1b
+	dsb	ish		/* wait for PoU cleaning to finish */
+
+	/* switch to the restored kernel's page tables */
+	break_before_make_ttbr_switch	x25, x21
+
+	ic	ialluis
+	dsb	ish
+	isb
+
+	cbz	x24, 3f		/* Do we need to re-initialise EL2? */
+	hvc	#0
+3:	ret
+
+	.ltorg
+ENDPROC(swsusp_arch_suspend_exit)
+
+/*
+ * Restore the hyp stub.
+ * This must be done before the hibernate page is unmapped by _cpu_resume(),
+ * but happens before any of the hyp-stub's code is cleaned to PoC.
+ *
+ * x24: The physical address of __hyp_stub_vectors
+ */
+el1_sync:
+	msr	vbar_el2, x24
+	eret
+ENDPROC(el1_sync)
+
+.macro invalid_vector	label
+\label:
+	b \label
+ENDPROC(\label)
+.endm
+
+	invalid_vector	el2_sync_invalid
+	invalid_vector	el2_irq_invalid
+	invalid_vector	el2_fiq_invalid
+	invalid_vector	el2_error_invalid
+	invalid_vector	el1_sync_invalid
+	invalid_vector	el1_irq_invalid
+	invalid_vector	el1_fiq_invalid
+	invalid_vector	el1_error_invalid
+
+/* el2 vectors - switch el2 here while we restore the memory image. */
+	.align 11
+ENTRY(hibernate_el2_vectors)
+	ventry	el2_sync_invalid		// Synchronous EL2t
+	ventry	el2_irq_invalid			// IRQ EL2t
+	ventry	el2_fiq_invalid			// FIQ EL2t
+	ventry	el2_error_invalid		// Error EL2t
+
+	ventry	el2_sync_invalid		// Synchronous EL2h
+	ventry	el2_irq_invalid			// IRQ EL2h
+	ventry	el2_fiq_invalid			// FIQ EL2h
+	ventry	el2_error_invalid		// Error EL2h
+
+	ventry	el1_sync			// Synchronous 64-bit EL1
+	ventry	el1_irq_invalid			// IRQ 64-bit EL1
+	ventry	el1_fiq_invalid			// FIQ 64-bit EL1
+	ventry	el1_error_invalid		// Error 64-bit EL1
+
+	ventry	el1_sync_invalid		// Synchronous 32-bit EL1
+	ventry	el1_irq_invalid			// IRQ 32-bit EL1
+	ventry	el1_fiq_invalid			// FIQ 32-bit EL1
+	ventry	el1_error_invalid		// Error 32-bit EL1
+END(hibernate_el2_vectors)
+
+.popsection
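
The break_before_make_ttbr_switch macro, rendered as a C-level sketch
assuming read_sysreg()/write_sysreg()-style helpers; the real sequence stays
in assembly because it must execute from the safe page:

	static void bbm_switch_ttbr1(phys_addr_t zero_page, phys_addr_t pgd)
	{
		write_sysreg(zero_page, ttbr1_el1);	/* walk the zero page */
		isb();
		asm volatile("tlbi vmalle1is");		/* drop stale entries */
		dsb(ish);
		write_sysreg(pgd, ttbr1_el1);		/* install new tables */
		isb();
	}
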
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
new file mode 100644
index 0000000..f8df75d
--- /dev/null
+++ b/arch/arm64/kernel/hibernate.c
@@ -0,0 +1,487 @@
+/*
+ * Hibernate support specific for ARM64
+ *
+ * Derived from work on ARM hibernation support by:
+ *
+ * Ubuntu project, hibernation support for mach-dove
+ * Copyright (C) 2010 Nokia Corporation (Hiroshi Doyu)
+ * Copyright (C) 2010 Texas Instruments, Inc. (Teerth Reddy et al.)
+ *  https://lkml.org/lkml/2010/6/18/4
+ *  https://lists.linux-foundation.org/pipermail/linux-pm/2010-June/027422.html
+ *  https://patchwork.kernel.org/patch/96442/
+ *
+ * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
+ *
+ * License terms: GNU General Public License (GPL) version 2
+ */
+#define pr_fmt(x) "hibernate: " x
+#include <linux/kvm_host.h>
+#include <linux/mm.h>
+#include <linux/notifier.h>
+#include <linux/pm.h>
+#include <linux/sched.h>
+#include <linux/suspend.h>
+#include <linux/utsname.h>
+#include <linux/version.h>
+
+#include <asm/barrier.h>
+#include <asm/cacheflush.h>
+#include <asm/irqflags.h>
+#include <asm/memory.h>
+#include <asm/mmu_context.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/pgtable-hwdef.h>
+#include <asm/sections.h>
+#include <asm/suspend.h>
+#include <asm/virt.h>
+
+/*
+ * Hibernate core relies on this value being 0 on resume, and marks it
+ * __nosavedata assuming it will keep the resume kernel's '0' value. This
+ * doesn't happen with KASLR.
+ *
+ * defined as "__visible int in_suspend __nosavedata" in
+ * kernel/power/hibernate.c
+ */
+extern int in_suspend;
+
+/* Find a symbol's alias in the linear map */
+#define LMADDR(x)	phys_to_virt(virt_to_phys(x))
+
+/* Do we need to reset el2? */
+#define el2_reset_needed() (is_hyp_mode_available() && !is_kernel_in_hyp_mode())
+
+/*
+ * Start/end of the hibernate exit code; this must be copied to a 'safe'
+ * location in memory, and executed from there.
+ */
+extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[];
+
+/* temporary el2 vectors in the __hibernate_exit_text section. */
+extern char hibernate_el2_vectors[];
+
+/* hyp-stub vectors, used to restore el2 during resume from hibernate. */
+extern char __hyp_stub_vectors[];
+
+/*
+ * Values that may not change over hibernate/resume. We put the build number
+ * and date in here so that we guarantee not to resume with a different
+ * kernel.
+ */
+struct arch_hibernate_hdr_invariants {
+	char		uts_version[__NEW_UTS_LEN + 1];
+};
+
+/* These values need to be known across a hibernate/restore. */
+static struct arch_hibernate_hdr {
+	struct arch_hibernate_hdr_invariants invariants;
+
+	/* These are needed to find the relocated kernel if built with kaslr */
+	phys_addr_t	ttbr1_el1;
+	void		(*reenter_kernel)(void);
+
+	/*
+	 * We need to know where the __hyp_stub_vectors are after restore to
+	 * re-configure el2.
+	 */
+	phys_addr_t	__hyp_stub_vectors;
+} resume_hdr;
+
+static inline void arch_hdr_invariants(struct arch_hibernate_hdr_invariants *i)
+{
+	memset(i, 0, sizeof(*i));
+	memcpy(i->uts_version, init_utsname()->version, sizeof(i->uts_version));
+}
+
+int pfn_is_nosave(unsigned long pfn)
+{
+	unsigned long nosave_begin_pfn = virt_to_pfn(&__nosave_begin);
+	unsigned long nosave_end_pfn = virt_to_pfn(&__nosave_end - 1);
+
+	return (pfn >= nosave_begin_pfn) && (pfn <= nosave_end_pfn);
+}
+
+void notrace save_processor_state(void)
+{
+	WARN_ON(num_online_cpus() != 1);
+}
+
+void notrace restore_processor_state(void)
+{
+}
+
+int arch_hibernation_header_save(void *addr, unsigned int max_size)
+{
+	struct arch_hibernate_hdr *hdr = addr;
+
+	if (max_size < sizeof(*hdr))
+		return -EOVERFLOW;
+
+	arch_hdr_invariants(&hdr->invariants);
+	hdr->ttbr1_el1		= virt_to_phys(swapper_pg_dir);
+	hdr->reenter_kernel	= _cpu_resume;
+
+	/* We can't use __hyp_get_vectors() because kvm may still be loaded */
+	if (el2_reset_needed())
+		hdr->__hyp_stub_vectors = virt_to_phys(__hyp_stub_vectors);
+	else
+		hdr->__hyp_stub_vectors = 0;
+
+	return 0;
+}
+EXPORT_SYMBOL(arch_hibernation_header_save);
+
+int arch_hibernation_header_restore(void *addr)
+{
+	struct arch_hibernate_hdr_invariants invariants;
+	struct arch_hibernate_hdr *hdr = addr;
+
+	arch_hdr_invariants(&invariants);
+	if (memcmp(&hdr->invariants, &invariants, sizeof(invariants))) {
+		pr_crit("Hibernate image not generated by this kernel!\n");
+		return -EINVAL;
+	}
+
+	resume_hdr = *hdr;
+
+	return 0;
+}
+EXPORT_SYMBOL(arch_hibernation_header_restore);
+
+/*
+ * Copies length bytes, starting at src_start, into a new page,
+ * performs cache maintenance, then maps it at the specified low
+ * address as executable.
+ *
+ * This is used by hibernate to copy the code it needs to execute when
+ * overwriting the kernel text. This function generates a new set of page
+ * tables, which it loads into ttbr0.
+ *
+ * Length is provided as we probably only want 4K of data, even on a 64K
+ * page system.
+ */
+static int create_safe_exec_page(void *src_start, size_t length,
+				 unsigned long dst_addr,
+				 phys_addr_t *phys_dst_addr,
+				 void *(*allocator)(gfp_t mask),
+				 gfp_t mask)
+{
+	int rc = 0;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+	unsigned long dst = (unsigned long)allocator(mask);
+
+	if (!dst) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	memcpy((void *)dst, src_start, length);
+	flush_icache_range(dst, dst + length);
+
+	pgd = pgd_offset_raw(allocator(mask), dst_addr);
+	if (pgd_none(*pgd)) {
+		pud = allocator(mask);
+		if (!pud) {
+			rc = -ENOMEM;
+			goto out;
+		}
+		pgd_populate(&init_mm, pgd, pud);
+	}
+
+	pud = pud_offset(pgd, dst_addr);
+	if (pud_none(*pud)) {
+		pmd = allocator(mask);
+		if (!pmd) {
+			rc = -ENOMEM;
+			goto out;
+		}
+		pud_populate(&init_mm, pud, pmd);
+	}
+
+	pmd = pmd_offset(pud, dst_addr);
+	if (pmd_none(*pmd)) {
+		pte = allocator(mask);
+		if (!pte) {
+			rc = -ENOMEM;
+			goto out;
+		}
+		pmd_populate_kernel(&init_mm, pmd, pte);
+	}
+
+	pte = pte_offset_kernel(pmd, dst_addr);
+	set_pte(pte, __pte(virt_to_phys((void *)dst) |
+			 pgprot_val(PAGE_KERNEL_EXEC)));
+
+	/* Load our new page tables */
+	asm volatile("msr	ttbr0_el1, %0;"
+		     "isb;"
+		     "tlbi	vmalle1is;"
+		     "dsb	ish;"
+		     "isb" : : "r"(virt_to_phys(pgd)));
+
+	*phys_dst_addr = virt_to_phys((void *)dst);
+
+out:
+	return rc;
+}
+
+
+int swsusp_arch_suspend(void)
+{
+	int ret = 0;
+	unsigned long flags;
+	struct sleep_stack_data state;
+
+	local_dbg_save(flags);
+
+	if (__cpu_suspend_enter(&state)) {
+		ret = swsusp_save();
+	} else {
+		/* Clean kernel to PoC for secondary core startup */
+		__flush_dcache_area(LMADDR(KERNEL_START), KERNEL_END - KERNEL_START);
+
+		/*
+		 * Tell the hibernation core that we've just restored
+		 * the memory
+		 */
+		in_suspend = 0;
+
+		__cpu_suspend_exit();
+	}
+
+	local_dbg_restore(flags);
+
+	return ret;
+}
+
+static int copy_pte(pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long start,
+		    unsigned long end)
+{
+	pte_t *src_pte;
+	pte_t *dst_pte;
+	unsigned long addr = start;
+
+	dst_pte = (pte_t *)get_safe_page(GFP_ATOMIC);
+	if (!dst_pte)
+		return -ENOMEM;
+	pmd_populate_kernel(&init_mm, dst_pmd, dst_pte);
+	dst_pte = pte_offset_kernel(dst_pmd, start);
+
+	src_pte = pte_offset_kernel(src_pmd, start);
+	do {
+		if (!pte_none(*src_pte))
+			/*
+			 * Resume will overwrite areas that may be marked
+			 * read only (code, rodata). Clear the RDONLY bit from
+			 * the temporary mappings we use during restore.
+			 */
+			set_pte(dst_pte, __pte(pte_val(*src_pte) & ~PTE_RDONLY));
+	} while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
+
+	return 0;
+}
+
+static int copy_pmd(pud_t *dst_pud, pud_t *src_pud, unsigned long start,
+		    unsigned long end)
+{
+	pmd_t *src_pmd;
+	pmd_t *dst_pmd;
+	unsigned long next;
+	unsigned long addr = start;
+
+	if (pud_none(*dst_pud)) {
+		dst_pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
+		if (!dst_pmd)
+			return -ENOMEM;
+		pud_populate(&init_mm, dst_pud, dst_pmd);
+	}
+	dst_pmd = pmd_offset(dst_pud, start);
+
+	src_pmd = pmd_offset(src_pud, start);
+	do {
+		next = pmd_addr_end(addr, end);
+		if (pmd_none(*src_pmd))
+			continue;
+		if (pmd_table(*src_pmd)) {
+			if (copy_pte(dst_pmd, src_pmd, addr, next))
+				return -ENOMEM;
+		} else {
+			set_pmd(dst_pmd,
+				__pmd(pmd_val(*src_pmd) & ~PMD_SECT_RDONLY));
+		}
+	} while (dst_pmd++, src_pmd++, addr = next, addr != end);
+
+	return 0;
+}
+
+static int copy_pud(pgd_t *dst_pgd, pgd_t *src_pgd, unsigned long start,
+		    unsigned long end)
+{
+	pud_t *dst_pud;
+	pud_t *src_pud;
+	unsigned long next;
+	unsigned long addr = start;
+
+	if (pgd_none(*dst_pgd)) {
+		dst_pud = (pud_t *)get_safe_page(GFP_ATOMIC);
+		if (!dst_pud)
+			return -ENOMEM;
+		pgd_populate(&init_mm, dst_pgd, dst_pud);
+	}
+	dst_pud = pud_offset(dst_pgd, start);
+
+	src_pud = pud_offset(src_pgd, start);
+	do {
+		next = pud_addr_end(addr, end);
+		if (pud_none(*src_pud))
+			continue;
+		if (pud_table(*(src_pud))) {
+			if (copy_pmd(dst_pud, src_pud, addr, next))
+				return -ENOMEM;
+		} else {
+			set_pud(dst_pud,
+				__pud(pud_val(*src_pud) & ~PMD_SECT_RDONLY));
+		}
+	} while (dst_pud++, src_pud++, addr = next, addr != end);
+
+	return 0;
+}
+
+static int copy_page_tables(pgd_t *dst_pgd, unsigned long start,
+			    unsigned long end)
+{
+	unsigned long next;
+	unsigned long addr = start;
+	pgd_t *src_pgd = pgd_offset_k(start);
+
+	dst_pgd = pgd_offset_raw(dst_pgd, start);
+	do {
+		next = pgd_addr_end(addr, end);
+		if (pgd_none(*src_pgd))
+			continue;
+		if (copy_pud(dst_pgd, src_pgd, addr, next))
+			return -ENOMEM;
+	} while (dst_pgd++, src_pgd++, addr = next, addr != end);
+
+	return 0;
+}
+
+/*
+ * Set up, then resume from the hibernate image using swsusp_arch_suspend_exit().
+ *
+ * Memory allocated by get_safe_page() will be dealt with by the hibernate code;
+ * we don't need to free it here.
+ */
+int swsusp_arch_resume(void)
+{
+	int rc = 0;
+	void *zero_page;
+	size_t exit_size;
+	pgd_t *tmp_pg_dir;
+	void *lm_restore_pblist;
+	phys_addr_t phys_hibernate_exit;
+	void __noreturn (*hibernate_exit)(phys_addr_t, phys_addr_t, void *,
+					  void *, phys_addr_t, phys_addr_t);
+
+	/*
+	 * Locate the exit code in the bottom-but-one page, so that *NULL
+	 * still has disastrous effects.
+	 */
+	hibernate_exit = (void *)PAGE_SIZE;
+	exit_size = __hibernate_exit_text_end - __hibernate_exit_text_start;
+	/*
+	 * Copy swsusp_arch_suspend_exit() to a safe page. This will generate
+	 * a new set of ttbr0 page tables and load them.
+	 */
+	rc = create_safe_exec_page(__hibernate_exit_text_start, exit_size,
+				   (unsigned long)hibernate_exit,
+				   &phys_hibernate_exit,
+				   (void *)get_safe_page, GFP_ATOMIC);
+	if (rc) {
+		pr_err("Failed to create safe executable page for hibernate_exit code.");
+		goto out;
+	}
+
+	/*
+	 * The hibernate exit text contains a set of el2 vectors that will
+	 * be executed at el2 with the mmu off in order to reload hyp-stub.
+	 */
+	__flush_dcache_area(hibernate_exit, exit_size);
+
+	/*
+	 * Restoring the memory image will overwrite the ttbr1 page tables.
+	 * Create a second copy of just the linear map, and use this when
+	 * restoring.
+	 */
+	tmp_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC);
+	if (!tmp_pg_dir) {
+		pr_err("Failed to allocate memory for temporary page tables.");
+		rc = -ENOMEM;
+		goto out;
+	}
+	rc = copy_page_tables(tmp_pg_dir, PAGE_OFFSET, 0);
+	if (rc)
+		goto out;
+
+	/*
+	 * Since we only copied the linear map, we need to find restore_pblist's
+	 * linear map address.
+	 */
+	lm_restore_pblist = LMADDR(restore_pblist);
+
+	/*
+	 * KASLR will cause the el2 vectors to be in a different location in
+	 * the resumed kernel. Load hibernate's temporary copy into el2.
+	 *
+	 * We can skip this step if we booted at EL1, or are running with VHE.
+	 */
+	if (el2_reset_needed()) {
+		phys_addr_t el2_vectors = phys_hibernate_exit;  /* base */
+		el2_vectors += hibernate_el2_vectors -
+			       __hibernate_exit_text_start;     /* offset */
+
+		__hyp_set_vectors(el2_vectors);
+	}
+
+	/*
+	 * We need a zero page that is zero before & after resume in order
+	 * to break before make on the ttbr1 page tables.
+	 */
+	zero_page = (void *)get_safe_page(GFP_ATOMIC);
+
+	hibernate_exit(virt_to_phys(tmp_pg_dir), resume_hdr.ttbr1_el1,
+		       resume_hdr.reenter_kernel, lm_restore_pblist,
+		       resume_hdr.__hyp_stub_vectors, virt_to_phys(zero_page));
+
+out:
+	return rc;
+}
+
+static int check_boot_cpu_online_pm_callback(struct notifier_block *nb,
+					     unsigned long action, void *ptr)
+{
+	if (action == PM_HIBERNATION_PREPARE &&
+	     cpumask_first(cpu_online_mask) != 0) {
+		pr_warn("CPU0 is offline.\n");
+		return notifier_from_errno(-ENODEV);
+	}
+
+	return NOTIFY_OK;
+}
+
+static int __init check_boot_cpu_online_init(void)
+{
+	/*
+	 * Set this pm_notifier callback with a lower priority than
+	 * cpu_hotplug_pm_callback, so that cpu_hotplug_pm_callback will be
+	 * called earlier to disable cpu hotplug before the cpu online check.
+	 */
+	pm_notifier(check_boot_cpu_online_pm_callback, -INT_MAX);
+
+	return 0;
+}
+core_initcall(check_boot_cpu_online_init);
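
swsusp_arch_resume() hands hibernate_exit() a zeroed page precisely so the
assembly can perform a break-before-make switch of ttbr1: point translation at
a table containing no valid entries, invalidate the TLBs, then install the
target tables. A hedged C-shaped sketch of the sequence performed by the
break_before_make_ttbr_switch macro (write_ttbr1(), tlbi_vmalle1is() and
dsb_ish() stand in for the msr/tlbi/dsb instructions; they are not kernel
APIs):

	/* Sketch of break-before-make on ttbr1, using illustrative helpers. */
	static void bbm_switch_ttbr1(phys_addr_t zero_page, phys_addr_t new_tables)
	{
		write_ttbr1(zero_page);		/* break: no translation can be cached */
		isb();
		tlbi_vmalle1is();		/* discard entries from the old tables */
		dsb_ish();
		isb();
		write_ttbr1(new_tables);	/* make: switch to the target tables */
		isb();
	}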
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index b45c95d..ce21aa8 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -616,7 +616,7 @@
 		perf_bp_event(bp, regs);
 
 		/* Do we need to handle the stepping? */
-		if (!bp->overflow_handler)
+		if (is_default_overflow_handler(bp))
 			step = 1;
 unlock:
 		rcu_read_unlock();
@@ -712,7 +712,7 @@
 		perf_bp_event(wp, regs);
 
 		/* Do we need to handle the stepping? */
-		if (!wp->overflow_handler)
+		if (is_default_overflow_handler(wp))
 			step = 1;
 
 unlock:
@@ -886,9 +886,11 @@
 						unsigned long action,
 						void *hcpu)
 {
-	int cpu = (long)hcpu;
-	if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE)
-		smp_call_function_single(cpu, hw_breakpoint_reset, NULL, 1);
+	if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE) {
+		local_irq_disable();
+		hw_breakpoint_reset(NULL);
+		local_irq_enable();
+	}
 	return NOTIFY_OK;
 }
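
Two notes on the hw_breakpoint.c hunks: the stepping logic now keys off
is_default_overflow_handler() because the perf core always installs an
overflow handler, so a NULL check no longer identifies "no custom handling
requested"; and the notifier may call hw_breakpoint_reset() directly since
CPU_ONLINE callbacks run on the hotplugged CPU itself. The helper is roughly
the following (a hedged paraphrase of include/linux/perf_event.h from this
era):

	static inline bool is_default_overflow_handler(struct perf_event *event)
	{
		return event->overflow_handler == perf_event_output;
	}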
 
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index a272f33..8727f44 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -22,6 +22,8 @@
 #include <linux/irqchip/arm-gic-v3.h>
 
 #include <asm/assembler.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_asm.h>
 #include <asm/ptrace.h>
 #include <asm/virt.h>
 
@@ -53,15 +55,26 @@
 	.align 11
 
 el1_sync:
-	mrs	x1, esr_el2
-	lsr	x1, x1, #26
-	cmp	x1, #0x16
-	b.ne	2f				// Not an HVC trap
-	cbz	x0, 1f
-	msr	vbar_el2, x0			// Set vbar_el2
-	b	2f
-1:	mrs	x0, vbar_el2			// Return vbar_el2
-2:	eret
+	mrs	x30, esr_el2
+	lsr	x30, x30, #ESR_ELx_EC_SHIFT
+
+	cmp	x30, #ESR_ELx_EC_HVC64
+	b.ne	9f				// Not an HVC trap
+
+	cmp	x0, #HVC_GET_VECTORS
+	b.ne	1f
+	mrs	x0, vbar_el2
+	b	9f
+
+1:	cmp	x0, #HVC_SET_VECTORS
+	b.ne	2f
+	msr	vbar_el2, x1
+	b	9f
+
+	/* Someone called kvm_call_hyp() against the hyp-stub... */
+2:	mov	x0, #ARM_EXCEPTION_HYP_GONE
+
+9:	eret
 ENDPROC(el1_sync)
 
 .macro invalid_vector	label
@@ -101,10 +114,18 @@
  */
 
 ENTRY(__hyp_get_vectors)
-	mov	x0, xzr
-	// fall through
-ENTRY(__hyp_set_vectors)
+	str	lr, [sp, #-16]!
+	mov	x0, #HVC_GET_VECTORS
 	hvc	#0
+	ldr	lr, [sp], #16
 	ret
 ENDPROC(__hyp_get_vectors)
+
+ENTRY(__hyp_set_vectors)
+	str	lr, [sp, #-16]!
+	mov	x1, x0
+	mov	x0, #HVC_SET_VECTORS
+	hvc	#0
+	ldr	lr, [sp], #16
+	ret
 ENDPROC(__hyp_set_vectors)
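
The rewritten stub turns the old "x0 is either a vector address or zero"
convention into a small command ABI: x0 carries HVC_GET_VECTORS or
HVC_SET_VECTORS, x1 the argument, and x30 is used as scratch by the stub
(hence the lr save/restore around each hvc). A hedged inline-asm sketch of a
caller, equivalent in spirit to __hyp_set_vectors() above; illustrative only,
the kernel uses the assembly wrappers:

	static inline void hyp_set_vectors_sketch(phys_addr_t vectors)
	{
		register unsigned long x0 asm("x0") = HVC_SET_VECTORS;
		register unsigned long x1 asm("x1") = vectors;

		asm volatile("hvc	#0"
			     : "+r" (x0)
			     : "r" (x1)
			     : "x30", "memory");	/* the stub clobbers x30 */
	}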
diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h
index 5e360ce..c7fcb23 100644
--- a/arch/arm64/kernel/image.h
+++ b/arch/arm64/kernel/image.h
@@ -73,6 +73,8 @@
 
 #ifdef CONFIG_EFI
 
+__efistub_stext_offset = stext - _text;
+
 /*
  * Prevent the symbol aliases below from being emitted into the kallsyms
  * table, by forcing them to be absolute symbols (which are conveniently
@@ -112,6 +114,7 @@
 __efistub__text			= KALLSYMS_HIDE(_text);
 __efistub__end			= KALLSYMS_HIDE(_end);
 __efistub__edata		= KALLSYMS_HIDE(_edata);
+__efistub_screen_info		= KALLSYMS_HIDE(screen_info);
 
 #endif
 
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index 7371455..368c082 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -96,7 +96,7 @@
 	if (module && IS_ENABLED(CONFIG_DEBUG_SET_MODULE_RONX))
 		page = vmalloc_to_page(addr);
 	else if (!module && IS_ENABLED(CONFIG_DEBUG_RODATA))
-		page = virt_to_page(addr);
+		page = pfn_to_page(PHYS_PFN(__pa(addr)));
 	else
 		return addr;
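
The insn.c hunk matters once the kernel image can be mapped outside the linear
region (e.g. with KASLR): virt_to_page() is only valid for linear-map virtual
addresses, so a text address must be translated via its physical address
instead. Schematically (the helper name is invented for illustration):

	static struct page *text_addr_to_page(void *addr)
	{
		/* virt_to_page(addr) would assume a linear-map VA; go via the PA */
		return pfn_to_page(PHYS_PFN(__pa(addr)));
	}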
 
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index 5829839..b054691 100644
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -74,7 +74,7 @@
  * containing function pointers) to be reinitialized, and zero-initialized
  * .bss variables will be reset to 0.
  */
-u64 __init kaslr_early_init(u64 dt_phys)
+u64 __init kaslr_early_init(u64 dt_phys, u64 modulo_offset)
 {
 	void *fdt;
 	u64 seed, offset, mask, module_range;
@@ -132,8 +132,8 @@
 	 * boundary (for 4KB/16KB/64KB granule kernels, respectively). If this
 	 * happens, increase the KASLR offset by the size of the kernel image.
 	 */
-	if ((((u64)_text + offset) >> SWAPPER_TABLE_SHIFT) !=
-	    (((u64)_end + offset) >> SWAPPER_TABLE_SHIFT))
+	if ((((u64)_text + offset + modulo_offset) >> SWAPPER_TABLE_SHIFT) !=
+	    (((u64)_end + offset + modulo_offset) >> SWAPPER_TABLE_SHIFT))
 		offset = (offset + (u64)(_end - _text)) & mask;
 
 	if (IS_ENABLED(CONFIG_KASAN))
diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c
index c72de66..3c4e308 100644
--- a/arch/arm64/kernel/pci.c
+++ b/arch/arm64/kernel/pci.c
@@ -74,6 +74,16 @@
 	return -ENXIO;
 }
 
+#ifdef CONFIG_NUMA
+
+int pcibus_to_node(struct pci_bus *bus)
+{
+	return dev_to_node(&bus->dev);
+}
+EXPORT_SYMBOL(pcibus_to_node);
+
+#endif
+
 #ifdef CONFIG_ACPI
 /* Root bridge scanning */
 struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c
index ff46654..32c3c6e 100644
--- a/arch/arm64/kernel/perf_callchain.c
+++ b/arch/arm64/kernel/perf_callchain.c
@@ -122,7 +122,7 @@
 
 		tail = (struct frame_tail __user *)regs->regs[29];
 
-		while (entry->nr < PERF_MAX_STACK_DEPTH &&
+		while (entry->nr < sysctl_perf_event_max_stack &&
 		       tail && !((unsigned long)tail & 0xf))
 			tail = user_backtrace(tail, entry);
 	} else {
@@ -132,7 +132,7 @@
 
 		tail = (struct compat_frame_tail __user *)regs->compat_fp - 1;
 
-		while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
+		while ((entry->nr < sysctl_perf_event_max_stack) &&
 			tail && !((unsigned long)tail & 0x3))
 			tail = compat_user_backtrace(tail, entry);
 #endif
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index f419a7c..838ccf1 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -21,6 +21,7 @@
 
 #include <asm/irq_regs.h>
 #include <asm/perf_event.h>
+#include <asm/sysreg.h>
 #include <asm/virt.h>
 
 #include <linux/of.h>
@@ -33,43 +34,43 @@
  */
 
 /* Required events. */
-#define ARMV8_PMUV3_PERFCTR_PMNC_SW_INCR			0x00
-#define ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL			0x03
-#define ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS			0x04
-#define ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED			0x10
-#define ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES			0x11
-#define ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED			0x12
+#define ARMV8_PMUV3_PERFCTR_SW_INCR				0x00
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL			0x03
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE				0x04
+#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED				0x10
+#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES				0x11
+#define ARMV8_PMUV3_PERFCTR_BR_PRED				0x12
 
 /* At least one of the following is required. */
-#define ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED			0x08
-#define ARMV8_PMUV3_PERFCTR_OP_SPEC				0x1B
+#define ARMV8_PMUV3_PERFCTR_INST_RETIRED			0x08
+#define ARMV8_PMUV3_PERFCTR_INST_SPEC				0x1B
 
 /* Common architectural events. */
-#define ARMV8_PMUV3_PERFCTR_MEM_READ				0x06
-#define ARMV8_PMUV3_PERFCTR_MEM_WRITE				0x07
+#define ARMV8_PMUV3_PERFCTR_LD_RETIRED				0x06
+#define ARMV8_PMUV3_PERFCTR_ST_RETIRED				0x07
 #define ARMV8_PMUV3_PERFCTR_EXC_TAKEN				0x09
-#define ARMV8_PMUV3_PERFCTR_EXC_EXECUTED			0x0A
-#define ARMV8_PMUV3_PERFCTR_CID_WRITE				0x0B
-#define ARMV8_PMUV3_PERFCTR_PC_WRITE				0x0C
-#define ARMV8_PMUV3_PERFCTR_PC_IMM_BRANCH			0x0D
-#define ARMV8_PMUV3_PERFCTR_PC_PROC_RETURN			0x0E
-#define ARMV8_PMUV3_PERFCTR_MEM_UNALIGNED_ACCESS		0x0F
-#define ARMV8_PMUV3_PERFCTR_TTBR_WRITE				0x1C
+#define ARMV8_PMUV3_PERFCTR_EXC_RETURN				0x0A
+#define ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED			0x0B
+#define ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED			0x0C
+#define ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED			0x0D
+#define ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED			0x0E
+#define ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED		0x0F
+#define ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED			0x1C
 #define ARMV8_PMUV3_PERFCTR_CHAIN				0x1E
 #define ARMV8_PMUV3_PERFCTR_BR_RETIRED				0x21
 
 /* Common microarchitectural events. */
-#define ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL			0x01
-#define ARMV8_PMUV3_PERFCTR_ITLB_REFILL				0x02
-#define ARMV8_PMUV3_PERFCTR_DTLB_REFILL				0x05
+#define ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL			0x01
+#define ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL			0x02
+#define ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL			0x05
 #define ARMV8_PMUV3_PERFCTR_MEM_ACCESS				0x13
-#define ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS			0x14
-#define ARMV8_PMUV3_PERFCTR_L1_DCACHE_WB			0x15
-#define ARMV8_PMUV3_PERFCTR_L2_CACHE_ACCESS			0x16
-#define ARMV8_PMUV3_PERFCTR_L2_CACHE_REFILL			0x17
-#define ARMV8_PMUV3_PERFCTR_L2_CACHE_WB				0x18
+#define ARMV8_PMUV3_PERFCTR_L1I_CACHE				0x14
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB			0x15
+#define ARMV8_PMUV3_PERFCTR_L2D_CACHE				0x16
+#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL			0x17
+#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB			0x18
 #define ARMV8_PMUV3_PERFCTR_BUS_ACCESS				0x19
-#define ARMV8_PMUV3_PERFCTR_MEM_ERROR				0x1A
+#define ARMV8_PMUV3_PERFCTR_MEMORY_ERROR			0x1A
 #define ARMV8_PMUV3_PERFCTR_BUS_CYCLES				0x1D
 #define ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE			0x1F
 #define ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE			0x20
@@ -85,89 +86,182 @@
 #define ARMV8_PMUV3_PERFCTR_L3D_CACHE				0x2B
 #define ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB			0x2C
 #define ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL			0x2D
-#define ARMV8_PMUV3_PERFCTR_L21_TLB_REFILL			0x2E
+#define ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL			0x2E
 #define ARMV8_PMUV3_PERFCTR_L2D_TLB				0x2F
-#define ARMV8_PMUV3_PERFCTR_L21_TLB				0x30
+#define ARMV8_PMUV3_PERFCTR_L2I_TLB				0x30
 
-/* ARMv8 implementation defined event types. */
-#define ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_LD		0x40
-#define ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_ST		0x41
-#define ARMV8_IMPDEF_PERFCTR_L1_DCACHE_REFILL_LD		0x42
-#define ARMV8_IMPDEF_PERFCTR_L1_DCACHE_REFILL_ST		0x43
-#define ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_LD			0x4C
-#define ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_ST			0x4D
-#define ARMV8_IMPDEF_PERFCTR_DTLB_ACCESS_LD			0x4E
-#define ARMV8_IMPDEF_PERFCTR_DTLB_ACCESS_ST			0x4F
+/* ARMv8 recommended implementation defined event types */
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD			0x40
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR			0x41
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD		0x42
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR		0x43
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_INNER		0x44
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_OUTER		0x45
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_VICTIM		0x46
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_CLEAN			0x47
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_INVAL			0x48
+
+#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD			0x4C
+#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR			0x4D
+#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD				0x4E
+#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR				0x4F
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_RD			0x50
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WR			0x51
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_RD		0x52
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_WR		0x53
+
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_VICTIM		0x56
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_CLEAN			0x57
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_INVAL			0x58
+
+#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_RD			0x5C
+#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_WR			0x5D
+#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_RD				0x5E
+#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_WR				0x5F
+
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD			0x60
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR			0x61
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_SHARED			0x62
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NOT_SHARED		0x63
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NORMAL			0x64
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_PERIPH			0x65
+
+#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_RD			0x66
+#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_WR			0x67
+#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LD_SPEC			0x68
+#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_ST_SPEC			0x69
+#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LDST_SPEC		0x6A
+
+#define ARMV8_IMPDEF_PERFCTR_LDREX_SPEC				0x6C
+#define ARMV8_IMPDEF_PERFCTR_STREX_PASS_SPEC			0x6D
+#define ARMV8_IMPDEF_PERFCTR_STREX_FAIL_SPEC			0x6E
+#define ARMV8_IMPDEF_PERFCTR_STREX_SPEC				0x6F
+#define ARMV8_IMPDEF_PERFCTR_LD_SPEC				0x70
+#define ARMV8_IMPDEF_PERFCTR_ST_SPEC				0x71
+#define ARMV8_IMPDEF_PERFCTR_LDST_SPEC				0x72
+#define ARMV8_IMPDEF_PERFCTR_DP_SPEC				0x73
+#define ARMV8_IMPDEF_PERFCTR_ASE_SPEC				0x74
+#define ARMV8_IMPDEF_PERFCTR_VFP_SPEC				0x75
+#define ARMV8_IMPDEF_PERFCTR_PC_WRITE_SPEC			0x76
+#define ARMV8_IMPDEF_PERFCTR_CRYPTO_SPEC			0x77
+#define ARMV8_IMPDEF_PERFCTR_BR_IMMED_SPEC			0x78
+#define ARMV8_IMPDEF_PERFCTR_BR_RETURN_SPEC			0x79
+#define ARMV8_IMPDEF_PERFCTR_BR_INDIRECT_SPEC			0x7A
+
+#define ARMV8_IMPDEF_PERFCTR_ISB_SPEC				0x7C
+#define ARMV8_IMPDEF_PERFCTR_DSB_SPEC				0x7D
+#define ARMV8_IMPDEF_PERFCTR_DMB_SPEC				0x7E
+
+#define ARMV8_IMPDEF_PERFCTR_EXC_UNDEF				0x81
+#define ARMV8_IMPDEF_PERFCTR_EXC_SVC				0x82
+#define ARMV8_IMPDEF_PERFCTR_EXC_PABORT				0x83
+#define ARMV8_IMPDEF_PERFCTR_EXC_DABORT				0x84
+
+#define ARMV8_IMPDEF_PERFCTR_EXC_IRQ				0x86
+#define ARMV8_IMPDEF_PERFCTR_EXC_FIQ				0x87
+#define ARMV8_IMPDEF_PERFCTR_EXC_SMC				0x88
+
+#define ARMV8_IMPDEF_PERFCTR_EXC_HVC				0x8A
+#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_PABORT			0x8B
+#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_DABORT			0x8C
+#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_OTHER			0x8D
+#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_IRQ			0x8E
+#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_FIQ			0x8F
+#define ARMV8_IMPDEF_PERFCTR_RC_LD_SPEC				0x90
+#define ARMV8_IMPDEF_PERFCTR_RC_ST_SPEC				0x91
+
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_RD			0xA0
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WR			0xA1
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_RD		0xA2
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_WR		0xA3
+
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_VICTIM		0xA6
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_CLEAN			0xA7
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_INVAL			0xA8
 
 /* ARMv8 Cortex-A53 specific event types. */
-#define ARMV8_A53_PERFCTR_PREFETCH_LINEFILL			0xC2
+#define ARMV8_A53_PERFCTR_PREF_LINEFILL				0xC2
 
 /* ARMv8 Cavium ThunderX specific event types. */
-#define ARMV8_THUNDER_PERFCTR_L1_DCACHE_MISS_ST			0xE9
-#define ARMV8_THUNDER_PERFCTR_L1_DCACHE_PREF_ACCESS		0xEA
-#define ARMV8_THUNDER_PERFCTR_L1_DCACHE_PREF_MISS		0xEB
-#define ARMV8_THUNDER_PERFCTR_L1_ICACHE_PREF_ACCESS		0xEC
-#define ARMV8_THUNDER_PERFCTR_L1_ICACHE_PREF_MISS		0xED
+#define ARMV8_THUNDER_PERFCTR_L1D_CACHE_MISS_ST			0xE9
+#define ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_ACCESS		0xEA
+#define ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_MISS		0xEB
+#define ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_ACCESS		0xEC
+#define ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_MISS		0xED
 
 /* PMUv3 HW events mapping. */
 static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = {
 	PERF_MAP_ALL_UNSUPPORTED,
-	[PERF_COUNT_HW_CPU_CYCLES]		= ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES,
-	[PERF_COUNT_HW_INSTRUCTIONS]		= ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED,
-	[PERF_COUNT_HW_CACHE_REFERENCES]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
-	[PERF_COUNT_HW_CACHE_MISSES]		= ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
-	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+	[PERF_COUNT_HW_CPU_CYCLES]		= ARMV8_PMUV3_PERFCTR_CPU_CYCLES,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= ARMV8_PMUV3_PERFCTR_INST_RETIRED,
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= ARMV8_PMUV3_PERFCTR_L1D_CACHE,
+	[PERF_COUNT_HW_CACHE_MISSES]		= ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
+	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
 };
 
 /* ARM Cortex-A53 HW events mapping. */
 static const unsigned armv8_a53_perf_map[PERF_COUNT_HW_MAX] = {
 	PERF_MAP_ALL_UNSUPPORTED,
-	[PERF_COUNT_HW_CPU_CYCLES]		= ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES,
-	[PERF_COUNT_HW_INSTRUCTIONS]		= ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED,
-	[PERF_COUNT_HW_CACHE_REFERENCES]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
-	[PERF_COUNT_HW_CACHE_MISSES]		= ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
-	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= ARMV8_PMUV3_PERFCTR_PC_WRITE,
-	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+	[PERF_COUNT_HW_CPU_CYCLES]		= ARMV8_PMUV3_PERFCTR_CPU_CYCLES,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= ARMV8_PMUV3_PERFCTR_INST_RETIRED,
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= ARMV8_PMUV3_PERFCTR_L1D_CACHE,
+	[PERF_COUNT_HW_CACHE_MISSES]		= ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED,
+	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
 	[PERF_COUNT_HW_BUS_CYCLES]		= ARMV8_PMUV3_PERFCTR_BUS_CYCLES,
 };
 
 /* ARM Cortex-A57 and Cortex-A72 events mapping. */
 static const unsigned armv8_a57_perf_map[PERF_COUNT_HW_MAX] = {
 	PERF_MAP_ALL_UNSUPPORTED,
-	[PERF_COUNT_HW_CPU_CYCLES]		= ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES,
-	[PERF_COUNT_HW_INSTRUCTIONS]		= ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED,
-	[PERF_COUNT_HW_CACHE_REFERENCES]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
-	[PERF_COUNT_HW_CACHE_MISSES]		= ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
-	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+	[PERF_COUNT_HW_CPU_CYCLES]		= ARMV8_PMUV3_PERFCTR_CPU_CYCLES,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= ARMV8_PMUV3_PERFCTR_INST_RETIRED,
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= ARMV8_PMUV3_PERFCTR_L1D_CACHE,
+	[PERF_COUNT_HW_CACHE_MISSES]		= ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
+	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
 	[PERF_COUNT_HW_BUS_CYCLES]		= ARMV8_PMUV3_PERFCTR_BUS_CYCLES,
 };
 
 static const unsigned armv8_thunder_perf_map[PERF_COUNT_HW_MAX] = {
 	PERF_MAP_ALL_UNSUPPORTED,
-	[PERF_COUNT_HW_CPU_CYCLES]		= ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES,
-	[PERF_COUNT_HW_INSTRUCTIONS]		= ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED,
-	[PERF_COUNT_HW_CACHE_REFERENCES]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
-	[PERF_COUNT_HW_CACHE_MISSES]		= ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
-	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= ARMV8_PMUV3_PERFCTR_PC_WRITE,
-	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+	[PERF_COUNT_HW_CPU_CYCLES]		= ARMV8_PMUV3_PERFCTR_CPU_CYCLES,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= ARMV8_PMUV3_PERFCTR_INST_RETIRED,
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= ARMV8_PMUV3_PERFCTR_L1D_CACHE,
+	[PERF_COUNT_HW_CACHE_MISSES]		= ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED,
+	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
 	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV8_PMUV3_PERFCTR_STALL_FRONTEND,
 	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= ARMV8_PMUV3_PERFCTR_STALL_BACKEND,
 };
 
+/* Broadcom Vulcan events mapping */
+static const unsigned armv8_vulcan_perf_map[PERF_COUNT_HW_MAX] = {
+	PERF_MAP_ALL_UNSUPPORTED,
+	[PERF_COUNT_HW_CPU_CYCLES]		= ARMV8_PMUV3_PERFCTR_CPU_CYCLES,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= ARMV8_PMUV3_PERFCTR_INST_RETIRED,
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= ARMV8_PMUV3_PERFCTR_L1D_CACHE,
+	[PERF_COUNT_HW_CACHE_MISSES]		= ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= ARMV8_PMUV3_PERFCTR_BR_RETIRED,
+	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
+	[PERF_COUNT_HW_BUS_CYCLES]		= ARMV8_PMUV3_PERFCTR_BUS_CYCLES,
+	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= ARMV8_PMUV3_PERFCTR_STALL_FRONTEND,
+	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= ARMV8_PMUV3_PERFCTR_STALL_BACKEND,
+};
+
 static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 						[PERF_COUNT_HW_CACHE_OP_MAX]
 						[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
 	PERF_CACHE_MAP_ALL_UNSUPPORTED,
 
-	[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
-	[C(L1D)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
-	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
-	[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
+	[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1D_CACHE,
+	[C(L1D)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1D_CACHE,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
 
-	[C(BPU)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
-	[C(BPU)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
-	[C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
-	[C(BPU)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+	[C(BPU)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_BR_PRED,
+	[C(BPU)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
+	[C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_BR_PRED,
+	[C(BPU)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
 };
 
 static const unsigned armv8_a53_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
@@ -175,21 +269,21 @@
 					      [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
 	PERF_CACHE_MAP_ALL_UNSUPPORTED,
 
-	[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
-	[C(L1D)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
-	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
-	[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
-	[C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_A53_PERFCTR_PREFETCH_LINEFILL,
+	[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1D_CACHE,
+	[C(L1D)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1D_CACHE,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
+	[C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_A53_PERFCTR_PREF_LINEFILL,
 
-	[C(L1I)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS,
-	[C(L1I)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL,
+	[C(L1I)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1I_CACHE,
+	[C(L1I)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL,
 
-	[C(ITLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_ITLB_REFILL,
+	[C(ITLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL,
 
-	[C(BPU)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
-	[C(BPU)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
-	[C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
-	[C(BPU)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+	[C(BPU)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_BR_PRED,
+	[C(BPU)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
+	[C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_BR_PRED,
+	[C(BPU)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
 };
 
 static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
@@ -197,23 +291,23 @@
 					      [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
 	PERF_CACHE_MAP_ALL_UNSUPPORTED,
 
-	[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_LD,
-	[C(L1D)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_IMPDEF_PERFCTR_L1_DCACHE_REFILL_LD,
-	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_ST,
-	[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_IMPDEF_PERFCTR_L1_DCACHE_REFILL_ST,
+	[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD,
+	[C(L1D)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR,
 
-	[C(L1I)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS,
-	[C(L1I)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL,
+	[C(L1I)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1I_CACHE,
+	[C(L1I)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL,
 
-	[C(DTLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_LD,
-	[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_ST,
+	[C(DTLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD,
+	[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR,
 
-	[C(ITLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_ITLB_REFILL,
+	[C(ITLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL,
 
-	[C(BPU)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
-	[C(BPU)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
-	[C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
-	[C(BPU)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+	[C(BPU)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_BR_PRED,
+	[C(BPU)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
+	[C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_BR_PRED,
+	[C(BPU)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
 };
 
 static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
@@ -221,67 +315,108 @@
 						   [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
 	PERF_CACHE_MAP_ALL_UNSUPPORTED,
 
-	[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_LD,
-	[C(L1D)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_IMPDEF_PERFCTR_L1_DCACHE_REFILL_LD,
-	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_ST,
-	[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_THUNDER_PERFCTR_L1_DCACHE_MISS_ST,
-	[C(L1D)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1_DCACHE_PREF_ACCESS,
-	[C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1_DCACHE_PREF_MISS,
+	[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD,
+	[C(L1D)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_THUNDER_PERFCTR_L1D_CACHE_MISS_ST,
+	[C(L1D)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_ACCESS,
+	[C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_MISS,
 
-	[C(L1I)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS,
-	[C(L1I)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL,
-	[C(L1I)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1_ICACHE_PREF_ACCESS,
-	[C(L1I)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1_ICACHE_PREF_MISS,
+	[C(L1I)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1I_CACHE,
+	[C(L1I)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL,
+	[C(L1I)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_ACCESS,
+	[C(L1I)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_MISS,
 
-	[C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_DTLB_ACCESS_LD,
-	[C(DTLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_LD,
-	[C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_DTLB_ACCESS_ST,
-	[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_ST,
+	[C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD,
+	[C(DTLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD,
+	[C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR,
+	[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR,
 
-	[C(ITLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_ITLB_REFILL,
+	[C(ITLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL,
 
-	[C(BPU)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
-	[C(BPU)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
-	[C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
-	[C(BPU)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+	[C(BPU)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_BR_PRED,
+	[C(BPU)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
+	[C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_BR_PRED,
+	[C(BPU)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
 };
 
+static const unsigned armv8_vulcan_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+					      [PERF_COUNT_HW_CACHE_OP_MAX]
+					      [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+	[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD,
+	[C(L1D)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR,
+
+	[C(L1I)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1I_CACHE,
+	[C(L1I)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL,
+
+	[C(ITLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL,
+	[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1I_TLB,
+
+	[C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD,
+	[C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR,
+	[C(DTLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD,
+	[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR,
+
+	[C(BPU)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_BR_PRED,
+	[C(BPU)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
+	[C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_BR_PRED,
+	[C(BPU)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
+
+	[C(NODE)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
+	[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
+};
+
+static ssize_t
+armv8pmu_events_sysfs_show(struct device *dev,
+			   struct device_attribute *attr, char *page)
+{
+	struct perf_pmu_events_attr *pmu_attr;
+
+	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+
+	return sprintf(page, "event=0x%03llx\n", pmu_attr->id);
+}
+
 #define ARMV8_EVENT_ATTR_RESOLVE(m) #m
 #define ARMV8_EVENT_ATTR(name, config) \
-	PMU_EVENT_ATTR_STRING(name, armv8_event_attr_##name, \
-			      "event=" ARMV8_EVENT_ATTR_RESOLVE(config))
+	PMU_EVENT_ATTR(name, armv8_event_attr_##name, \
+		       config, armv8pmu_events_sysfs_show)
 
-ARMV8_EVENT_ATTR(sw_incr, ARMV8_PMUV3_PERFCTR_PMNC_SW_INCR);
-ARMV8_EVENT_ATTR(l1i_cache_refill, ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL);
-ARMV8_EVENT_ATTR(l1i_tlb_refill, ARMV8_PMUV3_PERFCTR_ITLB_REFILL);
-ARMV8_EVENT_ATTR(l1d_cache_refill, ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL);
-ARMV8_EVENT_ATTR(l1d_cache, ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS);
-ARMV8_EVENT_ATTR(l1d_tlb_refill, ARMV8_PMUV3_PERFCTR_DTLB_REFILL);
-ARMV8_EVENT_ATTR(ld_retired, ARMV8_PMUV3_PERFCTR_MEM_READ);
-ARMV8_EVENT_ATTR(st_retired, ARMV8_PMUV3_PERFCTR_MEM_WRITE);
-ARMV8_EVENT_ATTR(inst_retired, ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED);
+ARMV8_EVENT_ATTR(sw_incr, ARMV8_PMUV3_PERFCTR_SW_INCR);
+ARMV8_EVENT_ATTR(l1i_cache_refill, ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL);
+ARMV8_EVENT_ATTR(l1i_tlb_refill, ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL);
+ARMV8_EVENT_ATTR(l1d_cache_refill, ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL);
+ARMV8_EVENT_ATTR(l1d_cache, ARMV8_PMUV3_PERFCTR_L1D_CACHE);
+ARMV8_EVENT_ATTR(l1d_tlb_refill, ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL);
+ARMV8_EVENT_ATTR(ld_retired, ARMV8_PMUV3_PERFCTR_LD_RETIRED);
+ARMV8_EVENT_ATTR(st_retired, ARMV8_PMUV3_PERFCTR_ST_RETIRED);
+ARMV8_EVENT_ATTR(inst_retired, ARMV8_PMUV3_PERFCTR_INST_RETIRED);
 ARMV8_EVENT_ATTR(exc_taken, ARMV8_PMUV3_PERFCTR_EXC_TAKEN);
-ARMV8_EVENT_ATTR(exc_return, ARMV8_PMUV3_PERFCTR_EXC_EXECUTED);
-ARMV8_EVENT_ATTR(cid_write_retired, ARMV8_PMUV3_PERFCTR_CID_WRITE);
-ARMV8_EVENT_ATTR(pc_write_retired, ARMV8_PMUV3_PERFCTR_PC_WRITE);
-ARMV8_EVENT_ATTR(br_immed_retired, ARMV8_PMUV3_PERFCTR_PC_IMM_BRANCH);
-ARMV8_EVENT_ATTR(br_return_retired, ARMV8_PMUV3_PERFCTR_PC_PROC_RETURN);
-ARMV8_EVENT_ATTR(unaligned_ldst_retired, ARMV8_PMUV3_PERFCTR_MEM_UNALIGNED_ACCESS);
-ARMV8_EVENT_ATTR(br_mis_pred, ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED);
-ARMV8_EVENT_ATTR(cpu_cycles, ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES);
-ARMV8_EVENT_ATTR(br_pred, ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED);
+ARMV8_EVENT_ATTR(exc_return, ARMV8_PMUV3_PERFCTR_EXC_RETURN);
+ARMV8_EVENT_ATTR(cid_write_retired, ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED);
+ARMV8_EVENT_ATTR(pc_write_retired, ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED);
+ARMV8_EVENT_ATTR(br_immed_retired, ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED);
+ARMV8_EVENT_ATTR(br_return_retired, ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED);
+ARMV8_EVENT_ATTR(unaligned_ldst_retired, ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED);
+ARMV8_EVENT_ATTR(br_mis_pred, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED);
+ARMV8_EVENT_ATTR(cpu_cycles, ARMV8_PMUV3_PERFCTR_CPU_CYCLES);
+ARMV8_EVENT_ATTR(br_pred, ARMV8_PMUV3_PERFCTR_BR_PRED);
 ARMV8_EVENT_ATTR(mem_access, ARMV8_PMUV3_PERFCTR_MEM_ACCESS);
-ARMV8_EVENT_ATTR(l1i_cache, ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS);
-ARMV8_EVENT_ATTR(l1d_cache_wb, ARMV8_PMUV3_PERFCTR_L1_DCACHE_WB);
-ARMV8_EVENT_ATTR(l2d_cache, ARMV8_PMUV3_PERFCTR_L2_CACHE_ACCESS);
-ARMV8_EVENT_ATTR(l2d_cache_refill, ARMV8_PMUV3_PERFCTR_L2_CACHE_REFILL);
-ARMV8_EVENT_ATTR(l2d_cache_wb, ARMV8_PMUV3_PERFCTR_L2_CACHE_WB);
+ARMV8_EVENT_ATTR(l1i_cache, ARMV8_PMUV3_PERFCTR_L1I_CACHE);
+ARMV8_EVENT_ATTR(l1d_cache_wb, ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB);
+ARMV8_EVENT_ATTR(l2d_cache, ARMV8_PMUV3_PERFCTR_L2D_CACHE);
+ARMV8_EVENT_ATTR(l2d_cache_refill, ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL);
+ARMV8_EVENT_ATTR(l2d_cache_wb, ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB);
 ARMV8_EVENT_ATTR(bus_access, ARMV8_PMUV3_PERFCTR_BUS_ACCESS);
-ARMV8_EVENT_ATTR(memory_error, ARMV8_PMUV3_PERFCTR_MEM_ERROR);
-ARMV8_EVENT_ATTR(inst_spec, ARMV8_PMUV3_PERFCTR_OP_SPEC);
-ARMV8_EVENT_ATTR(ttbr_write_retired, ARMV8_PMUV3_PERFCTR_TTBR_WRITE);
+ARMV8_EVENT_ATTR(memory_error, ARMV8_PMUV3_PERFCTR_MEMORY_ERROR);
+ARMV8_EVENT_ATTR(inst_spec, ARMV8_PMUV3_PERFCTR_INST_SPEC);
+ARMV8_EVENT_ATTR(ttbr_write_retired, ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED);
 ARMV8_EVENT_ATTR(bus_cycles, ARMV8_PMUV3_PERFCTR_BUS_CYCLES);
-ARMV8_EVENT_ATTR(chain, ARMV8_PMUV3_PERFCTR_CHAIN);
+/* Don't expose the chain event in /sys, since it's useless in isolation */
 ARMV8_EVENT_ATTR(l1d_cache_allocate, ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE);
 ARMV8_EVENT_ATTR(l2d_cache_allocate, ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE);
 ARMV8_EVENT_ATTR(br_retired, ARMV8_PMUV3_PERFCTR_BR_RETIRED);
@@ -297,9 +432,9 @@
 ARMV8_EVENT_ATTR(l3d_cache, ARMV8_PMUV3_PERFCTR_L3D_CACHE);
 ARMV8_EVENT_ATTR(l3d_cache_wb, ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB);
 ARMV8_EVENT_ATTR(l2d_tlb_refill, ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL);
-ARMV8_EVENT_ATTR(l21_tlb_refill, ARMV8_PMUV3_PERFCTR_L21_TLB_REFILL);
+ARMV8_EVENT_ATTR(l2i_tlb_refill, ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL);
 ARMV8_EVENT_ATTR(l2d_tlb, ARMV8_PMUV3_PERFCTR_L2D_TLB);
-ARMV8_EVENT_ATTR(l21_tlb, ARMV8_PMUV3_PERFCTR_L21_TLB);
+ARMV8_EVENT_ATTR(l2i_tlb, ARMV8_PMUV3_PERFCTR_L2I_TLB);
 
 static struct attribute *armv8_pmuv3_event_attrs[] = {
 	&armv8_event_attr_sw_incr.attr.attr,
@@ -332,7 +467,6 @@
 	&armv8_event_attr_inst_spec.attr.attr,
 	&armv8_event_attr_ttbr_write_retired.attr.attr,
 	&armv8_event_attr_bus_cycles.attr.attr,
-	&armv8_event_attr_chain.attr.attr,
 	&armv8_event_attr_l1d_cache_allocate.attr.attr,
 	&armv8_event_attr_l2d_cache_allocate.attr.attr,
 	&armv8_event_attr_br_retired.attr.attr,
@@ -348,15 +482,33 @@
 	&armv8_event_attr_l3d_cache.attr.attr,
 	&armv8_event_attr_l3d_cache_wb.attr.attr,
 	&armv8_event_attr_l2d_tlb_refill.attr.attr,
-	&armv8_event_attr_l21_tlb_refill.attr.attr,
+	&armv8_event_attr_l2i_tlb_refill.attr.attr,
 	&armv8_event_attr_l2d_tlb.attr.attr,
-	&armv8_event_attr_l21_tlb.attr.attr,
+	&armv8_event_attr_l2i_tlb.attr.attr,
 	NULL,
 };
 
+static umode_t
+armv8pmu_event_attr_is_visible(struct kobject *kobj,
+			       struct attribute *attr, int unused)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct pmu *pmu = dev_get_drvdata(dev);
+	struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
+	struct perf_pmu_events_attr *pmu_attr;
+
+	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr.attr);
+
+	if (test_bit(pmu_attr->id, cpu_pmu->pmceid_bitmap))
+		return attr->mode;
+
+	return 0;
+}
+
 static struct attribute_group armv8_pmuv3_events_attr_group = {
 	.name = "events",
 	.attrs = armv8_pmuv3_event_attrs,
+	.is_visible = armv8pmu_event_attr_is_visible,
 };
 
 PMU_FORMAT_ATTR(event, "config:0-9");
@@ -397,16 +549,14 @@
 
 static inline u32 armv8pmu_pmcr_read(void)
 {
-	u32 val;
-	asm volatile("mrs %0, pmcr_el0" : "=r" (val));
-	return val;
+	return read_sysreg(pmcr_el0);
 }
 
 static inline void armv8pmu_pmcr_write(u32 val)
 {
 	val &= ARMV8_PMU_PMCR_MASK;
 	isb();
-	asm volatile("msr pmcr_el0, %0" :: "r" (val));
+	write_sysreg(val, pmcr_el0);
 }
 
 static inline int armv8pmu_has_overflowed(u32 pmovsr)
@@ -428,7 +578,7 @@
 static inline int armv8pmu_select_counter(int idx)
 {
 	u32 counter = ARMV8_IDX_TO_COUNTER(idx);
-	asm volatile("msr pmselr_el0, %0" :: "r" (counter));
+	write_sysreg(counter, pmselr_el0);
 	isb();
 
 	return idx;
@@ -445,9 +595,9 @@
 		pr_err("CPU%u reading wrong counter %d\n",
 			smp_processor_id(), idx);
 	else if (idx == ARMV8_IDX_CYCLE_COUNTER)
-		asm volatile("mrs %0, pmccntr_el0" : "=r" (value));
+		value = read_sysreg(pmccntr_el0);
 	else if (armv8pmu_select_counter(idx) == idx)
-		asm volatile("mrs %0, pmxevcntr_el0" : "=r" (value));
+		value = read_sysreg(pmxevcntr_el0);
 
 	return value;
 }
@@ -469,47 +619,47 @@
 		 */
 		u64 value64 = 0xffffffff00000000ULL | value;
 
-		asm volatile("msr pmccntr_el0, %0" :: "r" (value64));
+		write_sysreg(value64, pmccntr_el0);
 	} else if (armv8pmu_select_counter(idx) == idx)
-		asm volatile("msr pmxevcntr_el0, %0" :: "r" (value));
+		write_sysreg(value, pmxevcntr_el0);
 }
 
 static inline void armv8pmu_write_evtype(int idx, u32 val)
 {
 	if (armv8pmu_select_counter(idx) == idx) {
 		val &= ARMV8_PMU_EVTYPE_MASK;
-		asm volatile("msr pmxevtyper_el0, %0" :: "r" (val));
+		write_sysreg(val, pmxevtyper_el0);
 	}
 }
 
 static inline int armv8pmu_enable_counter(int idx)
 {
 	u32 counter = ARMV8_IDX_TO_COUNTER(idx);
-	asm volatile("msr pmcntenset_el0, %0" :: "r" (BIT(counter)));
+	write_sysreg(BIT(counter), pmcntenset_el0);
 	return idx;
 }
 
 static inline int armv8pmu_disable_counter(int idx)
 {
 	u32 counter = ARMV8_IDX_TO_COUNTER(idx);
-	asm volatile("msr pmcntenclr_el0, %0" :: "r" (BIT(counter)));
+	write_sysreg(BIT(counter), pmcntenclr_el0);
 	return idx;
 }
 
 static inline int armv8pmu_enable_intens(int idx)
 {
 	u32 counter = ARMV8_IDX_TO_COUNTER(idx);
-	asm volatile("msr pmintenset_el1, %0" :: "r" (BIT(counter)));
+	write_sysreg(BIT(counter), pmintenset_el1);
 	return idx;
 }
 
 static inline int armv8pmu_disable_intens(int idx)
 {
 	u32 counter = ARMV8_IDX_TO_COUNTER(idx);
-	asm volatile("msr pmintenclr_el1, %0" :: "r" (BIT(counter)));
+	write_sysreg(BIT(counter), pmintenclr_el1);
 	isb();
 	/* Clear the overflow flag in case an interrupt is pending. */
-	asm volatile("msr pmovsclr_el0, %0" :: "r" (BIT(counter)));
+	write_sysreg(BIT(counter), pmovsclr_el0);
 	isb();
 
 	return idx;
@@ -520,11 +670,11 @@
 	u32 value;
 
 	/* Read */
-	asm volatile("mrs %0, pmovsclr_el0" : "=r" (value));
+	value = read_sysreg(pmovsclr_el0);
 
 	/* Write to clear flags */
 	value &= ARMV8_PMU_OVSR_MASK;
-	asm volatile("msr pmovsclr_el0, %0" :: "r" (value));
+	write_sysreg(value, pmovsclr_el0);
 
 	return value;
 }
@@ -685,7 +835,7 @@
 	unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT;
 
 	/* Always place a cycle counter into the cycle counter. */
-	if (evtype == ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES) {
+	if (evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) {
 		if (test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask))
 			return -EAGAIN;
 
@@ -781,22 +931,38 @@
 				ARMV8_PMU_EVTYPE_EVENT);
 }
 
-static void armv8pmu_read_num_pmnc_events(void *info)
+static int armv8_vulcan_map_event(struct perf_event *event)
 {
-	int *nb_cnt = info;
-
-	/* Read the nb of CNTx counters supported from PMNC */
-	*nb_cnt = (armv8pmu_pmcr_read() >> ARMV8_PMU_PMCR_N_SHIFT) & ARMV8_PMU_PMCR_N_MASK;
-
-	/* Add the CPU cycles counter */
-	*nb_cnt += 1;
+	return armpmu_map_event(event, &armv8_vulcan_perf_map,
+				&armv8_vulcan_perf_cache_map,
+				ARMV8_PMU_EVTYPE_EVENT);
 }
 
-static int armv8pmu_probe_num_events(struct arm_pmu *arm_pmu)
+static void __armv8pmu_probe_pmu(void *info)
 {
-	return smp_call_function_any(&arm_pmu->supported_cpus,
-				    armv8pmu_read_num_pmnc_events,
-				    &arm_pmu->num_events, 1);
+	struct arm_pmu *cpu_pmu = info;
+	u32 pmceid[2];
+
+	/* Read the nb of CNTx counters supported from PMNC */
+	cpu_pmu->num_events = (armv8pmu_pmcr_read() >> ARMV8_PMU_PMCR_N_SHIFT)
+		& ARMV8_PMU_PMCR_N_MASK;
+
+	/* Add the CPU cycles counter */
+	cpu_pmu->num_events += 1;
+
+	pmceid[0] = read_sysreg(pmceid0_el0);
+	pmceid[1] = read_sysreg(pmceid1_el0);
+
+	bitmap_from_u32array(cpu_pmu->pmceid_bitmap,
+			     ARMV8_PMUV3_MAX_COMMON_EVENTS, pmceid,
+			     ARRAY_SIZE(pmceid));
+}
+
+static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu)
+{
+	return smp_call_function_any(&cpu_pmu->supported_cpus,
+				    __armv8pmu_probe_pmu,
+				    cpu_pmu, 1);
 }
 
 static void armv8_pmu_init(struct arm_pmu *cpu_pmu)
@@ -819,7 +985,8 @@
 	armv8_pmu_init(cpu_pmu);
 	cpu_pmu->name			= "armv8_pmuv3";
 	cpu_pmu->map_event		= armv8_pmuv3_map_event;
-	return armv8pmu_probe_num_events(cpu_pmu);
+	cpu_pmu->pmu.attr_groups	= armv8_pmuv3_attr_groups;
+	return armv8pmu_probe_pmu(cpu_pmu);
 }
 
 static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu)
@@ -828,7 +995,7 @@
 	cpu_pmu->name			= "armv8_cortex_a53";
 	cpu_pmu->map_event		= armv8_a53_map_event;
 	cpu_pmu->pmu.attr_groups	= armv8_pmuv3_attr_groups;
-	return armv8pmu_probe_num_events(cpu_pmu);
+	return armv8pmu_probe_pmu(cpu_pmu);
 }
 
 static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu)
@@ -837,7 +1004,7 @@
 	cpu_pmu->name			= "armv8_cortex_a57";
 	cpu_pmu->map_event		= armv8_a57_map_event;
 	cpu_pmu->pmu.attr_groups	= armv8_pmuv3_attr_groups;
-	return armv8pmu_probe_num_events(cpu_pmu);
+	return armv8pmu_probe_pmu(cpu_pmu);
 }
 
 static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu)
@@ -846,7 +1013,7 @@
 	cpu_pmu->name			= "armv8_cortex_a72";
 	cpu_pmu->map_event		= armv8_a57_map_event;
 	cpu_pmu->pmu.attr_groups	= armv8_pmuv3_attr_groups;
-	return armv8pmu_probe_num_events(cpu_pmu);
+	return armv8pmu_probe_pmu(cpu_pmu);
 }
 
 static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu)
@@ -855,7 +1022,16 @@
 	cpu_pmu->name			= "armv8_cavium_thunder";
 	cpu_pmu->map_event		= armv8_thunder_map_event;
 	cpu_pmu->pmu.attr_groups	= armv8_pmuv3_attr_groups;
-	return armv8pmu_probe_num_events(cpu_pmu);
+	return armv8pmu_probe_pmu(cpu_pmu);
+}
+
+static int armv8_vulcan_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	armv8_pmu_init(cpu_pmu);
+	cpu_pmu->name			= "armv8_brcm_vulcan";
+	cpu_pmu->map_event		= armv8_vulcan_map_event;
+	cpu_pmu->pmu.attr_groups	= armv8_pmuv3_attr_groups;
+	return armv8pmu_probe_pmu(cpu_pmu);
 }
 
 static const struct of_device_id armv8_pmu_of_device_ids[] = {
@@ -864,6 +1040,7 @@
 	{.compatible = "arm,cortex-a57-pmu",	.data = armv8_a57_pmu_init},
 	{.compatible = "arm,cortex-a72-pmu",	.data = armv8_a72_pmu_init},
 	{.compatible = "cavium,thunder-pmu",	.data = armv8_thunder_pmu_init},
+	{.compatible = "brcm,vulcan-pmu",	.data = armv8_vulcan_pmu_init},
 	{},
 };
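
With these perf_event.c changes the architected events are exported under
/sys/bus/event_source/devices/armv8_pmuv3/events/, and
armv8pmu_event_attr_is_visible() hides any event absent from the
PMCEID0/1_EL0 bitmaps, so only hardware-supported events are advertised. A
minimal, hedged userspace sketch that counts the architected CPU_CYCLES event
(0x11) as a raw config; a real tool would read the PMU's dynamic type from
sysfs rather than assuming PERF_TYPE_RAW:

	#include <linux/perf_event.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	int main(void)
	{
		struct perf_event_attr attr;
		long long count;
		int fd;

		memset(&attr, 0, sizeof(attr));
		attr.size = sizeof(attr);
		attr.type = PERF_TYPE_RAW;
		attr.config = 0x11;		/* ARMV8_PMUV3_PERFCTR_CPU_CYCLES */
		attr.disabled = 1;
		attr.exclude_kernel = 1;

		fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
		if (fd < 0) {
			perror("perf_event_open");
			return 1;
		}

		ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
		/* ... workload under test ... */
		ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

		if (read(fd, &count, sizeof(count)) == sizeof(count))
			printf("cycles: %lld\n", count);
		close(fd);
		return 0;
	}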
 
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 8062482..48eea68 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -265,9 +265,6 @@
 		if (stack_start) {
 			if (is_compat_thread(task_thread_info(p)))
 				childregs->compat_sp = stack_start;
-			/* 16-byte aligned stack mandatory on AArch64 */
-			else if (stack_start & 15)
-				return -EINVAL;
 			else
 				childregs->sp = stack_start;
 		}
@@ -382,13 +379,14 @@
 	return sp & ~0xf;
 }
 
-static unsigned long randomize_base(unsigned long base)
-{
-	unsigned long range_end = base + (STACK_RND_MASK << PAGE_SHIFT) + 1;
-	return randomize_range(base, range_end, 0) ? : base;
-}
-
 unsigned long arch_randomize_brk(struct mm_struct *mm)
 {
-	return randomize_base(mm->brk);
+	unsigned long range_end = mm->brk;
+
+	if (is_compat_task())
+		range_end += 0x02000000;
+	else
+		range_end += 0x40000000;
+
+	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
 }
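
For reference, the new arch_randomize_brk() draws the heap base from
[mm->brk, mm->brk + 32MiB) for compat (AArch32) tasks and
[mm->brk, mm->brk + 1GiB) for native tasks (0x02000000 is 2^25 bytes and
0x40000000 is 2^30), falling back to an unrandomized mm->brk if
randomize_range() returns 0.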
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 9dc6776..3279def 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -53,6 +53,7 @@
 #include <asm/cpufeature.h>
 #include <asm/cpu_ops.h>
 #include <asm/kasan.h>
+#include <asm/numa.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
 #include <asm/smp_plat.h>
@@ -175,7 +176,6 @@
 	 */
 	if (mpidr_hash_size() > 4 * num_possible_cpus())
 		pr_warn("Large number of MPIDR hash buckets detected\n");
-	__flush_dcache_area(&mpidr_hash, sizeof(struct mpidr_hash));
 }
 
 static void __init setup_machine_fdt(phys_addr_t dt_phys)
@@ -224,69 +224,6 @@
 	}
 }
 
-#ifdef CONFIG_BLK_DEV_INITRD
-/*
- * Relocate initrd if it is not completely within the linear mapping.
- * This would be the case if mem= cuts out all or part of it.
- */
-static void __init relocate_initrd(void)
-{
-	phys_addr_t orig_start = __virt_to_phys(initrd_start);
-	phys_addr_t orig_end = __virt_to_phys(initrd_end);
-	phys_addr_t ram_end = memblock_end_of_DRAM();
-	phys_addr_t new_start;
-	unsigned long size, to_free = 0;
-	void *dest;
-
-	if (orig_end <= ram_end)
-		return;
-
-	/*
-	 * Any of the original initrd which overlaps the linear map should
-	 * be freed after relocating.
-	 */
-	if (orig_start < ram_end)
-		to_free = ram_end - orig_start;
-
-	size = orig_end - orig_start;
-	if (!size)
-		return;
-
-	/* initrd needs to be relocated completely inside linear mapping */
-	new_start = memblock_find_in_range(0, PFN_PHYS(max_pfn),
-					   size, PAGE_SIZE);
-	if (!new_start)
-		panic("Cannot relocate initrd of size %ld\n", size);
-	memblock_reserve(new_start, size);
-
-	initrd_start = __phys_to_virt(new_start);
-	initrd_end   = initrd_start + size;
-
-	pr_info("Moving initrd from [%llx-%llx] to [%llx-%llx]\n",
-		orig_start, orig_start + size - 1,
-		new_start, new_start + size - 1);
-
-	dest = (void *)initrd_start;
-
-	if (to_free) {
-		memcpy(dest, (void *)__phys_to_virt(orig_start), to_free);
-		dest += to_free;
-	}
-
-	copy_from_early_mem(dest, orig_start + to_free, size - to_free);
-
-	if (to_free) {
-		pr_info("Freeing original RAMDISK from [%llx-%llx]\n",
-			orig_start, orig_start + to_free - 1);
-		memblock_free(orig_start, to_free);
-	}
-}
-#else
-static inline void __init relocate_initrd(void)
-{
-}
-#endif
-
 u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID };
 
 void __init setup_arch(char **cmdline_p)
@@ -327,7 +264,11 @@
 	acpi_boot_table_init();
 
 	paging_init();
-	relocate_initrd();
+
+	if (acpi_disabled)
+		unflatten_device_tree();
+
+	bootmem_init();
 
 	kasan_init();
 
@@ -335,12 +276,11 @@
 
 	early_ioremap_reset();
 
-	if (acpi_disabled) {
-		unflatten_device_tree();
+	if (acpi_disabled)
 		psci_dt_init();
-	} else {
+	else
 		psci_acpi_init();
-	}
+
 	xen_early_init();
 
 	cpu_read_bootcpu_ops();
@@ -379,6 +319,9 @@
 {
 	int i;
 
+	for_each_online_node(i)
+		register_one_node(i);
+
 	for_each_possible_cpu(i) {
 		struct cpu *cpu = &per_cpu(cpu_data.cpu, i);
 		cpu->hotpluggable = 1;
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index fd10eb6..9a3aec9 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -49,39 +49,32 @@
 	orr	\dst, \dst, \mask		// dst|=(aff3>>rs3)
 	.endm
 /*
- * Save CPU state for a suspend and execute the suspend finisher.
- * On success it will return 0 through cpu_resume - ie through a CPU
- * soft/hard reboot from the reset vector.
- * On failure it returns the suspend finisher return value or force
- * -EOPNOTSUPP if the finisher erroneously returns 0 (the suspend finisher
- * is not allowed to return, if it does this must be considered failure).
- * It saves callee registers, and allocates space on the kernel stack
- * to save the CPU specific registers + some other data for resume.
+ * Save CPU state in the provided sleep_stack_data area, and publish its
+ * location for cpu_resume()'s use in sleep_save_stash.
  *
- *  x0 = suspend finisher argument
- *  x1 = suspend finisher function pointer
+ * cpu_resume() will restore this saved state, and return. Because the
+ * link-register is saved and restored, it will appear to return from this
+ * function. So that the caller can tell the suspend/resume paths apart,
+ * __cpu_suspend_enter() will always return a non-zero value, whereas the
+ * path through cpu_resume() will return 0.
+ *
+ *  x0 = struct sleep_stack_data area
  */
 ENTRY(__cpu_suspend_enter)
-	stp	x29, lr, [sp, #-96]!
-	stp	x19, x20, [sp,#16]
-	stp	x21, x22, [sp,#32]
-	stp	x23, x24, [sp,#48]
-	stp	x25, x26, [sp,#64]
-	stp	x27, x28, [sp,#80]
-	/*
-	 * Stash suspend finisher and its argument in x20 and x19
-	 */
-	mov	x19, x0
-	mov	x20, x1
+	stp	x29, lr, [x0, #SLEEP_STACK_DATA_CALLEE_REGS]
+	stp	x19, x20, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+16]
+	stp	x21, x22, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+32]
+	stp	x23, x24, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+48]
+	stp	x25, x26, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+64]
+	stp	x27, x28, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+80]
+
+	/* save the sp in cpu_suspend_ctx */
 	mov	x2, sp
-	sub	sp, sp, #CPU_SUSPEND_SZ	// allocate cpu_suspend_ctx
-	mov	x0, sp
-	/*
-	 * x0 now points to struct cpu_suspend_ctx allocated on the stack
-	 */
-	str	x2, [x0, #CPU_CTX_SP]
-	ldr	x1, =sleep_save_sp
-	ldr	x1, [x1, #SLEEP_SAVE_SP_VIRT]
+	str	x2, [x0, #SLEEP_STACK_DATA_SYSTEM_REGS + CPU_CTX_SP]
+
+	/* find the mpidr_hash */
+	ldr	x1, =sleep_save_stash
+	ldr	x1, [x1]
 	mrs	x7, mpidr_el1
 	ldr	x9, =mpidr_hash
 	ldr	x10, [x9, #MPIDR_HASH_MASK]
@@ -93,74 +86,28 @@
 	ldp	w5, w6, [x9, #(MPIDR_HASH_SHIFTS + 8)]
 	compute_mpidr_hash x8, x3, x4, x5, x6, x7, x10
 	add	x1, x1, x8, lsl #3
-	bl	__cpu_suspend_save
-	/*
-	 * Grab suspend finisher in x20 and its argument in x19
-	 */
-	mov	x0, x19
-	mov	x1, x20
-	/*
-	 * We are ready for power down, fire off the suspend finisher
-	 * in x1, with argument in x0
-	 */
-	blr	x1
-        /*
-	 * Never gets here, unless suspend finisher fails.
-	 * Successful cpu_suspend should return from cpu_resume, returning
-	 * through this code path is considered an error
-	 * If the return value is set to 0 force x0 = -EOPNOTSUPP
-	 * to make sure a proper error condition is propagated
-	 */
-	cmp	x0, #0
-	mov	x3, #-EOPNOTSUPP
-	csel	x0, x3, x0, eq
-	add	sp, sp, #CPU_SUSPEND_SZ	// rewind stack pointer
-	ldp	x19, x20, [sp, #16]
-	ldp	x21, x22, [sp, #32]
-	ldp	x23, x24, [sp, #48]
-	ldp	x25, x26, [sp, #64]
-	ldp	x27, x28, [sp, #80]
-	ldp	x29, lr, [sp], #96
+
+	str	x0, [x1]
+	add	x0, x0, #SLEEP_STACK_DATA_SYSTEM_REGS
+	stp	x29, lr, [sp, #-16]!
+	bl	cpu_do_suspend
+	ldp	x29, lr, [sp], #16
+	mov	x0, #1
 	ret
 ENDPROC(__cpu_suspend_enter)
 	.ltorg
 
-/*
- * x0 must contain the sctlr value retrieved from restored context
- */
-	.pushsection	".idmap.text", "ax"
-ENTRY(cpu_resume_mmu)
-	ldr	x3, =cpu_resume_after_mmu
-	msr	sctlr_el1, x0		// restore sctlr_el1
-	isb
-	/*
-	 * Invalidate the local I-cache so that any instructions fetched
-	 * speculatively from the PoC are discarded, since they may have
-	 * been dynamically patched at the PoU.
-	 */
-	ic	iallu
-	dsb	nsh
-	isb
-	br	x3			// global jump to virtual address
-ENDPROC(cpu_resume_mmu)
-	.popsection
-cpu_resume_after_mmu:
-#ifdef CONFIG_KASAN
-	mov	x0, sp
-	bl	kasan_unpoison_remaining_stack
-#endif
-	mov	x0, #0			// return zero on success
-	ldp	x19, x20, [sp, #16]
-	ldp	x21, x22, [sp, #32]
-	ldp	x23, x24, [sp, #48]
-	ldp	x25, x26, [sp, #64]
-	ldp	x27, x28, [sp, #80]
-	ldp	x29, lr, [sp], #96
-	ret
-ENDPROC(cpu_resume_after_mmu)
-
 ENTRY(cpu_resume)
 	bl	el2_setup		// if in EL2 drop to EL1 cleanly
+	/* enable the MMU early - so we can access sleep_save_stash by va */
+	adr_l	lr, __enable_mmu	/* __cpu_setup will return here */
+	ldr	x27, =_cpu_resume	/* __enable_mmu will branch here */
+	adrp	x25, idmap_pg_dir
+	adrp	x26, swapper_pg_dir
+	b	__cpu_setup
+ENDPROC(cpu_resume)
+
+ENTRY(_cpu_resume)
 	mrs	x1, mpidr_el1
 	adrp	x8, mpidr_hash
 	add x8, x8, #:lo12:mpidr_hash // x8 = struct mpidr_hash phys address
@@ -170,20 +117,32 @@
 	ldp	w5, w6, [x8, #(MPIDR_HASH_SHIFTS + 8)]
 	compute_mpidr_hash x7, x3, x4, x5, x6, x1, x2
         /* x7 contains hash index, let's use it to grab context pointer */
-	ldr_l	x0, sleep_save_sp + SLEEP_SAVE_SP_PHYS
+	ldr_l	x0, sleep_save_stash
 	ldr	x0, [x0, x7, lsl #3]
+	add	x29, x0, #SLEEP_STACK_DATA_CALLEE_REGS
+	add	x0, x0, #SLEEP_STACK_DATA_SYSTEM_REGS
 	/* load sp from context */
 	ldr	x2, [x0, #CPU_CTX_SP]
-	/* load physical address of identity map page table in x1 */
-	adrp	x1, idmap_pg_dir
 	mov	sp, x2
 	/* save thread_info */
 	and	x2, x2, #~(THREAD_SIZE - 1)
 	msr	sp_el0, x2
 	/*
-	 * cpu_do_resume expects x0 to contain context physical address
-	 * pointer and x1 to contain physical address of 1:1 page tables
+	 * cpu_do_resume expects x0 to contain context address pointer
 	 */
-	bl	cpu_do_resume		// PC relative jump, MMU off
-	b	cpu_resume_mmu		// Resume MMU, never returns
-ENDPROC(cpu_resume)
+	bl	cpu_do_resume
+
+#ifdef CONFIG_KASAN
+	mov	x0, sp
+	bl	kasan_unpoison_remaining_stack
+#endif
+
+	ldp	x19, x20, [x29, #16]
+	ldp	x21, x22, [x29, #32]
+	ldp	x23, x24, [x29, #48]
+	ldp	x25, x26, [x29, #64]
+	ldp	x27, x28, [x29, #80]
+	ldp	x29, lr, [x29]
+	mov	x0, #0
+	ret
+ENDPROC(_cpu_resume)
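The comment block at the top of __cpu_suspend_enter() describes a function that
returns twice: once directly (non-zero, the suspend path) and once through
cpu_resume() (zero, after the saved callee registers are restored). The closest
portable analogue is setjmp()/longjmp(); a minimal, runnable illustration of
the same two-return idiom (note the kernel inverts the return values):

    #include <setjmp.h>
    #include <stdio.h>

    static jmp_buf resume_ctx;

    int main(void)
    {
            /*
             * setjmp() returns twice, like __cpu_suspend_enter(): once
             * directly (0 here, the suspend path) and once via longjmp()
             * (1 here, the resume path). The kernel uses the opposite
             * values: non-zero for suspend, 0 for resume.
             */
            if (setjmp(resume_ctx) == 0) {
                    printf("suspend path: state saved, powering down\n");
                    longjmp(resume_ctx, 1); /* stands in for the reset vector */
            }
            printf("resume path: state restored at the original call site\n");
            return 0;
    }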
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index b2d5f4e..678e084 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -45,6 +45,7 @@
 #include <asm/cputype.h>
 #include <asm/cpu_ops.h>
 #include <asm/mmu_context.h>
+#include <asm/numa.h>
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
 #include <asm/processor.h>
@@ -75,6 +76,43 @@
 	IPI_WAKEUP
 };
 
+#ifdef CONFIG_ARM64_VHE
+
+/* Whether the boot CPU is running in HYP mode or not */
+static bool boot_cpu_hyp_mode;
+
+static inline void save_boot_cpu_run_el(void)
+{
+	boot_cpu_hyp_mode = is_kernel_in_hyp_mode();
+}
+
+static inline bool is_boot_cpu_in_hyp_mode(void)
+{
+	return boot_cpu_hyp_mode;
+}
+
+/*
+ * Verify that a secondary CPU is running the kernel at the same
+ * EL as that of the boot CPU.
+ */
+void verify_cpu_run_el(void)
+{
+	bool in_el2 = is_kernel_in_hyp_mode();
+	bool boot_cpu_el2 = is_boot_cpu_in_hyp_mode();
+
+	if (in_el2 ^ boot_cpu_el2) {
+		pr_crit("CPU%d: mismatched Exception Level(EL%d) with boot CPU(EL%d)\n",
+					smp_processor_id(),
+					in_el2 ? 2 : 1,
+					boot_cpu_el2 ? 2 : 1);
+		cpu_panic_kernel();
+	}
+}
+
+#else
+static inline void save_boot_cpu_run_el(void) {}
+#endif
+
 #ifdef CONFIG_HOTPLUG_CPU
 static int op_cpu_kill(unsigned int cpu);
 #else
@@ -166,6 +204,7 @@
 static void smp_store_cpu_info(unsigned int cpuid)
 {
 	store_cpu_topology(cpuid);
+	numa_store_cpu_info(cpuid);
 }
 
 /*
@@ -225,8 +264,6 @@
 	pr_info("CPU%u: Booted secondary processor [%08x]\n",
 					 cpu, read_cpuid_id());
 	update_cpu_boot_status(CPU_BOOT_SUCCESS);
-	/* Make sure the status update is visible before we complete */
-	smp_wmb();
 	set_cpu_online(cpu, true);
 	complete(&cpu_running);
 
@@ -401,6 +438,7 @@
 void __init smp_prepare_boot_cpu(void)
 {
 	cpuinfo_store_boot_cpu();
+	save_boot_cpu_run_el();
 	set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
 }
 
@@ -595,6 +633,8 @@
 
 		pr_debug("cpu logical map 0x%llx\n", hwid);
 		cpu_logical_map(cpu_count) = hwid;
+
+		early_map_cpu_to_node(cpu_count, of_node_to_nid(dn));
 next:
 		cpu_count++;
 	}
@@ -647,33 +687,18 @@
 void __init smp_prepare_cpus(unsigned int max_cpus)
 {
 	int err;
-	unsigned int cpu, ncores = num_possible_cpus();
+	unsigned int cpu;
 
 	init_cpu_topology();
 
 	smp_store_cpu_info(smp_processor_id());
 
 	/*
-	 * are we trying to boot more cores than exist?
-	 */
-	if (max_cpus > ncores)
-		max_cpus = ncores;
-
-	/* Don't bother if we're effectively UP */
-	if (max_cpus <= 1)
-		return;
-
-	/*
 	 * Initialise the present map (which describes the set of CPUs
 	 * actually populated at the present time) and release the
 	 * secondaries from the bootloader.
-	 *
-	 * Make sure we online at most (max_cpus - 1) additional CPUs.
 	 */
-	max_cpus--;
 	for_each_possible_cpu(cpu) {
-		if (max_cpus == 0)
-			break;
 
 		if (cpu == smp_processor_id())
 			continue;
@@ -686,7 +711,6 @@
 			continue;
 
 		set_cpu_present(cpu, true);
-		max_cpus--;
 	}
 }
 
@@ -763,21 +787,11 @@
 }
 #endif
 
-static DEFINE_RAW_SPINLOCK(stop_lock);
-
 /*
  * ipi_cpu_stop - handle IPI from smp_send_stop()
  */
 static void ipi_cpu_stop(unsigned int cpu)
 {
-	if (system_state == SYSTEM_BOOTING ||
-	    system_state == SYSTEM_RUNNING) {
-		raw_spin_lock(&stop_lock);
-		pr_crit("CPU%u: stopping\n", cpu);
-		dump_stack();
-		raw_spin_unlock(&stop_lock);
-	}
-
 	set_cpu_online(cpu, false);
 
 	local_irq_disable();
@@ -872,6 +886,9 @@
 		cpumask_copy(&mask, cpu_online_mask);
 		cpumask_clear_cpu(smp_processor_id(), &mask);
 
+		if (system_state == SYSTEM_BOOTING ||
+		    system_state == SYSTEM_RUNNING)
+			pr_crit("SMP: stopping secondary CPUs\n");
 		smp_cross_call(&mask, IPI_CPU_STOP);
 	}
 
@@ -881,7 +898,8 @@
 		udelay(1);
 
 	if (num_online_cpus() > 1)
-		pr_warning("SMP: failed to stop secondary CPUs\n");
+		pr_warning("SMP: failed to stop secondary CPUs %*pbl\n",
+			   cpumask_pr_args(cpu_online_mask));
 }
 
 /*
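verify_cpu_run_el() above reduces to a boolean XOR: it fires exactly when one
of the boot CPU and the secondary runs at EL2 and the other does not, in which
case the secondary calls cpu_panic_kernel(). A toy illustration of the test:

    #include <stdbool.h>
    #include <stdio.h>

    /* Mismatch iff exactly one of the two CPUs runs at EL2: a boolean XOR. */
    static bool el_mismatch(bool secondary_el2, bool boot_el2)
    {
            return secondary_el2 ^ boot_el2;
    }

    int main(void)
    {
            printf("EL2/EL2: %d\n", el_mismatch(true, true));   /* 0: ok */
            printf("EL1/EL2: %d\n", el_mismatch(false, true));  /* 1: would panic */
            return 0;
    }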
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index 6605539..b616e36 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -10,30 +10,11 @@
 #include <asm/suspend.h>
 #include <asm/tlbflush.h>
 
-extern int __cpu_suspend_enter(unsigned long arg, int (*fn)(unsigned long));
 /*
- * This is called by __cpu_suspend_enter() to save the state, and do whatever
- * flushing is required to ensure that when the CPU goes to sleep we have
- * the necessary data available when the caches are not searched.
- *
- * ptr: CPU context virtual address
- * save_ptr: address of the location where the context physical address
- *           must be saved
+ * This is allocated by cpu_suspend_init(), and used to store a pointer to
+ * the 'struct sleep_stack_data' that contains a particular CPU's state.
  */
-void notrace __cpu_suspend_save(struct cpu_suspend_ctx *ptr,
-				phys_addr_t *save_ptr)
-{
-	*save_ptr = virt_to_phys(ptr);
-
-	cpu_do_suspend(ptr);
-	/*
-	 * Only flush the context that must be retrieved with the MMU
-	 * off. VA primitives ensure the flush is applied to all
-	 * cache levels so context is pushed to DRAM.
-	 */
-	__flush_dcache_area(ptr, sizeof(*ptr));
-	__flush_dcache_area(save_ptr, sizeof(*save_ptr));
-}
+unsigned long *sleep_save_stash;
 
 /*
  * This hook is provided so that cpu_suspend code can restore HW
@@ -51,6 +32,30 @@
 	hw_breakpoint_restore = hw_bp_restore;
 }
 
+void notrace __cpu_suspend_exit(void)
+{
+	/*
+	 * We are resuming from reset with the idmap active in TTBR0_EL1.
+	 * We must uninstall the idmap and restore the expected MMU
+	 * state before we can possibly return to userspace.
+	 */
+	cpu_uninstall_idmap();
+
+	/*
+	 * Restore per-cpu offset before any kernel
+	 * subsystem relying on it has a chance to run.
+	 */
+	set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
+
+	/*
+	 * Restore HW breakpoint registers to sane values
+	 * before debug exceptions are possibly reenabled
+	 * through local_dbg_restore.
+	 */
+	if (hw_breakpoint_restore)
+		hw_breakpoint_restore(NULL);
+}
+
 /*
  * cpu_suspend
  *
@@ -60,8 +65,9 @@
  */
 int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
 {
-	int ret;
+	int ret = 0;
 	unsigned long flags;
+	struct sleep_stack_data state;
 
 	/*
 	 * From this point debug exceptions are disabled to prevent
@@ -77,34 +83,21 @@
 	 */
 	pause_graph_tracing();
 
-	/*
-	 * mm context saved on the stack, it will be restored when
-	 * the cpu comes out of reset through the identity mapped
-	 * page tables, so that the thread address space is properly
-	 * set-up on function return.
-	 */
-	ret = __cpu_suspend_enter(arg, fn);
-	if (ret == 0) {
-		/*
-		 * We are resuming from reset with the idmap active in TTBR0_EL1.
-		 * We must uninstall the idmap and restore the expected MMU
-		 * state before we can possibly return to userspace.
-		 */
-		cpu_uninstall_idmap();
+	if (__cpu_suspend_enter(&state)) {
+		/* Call the suspend finisher */
+		ret = fn(arg);
 
 		/*
-		 * Restore per-cpu offset before any kernel
-		 * subsystem relying on it has a chance to run.
+		 * Never gets here, unless the suspend finisher fails.
+		 * Successful cpu_suspend() should return from cpu_resume();
+		 * returning through this code path is considered an error.
+		 * If the return value is set to 0, force ret = -EOPNOTSUPP
+		 * to make sure a proper error condition is propagated.
 		 */
-		set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
-
-		/*
-		 * Restore HW breakpoint registers to sane values
-		 * before debug exceptions are possibly reenabled
-		 * through local_dbg_restore.
-		 */
-		if (hw_breakpoint_restore)
-			hw_breakpoint_restore(NULL);
+		if (!ret)
+			ret = -EOPNOTSUPP;
+	} else {
+		__cpu_suspend_exit();
 	}
 
 	unpause_graph_tracing();
@@ -119,22 +112,15 @@
 	return ret;
 }
 
-struct sleep_save_sp sleep_save_sp;
-
 static int __init cpu_suspend_init(void)
 {
-	void *ctx_ptr;
-
 	/* ctx_ptr is an array of physical addresses */
-	ctx_ptr = kcalloc(mpidr_hash_size(), sizeof(phys_addr_t), GFP_KERNEL);
+	sleep_save_stash = kcalloc(mpidr_hash_size(), sizeof(*sleep_save_stash),
+				   GFP_KERNEL);
 
-	if (WARN_ON(!ctx_ptr))
+	if (WARN_ON(!sleep_save_stash))
 		return -ENOMEM;
 
-	sleep_save_sp.save_ptr_stash = ctx_ptr;
-	sleep_save_sp.save_ptr_stash_phys = virt_to_phys(ctx_ptr);
-	__flush_dcache_area(&sleep_save_sp, sizeof(struct sleep_save_sp));
-
 	return 0;
 }
 early_initcall(cpu_suspend_init);
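With this rework, sleep_save_stash is just a kcalloc()'d array of per-CPU
slots indexed by the MPIDR hash: __cpu_suspend_enter() stores a pointer to the
CPU's sleep_stack_data in its slot, and _cpu_resume() reads it back after the
MMU is on, which is why plain virtual pointers suffice and the explicit dcache
flushing disappears from this file. A simplified user-space analogue of the
stash (struct layout and hash value fabricated):

    #include <stdio.h>

    /* Simplified stand-in for the kernel's struct sleep_stack_data. */
    struct sleep_stack_data {
            unsigned long callee_regs[12];
    };

    #define NR_SLOTS 4      /* stands in for mpidr_hash_size() */

    /* sleep_save_stash analogue: one pointer slot per MPIDR hash bucket. */
    static struct sleep_stack_data *stash[NR_SLOTS];

    int main(void)
    {
            unsigned int hash = 2;  /* fabricated MPIDR hash */
            struct sleep_stack_data state = { { 0 } };

            stash[hash] = &state;   /* suspend: publish the location */
            printf("resume finds the saved state at %p\n", (void *)stash[hash]);
            return 0;
    }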
diff --git a/arch/arm64/kernel/sys.c b/arch/arm64/kernel/sys.c
index 75151aa..26fe8ea 100644
--- a/arch/arm64/kernel/sys.c
+++ b/arch/arm64/kernel/sys.c
@@ -25,6 +25,7 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/syscalls.h>
+#include <asm/cpufeature.h>
 
 asmlinkage long sys_mmap(unsigned long addr, unsigned long len,
 			 unsigned long prot, unsigned long flags,
@@ -36,11 +37,20 @@
 	return sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
 }
 
+SYSCALL_DEFINE1(arm64_personality, unsigned int, personality)
+{
+	if (personality(personality) == PER_LINUX32 &&
+		!system_supports_32bit_el0())
+		return -EINVAL;
+	return sys_personality(personality);
+}
+
 /*
  * Wrappers to pass the pt_regs argument.
  */
 asmlinkage long sys_rt_sigreturn_wrapper(void);
 #define sys_rt_sigreturn	sys_rt_sigreturn_wrapper
+#define sys_personality		sys_arm64_personality
 
 #undef __SYSCALL
 #define __SYSCALL(nr, sym)	[nr] = sym,
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 97bc68f..64fc030 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -131,11 +131,11 @@
 		return -ENOMEM;
 
 	/* Grab the vDSO data page. */
-	vdso_pagelist[0] = virt_to_page(vdso_data);
+	vdso_pagelist[0] = pfn_to_page(PHYS_PFN(__pa(vdso_data)));
 
 	/* Grab the vDSO code pages. */
 	for (i = 0; i < vdso_pages; i++)
-		vdso_pagelist[i + 1] = virt_to_page(&vdso_start + i * PAGE_SIZE);
+		vdso_pagelist[i + 1] = pfn_to_page(PHYS_PFN(__pa(&vdso_start)) + i);
 
 	/* Populate the special mapping structures */
 	vdso_spec[0] = (struct vm_special_mapping) {
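The vdso.c change avoids virt_to_page() on vdso_data and vdso_start because
those are kernel-image symbols; with the image no longer guaranteed to sit
inside the linear mapping, virt_to_page() on them is invalid, while going
explicitly VA -> PA -> PFN -> struct page stays correct. PHYS_PFN() is just a
PAGE_SHIFT right shift, as this sketch shows (the address is fabricated):

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT 12
    /* PHYS_PFN() is just this shift in the kernel, too. */
    #define PHYS_PFN(phys)  ((phys) >> PAGE_SHIFT)

    int main(void)
    {
            uint64_t pa = 0x40081000ULL;    /* fabricated physical address */

            printf("pfn(%#llx) = %#llx\n",
                   (unsigned long long)pa,
                   (unsigned long long)PHYS_PFN(pa));
            return 0;
    }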
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 5a1939a..435e820 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -46,6 +46,16 @@
 	*(.idmap.text)					\
 	VMLINUX_SYMBOL(__idmap_text_end) = .;
 
+#ifdef CONFIG_HIBERNATION
+#define HIBERNATE_TEXT					\
+	. = ALIGN(SZ_4K);				\
+	VMLINUX_SYMBOL(__hibernate_exit_text_start) = .;\
+	*(.hibernate_exit.text)				\
+	VMLINUX_SYMBOL(__hibernate_exit_text_end) = .;
+#else
+#define HIBERNATE_TEXT
+#endif
+
 /*
  * The size of the PE/COFF section that covers the kernel image, which
  * runs from stext to _edata, must be a round multiple of the PE/COFF
@@ -63,14 +73,19 @@
 #endif
 
 #if defined(CONFIG_DEBUG_ALIGN_RODATA)
-#define ALIGN_DEBUG_RO			. = ALIGN(1<<SECTION_SHIFT);
-#define ALIGN_DEBUG_RO_MIN(min)		ALIGN_DEBUG_RO
-#elif defined(CONFIG_DEBUG_RODATA)
-#define ALIGN_DEBUG_RO			. = ALIGN(1<<PAGE_SHIFT);
-#define ALIGN_DEBUG_RO_MIN(min)		ALIGN_DEBUG_RO
+/*
+ *  4 KB granule:   1 level 2 entry
+ * 16 KB granule: 128 level 3 entries, with contiguous bit
+ * 64 KB granule:  32 level 3 entries, with contiguous bit
+ */
+#define SEGMENT_ALIGN			SZ_2M
 #else
-#define ALIGN_DEBUG_RO
-#define ALIGN_DEBUG_RO_MIN(min)		. = ALIGN(min);
+/*
+ *  4 KB granule:  16 level 3 entries, with contiguous bit
+ * 16 KB granule:   4 level 3 entries, without contiguous bit
+ * 64 KB granule:   1 level 3 entry
+ */
+#define SEGMENT_ALIGN			SZ_64K
 #endif
 
 SECTIONS
@@ -96,7 +111,6 @@
 		_text = .;
 		HEAD_TEXT
 	}
-	ALIGN_DEBUG_RO_MIN(PAGE_SIZE)
 	.text : {			/* Real text segment		*/
 		_stext = .;		/* Text and read-only data	*/
 			__exception_text_start = .;
@@ -109,18 +123,19 @@
 			LOCK_TEXT
 			HYPERVISOR_TEXT
 			IDMAP_TEXT
+			HIBERNATE_TEXT
 			*(.fixup)
 			*(.gnu.warning)
 		. = ALIGN(16);
 		*(.got)			/* Global offset table		*/
 	}
 
-	ALIGN_DEBUG_RO_MIN(PAGE_SIZE)
+	. = ALIGN(SEGMENT_ALIGN);
 	RO_DATA(PAGE_SIZE)		/* everything from this point to */
 	EXCEPTION_TABLE(8)		/* _etext will be marked RO NX   */
 	NOTES
 
-	ALIGN_DEBUG_RO_MIN(PAGE_SIZE)
+	. = ALIGN(SEGMENT_ALIGN);
 	_etext = .;			/* End of text and rodata section */
 	__init_begin = .;
 
@@ -154,12 +169,9 @@
 		*(.altinstr_replacement)
 	}
 	.rela : ALIGN(8) {
-		__reloc_start = .;
 		*(.rela .rela*)
-		__reloc_end = .;
 	}
 	.dynsym : ALIGN(8) {
-		__dynsym_start = .;
 		*(.dynsym)
 	}
 	.dynstr : {
@@ -169,7 +181,11 @@
 		*(.hash)
 	}
 
-	. = ALIGN(PAGE_SIZE);
+	__rela_offset	= ADDR(.rela) - KIMAGE_VADDR;
+	__rela_size	= SIZEOF(.rela);
+	__dynsym_offset	= ADDR(.dynsym) - KIMAGE_VADDR;
+
+	. = ALIGN(SEGMENT_ALIGN);
 	__init_end = .;
 
 	_data = .;
@@ -201,6 +217,10 @@
 	"HYP init code too big or misaligned")
 ASSERT(__idmap_text_end - (__idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K,
 	"ID map text too big or misaligned")
+#ifdef CONFIG_HIBERNATION
+ASSERT(__hibernate_exit_text_end - (__hibernate_exit_text_start & ~(SZ_4K - 1))
+	<= SZ_4K, "Hibernate exit text too big or misaligned")
+#endif
 
 /*
  * If padding is applied before .head.text, virt<->phys conversions will fail.
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index eba89e4..3246c4a 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -186,6 +186,13 @@
 		exit_handler = kvm_get_exit_handler(vcpu);
 
 		return exit_handler(vcpu, run);
+	case ARM_EXCEPTION_HYP_GONE:
+		/*
+		 * EL2 has been reset to the hyp-stub. This happens when a guest
+		 * is pre-empted by kvm_reboot()'s shutdown call.
+		 */
+		run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+		return 0;
 	default:
 		kvm_pr_unimpl("Unsupported exception type: %d",
 			      exception_index);
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index 7d8747c..a873a6d 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -21,6 +21,7 @@
 #include <asm/kvm_arm.h>
 #include <asm/kvm_mmu.h>
 #include <asm/pgtable-hwdef.h>
+#include <asm/sysreg.h>
 
 	.text
 	.pushsection	.hyp.idmap.text, "ax"
@@ -103,8 +104,8 @@
 	dsb	sy
 
 	mrs	x4, sctlr_el2
-	and	x4, x4, #SCTLR_EL2_EE	// preserve endianness of EL2
-	ldr	x5, =SCTLR_EL2_FLAGS
+	and	x4, x4, #SCTLR_ELx_EE	// preserve endianness of EL2
+	ldr	x5, =SCTLR_ELx_FLAGS
 	orr	x4, x4, x5
 	msr	sctlr_el2, x4
 	isb
@@ -138,6 +139,49 @@
 	eret
 ENDPROC(__kvm_hyp_init)
 
+	/*
+	 * Reset kvm back to the hyp stub. This is the trampoline dance in
+	 * reverse. If kvm used an extended idmap, __extended_idmap_trampoline
+	 * calls this code directly in the idmap. In this case switching to the
+	 * boot tables is a no-op.
+	 *
+	 * x0: HYP boot pgd
+	 * x1: HYP phys_idmap_start
+	 */
+ENTRY(__kvm_hyp_reset)
+	/* We're in trampoline code in VA, switch back to boot page tables */
+	msr	ttbr0_el2, x0
+	isb
+
+	/* Ensure the PA branch doesn't find a stale tlb entry or stale code. */
+	ic	iallu
+	tlbi	alle2
+	dsb	sy
+	isb
+
+	/* Branch into PA space */
+	adr	x0, 1f
+	bfi	x1, x0, #0, #PAGE_SHIFT
+	br	x1
+
+	/* We're now in idmap, disable MMU */
+1:	mrs	x0, sctlr_el2
+	ldr	x1, =SCTLR_ELx_FLAGS
+	bic	x0, x0, x1		// Clear SCTLR_ELx.M, etc.
+	msr	sctlr_el2, x0
+	isb
+
+	/* Invalidate the old TLBs */
+	tlbi	alle2
+	dsb	sy
+
+	/* Install stub vectors */
+	adr_l	x0, __hyp_stub_vectors
+	msr	vbar_el2, x0
+
+	eret
+ENDPROC(__kvm_hyp_reset)
+
 	.ltorg
 
 	.popsection
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 48f19a3..7ce9315 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -35,16 +35,21 @@
  * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c).  Return values are
  * passed in x0.
  *
- * A function pointer with a value of 0 has a special meaning, and is
- * used to implement __hyp_get_vectors in the same way as in
+ * A function pointer with a value less than 0xfff has a special meaning,
+ * and is used to implement __hyp_get_vectors in the same way as in
  * arch/arm64/kernel/hyp_stub.S.
+ * HVC behaves as a 'bl' call and will clobber lr.
  */
 ENTRY(__kvm_call_hyp)
-alternative_if_not ARM64_HAS_VIRT_HOST_EXTN	
+alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
+	str     lr, [sp, #-16]!
 	hvc	#0
+	ldr     lr, [sp], #16
 	ret
 alternative_else
 	b	__vhe_hyp_call
 	nop
+	nop
+	nop
 alternative_endif
 ENDPROC(__kvm_call_hyp)
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index ce9e5e5..70254a6 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -164,3 +164,22 @@
 
 	eret
 ENDPROC(__fpsimd_guest_restore)
+
+/*
+ * When using the extended idmap, we don't have a trampoline page we can use
+ * while we switch page tables during __kvm_hyp_reset. Accessing the idmap
+ * directly would be ideal, but if we're using the extended idmap then the
+ * idmap is located above HYP_PAGE_OFFSET, and the address will be masked by
+ * kvm_call_hyp using kern_hyp_va.
+ *
+ * x0: HYP boot pgd
+ * x1: HYP phys_idmap_start
+ */
+ENTRY(__extended_idmap_trampoline)
+	mov	x4, x1
+	adr_l	x3, __kvm_hyp_reset
+
+	/* insert __kvm_hyp_reset()'s offset into phys_idmap_start */
+	bfi	x4, x3, #0, #PAGE_SHIFT
+	br	x4
+ENDPROC(__extended_idmap_trampoline)
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index 3488894..2d87f36 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -42,19 +42,17 @@
 	 * Shuffle the parameters before calling the function
 	 * pointed to in x0. Assumes parameters in x[1,2,3].
 	 */
-	sub	sp, sp, #16
-	str	lr, [sp]
 	mov	lr, x0
 	mov	x0, x1
 	mov	x1, x2
 	mov	x2, x3
 	blr	lr
-	ldr	lr, [sp]
-	add	sp, sp, #16
 .endm
 
 ENTRY(__vhe_hyp_call)
+	str	lr, [sp, #-16]!
 	do_el2_call
+	ldr	lr, [sp], #16
 	/*
 	 * We used to rely on having an exception return to get
 	 * an implicit isb. In the E2H case, we don't have it anymore.
@@ -84,8 +82,8 @@
 	/* Here, we're pretty sure the host called HVC. */
 	restore_x0_to_x3
 
-	/* Check for __hyp_get_vectors */
-	cbnz	x0, 1f
+	cmp	x0, #HVC_GET_VECTORS
+	b.ne	1f
 	mrs	x0, vbar_el2
 	b	2f
 
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 9677bf0..b1ad730 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -29,7 +29,9 @@
 #include <asm/cputype.h>
 #include <asm/ptrace.h>
 #include <asm/kvm_arm.h>
+#include <asm/kvm_asm.h>
 #include <asm/kvm_coproc.h>
+#include <asm/kvm_mmu.h>
 
 /*
  * ARMv8 Reset Values
@@ -130,3 +132,31 @@
 	/* Reset timer */
 	return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq);
 }
+
+extern char __hyp_idmap_text_start[];
+
+unsigned long kvm_hyp_reset_entry(void)
+{
+	if (!__kvm_cpu_uses_extended_idmap()) {
+		unsigned long offset;
+
+		/*
+		 * Find the address of __kvm_hyp_reset() in the trampoline page.
+		 * This is present in the running page tables, and the boot page
+		 * tables, so we call the code here to start the trampoline
+		 * dance in reverse.
+		 */
+		offset = (unsigned long)__kvm_hyp_reset
+			 - ((unsigned long)__hyp_idmap_text_start & PAGE_MASK);
+
+		return TRAMPOLINE_VA + offset;
+	} else {
+		/*
+		 * KVM is running with merged page tables, which don't have the
+		 * trampoline page mapped. We know the idmap is still mapped,
+		 * but can't be called into directly. Use
+		 * __extended_idmap_trampoline to do the call.
+		 */
+		return (unsigned long)kvm_ksym_ref(__extended_idmap_trampoline);
+	}
+}
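Both kvm_hyp_reset_entry() above and the bfi instructions in hyp-init.S and
entry.S perform, in essence, the same address splice: keep a symbol's offset
within its page and substitute the page bits of a different mapping of that
page. A stand-alone sketch of the splice (both addresses fabricated):

    #include <stdio.h>

    #define PAGE_SHIFT      12
    #define PAGE_MASK       (~((1UL << PAGE_SHIFT) - 1))

    /*
     * Keep the offset-in-page bits of addr, replace the page bits with
     * those of another mapping of the same page (what bfi does in the asm).
     */
    static unsigned long rebase(unsigned long addr, unsigned long new_page_va)
    {
            return (new_page_va & PAGE_MASK) | (addr & ~PAGE_MASK);
    }

    int main(void)
    {
            unsigned long kvm_hyp_reset = 0xffff000008a312c0UL; /* fabricated */
            unsigned long trampoline_va = 0x0000000040000000UL; /* fabricated */

            printf("reset entry via trampoline: %#lx\n",
                   rebase(kvm_hyp_reset, trampoline_va));
            return 0;
    }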
diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile
index 57f57fd..54bb209 100644
--- a/arch/arm64/mm/Makefile
+++ b/arch/arm64/mm/Makefile
@@ -4,6 +4,7 @@
 				   context.o proc.o pageattr.o
 obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
 obj-$(CONFIG_ARM64_PTDUMP)	+= dump.o
+obj-$(CONFIG_NUMA)		+= numa.o
 
 obj-$(CONFIG_KASAN)		+= kasan_init.o
 KASAN_SANITIZE_kasan_init.o	:= n
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 6df0706..50ff9ba 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -24,8 +24,6 @@
 #include <asm/cpufeature.h>
 #include <asm/alternative.h>
 
-#include "proc-macros.S"
-
 /*
  *	flush_icache_range(start,end)
  *
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index c90c3c5..b7b3978 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -75,8 +75,7 @@
 		 */
 		pr_crit("CPU%d: smaller ASID size(%u) than boot CPU (%u)\n",
 				smp_processor_id(), asid, asid_bits);
-		update_cpu_boot_status(CPU_PANIC_KERNEL);
-		cpu_park_loop();
+		cpu_panic_kernel();
 	}
 }
 
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index a6e757c..fd8b942 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -804,57 +804,24 @@
 static LIST_HEAD(iommu_dma_masters);
 static DEFINE_MUTEX(iommu_dma_notifier_lock);
 
-/*
- * Temporarily "borrow" a domain feature flag to to tell if we had to resort
- * to creating our own domain here, in case we need to clean it up again.
- */
-#define __IOMMU_DOMAIN_FAKE_DEFAULT		(1U << 31)
-
 static bool do_iommu_attach(struct device *dev, const struct iommu_ops *ops,
 			   u64 dma_base, u64 size)
 {
 	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
 
 	/*
-	 * Best case: The device is either part of a group which was
-	 * already attached to a domain in a previous call, or it's
-	 * been put in a default DMA domain by the IOMMU core.
+	 * If the IOMMU driver has the DMA domain support that we require,
+	 * then the IOMMU core will have already configured a group for this
+	 * device, and allocated the default domain for that group.
 	 */
-	if (!domain) {
-		/*
-		 * Urgh. The IOMMU core isn't going to do default domains
-		 * for non-PCI devices anyway, until it has some means of
-		 * abstracting the entirely implementation-specific
-		 * sideband data/SoC topology/unicorn dust that may or
-		 * may not differentiate upstream masters.
-		 * So until then, HORRIBLE HACKS!
-		 */
-		domain = ops->domain_alloc(IOMMU_DOMAIN_DMA);
-		if (!domain)
-			goto out_no_domain;
-
-		domain->ops = ops;
-		domain->type = IOMMU_DOMAIN_DMA | __IOMMU_DOMAIN_FAKE_DEFAULT;
-
-		if (iommu_attach_device(domain, dev))
-			goto out_put_domain;
+	if (!domain || iommu_dma_init_domain(domain, dma_base, size)) {
+		pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
+			dev_name(dev));
+		return false;
 	}
 
-	if (iommu_dma_init_domain(domain, dma_base, size))
-		goto out_detach;
-
 	dev->archdata.dma_ops = &iommu_dma_ops;
 	return true;
-
-out_detach:
-	iommu_detach_device(domain, dev);
-out_put_domain:
-	if (domain->type & __IOMMU_DOMAIN_FAKE_DEFAULT)
-		iommu_domain_free(domain);
-out_no_domain:
-	pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
-		dev_name(dev));
-	return false;
 }
 
 static void queue_iommu_attach(struct device *dev, const struct iommu_ops *ops,
@@ -933,6 +900,10 @@
 		ret = register_iommu_dma_ops_notifier(&platform_bus_type);
 	if (!ret)
 		ret = register_iommu_dma_ops_notifier(&amba_bustype);
+#ifdef CONFIG_PCI
+	if (!ret)
+		ret = register_iommu_dma_ops_notifier(&pci_bus_type);
+#endif
 
 	/* handle devices queued before this arch_initcall */
 	if (!ret)
@@ -967,11 +938,8 @@
 {
 	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
 
-	if (domain) {
+	if (WARN_ON(domain))
 		iommu_detach_device(domain, dev);
-		if (domain->type & __IOMMU_DOMAIN_FAKE_DEFAULT)
-			iommu_domain_free(domain);
-	}
 
 	dev->archdata.dma_ops = NULL;
 }
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index f9271cb..8404190 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -23,6 +23,7 @@
 #include <linux/seq_file.h>
 
 #include <asm/fixmap.h>
+#include <asm/kasan.h>
 #include <asm/memory.h>
 #include <asm/pgtable.h>
 #include <asm/pgtable-hwdef.h>
@@ -32,37 +33,25 @@
 	const char *name;
 };
 
-enum address_markers_idx {
-	MODULES_START_NR = 0,
-	MODULES_END_NR,
-	VMALLOC_START_NR,
-	VMALLOC_END_NR,
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
-	VMEMMAP_START_NR,
-	VMEMMAP_END_NR,
+static const struct addr_marker address_markers[] = {
+#ifdef CONFIG_KASAN
+	{ KASAN_SHADOW_START,		"Kasan shadow start" },
+	{ KASAN_SHADOW_END,		"Kasan shadow end" },
 #endif
-	FIXADDR_START_NR,
-	FIXADDR_END_NR,
-	PCI_START_NR,
-	PCI_END_NR,
-	KERNEL_SPACE_NR,
-};
-
-static struct addr_marker address_markers[] = {
-	{ MODULES_VADDR,	"Modules start" },
-	{ MODULES_END,		"Modules end" },
-	{ VMALLOC_START,	"vmalloc() Area" },
-	{ VMALLOC_END,		"vmalloc() End" },
+	{ MODULES_VADDR,		"Modules start" },
+	{ MODULES_END,			"Modules end" },
+	{ VMALLOC_START,		"vmalloc() Area" },
+	{ VMALLOC_END,			"vmalloc() End" },
+	{ FIXADDR_START,		"Fixmap start" },
+	{ FIXADDR_TOP,			"Fixmap end" },
+	{ PCI_IO_START,			"PCI I/O start" },
+	{ PCI_IO_END,			"PCI I/O end" },
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
-	{ 0,			"vmemmap start" },
-	{ 0,			"vmemmap end" },
+	{ VMEMMAP_START,		"vmemmap start" },
+	{ VMEMMAP_START + VMEMMAP_SIZE,	"vmemmap end" },
 #endif
-	{ FIXADDR_START,	"Fixmap start" },
-	{ FIXADDR_TOP,		"Fixmap end" },
-	{ PCI_IO_START,		"PCI I/O start" },
-	{ PCI_IO_END,		"PCI I/O end" },
-	{ PAGE_OFFSET,		"Linear Mapping" },
-	{ -1,			NULL },
+	{ PAGE_OFFSET,			"Linear Mapping" },
+	{ -1,				NULL },
 };
 
 /*
@@ -347,13 +336,6 @@
 			for (j = 0; j < pg_level[i].num; j++)
 				pg_level[i].mask |= pg_level[i].bits[j].mask;
 
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
-	address_markers[VMEMMAP_START_NR].start_address =
-				(unsigned long)virt_to_page(PAGE_OFFSET);
-	address_markers[VMEMMAP_END_NR].start_address =
-				(unsigned long)virt_to_page(high_memory);
-#endif
-
 	pe = debugfs_create_file("kernel_page_tables", 0400, NULL, NULL,
 				 &ptdump_fops);
 	return pe ? 0 : -ENOMEM;
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 95df28b..5954881 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -81,6 +81,56 @@
 	printk("\n");
 }
 
+#ifdef CONFIG_ARM64_HW_AFDBM
+/*
+ * This function sets the access flags (dirty, accessed), as well as write
+ * permission, and only to a more permissive setting.
+ *
+ * It needs to cope with hardware update of the accessed/dirty state by other
+ * agents in the system and can safely skip the __sync_icache_dcache() call as,
+ * like set_pte_at(), the PTE is never changed from no-exec to exec here.
+ *
+ * Returns whether or not the PTE actually changed.
+ */
+int ptep_set_access_flags(struct vm_area_struct *vma,
+			  unsigned long address, pte_t *ptep,
+			  pte_t entry, int dirty)
+{
+	pteval_t old_pteval;
+	unsigned int tmp;
+
+	if (pte_same(*ptep, entry))
+		return 0;
+
+	/* only preserve the access flags and write permission */
+	pte_val(entry) &= PTE_AF | PTE_WRITE | PTE_DIRTY;
+
+	/*
+	 * PTE_RDONLY is cleared by default in the asm below, so set it
+	 * back if necessary (read-only or clean PTE).
+	 */
+	if (!pte_write(entry) || !dirty)
+		pte_val(entry) |= PTE_RDONLY;
+
+	/*
+	 * Setting the flags must be done atomically to avoid racing with the
+	 * hardware update of the access/dirty state.
+	 */
+	asm volatile("//	ptep_set_access_flags\n"
+	"	prfm	pstl1strm, %2\n"
+	"1:	ldxr	%0, %2\n"
+	"	and	%0, %0, %3		// clear PTE_RDONLY\n"
+	"	orr	%0, %0, %4		// set flags\n"
+	"	stxr	%w1, %0, %2\n"
+	"	cbnz	%w1, 1b\n"
+	: "=&r" (old_pteval), "=&r" (tmp), "+Q" (pte_val(*ptep))
+	: "L" (~PTE_RDONLY), "r" (pte_val(entry)));
+
+	flush_tlb_fix_spurious_fault(vma, address);
+	return 1;
+}
+#endif
+
 /*
  * The kernel tried to access some page that wasn't present.
  */
@@ -212,10 +262,6 @@
 	tsk = current;
 	mm  = tsk->mm;
 
-	/* Enable interrupts if they were enabled in the parent context. */
-	if (interrupts_enabled(regs))
-		local_irq_enable();
-
 	/*
 	 * If we're in an interrupt or have no user context, we must not take
 	 * the fault.
@@ -555,20 +601,33 @@
 {
 	const struct fault_info *inf = debug_fault_info + DBG_ESR_EVT(esr);
 	struct siginfo info;
+	int rv;
 
-	if (!inf->fn(addr, esr, regs))
-		return 1;
+	/*
+	 * Tell lockdep we disabled irqs in entry.S. Do nothing if they were
+	 * already disabled to preserve the last enabled/disabled addresses.
+	 */
+	if (interrupts_enabled(regs))
+		trace_hardirqs_off();
 
-	pr_alert("Unhandled debug exception: %s (0x%08x) at 0x%016lx\n",
-		 inf->name, esr, addr);
+	if (!inf->fn(addr, esr, regs)) {
+		rv = 1;
+	} else {
+		pr_alert("Unhandled debug exception: %s (0x%08x) at 0x%016lx\n",
+			 inf->name, esr, addr);
 
-	info.si_signo = inf->sig;
-	info.si_errno = 0;
-	info.si_code  = inf->code;
-	info.si_addr  = (void __user *)addr;
-	arm64_notify_die("", regs, &info, 0);
+		info.si_signo = inf->sig;
+		info.si_errno = 0;
+		info.si_code  = inf->code;
+		info.si_addr  = (void __user *)addr;
+		arm64_notify_die("", regs, &info, 0);
+		rv = 0;
+	}
 
-	return 0;
+	if (interrupts_enabled(regs))
+		trace_hardirqs_on();
+
+	return rv;
 }
 
 #ifdef CONFIG_ARM64_PAN
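The inline asm in ptep_set_access_flags() is an LL/SC (ldxr/stxr) retry loop:
clear PTE_RDONLY, OR in the new flags, and retry if the hardware AF/DBM
updater changed the PTE in between. A rough C11-atomics analogue of that loop
(the bit positions are illustrative, not the real arm64 PTE layout):

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative bit positions only, not the real arm64 PTE layout. */
    #define PTE_RDONLY      (1ULL << 7)
    #define PTE_AF          (1ULL << 10)

    /*
     * CAS loop standing in for the ldxr/stxr retry: clear PTE_RDONLY, OR in
     * the new flags, retry if someone (hardware AF/DBM updates in the
     * kernel, another thread here) changed the PTE in the meantime.
     */
    static void set_flags(_Atomic uint64_t *pte, uint64_t flags)
    {
            uint64_t old = atomic_load(pte), new;

            do {
                    new = (old & ~PTE_RDONLY) | flags;
            } while (!atomic_compare_exchange_weak(pte, &old, new));
    }

    int main(void)
    {
            _Atomic uint64_t pte = PTE_RDONLY;

            set_flags(&pte, PTE_AF);
            printf("pte = %#llx\n", (unsigned long long)atomic_load(&pte));
            return 0;
    }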
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index ea989d8..d45f862 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -40,6 +40,7 @@
 #include <asm/kasan.h>
 #include <asm/kernel-pgtable.h>
 #include <asm/memory.h>
+#include <asm/numa.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
 #include <asm/sizes.h>
@@ -86,6 +87,21 @@
 	return min(offset + (1ULL << 32), memblock_end_of_DRAM());
 }
 
+#ifdef CONFIG_NUMA
+
+static void __init zone_sizes_init(unsigned long min, unsigned long max)
+{
+	unsigned long max_zone_pfns[MAX_NR_ZONES]  = {0};
+
+	if (IS_ENABLED(CONFIG_ZONE_DMA))
+		max_zone_pfns[ZONE_DMA] = PFN_DOWN(max_zone_dma_phys());
+	max_zone_pfns[ZONE_NORMAL] = max;
+
+	free_area_init_nodes(max_zone_pfns);
+}
+
+#else
+
 static void __init zone_sizes_init(unsigned long min, unsigned long max)
 {
 	struct memblock_region *reg;
@@ -126,6 +142,8 @@
 	free_area_init_node(0, zone_size, min, zhole_size);
 }
 
+#endif /* CONFIG_NUMA */
+
 #ifdef CONFIG_HAVE_ARCH_PFN_VALID
 int pfn_valid(unsigned long pfn)
 {
@@ -142,10 +160,15 @@
 static void __init arm64_memory_present(void)
 {
 	struct memblock_region *reg;
+	int nid = 0;
 
-	for_each_memblock(memory, reg)
-		memory_present(0, memblock_region_memory_base_pfn(reg),
-			       memblock_region_memory_end_pfn(reg));
+	for_each_memblock(memory, reg) {
+#ifdef CONFIG_NUMA
+		nid = reg->nid;
+#endif
+		memory_present(nid, memblock_region_memory_base_pfn(reg),
+				memblock_region_memory_end_pfn(reg));
+	}
 }
 #endif
 
@@ -190,8 +213,12 @@
 	 */
 	memblock_remove(max_t(u64, memstart_addr + linear_region_size, __pa(_end)),
 			ULLONG_MAX);
-	if (memblock_end_of_DRAM() > linear_region_size)
-		memblock_remove(0, memblock_end_of_DRAM() - linear_region_size);
+	if (memstart_addr + linear_region_size < memblock_end_of_DRAM()) {
+		/* ensure that memstart_addr remains sufficiently aligned */
+		memstart_addr = round_up(memblock_end_of_DRAM() - linear_region_size,
+					 ARM64_MEMSTART_ALIGN);
+		memblock_remove(0, memstart_addr);
+	}
 
 	/*
 	 * Apply the memory limit if it was set. Since the kernel may be loaded
@@ -203,6 +230,35 @@
 		memblock_add(__pa(_text), (u64)(_end - _text));
 	}
 
+	if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && initrd_start) {
+		/*
+		 * Add back the memory we just removed if it results in the
+		 * initrd becoming inaccessible via the linear mapping.
+		 * Otherwise, this is a no-op.
+		 */
+		u64 base = initrd_start & PAGE_MASK;
+		u64 size = PAGE_ALIGN(initrd_end) - base;
+
+		/*
+		 * We can only add back the initrd memory if we don't end up
+		 * with more memory than we can address via the linear mapping.
+		 * It is up to the bootloader to position the kernel and the
+		 * initrd reasonably close to each other (i.e., within 32 GB of
+		 * each other) so that all granule/#levels combinations can
+		 * always access both.
+		 */
+		if (WARN(base < memblock_start_of_DRAM() ||
+			 base + size > memblock_start_of_DRAM() +
+				       linear_region_size,
+			"initrd not fully accessible via the linear mapping -- please check your bootloader ...\n")) {
+			initrd_start = 0;
+		} else {
+			memblock_remove(base, size); /* clear MEMBLOCK_ flags */
+			memblock_add(base, size);
+			memblock_reserve(base, size);
+		}
+	}
+
 	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
 		extern u16 memstart_offset_seed;
 		u64 range = linear_region_size -
@@ -245,7 +301,6 @@
 	dma_contiguous_reserve(arm64_dma_phys_limit);
 
 	memblock_allow_resize();
-	memblock_dump_all();
 }
 
 void __init bootmem_init(void)
@@ -257,6 +312,9 @@
 
 	early_memtest(min << PAGE_SHIFT, max << PAGE_SHIFT);
 
+	max_pfn = max_low_pfn = max;
+
+	arm64_numa_init();
 	/*
 	 * Sparsemem tries to allocate bootmem in memory_present(), so must be
 	 * done after the fixed reservations.
@@ -267,7 +325,7 @@
 	zone_sizes_init(min, max);
 
 	high_memory = __va((max << PAGE_SHIFT) - 1) + 1;
-	max_pfn = max_low_pfn = max;
+	memblock_dump_all();
 }
 
 #ifndef CONFIG_SPARSEMEM_VMEMMAP
@@ -371,26 +429,27 @@
 		MLM(MODULES_VADDR, MODULES_END));
 	pr_cont("    vmalloc : 0x%16lx - 0x%16lx   (%6ld GB)\n",
 		MLG(VMALLOC_START, VMALLOC_END));
-	pr_cont("      .text : 0x%p" " - 0x%p" "   (%6ld KB)\n"
-		"    .rodata : 0x%p" " - 0x%p" "   (%6ld KB)\n"
-		"      .init : 0x%p" " - 0x%p" "   (%6ld KB)\n"
-		"      .data : 0x%p" " - 0x%p" "   (%6ld KB)\n",
-		MLK_ROUNDUP(_text, __start_rodata),
-		MLK_ROUNDUP(__start_rodata, _etext),
-		MLK_ROUNDUP(__init_begin, __init_end),
+	pr_cont("      .text : 0x%p" " - 0x%p" "   (%6ld KB)\n",
+		MLK_ROUNDUP(_text, __start_rodata));
+	pr_cont("    .rodata : 0x%p" " - 0x%p" "   (%6ld KB)\n",
+		MLK_ROUNDUP(__start_rodata, _etext));
+	pr_cont("      .init : 0x%p" " - 0x%p" "   (%6ld KB)\n",
+		MLK_ROUNDUP(__init_begin, __init_end));
+	pr_cont("      .data : 0x%p" " - 0x%p" "   (%6ld KB)\n",
 		MLK_ROUNDUP(_sdata, _edata));
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
-	pr_cont("    vmemmap : 0x%16lx - 0x%16lx   (%6ld GB maximum)\n"
-		"              0x%16lx - 0x%16lx   (%6ld MB actual)\n",
-		MLG(VMEMMAP_START,
-		    VMEMMAP_START + VMEMMAP_SIZE),
-		MLM((unsigned long)phys_to_page(memblock_start_of_DRAM()),
-		    (unsigned long)virt_to_page(high_memory)));
-#endif
+	pr_cont("       .bss : 0x%p" " - 0x%p" "   (%6ld KB)\n",
+		MLK_ROUNDUP(__bss_start, __bss_stop));
 	pr_cont("    fixed   : 0x%16lx - 0x%16lx   (%6ld KB)\n",
 		MLK(FIXADDR_START, FIXADDR_TOP));
 	pr_cont("    PCI I/O : 0x%16lx - 0x%16lx   (%6ld MB)\n",
 		MLM(PCI_IO_START, PCI_IO_END));
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+	pr_cont("    vmemmap : 0x%16lx - 0x%16lx   (%6ld GB maximum)\n",
+		MLG(VMEMMAP_START, VMEMMAP_START + VMEMMAP_SIZE));
+	pr_cont("              0x%16lx - 0x%16lx   (%6ld MB actual)\n",
+		MLM((unsigned long)phys_to_page(memblock_start_of_DRAM()),
+		    (unsigned long)virt_to_page(high_memory)));
+#endif
 	pr_cont("    memory  : 0x%16lx - 0x%16lx   (%6ld MB)\n",
 		MLM(__phys_to_virt(memblock_start_of_DRAM()),
 		    (unsigned long)high_memory));
@@ -407,6 +466,12 @@
 	BUILD_BUG_ON(TASK_SIZE_32			> TASK_SIZE_64);
 #endif
 
+	/*
+	 * Make sure we chose the upper bound of sizeof(struct page)
+	 * correctly.
+	 */
+	BUILD_BUG_ON(sizeof(struct page) > (1 << STRUCT_PAGE_MAX_SHIFT));
+
 	if (PAGE_SIZE >= 16384 && get_num_physpages() <= 128) {
 		extern int sysctl_overcommit_memory;
 		/*
@@ -419,7 +484,8 @@
 
 void free_initmem(void)
 {
-	free_initmem_default(0);
+	free_reserved_area(__va(__pa(__init_begin)), __va(__pa(__init_end)),
+			   0, "unused kernel");
 	fixup_init();
 }
 
diff --git a/arch/arm64/mm/mm.h b/arch/arm64/mm/mm.h
index ef47d99..71fe9898 100644
--- a/arch/arm64/mm/mm.h
+++ b/arch/arm64/mm/mm.h
@@ -1,3 +1,2 @@
-extern void __init bootmem_init(void);
 
 void fixup_init(void);
diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
index 232f787..01c1717 100644
--- a/arch/arm64/mm/mmap.c
+++ b/arch/arm64/mm/mmap.c
@@ -95,8 +95,6 @@
 		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
 	}
 }
-EXPORT_SYMBOL_GPL(arch_pick_mmap_layout);
-
 
 /*
  * You really shouldn't be using read() or write() on /dev/mem.  This might go
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index f3e5c74..0f85a46 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -385,7 +385,7 @@
 
 static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end)
 {
-	unsigned long kernel_start = __pa(_stext);
+	unsigned long kernel_start = __pa(_text);
 	unsigned long kernel_end = __pa(_etext);
 
 	/*
@@ -417,7 +417,7 @@
 				     early_pgtable_alloc);
 
 	/*
-	 * Map the linear alias of the [_stext, _etext) interval as
+	 * Map the linear alias of the [_text, _etext) interval as
 	 * read-only/non-executable. This makes the contents of the
 	 * region accessible to subsystems such as hibernate, but
 	 * protects it from inadvertent modification or execution.
@@ -449,8 +449,8 @@
 {
 	unsigned long section_size;
 
-	section_size = (unsigned long)__start_rodata - (unsigned long)_stext;
-	create_mapping_late(__pa(_stext), (unsigned long)_stext,
+	section_size = (unsigned long)__start_rodata - (unsigned long)_text;
+	create_mapping_late(__pa(_text), (unsigned long)_text,
 			    section_size, PAGE_KERNEL_ROX);
 	/*
 	 * mark .rodata as read only. Use _etext rather than __end_rodata to
@@ -471,8 +471,8 @@
 	unmap_kernel_range((u64)__init_begin, (u64)(__init_end - __init_begin));
 }
 
-static void __init map_kernel_chunk(pgd_t *pgd, void *va_start, void *va_end,
-				    pgprot_t prot, struct vm_struct *vma)
+static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end,
+				      pgprot_t prot, struct vm_struct *vma)
 {
 	phys_addr_t pa_start = __pa(va_start);
 	unsigned long size = va_end - va_start;
@@ -499,11 +499,11 @@
 {
 	static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_init, vmlinux_data;
 
-	map_kernel_chunk(pgd, _stext, __start_rodata, PAGE_KERNEL_EXEC, &vmlinux_text);
-	map_kernel_chunk(pgd, __start_rodata, _etext, PAGE_KERNEL, &vmlinux_rodata);
-	map_kernel_chunk(pgd, __init_begin, __init_end, PAGE_KERNEL_EXEC,
-			 &vmlinux_init);
-	map_kernel_chunk(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data);
+	map_kernel_segment(pgd, _text, __start_rodata, PAGE_KERNEL_EXEC, &vmlinux_text);
+	map_kernel_segment(pgd, __start_rodata, _etext, PAGE_KERNEL, &vmlinux_rodata);
+	map_kernel_segment(pgd, __init_begin, __init_end, PAGE_KERNEL_EXEC,
+			   &vmlinux_init);
+	map_kernel_segment(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data);
 
 	if (!pgd_val(*pgd_offset_raw(pgd, FIXADDR_START))) {
 		/*
@@ -564,8 +564,6 @@
 	 */
 	memblock_free(__pa(swapper_pg_dir) + PAGE_SIZE,
 		      SWAPPER_DIR_SIZE - PAGE_SIZE);
-
-	bootmem_init();
 }
 
 /*
diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
new file mode 100644
index 0000000..98dc104
--- /dev/null
+++ b/arch/arm64/mm/numa.c
@@ -0,0 +1,396 @@
+/*
+ * NUMA support, based on the x86 implementation.
+ *
+ * Copyright (C) 2015 Cavium Inc.
+ * Author: Ganapatrao Kulkarni <gkulkarni@cavium.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/bootmem.h>
+#include <linux/memblock.h>
+#include <linux/module.h>
+#include <linux/of.h>
+
+struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
+EXPORT_SYMBOL(node_data);
+nodemask_t numa_nodes_parsed __initdata;
+static int cpu_to_node_map[NR_CPUS] = { [0 ... NR_CPUS-1] = NUMA_NO_NODE };
+
+static int numa_distance_cnt;
+static u8 *numa_distance;
+static int numa_off;
+
+static __init int numa_parse_early_param(char *opt)
+{
+	if (!opt)
+		return -EINVAL;
+	if (!strncmp(opt, "off", 3)) {
+		pr_info("%s\n", "NUMA turned off");
+		numa_off = 1;
+	}
+	return 0;
+}
+early_param("numa", numa_parse_early_param);
+
+cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
+EXPORT_SYMBOL(node_to_cpumask_map);
+
+#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+
+/*
+ * Returns a pointer to the bitmask of CPUs on Node 'node'.
+ */
+const struct cpumask *cpumask_of_node(int node)
+{
+	if (WARN_ON(node >= nr_node_ids))
+		return cpu_none_mask;
+
+	if (WARN_ON(node_to_cpumask_map[node] == NULL))
+		return cpu_online_mask;
+
+	return node_to_cpumask_map[node];
+}
+EXPORT_SYMBOL(cpumask_of_node);
+
+#endif
+
+static void map_cpu_to_node(unsigned int cpu, int nid)
+{
+	set_cpu_numa_node(cpu, nid);
+	if (nid >= 0)
+		cpumask_set_cpu(cpu, node_to_cpumask_map[nid]);
+}
+
+void numa_clear_node(unsigned int cpu)
+{
+	int nid = cpu_to_node(cpu);
+
+	if (nid >= 0)
+		cpumask_clear_cpu(cpu, node_to_cpumask_map[nid]);
+	set_cpu_numa_node(cpu, NUMA_NO_NODE);
+}
+
+/*
+ * Allocate node_to_cpumask_map based on number of available nodes
+ * Requires node_possible_map to be valid.
+ *
+ * Note: cpumask_of_node() is not valid until after this is done.
+ * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.)
+ */
+static void __init setup_node_to_cpumask_map(void)
+{
+	unsigned int cpu;
+	int node;
+
+	/* setup nr_node_ids if not done yet */
+	if (nr_node_ids == MAX_NUMNODES)
+		setup_nr_node_ids();
+
+	/* allocate and clear the mapping */
+	for (node = 0; node < nr_node_ids; node++) {
+		alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]);
+		cpumask_clear(node_to_cpumask_map[node]);
+	}
+
+	for_each_possible_cpu(cpu)
+		set_cpu_numa_node(cpu, NUMA_NO_NODE);
+
+	/* cpumask_of_node() will now work */
+	pr_debug("NUMA: Node to cpumask map for %d nodes\n", nr_node_ids);
+}
+
+/*
+ * Set the cpu-to-node and memory mapping
+ */
+void numa_store_cpu_info(unsigned int cpu)
+{
+	map_cpu_to_node(cpu, numa_off ? 0 : cpu_to_node_map[cpu]);
+}
+
+void __init early_map_cpu_to_node(unsigned int cpu, int nid)
+{
+	/* fallback to node 0 */
+	if (nid < 0 || nid >= MAX_NUMNODES)
+		nid = 0;
+
+	cpu_to_node_map[cpu] = nid;
+}
+
+/**
+ * numa_add_memblk - Set the node id for a memblk
+ * @nid: NUMA node ID of the new memblk
+ * @start: Start address of the new memblk
+ * @size:  Size of the new memblk
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+int __init numa_add_memblk(int nid, u64 start, u64 size)
+{
+	int ret;
+
+	ret = memblock_set_node(start, size, &memblock.memory, nid);
+	if (ret < 0) {
+		pr_err("NUMA: memblock [0x%llx - 0x%llx] failed to add on node %d\n",
+			start, (start + size - 1), nid);
+		return ret;
+	}
+
+	node_set(nid, numa_nodes_parsed);
+	pr_info("NUMA: Adding memblock [0x%llx - 0x%llx] on node %d\n",
+			start, (start + size - 1), nid);
+	return ret;
+}
+
+/**
+ * Initialize NODE_DATA for a node from node-local memory
+ */
+static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
+{
+	const size_t nd_size = roundup(sizeof(pg_data_t), SMP_CACHE_BYTES);
+	u64 nd_pa;
+	void *nd;
+	int tnid;
+
+	pr_info("NUMA: Initmem setup node %d [mem %#010Lx-%#010Lx]\n",
+			nid, start_pfn << PAGE_SHIFT,
+			(end_pfn << PAGE_SHIFT) - 1);
+
+	nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
+	nd = __va(nd_pa);
+
+	/* report and initialize */
+	pr_info("NUMA: NODE_DATA [mem %#010Lx-%#010Lx]\n",
+		nd_pa, nd_pa + nd_size - 1);
+	tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
+	if (tnid != nid)
+		pr_info("NUMA: NODE_DATA(%d) on node %d\n", nid, tnid);
+
+	node_data[nid] = nd;
+	memset(NODE_DATA(nid), 0, sizeof(pg_data_t));
+	NODE_DATA(nid)->node_id = nid;
+	NODE_DATA(nid)->node_start_pfn = start_pfn;
+	NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
+}
+
+/**
+ * numa_free_distance - Free the current NUMA distance table
+ */
+void __init numa_free_distance(void)
+{
+	size_t size;
+
+	if (!numa_distance)
+		return;
+
+	size = numa_distance_cnt * numa_distance_cnt *
+		sizeof(numa_distance[0]);
+
+	memblock_free(__pa(numa_distance), size);
+	numa_distance_cnt = 0;
+	numa_distance = NULL;
+}
+
+/**
+ * numa_alloc_distance - Create a new NUMA distance table
+ */
+static int __init numa_alloc_distance(void)
+{
+	size_t size;
+	u64 phys;
+	int i, j;
+
+	size = nr_node_ids * nr_node_ids * sizeof(numa_distance[0]);
+	phys = memblock_find_in_range(0, PFN_PHYS(max_pfn),
+				      size, PAGE_SIZE);
+	if (WARN_ON(!phys))
+		return -ENOMEM;
+
+	memblock_reserve(phys, size);
+
+	numa_distance = __va(phys);
+	numa_distance_cnt = nr_node_ids;
+
+	/* fill with the default distances */
+	for (i = 0; i < numa_distance_cnt; i++)
+		for (j = 0; j < numa_distance_cnt; j++)
+			numa_distance[i * numa_distance_cnt + j] = i == j ?
+				LOCAL_DISTANCE : REMOTE_DISTANCE;
+
+	pr_debug("NUMA: Initialized distance table, cnt=%d\n",
+			numa_distance_cnt);
+
+	return 0;
+}
+
+/**
+ * numa_set_distance - Set inter node NUMA distance from node to node.
+ * @from: the 'from' node to set distance
+ * @to: the 'to' node to set distance
+ * @distance: NUMA distance
+ *
+ * Set the distance from node @from to @to to @distance.
+ * If the distance table doesn't exist, a warning is printed.
+ *
+ * If @from or @to is higher than the highest known node or lower than zero
+ * or @distance doesn't make sense, the call is ignored.
+ *
+ */
+void __init numa_set_distance(int from, int to, int distance)
+{
+	if (!numa_distance) {
+		pr_warn_once("NUMA: Warning: distance table not allocated yet\n");
+		return;
+	}
+
+	if (from >= numa_distance_cnt || to >= numa_distance_cnt ||
+			from < 0 || to < 0) {
+		pr_warn_once("NUMA: Warning: node ids are out of bound, from=%d to=%d distance=%d\n",
+			    from, to, distance);
+		return;
+	}
+
+	if ((u8)distance != distance ||
+	    (from == to && distance != LOCAL_DISTANCE)) {
+		pr_warn_once("NUMA: Warning: invalid distance parameter, from=%d to=%d distance=%d\n",
+			     from, to, distance);
+		return;
+	}
+
+	numa_distance[from * numa_distance_cnt + to] = distance;
+}
+
+/**
+ * __node_distance - Return the NUMA distance from @from to @to
+ */
+int __node_distance(int from, int to)
+{
+	if (from >= numa_distance_cnt || to >= numa_distance_cnt)
+		return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE;
+	return numa_distance[from * numa_distance_cnt + to];
+}
+EXPORT_SYMBOL(__node_distance);
+
+static int __init numa_register_nodes(void)
+{
+	int nid;
+	struct memblock_region *mblk;
+
+	/* Check that valid nid is set to memblks */
+	for_each_memblock(memory, mblk)
+		if (mblk->nid == NUMA_NO_NODE || mblk->nid >= MAX_NUMNODES) {
+			pr_warn("NUMA: Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n",
+				mblk->nid, mblk->base,
+				mblk->base + mblk->size - 1);
+			return -EINVAL;
+		}
+
+	/* Finally register nodes. */
+	for_each_node_mask(nid, numa_nodes_parsed) {
+		unsigned long start_pfn, end_pfn;
+
+		get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
+		setup_node_data(nid, start_pfn, end_pfn);
+		node_set_online(nid);
+	}
+
+	/* Set node_possible_map to the parsed nodes */
+	node_possible_map = numa_nodes_parsed;
+
+	return 0;
+}
+
+static int __init numa_init(int (*init_func)(void))
+{
+	int ret;
+
+	nodes_clear(numa_nodes_parsed);
+	nodes_clear(node_possible_map);
+	nodes_clear(node_online_map);
+	numa_free_distance();
+
+	ret = numa_alloc_distance();
+	if (ret < 0)
+		return ret;
+
+	ret = init_func();
+	if (ret < 0)
+		return ret;
+
+	if (nodes_empty(numa_nodes_parsed))
+		return -EINVAL;
+
+	ret = numa_register_nodes();
+	if (ret < 0)
+		return ret;
+
+	setup_node_to_cpumask_map();
+
+	/* init boot processor */
+	cpu_to_node_map[0] = 0;
+	map_cpu_to_node(0, 0);
+
+	return 0;
+}
+
+/**
+ * dummy_numa_init - Fallback dummy NUMA init
+ *
+ * Used if there's no underlying NUMA architecture, NUMA initialization
+ * fails, or NUMA is disabled on the command line.
+ *
+ * Must online at least one node (node 0) and add memory blocks that cover all
+ * allowed memory. It is unlikely that this function fails.
+ */
+static int __init dummy_numa_init(void)
+{
+	int ret;
+	struct memblock_region *mblk;
+
+	pr_info("No NUMA configuration found\n");
+	pr_info("NUMA: Faking a node at [mem %#018Lx-%#018Lx]\n",
+	       0LLU, PFN_PHYS(max_pfn) - 1);
+
+	for_each_memblock(memory, mblk) {
+		ret = numa_add_memblk(0, mblk->base, mblk->size);
+		if (!ret)
+			continue;
+
+		pr_err("NUMA init failed\n");
+		return ret;
+	}
+
+	numa_off = 1;
+	return 0;
+}
+
+/**
+ * arm64_numa_init - Initialize NUMA
+ *
+ * Try each configured NUMA initialization method until one succeeds. The
+ * last fallback is a dummy single-node config encompassing all memory.
+ */
+void __init arm64_numa_init(void)
+{
+	if (!numa_off) {
+		if (!numa_init(of_numa_init))
+			return;
+	}
+
+	numa_init(dummy_numa_init);
+}
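
The new numa.c above keeps node distances in a flat nr_node_ids x nr_node_ids
byte matrix. A minimal userspace sketch of the indexing convention, assuming
the standard LOCAL_DISTANCE/REMOTE_DISTANCE values (10 and 20) from
<linux/topology.h>; everything else here is illustrative:

	#include <stdio.h>

	#define LOCAL_DISTANCE	10
	#define REMOTE_DISTANCE	20

	int main(void)
	{
		int i, j, cnt = 4;		/* pretend numa_distance_cnt == 4 */
		unsigned char dist[4 * 4];

		for (i = 0; i < cnt; i++)
			for (j = 0; j < cnt; j++)
				dist[i * cnt + j] = (i == j) ? LOCAL_DISTANCE
							     : REMOTE_DISTANCE;

		/* the equivalent of __node_distance(1, 2) */
		printf("%d\n", dist[1 * cnt + 2]);	/* prints 20 */
		return 0;
	}

Keeping the matrix flat makes the __node_distance() lookup a single multiply
and add, and out-of-range node ids simply fall back to the default distances.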
diff --git a/arch/arm64/mm/proc-macros.S b/arch/arm64/mm/proc-macros.S
deleted file mode 100644
index e6a30e1..0000000
--- a/arch/arm64/mm/proc-macros.S
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Based on arch/arm/mm/proc-macros.S
- *
- * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <asm/asm-offsets.h>
-#include <asm/thread_info.h>
-
-/*
- * vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm)
- */
-	.macro	vma_vm_mm, rd, rn
-	ldr	\rd, [\rn, #VMA_VM_MM]
-	.endm
-
-/*
- * mmid - get context id from mm pointer (mm->context.id)
- */
-	.macro	mmid, rd, rn
-	ldr	\rd, [\rn, #MM_CONTEXT_ID]
-	.endm
-
-/*
- * dcache_line_size - get the minimum D-cache line size from the CTR register.
- */
-	.macro	dcache_line_size, reg, tmp
-	mrs	\tmp, ctr_el0			// read CTR
-	ubfm	\tmp, \tmp, #16, #19		// cache line size encoding
-	mov	\reg, #4			// bytes per word
-	lsl	\reg, \reg, \tmp		// actual cache line size
-	.endm
-
-/*
- * icache_line_size - get the minimum I-cache line size from the CTR register.
- */
-	.macro	icache_line_size, reg, tmp
-	mrs	\tmp, ctr_el0			// read CTR
-	and	\tmp, \tmp, #0xf		// cache line size encoding
-	mov	\reg, #4			// bytes per word
-	lsl	\reg, \reg, \tmp		// actual cache line size
-	.endm
-
-/*
- * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map
- */
-	.macro	tcr_set_idmap_t0sz, valreg, tmpreg
-#ifndef CONFIG_ARM64_VA_BITS_48
-	ldr_l	\tmpreg, idmap_t0sz
-	bfi	\valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH
-#endif
-	.endm
-
-/*
- * Macro to perform a data cache maintenance for the interval
- * [kaddr, kaddr + size)
- *
- * 	op:		operation passed to dc instruction
- * 	domain:		domain used in dsb instruciton
- * 	kaddr:		starting virtual address of the region
- * 	size:		size of the region
- * 	Corrupts: 	kaddr, size, tmp1, tmp2
- */
-	.macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2
-	dcache_line_size \tmp1, \tmp2
-	add	\size, \kaddr, \size
-	sub	\tmp2, \tmp1, #1
-	bic	\kaddr, \kaddr, \tmp2
-9998:	dc	\op, \kaddr
-	add	\kaddr, \kaddr, \tmp1
-	cmp	\kaddr, \size
-	b.lo	9998b
-	dsb	\domain
-	.endm
-
-/*
- * reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present
- */
-	.macro	reset_pmuserenr_el0, tmpreg
-	mrs	\tmpreg, id_aa64dfr0_el1	// Check ID_AA64DFR0_EL1 PMUVer
-	sbfx	\tmpreg, \tmpreg, #8, #4
-	cmp	\tmpreg, #1			// Skip if no PMU present
-	b.lt	9000f
-	msr	pmuserenr_el0, xzr		// Disable PMU access from EL0
-9000:
-	.endm
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 543f519..c431787 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -23,13 +23,11 @@
 #include <asm/assembler.h>
 #include <asm/asm-offsets.h>
 #include <asm/hwcap.h>
-#include <asm/pgtable-hwdef.h>
 #include <asm/pgtable.h>
+#include <asm/pgtable-hwdef.h>
 #include <asm/cpufeature.h>
 #include <asm/alternative.h>
 
-#include "proc-macros.S"
-
 #ifdef CONFIG_ARM64_64K_PAGES
 #define TCR_TG_FLAGS	TCR_TG0_64K | TCR_TG1_64K
 #elif defined(CONFIG_ARM64_16K_PAGES)
@@ -66,62 +64,50 @@
 	mrs	x2, tpidr_el0
 	mrs	x3, tpidrro_el0
 	mrs	x4, contextidr_el1
-	mrs	x5, mair_el1
-	mrs	x6, cpacr_el1
-	mrs	x7, ttbr1_el1
-	mrs	x8, tcr_el1
-	mrs	x9, vbar_el1
-	mrs	x10, mdscr_el1
-	mrs	x11, oslsr_el1
-	mrs	x12, sctlr_el1
+	mrs	x5, cpacr_el1
+	mrs	x6, tcr_el1
+	mrs	x7, vbar_el1
+	mrs	x8, mdscr_el1
+	mrs	x9, oslsr_el1
+	mrs	x10, sctlr_el1
 	stp	x2, x3, [x0]
-	stp	x4, x5, [x0, #16]
-	stp	x6, x7, [x0, #32]
-	stp	x8, x9, [x0, #48]
-	stp	x10, x11, [x0, #64]
-	str	x12, [x0, #80]
+	stp	x4, xzr, [x0, #16]
+	stp	x5, x6, [x0, #32]
+	stp	x7, x8, [x0, #48]
+	stp	x9, x10, [x0, #64]
 	ret
 ENDPROC(cpu_do_suspend)
 
 /**
  * cpu_do_resume - restore CPU register context
  *
- * x0: Physical address of context pointer
- * x1: ttbr0_el1 to be restored
- *
- * Returns:
- *	sctlr_el1 value in x0
+ * x0: Address of context pointer
  */
 ENTRY(cpu_do_resume)
-	/*
-	 * Invalidate local tlb entries before turning on MMU
-	 */
-	tlbi	vmalle1
 	ldp	x2, x3, [x0]
 	ldp	x4, x5, [x0, #16]
-	ldp	x6, x7, [x0, #32]
-	ldp	x8, x9, [x0, #48]
-	ldp	x10, x11, [x0, #64]
-	ldr	x12, [x0, #80]
+	ldp	x6, x8, [x0, #32]
+	ldp	x9, x10, [x0, #48]
+	ldp	x11, x12, [x0, #64]
 	msr	tpidr_el0, x2
 	msr	tpidrro_el0, x3
 	msr	contextidr_el1, x4
-	msr	mair_el1, x5
 	msr	cpacr_el1, x6
-	msr	ttbr0_el1, x1
-	msr	ttbr1_el1, x7
-	tcr_set_idmap_t0sz x8, x7
+
+	/* Don't change t0sz here: keep the live T0SZ bits when restoring TCR_EL1 */
+	mrs	x5, tcr_el1
+	bfi	x8, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH
+
 	msr	tcr_el1, x8
 	msr	vbar_el1, x9
 	msr	mdscr_el1, x10
+	msr	sctlr_el1, x12
 	/*
 	 * Restore oslsr_el1 by writing oslar_el1
 	 */
 	ubfx	x11, x11, #1, #1
 	msr	oslar_el1, x11
 	reset_pmuserenr_el0 x0			// Disable PMU access from EL0
-	mov	x0, x12
-	dsb	nsh		// Make sure local tlb invalidation completed
 	isb
 	ret
 ENDPROC(cpu_do_resume)
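
The cpu_do_suspend()/cpu_do_resume() rework above drops mair_el1 and
ttbr1_el1 from the saved context and stores ten registers plus one zeroed
pairing slot. An illustrative C view of the buffer layout implied by the
stp/ldp offsets; the struct and field names are labels for this sketch only
(the kernel sizes the real buffer via NR_CTX_REGS):

	struct suspend_ctx_sketch {
		unsigned long tpidr_el0;	/* [x0, #0]  */
		unsigned long tpidrro_el0;	/* [x0, #8]  */
		unsigned long contextidr_el1;	/* [x0, #16] */
		unsigned long zero_pad;		/* [x0, #24], xzr keeps the stp pairing */
		unsigned long cpacr_el1;	/* [x0, #32] */
		unsigned long tcr_el1;		/* [x0, #40] */
		unsigned long vbar_el1;		/* [x0, #48] */
		unsigned long mdscr_el1;	/* [x0, #56] */
		unsigned long oslsr_el1;	/* [x0, #64] */
		unsigned long sctlr_el1;	/* [x0, #72] */
	};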
diff --git a/arch/ia64/include/asm/iommu.h b/arch/ia64/include/asm/iommu.h
index 105c93b..1d12129 100644
--- a/arch/ia64/include/asm/iommu.h
+++ b/arch/ia64/include/asm/iommu.h
@@ -1,7 +1,6 @@
 #ifndef _ASM_IA64_IOMMU_H
 #define _ASM_IA64_IOMMU_H 1
 
-#define cpu_has_x2apic 0
 /* 10 seconds */
 #define DMAR_OPERATION_TIMEOUT (((cycles_t) local_cpu_data->itc_freq)*10)
 
diff --git a/arch/ia64/include/asm/rwsem.h b/arch/ia64/include/asm/rwsem.h
index ce11247..8b23e07 100644
--- a/arch/ia64/include/asm/rwsem.h
+++ b/arch/ia64/include/asm/rwsem.h
@@ -49,8 +49,8 @@
 /*
  * lock for writing
  */
-static inline void
-__down_write (struct rw_semaphore *sem)
+static inline long
+___down_write (struct rw_semaphore *sem)
 {
 	long old, new;
 
@@ -59,10 +59,26 @@
 		new = old + RWSEM_ACTIVE_WRITE_BIAS;
 	} while (cmpxchg_acq(&sem->count, old, new) != old);
 
-	if (old != 0)
+	return old;
+}
+
+static inline void
+__down_write (struct rw_semaphore *sem)
+{
+	if (___down_write(sem))
 		rwsem_down_write_failed(sem);
 }
 
+static inline int
+__down_write_killable (struct rw_semaphore *sem)
+{
+	if (___down_write(sem))
+		if (IS_ERR(rwsem_down_write_failed_killable(sem)))
+			return -EINTR;
+
+	return 0;
+}
+
 /*
  * unlock after reading
  */
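
This hunk (like the s390 one further down) splits the uninterruptible fast
path into ___down_write() so a killable variant can share it. A hedged sketch
of the caller-side pattern the new primitive enables, assuming the generic
down_write_killable() wrapper that sits on top of __down_write_killable();
the function below is hypothetical:

	static int update_under_lock(struct rw_semaphore *sem)
	{
		/* returns -EINTR instead of blocking once a fatal signal is pending */
		if (down_write_killable(sem))
			return -EINTR;

		/* ... modify the protected data ... */

		up_write(sem);
		return 0;
	}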
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index 300dac3..bf0865c 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -531,8 +531,6 @@
 	       efi.systab->hdr.revision >> 16,
 	       efi.systab->hdr.revision & 0xffff, vendor);
 
-	set_bit(EFI_SYSTEM_TABLES, &efi.flags);
-
 	palo_phys      = EFI_INVALID_TABLE_ADDR;
 
 	if (efi_config_init(arch_tables) != 0)
diff --git a/arch/m68k/bvme6000/rtc.c b/arch/m68k/bvme6000/rtc.c
index cf12a17..f7984f4 100644
--- a/arch/m68k/bvme6000/rtc.c
+++ b/arch/m68k/bvme6000/rtc.c
@@ -15,7 +15,7 @@
 #include <linux/init.h>
 #include <linux/poll.h>
 #include <linux/module.h>
-#include <linux/mc146818rtc.h>	/* For struct rtc_time and ioctls, etc */
+#include <linux/rtc.h>	/* For struct rtc_time and ioctls, etc */
 #include <linux/bcd.h>
 #include <asm/bvme6000hw.h>
 
diff --git a/arch/m68k/mvme16x/rtc.c b/arch/m68k/mvme16x/rtc.c
index 1755e2f..1cdc732 100644
--- a/arch/m68k/mvme16x/rtc.c
+++ b/arch/m68k/mvme16x/rtc.c
@@ -14,7 +14,7 @@
 #include <linux/fcntl.h>
 #include <linux/init.h>
 #include <linux/poll.h>
-#include <linux/mc146818rtc.h>	/* For struct rtc_time and ioctls, etc */
+#include <linux/rtc.h>	/* For struct rtc_time and ioctls, etc */
 #include <linux/bcd.h>
 #include <asm/mvme16xhw.h>
 
diff --git a/arch/metag/include/asm/atomic_lnkget.h b/arch/metag/include/asm/atomic_lnkget.h
index a625818..88fa25f 100644
--- a/arch/metag/include/asm/atomic_lnkget.h
+++ b/arch/metag/include/asm/atomic_lnkget.h
@@ -61,7 +61,7 @@
 		"	CMPT	%0, #HI(0x02000000)\n"			\
 		"	BNZ 1b\n"					\
 		: "=&d" (temp), "=&da" (result)				\
-		: "da" (&v->counter), "bd" (i)				\
+		: "da" (&v->counter), "br" (i)				\
 		: "cc");						\
 									\
 	smp_mb();							\
diff --git a/arch/metag/kernel/ftrace.c b/arch/metag/kernel/ftrace.c
index ac8c039..f7b23d3 100644
--- a/arch/metag/kernel/ftrace.c
+++ b/arch/metag/kernel/ftrace.c
@@ -115,7 +115,6 @@
 	return ftrace_modify_code(ip, old, new);
 }
 
-/* run from kstop_machine */
 int __init ftrace_dyn_arch_init(void)
 {
 	return 0;
diff --git a/arch/metag/kernel/perf/perf_event.c b/arch/metag/kernel/perf/perf_event.c
index 2478ec6..33a365f 100644
--- a/arch/metag/kernel/perf/perf_event.c
+++ b/arch/metag/kernel/perf/perf_event.c
@@ -618,6 +618,8 @@
 
 	/* Check for a core internal or performance channel event. */
 	if (tmp) {
+		/* PERF_ICORE/PERF_CHAN only exist since Meta2 */
+#ifdef METAC_2_1
 		void *perf_addr;
 
 		/*
@@ -640,6 +642,7 @@
 
 		if (perf_addr)
 			metag_out32((config & 0x0f), perf_addr);
+#endif
 
 		/*
 		 * Now we use the high nibble as the performance event to
diff --git a/arch/metag/kernel/perf_callchain.c b/arch/metag/kernel/perf_callchain.c
index 3156334..252abc1 100644
--- a/arch/metag/kernel/perf_callchain.c
+++ b/arch/metag/kernel/perf_callchain.c
@@ -65,7 +65,7 @@
 
 	--frame;
 
-	while ((entry->nr < PERF_MAX_STACK_DEPTH) && frame)
+	while ((entry->nr < sysctl_perf_event_max_stack) && frame)
 		frame = user_backtrace(frame, entry);
 }
 
diff --git a/arch/mips/kernel/perf_event.c b/arch/mips/kernel/perf_event.c
index c1cf9c6..5021c54 100644
--- a/arch/mips/kernel/perf_event.c
+++ b/arch/mips/kernel/perf_event.c
@@ -35,7 +35,7 @@
 		addr = *sp++;
 		if (__kernel_text_address(addr)) {
 			perf_callchain_store(entry, addr);
-			if (entry->nr >= PERF_MAX_STACK_DEPTH)
+			if (entry->nr >= sysctl_perf_event_max_stack)
 				break;
 		}
 	}
@@ -59,7 +59,7 @@
 	}
 	do {
 		perf_callchain_store(entry, pc);
-		if (entry->nr >= PERF_MAX_STACK_DEPTH)
+		if (entry->nr >= sysctl_perf_event_max_stack)
 			break;
 		pc = unwind_stack(current, &sp, pc, &ra);
 	} while (pc);
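
This hunk, together with the metag, powerpc and sparc hunks in this series,
replaces the compile-time PERF_MAX_STACK_DEPTH cap with the runtime
sysctl_perf_event_max_stack limit (tunable through
/proc/sys/kernel/perf_event_max_stack). A sketch of the shared shape of these
walkers; next_frame() and state stand in for each arch's unwind step:

	while (entry->nr < sysctl_perf_event_max_stack) {
		perf_callchain_store(entry, pc);
		pc = next_frame(&state);	/* hypothetical unwinder */
		if (!pc)
			break;
	}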
diff --git a/arch/nios2/include/asm/io.h b/arch/nios2/include/asm/io.h
index c5a62da..ce072ba 100644
--- a/arch/nios2/include/asm/io.h
+++ b/arch/nios2/include/asm/io.h
@@ -50,7 +50,6 @@
 
 /* Pages to physical address... */
 #define page_to_phys(page)	virt_to_phys(page_to_virt(page))
-#define page_to_bus(page)	page_to_virt(page)
 
 /* Macros used for converting between virtual and physical mappings. */
 #define phys_to_virt(vaddr)	\
diff --git a/arch/nios2/include/asm/page.h b/arch/nios2/include/asm/page.h
index 4b32d6f..c1683f5 100644
--- a/arch/nios2/include/asm/page.h
+++ b/arch/nios2/include/asm/page.h
@@ -84,7 +84,7 @@
 	((void *)((unsigned long)(x) + PAGE_OFFSET - PHYS_OFFSET))
 
 #define page_to_virt(page)	\
-	((((page) - mem_map) << PAGE_SHIFT) + PAGE_OFFSET)
+	((void *)(((page) - mem_map) << PAGE_SHIFT) + PAGE_OFFSET)
 
 # define pfn_to_kaddr(pfn)	__va((pfn) << PAGE_SHIFT)
 # define pfn_valid(pfn)		((pfn) >= ARCH_PFN_OFFSET &&	\
diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h
index a213e8c..298393c 100644
--- a/arch/nios2/include/asm/pgtable.h
+++ b/arch/nios2/include/asm/pgtable.h
@@ -209,7 +209,7 @@
 static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 			      pte_t *ptep, pte_t pteval)
 {
-	unsigned long paddr = page_to_virt(pte_page(pteval));
+	unsigned long paddr = (unsigned long)page_to_virt(pte_page(pteval));
 
 	flush_dcache_range(paddr, paddr + PAGE_SIZE);
 	set_pte(ptep, pteval);
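
The three nios2 hunks are one typing fix: page_to_virt() now evaluates to a
void * (relying on GCC's void-pointer arithmetic for the + PAGE_OFFSET part)
instead of an unsigned long, so generic users get a real pointer and callers
that want an integer must cast. A minimal sketch of the before/after types:

	/* new macro: a well-typed pointer */
	void *kaddr = page_to_virt(page);

	/* integer users now cast explicitly, as set_pte_at() does above */
	unsigned long addr = (unsigned long)page_to_virt(page);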
diff --git a/arch/openrisc/include/asm/page.h b/arch/openrisc/include/asm/page.h
index e613d36..35bcb7c 100644
--- a/arch/openrisc/include/asm/page.h
+++ b/arch/openrisc/include/asm/page.h
@@ -81,8 +81,6 @@
 
 #define virt_to_page(addr) \
 	(mem_map + (((unsigned long)(addr)-PAGE_OFFSET) >> PAGE_SHIFT))
-#define page_to_virt(page) \
-	((((page) - mem_map) << PAGE_SHIFT) + PAGE_OFFSET)
 
 #define page_to_phys(page)      ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT)
 
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 8cac1eb..55c924b 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -565,7 +565,7 @@
 		smp_ops->give_timebase();
 
-	/* Wait until cpu puts itself in the online & active maps */
+	/* Wait until cpu puts itself in the online map */
-	while (!cpu_online(cpu) || !cpu_active(cpu))
+	while (!cpu_online(cpu))
 		cpu_relax();
 
 	return 0;
diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c
index e04a675..22d9015 100644
--- a/arch/powerpc/perf/callchain.c
+++ b/arch/powerpc/perf/callchain.c
@@ -247,7 +247,7 @@
 	sp = regs->gpr[1];
 	perf_callchain_store(entry, next_ip);
 
-	while (entry->nr < PERF_MAX_STACK_DEPTH) {
+	while (entry->nr < sysctl_perf_event_max_stack) {
 		fp = (unsigned long __user *) sp;
 		if (!valid_user_sp(sp, 1) || read_user_stack_64(fp, &next_sp))
 			return;
@@ -453,7 +453,7 @@
 	sp = regs->gpr[1];
 	perf_callchain_store(entry, next_ip);
 
-	while (entry->nr < PERF_MAX_STACK_DEPTH) {
+	while (entry->nr < sysctl_perf_event_max_stack) {
 		fp = (unsigned int __user *) (unsigned long) sp;
 		if (!valid_user_sp(sp, 0) || read_user_stack_32(fp, &next_sp))
 			return;
diff --git a/arch/s390/include/asm/rwsem.h b/arch/s390/include/asm/rwsem.h
index fead491..c75e447 100644
--- a/arch/s390/include/asm/rwsem.h
+++ b/arch/s390/include/asm/rwsem.h
@@ -90,7 +90,7 @@
 /*
  * lock for writing
  */
-static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
+static inline long ___down_write(struct rw_semaphore *sem)
 {
 	signed long old, new, tmp;
 
@@ -104,13 +104,23 @@
 		: "=&d" (old), "=&d" (new), "=Q" (sem->count)
 		: "Q" (sem->count), "m" (tmp)
 		: "cc", "memory");
-	if (old != 0)
-		rwsem_down_write_failed(sem);
+
+	return old;
 }
 
 static inline void __down_write(struct rw_semaphore *sem)
 {
-	__down_write_nested(sem, 0);
+	if (___down_write(sem))
+		rwsem_down_write_failed(sem);
+}
+
+static inline int __down_write_killable(struct rw_semaphore *sem)
+{
+	if (___down_write(sem))
+		if (IS_ERR(rwsem_down_write_failed_killable(sem)))
+			return -EINTR;
+
+	return 0;
 }
 
 /*
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 40a6b4f..7b89a75 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -832,7 +832,7 @@
 	pcpu_attach_task(pcpu, tidle);
 	pcpu_start_fn(pcpu, smp_start_secondary, NULL);
-	/* Wait until cpu puts itself in the online & active maps */
+	/* Wait until cpu puts itself in the online map */
-	while (!cpu_online(cpu) || !cpu_active(cpu))
+	while (!cpu_online(cpu))
 		cpu_relax();
 	return 0;
 }
diff --git a/arch/sh/boards/board-sh7757lcr.c b/arch/sh/boards/board-sh7757lcr.c
index 324599b..0104c81 100644
--- a/arch/sh/boards/board-sh7757lcr.c
+++ b/arch/sh/boards/board-sh7757lcr.c
@@ -20,7 +20,6 @@
 #include <linux/mfd/tmio.h>
 #include <linux/mmc/host.h>
 #include <linux/mmc/sh_mmcif.h>
-#include <linux/mmc/sh_mobile_sdhi.h>
 #include <linux/sh_eth.h>
 #include <linux/sh_intc.h>
 #include <linux/usb/renesas_usbhs.h>
diff --git a/arch/sh/boards/mach-ap325rxa/setup.c b/arch/sh/boards/mach-ap325rxa/setup.c
index 62c3b81..de8393c 100644
--- a/arch/sh/boards/mach-ap325rxa/setup.c
+++ b/arch/sh/boards/mach-ap325rxa/setup.c
@@ -15,7 +15,6 @@
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
 #include <linux/mmc/host.h>
-#include <linux/mmc/sh_mobile_sdhi.h>
 #include <linux/mtd/physmap.h>
 #include <linux/mtd/sh_flctl.h>
 #include <linux/mfd/tmio.h>
diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c
index a9c0c07..6d61279 100644
--- a/arch/sh/boards/mach-ecovec24/setup.c
+++ b/arch/sh/boards/mach-ecovec24/setup.c
@@ -13,7 +13,6 @@
 #include <linux/platform_device.h>
 #include <linux/mmc/host.h>
 #include <linux/mmc/sh_mmcif.h>
-#include <linux/mmc/sh_mobile_sdhi.h>
 #include <linux/mtd/physmap.h>
 #include <linux/mfd/tmio.h>
 #include <linux/gpio.h>
diff --git a/arch/sh/boards/mach-kfr2r09/setup.c b/arch/sh/boards/mach-kfr2r09/setup.c
index 6bd9230..5deb2d8 100644
--- a/arch/sh/boards/mach-kfr2r09/setup.c
+++ b/arch/sh/boards/mach-kfr2r09/setup.c
@@ -11,7 +11,6 @@
 #include <linux/platform_device.h>
 #include <linux/interrupt.h>
 #include <linux/mmc/host.h>
-#include <linux/mmc/sh_mobile_sdhi.h>
 #include <linux/mfd/tmio.h>
 #include <linux/mtd/physmap.h>
 #include <linux/mtd/onenand.h>
diff --git a/arch/sh/boards/mach-migor/setup.c b/arch/sh/boards/mach-migor/setup.c
index 7a04da3..5de60a7 100644
--- a/arch/sh/boards/mach-migor/setup.c
+++ b/arch/sh/boards/mach-migor/setup.c
@@ -13,7 +13,6 @@
 #include <linux/input.h>
 #include <linux/input/sh_keysc.h>
 #include <linux/mmc/host.h>
-#include <linux/mmc/sh_mobile_sdhi.h>
 #include <linux/mtd/physmap.h>
 #include <linux/mfd/tmio.h>
 #include <linux/mtd/nand.h>
diff --git a/arch/sh/boards/mach-se/7724/setup.c b/arch/sh/boards/mach-se/7724/setup.c
index e0e1df1..f1fecd3 100644
--- a/arch/sh/boards/mach-se/7724/setup.c
+++ b/arch/sh/boards/mach-se/7724/setup.c
@@ -15,7 +15,6 @@
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
 #include <linux/mmc/host.h>
-#include <linux/mmc/sh_mobile_sdhi.h>
 #include <linux/mfd/tmio.h>
 #include <linux/mtd/physmap.h>
 #include <linux/delay.h>
diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild
index a319745..751c337 100644
--- a/arch/sh/include/asm/Kbuild
+++ b/arch/sh/include/asm/Kbuild
@@ -26,6 +26,7 @@
 generic-y += poll.h
 generic-y += preempt.h
 generic-y += resource.h
+generic-y += rwsem.h
 generic-y += sembuf.h
 generic-y += serial.h
 generic-y += shmbuf.h
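
Adding rwsem.h to generic-y here (and to sparc's Kbuild below) lets both
architectures drop their hand-rolled headers, deleted next, in favour of the
shared implementation in include/asm-generic/rwsem.h. Kbuild then synthesizes
a one-line wrapper; a sketch of the generated file, assuming the standard
generic-y mechanism and path:

	/* generated at build time as arch/sh/include/generated/asm/rwsem.h */
	#include <asm-generic/rwsem.h>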
diff --git a/arch/sh/include/asm/rwsem.h b/arch/sh/include/asm/rwsem.h
deleted file mode 100644
index edab572..0000000
--- a/arch/sh/include/asm/rwsem.h
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * include/asm-sh/rwsem.h: R/W semaphores for SH using the stuff
- * in lib/rwsem.c.
- */
-
-#ifndef _ASM_SH_RWSEM_H
-#define _ASM_SH_RWSEM_H
-
-#ifndef _LINUX_RWSEM_H
-#error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead"
-#endif
-
-#ifdef __KERNEL__
-
-#define RWSEM_UNLOCKED_VALUE		0x00000000
-#define RWSEM_ACTIVE_BIAS		0x00000001
-#define RWSEM_ACTIVE_MASK		0x0000ffff
-#define RWSEM_WAITING_BIAS		(-0x00010000)
-#define RWSEM_ACTIVE_READ_BIAS		RWSEM_ACTIVE_BIAS
-#define RWSEM_ACTIVE_WRITE_BIAS		(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
-
-/*
- * lock for reading
- */
-static inline void __down_read(struct rw_semaphore *sem)
-{
-	if (atomic_inc_return((atomic_t *)(&sem->count)) > 0)
-		smp_wmb();
-	else
-		rwsem_down_read_failed(sem);
-}
-
-static inline int __down_read_trylock(struct rw_semaphore *sem)
-{
-	int tmp;
-
-	while ((tmp = sem->count) >= 0) {
-		if (tmp == cmpxchg(&sem->count, tmp,
-				   tmp + RWSEM_ACTIVE_READ_BIAS)) {
-			smp_wmb();
-			return 1;
-		}
-	}
-	return 0;
-}
-
-/*
- * lock for writing
- */
-static inline void __down_write(struct rw_semaphore *sem)
-{
-	int tmp;
-
-	tmp = atomic_add_return(RWSEM_ACTIVE_WRITE_BIAS,
-				(atomic_t *)(&sem->count));
-	if (tmp == RWSEM_ACTIVE_WRITE_BIAS)
-		smp_wmb();
-	else
-		rwsem_down_write_failed(sem);
-}
-
-static inline int __down_write_trylock(struct rw_semaphore *sem)
-{
-	int tmp;
-
-	tmp = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
-		      RWSEM_ACTIVE_WRITE_BIAS);
-	smp_wmb();
-	return tmp == RWSEM_UNLOCKED_VALUE;
-}
-
-/*
- * unlock after reading
- */
-static inline void __up_read(struct rw_semaphore *sem)
-{
-	int tmp;
-
-	smp_wmb();
-	tmp = atomic_dec_return((atomic_t *)(&sem->count));
-	if (tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0)
-		rwsem_wake(sem);
-}
-
-/*
- * unlock after writing
- */
-static inline void __up_write(struct rw_semaphore *sem)
-{
-	smp_wmb();
-	if (atomic_sub_return(RWSEM_ACTIVE_WRITE_BIAS,
-			      (atomic_t *)(&sem->count)) < 0)
-		rwsem_wake(sem);
-}
-
-/*
- * implement atomic add functionality
- */
-static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem)
-{
-	atomic_add(delta, (atomic_t *)(&sem->count));
-}
-
-/*
- * downgrade write lock to read lock
- */
-static inline void __downgrade_write(struct rw_semaphore *sem)
-{
-	int tmp;
-
-	smp_wmb();
-	tmp = atomic_add_return(-RWSEM_WAITING_BIAS, (atomic_t *)(&sem->count));
-	if (tmp < 0)
-		rwsem_downgrade_wake(sem);
-}
-
-static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
-{
-	__down_write(sem);
-}
-
-/*
- * implement exchange and add functionality
- */
-static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
-{
-	smp_mb();
-	return atomic_add_return(delta, (atomic_t *)(&sem->count));
-}
-
-#endif /* __KERNEL__ */
-#endif /* _ASM_SH_RWSEM_H */
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index e9286188..6024c26 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -16,6 +16,7 @@
 generic-y += module.h
 generic-y += mutex.h
 generic-y += preempt.h
+generic-y += rwsem.h
 generic-y += serial.h
 generic-y += trace_clock.h
 generic-y += types.h
diff --git a/arch/sparc/include/asm/rwsem.h b/arch/sparc/include/asm/rwsem.h
deleted file mode 100644
index 069bf4d..0000000
--- a/arch/sparc/include/asm/rwsem.h
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * rwsem.h: R/W semaphores implemented using CAS
- *
- * Written by David S. Miller (davem@redhat.com), 2001.
- * Derived from asm-i386/rwsem.h
- */
-#ifndef _SPARC64_RWSEM_H
-#define _SPARC64_RWSEM_H
-
-#ifndef _LINUX_RWSEM_H
-#error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead"
-#endif
-
-#ifdef __KERNEL__
-
-#define RWSEM_UNLOCKED_VALUE		0x00000000L
-#define RWSEM_ACTIVE_BIAS		0x00000001L
-#define RWSEM_ACTIVE_MASK		0xffffffffL
-#define RWSEM_WAITING_BIAS		(-RWSEM_ACTIVE_MASK-1)
-#define RWSEM_ACTIVE_READ_BIAS		RWSEM_ACTIVE_BIAS
-#define RWSEM_ACTIVE_WRITE_BIAS		(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
-
-/*
- * lock for reading
- */
-static inline void __down_read(struct rw_semaphore *sem)
-{
-	if (unlikely(atomic64_inc_return((atomic64_t *)(&sem->count)) <= 0L))
-		rwsem_down_read_failed(sem);
-}
-
-static inline int __down_read_trylock(struct rw_semaphore *sem)
-{
-	long tmp;
-
-	while ((tmp = sem->count) >= 0L) {
-		if (tmp == cmpxchg(&sem->count, tmp,
-				   tmp + RWSEM_ACTIVE_READ_BIAS)) {
-			return 1;
-		}
-	}
-	return 0;
-}
-
-/*
- * lock for writing
- */
-static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
-{
-	long tmp;
-
-	tmp = atomic64_add_return(RWSEM_ACTIVE_WRITE_BIAS,
-				  (atomic64_t *)(&sem->count));
-	if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS))
-		rwsem_down_write_failed(sem);
-}
-
-static inline void __down_write(struct rw_semaphore *sem)
-{
-	__down_write_nested(sem, 0);
-}
-
-static inline int __down_write_trylock(struct rw_semaphore *sem)
-{
-	long tmp;
-
-	tmp = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
-		      RWSEM_ACTIVE_WRITE_BIAS);
-	return tmp == RWSEM_UNLOCKED_VALUE;
-}
-
-/*
- * unlock after reading
- */
-static inline void __up_read(struct rw_semaphore *sem)
-{
-	long tmp;
-
-	tmp = atomic64_dec_return((atomic64_t *)(&sem->count));
-	if (unlikely(tmp < -1L && (tmp & RWSEM_ACTIVE_MASK) == 0L))
-		rwsem_wake(sem);
-}
-
-/*
- * unlock after writing
- */
-static inline void __up_write(struct rw_semaphore *sem)
-{
-	if (unlikely(atomic64_sub_return(RWSEM_ACTIVE_WRITE_BIAS,
-					 (atomic64_t *)(&sem->count)) < 0L))
-		rwsem_wake(sem);
-}
-
-/*
- * implement atomic add functionality
- */
-static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem)
-{
-	atomic64_add(delta, (atomic64_t *)(&sem->count));
-}
-
-/*
- * downgrade write lock to read lock
- */
-static inline void __downgrade_write(struct rw_semaphore *sem)
-{
-	long tmp;
-
-	tmp = atomic64_add_return(-RWSEM_WAITING_BIAS, (atomic64_t *)(&sem->count));
-	if (tmp < 0L)
-		rwsem_downgrade_wake(sem);
-}
-
-/*
- * implement exchange and add functionality
- */
-static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
-{
-	return atomic64_add_return(delta, (atomic64_t *)(&sem->count));
-}
-
-#endif /* __KERNEL__ */
-
-#endif /* _SPARC64_RWSEM_H */
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 6596f66..a4b8b5a 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -1756,7 +1756,7 @@
 			}
 		}
 #endif
-	} while (entry->nr < PERF_MAX_STACK_DEPTH);
+	} while (entry->nr < sysctl_perf_event_max_stack);
 }
 
 static inline int
@@ -1790,7 +1790,7 @@
 		pc = sf.callers_pc;
 		ufp = (unsigned long)sf.fp + STACK_BIAS;
 		perf_callchain_store(entry, pc);
-	} while (entry->nr < PERF_MAX_STACK_DEPTH);
+	} while (entry->nr < sysctl_perf_event_max_stack);
 }
 
 static void perf_callchain_user_32(struct perf_callchain_entry *entry,
@@ -1822,7 +1822,7 @@
 			ufp = (unsigned long)sf.fp;
 		}
 		perf_callchain_store(entry, pc);
-	} while (entry->nr < PERF_MAX_STACK_DEPTH);
+	} while (entry->nr < sysctl_perf_event_max_stack);
 }
 
 void
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2dc18605..7bb1574 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -164,10 +164,6 @@
 	def_bool y
 	depends on KPROBES || PERF_EVENTS || UPROBES
 
-config PERF_EVENTS_INTEL_UNCORE
-	def_bool y
-	depends on PERF_EVENTS && CPU_SUP_INTEL && PCI
-
 config OUTPUT_FORMAT
 	string
 	default "elf32-i386" if X86_32
@@ -1046,6 +1042,8 @@
 	def_bool y
 	depends on X86_MCE_INTEL
 
+source "arch/x86/events/Kconfig"
+
 config X86_LEGACY_VM86
 	bool "Legacy VM86 support"
 	default n
@@ -1210,15 +1208,6 @@
 	def_bool y
 	depends on MICROCODE
 
-config PERF_EVENTS_AMD_POWER
-	depends on PERF_EVENTS && CPU_SUP_AMD
-	tristate "AMD Processor Power Reporting Mechanism"
-	---help---
-	  Provide power reporting mechanism support for AMD processors.
-	  Currently, it leverages X86_FEATURE_ACC_POWER
-	  (CPUID Fn8000_0007_EDX[12]) interface to calculate the
-	  average power consumption on Family 15h processors.
-
 config X86_MSR
 	tristate "/dev/cpu/*/msr - Model-specific register support"
 	---help---
@@ -1932,54 +1921,38 @@
 	  (CONFIG_PHYSICAL_START) is used as the minimum location.
 
 config RANDOMIZE_BASE
-	bool "Randomize the address of the kernel image"
+	bool "Randomize the address of the kernel image (KASLR)"
 	depends on RELOCATABLE
 	default n
 	---help---
-	   Randomizes the physical and virtual address at which the
-	   kernel image is decompressed, as a security feature that
-	   deters exploit attempts relying on knowledge of the location
-	   of kernel internals.
+	  In support of Kernel Address Space Layout Randomization (KASLR),
+	  this randomizes the physical address at which the kernel image
+	  is decompressed and the virtual address where the kernel
+	  image is mapped, as a security feature that deters exploit
+	  attempts relying on knowledge of the location of kernel
+	  code internals.
 
-	   Entropy is generated using the RDRAND instruction if it is
-	   supported. If RDTSC is supported, it is used as well. If
-	   neither RDRAND nor RDTSC are supported, then randomness is
-	   read from the i8254 timer.
+	  The kernel physical and virtual address can be randomized
+	  from 16MB up to 1GB on 64-bit and 512MB on 32-bit. (Note that
+	  using RANDOMIZE_BASE reduces the memory space available to
+	  kernel modules from 1.5GB to 1GB.)
 
-	   The kernel will be offset by up to RANDOMIZE_BASE_MAX_OFFSET,
-	   and aligned according to PHYSICAL_ALIGN. Since the kernel is
-	   built using 2GiB addressing, and PHYSICAL_ALGIN must be at a
-	   minimum of 2MiB, only 10 bits of entropy is theoretically
-	   possible. At best, due to page table layouts, 64-bit can use
-	   9 bits of entropy and 32-bit uses 8 bits.
+	  Entropy is generated using the RDRAND instruction if it is
+	  supported. If RDTSC is supported, its value is mixed into
+	  the entropy pool as well. If neither RDRAND nor RDTSC are
+	  supported, then entropy is read from the i8254 timer.
 
-	   If unsure, say N.
+	  Since the kernel is built using 2GB addressing, and
+	  PHYSICAL_ALIGN must be at a minimum of 2MB, only 10 bits of
+	  entropy are theoretically possible. Currently, with the
+	  default value for PHYSICAL_ALIGN and due to page table
+	  layouts, 64-bit uses 9 bits of entropy and 32-bit uses 8 bits.
 
-config RANDOMIZE_BASE_MAX_OFFSET
-	hex "Maximum kASLR offset allowed" if EXPERT
-	depends on RANDOMIZE_BASE
-	range 0x0 0x20000000 if X86_32
-	default "0x20000000" if X86_32
-	range 0x0 0x40000000 if X86_64
-	default "0x40000000" if X86_64
-	---help---
-	  The lesser of RANDOMIZE_BASE_MAX_OFFSET and available physical
-	  memory is used to determine the maximal offset in bytes that will
-	  be applied to the kernel when kernel Address Space Layout
-	  Randomization (kASLR) is active. This must be a multiple of
-	  PHYSICAL_ALIGN.
+	  If CONFIG_HIBERNATION is also enabled, KASLR is disabled at boot
+	  time. To enable it, boot with "kaslr" on the kernel command
+	  line (which will also disable hibernation).
 
-	  On 32-bit this is limited to 512MiB by page table layouts. The
-	  default is 512MiB.
-
-	  On 64-bit this is limited by how the kernel fixmap page table is
-	  positioned, so this cannot be larger than 1GiB currently. Without
-	  RANDOMIZE_BASE, there is a 512MiB to 1.5GiB split between kernel
-	  and modules. When RANDOMIZE_BASE_MAX_OFFSET is above 512MiB, the
-	  modules area will shrink to compensate, up to the current maximum
-	  1GiB to 1GiB split. The default is 1GiB.
-
-	  If unsure, leave at the default value.
+	  If unsure, say N.
 
 # Relocation on x86 needs some additional build support
 config X86_NEED_RELOCS
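
The "10 bits of entropy" figure in the new help text follows directly from
the stated constraints. A small standalone check of the arithmetic (plain C,
illustrative only):

	#include <stdio.h>

	int main(void)
	{
		unsigned long range = 1UL << 31;	/* 2GB addressing */
		unsigned long align = 2UL << 20;	/* minimum 2MB PHYSICAL_ALIGN */
		unsigned long n;
		int bits = 0;

		for (n = range / align; n > 1; n >>= 1)
			bits++;			/* floor(log2(slot count)) */

		printf("%d bits\n", bits);	/* prints "10 bits" */
		return 0;
	}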
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 4086abc..6fce7f0 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -208,7 +208,8 @@
 
 head-y := arch/x86/kernel/head_$(BITS).o
 head-y += arch/x86/kernel/head$(BITS).o
-head-y += arch/x86/kernel/head.o
+head-y += arch/x86/kernel/ebda.o
+head-y += arch/x86/kernel/platform-quirks.o
 
 libs-y  += arch/x86/lib/
 
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index b1ef9e4..700a9c6 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -86,16 +86,7 @@
 
 SETUP_OBJS = $(addprefix $(obj)/,$(setup-y))
 
-sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(_text\|_end\)$$/\#define VO_\2 0x\1/p'
-
-quiet_cmd_voffset = VOFFSET $@
-      cmd_voffset = $(NM) $< | sed -n $(sed-voffset) > $@
-
-targets += voffset.h
-$(obj)/voffset.h: vmlinux FORCE
-	$(call if_changed,voffset)
-
-sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|_end\|z_.*\)$$/\#define ZO_\2 0x\1/p'
+sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|_end\|_ehead\|_text\|z_.*\)$$/\#define ZO_\2 0x\1/p'
 
 quiet_cmd_zoffset = ZOFFSET $@
       cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@
@@ -106,7 +97,7 @@
 
 
 AFLAGS_header.o += -I$(obj)
-$(obj)/header.o: $(obj)/voffset.h $(obj)/zoffset.h
+$(obj)/header.o: $(obj)/zoffset.h
 
 LDFLAGS_setup.elf	:= -T
 $(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 8774cb2..cfdd8c3 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -57,12 +57,27 @@
 hostprogs-y	:= mkpiggy
 HOST_EXTRACFLAGS += -I$(srctree)/tools/include
 
+sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(_text\|__bss_start\|_end\)$$/\#define VO_\2 _AC(0x\1,UL)/p'
+
+quiet_cmd_voffset = VOFFSET $@
+      cmd_voffset = $(NM) $< | sed -n $(sed-voffset) > $@
+
+targets += ../voffset.h
+
+$(obj)/../voffset.h: vmlinux FORCE
+	$(call if_changed,voffset)
+
+$(obj)/misc.o: $(obj)/../voffset.h
+
 vmlinux-objs-y := $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
-	$(obj)/string.o $(obj)/cmdline.o \
+	$(obj)/string.o $(obj)/cmdline.o $(obj)/error.o \
 	$(obj)/piggy.o $(obj)/cpuflags.o
 
 vmlinux-objs-$(CONFIG_EARLY_PRINTK) += $(obj)/early_serial_console.o
-vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/aslr.o
+vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o
+ifdef CONFIG_X86_64
+	vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/pagetable.o
+endif
 
 $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
 
@@ -109,10 +124,8 @@
 suffix-$(CONFIG_KERNEL_LZO) 	:= lzo
 suffix-$(CONFIG_KERNEL_LZ4) 	:= lz4
 
-RUN_SIZE = $(shell $(OBJDUMP) -h vmlinux | \
-	     $(CONFIG_SHELL) $(srctree)/arch/x86/tools/calc_run_size.sh)
 quiet_cmd_mkpiggy = MKPIGGY $@
-      cmd_mkpiggy = $(obj)/mkpiggy $< $(RUN_SIZE) > $@ || ( rm -f $@ ; false )
+      cmd_mkpiggy = $(obj)/mkpiggy $< > $@ || ( rm -f $@ ; false )
 
 targets += piggy.S
 $(obj)/piggy.S: $(obj)/vmlinux.bin.$(suffix-y) $(obj)/mkpiggy FORCE
diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c
deleted file mode 100644
index 6a9b96b..0000000
--- a/arch/x86/boot/compressed/aslr.c
+++ /dev/null
@@ -1,339 +0,0 @@
-#include "misc.h"
-
-#include <asm/msr.h>
-#include <asm/archrandom.h>
-#include <asm/e820.h>
-
-#include <generated/compile.h>
-#include <linux/module.h>
-#include <linux/uts.h>
-#include <linux/utsname.h>
-#include <generated/utsrelease.h>
-
-/* Simplified build-specific string for starting entropy. */
-static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
-		LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
-
-#define I8254_PORT_CONTROL	0x43
-#define I8254_PORT_COUNTER0	0x40
-#define I8254_CMD_READBACK	0xC0
-#define I8254_SELECT_COUNTER0	0x02
-#define I8254_STATUS_NOTREADY	0x40
-static inline u16 i8254(void)
-{
-	u16 status, timer;
-
-	do {
-		outb(I8254_PORT_CONTROL,
-		     I8254_CMD_READBACK | I8254_SELECT_COUNTER0);
-		status = inb(I8254_PORT_COUNTER0);
-		timer  = inb(I8254_PORT_COUNTER0);
-		timer |= inb(I8254_PORT_COUNTER0) << 8;
-	} while (status & I8254_STATUS_NOTREADY);
-
-	return timer;
-}
-
-static unsigned long rotate_xor(unsigned long hash, const void *area,
-				size_t size)
-{
-	size_t i;
-	unsigned long *ptr = (unsigned long *)area;
-
-	for (i = 0; i < size / sizeof(hash); i++) {
-		/* Rotate by odd number of bits and XOR. */
-		hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7);
-		hash ^= ptr[i];
-	}
-
-	return hash;
-}
-
-/* Attempt to create a simple but unpredictable starting entropy. */
-static unsigned long get_random_boot(void)
-{
-	unsigned long hash = 0;
-
-	hash = rotate_xor(hash, build_str, sizeof(build_str));
-	hash = rotate_xor(hash, real_mode, sizeof(*real_mode));
-
-	return hash;
-}
-
-static unsigned long get_random_long(void)
-{
-#ifdef CONFIG_X86_64
-	const unsigned long mix_const = 0x5d6008cbf3848dd3UL;
-#else
-	const unsigned long mix_const = 0x3f39e593UL;
-#endif
-	unsigned long raw, random = get_random_boot();
-	bool use_i8254 = true;
-
-	debug_putstr("KASLR using");
-
-	if (has_cpuflag(X86_FEATURE_RDRAND)) {
-		debug_putstr(" RDRAND");
-		if (rdrand_long(&raw)) {
-			random ^= raw;
-			use_i8254 = false;
-		}
-	}
-
-	if (has_cpuflag(X86_FEATURE_TSC)) {
-		debug_putstr(" RDTSC");
-		raw = rdtsc();
-
-		random ^= raw;
-		use_i8254 = false;
-	}
-
-	if (use_i8254) {
-		debug_putstr(" i8254");
-		random ^= i8254();
-	}
-
-	/* Circular multiply for better bit diffusion */
-	asm("mul %3"
-	    : "=a" (random), "=d" (raw)
-	    : "a" (random), "rm" (mix_const));
-	random += raw;
-
-	debug_putstr("...\n");
-
-	return random;
-}
-
-struct mem_vector {
-	unsigned long start;
-	unsigned long size;
-};
-
-#define MEM_AVOID_MAX 5
-static struct mem_vector mem_avoid[MEM_AVOID_MAX];
-
-static bool mem_contains(struct mem_vector *region, struct mem_vector *item)
-{
-	/* Item at least partially before region. */
-	if (item->start < region->start)
-		return false;
-	/* Item at least partially after region. */
-	if (item->start + item->size > region->start + region->size)
-		return false;
-	return true;
-}
-
-static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two)
-{
-	/* Item one is entirely before item two. */
-	if (one->start + one->size <= two->start)
-		return false;
-	/* Item one is entirely after item two. */
-	if (one->start >= two->start + two->size)
-		return false;
-	return true;
-}
-
-static void mem_avoid_init(unsigned long input, unsigned long input_size,
-			   unsigned long output, unsigned long output_size)
-{
-	u64 initrd_start, initrd_size;
-	u64 cmd_line, cmd_line_size;
-	unsigned long unsafe, unsafe_len;
-	char *ptr;
-
-	/*
-	 * Avoid the region that is unsafe to overlap during
-	 * decompression (see calculations at top of misc.c).
-	 */
-	unsafe_len = (output_size >> 12) + 32768 + 18;
-	unsafe = (unsigned long)input + input_size - unsafe_len;
-	mem_avoid[0].start = unsafe;
-	mem_avoid[0].size = unsafe_len;
-
-	/* Avoid initrd. */
-	initrd_start  = (u64)real_mode->ext_ramdisk_image << 32;
-	initrd_start |= real_mode->hdr.ramdisk_image;
-	initrd_size  = (u64)real_mode->ext_ramdisk_size << 32;
-	initrd_size |= real_mode->hdr.ramdisk_size;
-	mem_avoid[1].start = initrd_start;
-	mem_avoid[1].size = initrd_size;
-
-	/* Avoid kernel command line. */
-	cmd_line  = (u64)real_mode->ext_cmd_line_ptr << 32;
-	cmd_line |= real_mode->hdr.cmd_line_ptr;
-	/* Calculate size of cmd_line. */
-	ptr = (char *)(unsigned long)cmd_line;
-	for (cmd_line_size = 0; ptr[cmd_line_size++]; )
-		;
-	mem_avoid[2].start = cmd_line;
-	mem_avoid[2].size = cmd_line_size;
-
-	/* Avoid heap memory. */
-	mem_avoid[3].start = (unsigned long)free_mem_ptr;
-	mem_avoid[3].size = BOOT_HEAP_SIZE;
-
-	/* Avoid stack memory. */
-	mem_avoid[4].start = (unsigned long)free_mem_end_ptr;
-	mem_avoid[4].size = BOOT_STACK_SIZE;
-}
-
-/* Does this memory vector overlap a known avoided area? */
-static bool mem_avoid_overlap(struct mem_vector *img)
-{
-	int i;
-	struct setup_data *ptr;
-
-	for (i = 0; i < MEM_AVOID_MAX; i++) {
-		if (mem_overlaps(img, &mem_avoid[i]))
-			return true;
-	}
-
-	/* Avoid all entries in the setup_data linked list. */
-	ptr = (struct setup_data *)(unsigned long)real_mode->hdr.setup_data;
-	while (ptr) {
-		struct mem_vector avoid;
-
-		avoid.start = (unsigned long)ptr;
-		avoid.size = sizeof(*ptr) + ptr->len;
-
-		if (mem_overlaps(img, &avoid))
-			return true;
-
-		ptr = (struct setup_data *)(unsigned long)ptr->next;
-	}
-
-	return false;
-}
-
-static unsigned long slots[CONFIG_RANDOMIZE_BASE_MAX_OFFSET /
-			   CONFIG_PHYSICAL_ALIGN];
-static unsigned long slot_max;
-
-static void slots_append(unsigned long addr)
-{
-	/* Overflowing the slots list should be impossible. */
-	if (slot_max >= CONFIG_RANDOMIZE_BASE_MAX_OFFSET /
-			CONFIG_PHYSICAL_ALIGN)
-		return;
-
-	slots[slot_max++] = addr;
-}
-
-static unsigned long slots_fetch_random(void)
-{
-	/* Handle case of no slots stored. */
-	if (slot_max == 0)
-		return 0;
-
-	return slots[get_random_long() % slot_max];
-}
-
-static void process_e820_entry(struct e820entry *entry,
-			       unsigned long minimum,
-			       unsigned long image_size)
-{
-	struct mem_vector region, img;
-
-	/* Skip non-RAM entries. */
-	if (entry->type != E820_RAM)
-		return;
-
-	/* Ignore entries entirely above our maximum. */
-	if (entry->addr >= CONFIG_RANDOMIZE_BASE_MAX_OFFSET)
-		return;
-
-	/* Ignore entries entirely below our minimum. */
-	if (entry->addr + entry->size < minimum)
-		return;
-
-	region.start = entry->addr;
-	region.size = entry->size;
-
-	/* Potentially raise address to minimum location. */
-	if (region.start < minimum)
-		region.start = minimum;
-
-	/* Potentially raise address to meet alignment requirements. */
-	region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN);
-
-	/* Did we raise the address above the bounds of this e820 region? */
-	if (region.start > entry->addr + entry->size)
-		return;
-
-	/* Reduce size by any delta from the original address. */
-	region.size -= region.start - entry->addr;
-
-	/* Reduce maximum size to fit end of image within maximum limit. */
-	if (region.start + region.size > CONFIG_RANDOMIZE_BASE_MAX_OFFSET)
-		region.size = CONFIG_RANDOMIZE_BASE_MAX_OFFSET - region.start;
-
-	/* Walk each aligned slot and check for avoided areas. */
-	for (img.start = region.start, img.size = image_size ;
-	     mem_contains(&region, &img) ;
-	     img.start += CONFIG_PHYSICAL_ALIGN) {
-		if (mem_avoid_overlap(&img))
-			continue;
-		slots_append(img.start);
-	}
-}
-
-static unsigned long find_random_addr(unsigned long minimum,
-				      unsigned long size)
-{
-	int i;
-	unsigned long addr;
-
-	/* Make sure minimum is aligned. */
-	minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN);
-
-	/* Verify potential e820 positions, appending to slots list. */
-	for (i = 0; i < real_mode->e820_entries; i++) {
-		process_e820_entry(&real_mode->e820_map[i], minimum, size);
-	}
-
-	return slots_fetch_random();
-}
-
-unsigned char *choose_kernel_location(struct boot_params *boot_params,
-				      unsigned char *input,
-				      unsigned long input_size,
-				      unsigned char *output,
-				      unsigned long output_size)
-{
-	unsigned long choice = (unsigned long)output;
-	unsigned long random;
-
-#ifdef CONFIG_HIBERNATION
-	if (!cmdline_find_option_bool("kaslr")) {
-		debug_putstr("KASLR disabled by default...\n");
-		goto out;
-	}
-#else
-	if (cmdline_find_option_bool("nokaslr")) {
-		debug_putstr("KASLR disabled by cmdline...\n");
-		goto out;
-	}
-#endif
-
-	boot_params->hdr.loadflags |= KASLR_FLAG;
-
-	/* Record the various known unsafe memory ranges. */
-	mem_avoid_init((unsigned long)input, input_size,
-		       (unsigned long)output, output_size);
-
-	/* Walk e820 and find a random address. */
-	random = find_random_addr(choice, output_size);
-	if (!random) {
-		debug_putstr("KASLR could not find suitable E820 region...\n");
-		goto out;
-	}
-
-	/* Always enforce the minimum. */
-	if (random < choice)
-		goto out;
-
-	choice = random;
-out:
-	return (unsigned char *)choice;
-}
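
aslr.c is deleted here; as the hunks further down show, the logic reappears
(extended) as arch/x86/boot/compressed/kaslr.c. For readers skimming the
removal, a compressed pseudo-C sketch of the slot algorithm both files
implement; mem_avoid_overlap_at() is a hypothetical stand-in for the real
mem_avoid_overlap() call on a mem_vector:

	/* collect every aligned, non-avoided candidate position ... */
	for (addr = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN);
	     addr + image_size <= region.start + region.size;
	     addr += CONFIG_PHYSICAL_ALIGN) {
		if (!mem_avoid_overlap_at(addr, image_size))
			slots[slot_max++] = addr;
	}

	/* ... then pick one at random (slot_max == 0 means "no relocation") */
	choice = slot_max ? slots[get_random_long() % slot_max] : 0;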
diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c
index b68e303..73ccf63 100644
--- a/arch/x86/boot/compressed/cmdline.c
+++ b/arch/x86/boot/compressed/cmdline.c
@@ -15,9 +15,9 @@
 #include "../cmdline.c"
 static unsigned long get_cmd_line_ptr(void)
 {
-	unsigned long cmd_line_ptr = real_mode->hdr.cmd_line_ptr;
+	unsigned long cmd_line_ptr = boot_params->hdr.cmd_line_ptr;
 
-	cmd_line_ptr |= (u64)real_mode->ext_cmd_line_ptr << 32;
+	cmd_line_ptr |= (u64)boot_params->ext_cmd_line_ptr << 32;
 
 	return cmd_line_ptr;
 }
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 583d539..52fef60 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -571,312 +571,6 @@
 	efi_call_early(free_pool, pci_handle);
 }
 
-static void
-setup_pixel_info(struct screen_info *si, u32 pixels_per_scan_line,
-		 struct efi_pixel_bitmask pixel_info, int pixel_format)
-{
-	if (pixel_format == PIXEL_RGB_RESERVED_8BIT_PER_COLOR) {
-		si->lfb_depth = 32;
-		si->lfb_linelength = pixels_per_scan_line * 4;
-		si->red_size = 8;
-		si->red_pos = 0;
-		si->green_size = 8;
-		si->green_pos = 8;
-		si->blue_size = 8;
-		si->blue_pos = 16;
-		si->rsvd_size = 8;
-		si->rsvd_pos = 24;
-	} else if (pixel_format == PIXEL_BGR_RESERVED_8BIT_PER_COLOR) {
-		si->lfb_depth = 32;
-		si->lfb_linelength = pixels_per_scan_line * 4;
-		si->red_size = 8;
-		si->red_pos = 16;
-		si->green_size = 8;
-		si->green_pos = 8;
-		si->blue_size = 8;
-		si->blue_pos = 0;
-		si->rsvd_size = 8;
-		si->rsvd_pos = 24;
-	} else if (pixel_format == PIXEL_BIT_MASK) {
-		find_bits(pixel_info.red_mask, &si->red_pos, &si->red_size);
-		find_bits(pixel_info.green_mask, &si->green_pos,
-			  &si->green_size);
-		find_bits(pixel_info.blue_mask, &si->blue_pos, &si->blue_size);
-		find_bits(pixel_info.reserved_mask, &si->rsvd_pos,
-			  &si->rsvd_size);
-		si->lfb_depth = si->red_size + si->green_size +
-			si->blue_size + si->rsvd_size;
-		si->lfb_linelength = (pixels_per_scan_line * si->lfb_depth) / 8;
-	} else {
-		si->lfb_depth = 4;
-		si->lfb_linelength = si->lfb_width / 2;
-		si->red_size = 0;
-		si->red_pos = 0;
-		si->green_size = 0;
-		si->green_pos = 0;
-		si->blue_size = 0;
-		si->blue_pos = 0;
-		si->rsvd_size = 0;
-		si->rsvd_pos = 0;
-	}
-}
-
-static efi_status_t
-__gop_query32(struct efi_graphics_output_protocol_32 *gop32,
-	      struct efi_graphics_output_mode_info **info,
-	      unsigned long *size, u64 *fb_base)
-{
-	struct efi_graphics_output_protocol_mode_32 *mode;
-	efi_status_t status;
-	unsigned long m;
-
-	m = gop32->mode;
-	mode = (struct efi_graphics_output_protocol_mode_32 *)m;
-
-	status = efi_early->call(gop32->query_mode, gop32,
-				 mode->mode, size, info);
-	if (status != EFI_SUCCESS)
-		return status;
-
-	*fb_base = mode->frame_buffer_base;
-	return status;
-}
-
-static efi_status_t
-setup_gop32(struct screen_info *si, efi_guid_t *proto,
-	    unsigned long size, void **gop_handle)
-{
-	struct efi_graphics_output_protocol_32 *gop32, *first_gop;
-	unsigned long nr_gops;
-	u16 width, height;
-	u32 pixels_per_scan_line;
-	u32 ext_lfb_base;
-	u64 fb_base;
-	struct efi_pixel_bitmask pixel_info;
-	int pixel_format;
-	efi_status_t status;
-	u32 *handles = (u32 *)(unsigned long)gop_handle;
-	int i;
-
-	first_gop = NULL;
-	gop32 = NULL;
-
-	nr_gops = size / sizeof(u32);
-	for (i = 0; i < nr_gops; i++) {
-		struct efi_graphics_output_mode_info *info = NULL;
-		efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID;
-		bool conout_found = false;
-		void *dummy = NULL;
-		u32 h = handles[i];
-		u64 current_fb_base;
-
-		status = efi_call_early(handle_protocol, h,
-					proto, (void **)&gop32);
-		if (status != EFI_SUCCESS)
-			continue;
-
-		status = efi_call_early(handle_protocol, h,
-					&conout_proto, &dummy);
-		if (status == EFI_SUCCESS)
-			conout_found = true;
-
-		status = __gop_query32(gop32, &info, &size, &current_fb_base);
-		if (status == EFI_SUCCESS && (!first_gop || conout_found)) {
-			/*
-			 * Systems that use the UEFI Console Splitter may
-			 * provide multiple GOP devices, not all of which are
-			 * backed by real hardware. The workaround is to search
-			 * for a GOP implementing the ConOut protocol, and if
-			 * one isn't found, to just fall back to the first GOP.
-			 */
-			width = info->horizontal_resolution;
-			height = info->vertical_resolution;
-			pixel_format = info->pixel_format;
-			pixel_info = info->pixel_information;
-			pixels_per_scan_line = info->pixels_per_scan_line;
-			fb_base = current_fb_base;
-
-			/*
-			 * Once we've found a GOP supporting ConOut,
-			 * don't bother looking any further.
-			 */
-			first_gop = gop32;
-			if (conout_found)
-				break;
-		}
-	}
-
-	/* Did we find any GOPs? */
-	if (!first_gop)
-		goto out;
-
-	/* EFI framebuffer */
-	si->orig_video_isVGA = VIDEO_TYPE_EFI;
-
-	si->lfb_width = width;
-	si->lfb_height = height;
-	si->lfb_base = fb_base;
-
-	ext_lfb_base = (u64)(unsigned long)fb_base >> 32;
-	if (ext_lfb_base) {
-		si->capabilities |= VIDEO_CAPABILITY_64BIT_BASE;
-		si->ext_lfb_base = ext_lfb_base;
-	}
-
-	si->pages = 1;
-
-	setup_pixel_info(si, pixels_per_scan_line, pixel_info, pixel_format);
-
-	si->lfb_size = si->lfb_linelength * si->lfb_height;
-
-	si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS;
-out:
-	return status;
-}
-
-static efi_status_t
-__gop_query64(struct efi_graphics_output_protocol_64 *gop64,
-	      struct efi_graphics_output_mode_info **info,
-	      unsigned long *size, u64 *fb_base)
-{
-	struct efi_graphics_output_protocol_mode_64 *mode;
-	efi_status_t status;
-	unsigned long m;
-
-	m = gop64->mode;
-	mode = (struct efi_graphics_output_protocol_mode_64 *)m;
-
-	status = efi_early->call(gop64->query_mode, gop64,
-				 mode->mode, size, info);
-	if (status != EFI_SUCCESS)
-		return status;
-
-	*fb_base = mode->frame_buffer_base;
-	return status;
-}
-
-static efi_status_t
-setup_gop64(struct screen_info *si, efi_guid_t *proto,
-	    unsigned long size, void **gop_handle)
-{
-	struct efi_graphics_output_protocol_64 *gop64, *first_gop;
-	unsigned long nr_gops;
-	u16 width, height;
-	u32 pixels_per_scan_line;
-	u32 ext_lfb_base;
-	u64 fb_base;
-	struct efi_pixel_bitmask pixel_info;
-	int pixel_format;
-	efi_status_t status;
-	u64 *handles = (u64 *)(unsigned long)gop_handle;
-	int i;
-
-	first_gop = NULL;
-	gop64 = NULL;
-
-	nr_gops = size / sizeof(u64);
-	for (i = 0; i < nr_gops; i++) {
-		struct efi_graphics_output_mode_info *info = NULL;
-		efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID;
-		bool conout_found = false;
-		void *dummy = NULL;
-		u64 h = handles[i];
-		u64 current_fb_base;
-
-		status = efi_call_early(handle_protocol, h,
-					proto, (void **)&gop64);
-		if (status != EFI_SUCCESS)
-			continue;
-
-		status = efi_call_early(handle_protocol, h,
-					&conout_proto, &dummy);
-		if (status == EFI_SUCCESS)
-			conout_found = true;
-
-		status = __gop_query64(gop64, &info, &size, &current_fb_base);
-		if (status == EFI_SUCCESS && (!first_gop || conout_found)) {
-			/*
-			 * Systems that use the UEFI Console Splitter may
-			 * provide multiple GOP devices, not all of which are
-			 * backed by real hardware. The workaround is to search
-			 * for a GOP implementing the ConOut protocol, and if
-			 * one isn't found, to just fall back to the first GOP.
-			 */
-			width = info->horizontal_resolution;
-			height = info->vertical_resolution;
-			pixel_format = info->pixel_format;
-			pixel_info = info->pixel_information;
-			pixels_per_scan_line = info->pixels_per_scan_line;
-			fb_base = current_fb_base;
-
-			/*
-			 * Once we've found a GOP supporting ConOut,
-			 * don't bother looking any further.
-			 */
-			first_gop = gop64;
-			if (conout_found)
-				break;
-		}
-	}
-
-	/* Did we find any GOPs? */
-	if (!first_gop)
-		goto out;
-
-	/* EFI framebuffer */
-	si->orig_video_isVGA = VIDEO_TYPE_EFI;
-
-	si->lfb_width = width;
-	si->lfb_height = height;
-	si->lfb_base = fb_base;
-
-	ext_lfb_base = (u64)(unsigned long)fb_base >> 32;
-	if (ext_lfb_base) {
-		si->capabilities |= VIDEO_CAPABILITY_64BIT_BASE;
-		si->ext_lfb_base = ext_lfb_base;
-	}
-
-	si->pages = 1;
-
-	setup_pixel_info(si, pixels_per_scan_line, pixel_info, pixel_format);
-
-	si->lfb_size = si->lfb_linelength * si->lfb_height;
-
-	si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS;
-out:
-	return status;
-}
-
-/*
- * See if we have Graphics Output Protocol
- */
-static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto,
-			      unsigned long size)
-{
-	efi_status_t status;
-	void **gop_handle = NULL;
-
-	status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
-				size, (void **)&gop_handle);
-	if (status != EFI_SUCCESS)
-		return status;
-
-	status = efi_call_early(locate_handle,
-				EFI_LOCATE_BY_PROTOCOL,
-				proto, NULL, &size, gop_handle);
-	if (status != EFI_SUCCESS)
-		goto free_handle;
-
-	if (efi_early->is64)
-		status = setup_gop64(si, proto, size, gop_handle);
-	else
-		status = setup_gop32(si, proto, size, gop_handle);
-
-free_handle:
-	efi_call_early(free_pool, gop_handle);
-	return status;
-}
-
 static efi_status_t
 setup_uga32(void **uga_handle, unsigned long size, u32 *width, u32 *height)
 {
@@ -1038,7 +732,7 @@
 				EFI_LOCATE_BY_PROTOCOL,
 				&graphics_proto, NULL, &size, gop_handle);
 	if (status == EFI_BUFFER_TOO_SMALL)
-		status = setup_gop(si, &graphics_proto, size);
+		status = efi_setup_gop(NULL, si, &graphics_proto, size);
 
 	if (status != EFI_SUCCESS) {
 		size = 0;
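
The GOP discovery code removed above now lives in the shared EFI stub, which
the surviving efi_setup_gop() call reaches. A sketch of the helper's
signature as this call site implies it; treat the exact prototype as an
assumption rather than a quote:

	efi_status_t efi_setup_gop(efi_system_table_t *sys_table,	/* NULL here on x86 */
				   struct screen_info *si,
				   efi_guid_t *proto,
				   unsigned long size);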
diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h
index d487e72..c0223f1 100644
--- a/arch/x86/boot/compressed/eboot.h
+++ b/arch/x86/boot/compressed/eboot.h
@@ -11,80 +11,6 @@
 
 #define DESC_TYPE_CODE_DATA	(1 << 0)
 
-#define EFI_CONSOLE_OUT_DEVICE_GUID    \
-	EFI_GUID(0xd3b36f2c, 0xd551, 0x11d4, 0x9a, 0x46, 0x0, 0x90, 0x27, \
-		  0x3f, 0xc1, 0x4d)
-
-#define PIXEL_RGB_RESERVED_8BIT_PER_COLOR		0
-#define PIXEL_BGR_RESERVED_8BIT_PER_COLOR		1
-#define PIXEL_BIT_MASK					2
-#define PIXEL_BLT_ONLY					3
-#define PIXEL_FORMAT_MAX				4
-
-struct efi_pixel_bitmask {
-	u32 red_mask;
-	u32 green_mask;
-	u32 blue_mask;
-	u32 reserved_mask;
-};
-
-struct efi_graphics_output_mode_info {
-	u32 version;
-	u32 horizontal_resolution;
-	u32 vertical_resolution;
-	int pixel_format;
-	struct efi_pixel_bitmask pixel_information;
-	u32 pixels_per_scan_line;
-} __packed;
-
-struct efi_graphics_output_protocol_mode_32 {
-	u32 max_mode;
-	u32 mode;
-	u32 info;
-	u32 size_of_info;
-	u64 frame_buffer_base;
-	u32 frame_buffer_size;
-} __packed;
-
-struct efi_graphics_output_protocol_mode_64 {
-	u32 max_mode;
-	u32 mode;
-	u64 info;
-	u64 size_of_info;
-	u64 frame_buffer_base;
-	u64 frame_buffer_size;
-} __packed;
-
-struct efi_graphics_output_protocol_mode {
-	u32 max_mode;
-	u32 mode;
-	unsigned long info;
-	unsigned long size_of_info;
-	u64 frame_buffer_base;
-	unsigned long frame_buffer_size;
-} __packed;
-
-struct efi_graphics_output_protocol_32 {
-	u32 query_mode;
-	u32 set_mode;
-	u32 blt;
-	u32 mode;
-};
-
-struct efi_graphics_output_protocol_64 {
-	u64 query_mode;
-	u64 set_mode;
-	u64 blt;
-	u64 mode;
-};
-
-struct efi_graphics_output_protocol {
-	void *query_mode;
-	unsigned long set_mode;
-	unsigned long blt;
-	struct efi_graphics_output_protocol_mode *mode;
-};
-
 struct efi_uga_draw_protocol_32 {
 	u32 get_mode;
 	u32 set_mode;
diff --git a/arch/x86/boot/compressed/error.c b/arch/x86/boot/compressed/error.c
new file mode 100644
index 0000000..6248740
--- /dev/null
+++ b/arch/x86/boot/compressed/error.c
@@ -0,0 +1,22 @@
+/*
+ * Callers outside of misc.c need access to the error reporting routines,
+ * but the *_putstr() functions need to stay in misc.c because of how
+ * memcpy() and memmove() are defined for the compressed boot environment.
+ */
+#include "misc.h"
+
+void warn(char *m)
+{
+	error_putstr("\n\n");
+	error_putstr(m);
+	error_putstr("\n\n");
+}
+
+void error(char *m)
+{
+	warn(m);
+	error_putstr(" -- System halted");
+
+	while (1)
+		asm("hlt");
+}
diff --git a/arch/x86/boot/compressed/error.h b/arch/x86/boot/compressed/error.h
new file mode 100644
index 0000000..2e59dac
--- /dev/null
+++ b/arch/x86/boot/compressed/error.h
@@ -0,0 +1,7 @@
+#ifndef BOOT_COMPRESSED_ERROR_H
+#define BOOT_COMPRESSED_ERROR_H
+
+void warn(char *m);
+void error(char *m);
+
+#endif /* BOOT_COMPRESSED_ERROR_H */
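
error.c/error.h split the reporting helpers out of misc.c so other
compressed-boot code (such as the new kaslr.c below) can use them. A small
usage sketch; the caller and its condition flags are hypothetical:

	#include "error.h"

	static void check_state(int suspicious, int fatal)
	{
		if (suspicious)
			warn("something odd happened, continuing");	/* prints, returns */
		if (fatal)
			error("cannot continue");	/* prints, then halts the CPU */
	}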
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 0256064..1038524 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -176,7 +176,9 @@
 1:
 
 	/* Target address to relocate to for decompression */
-	addl	$z_extract_offset, %ebx
+	movl    BP_init_size(%esi), %eax
+	subl    $_end, %eax
+	addl    %eax, %ebx
 
 	/* Set up the stack */
 	leal	boot_stack_end(%ebx), %esp
@@ -233,24 +235,28 @@
 2:
 
 /*
- * Do the decompression, and jump to the new kernel..
+ * Do the extraction, and jump to the new kernel..
  */
-				/* push arguments for decompress_kernel: */
-	pushl	$z_run_size	/* size of kernel with .bss and .brk */
+				/* push arguments for extract_kernel: */
 	pushl	$z_output_len	/* decompressed length, end of relocs */
-	leal	z_extract_offset_negative(%ebx), %ebp
+
+	movl    BP_init_size(%esi), %eax
+	subl    $_end, %eax
+	movl    %ebx, %ebp
+	subl    %eax, %ebp
 	pushl	%ebp		/* output address */
+
 	pushl	$z_input_len	/* input_len */
 	leal	input_data(%ebx), %eax
 	pushl	%eax		/* input_data */
 	leal	boot_heap(%ebx), %eax
 	pushl	%eax		/* heap area */
 	pushl	%esi		/* real mode pointer */
-	call	decompress_kernel /* returns kernel location in %eax */
-	addl	$28, %esp
+	call	extract_kernel	/* returns kernel location in %eax */
+	addl	$24, %esp
 
 /*
- * Jump to the decompressed kernel.
+ * Jump to the extracted kernel.
  */
 	xorl	%ebx, %ebx
 	jmp	*%eax
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 86558a1..0d80a7a 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -110,7 +110,9 @@
 1:
 
 	/* Target address to relocate to for decompression */
-	addl	$z_extract_offset, %ebx
+	movl	BP_init_size(%esi), %eax
+	subl	$_end, %eax
+	addl	%eax, %ebx
 
 /*
  * Prepare for entering 64 bit mode
@@ -132,7 +134,7 @@
 	/* Initialize Page tables to 0 */
 	leal	pgtable(%ebx), %edi
 	xorl	%eax, %eax
-	movl	$((4096*6)/4), %ecx
+	movl	$(BOOT_INIT_PGT_SIZE/4), %ecx
 	rep	stosl
 
 	/* Build Level 4 */
@@ -338,7 +340,9 @@
 1:
 
 	/* Target address to relocate to for decompression */
-	leaq	z_extract_offset(%rbp), %rbx
+	movl	BP_init_size(%rsi), %ebx
+	subl	$_end, %ebx
+	addq	%rbp, %rbx
 
 	/* Set up the stack */
 	leaq	boot_stack_end(%rbx), %rsp
@@ -408,19 +412,16 @@
 2:
 	
 /*
- * Do the decompression, and jump to the new kernel..
+ * Do the extraction, and jump to the new kernel..
  */
 	pushq	%rsi			/* Save the real mode argument */
-	movq	$z_run_size, %r9	/* size of kernel with .bss and .brk */
-	pushq	%r9
 	movq	%rsi, %rdi		/* real mode address */
 	leaq	boot_heap(%rip), %rsi	/* malloc area for uncompression */
 	leaq	input_data(%rip), %rdx  /* input_data */
 	movl	$z_input_len, %ecx	/* input_len */
 	movq	%rbp, %r8		/* output target address */
 	movq	$z_output_len, %r9	/* decompressed length, end of relocs */
-	call	decompress_kernel	/* returns kernel location in %rax */
-	popq	%r9
+	call	extract_kernel		/* returns kernel location in %rax */
 	popq	%rsi
 
 /*
@@ -485,4 +486,4 @@
 	.section ".pgtable","a",@nobits
 	.balign 4096
 pgtable:
-	.fill 6*4096, 1, 0
+	.fill BOOT_PGT_SIZE, 1, 0
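Both the 32-bit and 64-bit paths above now compute the relocation target at runtime from boot_params->hdr.init_size instead of the link-time z_extract_offset symbol. A standalone C sketch of that arithmetic, with made-up sizes (none of these values come from a real boot):

#include <stdio.h>

int main(void)
{
	unsigned long load_addr = 0x01000000;	/* where the bootloader put ZO */
	unsigned long init_size = 0x01400000;	/* boot_params->hdr.init_size  */
	unsigned long zo_size   = 0x00400000;	/* _end - startup_32 of ZO     */

	/* Relocate ZO so it ends flush against load_addr + init_size: */
	unsigned long target = load_addr + (init_size - zo_size);

	printf("relocation target: %#lx\n", target);	/* 0x2000000 */
	return 0;
}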
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
new file mode 100644
index 0000000..cfeb025
--- /dev/null
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -0,0 +1,510 @@
+/*
+ * kaslr.c
+ *
+ * This contains the routines needed to generate a reasonable level of
+ * entropy to choose a randomized kernel base address offset in support
+ * of Kernel Address Space Layout Randomization (KASLR). It additionally
+ * handles walking the physical memory maps (and tracking memory regions
+ * to avoid) in order to select a physical memory location that can
+ * contain the entire properly aligned running kernel image.
+ *
+ */
+#include "misc.h"
+#include "error.h"
+
+#include <asm/msr.h>
+#include <asm/archrandom.h>
+#include <asm/e820.h>
+
+#include <generated/compile.h>
+#include <linux/module.h>
+#include <linux/uts.h>
+#include <linux/utsname.h>
+#include <generated/utsrelease.h>
+
+/* Simplified build-specific string for starting entropy. */
+static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
+		LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
+
+#define I8254_PORT_CONTROL	0x43
+#define I8254_PORT_COUNTER0	0x40
+#define I8254_CMD_READBACK	0xC0
+#define I8254_SELECT_COUNTER0	0x02
+#define I8254_STATUS_NOTREADY	0x40
+static inline u16 i8254(void)
+{
+	u16 status, timer;
+
+	do {
+		outb(I8254_PORT_CONTROL,
+		     I8254_CMD_READBACK | I8254_SELECT_COUNTER0);
+		status = inb(I8254_PORT_COUNTER0);
+		timer  = inb(I8254_PORT_COUNTER0);
+		timer |= inb(I8254_PORT_COUNTER0) << 8;
+	} while (status & I8254_STATUS_NOTREADY);
+
+	return timer;
+}
+
+static unsigned long rotate_xor(unsigned long hash, const void *area,
+				size_t size)
+{
+	size_t i;
+	unsigned long *ptr = (unsigned long *)area;
+
+	for (i = 0; i < size / sizeof(hash); i++) {
+		/* Rotate by odd number of bits and XOR. */
+		hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7);
+		hash ^= ptr[i];
+	}
+
+	return hash;
+}
+
+/* Attempt to create a simple but unpredictable starting entropy. */
+static unsigned long get_random_boot(void)
+{
+	unsigned long hash = 0;
+
+	hash = rotate_xor(hash, build_str, sizeof(build_str));
+	hash = rotate_xor(hash, boot_params, sizeof(*boot_params));
+
+	return hash;
+}
+
+static unsigned long get_random_long(const char *purpose)
+{
+#ifdef CONFIG_X86_64
+	const unsigned long mix_const = 0x5d6008cbf3848dd3UL;
+#else
+	const unsigned long mix_const = 0x3f39e593UL;
+#endif
+	unsigned long raw, random = get_random_boot();
+	bool use_i8254 = true;
+
+	debug_putstr(purpose);
+	debug_putstr(" KASLR using");
+
+	if (has_cpuflag(X86_FEATURE_RDRAND)) {
+		debug_putstr(" RDRAND");
+		if (rdrand_long(&raw)) {
+			random ^= raw;
+			use_i8254 = false;
+		}
+	}
+
+	if (has_cpuflag(X86_FEATURE_TSC)) {
+		debug_putstr(" RDTSC");
+		raw = rdtsc();
+
+		random ^= raw;
+		use_i8254 = false;
+	}
+
+	if (use_i8254) {
+		debug_putstr(" i8254");
+		random ^= i8254();
+	}
+
+	/* Circular multiply for better bit diffusion */
+	asm("mul %3"
+	    : "=a" (random), "=d" (raw)
+	    : "a" (random), "rm" (mix_const));
+	random += raw;
+
+	debug_putstr("...\n");
+
+	return random;
+}
+
+struct mem_vector {
+	unsigned long start;
+	unsigned long size;
+};
+
+enum mem_avoid_index {
+	MEM_AVOID_ZO_RANGE = 0,
+	MEM_AVOID_INITRD,
+	MEM_AVOID_CMDLINE,
+	MEM_AVOID_BOOTPARAMS,
+	MEM_AVOID_MAX,
+};
+
+static struct mem_vector mem_avoid[MEM_AVOID_MAX];
+
+static bool mem_contains(struct mem_vector *region, struct mem_vector *item)
+{
+	/* Item at least partially before region. */
+	if (item->start < region->start)
+		return false;
+	/* Item at least partially after region. */
+	if (item->start + item->size > region->start + region->size)
+		return false;
+	return true;
+}
+
+static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two)
+{
+	/* Item one is entirely before item two. */
+	if (one->start + one->size <= two->start)
+		return false;
+	/* Item one is entirely after item two. */
+	if (one->start >= two->start + two->size)
+		return false;
+	return true;
+}
+
+/*
+ * In theory, KASLR can put the kernel anywhere in the range of [16M, 64T).
+ * The mem_avoid array is used to store the ranges that need to be avoided
+ * when KASLR searches for an appropriate random address. We must avoid any
+ * regions that are unsafe to overlap with during decompression, and other
+ * things like the initrd, cmdline and boot_params. This comment seeks to
+ * explain mem_avoid as clearly as possible since incorrect mem_avoid
+ * memory ranges lead to really hard-to-debug boot failures.
+ *
+ * The initrd, cmdline, and boot_params are trivial to identify for
+ * avoiding. They are MEM_AVOID_INITRD, MEM_AVOID_CMDLINE, and
+ * MEM_AVOID_BOOTPARAMS respectively below.
+ *
+ * What is less obvious is how to avoid the range of memory that is used
+ * during decompression (MEM_AVOID_ZO_RANGE below). This range must cover
+ * the compressed kernel (ZO) and its run space, which is used to extract
+ * the uncompressed kernel (VO) and relocs.
+ *
+ * ZO's full run size sits against the end of the decompression buffer, so
+ * we can calculate where text, data, bss, etc of ZO are positioned more
+ * easily.
+ *
+ * For additional background, the decompression calculations can be found
+ * in header.S, and the memory diagram is based on the one found in misc.c.
+ *
+ * The following conditions are already enforced by the image layouts and
+ * associated code:
+ *  - input + input_size >= output + output_size
+ *  - kernel_total_size <= init_size
+ *  - kernel_total_size <= output_size (see Note below)
+ *  - output + init_size >= output + output_size
+ *
+ * (Note that kernel_total_size and output_size have no fundamental
+ * relationship, but output_size is passed to choose_random_location
+ * as a maximum of the two. The diagram is showing a case where
+ * kernel_total_size is larger than output_size, but this case is
+ * handled by bumping output_size.)
+ *
+ * The above conditions can be illustrated by a diagram:
+ *
+ * 0   output            input            input+input_size    output+init_size
+ * |     |                 |                             |             |
+ * |     |                 |                             |             |
+ * |-----|--------|--------|--------------|-----------|--|-------------|
+ *                |                       |           |
+ *                |                       |           |
+ * output+init_size-ZO_INIT_SIZE  output+output_size  output+kernel_total_size
+ *
+ * [output, output+init_size) is the entire memory range used for
+ * extracting the compressed image.
+ *
+ * [output, output+kernel_total_size) is the range needed for the
+ * uncompressed kernel (VO) and its run size (bss, brk, etc).
+ *
+ * [output, output+output_size) is VO plus relocs (i.e. the entire
+ * uncompressed payload contained by ZO). This is the area of the buffer
+ * written to during decompression.
+ *
+ * [output+init_size-ZO_INIT_SIZE, output+init_size) is the worst-case
+ * range of the copied ZO and decompression code. (i.e. the range
+ * covered backwards of size ZO_INIT_SIZE, starting from output+init_size.)
+ *
+ * [input, input+input_size) is the original copied compressed image (ZO)
+ * (i.e. it does not include its run size). This range must be avoided
+ * because it contains the data used for decompression.
+ *
+ * [input+input_size, output+init_size) is [_text, _end) for ZO. This
+ * range includes ZO's heap and stack, and must be avoided since it
+ * performs the decompression.
+ *
+ * Since the above two ranges need to be avoided and they are adjacent,
+ * they can be merged, resulting in: [input, output+init_size) which
+ * becomes the MEM_AVOID_ZO_RANGE below.
+ */
+static void mem_avoid_init(unsigned long input, unsigned long input_size,
+			   unsigned long output)
+{
+	unsigned long init_size = boot_params->hdr.init_size;
+	u64 initrd_start, initrd_size;
+	u64 cmd_line, cmd_line_size;
+	char *ptr;
+
+	/*
+	 * Avoid the region that is unsafe to overlap during
+	 * decompression.
+	 */
+	mem_avoid[MEM_AVOID_ZO_RANGE].start = input;
+	mem_avoid[MEM_AVOID_ZO_RANGE].size = (output + init_size) - input;
+	add_identity_map(mem_avoid[MEM_AVOID_ZO_RANGE].start,
+			 mem_avoid[MEM_AVOID_ZO_RANGE].size);
+
+	/* Avoid initrd. */
+	initrd_start  = (u64)boot_params->ext_ramdisk_image << 32;
+	initrd_start |= boot_params->hdr.ramdisk_image;
+	initrd_size  = (u64)boot_params->ext_ramdisk_size << 32;
+	initrd_size |= boot_params->hdr.ramdisk_size;
+	mem_avoid[MEM_AVOID_INITRD].start = initrd_start;
+	mem_avoid[MEM_AVOID_INITRD].size = initrd_size;
+	/* No need to set mapping for initrd, it will be handled in VO. */
+
+	/* Avoid kernel command line. */
+	cmd_line  = (u64)boot_params->ext_cmd_line_ptr << 32;
+	cmd_line |= boot_params->hdr.cmd_line_ptr;
+	/* Calculate size of cmd_line. */
+	ptr = (char *)(unsigned long)cmd_line;
+	for (cmd_line_size = 0; ptr[cmd_line_size++]; )
+		;
+	mem_avoid[MEM_AVOID_CMDLINE].start = cmd_line;
+	mem_avoid[MEM_AVOID_CMDLINE].size = cmd_line_size;
+	add_identity_map(mem_avoid[MEM_AVOID_CMDLINE].start,
+			 mem_avoid[MEM_AVOID_CMDLINE].size);
+
+	/* Avoid boot parameters. */
+	mem_avoid[MEM_AVOID_BOOTPARAMS].start = (unsigned long)boot_params;
+	mem_avoid[MEM_AVOID_BOOTPARAMS].size = sizeof(*boot_params);
+	add_identity_map(mem_avoid[MEM_AVOID_BOOTPARAMS].start,
+			 mem_avoid[MEM_AVOID_BOOTPARAMS].size);
+
+	/* We don't need to set a mapping for setup_data. */
+
+#ifdef CONFIG_X86_VERBOSE_BOOTUP
+	/* Make sure video RAM can be used. */
+	add_identity_map(0, PMD_SIZE);
+#endif
+}
+
+/*
+ * Does this memory vector overlap a known avoided area? If so, record the
+ * overlap region with the lowest address.
+ */
+static bool mem_avoid_overlap(struct mem_vector *img,
+			      struct mem_vector *overlap)
+{
+	int i;
+	struct setup_data *ptr;
+	unsigned long earliest = img->start + img->size;
+	bool is_overlapping = false;
+
+	for (i = 0; i < MEM_AVOID_MAX; i++) {
+		if (mem_overlaps(img, &mem_avoid[i]) &&
+		    mem_avoid[i].start < earliest) {
+			*overlap = mem_avoid[i];
+			earliest = overlap->start;
+			is_overlapping = true;
+		}
+	}
+
+	/* Avoid all entries in the setup_data linked list. */
+	ptr = (struct setup_data *)(unsigned long)boot_params->hdr.setup_data;
+	while (ptr) {
+		struct mem_vector avoid;
+
+		avoid.start = (unsigned long)ptr;
+		avoid.size = sizeof(*ptr) + ptr->len;
+
+		if (mem_overlaps(img, &avoid) && (avoid.start < earliest)) {
+			*overlap = avoid;
+			earliest = overlap->start;
+			is_overlapping = true;
+		}
+
+		ptr = (struct setup_data *)(unsigned long)ptr->next;
+	}
+
+	return is_overlapping;
+}
+
+static unsigned long slots[KERNEL_IMAGE_SIZE / CONFIG_PHYSICAL_ALIGN];
+
+struct slot_area {
+	unsigned long addr;
+	int num;
+};
+
+#define MAX_SLOT_AREA 100
+
+static struct slot_area slot_areas[MAX_SLOT_AREA];
+
+static unsigned long slot_max;
+
+static unsigned long slot_area_index;
+
+static void store_slot_info(struct mem_vector *region, unsigned long image_size)
+{
+	struct slot_area slot_area;
+
+	if (slot_area_index == MAX_SLOT_AREA)
+		return;
+
+	slot_area.addr = region->start;
+	slot_area.num = (region->size - image_size) /
+			CONFIG_PHYSICAL_ALIGN + 1;
+
+	if (slot_area.num > 0) {
+		slot_areas[slot_area_index++] = slot_area;
+		slot_max += slot_area.num;
+	}
+}
+
+static void slots_append(unsigned long addr)
+{
+	/* Overflowing the slots list should be impossible. */
+	if (slot_max >= KERNEL_IMAGE_SIZE / CONFIG_PHYSICAL_ALIGN)
+		return;
+
+	slots[slot_max++] = addr;
+}
+
+static unsigned long slots_fetch_random(void)
+{
+	/* Handle case of no slots stored. */
+	if (slot_max == 0)
+		return 0;
+
+	return slots[get_random_long("Physical") % slot_max];
+}
+
+static void process_e820_entry(struct e820entry *entry,
+			       unsigned long minimum,
+			       unsigned long image_size)
+{
+	struct mem_vector region, img, overlap;
+
+	/* Skip non-RAM entries. */
+	if (entry->type != E820_RAM)
+		return;
+
+	/* Ignore entries entirely above our maximum. */
+	if (entry->addr >= KERNEL_IMAGE_SIZE)
+		return;
+
+	/* Ignore entries entirely below our minimum. */
+	if (entry->addr + entry->size < minimum)
+		return;
+
+	region.start = entry->addr;
+	region.size = entry->size;
+
+	/* Potentially raise address to minimum location. */
+	if (region.start < minimum)
+		region.start = minimum;
+
+	/* Potentially raise address to meet alignment requirements. */
+	region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN);
+
+	/* Did we raise the address above the bounds of this e820 region? */
+	if (region.start > entry->addr + entry->size)
+		return;
+
+	/* Reduce size by any delta from the original address. */
+	region.size -= region.start - entry->addr;
+
+	/* Reduce maximum size to fit end of image within maximum limit. */
+	if (region.start + region.size > KERNEL_IMAGE_SIZE)
+		region.size = KERNEL_IMAGE_SIZE - region.start;
+
+	/* Walk each aligned slot and check for avoided areas. */
+	for (img.start = region.start, img.size = image_size ;
+	     mem_contains(&region, &img) ;
+	     img.start += CONFIG_PHYSICAL_ALIGN) {
+		if (mem_avoid_overlap(&img, &overlap))
+			continue;
+		slots_append(img.start);
+	}
+}
+
+static unsigned long find_random_phys_addr(unsigned long minimum,
+					   unsigned long image_size)
+{
+	int i;
+	unsigned long addr;
+
+	/* Make sure minimum is aligned. */
+	minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN);
+
+	/* Verify potential e820 positions, appending to slots list. */
+	for (i = 0; i < boot_params->e820_entries; i++) {
+		process_e820_entry(&boot_params->e820_map[i], minimum,
+				   image_size);
+	}
+
+	return slots_fetch_random();
+}
+
+static unsigned long find_random_virt_addr(unsigned long minimum,
+					   unsigned long image_size)
+{
+	unsigned long slots, random_addr;
+
+	/* Make sure minimum is aligned. */
+	minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN);
+	/* Align image_size for easy slot calculations. */
+	image_size = ALIGN(image_size, CONFIG_PHYSICAL_ALIGN);
+
+	/*
+	 * How many CONFIG_PHYSICAL_ALIGN-sized slots can hold image_size
+	 * within the range of minimum to KERNEL_IMAGE_SIZE?
+	 */
+	slots = (KERNEL_IMAGE_SIZE - minimum - image_size) /
+		 CONFIG_PHYSICAL_ALIGN + 1;
+
+	random_addr = get_random_long("Virtual") % slots;
+
+	return random_addr * CONFIG_PHYSICAL_ALIGN + minimum;
+}
+
+/*
+ * Since this function works with addresses numerically rather than
+ * dereferencing them, it takes the input and output pointers as
+ * 'unsigned long'.
+ */
+unsigned char *choose_random_location(unsigned long input,
+				      unsigned long input_size,
+				      unsigned long output,
+				      unsigned long output_size)
+{
+	unsigned long choice = output;
+	unsigned long random_addr;
+
+#ifdef CONFIG_HIBERNATION
+	if (!cmdline_find_option_bool("kaslr")) {
+		warn("KASLR disabled: 'kaslr' not on cmdline (hibernation selected).");
+		goto out;
+	}
+#else
+	if (cmdline_find_option_bool("nokaslr")) {
+		warn("KASLR disabled: 'nokaslr' on cmdline.");
+		goto out;
+	}
+#endif
+
+	boot_params->hdr.loadflags |= KASLR_FLAG;
+
+	/* Record the various known unsafe memory ranges. */
+	mem_avoid_init(input, input_size, output);
+
+	/* Walk e820 and find a random address. */
+	random_addr = find_random_phys_addr(output, output_size);
+	if (!random_addr) {
+		warn("KASLR disabled: could not find suitable E820 region!");
+		goto out;
+	}
+
+	/* Always enforce the minimum. */
+	if (random_addr < choice)
+		goto out;
+
+	choice = random_addr;
+
+	add_identity_map(choice, output_size);
+
+	/* This actually loads the identity pagetable on x86_64. */
+	finalize_identity_maps();
+out:
+	return (unsigned char *)choice;
+}
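The entropy helpers above are plain C and can be exercised outside the kernel. A minimal userspace sketch of the same rotate-and-XOR mixing used by get_random_boot() (the build string here is invented):

#include <stdio.h>
#include <stddef.h>

static unsigned long rotate_xor(unsigned long hash, const void *area,
				size_t size)
{
	size_t i;
	const unsigned long *ptr = area;

	for (i = 0; i < size / sizeof(hash); i++) {
		/* Rotate by an odd number of bits, then XOR in a word. */
		hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7);
		hash ^= ptr[i];
	}
	return hash;
}

int main(void)
{
	static const char build_str[] = "4.6.0 (user@host) (gcc) #1 SMP";

	printf("starting entropy: %lx\n",
	       rotate_xor(0, build_str, sizeof(build_str)));
	return 0;
}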
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 79dac17..f14db4e 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -1,8 +1,10 @@
 /*
  * misc.c
  *
- * This is a collection of several routines from gzip-1.0.3
- * adapted for Linux.
+ * This is a collection of several routines used to extract the kernel,
+ * covering KASLR relocation, decompression, ELF parsing, and
+ * relocation processing. Additionally included are the screen and serial
+ * output functions and related debugging support functions.
  *
  * malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994
  * puts by Nick Holloway 1993, better puts by Martin Mares 1995
@@ -10,111 +12,37 @@
  */
 
 #include "misc.h"
+#include "error.h"
 #include "../string.h"
-
-/* WARNING!!
- * This code is compiled with -fPIC and it is relocated dynamically
- * at run time, but no relocation processing is performed.
- * This means that it is not safe to place pointers in static structures.
- */
+#include "../voffset.h"
 
 /*
- * Getting to provable safe in place decompression is hard.
- * Worst case behaviours need to be analyzed.
- * Background information:
- *
- * The file layout is:
- *    magic[2]
- *    method[1]
- *    flags[1]
- *    timestamp[4]
- *    extraflags[1]
- *    os[1]
- *    compressed data blocks[N]
- *    crc[4] orig_len[4]
- *
- * resulting in 18 bytes of non compressed data overhead.
- *
- * Files divided into blocks
- * 1 bit (last block flag)
- * 2 bits (block type)
- *
- * 1 block occurs every 32K -1 bytes or when there 50% compression
- * has been achieved. The smallest block type encoding is always used.
- *
- * stored:
- *    32 bits length in bytes.
- *
- * fixed:
- *    magic fixed tree.
- *    symbols.
- *
- * dynamic:
- *    dynamic tree encoding.
- *    symbols.
- *
- *
- * The buffer for decompression in place is the length of the
- * uncompressed data, plus a small amount extra to keep the algorithm safe.
- * The compressed data is placed at the end of the buffer.  The output
- * pointer is placed at the start of the buffer and the input pointer
- * is placed where the compressed data starts.  Problems will occur
- * when the output pointer overruns the input pointer.
- *
- * The output pointer can only overrun the input pointer if the input
- * pointer is moving faster than the output pointer.  A condition only
- * triggered by data whose compressed form is larger than the uncompressed
- * form.
- *
- * The worst case at the block level is a growth of the compressed data
- * of 5 bytes per 32767 bytes.
- *
- * The worst case internal to a compressed block is very hard to figure.
- * The worst case can at least be boundined by having one bit that represents
- * 32764 bytes and then all of the rest of the bytes representing the very
- * very last byte.
- *
- * All of which is enough to compute an amount of extra data that is required
- * to be safe.  To avoid problems at the block level allocating 5 extra bytes
- * per 32767 bytes of data is sufficient.  To avoind problems internal to a
- * block adding an extra 32767 bytes (the worst case uncompressed block size)
- * is sufficient, to ensure that in the worst case the decompressed data for
- * block will stop the byte before the compressed data for a block begins.
- * To avoid problems with the compressed data's meta information an extra 18
- * bytes are needed.  Leading to the formula:
- *
- * extra_bytes = (uncompressed_size >> 12) + 32768 + 18 + decompressor_size.
- *
- * Adding 8 bytes per 32K is a bit excessive but much easier to calculate.
- * Adding 32768 instead of 32767 just makes for round numbers.
- * Adding the decompressor_size is necessary as it musht live after all
- * of the data as well.  Last I measured the decompressor is about 14K.
- * 10K of actual data and 4K of bss.
- *
+ * WARNING!!
+ * This code is compiled with -fPIC and it is relocated dynamically at
+ * run time, but no relocation processing is performed. This means that
+ * it is not safe to place pointers in static structures.
  */
 
-/*
- * gzip declarations
- */
+/* Macros used by the included decompressor code below. */
 #define STATIC		static
 
-#undef memcpy
-
 /*
- * Use a normal definition of memset() from string.c. There are already
+ * Use normal definitions of mem*() from string.c. There are already
  * included header files which expect a definition of memset() and by
  * the time we define memset macro, it is too late.
  */
+#undef memcpy
 #undef memset
 #define memzero(s, n)	memset((s), 0, (n))
+#define memmove		memmove
 
-
-static void error(char *m);
+/* Functions used by the included decompressor code below. */
+void *memmove(void *dest, const void *src, size_t n);
 
 /*
  * This is set up by the setup-routine at boot-time
  */
-struct boot_params *real_mode;		/* Pointer to real-mode data */
+struct boot_params *boot_params;
 
 memptr free_mem_ptr;
 memptr free_mem_end_ptr;
@@ -146,12 +74,16 @@
 #ifdef CONFIG_KERNEL_LZ4
 #include "../../../../lib/decompress_unlz4.c"
 #endif
+/*
+ * NOTE: When adding a new decompressor, please update the analysis in
+ * ../header.S.
+ */
 
 static void scroll(void)
 {
 	int i;
 
-	memcpy(vidmem, vidmem + cols * 2, (lines - 1) * cols * 2);
+	memmove(vidmem, vidmem + cols * 2, (lines - 1) * cols * 2);
 	for (i = (lines - 1) * cols * 2; i < lines * cols * 2; i += 2)
 		vidmem[i] = ' ';
 }
@@ -184,12 +116,12 @@
 		}
 	}
 
-	if (real_mode->screen_info.orig_video_mode == 0 &&
+	if (boot_params->screen_info.orig_video_mode == 0 &&
 	    lines == 0 && cols == 0)
 		return;
 
-	x = real_mode->screen_info.orig_x;
-	y = real_mode->screen_info.orig_y;
+	x = boot_params->screen_info.orig_x;
+	y = boot_params->screen_info.orig_y;
 
 	while ((c = *s++) != '\0') {
 		if (c == '\n') {
@@ -210,8 +142,8 @@
 		}
 	}
 
-	real_mode->screen_info.orig_x = x;
-	real_mode->screen_info.orig_y = y;
+	boot_params->screen_info.orig_x = x;
+	boot_params->screen_info.orig_y = y;
 
 	pos = (x + cols * y) * 2;	/* Update cursor position */
 	outb(14, vidport);
@@ -237,23 +169,13 @@
 	}
 }
 
-static void error(char *x)
-{
-	error_putstr("\n\n");
-	error_putstr(x);
-	error_putstr("\n\n -- System halted");
-
-	while (1)
-		asm("hlt");
-}
-
 #if CONFIG_X86_NEED_RELOCS
 static void handle_relocations(void *output, unsigned long output_len)
 {
 	int *reloc;
 	unsigned long delta, map, ptr;
 	unsigned long min_addr = (unsigned long)output;
-	unsigned long max_addr = min_addr + output_len;
+	unsigned long max_addr = min_addr + (VO___bss_start - VO__text);
 
 	/*
 	 * Calculate the delta between where vmlinux was linked to load
@@ -295,7 +217,7 @@
 	 * So we work backwards from the end of the decompressed image.
 	 */
 	for (reloc = output + output_len - sizeof(*reloc); *reloc; reloc--) {
-		int extended = *reloc;
+		long extended = *reloc;
 		extended += map;
 
 		ptr = (unsigned long)extended;
@@ -372,9 +294,7 @@
 #else
 			dest = (void *)(phdr->p_paddr);
 #endif
-			memcpy(dest,
-			       output + phdr->p_offset,
-			       phdr->p_filesz);
+			memmove(dest, output + phdr->p_offset, phdr->p_filesz);
 			break;
 		default: /* Ignore other PT_* */ break;
 		}
@@ -383,23 +303,41 @@
 	free(phdrs);
 }
 
-asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
+/*
+ * The compressed kernel image (ZO), has been moved so that its position
+ * is against the end of the buffer used to hold the uncompressed kernel
+ * image (VO) and the execution environment (.bss, .brk), which makes sure
+ * there is room to do the in-place decompression. (See header.S for the
+ * calculations.)
+ *
+ *                             |-----compressed kernel image------|
+ *                             V                                  V
+ * 0                       extract_offset                      +INIT_SIZE
+ * |-----------|---------------|-------------------------|--------|
+ *             |               |                         |        |
+ *           VO__text      startup_32 of ZO          VO__end    ZO__end
+ *             ^                                         ^
+ *             |-------uncompressed kernel image---------|
+ *
+ */
+asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
 				  unsigned char *input_data,
 				  unsigned long input_len,
 				  unsigned char *output,
-				  unsigned long output_len,
-				  unsigned long run_size)
+				  unsigned long output_len)
 {
+	const unsigned long kernel_total_size = VO__end - VO__text;
 	unsigned char *output_orig = output;
 
-	real_mode = rmode;
+	/* Retain x86 boot parameters pointer passed from startup_32/64. */
+	boot_params = rmode;
 
-	/* Clear it for solely in-kernel use */
-	real_mode->hdr.loadflags &= ~KASLR_FLAG;
+	/* Clear flags intended for solely in-kernel use. */
+	boot_params->hdr.loadflags &= ~KASLR_FLAG;
 
-	sanitize_boot_params(real_mode);
+	sanitize_boot_params(boot_params);
 
-	if (real_mode->screen_info.orig_video_mode == 7) {
+	if (boot_params->screen_info.orig_video_mode == 7) {
 		vidmem = (char *) 0xb0000;
 		vidport = 0x3b4;
 	} else {
@@ -407,11 +345,11 @@
 		vidport = 0x3d4;
 	}
 
-	lines = real_mode->screen_info.orig_video_lines;
-	cols = real_mode->screen_info.orig_video_cols;
+	lines = boot_params->screen_info.orig_video_lines;
+	cols = boot_params->screen_info.orig_video_cols;
 
 	console_init();
-	debug_putstr("early console in decompress_kernel\n");
+	debug_putstr("early console in extract_kernel\n");
 
 	free_mem_ptr     = heap;	/* Heap */
 	free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
@@ -421,16 +359,16 @@
 	debug_putaddr(input_len);
 	debug_putaddr(output);
 	debug_putaddr(output_len);
-	debug_putaddr(run_size);
+	debug_putaddr(kernel_total_size);
 
 	/*
 	 * The memory hole needed for the kernel is the larger of either
 	 * the entire decompressed kernel plus relocation table, or the
 	 * entire decompressed kernel plus .bss and .brk sections.
 	 */
-	output = choose_kernel_location(real_mode, input_data, input_len, output,
-					output_len > run_size ? output_len
-							      : run_size);
+	output = choose_random_location((unsigned long)input_data, input_len,
+					(unsigned long)output,
+					max(output_len, kernel_total_size));
 
 	/* Validate memory location choices. */
 	if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1))
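The last hunk above is the point of the run_size removal: the hole KASLR must find is simply the larger of the relocs-inclusive decompression output and the full VO image with .bss/.brk. A sketch with illustrative sizes (not taken from a real build):

#include <stdio.h>

int main(void)
{
	unsigned long output_len        = 24UL << 20; /* VO + relocs (mkpiggy) */
	unsigned long kernel_total_size = 26UL << 20; /* VO__end - VO__text    */

	/* choose_random_location() is asked for whichever is larger: */
	unsigned long needed = output_len > kernel_total_size ?
			       output_len : kernel_total_size;

	printf("memory hole needed: %lu MiB\n", needed >> 20);	/* 26 */
	return 0;
}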
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 3783dc3..b6fec1f 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -32,7 +32,7 @@
 /* misc.c */
 extern memptr free_mem_ptr;
 extern memptr free_mem_end_ptr;
-extern struct boot_params *real_mode;		/* Pointer to real-mode data */
+extern struct boot_params *boot_params;
 void __putstr(const char *s);
 void __puthex(unsigned long value);
 #define error_putstr(__x)  __putstr(__x)
@@ -66,26 +66,35 @@
 
 
 #if CONFIG_RANDOMIZE_BASE
-/* aslr.c */
-unsigned char *choose_kernel_location(struct boot_params *boot_params,
-				      unsigned char *input,
+/* kaslr.c */
+unsigned char *choose_random_location(unsigned long input_ptr,
 				      unsigned long input_size,
-				      unsigned char *output,
+				      unsigned long output_ptr,
 				      unsigned long output_size);
 /* cpuflags.c */
 bool has_cpuflag(int flag);
 #else
 static inline
-unsigned char *choose_kernel_location(struct boot_params *boot_params,
-				      unsigned char *input,
+unsigned char *choose_random_location(unsigned long input_ptr,
 				      unsigned long input_size,
-				      unsigned char *output,
+				      unsigned long output_ptr,
 				      unsigned long output_size)
 {
-	return output;
+	return (unsigned char *)output_ptr;
 }
 #endif
 
+#ifdef CONFIG_X86_64
+void add_identity_map(unsigned long start, unsigned long size);
+void finalize_identity_maps(void);
+extern unsigned char _pgtable[];
+#else
+static inline void add_identity_map(unsigned long start, unsigned long size)
+{ }
+static inline void finalize_identity_maps(void)
+{ }
+#endif
+
 #ifdef CONFIG_EARLY_PRINTK
 /* early_serial_console.c */
 extern int early_serial_base;
diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c
index d8222f2..72bad2c 100644
--- a/arch/x86/boot/compressed/mkpiggy.c
+++ b/arch/x86/boot/compressed/mkpiggy.c
@@ -18,11 +18,10 @@
  *
  *  H. Peter Anvin <hpa@linux.intel.com>
  *
- * ----------------------------------------------------------------------- */
-
-/*
- * Compute the desired load offset from a compressed program; outputs
- * a small assembly wrapper with the appropriate symbols defined.
+ * -----------------------------------------------------------------------
+ *
+ * Outputs a small assembly wrapper with the appropriate symbols defined.
+ *
  */
 
 #include <stdlib.h>
@@ -35,14 +34,11 @@
 {
 	uint32_t olen;
 	long ilen;
-	unsigned long offs;
-	unsigned long run_size;
 	FILE *f = NULL;
 	int retval = 1;
 
-	if (argc < 3) {
-		fprintf(stderr, "Usage: %s compressed_file run_size\n",
-				argv[0]);
+	if (argc < 2) {
+		fprintf(stderr, "Usage: %s compressed_file\n", argv[0]);
 		goto bail;
 	}
 
@@ -67,29 +63,11 @@
 	ilen = ftell(f);
 	olen = get_unaligned_le32(&olen);
 
-	/*
-	 * Now we have the input (compressed) and output (uncompressed)
-	 * sizes, compute the necessary decompression offset...
-	 */
-
-	offs = (olen > ilen) ? olen - ilen : 0;
-	offs += olen >> 12;	/* Add 8 bytes for each 32K block */
-	offs += 64*1024 + 128;	/* Add 64K + 128 bytes slack */
-	offs = (offs+4095) & ~4095; /* Round to a 4K boundary */
-	run_size = atoi(argv[2]);
-
 	printf(".section \".rodata..compressed\",\"a\",@progbits\n");
 	printf(".globl z_input_len\n");
 	printf("z_input_len = %lu\n", ilen);
 	printf(".globl z_output_len\n");
 	printf("z_output_len = %lu\n", (unsigned long)olen);
-	printf(".globl z_extract_offset\n");
-	printf("z_extract_offset = 0x%lx\n", offs);
-	/* z_extract_offset_negative allows simplification of head_32.S */
-	printf(".globl z_extract_offset_negative\n");
-	printf("z_extract_offset_negative = -0x%lx\n", offs);
-	printf(".globl z_run_size\n");
-	printf("z_run_size = %lu\n", run_size);
 
 	printf(".globl input_data, input_data_end\n");
 	printf("input_data:\n");
diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c
new file mode 100644
index 0000000..34b95df
--- /dev/null
+++ b/arch/x86/boot/compressed/pagetable.c
@@ -0,0 +1,129 @@
+/*
+ * This code is used on x86_64 to create page table identity mappings on
+ * demand by building up a new set of page tables (or appending to the
+ * existing ones), and then switching over to them when ready.
+ */
+
+/*
+ * Since we're dealing with identity mappings, physical and virtual
+ * addresses are the same, so override these defines which are ultimately
+ * used by the headers in misc.h.
+ */
+#define __pa(x)  ((unsigned long)(x))
+#define __va(x)  ((void *)((unsigned long)(x)))
+
+#include "misc.h"
+
+/* These actually do the work of building the kernel identity maps. */
+#include <asm/init.h>
+#include <asm/pgtable.h>
+#include "../../mm/ident_map.c"
+
+/* Used by pgtable.h asm code to force instruction serialization. */
+unsigned long __force_order;
+
+/* Used to track our page table allocation area. */
+struct alloc_pgt_data {
+	unsigned char *pgt_buf;
+	unsigned long pgt_buf_size;
+	unsigned long pgt_buf_offset;
+};
+
+/*
+ * Allocates space for a page table page, using struct alloc_pgt_data
+ * above. Besides the local callers, this is used as the allocation
+ * callback in mapping_info below.
+ */
+static void *alloc_pgt_page(void *context)
+{
+	struct alloc_pgt_data *pages = (struct alloc_pgt_data *)context;
+	unsigned char *entry;
+
+	/* Validate there is space available for a new page. */
+	if (pages->pgt_buf_offset >= pages->pgt_buf_size) {
+		debug_putstr("out of pgt_buf in " __FILE__ "!?\n");
+		debug_putaddr(pages->pgt_buf_offset);
+		debug_putaddr(pages->pgt_buf_size);
+		return NULL;
+	}
+
+	entry = pages->pgt_buf + pages->pgt_buf_offset;
+	pages->pgt_buf_offset += PAGE_SIZE;
+
+	return entry;
+}
+
+/* Used to track our allocated page tables. */
+static struct alloc_pgt_data pgt_data;
+
+/* The top level page table entry pointer. */
+static unsigned long level4p;
+
+/* Locates and clears a region for a new top level page table. */
+static void prepare_level4(void)
+{
+	/*
+	 * It should be impossible for this not to already be true,
+	 * but since calling this a second time would rewind the other
+	 * counters, let's just make sure this is reset too.
+	 */
+	pgt_data.pgt_buf_offset = 0;
+
+	/*
+	 * If we came here via startup_32(), cr3 will be _pgtable already
+	 * and we must append to the existing area instead of entirely
+	 * overwriting it.
+	 */
+	level4p = read_cr3();
+	if (level4p == (unsigned long)_pgtable) {
+		debug_putstr("booted via startup_32()\n");
+		pgt_data.pgt_buf = _pgtable + BOOT_INIT_PGT_SIZE;
+		pgt_data.pgt_buf_size = BOOT_PGT_SIZE - BOOT_INIT_PGT_SIZE;
+		memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size);
+	} else {
+		debug_putstr("booted via startup_64()\n");
+		pgt_data.pgt_buf = _pgtable;
+		pgt_data.pgt_buf_size = BOOT_PGT_SIZE;
+		memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size);
+		level4p = (unsigned long)alloc_pgt_page(&pgt_data);
+	}
+}
+
+/*
+ * Adds the specified range to what will become the new identity mappings.
+ * Once all ranges have been added, the new mapping is activated by calling
+ * finalize_identity_maps() below.
+ */
+void add_identity_map(unsigned long start, unsigned long size)
+{
+	struct x86_mapping_info mapping_info = {
+		.alloc_pgt_page	= alloc_pgt_page,
+		.context	= &pgt_data,
+		.pmd_flag	= __PAGE_KERNEL_LARGE_EXEC,
+	};
+	unsigned long end = start + size;
+
+	/* Make sure we have a top level page table ready to use. */
+	if (!level4p)
+		prepare_level4();
+
+	/* Align boundary to 2M. */
+	start = round_down(start, PMD_SIZE);
+	end = round_up(end, PMD_SIZE);
+	if (start >= end)
+		return;
+
+	/* Build the mapping. */
+	kernel_ident_mapping_init(&mapping_info, (pgd_t *)level4p,
+				  start, end);
+}
+
+/*
+ * This switches the page tables to the new level4 that has been built
+ * via calls to add_identity_map() above. If booted via startup_32(),
+ * this is effectively a no-op.
+ */
+void finalize_identity_maps(void)
+{
+	write_cr3(level4p);
+}
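add_identity_map() only ever maps whole 2M PMDs, so every request is widened by the round_down()/round_up() pair above. A small sketch of that rounding with an arbitrary address (PMD_SIZE assumed to be 2M, as on x86_64):

#include <stdio.h>

#define PMD_SZ 0x200000UL	/* 2M, stand-in for PMD_SIZE on x86_64 */

int main(void)
{
	unsigned long start = 0x1234567;	/* arbitrary request */
	unsigned long end   = start + 0x1000;

	start = start & ~(PMD_SZ - 1);			/* round_down */
	end   = (end + PMD_SZ - 1) & ~(PMD_SZ - 1);	/* round_up   */

	printf("mapped [%#lx, %#lx)\n", start, end);	/* [0x1200000, 0x1400000) */
	return 0;
}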
diff --git a/arch/x86/boot/compressed/string.c b/arch/x86/boot/compressed/string.c
index 00e788b..cea140c 100644
--- a/arch/x86/boot/compressed/string.c
+++ b/arch/x86/boot/compressed/string.c
@@ -1,7 +1,16 @@
+/*
+ * This provides an optimized implementation of memcpy, and a simplified
+ * implementation of memset and memmove. These are used here because the
+ * standard kernel runtime versions are not yet available and we don't
+ * trust the gcc built-in implementations as they may do unexpected things
+ * (e.g. FPU ops) in the minimal decompression stub execution environment.
+ */
+#include "error.h"
+
 #include "../string.c"
 
 #ifdef CONFIG_X86_32
-void *memcpy(void *dest, const void *src, size_t n)
+static void *__memcpy(void *dest, const void *src, size_t n)
 {
 	int d0, d1, d2;
 	asm volatile(
@@ -15,7 +24,7 @@
 	return dest;
 }
 #else
-void *memcpy(void *dest, const void *src, size_t n)
+static void *__memcpy(void *dest, const void *src, size_t n)
 {
 	long d0, d1, d2;
 	asm volatile(
@@ -39,3 +48,27 @@
 		ss[i] = c;
 	return s;
 }
+
+void *memmove(void *dest, const void *src, size_t n)
+{
+	unsigned char *d = dest;
+	const unsigned char *s = src;
+
+	if (d <= s || d - s >= n)
+		return __memcpy(dest, src, n);
+
+	while (n-- > 0)
+		d[n] = s[n];
+
+	return dest;
+}
+
+/* Detect and warn about potential overlaps, but handle them with memmove. */
+void *memcpy(void *dest, const void *src, size_t n)
+{
+	if (dest > src && dest - src < n) {
+		warn("Avoiding potentially unsafe overlapping memcpy()!");
+		return memmove(dest, src, n);
+	}
+	return __memcpy(dest, src, n);
+}
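The memcpy() guard above reduces to one pointer comparison: copying forward is unsafe only when the destination starts inside the source span. The same predicate in standalone form (forward_copy_unsafe() is an invented name):

#include <stdio.h>
#include <stddef.h>

static int forward_copy_unsafe(const void *dest, const void *src, size_t n)
{
	const char *d = dest, *s = src;

	/* Unsafe iff dest lands inside [src, src + n). */
	return d > s && (size_t)(d - s) < n;
}

int main(void)
{
	char buf[16];

	printf("%d\n", forward_copy_unsafe(buf + 4, buf, 8));	/* 1: overlap   */
	printf("%d\n", forward_copy_unsafe(buf, buf + 4, 8));	/* 0: memcpy ok */
	return 0;
}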
diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S
index 34d047c..e24e0a0 100644
--- a/arch/x86/boot/compressed/vmlinux.lds.S
+++ b/arch/x86/boot/compressed/vmlinux.lds.S
@@ -70,5 +70,6 @@
 		_epgtable = . ;
 	}
 #endif
+	. = ALIGN(PAGE_SIZE);	/* keep ZO size page aligned */
 	_end = .;
 }
diff --git a/arch/x86/boot/early_serial_console.c b/arch/x86/boot/early_serial_console.c
index 45a0768..f0b8d6d 100644
--- a/arch/x86/boot/early_serial_console.c
+++ b/arch/x86/boot/early_serial_console.c
@@ -1,3 +1,7 @@
+/*
+ * Serial port routines for use during early boot reporting. This code is
+ * included from both the compressed kernel and the regular kernel.
+ */
 #include "boot.h"
 
 #define DEFAULT_SERIAL_PORT 0x3f8 /* ttyS0 */
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 6236b9e..3dd5be3 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -440,13 +440,116 @@
 
 pref_address:		.quad LOAD_PHYSICAL_ADDR	# preferred load addr
 
-#define ZO_INIT_SIZE	(ZO__end - ZO_startup_32 + ZO_z_extract_offset)
+#
+# Getting to provably safe in-place decompression is hard. Worst case
+# behaviours need to be analyzed. Here let's take the decompression of
+# a gzip-compressed kernel as an example, to illustrate it:
+#
+# The file layout of a gzip-compressed kernel is:
+#
+#    magic[2]
+#    method[1]
+#    flags[1]
+#    timestamp[4]
+#    extraflags[1]
+#    os[1]
+#    compressed data blocks[N]
+#    crc[4] orig_len[4]
+#
+# ... resulting in 18 bytes of uncompressed-data overhead.
+#
+# (For more information, please refer to RFC 1951 and RFC 1952.)
+#
+# Files divided into blocks
+# 1 bit (last block flag)
+# 2 bits (block type)
+#
+# A block occurs every 32K - 1 bytes, or when 50% compression
+# has been achieved. The smallest block type encoding is always used.
+#
+# stored:
+#    32 bits length in bytes.
+#
+# fixed:
+#    magic fixed tree.
+#    symbols.
+#
+# dynamic:
+#    dynamic tree encoding.
+#    symbols.
+#
+#
+# The buffer for decompression in place is the length of the uncompressed
+# data, plus a small amount extra to keep the algorithm safe. The
+# compressed data is placed at the end of the buffer.  The output pointer
+# is placed at the start of the buffer and the input pointer is placed
+# where the compressed data starts. Problems will occur when the output
+# pointer overruns the input pointer.
+#
+# The output pointer can only overrun the input pointer if the input
+# pointer is moving faster than the output pointer.  A condition only
+# triggered by data whose compressed form is larger than the uncompressed
+# form.
+#
+# The worst case at the block level is a growth of the compressed data
+# of 5 bytes per 32767 bytes.
+#
+# The worst case internal to a compressed block is very hard to figure.
+# The worst case can at least be bounded by having one bit that represents
+# 32764 bytes and then all of the rest of the bytes representing the
+# very last byte.
+#
+# All of which is enough to compute an amount of extra data that is required
+# to be safe.  To avoid problems at the block level allocating 5 extra bytes
+# per 32767 bytes of data is sufficient.  To avoid problems internal to a
+# block adding an extra 32767 bytes (the worst case uncompressed block size)
+# is sufficient, to ensure that in the worst case the decompressed data
+# for a block will stop one byte before the compressed data for that block
+# begins.
+# To avoid problems with the compressed data's meta information an extra 18
+# bytes are needed.  Leading to the formula:
+#
+# extra_bytes = (uncompressed_size >> 12) + 32768 + 18
+#
+# Adding 8 bytes per 32K is a bit excessive but much easier to calculate.
+# Adding 32768 instead of 32767 just makes for round numbers.
+#
+# The above analysis is for decompressing a gzip-compressed kernel only.
+# Six different decompressors are supported altogether at this point, and
+# among them xz stores data in chunks with a maximum chunk size of 64K.
+# Hence the safety margin is bumped to cover all of the decompressors, so
+# that we don't need to deal with each of them separately. Please check
+# the description in lib/decompressor_xxx.c for specific information.
+#
+# extra_bytes = (uncompressed_size >> 12) + 65536 + 128
+
+#define ZO_z_extra_bytes	((ZO_z_output_len >> 12) + 65536 + 128)
+#if ZO_z_output_len > ZO_z_input_len
+# define ZO_z_extract_offset	(ZO_z_output_len + ZO_z_extra_bytes - \
+				 ZO_z_input_len)
+#else
+# define ZO_z_extract_offset	ZO_z_extra_bytes
+#endif
+
+/*
+ * The extract_offset has to be bigger than the ZO head section. Otherwise,
+ * when the head code runs to move ZO to the end of the buffer, it will
+ * overwrite the head code itself.
+ */
+#if (ZO__ehead - ZO_startup_32) > ZO_z_extract_offset
+# define ZO_z_min_extract_offset ((ZO__ehead - ZO_startup_32 + 4095) & ~4095)
+#else
+# define ZO_z_min_extract_offset ((ZO_z_extract_offset + 4095) & ~4095)
+#endif
+
+#define ZO_INIT_SIZE	(ZO__end - ZO_startup_32 + ZO_z_min_extract_offset)
+
 #define VO_INIT_SIZE	(VO__end - VO__text)
 #if ZO_INIT_SIZE > VO_INIT_SIZE
-#define INIT_SIZE ZO_INIT_SIZE
+# define INIT_SIZE ZO_INIT_SIZE
 #else
-#define INIT_SIZE VO_INIT_SIZE
+# define INIT_SIZE VO_INIT_SIZE
 #endif
+
 init_size:		.long INIT_SIZE		# kernel initialization size
 handover_offset:	.long 0			# Filled in by build.c
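A C rendering of the ZO_z_extract_offset calculation above, with made-up input and output lengths (the real values come from the generated zoffset symbols):

#include <stdio.h>

int main(void)
{
	unsigned long z_output_len = 24UL << 20;	/* uncompressed size */
	unsigned long z_input_len  =  6UL << 20;	/* compressed size   */

	unsigned long extra  = (z_output_len >> 12) + 65536 + 128;
	unsigned long offset = z_output_len > z_input_len ?
			       z_output_len + extra - z_input_len : extra;

	/* Still to be rounded up to 4K and bounded below by the ZO head
	 * size, as ZO_z_min_extract_offset does. */
	printf("extract offset before rounding: %#lx\n", offset);
	return 0;
}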
 
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 4f404a6..0c8d796 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -173,6 +173,7 @@
 CONFIG_NET_TULIP=y
 CONFIG_E100=y
 CONFIG_E1000=y
+CONFIG_E1000E=y
 CONFIG_SKY2=y
 CONFIG_FORCEDETH=y
 CONFIG_8139TOO=y
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 064c7e2..5b7fa14 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -1477,7 +1477,7 @@
 	}
 	aesni_ctr_enc_tfm = aesni_ctr_enc;
 #ifdef CONFIG_AS_AVX
-	if (cpu_has_avx) {
+	if (boot_cpu_has(X86_FEATURE_AVX)) {
 		/* optimize performance of ctr mode encryption transform */
 		aesni_ctr_enc_tfm = aesni_ctr_enc_avx_tfm;
 		pr_info("AES CTR mode by8 optimization enabled\n");
diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c
index d844569..60907c1 100644
--- a/arch/x86/crypto/camellia_aesni_avx2_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c
@@ -562,7 +562,10 @@
 {
 	const char *feature_name;
 
-	if (!cpu_has_avx2 || !cpu_has_avx || !cpu_has_aes || !cpu_has_osxsave) {
+	if (!boot_cpu_has(X86_FEATURE_AVX) ||
+	    !boot_cpu_has(X86_FEATURE_AVX2) ||
+	    !boot_cpu_has(X86_FEATURE_AES) ||
+	    !boot_cpu_has(X86_FEATURE_OSXSAVE)) {
 		pr_info("AVX2 or AES-NI instructions are not detected.\n");
 		return -ENODEV;
 	}
diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c
index 93d8f29..d96429d 100644
--- a/arch/x86/crypto/camellia_aesni_avx_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx_glue.c
@@ -554,7 +554,9 @@
 {
 	const char *feature_name;
 
-	if (!cpu_has_avx || !cpu_has_aes || !cpu_has_osxsave) {
+	if (!boot_cpu_has(X86_FEATURE_AVX) ||
+	    !boot_cpu_has(X86_FEATURE_AES) ||
+	    !boot_cpu_has(X86_FEATURE_OSXSAVE)) {
 		pr_info("AVX or AES-NI instructions are not detected.\n");
 		return -ENODEV;
 	}
diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c
index 8baaff5..2d5c2e0b 100644
--- a/arch/x86/crypto/chacha20_glue.c
+++ b/arch/x86/crypto/chacha20_glue.c
@@ -129,7 +129,8 @@
 		return -ENODEV;
 
 #ifdef CONFIG_AS_AVX2
-	chacha20_use_avx2 = cpu_has_avx && cpu_has_avx2 &&
+	chacha20_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
+			    boot_cpu_has(X86_FEATURE_AVX2) &&
 			    cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
 #endif
 	return crypto_register_alg(&alg);
diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
index 4264a3d..e32142b 100644
--- a/arch/x86/crypto/poly1305_glue.c
+++ b/arch/x86/crypto/poly1305_glue.c
@@ -179,11 +179,12 @@
 
 static int __init poly1305_simd_mod_init(void)
 {
-	if (!cpu_has_xmm2)
+	if (!boot_cpu_has(X86_FEATURE_XMM2))
 		return -ENODEV;
 
 #ifdef CONFIG_AS_AVX2
-	poly1305_use_avx2 = cpu_has_avx && cpu_has_avx2 &&
+	poly1305_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
+			    boot_cpu_has(X86_FEATURE_AVX2) &&
 			    cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
 	alg.descsize = sizeof(struct poly1305_simd_desc_ctx);
 	if (poly1305_use_avx2)
diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c
index 6d19834..870f6d8 100644
--- a/arch/x86/crypto/serpent_avx2_glue.c
+++ b/arch/x86/crypto/serpent_avx2_glue.c
@@ -538,7 +538,7 @@
 {
 	const char *feature_name;
 
-	if (!cpu_has_avx2 || !cpu_has_osxsave) {
+	if (!boot_cpu_has(X86_FEATURE_AVX2) || !boot_cpu_has(X86_FEATURE_OSXSAVE)) {
 		pr_info("AVX2 instructions are not detected.\n");
 		return -ENODEV;
 	}
diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c
index 8943407..644f97a 100644
--- a/arch/x86/crypto/serpent_sse2_glue.c
+++ b/arch/x86/crypto/serpent_sse2_glue.c
@@ -600,7 +600,7 @@
 
 static int __init serpent_sse2_init(void)
 {
-	if (!cpu_has_xmm2) {
+	if (!boot_cpu_has(X86_FEATURE_XMM2)) {
 		printk(KERN_INFO "SSE2 instructions are not detected.\n");
 		return -ENODEV;
 	}
diff --git a/arch/x86/crypto/sha-mb/sha1_mb.c b/arch/x86/crypto/sha-mb/sha1_mb.c
index 081255c..9c5af33 100644
--- a/arch/x86/crypto/sha-mb/sha1_mb.c
+++ b/arch/x86/crypto/sha-mb/sha1_mb.c
@@ -102,14 +102,14 @@
 static asmlinkage struct job_sha1* (*sha1_job_mgr_flush)(struct sha1_mb_mgr *state);
 static asmlinkage struct job_sha1* (*sha1_job_mgr_get_comp_job)(struct sha1_mb_mgr *state);
 
-inline void sha1_init_digest(uint32_t *digest)
+static inline void sha1_init_digest(uint32_t *digest)
 {
 	static const uint32_t initial_digest[SHA1_DIGEST_LENGTH] = {SHA1_H0,
 					SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 };
 	memcpy(digest, initial_digest, sizeof(initial_digest));
 }
 
-inline uint32_t sha1_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2],
+static inline uint32_t sha1_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2],
 			 uint32_t total_len)
 {
 	uint32_t i = total_len & (SHA1_BLOCK_SIZE - 1);
diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c
index dd14616..1024e37 100644
--- a/arch/x86/crypto/sha1_ssse3_glue.c
+++ b/arch/x86/crypto/sha1_ssse3_glue.c
@@ -166,7 +166,7 @@
 static bool avx_usable(void)
 {
 	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
-		if (cpu_has_avx)
+		if (boot_cpu_has(X86_FEATURE_AVX))
 			pr_info("AVX detected but unusable.\n");
 		return false;
 	}
diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c
index 5f4d608..3ae0f43 100644
--- a/arch/x86/crypto/sha256_ssse3_glue.c
+++ b/arch/x86/crypto/sha256_ssse3_glue.c
@@ -201,7 +201,7 @@
 static bool avx_usable(void)
 {
 	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
-		if (cpu_has_avx)
+		if (boot_cpu_has(X86_FEATURE_AVX))
 			pr_info("AVX detected but unusable.\n");
 		return false;
 	}
diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c
index 34e5083..0b17c83 100644
--- a/arch/x86/crypto/sha512_ssse3_glue.c
+++ b/arch/x86/crypto/sha512_ssse3_glue.c
@@ -151,7 +151,7 @@
 static bool avx_usable(void)
 {
 	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
-		if (cpu_has_avx)
+		if (boot_cpu_has(X86_FEATURE_AVX))
 			pr_info("AVX detected but unusable.\n");
 		return false;
 	}
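All of the crypto glue changes in this series follow the same pattern: replace the old cpu_has_* macros with explicit boot_cpu_has() feature tests before registering an accelerated implementation. A hypothetical module init sketch (example_glue_init() is not from this patch, and the exact header set is an assumption):

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <asm/cpufeature.h>

static int __init example_glue_init(void)
{
	if (!boot_cpu_has(X86_FEATURE_AVX) ||
	    !boot_cpu_has(X86_FEATURE_AES) ||
	    !boot_cpu_has(X86_FEATURE_OSXSAVE)) {
		pr_info("required instructions are not detected\n");
		return -ENODEV;	/* keep the generic implementation */
	}

	/* ...register the accelerated algorithm here... */
	return 0;
}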
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index e79d93d..ec138e5 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -191,7 +191,7 @@
 
 long syscall_trace_enter(struct pt_regs *regs)
 {
-	u32 arch = is_ia32_task() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
+	u32 arch = in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
 	unsigned long phase1_result = syscall_trace_enter_phase1(regs, arch);
 
 	if (phase1_result == 0)
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 10868aa..983e5d3 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -207,10 +207,7 @@
 ENTRY(ret_from_fork)
 	pushl	%eax
 	call	schedule_tail
-	GET_THREAD_INFO(%ebp)
 	popl	%eax
-	pushl	$0x0202				# Reset kernel eflags
-	popfl
 
 	/* When we fork, we trace the syscall return in the child, too. */
 	movl    %esp, %eax
@@ -221,10 +218,7 @@
 ENTRY(ret_from_kernel_thread)
 	pushl	%eax
 	call	schedule_tail
-	GET_THREAD_INFO(%ebp)
 	popl	%eax
-	pushl	$0x0202				# Reset kernel eflags
-	popfl
 	movl	PT_EBP(%esp), %eax
 	call	*PT_EBX(%esp)
 	movl	$0, PT_EAX(%esp)
@@ -251,7 +245,6 @@
 ret_from_exception:
 	preempt_stop(CLBR_ANY)
 ret_from_intr:
-	GET_THREAD_INFO(%ebp)
 #ifdef CONFIG_VM86
 	movl	PT_EFLAGS(%esp), %eax		# mix EFLAGS and CS
 	movb	PT_CS(%esp), %al
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 858b555..9ee0da1 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -372,9 +372,6 @@
 ENTRY(ret_from_fork)
 	LOCK ; btr $TIF_FORK, TI_flags(%r8)
 
-	pushq	$0x0002
-	popfq					/* reset kernel eflags */
-
 	call	schedule_tail			/* rdi: 'prev' task parameter */
 
 	testb	$3, CS(%rsp)			/* from kernel_thread? */
@@ -781,19 +778,25 @@
 	pushfq
 	DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
 	SWAPGS
-gs_change:
+.Lgs_change:
 	movl	%edi, %gs
-2:	mfence					/* workaround */
+2:	ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE
 	SWAPGS
 	popfq
 	ret
 END(native_load_gs_index)
 
-	_ASM_EXTABLE(gs_change, bad_gs)
+	_ASM_EXTABLE(.Lgs_change, bad_gs)
 	.section .fixup, "ax"
 	/* running with kernelgs */
 bad_gs:
 	SWAPGS					/* switch back to user gs */
+.macro ZAP_GS
+	/* This can't be a string because the preprocessor needs to see it. */
+	movl $__USER_DS, %eax
+	movl %eax, %gs
+.endm
+	ALTERNATIVE "", "ZAP_GS", X86_BUG_NULL_SEG
 	xorl	%eax, %eax
 	movl	%eax, %gs
 	jmp	2b
@@ -1019,13 +1022,13 @@
 	movl	%ecx, %eax			/* zero extend */
 	cmpq	%rax, RIP+8(%rsp)
 	je	.Lbstep_iret
-	cmpq	$gs_change, RIP+8(%rsp)
+	cmpq	$.Lgs_change, RIP+8(%rsp)
 	jne	.Lerror_entry_done
 
 	/*
-	 * hack: gs_change can fail with user gsbase.  If this happens, fix up
+	 * hack: .Lgs_change can fail with user gsbase.  If this happens, fix up
 	 * gsbase and proceed.  We'll fix up the exception and land in
-	 * gs_change's error handler with kernel gsbase.
+	 * .Lgs_change's error handler with kernel gsbase.
 	 */
 	jmp	.Lerror_entry_from_usermode_swapgs
 
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 847f2f0..e1721da 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -72,24 +72,23 @@
 	pushfq				/* pt_regs->flags (except IF = 0) */
 	orl	$X86_EFLAGS_IF, (%rsp)	/* Fix saved flags */
 	pushq	$__USER32_CS		/* pt_regs->cs */
-	xorq    %r8,%r8
-	pushq	%r8			/* pt_regs->ip = 0 (placeholder) */
+	pushq	$0			/* pt_regs->ip = 0 (placeholder) */
 	pushq	%rax			/* pt_regs->orig_ax */
 	pushq	%rdi			/* pt_regs->di */
 	pushq	%rsi			/* pt_regs->si */
 	pushq	%rdx			/* pt_regs->dx */
 	pushq	%rcx			/* pt_regs->cx */
 	pushq	$-ENOSYS		/* pt_regs->ax */
-	pushq   %r8                     /* pt_regs->r8  = 0 */
-	pushq   %r8                     /* pt_regs->r9  = 0 */
-	pushq   %r8                     /* pt_regs->r10 = 0 */
-	pushq   %r8                     /* pt_regs->r11 = 0 */
+	pushq   $0			/* pt_regs->r8  = 0 */
+	pushq   $0			/* pt_regs->r9  = 0 */
+	pushq   $0			/* pt_regs->r10 = 0 */
+	pushq   $0			/* pt_regs->r11 = 0 */
 	pushq   %rbx                    /* pt_regs->rbx */
 	pushq   %rbp                    /* pt_regs->rbp (will be overwritten) */
-	pushq   %r8                     /* pt_regs->r12 = 0 */
-	pushq   %r8                     /* pt_regs->r13 = 0 */
-	pushq   %r8                     /* pt_regs->r14 = 0 */
-	pushq   %r8                     /* pt_regs->r15 = 0 */
+	pushq   $0			/* pt_regs->r12 = 0 */
+	pushq   $0			/* pt_regs->r13 = 0 */
+	pushq   $0			/* pt_regs->r14 = 0 */
+	pushq   $0			/* pt_regs->r15 = 0 */
 	cld
 
 	/*
@@ -205,17 +204,16 @@
 	pushq	%rdx			/* pt_regs->dx */
 	pushq	%rbp			/* pt_regs->cx (stashed in bp) */
 	pushq	$-ENOSYS		/* pt_regs->ax */
-	xorq    %r8,%r8
-	pushq   %r8                     /* pt_regs->r8  = 0 */
-	pushq   %r8                     /* pt_regs->r9  = 0 */
-	pushq   %r8                     /* pt_regs->r10 = 0 */
-	pushq   %r8                     /* pt_regs->r11 = 0 */
+	pushq   $0			/* pt_regs->r8  = 0 */
+	pushq   $0			/* pt_regs->r9  = 0 */
+	pushq   $0			/* pt_regs->r10 = 0 */
+	pushq   $0			/* pt_regs->r11 = 0 */
 	pushq   %rbx                    /* pt_regs->rbx */
 	pushq   %rbp                    /* pt_regs->rbp (will be overwritten) */
-	pushq   %r8                     /* pt_regs->r12 = 0 */
-	pushq   %r8                     /* pt_regs->r13 = 0 */
-	pushq   %r8                     /* pt_regs->r14 = 0 */
-	pushq   %r8                     /* pt_regs->r15 = 0 */
+	pushq   $0			/* pt_regs->r12 = 0 */
+	pushq   $0			/* pt_regs->r13 = 0 */
+	pushq   $0			/* pt_regs->r14 = 0 */
+	pushq   $0			/* pt_regs->r15 = 0 */
 
 	/*
 	 * User mode is traced as though IRQs are on, and SYSENTER
@@ -316,11 +314,10 @@
 	pushq	%rdx			/* pt_regs->dx */
 	pushq	%rcx			/* pt_regs->cx */
 	pushq	$-ENOSYS		/* pt_regs->ax */
-	xorq    %r8,%r8
-	pushq   %r8                     /* pt_regs->r8  = 0 */
-	pushq   %r8                     /* pt_regs->r9  = 0 */
-	pushq   %r8                     /* pt_regs->r10 = 0 */
-	pushq   %r8                     /* pt_regs->r11 = 0 */
+	pushq   $0			/* pt_regs->r8  = 0 */
+	pushq   $0			/* pt_regs->r9  = 0 */
+	pushq   $0			/* pt_regs->r10 = 0 */
+	pushq   $0			/* pt_regs->r11 = 0 */
 	pushq   %rbx                    /* pt_regs->rbx */
 	pushq   %rbp                    /* pt_regs->rbp */
 	pushq   %r12                    /* pt_regs->r12 */
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index cac6d17..555263e 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -374,3 +374,5 @@
 543	x32	io_setup		compat_sys_io_setup
 544	x32	io_submit		compat_sys_io_submit
 545	x32	execveat		compat_sys_execveat/ptregs
+534	x32	preadv2			compat_sys_preadv2
+535	x32	pwritev2		compat_sys_pwritev2
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index 03c3eb7..2f02d23 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -13,7 +13,6 @@
 
 #include <uapi/linux/time.h>
 #include <asm/vgtod.h>
-#include <asm/hpet.h>
 #include <asm/vvar.h>
 #include <asm/unistd.h>
 #include <asm/msr.h>
@@ -28,16 +27,6 @@
 extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
 extern time_t __vdso_time(time_t *t);
 
-#ifdef CONFIG_HPET_TIMER
-extern u8 hpet_page
-	__attribute__((visibility("hidden")));
-
-static notrace cycle_t vread_hpet(void)
-{
-	return *(const volatile u32 *)(&hpet_page + HPET_COUNTER);
-}
-#endif
-
 #ifdef CONFIG_PARAVIRT_CLOCK
 extern u8 pvclock_page
 	__attribute__((visibility("hidden")));
@@ -195,10 +184,6 @@
 
 	if (gtod->vclock_mode == VCLOCK_TSC)
 		cycles = vread_tsc();
-#ifdef CONFIG_HPET_TIMER
-	else if (gtod->vclock_mode == VCLOCK_HPET)
-		cycles = vread_hpet();
-#endif
 #ifdef CONFIG_PARAVIRT_CLOCK
 	else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
 		cycles = vread_pvclock(mode);
diff --git a/arch/x86/entry/vdso/vdso-layout.lds.S b/arch/x86/entry/vdso/vdso-layout.lds.S
index 4158acc..a708aa9 100644
--- a/arch/x86/entry/vdso/vdso-layout.lds.S
+++ b/arch/x86/entry/vdso/vdso-layout.lds.S
@@ -25,7 +25,7 @@
 	 * segment.
 	 */
 
-	vvar_start = . - 3 * PAGE_SIZE;
+	vvar_start = . - 2 * PAGE_SIZE;
 	vvar_page = vvar_start;
 
 	/* Place all vvars at the offsets in asm/vvar.h. */
@@ -35,8 +35,7 @@
 #undef __VVAR_KERNEL_LDS
 #undef EMIT_VVAR
 
-	hpet_page = vvar_start + PAGE_SIZE;
-	pvclock_page = vvar_start + 2 * PAGE_SIZE;
+	pvclock_page = vvar_start + PAGE_SIZE;
 
 	. = SIZEOF_HEADERS;
 
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 10f7045..b3cf813 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -18,7 +18,6 @@
 #include <asm/vdso.h>
 #include <asm/vvar.h>
 #include <asm/page.h>
-#include <asm/hpet.h>
 #include <asm/desc.h>
 #include <asm/cpufeature.h>
 
@@ -129,16 +128,6 @@
 	if (sym_offset == image->sym_vvar_page) {
 		ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address,
 				    __pa_symbol(&__vvar_page) >> PAGE_SHIFT);
-	} else if (sym_offset == image->sym_hpet_page) {
-#ifdef CONFIG_HPET_TIMER
-		if (hpet_address && vclock_was_used(VCLOCK_HPET)) {
-			ret = vm_insert_pfn_prot(
-				vma,
-				(unsigned long)vmf->virtual_address,
-				hpet_address >> PAGE_SHIFT,
-				pgprot_noncached(PAGE_READONLY));
-		}
-#endif
 	} else if (sym_offset == image->sym_pvclock_page) {
 		struct pvclock_vsyscall_time_info *pvti =
 			pvclock_pvti_cpu0_va();
diff --git a/arch/x86/events/Kconfig b/arch/x86/events/Kconfig
new file mode 100644
index 0000000..98397db
--- /dev/null
+++ b/arch/x86/events/Kconfig
@@ -0,0 +1,36 @@
+menu "Performance monitoring"
+
+config PERF_EVENTS_INTEL_UNCORE
+	tristate "Intel uncore performance events"
+	depends on PERF_EVENTS && CPU_SUP_INTEL && PCI
+	default y
+	---help---
+	  Include support for Intel uncore performance events. These are
+	  available on Nehalem-EX and more modern processors.
+
+config PERF_EVENTS_INTEL_RAPL
+	tristate "Intel rapl performance events"
+	depends on PERF_EVENTS && CPU_SUP_INTEL && PCI
+	default y
+	---help---
+	  Include support for Intel RAPL performance events for power
+	  monitoring on modern processors.
+
+config PERF_EVENTS_INTEL_CSTATE
+	tristate "Intel cstate performance events"
+	depends on PERF_EVENTS && CPU_SUP_INTEL && PCI
+	default y
+	---help---
+	  Include support for Intel cstate performance events for power
+	  monitoring on modern processors.
+
+config PERF_EVENTS_AMD_POWER
+	depends on PERF_EVENTS && CPU_SUP_AMD
+	tristate "AMD Processor Power Reporting Mechanism"
+	---help---
+	  Provide power reporting mechanism support for AMD processors.
+	  Currently, it leverages the X86_FEATURE_ACC_POWER
+	  (CPUID Fn8000_0007_EDX[12]) interface to calculate the
+	  average power consumption on Family 15h processors.
+
+endmenu
diff --git a/arch/x86/events/Makefile b/arch/x86/events/Makefile
index f59618a..1d392c3 100644
--- a/arch/x86/events/Makefile
+++ b/arch/x86/events/Makefile
@@ -6,9 +6,6 @@
 ifdef CONFIG_AMD_IOMMU
 obj-$(CONFIG_CPU_SUP_AMD)               += amd/iommu.o
 endif
-obj-$(CONFIG_CPU_SUP_INTEL)		+= intel/core.o intel/bts.o intel/cqm.o
-obj-$(CONFIG_CPU_SUP_INTEL)		+= intel/cstate.o intel/ds.o intel/knc.o 
-obj-$(CONFIG_CPU_SUP_INTEL)		+= intel/lbr.o intel/p4.o intel/p6.o intel/pt.o
-obj-$(CONFIG_CPU_SUP_INTEL)		+= intel/rapl.o msr.o
-obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE)	+= intel/uncore.o intel/uncore_nhmex.o
-obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE)	+= intel/uncore_snb.o intel/uncore_snbep.o
+
+obj-$(CONFIG_CPU_SUP_INTEL)		+= msr.o
+obj-$(CONFIG_CPU_SUP_INTEL)		+= intel/
diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index 3db9569..98ac573 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -263,6 +263,7 @@
 };
 
 static struct pmu amd_nb_pmu = {
+	.task_ctx_nr	= perf_invalid_context,
 	.attr_groups	= amd_uncore_attr_groups,
 	.name		= "amd_nb",
 	.event_init	= amd_uncore_event_init,
@@ -274,6 +275,7 @@
 };
 
 static struct pmu amd_l2_pmu = {
+	.task_ctx_nr	= perf_invalid_context,
 	.attr_groups	= amd_uncore_attr_groups,
 	.name		= "amd_l2",
 	.event_init	= amd_uncore_event_init,
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 041e442..73a75aa 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -360,6 +360,9 @@
 {
 	int i;
 
+	if (x86_pmu.lbr_pt_coexist)
+		return 0;
+
 	if (!atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what])) {
 		mutex_lock(&pmc_reserve_mutex);
 		for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++) {
@@ -380,6 +383,9 @@
 
 void x86_del_exclusive(unsigned int what)
 {
+	if (x86_pmu.lbr_pt_coexist)
+		return;
+
 	atomic_dec(&x86_pmu.lbr_exclusive[what]);
 	atomic_dec(&active_events);
 }
@@ -1518,7 +1524,7 @@
 
 static void __init pmu_check_apic(void)
 {
-	if (cpu_has_apic)
+	if (boot_cpu_has(X86_FEATURE_APIC))
 		return;
 
 	x86_pmu.apic = 0;
@@ -2177,7 +2183,7 @@
 	 * cap_user_time_zero doesn't make sense when we're using a different
 	 * time base for the records.
 	 */
-	if (event->clock == &local_clock) {
+	if (!event->attr.use_clockid) {
 		userpg->cap_user_time_zero = 1;
 		userpg->time_zero = data->cyc2ns_offset;
 	}
@@ -2277,7 +2283,7 @@
 
 	fp = compat_ptr(ss_base + regs->bp);
 	pagefault_disable();
-	while (entry->nr < PERF_MAX_STACK_DEPTH) {
+	while (entry->nr < sysctl_perf_event_max_stack) {
 		unsigned long bytes;
 		frame.next_frame     = 0;
 		frame.return_address = 0;
@@ -2337,7 +2343,7 @@
 		return;
 
 	pagefault_disable();
-	while (entry->nr < PERF_MAX_STACK_DEPTH) {
+	while (entry->nr < sysctl_perf_event_max_stack) {
 		unsigned long bytes;
 		frame.next_frame	     = NULL;
 		frame.return_address = 0;
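
The two hunks above replace the compile-time PERF_MAX_STACK_DEPTH cap with sysctl_perf_event_max_stack, so the callchain depth becomes runtime-tunable through the kernel.perf_event_max_stack sysctl. A small sketch of reading the knob the way a tool might (the /proc path is assumed from the sysctl name):

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/kernel/perf_event_max_stack", "r");
	int max_stack;

	if (!f)
		return 1;
	if (fscanf(f, "%d", &max_stack) != 1)
		max_stack = -1;
	fclose(f);

	printf("callchains capped at %d frames\n", max_stack);
	return 0;
}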
diff --git a/arch/x86/events/intel/Makefile b/arch/x86/events/intel/Makefile
new file mode 100644
index 0000000..3660b2c
--- /dev/null
+++ b/arch/x86/events/intel/Makefile
@@ -0,0 +1,9 @@
+obj-$(CONFIG_CPU_SUP_INTEL)		+= core.o bts.o cqm.o
+obj-$(CONFIG_CPU_SUP_INTEL)		+= ds.o knc.o
+obj-$(CONFIG_CPU_SUP_INTEL)		+= lbr.o p4.o p6.o pt.o
+obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL)	+= intel-rapl.o
+intel-rapl-objs				:= rapl.o
+obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE)	+= intel-uncore.o
+intel-uncore-objs			:= uncore.o uncore_nhmex.o uncore_snb.o uncore_snbep.o
+obj-$(CONFIG_PERF_EVENTS_INTEL_CSTATE)	+= intel-cstate.o
+intel-cstate-objs			:= cstate.o
diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
index b99dc92..0a6e393 100644
--- a/arch/x86/events/intel/bts.c
+++ b/arch/x86/events/intel/bts.c
@@ -171,18 +171,6 @@
 	memset(page_address(phys->page) + index, 0, phys->size - index);
 }
 
-static bool bts_buffer_is_full(struct bts_buffer *buf, struct bts_ctx *bts)
-{
-	if (buf->snapshot)
-		return false;
-
-	if (local_read(&buf->data_size) >= bts->handle.size ||
-	    bts->handle.size - local_read(&buf->data_size) < BTS_RECORD_SIZE)
-		return true;
-
-	return false;
-}
-
 static void bts_update(struct bts_ctx *bts)
 {
 	int cpu = raw_smp_processor_id();
@@ -213,18 +201,15 @@
 	}
 }
 
+static int
+bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle);
+
 static void __bts_event_start(struct perf_event *event)
 {
 	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
 	struct bts_buffer *buf = perf_get_aux(&bts->handle);
 	u64 config = 0;
 
-	if (!buf || bts_buffer_is_full(buf, bts))
-		return;
-
-	event->hw.itrace_started = 1;
-	event->hw.state = 0;
-
 	if (!buf->snapshot)
 		config |= ARCH_PERFMON_EVENTSEL_INT;
 	if (!event->attr.exclude_kernel)
@@ -241,16 +226,41 @@
 	wmb();
 
 	intel_pmu_enable_bts(config);
+
 }
 
 static void bts_event_start(struct perf_event *event, int flags)
 {
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+	struct bts_buffer *buf;
+
+	buf = perf_aux_output_begin(&bts->handle, event);
+	if (!buf)
+		goto fail_stop;
+
+	if (bts_buffer_reset(buf, &bts->handle))
+		goto fail_end_stop;
+
+	bts->ds_back.bts_buffer_base = cpuc->ds->bts_buffer_base;
+	bts->ds_back.bts_absolute_maximum = cpuc->ds->bts_absolute_maximum;
+	bts->ds_back.bts_interrupt_threshold = cpuc->ds->bts_interrupt_threshold;
+
+	event->hw.itrace_started = 1;
+	event->hw.state = 0;
 
 	__bts_event_start(event);
 
 	/* PMI handler: this counter is running and likely generating PMIs */
 	ACCESS_ONCE(bts->started) = 1;
+
+	return;
+
+fail_end_stop:
+	perf_aux_output_end(&bts->handle, 0, false);
+
+fail_stop:
+	event->hw.state = PERF_HES_STOPPED;
 }
 
 static void __bts_event_stop(struct perf_event *event)
@@ -269,15 +279,32 @@
 
 static void bts_event_stop(struct perf_event *event, int flags)
 {
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+	struct bts_buffer *buf = perf_get_aux(&bts->handle);
 
 	/* PMI handler: don't restart this counter */
 	ACCESS_ONCE(bts->started) = 0;
 
 	__bts_event_stop(event);
 
-	if (flags & PERF_EF_UPDATE)
+	if (flags & PERF_EF_UPDATE) {
 		bts_update(bts);
+
+		if (buf) {
+			if (buf->snapshot)
+				bts->handle.head =
+					local_xchg(&buf->data_size,
+						   buf->nr_pages << PAGE_SHIFT);
+			perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
+					    !!local_xchg(&buf->lost, 0));
+		}
+
+		cpuc->ds->bts_index = bts->ds_back.bts_buffer_base;
+		cpuc->ds->bts_buffer_base = bts->ds_back.bts_buffer_base;
+		cpuc->ds->bts_absolute_maximum = bts->ds_back.bts_absolute_maximum;
+		cpuc->ds->bts_interrupt_threshold = bts->ds_back.bts_interrupt_threshold;
+	}
 }
 
 void intel_bts_enable_local(void)
@@ -417,34 +444,14 @@
 
 static void bts_event_del(struct perf_event *event, int mode)
 {
-	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
-	struct bts_buffer *buf = perf_get_aux(&bts->handle);
-
 	bts_event_stop(event, PERF_EF_UPDATE);
-
-	if (buf) {
-		if (buf->snapshot)
-			bts->handle.head =
-				local_xchg(&buf->data_size,
-					   buf->nr_pages << PAGE_SHIFT);
-		perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
-				    !!local_xchg(&buf->lost, 0));
-	}
-
-	cpuc->ds->bts_index = bts->ds_back.bts_buffer_base;
-	cpuc->ds->bts_buffer_base = bts->ds_back.bts_buffer_base;
-	cpuc->ds->bts_absolute_maximum = bts->ds_back.bts_absolute_maximum;
-	cpuc->ds->bts_interrupt_threshold = bts->ds_back.bts_interrupt_threshold;
 }
 
 static int bts_event_add(struct perf_event *event, int mode)
 {
-	struct bts_buffer *buf;
 	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
-	int ret = -EBUSY;
 
 	event->hw.state = PERF_HES_STOPPED;
 
@@ -454,26 +461,10 @@
 	if (bts->handle.event)
 		return -EBUSY;
 
-	buf = perf_aux_output_begin(&bts->handle, event);
-	if (!buf)
-		return -EINVAL;
-
-	ret = bts_buffer_reset(buf, &bts->handle);
-	if (ret) {
-		perf_aux_output_end(&bts->handle, 0, false);
-		return ret;
-	}
-
-	bts->ds_back.bts_buffer_base = cpuc->ds->bts_buffer_base;
-	bts->ds_back.bts_absolute_maximum = cpuc->ds->bts_absolute_maximum;
-	bts->ds_back.bts_interrupt_threshold = cpuc->ds->bts_interrupt_threshold;
-
 	if (mode & PERF_EF_START) {
 		bts_event_start(event, 0);
-		if (hwc->state & PERF_HES_STOPPED) {
-			bts_event_del(event, 0);
-			return -EBUSY;
-		}
+		if (hwc->state & PERF_HES_STOPPED)
+			return -EINVAL;
 	}
 
 	return 0;
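
The restructuring above centralizes buffer setup in bts_event_start() and unwinds failures with layered goto labels, each undoing only what succeeded before it. A self-contained sketch of that idiom using hypothetical stand-in resources (not the BTS code itself):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Layered-goto unwind: each label releases what was acquired above it. */
static int start_event(size_t bufsz)
{
	char *aux = malloc(bufsz);	/* stands in for perf_aux_output_begin() */

	if (!aux)
		goto fail_stop;

	if (bufsz < 24)			/* stands in for bts_buffer_reset() failing */
		goto fail_end_stop;

	memset(aux, 0, bufsz);
	printf("started with a %zu-byte buffer\n", bufsz);
	free(aux);
	return 0;

fail_end_stop:
	free(aux);			/* stands in for perf_aux_output_end() */
fail_stop:
	return -1;			/* caller marks the event PERF_HES_STOPPED */
}

int main(void)
{
	start_event(16);		/* exercises the unwind path */
	return start_event(4096) ? 1 : 0;
}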
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 5210eaa..7c66695 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -1465,6 +1465,140 @@
  },
 };
 
+static struct extra_reg intel_glm_extra_regs[] __read_mostly = {
+	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x760005ffbfull, RSP_0),
+	INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x360005ffbfull, RSP_1),
+	EVENT_EXTRA_END
+};
+
+#define GLM_DEMAND_DATA_RD		BIT_ULL(0)
+#define GLM_DEMAND_RFO			BIT_ULL(1)
+#define GLM_ANY_RESPONSE		BIT_ULL(16)
+#define GLM_SNP_NONE_OR_MISS		BIT_ULL(33)
+#define GLM_DEMAND_READ			GLM_DEMAND_DATA_RD
+#define GLM_DEMAND_WRITE		GLM_DEMAND_RFO
+#define GLM_DEMAND_PREFETCH		(SNB_PF_DATA_RD|SNB_PF_RFO)
+#define GLM_LLC_ACCESS			GLM_ANY_RESPONSE
+#define GLM_SNP_ANY			(GLM_SNP_NONE_OR_MISS|SNB_NO_FWD|SNB_HITM)
+#define GLM_LLC_MISS			(GLM_SNP_ANY|SNB_NON_DRAM)
+
+static __initconst const u64 glm_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	[C(L1D)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= 0x81d0,	/* MEM_UOPS_RETIRED.ALL_LOADS */
+			[C(RESULT_MISS)]	= 0x0,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= 0x82d0,	/* MEM_UOPS_RETIRED.ALL_STORES */
+			[C(RESULT_MISS)]	= 0x0,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= 0x0,
+			[C(RESULT_MISS)]	= 0x0,
+		},
+	},
+	[C(L1I)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= 0x0380,	/* ICACHE.ACCESSES */
+			[C(RESULT_MISS)]	= 0x0280,	/* ICACHE.MISSES */
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= -1,
+			[C(RESULT_MISS)]	= -1,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= 0x0,
+			[C(RESULT_MISS)]	= 0x0,
+		},
+	},
+	[C(LL)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= 0x1b7,	/* OFFCORE_RESPONSE */
+			[C(RESULT_MISS)]	= 0x1b7,	/* OFFCORE_RESPONSE */
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= 0x1b7,	/* OFFCORE_RESPONSE */
+			[C(RESULT_MISS)]	= 0x1b7,	/* OFFCORE_RESPONSE */
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= 0x1b7,	/* OFFCORE_RESPONSE */
+			[C(RESULT_MISS)]	= 0x1b7,	/* OFFCORE_RESPONSE */
+		},
+	},
+	[C(DTLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= 0x81d0,	/* MEM_UOPS_RETIRED.ALL_LOADS */
+			[C(RESULT_MISS)]	= 0x0,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= 0x82d0,	/* MEM_UOPS_RETIRED.ALL_STORES */
+			[C(RESULT_MISS)]	= 0x0,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= 0x0,
+			[C(RESULT_MISS)]	= 0x0,
+		},
+	},
+	[C(ITLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= 0x00c0,	/* INST_RETIRED.ANY_P */
+			[C(RESULT_MISS)]	= 0x0481,	/* ITLB.MISS */
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= -1,
+			[C(RESULT_MISS)]	= -1,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= -1,
+			[C(RESULT_MISS)]	= -1,
+		},
+	},
+	[C(BPU)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= 0x00c4,	/* BR_INST_RETIRED.ALL_BRANCHES */
+			[C(RESULT_MISS)]	= 0x00c5,	/* BR_MISP_RETIRED.ALL_BRANCHES */
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= -1,
+			[C(RESULT_MISS)]	= -1,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= -1,
+			[C(RESULT_MISS)]	= -1,
+		},
+	},
+};
+
+static __initconst const u64 glm_hw_cache_extra_regs
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	[C(LL)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)]	= GLM_DEMAND_READ|
+						  GLM_LLC_ACCESS,
+			[C(RESULT_MISS)]	= GLM_DEMAND_READ|
+						  GLM_LLC_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= GLM_DEMAND_WRITE|
+						  GLM_LLC_ACCESS,
+			[C(RESULT_MISS)]	= GLM_DEMAND_WRITE|
+						  GLM_LLC_MISS,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= GLM_DEMAND_PREFETCH|
+						  GLM_LLC_ACCESS,
+			[C(RESULT_MISS)]	= GLM_DEMAND_PREFETCH|
+						  GLM_LLC_MISS,
+		},
+	},
+};
+
 #define KNL_OT_L2_HITE		BIT_ULL(19) /* Other Tile L2 Hit */
 #define KNL_OT_L2_HITF		BIT_ULL(20) /* Other Tile L2 Hit */
 #define KNL_MCDRAM_LOCAL	BIT_ULL(21)
@@ -3447,7 +3581,7 @@
 		memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
 		       sizeof(hw_cache_extra_regs));
 
-		intel_pmu_lbr_init_atom();
+		intel_pmu_lbr_init_slm();
 
 		x86_pmu.event_constraints = intel_slm_event_constraints;
 		x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
@@ -3456,6 +3590,30 @@
 		pr_cont("Silvermont events, ");
 		break;
 
+	case 92: /* 14nm Atom "Goldmont" */
+	case 95: /* 14nm Atom "Goldmont Denverton" */
+		memcpy(hw_cache_event_ids, glm_hw_cache_event_ids,
+		       sizeof(hw_cache_event_ids));
+		memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs,
+		       sizeof(hw_cache_extra_regs));
+
+		intel_pmu_lbr_init_skl();
+
+		x86_pmu.event_constraints = intel_slm_event_constraints;
+		x86_pmu.pebs_constraints = intel_glm_pebs_event_constraints;
+		x86_pmu.extra_regs = intel_glm_extra_regs;
+		/*
+		 * It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS
+		 * for precise cycles.
+		 * :pp is identical to :ppp
+		 */
+		x86_pmu.pebs_aliases = NULL;
+		x86_pmu.pebs_prec_dist = true;
+		x86_pmu.lbr_pt_coexist = true;
+		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+		pr_cont("Goldmont events, ");
+		break;
+
 	case 37: /* 32nm Westmere    */
 	case 44: /* 32nm Westmere-EP */
 	case 47: /* 32nm Westmere-EX */
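
The case labels here (92, 95, 37, 44, ...) are family-6 display model numbers. For family 6 the extended model field from CPUID leaf 1 is prepended to the base model; a hedged userspace sketch of the same decoding, using the compiler's cpuid helper:

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
		return 1;

	unsigned int family = (eax >> 8) & 0xf;
	unsigned int model  = (eax >> 4) & 0xf;

	/* Families 6 and 15 prepend the extended model nibble. */
	if (family == 0x6 || family == 0xf)
		model |= ((eax >> 16) & 0xf) << 4;

	printf("family %u, model %u\n", family, model);
	return 0;
}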
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 7946c42..9ba4e41 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -91,6 +91,8 @@
 #include <asm/cpu_device_id.h>
 #include "../perf_event.h"
 
+MODULE_LICENSE("GPL");
+
 #define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format)		\
 static ssize_t __cstate_##_var##_show(struct kobject *kobj,	\
 				struct kobj_attribute *attr,	\
@@ -106,22 +108,27 @@
 				       struct device_attribute *attr,
 				       char *buf);
 
+/* Model -> events mapping */
+struct cstate_model {
+	unsigned long		core_events;
+	unsigned long		pkg_events;
+	unsigned long		quirks;
+};
+
+/* Quirk flags */
+#define SLM_PKG_C6_USE_C7_MSR	(1UL << 0)
+
 struct perf_cstate_msr {
 	u64	msr;
 	struct	perf_pmu_events_attr *attr;
-	bool	(*test)(int idx);
 };
 
 
 /* cstate_core PMU */
-
 static struct pmu cstate_core_pmu;
 static bool has_cstate_core;
 
-enum perf_cstate_core_id {
-	/*
-	 * cstate_core events
-	 */
+enum perf_cstate_core_events {
 	PERF_CSTATE_CORE_C1_RES = 0,
 	PERF_CSTATE_CORE_C3_RES,
 	PERF_CSTATE_CORE_C6_RES,
@@ -130,69 +137,16 @@
 	PERF_CSTATE_CORE_EVENT_MAX,
 };
 
-bool test_core(int idx)
-{
-	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
-	    boot_cpu_data.x86 != 6)
-		return false;
-
-	switch (boot_cpu_data.x86_model) {
-	case 30: /* 45nm Nehalem    */
-	case 26: /* 45nm Nehalem-EP */
-	case 46: /* 45nm Nehalem-EX */
-
-	case 37: /* 32nm Westmere    */
-	case 44: /* 32nm Westmere-EP */
-	case 47: /* 32nm Westmere-EX */
-		if (idx == PERF_CSTATE_CORE_C3_RES ||
-		    idx == PERF_CSTATE_CORE_C6_RES)
-			return true;
-		break;
-	case 42: /* 32nm SandyBridge         */
-	case 45: /* 32nm SandyBridge-E/EN/EP */
-
-	case 58: /* 22nm IvyBridge       */
-	case 62: /* 22nm IvyBridge-EP/EX */
-
-	case 60: /* 22nm Haswell Core */
-	case 63: /* 22nm Haswell Server */
-	case 69: /* 22nm Haswell ULT */
-	case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
-
-	case 61: /* 14nm Broadwell Core-M */
-	case 86: /* 14nm Broadwell Xeon D */
-	case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
-	case 79: /* 14nm Broadwell Server */
-
-	case 78: /* 14nm Skylake Mobile */
-	case 94: /* 14nm Skylake Desktop */
-		if (idx == PERF_CSTATE_CORE_C3_RES ||
-		    idx == PERF_CSTATE_CORE_C6_RES ||
-		    idx == PERF_CSTATE_CORE_C7_RES)
-			return true;
-		break;
-	case 55: /* 22nm Atom "Silvermont"                */
-	case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
-	case 76: /* 14nm Atom "Airmont"                   */
-		if (idx == PERF_CSTATE_CORE_C1_RES ||
-		    idx == PERF_CSTATE_CORE_C6_RES)
-			return true;
-		break;
-	}
-
-	return false;
-}
-
 PMU_EVENT_ATTR_STRING(c1-residency, evattr_cstate_core_c1, "event=0x00");
 PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_core_c3, "event=0x01");
 PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_core_c6, "event=0x02");
 PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_core_c7, "event=0x03");
 
 static struct perf_cstate_msr core_msr[] = {
-	[PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES,		&evattr_cstate_core_c1,	test_core, },
-	[PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY,	&evattr_cstate_core_c3, test_core, },
-	[PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY,	&evattr_cstate_core_c6, test_core, },
-	[PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY,	&evattr_cstate_core_c7,	test_core, },
+	[PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES,		&evattr_cstate_core_c1 },
+	[PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY,	&evattr_cstate_core_c3 },
+	[PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY,	&evattr_cstate_core_c6 },
+	[PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY,	&evattr_cstate_core_c7 },
 };
 
 static struct attribute *core_events_attrs[PERF_CSTATE_CORE_EVENT_MAX + 1] = {
@@ -234,18 +188,11 @@
 	NULL,
 };
 
-/* cstate_core PMU end */
-
-
 /* cstate_pkg PMU */
-
 static struct pmu cstate_pkg_pmu;
 static bool has_cstate_pkg;
 
-enum perf_cstate_pkg_id {
-	/*
-	 * cstate_pkg events
-	 */
+enum perf_cstate_pkg_events {
 	PERF_CSTATE_PKG_C2_RES = 0,
 	PERF_CSTATE_PKG_C3_RES,
 	PERF_CSTATE_PKG_C6_RES,
@@ -257,69 +204,6 @@
 	PERF_CSTATE_PKG_EVENT_MAX,
 };
 
-bool test_pkg(int idx)
-{
-	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
-	    boot_cpu_data.x86 != 6)
-		return false;
-
-	switch (boot_cpu_data.x86_model) {
-	case 30: /* 45nm Nehalem    */
-	case 26: /* 45nm Nehalem-EP */
-	case 46: /* 45nm Nehalem-EX */
-
-	case 37: /* 32nm Westmere    */
-	case 44: /* 32nm Westmere-EP */
-	case 47: /* 32nm Westmere-EX */
-		if (idx == PERF_CSTATE_CORE_C3_RES ||
-		    idx == PERF_CSTATE_CORE_C6_RES ||
-		    idx == PERF_CSTATE_CORE_C7_RES)
-			return true;
-		break;
-	case 42: /* 32nm SandyBridge         */
-	case 45: /* 32nm SandyBridge-E/EN/EP */
-
-	case 58: /* 22nm IvyBridge       */
-	case 62: /* 22nm IvyBridge-EP/EX */
-
-	case 60: /* 22nm Haswell Core */
-	case 63: /* 22nm Haswell Server */
-	case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
-
-	case 61: /* 14nm Broadwell Core-M */
-	case 86: /* 14nm Broadwell Xeon D */
-	case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
-	case 79: /* 14nm Broadwell Server */
-
-	case 78: /* 14nm Skylake Mobile */
-	case 94: /* 14nm Skylake Desktop */
-		if (idx == PERF_CSTATE_PKG_C2_RES ||
-		    idx == PERF_CSTATE_PKG_C3_RES ||
-		    idx == PERF_CSTATE_PKG_C6_RES ||
-		    idx == PERF_CSTATE_PKG_C7_RES)
-			return true;
-		break;
-	case 55: /* 22nm Atom "Silvermont"                */
-	case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
-	case 76: /* 14nm Atom "Airmont"                   */
-		if (idx == PERF_CSTATE_CORE_C6_RES)
-			return true;
-		break;
-	case 69: /* 22nm Haswell ULT */
-		if (idx == PERF_CSTATE_PKG_C2_RES ||
-		    idx == PERF_CSTATE_PKG_C3_RES ||
-		    idx == PERF_CSTATE_PKG_C6_RES ||
-		    idx == PERF_CSTATE_PKG_C7_RES ||
-		    idx == PERF_CSTATE_PKG_C8_RES ||
-		    idx == PERF_CSTATE_PKG_C9_RES ||
-		    idx == PERF_CSTATE_PKG_C10_RES)
-			return true;
-		break;
-	}
-
-	return false;
-}
-
 PMU_EVENT_ATTR_STRING(c2-residency, evattr_cstate_pkg_c2, "event=0x00");
 PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_pkg_c3, "event=0x01");
 PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_pkg_c6, "event=0x02");
@@ -329,13 +213,13 @@
 PMU_EVENT_ATTR_STRING(c10-residency, evattr_cstate_pkg_c10, "event=0x06");
 
 static struct perf_cstate_msr pkg_msr[] = {
-	[PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY,	&evattr_cstate_pkg_c2,	test_pkg, },
-	[PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY,	&evattr_cstate_pkg_c3,	test_pkg, },
-	[PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY,	&evattr_cstate_pkg_c6,	test_pkg, },
-	[PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY,	&evattr_cstate_pkg_c7,	test_pkg, },
-	[PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY,	&evattr_cstate_pkg_c8,	test_pkg, },
-	[PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY,	&evattr_cstate_pkg_c9,	test_pkg, },
-	[PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY,	&evattr_cstate_pkg_c10,	test_pkg, },
+	[PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY,	&evattr_cstate_pkg_c2 },
+	[PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY,	&evattr_cstate_pkg_c3 },
+	[PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY,	&evattr_cstate_pkg_c6 },
+	[PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY,	&evattr_cstate_pkg_c7 },
+	[PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY,	&evattr_cstate_pkg_c8 },
+	[PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY,	&evattr_cstate_pkg_c9 },
+	[PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY,	&evattr_cstate_pkg_c10 },
 };
 
 static struct attribute *pkg_events_attrs[PERF_CSTATE_PKG_EVENT_MAX + 1] = {
@@ -366,8 +250,6 @@
 	NULL,
 };
 
-/* cstate_pkg PMU end*/
-
 static ssize_t cstate_get_attr_cpumask(struct device *dev,
 				       struct device_attribute *attr,
 				       char *buf)
@@ -385,7 +267,7 @@
 static int cstate_pmu_event_init(struct perf_event *event)
 {
 	u64 cfg = event->attr.config;
-	int ret = 0;
+	int cpu;
 
 	if (event->attr.type != event->pmu->type)
 		return -ENOENT;
@@ -400,26 +282,36 @@
 	    event->attr.sample_period) /* no sampling */
 		return -EINVAL;
 
+	if (event->cpu < 0)
+		return -EINVAL;
+
 	if (event->pmu == &cstate_core_pmu) {
 		if (cfg >= PERF_CSTATE_CORE_EVENT_MAX)
 			return -EINVAL;
 		if (!core_msr[cfg].attr)
 			return -EINVAL;
 		event->hw.event_base = core_msr[cfg].msr;
+		cpu = cpumask_any_and(&cstate_core_cpu_mask,
+				      topology_sibling_cpumask(event->cpu));
 	} else if (event->pmu == &cstate_pkg_pmu) {
 		if (cfg >= PERF_CSTATE_PKG_EVENT_MAX)
 			return -EINVAL;
 		if (!pkg_msr[cfg].attr)
 			return -EINVAL;
 		event->hw.event_base = pkg_msr[cfg].msr;
-	} else
+		cpu = cpumask_any_and(&cstate_pkg_cpu_mask,
+				      topology_core_cpumask(event->cpu));
+	} else {
 		return -ENOENT;
+	}
 
-	/* must be done before validate_group */
+	if (cpu >= nr_cpu_ids)
+		return -ENODEV;
+
+	event->cpu = cpu;
 	event->hw.config = cfg;
 	event->hw.idx = -1;
-
-	return ret;
+	return 0;
 }
 
 static inline u64 cstate_pmu_read_counter(struct perf_event *event)
@@ -469,172 +361,91 @@
 	return 0;
 }
 
+/*
+ * Check if the exiting CPU is the designated reader. If so, migrate
+ * the events when there is a valid target available.
+ */
 static void cstate_cpu_exit(int cpu)
 {
-	int i, id, target;
+	unsigned int target;
 
-	/* cpu exit for cstate core */
-	if (has_cstate_core) {
-		id = topology_core_id(cpu);
-		target = -1;
+	if (has_cstate_core &&
+	    cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask)) {
 
-		for_each_online_cpu(i) {
-			if (i == cpu)
-				continue;
-			if (id == topology_core_id(i)) {
-				target = i;
-				break;
-			}
-		}
-		if (cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask) && target >= 0)
+		target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
+		/* Migrate events if there is a valid target */
+		if (target < nr_cpu_ids) {
 			cpumask_set_cpu(target, &cstate_core_cpu_mask);
-		WARN_ON(cpumask_empty(&cstate_core_cpu_mask));
-		if (target >= 0)
 			perf_pmu_migrate_context(&cstate_core_pmu, cpu, target);
+		}
 	}
 
-	/* cpu exit for cstate pkg */
-	if (has_cstate_pkg) {
-		id = topology_physical_package_id(cpu);
-		target = -1;
+	if (has_cstate_pkg &&
+	    cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask)) {
 
-		for_each_online_cpu(i) {
-			if (i == cpu)
-				continue;
-			if (id == topology_physical_package_id(i)) {
-				target = i;
-				break;
-			}
-		}
-		if (cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask) && target >= 0)
+		target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
+		/* Migrate events if there is a valid target */
+		if (target < nr_cpu_ids) {
 			cpumask_set_cpu(target, &cstate_pkg_cpu_mask);
-		WARN_ON(cpumask_empty(&cstate_pkg_cpu_mask));
-		if (target >= 0)
 			perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target);
+		}
 	}
 }
 
 static void cstate_cpu_init(int cpu)
 {
-	int i, id;
+	unsigned int target;
 
-	/* cpu init for cstate core */
-	if (has_cstate_core) {
-		id = topology_core_id(cpu);
-		for_each_cpu(i, &cstate_core_cpu_mask) {
-			if (id == topology_core_id(i))
-				break;
-		}
-		if (i >= nr_cpu_ids)
-			cpumask_set_cpu(cpu, &cstate_core_cpu_mask);
-	}
+	/*
+	 * If this is the first online thread of that core, set it in
+	 * the core cpu mask as the designated reader.
+	 */
+	target = cpumask_any_and(&cstate_core_cpu_mask,
+				 topology_sibling_cpumask(cpu));
 
-	/* cpu init for cstate pkg */
-	if (has_cstate_pkg) {
-		id = topology_physical_package_id(cpu);
-		for_each_cpu(i, &cstate_pkg_cpu_mask) {
-			if (id == topology_physical_package_id(i))
-				break;
-		}
-		if (i >= nr_cpu_ids)
-			cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
-	}
+	if (has_cstate_core && target >= nr_cpu_ids)
+		cpumask_set_cpu(cpu, &cstate_core_cpu_mask);
+
+	/*
+	 * If this is the first online thread of that package, set it
+	 * in the package cpu mask as the designated reader.
+	 */
+	target = cpumask_any_and(&cstate_pkg_cpu_mask,
+				 topology_core_cpumask(cpu));
+	if (has_cstate_pkg && target >= nr_cpu_ids)
+		cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
 }
 
 static int cstate_cpu_notifier(struct notifier_block *self,
-				  unsigned long action, void *hcpu)
+			       unsigned long action, void *hcpu)
 {
 	unsigned int cpu = (long)hcpu;
 
 	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_UP_PREPARE:
-		break;
 	case CPU_STARTING:
 		cstate_cpu_init(cpu);
 		break;
-	case CPU_UP_CANCELED:
-	case CPU_DYING:
-		break;
-	case CPU_ONLINE:
-	case CPU_DEAD:
-		break;
 	case CPU_DOWN_PREPARE:
 		cstate_cpu_exit(cpu);
 		break;
 	default:
 		break;
 	}
-
 	return NOTIFY_OK;
 }
 
-/*
- * Probe the cstate events and insert the available one into sysfs attrs
- * Return false if there is no available events.
- */
-static bool cstate_probe_msr(struct perf_cstate_msr *msr,
-			     struct attribute	**events_attrs,
-			     int max_event_nr)
-{
-	int i, j = 0;
-	u64 val;
-
-	/* Probe the cstate events. */
-	for (i = 0; i < max_event_nr; i++) {
-		if (!msr[i].test(i) || rdmsrl_safe(msr[i].msr, &val))
-			msr[i].attr = NULL;
-	}
-
-	/* List remaining events in the sysfs attrs. */
-	for (i = 0; i < max_event_nr; i++) {
-		if (msr[i].attr)
-			events_attrs[j++] = &msr[i].attr->attr.attr;
-	}
-	events_attrs[j] = NULL;
-
-	return (j > 0) ? true : false;
-}
-
-static int __init cstate_init(void)
-{
-	/* SLM has different MSR for PKG C6 */
-	switch (boot_cpu_data.x86_model) {
-	case 55:
-	case 76:
-	case 77:
-		pkg_msr[PERF_CSTATE_PKG_C6_RES].msr = MSR_PKG_C7_RESIDENCY;
-	}
-
-	if (cstate_probe_msr(core_msr, core_events_attrs, PERF_CSTATE_CORE_EVENT_MAX))
-		has_cstate_core = true;
-
-	if (cstate_probe_msr(pkg_msr, pkg_events_attrs, PERF_CSTATE_PKG_EVENT_MAX))
-		has_cstate_pkg = true;
-
-	return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV;
-}
-
-static void __init cstate_cpumask_init(void)
-{
-	int cpu;
-
-	cpu_notifier_register_begin();
-
-	for_each_online_cpu(cpu)
-		cstate_cpu_init(cpu);
-
-	__perf_cpu_notifier(cstate_cpu_notifier);
-
-	cpu_notifier_register_done();
-}
+static struct notifier_block cstate_cpu_nb = {
+	.notifier_call	= cstate_cpu_notifier,
+	.priority       = CPU_PRI_PERF + 1,
+};
 
 static struct pmu cstate_core_pmu = {
 	.attr_groups	= core_attr_groups,
 	.name		= "cstate_core",
 	.task_ctx_nr	= perf_invalid_context,
 	.event_init	= cstate_pmu_event_init,
-	.add		= cstate_pmu_event_add, /* must have */
-	.del		= cstate_pmu_event_del, /* must have */
+	.add		= cstate_pmu_event_add,
+	.del		= cstate_pmu_event_del,
 	.start		= cstate_pmu_event_start,
 	.stop		= cstate_pmu_event_stop,
 	.read		= cstate_pmu_event_update,
@@ -646,49 +457,203 @@
 	.name		= "cstate_pkg",
 	.task_ctx_nr	= perf_invalid_context,
 	.event_init	= cstate_pmu_event_init,
-	.add		= cstate_pmu_event_add, /* must have */
-	.del		= cstate_pmu_event_del, /* must have */
+	.add		= cstate_pmu_event_add,
+	.del		= cstate_pmu_event_del,
 	.start		= cstate_pmu_event_start,
 	.stop		= cstate_pmu_event_stop,
 	.read		= cstate_pmu_event_update,
 	.capabilities	= PERF_PMU_CAP_NO_INTERRUPT,
 };
 
-static void __init cstate_pmus_register(void)
+static const struct cstate_model nhm_cstates __initconst = {
+	.core_events		= BIT(PERF_CSTATE_CORE_C3_RES) |
+				  BIT(PERF_CSTATE_CORE_C6_RES),
+
+	.pkg_events		= BIT(PERF_CSTATE_PKG_C3_RES) |
+				  BIT(PERF_CSTATE_PKG_C6_RES) |
+				  BIT(PERF_CSTATE_PKG_C7_RES),
+};
+
+static const struct cstate_model snb_cstates __initconst = {
+	.core_events		= BIT(PERF_CSTATE_CORE_C3_RES) |
+				  BIT(PERF_CSTATE_CORE_C6_RES) |
+				  BIT(PERF_CSTATE_CORE_C7_RES),
+
+	.pkg_events		= BIT(PERF_CSTATE_PKG_C2_RES) |
+				  BIT(PERF_CSTATE_PKG_C3_RES) |
+				  BIT(PERF_CSTATE_PKG_C6_RES) |
+				  BIT(PERF_CSTATE_PKG_C7_RES),
+};
+
+static const struct cstate_model hswult_cstates __initconst = {
+	.core_events		= BIT(PERF_CSTATE_CORE_C3_RES) |
+				  BIT(PERF_CSTATE_CORE_C6_RES) |
+				  BIT(PERF_CSTATE_CORE_C7_RES),
+
+	.pkg_events		= BIT(PERF_CSTATE_PKG_C2_RES) |
+				  BIT(PERF_CSTATE_PKG_C3_RES) |
+				  BIT(PERF_CSTATE_PKG_C6_RES) |
+				  BIT(PERF_CSTATE_PKG_C7_RES) |
+				  BIT(PERF_CSTATE_PKG_C8_RES) |
+				  BIT(PERF_CSTATE_PKG_C9_RES) |
+				  BIT(PERF_CSTATE_PKG_C10_RES),
+};
+
+static const struct cstate_model slm_cstates __initconst = {
+	.core_events		= BIT(PERF_CSTATE_CORE_C1_RES) |
+				  BIT(PERF_CSTATE_CORE_C6_RES),
+
+	.pkg_events		= BIT(PERF_CSTATE_PKG_C6_RES),
+	.quirks			= SLM_PKG_C6_USE_C7_MSR,
+};
+
+#define X86_CSTATES_MODEL(model, states)				\
+	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long) &(states) }
+
+static const struct x86_cpu_id intel_cstates_match[] __initconst = {
+	X86_CSTATES_MODEL(30, nhm_cstates),    /* 45nm Nehalem              */
+	X86_CSTATES_MODEL(26, nhm_cstates),    /* 45nm Nehalem-EP           */
+	X86_CSTATES_MODEL(46, nhm_cstates),    /* 45nm Nehalem-EX           */
+
+	X86_CSTATES_MODEL(37, nhm_cstates),    /* 32nm Westmere             */
+	X86_CSTATES_MODEL(44, nhm_cstates),    /* 32nm Westmere-EP          */
+	X86_CSTATES_MODEL(47, nhm_cstates),    /* 32nm Westmere-EX          */
+
+	X86_CSTATES_MODEL(42, snb_cstates),    /* 32nm SandyBridge          */
+	X86_CSTATES_MODEL(45, snb_cstates),    /* 32nm SandyBridge-E/EN/EP  */
+
+	X86_CSTATES_MODEL(58, snb_cstates),    /* 22nm IvyBridge            */
+	X86_CSTATES_MODEL(62, snb_cstates),    /* 22nm IvyBridge-EP/EX      */
+
+	X86_CSTATES_MODEL(60, snb_cstates),    /* 22nm Haswell Core         */
+	X86_CSTATES_MODEL(63, snb_cstates),    /* 22nm Haswell Server       */
+	X86_CSTATES_MODEL(70, snb_cstates),    /* 22nm Haswell + GT3e       */
+
+	X86_CSTATES_MODEL(69, hswult_cstates), /* 22nm Haswell ULT          */
+
+	X86_CSTATES_MODEL(55, slm_cstates),    /* 22nm Atom Silvermont      */
+	X86_CSTATES_MODEL(77, slm_cstates),    /* 22nm Atom Avoton/Rangely  */
+	X86_CSTATES_MODEL(76, slm_cstates),    /* 22nm Atom Airmont         */
+
+	X86_CSTATES_MODEL(61, snb_cstates),    /* 14nm Broadwell Core-M     */
+	X86_CSTATES_MODEL(86, snb_cstates),    /* 14nm Broadwell Xeon D     */
+	X86_CSTATES_MODEL(71, snb_cstates),    /* 14nm Broadwell + GT3e     */
+	X86_CSTATES_MODEL(79, snb_cstates),    /* 14nm Broadwell Server     */
+
+	X86_CSTATES_MODEL(78, snb_cstates),    /* 14nm Skylake Mobile       */
+	X86_CSTATES_MODEL(94, snb_cstates),    /* 14nm Skylake Desktop      */
+	{ },
+};
+MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
+
+/*
+ * Probe the cstate events and insert the available ones into the sysfs attrs.
+ * Return false if there are no available events.
+ */
+static bool __init cstate_probe_msr(const unsigned long evmsk, int max,
+                                   struct perf_cstate_msr *msr,
+                                   struct attribute **attrs)
 {
-	int err;
+	bool found = false;
+	unsigned int bit;
+	u64 val;
+
+	for (bit = 0; bit < max; bit++) {
+		if (test_bit(bit, &evmsk) && !rdmsrl_safe(msr[bit].msr, &val)) {
+			*attrs++ = &msr[bit].attr->attr.attr;
+			found = true;
+		} else {
+			msr[bit].attr = NULL;
+		}
+	}
+	*attrs = NULL;
+
+	return found;
+}
+
+static int __init cstate_probe(const struct cstate_model *cm)
+{
+	/* SLM has different MSR for PKG C6 */
+	if (cm->quirks & SLM_PKG_C6_USE_C7_MSR)
+		pkg_msr[PERF_CSTATE_PKG_C6_RES].msr = MSR_PKG_C7_RESIDENCY;
+
+	has_cstate_core = cstate_probe_msr(cm->core_events,
+					   PERF_CSTATE_CORE_EVENT_MAX,
+					   core_msr, core_events_attrs);
+
+	has_cstate_pkg = cstate_probe_msr(cm->pkg_events,
+					  PERF_CSTATE_PKG_EVENT_MAX,
+					  pkg_msr, pkg_events_attrs);
+
+	return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV;
+}
+
+static inline void cstate_cleanup(void)
+{
+	if (has_cstate_core)
+		perf_pmu_unregister(&cstate_core_pmu);
+
+	if (has_cstate_pkg)
+		perf_pmu_unregister(&cstate_pkg_pmu);
+}
+
+static int __init cstate_init(void)
+{
+	int cpu, err;
+
+	cpu_notifier_register_begin();
+	for_each_online_cpu(cpu)
+		cstate_cpu_init(cpu);
 
 	if (has_cstate_core) {
 		err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1);
-		if (WARN_ON(err))
-			pr_info("Failed to register PMU %s error %d\n",
-				cstate_core_pmu.name, err);
+		if (err) {
+			has_cstate_core = false;
+			pr_info("Failed to register cstate core pmu\n");
+			goto out;
+		}
 	}
 
 	if (has_cstate_pkg) {
 		err = perf_pmu_register(&cstate_pkg_pmu, cstate_pkg_pmu.name, -1);
-		if (WARN_ON(err))
-			pr_info("Failed to register PMU %s error %d\n",
-				cstate_pkg_pmu.name, err);
+		if (err) {
+			has_cstate_pkg = false;
+			pr_info("Failed to register cstate pkg pmu\n");
+			cstate_cleanup();
+			goto out;
+		}
 	}
+	__register_cpu_notifier(&cstate_cpu_nb);
+out:
+	cpu_notifier_register_done();
+	return err;
 }
 
 static int __init cstate_pmu_init(void)
 {
+	const struct x86_cpu_id *id;
 	int err;
 
-	if (cpu_has_hypervisor)
+	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
 		return -ENODEV;
 
-	err = cstate_init();
+	id = x86_match_cpu(intel_cstates_match);
+	if (!id)
+		return -ENODEV;
+
+	err = cstate_probe((const struct cstate_model *) id->driver_data);
 	if (err)
 		return err;
 
-	cstate_cpumask_init();
-
-	cstate_pmus_register();
-
-	return 0;
+	return cstate_init();
 }
+module_init(cstate_pmu_init);
 
-device_initcall(cstate_pmu_init);
+static void __exit cstate_pmu_exit(void)
+{
+	cpu_notifier_register_begin();
+	__unregister_cpu_notifier(&cstate_cpu_nb);
+	cstate_cleanup();
+	cpu_notifier_register_done();
+}
+module_exit(cstate_pmu_exit);
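
Two user-visible consequences of the rework above: the driver is now a proper module, and cstate events must name a CPU (event->cpu < 0 is rejected, and the event is silently retargeted to the designated reader for the core or package). A hedged sketch of counting c6-residency (event=0x02 per the core attributes above), with the dynamic PMU type read from sysfs:

#include <linux/perf_event.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	FILE *f = fopen("/sys/bus/event_source/devices/cstate_core/type", "r");
	int type;

	if (!f || fscanf(f, "%d", &type) != 1)
		return 1;
	fclose(f);

	struct perf_event_attr attr;
	memset(&attr, 0, sizeof(attr));
	attr.type   = type;
	attr.size   = sizeof(attr);
	attr.config = 0x02;	/* c6-residency */

	/* pid == -1, cpu == 0: this is a counting, per-CPU PMU. */
	int fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
	if (fd < 0)
		return 1;

	sleep(1);

	uint64_t count;
	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("C6 residency: %llu\n", (unsigned long long)count);
	close(fd);
	return 0;
}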
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 8584b90..7ce9f3f 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -645,6 +645,12 @@
 	EVENT_CONSTRAINT_END
 };
 
+struct event_constraint intel_glm_pebs_event_constraints[] = {
+	/* Allow all events as PEBS with no flags */
+	INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
+	EVENT_CONSTRAINT_END
+};
+
 struct event_constraint intel_nehalem_pebs_event_constraints[] = {
 	INTEL_PLD_CONSTRAINT(0x100b, 0xf),      /* MEM_INST_RETIRED.* */
 	INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),    /* MEM_UNCORE_RETIRED.* */
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 1ca5d1e..9e2b40c 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -14,7 +14,8 @@
 	LBR_FORMAT_EIP_FLAGS	= 0x03,
 	LBR_FORMAT_EIP_FLAGS2	= 0x04,
 	LBR_FORMAT_INFO		= 0x05,
-	LBR_FORMAT_MAX_KNOWN    = LBR_FORMAT_INFO,
+	LBR_FORMAT_TIME		= 0x06,
+	LBR_FORMAT_MAX_KNOWN    = LBR_FORMAT_TIME,
 };
 
 static enum {
@@ -464,6 +465,16 @@
 			abort = !!(info & LBR_INFO_ABORT);
 			cycles = (info & LBR_INFO_CYCLES);
 		}
+
+		if (lbr_format == LBR_FORMAT_TIME) {
+			mis = !!(from & LBR_FROM_FLAG_MISPRED);
+			pred = !mis;
+			skip = 1;
+			cycles = ((to >> 48) & LBR_INFO_CYCLES);
+
+			to = (u64)((((s64)to) << 16) >> 16);
+		}
+
 		if (lbr_flags & LBR_EIP_FLAGS) {
 			mis = !!(from & LBR_FROM_FLAG_MISPRED);
 			pred = !mis;
@@ -1049,6 +1060,24 @@
 	pr_cont("8-deep LBR, ");
 }
 
+/* slm */
+void __init intel_pmu_lbr_init_slm(void)
+{
+	x86_pmu.lbr_nr	   = 8;
+	x86_pmu.lbr_tos    = MSR_LBR_TOS;
+	x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
+	x86_pmu.lbr_to     = MSR_LBR_CORE_TO;
+
+	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+	x86_pmu.lbr_sel_map  = nhm_lbr_sel_map;
+
+	/*
+	 * SW branch filter usage:
+	 * - compensate for lack of HW filter
+	 */
+	pr_cont("8-deep LBR, ");
+}
+
 /* Knights Landing */
 void intel_pmu_lbr_init_knl(void)
 {
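
In the LBR_FORMAT_TIME handling added above, the cycle count is packed into bits 63:48 of the 'to' address, so the address has to be sign-extended back from 48 bits — that is what the (u64)((((s64)to) << 16) >> 16) shift pair does. A standalone demonstration (relying, as the kernel does, on arithmetic right shift of signed values):

#include <stdint.h>
#include <stdio.h>

/* Recover a canonical 64-bit address from its low 48 bits. */
static uint64_t sign_extend48(uint64_t to)
{
	return (uint64_t)((int64_t)(to << 16) >> 16);
}

int main(void)
{
	uint64_t packed = 0x1234ffff81000000ULL;	/* cycles in bits 63:48 */
	uint16_t cycles = packed >> 48;

	printf("cycles=%u, to=%#llx\n", cycles,
	       (unsigned long long)sign_extend48(packed));
	return 0;
}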
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 7377814..04bb5fb 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -67,11 +67,13 @@
 	PT_CAP(max_subleaf,		0, CR_EAX, 0xffffffff),
 	PT_CAP(cr3_filtering,		0, CR_EBX, BIT(0)),
 	PT_CAP(psb_cyc,			0, CR_EBX, BIT(1)),
+	PT_CAP(ip_filtering,		0, CR_EBX, BIT(2)),
 	PT_CAP(mtc,			0, CR_EBX, BIT(3)),
 	PT_CAP(topa_output,		0, CR_ECX, BIT(0)),
 	PT_CAP(topa_multiple_entries,	0, CR_ECX, BIT(1)),
 	PT_CAP(single_range_output,	0, CR_ECX, BIT(2)),
 	PT_CAP(payloads_lip,		0, CR_ECX, BIT(31)),
+	PT_CAP(num_address_ranges,	1, CR_EAX, 0x3),
 	PT_CAP(mtc_periods,		1, CR_EAX, 0xffff0000),
 	PT_CAP(cycle_thresholds,	1, CR_EBX, 0xffff),
 	PT_CAP(psb_periods,		1, CR_EBX, 0xffff0000),
@@ -125,9 +127,46 @@
 	.attrs	= pt_formats_attr,
 };
 
+static ssize_t
+pt_timing_attr_show(struct device *dev, struct device_attribute *attr,
+		    char *page)
+{
+	struct perf_pmu_events_attr *pmu_attr =
+		container_of(attr, struct perf_pmu_events_attr, attr);
+
+	switch (pmu_attr->id) {
+	case 0:
+		return sprintf(page, "%lu\n", pt_pmu.max_nonturbo_ratio);
+	case 1:
+		return sprintf(page, "%u:%u\n",
+			       pt_pmu.tsc_art_num,
+			       pt_pmu.tsc_art_den);
+	default:
+		break;
+	}
+
+	return -EINVAL;
+}
+
+PMU_EVENT_ATTR(max_nonturbo_ratio, timing_attr_max_nonturbo_ratio, 0,
+	       pt_timing_attr_show);
+PMU_EVENT_ATTR(tsc_art_ratio, timing_attr_tsc_art_ratio, 1,
+	       pt_timing_attr_show);
+
+static struct attribute *pt_timing_attr[] = {
+	&timing_attr_max_nonturbo_ratio.attr.attr,
+	&timing_attr_tsc_art_ratio.attr.attr,
+	NULL,
+};
+
+static struct attribute_group pt_timing_group = {
+	.attrs	= pt_timing_attr,
+};
+
 static const struct attribute_group *pt_attr_groups[] = {
 	&pt_cap_group,
 	&pt_format_group,
+	&pt_timing_group,
 	NULL,
 };
 
@@ -140,6 +179,23 @@
 	int ret;
 	long i;
 
+	rdmsrl(MSR_PLATFORM_INFO, reg);
+	pt_pmu.max_nonturbo_ratio = (reg & 0xff00) >> 8;
+
+	/*
+	 * If available, read the TSC to core crystal clock ratio;
+	 * otherwise, a zero numerator stands for "not enumerated",
+	 * as per the SDM.
+	 */
+	if (boot_cpu_data.cpuid_level >= CPUID_TSC_LEAF) {
+		u32 eax, ebx, ecx, edx;
+
+		cpuid(CPUID_TSC_LEAF, &eax, &ebx, &ecx, &edx);
+
+		pt_pmu.tsc_art_num = ebx;
+		pt_pmu.tsc_art_den = eax;
+	}
+
 	if (boot_cpu_has(X86_FEATURE_VMX)) {
 		/*
 		 * Intel SDM, 36.5 "Tracing post-VMXON" says that
@@ -263,6 +319,75 @@
  * These all are cpu affine and operate on a local PT
  */
 
+/* Address ranges and their corresponding msr configuration registers */
+static const struct pt_address_range {
+	unsigned long	msr_a;
+	unsigned long	msr_b;
+	unsigned int	reg_off;
+} pt_address_ranges[] = {
+	{
+		.msr_a	 = MSR_IA32_RTIT_ADDR0_A,
+		.msr_b	 = MSR_IA32_RTIT_ADDR0_B,
+		.reg_off = RTIT_CTL_ADDR0_OFFSET,
+	},
+	{
+		.msr_a	 = MSR_IA32_RTIT_ADDR1_A,
+		.msr_b	 = MSR_IA32_RTIT_ADDR1_B,
+		.reg_off = RTIT_CTL_ADDR1_OFFSET,
+	},
+	{
+		.msr_a	 = MSR_IA32_RTIT_ADDR2_A,
+		.msr_b	 = MSR_IA32_RTIT_ADDR2_B,
+		.reg_off = RTIT_CTL_ADDR2_OFFSET,
+	},
+	{
+		.msr_a	 = MSR_IA32_RTIT_ADDR3_A,
+		.msr_b	 = MSR_IA32_RTIT_ADDR3_B,
+		.reg_off = RTIT_CTL_ADDR3_OFFSET,
+	}
+};
+
+static u64 pt_config_filters(struct perf_event *event)
+{
+	struct pt_filters *filters = event->hw.addr_filters;
+	struct pt *pt = this_cpu_ptr(&pt_ctx);
+	unsigned int range = 0;
+	u64 rtit_ctl = 0;
+
+	if (!filters)
+		return 0;
+
+	perf_event_addr_filters_sync(event);
+
+	for (range = 0; range < filters->nr_filters; range++) {
+		struct pt_filter *filter = &filters->filter[range];
+
+		/*
+		 * Note, if the range has zero start/end addresses due
+		 * to its dynamic object not being loaded yet, we just
+		 * go ahead and program a zeroed range, which will simply
+		 * produce no data. Note^2: if executable code at 0x0
+		 * is a concern, we can set up an "invalid" configuration
+		 * such as msr_b < msr_a.
+		 */
+
+		/* avoid redundant msr writes */
+		if (pt->filters.filter[range].msr_a != filter->msr_a) {
+			wrmsrl(pt_address_ranges[range].msr_a, filter->msr_a);
+			pt->filters.filter[range].msr_a = filter->msr_a;
+		}
+
+		if (pt->filters.filter[range].msr_b != filter->msr_b) {
+			wrmsrl(pt_address_ranges[range].msr_b, filter->msr_b);
+			pt->filters.filter[range].msr_b = filter->msr_b;
+		}
+
+		rtit_ctl |= filter->config << pt_address_ranges[range].reg_off;
+	}
+
+	return rtit_ctl;
+}
+
 static void pt_config(struct perf_event *event)
 {
 	u64 reg;
@@ -272,7 +397,8 @@
 		wrmsrl(MSR_IA32_RTIT_STATUS, 0);
 	}
 
-	reg = RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN;
+	reg = pt_config_filters(event);
+	reg |= RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN;
 
 	if (!event->attr.exclude_kernel)
 		reg |= RTIT_CTL_OS;
@@ -921,24 +1047,80 @@
 	kfree(buf);
 }
 
-/**
- * pt_buffer_is_full() - check if the buffer is full
- * @buf:	PT buffer.
- * @pt:		Per-cpu pt handle.
- *
- * If the user hasn't read data from the output region that aux_head
- * points to, the buffer is considered full: the user needs to read at
- * least this region and update aux_tail to point past it.
- */
-static bool pt_buffer_is_full(struct pt_buffer *buf, struct pt *pt)
+static int pt_addr_filters_init(struct perf_event *event)
 {
-	if (buf->snapshot)
-		return false;
+	struct pt_filters *filters;
+	int node = event->cpu == -1 ? -1 : cpu_to_node(event->cpu);
 
-	if (local_read(&buf->data_size) >= pt->handle.size)
-		return true;
+	if (!pt_cap_get(PT_CAP_num_address_ranges))
+		return 0;
 
-	return false;
+	filters = kzalloc_node(sizeof(struct pt_filters), GFP_KERNEL, node);
+	if (!filters)
+		return -ENOMEM;
+
+	if (event->parent)
+		memcpy(filters, event->parent->hw.addr_filters,
+		       sizeof(*filters));
+
+	event->hw.addr_filters = filters;
+
+	return 0;
+}
+
+static void pt_addr_filters_fini(struct perf_event *event)
+{
+	kfree(event->hw.addr_filters);
+	event->hw.addr_filters = NULL;
+}
+
+static int pt_event_addr_filters_validate(struct list_head *filters)
+{
+	struct perf_addr_filter *filter;
+	int range = 0;
+
+	list_for_each_entry(filter, filters, entry) {
+		/* PT doesn't support single address triggers */
+		if (!filter->range)
+			return -EOPNOTSUPP;
+
+		if (!filter->inode && !kernel_ip(filter->offset))
+			return -EINVAL;
+
+		if (++range > pt_cap_get(PT_CAP_num_address_ranges))
+			return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static void pt_event_addr_filters_sync(struct perf_event *event)
+{
+	struct perf_addr_filters_head *head = perf_event_addr_filters(event);
+	unsigned long msr_a, msr_b, *offs = event->addr_filters_offs;
+	struct pt_filters *filters = event->hw.addr_filters;
+	struct perf_addr_filter *filter;
+	int range = 0;
+
+	if (!filters)
+		return;
+
+	list_for_each_entry(filter, &head->list, entry) {
+		if (filter->inode && !offs[range]) {
+			msr_a = msr_b = 0;
+		} else {
+			/* apply the offset */
+			msr_a = filter->offset + offs[range];
+			msr_b = filter->size + msr_a;
+		}
+
+		filters->filter[range].msr_a  = msr_a;
+		filters->filter[range].msr_b  = msr_b;
+		filters->filter[range].config = filter->filter ? 1 : 2;
+		range++;
+	}
+
+	filters->nr_filters = range;
 }
 
 /**
@@ -955,7 +1137,7 @@
 	 * after PT has been disabled by pt_event_stop(). Make sure we don't
 	 * do anything (particularly, re-enable) for this event here.
 	 */
-	if (!ACCESS_ONCE(pt->handle_nmi))
+	if (!READ_ONCE(pt->handle_nmi))
 		return;
 
 	/*
@@ -1040,23 +1222,36 @@
 
 static void pt_event_start(struct perf_event *event, int mode)
 {
+	struct hw_perf_event *hwc = &event->hw;
 	struct pt *pt = this_cpu_ptr(&pt_ctx);
-	struct pt_buffer *buf = perf_get_aux(&pt->handle);
+	struct pt_buffer *buf;
 
 	if (READ_ONCE(pt->vmx_on))
 		return;
 
-	if (!buf || pt_buffer_is_full(buf, pt)) {
-		event->hw.state = PERF_HES_STOPPED;
-		return;
+	buf = perf_aux_output_begin(&pt->handle, event);
+	if (!buf)
+		goto fail_stop;
+
+	pt_buffer_reset_offsets(buf, pt->handle.head);
+	if (!buf->snapshot) {
+		if (pt_buffer_reset_markers(buf, &pt->handle))
+			goto fail_end_stop;
 	}
 
-	ACCESS_ONCE(pt->handle_nmi) = 1;
-	event->hw.state = 0;
+	WRITE_ONCE(pt->handle_nmi, 1);
+	hwc->state = 0;
 
 	pt_config_buffer(buf->cur->table, buf->cur_idx,
 			 buf->output_off);
 	pt_config(event);
+
+	return;
+
+fail_end_stop:
+	perf_aux_output_end(&pt->handle, 0, true);
+fail_stop:
+	hwc->state = PERF_HES_STOPPED;
 }
 
 static void pt_event_stop(struct perf_event *event, int mode)
@@ -1067,7 +1262,7 @@
 	 * Protect against the PMI racing with disabling wrmsr,
 	 * see comment in intel_pt_interrupt().
 	 */
-	ACCESS_ONCE(pt->handle_nmi) = 0;
+	WRITE_ONCE(pt->handle_nmi, 0);
 
 	pt_config_stop(event);
 
@@ -1090,19 +1285,7 @@
 		pt_handle_status(pt);
 
 		pt_update_head(pt);
-	}
-}
 
-static void pt_event_del(struct perf_event *event, int mode)
-{
-	struct pt *pt = this_cpu_ptr(&pt_ctx);
-	struct pt_buffer *buf;
-
-	pt_event_stop(event, PERF_EF_UPDATE);
-
-	buf = perf_get_aux(&pt->handle);
-
-	if (buf) {
 		if (buf->snapshot)
 			pt->handle.head =
 				local_xchg(&buf->data_size,
@@ -1112,9 +1295,13 @@
 	}
 }
 
+static void pt_event_del(struct perf_event *event, int mode)
+{
+	pt_event_stop(event, PERF_EF_UPDATE);
+}
+
 static int pt_event_add(struct perf_event *event, int mode)
 {
-	struct pt_buffer *buf;
 	struct pt *pt = this_cpu_ptr(&pt_ctx);
 	struct hw_perf_event *hwc = &event->hw;
 	int ret = -EBUSY;
@@ -1122,34 +1309,18 @@
 	if (pt->handle.event)
 		goto fail;
 
-	buf = perf_aux_output_begin(&pt->handle, event);
-	ret = -EINVAL;
-	if (!buf)
-		goto fail_stop;
-
-	pt_buffer_reset_offsets(buf, pt->handle.head);
-	if (!buf->snapshot) {
-		ret = pt_buffer_reset_markers(buf, &pt->handle);
-		if (ret)
-			goto fail_end_stop;
-	}
-
 	if (mode & PERF_EF_START) {
 		pt_event_start(event, 0);
-		ret = -EBUSY;
+		ret = -EINVAL;
 		if (hwc->state == PERF_HES_STOPPED)
-			goto fail_end_stop;
+			goto fail;
 	} else {
 		hwc->state = PERF_HES_STOPPED;
 	}
 
-	return 0;
-
-fail_end_stop:
-	perf_aux_output_end(&pt->handle, 0, true);
-fail_stop:
-	hwc->state = PERF_HES_STOPPED;
+	ret = 0;
 fail:
+
 	return ret;
 }
 
@@ -1159,6 +1330,7 @@
 
 static void pt_event_destroy(struct perf_event *event)
 {
+	pt_addr_filters_fini(event);
 	x86_del_exclusive(x86_lbr_exclusive_pt);
 }
 
@@ -1173,6 +1345,11 @@
 	if (x86_add_exclusive(x86_lbr_exclusive_pt))
 		return -EBUSY;
 
+	if (pt_addr_filters_init(event)) {
+		x86_del_exclusive(x86_lbr_exclusive_pt);
+		return -ENOMEM;
+	}
+
 	event->destroy = pt_event_destroy;
 
 	return 0;
@@ -1192,7 +1369,7 @@
 
 	BUILD_BUG_ON(sizeof(struct topa) > PAGE_SIZE);
 
-	if (!test_cpu_cap(&boot_cpu_data, X86_FEATURE_INTEL_PT))
+	if (!boot_cpu_has(X86_FEATURE_INTEL_PT))
 		return -ENODEV;
 
 	get_online_cpus();
@@ -1226,16 +1403,21 @@
 			PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_AUX_SW_DOUBLEBUF;
 
 	pt_pmu.pmu.capabilities	|= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE;
-	pt_pmu.pmu.attr_groups	= pt_attr_groups;
-	pt_pmu.pmu.task_ctx_nr	= perf_sw_context;
-	pt_pmu.pmu.event_init	= pt_event_init;
-	pt_pmu.pmu.add		= pt_event_add;
-	pt_pmu.pmu.del		= pt_event_del;
-	pt_pmu.pmu.start	= pt_event_start;
-	pt_pmu.pmu.stop		= pt_event_stop;
-	pt_pmu.pmu.read		= pt_event_read;
-	pt_pmu.pmu.setup_aux	= pt_buffer_setup_aux;
-	pt_pmu.pmu.free_aux	= pt_buffer_free_aux;
+	pt_pmu.pmu.attr_groups		 = pt_attr_groups;
+	pt_pmu.pmu.task_ctx_nr		 = perf_sw_context;
+	pt_pmu.pmu.event_init		 = pt_event_init;
+	pt_pmu.pmu.add			 = pt_event_add;
+	pt_pmu.pmu.del			 = pt_event_del;
+	pt_pmu.pmu.start		 = pt_event_start;
+	pt_pmu.pmu.stop			 = pt_event_stop;
+	pt_pmu.pmu.read			 = pt_event_read;
+	pt_pmu.pmu.setup_aux		 = pt_buffer_setup_aux;
+	pt_pmu.pmu.free_aux		 = pt_buffer_free_aux;
+	pt_pmu.pmu.addr_filters_sync     = pt_event_addr_filters_sync;
+	pt_pmu.pmu.addr_filters_validate = pt_event_addr_filters_validate;
+	pt_pmu.pmu.nr_addr_filters       =
+		pt_cap_get(PT_CAP_num_address_ranges);
+
 	ret = perf_pmu_register(&pt_pmu.pmu, "intel_pt", -1);
 
 	return ret;
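
Because pt_pmu.pmu.nr_addr_filters is filled in from PT_CAP_num_address_ranges, perf core exposes the count in sysfs, which is how tooling discovers whether address-range filters are available at all. A hedged sketch of that probe (the attribute name is assumed to match the pmu field):

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/bus/event_source/devices/intel_pt/nr_addr_filters",
			"r");
	unsigned int nr = 0;

	if (f) {
		if (fscanf(f, "%u", &nr) != 1)
			nr = 0;
		fclose(f);
	}

	printf("PT address range filters available: %u\n", nr);
	return 0;
}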
diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h
index 3abb5f5..efffa4a 100644
--- a/arch/x86/events/intel/pt.h
+++ b/arch/x86/events/intel/pt.h
@@ -20,6 +20,40 @@
 #define __INTEL_PT_H__
 
 /*
+ * PT MSR bit definitions
+ */
+#define RTIT_CTL_TRACEEN		BIT(0)
+#define RTIT_CTL_CYCLEACC		BIT(1)
+#define RTIT_CTL_OS			BIT(2)
+#define RTIT_CTL_USR			BIT(3)
+#define RTIT_CTL_CR3EN			BIT(7)
+#define RTIT_CTL_TOPA			BIT(8)
+#define RTIT_CTL_MTC_EN			BIT(9)
+#define RTIT_CTL_TSC_EN			BIT(10)
+#define RTIT_CTL_DISRETC		BIT(11)
+#define RTIT_CTL_BRANCH_EN		BIT(13)
+#define RTIT_CTL_MTC_RANGE_OFFSET	14
+#define RTIT_CTL_MTC_RANGE		(0x0full << RTIT_CTL_MTC_RANGE_OFFSET)
+#define RTIT_CTL_CYC_THRESH_OFFSET	19
+#define RTIT_CTL_CYC_THRESH		(0x0full << RTIT_CTL_CYC_THRESH_OFFSET)
+#define RTIT_CTL_PSB_FREQ_OFFSET	24
+#define RTIT_CTL_PSB_FREQ		(0x0full << RTIT_CTL_PSB_FREQ_OFFSET)
+#define RTIT_CTL_ADDR0_OFFSET		32
+#define RTIT_CTL_ADDR0			(0x0full << RTIT_CTL_ADDR0_OFFSET)
+#define RTIT_CTL_ADDR1_OFFSET		36
+#define RTIT_CTL_ADDR1			(0x0full << RTIT_CTL_ADDR1_OFFSET)
+#define RTIT_CTL_ADDR2_OFFSET		40
+#define RTIT_CTL_ADDR2			(0x0full << RTIT_CTL_ADDR2_OFFSET)
+#define RTIT_CTL_ADDR3_OFFSET		44
+#define RTIT_CTL_ADDR3			(0x0full << RTIT_CTL_ADDR3_OFFSET)
+#define RTIT_STATUS_FILTEREN		BIT(0)
+#define RTIT_STATUS_CONTEXTEN		BIT(1)
+#define RTIT_STATUS_TRIGGEREN		BIT(2)
+#define RTIT_STATUS_BUFFOVF		BIT(3)
+#define RTIT_STATUS_ERROR		BIT(4)
+#define RTIT_STATUS_STOPPED		BIT(5)
+
+/*
  * Single-entry ToPA: when this close to region boundary, switch
  * buffers to avoid losing data.
  */
@@ -48,15 +82,20 @@
 #define PT_CPUID_LEAVES		2
 #define PT_CPUID_REGS_NUM	4 /* number of registers (eax, ebx, ecx, edx) */
 
+/* TSC to Core Crystal Clock Ratio */
+#define CPUID_TSC_LEAF		0x15
+
 enum pt_capabilities {
 	PT_CAP_max_subleaf = 0,
 	PT_CAP_cr3_filtering,
 	PT_CAP_psb_cyc,
+	PT_CAP_ip_filtering,
 	PT_CAP_mtc,
 	PT_CAP_topa_output,
 	PT_CAP_topa_multiple_entries,
 	PT_CAP_single_range_output,
 	PT_CAP_payloads_lip,
+	PT_CAP_num_address_ranges,
 	PT_CAP_mtc_periods,
 	PT_CAP_cycle_thresholds,
 	PT_CAP_psb_periods,
@@ -66,6 +105,9 @@
 	struct pmu		pmu;
 	u32			caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
 	bool			vmx;
+	unsigned long		max_nonturbo_ratio;
+	unsigned int		tsc_art_num;
+	unsigned int		tsc_art_den;
 };
 
 /**
@@ -104,14 +146,40 @@
 	struct topa_entry	*topa_index[0];
 };
 
+#define PT_FILTERS_NUM	4
+
+/**
+ * struct pt_filter - IP range filter configuration
+ * @msr_a:	range start, goes to RTIT_ADDRn_A
+ * @msr_b:	range end, goes to RTIT_ADDRn_B
+ * @config:	4-bit field in RTIT_CTL
+ */
+struct pt_filter {
+	unsigned long	msr_a;
+	unsigned long	msr_b;
+	unsigned long	config;
+};
+
+/**
+ * struct pt_filters - IP range filtering context
+ * @filter:	filters defined for this context
+ * @nr_filters:	number of defined filters in the @filter array
+ */
+struct pt_filters {
+	struct pt_filter	filter[PT_FILTERS_NUM];
+	unsigned int		nr_filters;
+};
+
 /**
  * struct pt - per-cpu pt context
  * @handle:	perf output handle
+ * @filters:		last configured filters
  * @handle_nmi:	do handle PT PMI on this cpu, there's an active event
  * @vmx_on:	1 if VMX is ON on this cpu
  */
 struct pt {
 	struct perf_output_handle handle;
+	struct pt_filters	filters;
 	int			handle_nmi;
 	int			vmx_on;
 };
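
Tying these definitions back to pt_config_filters(): each of the four ranges owns a 4-bit field in RTIT_CTL starting at its ADDRn_OFFSET, and pt_event_addr_filters_sync() stores 1 there for a trace-filter range and 2 for a trace-stop range. A compile-anywhere sketch of assembling those bits:

#include <stdint.h>
#include <stdio.h>

#define RTIT_CTL_ADDR0_OFFSET	32
#define RTIT_CTL_ADDR1_OFFSET	36

int main(void)
{
	uint64_t rtit_ctl = 0;

	rtit_ctl |= 1ULL << RTIT_CTL_ADDR0_OFFSET;	/* range 0: filter */
	rtit_ctl |= 2ULL << RTIT_CTL_ADDR1_OFFSET;	/* range 1: trace-stop */

	printf("RTIT_CTL address-filter bits: %#llx\n",
	       (unsigned long long)rtit_ctl);
	return 0;
}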
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 1705c9d..99c4bab 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -27,10 +27,14 @@
  *	  event: rapl_energy_dram
  *    perf code: 0x3
  *
- * dram counter: consumption of the builtin-gpu domain (client only)
+ * gpu counter: consumption of the builtin-gpu domain (client only)
  *	  event: rapl_energy_gpu
  *    perf code: 0x4
  *
+ *  psys counter: consumption of the builtin-psys domain (client only)
+ *	  event: rapl_energy_psys
+ *    perf code: 0x5
+ *
  * We manage those counters as free running (read-only). They may be
  * used simultaneously by other tools, such as turbostat.
  *
@@ -53,6 +57,8 @@
 #include <asm/cpu_device_id.h>
 #include "../perf_event.h"
 
+MODULE_LICENSE("GPL");
+
 /*
  * RAPL energy status counters
  */
@@ -64,13 +70,16 @@
 #define INTEL_RAPL_RAM		0x3	/* pseudo-encoding */
 #define RAPL_IDX_PP1_NRG_STAT	3	/* gpu */
 #define INTEL_RAPL_PP1		0x4	/* pseudo-encoding */
+#define RAPL_IDX_PSYS_NRG_STAT	4	/* psys */
+#define INTEL_RAPL_PSYS		0x5	/* pseudo-encoding */
 
-#define NR_RAPL_DOMAINS         0x4
+#define NR_RAPL_DOMAINS         0x5
 static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
 	"pp0-core",
 	"package",
 	"dram",
 	"pp1-gpu",
+	"psys",
 };
 
 /* Clients have PP0, PKG */
@@ -89,6 +98,13 @@
 			 1<<RAPL_IDX_RAM_NRG_STAT|\
 			 1<<RAPL_IDX_PP1_NRG_STAT)
 
+/* SKL clients have PP0, PKG, RAM, PP1, PSYS */
+#define RAPL_IDX_SKL_CLN (1<<RAPL_IDX_PP0_NRG_STAT|\
+			  1<<RAPL_IDX_PKG_NRG_STAT|\
+			  1<<RAPL_IDX_RAM_NRG_STAT|\
+			  1<<RAPL_IDX_PP1_NRG_STAT|\
+			  1<<RAPL_IDX_PSYS_NRG_STAT)
+
 /* Knights Landing has PKG, RAM */
 #define RAPL_IDX_KNL	(1<<RAPL_IDX_PKG_NRG_STAT|\
 			 1<<RAPL_IDX_RAM_NRG_STAT)
@@ -360,6 +376,10 @@
 		bit = RAPL_IDX_PP1_NRG_STAT;
 		msr = MSR_PP1_ENERGY_STATUS;
 		break;
+	case INTEL_RAPL_PSYS:
+		bit = RAPL_IDX_PSYS_NRG_STAT;
+		msr = MSR_PLATFORM_ENERGY_STATUS;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -414,11 +434,13 @@
 RAPL_EVENT_ATTR_STR(energy-pkg  ,   rapl_pkg, "event=0x02");
 RAPL_EVENT_ATTR_STR(energy-ram  ,   rapl_ram, "event=0x03");
 RAPL_EVENT_ATTR_STR(energy-gpu  ,   rapl_gpu, "event=0x04");
+RAPL_EVENT_ATTR_STR(energy-psys,   rapl_psys, "event=0x05");
 
 RAPL_EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules");
 RAPL_EVENT_ATTR_STR(energy-pkg.unit  ,   rapl_pkg_unit, "Joules");
 RAPL_EVENT_ATTR_STR(energy-ram.unit  ,   rapl_ram_unit, "Joules");
 RAPL_EVENT_ATTR_STR(energy-gpu.unit  ,   rapl_gpu_unit, "Joules");
+RAPL_EVENT_ATTR_STR(energy-psys.unit,   rapl_psys_unit, "Joules");
 
 /*
 * we compute in 0.23 nJ increments regardless of the MSR unit
@@ -427,6 +449,7 @@
 RAPL_EVENT_ATTR_STR(energy-pkg.scale,     rapl_pkg_scale, "2.3283064365386962890625e-10");
 RAPL_EVENT_ATTR_STR(energy-ram.scale,     rapl_ram_scale, "2.3283064365386962890625e-10");
 RAPL_EVENT_ATTR_STR(energy-gpu.scale,     rapl_gpu_scale, "2.3283064365386962890625e-10");
+RAPL_EVENT_ATTR_STR(energy-psys.scale,   rapl_psys_scale, "2.3283064365386962890625e-10");
 
 static struct attribute *rapl_events_srv_attr[] = {
 	EVENT_PTR(rapl_cores),
@@ -476,6 +499,27 @@
 	NULL,
 };
 
+static struct attribute *rapl_events_skl_attr[] = {
+	EVENT_PTR(rapl_cores),
+	EVENT_PTR(rapl_pkg),
+	EVENT_PTR(rapl_gpu),
+	EVENT_PTR(rapl_ram),
+	EVENT_PTR(rapl_psys),
+
+	EVENT_PTR(rapl_cores_unit),
+	EVENT_PTR(rapl_pkg_unit),
+	EVENT_PTR(rapl_gpu_unit),
+	EVENT_PTR(rapl_ram_unit),
+	EVENT_PTR(rapl_psys_unit),
+
+	EVENT_PTR(rapl_cores_scale),
+	EVENT_PTR(rapl_pkg_scale),
+	EVENT_PTR(rapl_gpu_scale),
+	EVENT_PTR(rapl_ram_scale),
+	EVENT_PTR(rapl_psys_scale),
+	NULL,
+};
+
 static struct attribute *rapl_events_knl_attr[] = {
 	EVENT_PTR(rapl_pkg),
 	EVENT_PTR(rapl_ram),
@@ -592,6 +636,11 @@
 	return NOTIFY_OK;
 }
 
+static struct notifier_block rapl_cpu_nb = {
+	.notifier_call	= rapl_cpu_notifier,
+	.priority       = CPU_PRI_PERF + 1,
+};
+
 static int rapl_check_hw_unit(bool apply_quirk)
 {
 	u64 msr_rapl_power_unit_bits;
@@ -660,7 +709,7 @@
 	return 0;
 }
 
-static void __init cleanup_rapl_pmus(void)
+static void cleanup_rapl_pmus(void)
 {
 	int i;
 
@@ -691,52 +740,92 @@
 	return 0;
 }
 
-static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
-	[0] = { .vendor = X86_VENDOR_INTEL, .family = 6 },
-	[1] = {},
+#define X86_RAPL_MODEL_MATCH(model, init)	\
+	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init }
+
+struct intel_rapl_init_fun {
+	bool apply_quirk;
+	int cntr_mask;
+	struct attribute **attrs;
 };
 
+static const struct intel_rapl_init_fun snb_rapl_init __initconst = {
+	.apply_quirk = false,
+	.cntr_mask = RAPL_IDX_CLN,
+	.attrs = rapl_events_cln_attr,
+};
+
+static const struct intel_rapl_init_fun hsx_rapl_init __initconst = {
+	.apply_quirk = true,
+	.cntr_mask = RAPL_IDX_SRV,
+	.attrs = rapl_events_srv_attr,
+};
+
+static const struct intel_rapl_init_fun hsw_rapl_init __initconst = {
+	.apply_quirk = false,
+	.cntr_mask = RAPL_IDX_HSW,
+	.attrs = rapl_events_hsw_attr,
+};
+
+static const struct intel_rapl_init_fun snbep_rapl_init __initconst = {
+	.apply_quirk = false,
+	.cntr_mask = RAPL_IDX_SRV,
+	.attrs = rapl_events_srv_attr,
+};
+
+static const struct intel_rapl_init_fun knl_rapl_init __initconst = {
+	.apply_quirk = true,
+	.cntr_mask = RAPL_IDX_KNL,
+	.attrs = rapl_events_knl_attr,
+};
+
+static const struct intel_rapl_init_fun skl_rapl_init __initconst = {
+	.apply_quirk = false,
+	.cntr_mask = RAPL_IDX_SKL_CLN,
+	.attrs = rapl_events_skl_attr,
+};
+
+static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
+	X86_RAPL_MODEL_MATCH(42, snb_rapl_init),	/* Sandy Bridge */
+	X86_RAPL_MODEL_MATCH(45, snbep_rapl_init),	/* Sandy Bridge-EP */
+
+	X86_RAPL_MODEL_MATCH(58, snb_rapl_init),	/* Ivy Bridge */
+	X86_RAPL_MODEL_MATCH(62, snbep_rapl_init),	/* IvyTown */
+
+	X86_RAPL_MODEL_MATCH(60, hsw_rapl_init),	/* Haswell */
+	X86_RAPL_MODEL_MATCH(63, hsx_rapl_init),	/* Haswell-Server */
+	X86_RAPL_MODEL_MATCH(69, hsw_rapl_init),	/* Haswell-Celeron */
+	X86_RAPL_MODEL_MATCH(70, hsw_rapl_init),	/* Haswell GT3e */
+
+	X86_RAPL_MODEL_MATCH(61, hsw_rapl_init),	/* Broadwell */
+	X86_RAPL_MODEL_MATCH(71, hsw_rapl_init),	/* Broadwell-H */
+	X86_RAPL_MODEL_MATCH(79, hsx_rapl_init),	/* Broadwell-Server */
+	X86_RAPL_MODEL_MATCH(86, hsx_rapl_init),	/* Broadwell Xeon D */
+
+	X86_RAPL_MODEL_MATCH(87, knl_rapl_init),	/* Knights Landing */
+
+	X86_RAPL_MODEL_MATCH(78, skl_rapl_init),	/* Skylake */
+	X86_RAPL_MODEL_MATCH(94, skl_rapl_init),	/* Skylake H/S */
+	{},
+};
+
+MODULE_DEVICE_TABLE(x86cpu, rapl_cpu_match);
+
 static int __init rapl_pmu_init(void)
 {
-	bool apply_quirk = false;
+	const struct x86_cpu_id *id;
+	struct intel_rapl_init_fun *rapl_init;
+	bool apply_quirk;
 	int ret;
 
-	if (!x86_match_cpu(rapl_cpu_match))
+	id = x86_match_cpu(rapl_cpu_match);
+	if (!id)
 		return -ENODEV;
 
-	switch (boot_cpu_data.x86_model) {
-	case 42: /* Sandy Bridge */
-	case 58: /* Ivy Bridge */
-		rapl_cntr_mask = RAPL_IDX_CLN;
-		rapl_pmu_events_group.attrs = rapl_events_cln_attr;
-		break;
-	case 63: /* Haswell-Server */
-	case 79: /* Broadwell-Server */
-		apply_quirk = true;
-		rapl_cntr_mask = RAPL_IDX_SRV;
-		rapl_pmu_events_group.attrs = rapl_events_srv_attr;
-		break;
-	case 60: /* Haswell */
-	case 69: /* Haswell-Celeron */
-	case 70: /* Haswell GT3e */
-	case 61: /* Broadwell */
-	case 71: /* Broadwell-H */
-		rapl_cntr_mask = RAPL_IDX_HSW;
-		rapl_pmu_events_group.attrs = rapl_events_hsw_attr;
-		break;
-	case 45: /* Sandy Bridge-EP */
-	case 62: /* IvyTown */
-		rapl_cntr_mask = RAPL_IDX_SRV;
-		rapl_pmu_events_group.attrs = rapl_events_srv_attr;
-		break;
-	case 87: /* Knights Landing */
-		apply_quirk = true;
-		rapl_cntr_mask = RAPL_IDX_KNL;
-		rapl_pmu_events_group.attrs = rapl_events_knl_attr;
-		break;
-	default:
-		return -ENODEV;
-	}
+	rapl_init = (struct intel_rapl_init_fun *)id->driver_data;
+	apply_quirk = rapl_init->apply_quirk;
+	rapl_cntr_mask = rapl_init->cntr_mask;
+	rapl_pmu_events_group.attrs = rapl_init->attrs;
 
 	ret = rapl_check_hw_unit(apply_quirk);
 	if (ret)
@@ -756,7 +845,7 @@
 	if (ret)
 		goto out;
 
-	__perf_cpu_notifier(rapl_cpu_notifier);
+	__register_cpu_notifier(&rapl_cpu_nb);
 	cpu_notifier_register_done();
 	rapl_advertise();
 	return 0;
@@ -767,4 +856,14 @@
 	cpu_notifier_register_done();
 	return ret;
 }
-device_initcall(rapl_pmu_init);
+module_init(rapl_pmu_init);
+
+static void __exit intel_rapl_exit(void)
+{
+	cpu_notifier_register_begin();
+	__unregister_cpu_notifier(&rapl_cpu_nb);
+	perf_pmu_unregister(&rapl_pmus->pmu);
+	cleanup_rapl_pmus();
+	cpu_notifier_register_done();
+}
+module_exit(intel_rapl_exit);
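
The scale advertised for every event above, 2.3283064365386962890625e-10, is exactly 2^-32, so a raw counter converts to Joules as count / 2^32 regardless of what MSR_RAPL_POWER_UNIT reports. A user-space sketch of the conversion, for illustration only:

	/* Sketch: convert a raw RAPL energy count to Joules (1 LSB = 2^-32 J). */
	static double rapl_count_to_joules(unsigned long long count)
	{
		return (double)count / 4294967296.0;	/* count * 2^-32 */
	}

Once the module is loaded on a Skylake client, the new event should show up through the usual interface, e.g. "perf stat -a -e power/energy-psys/ sleep 1" (assuming the PMU keeps its existing "power" name).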
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 7012d18..16c1789 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1,3 +1,4 @@
+#include <asm/cpu_device_id.h>
 #include "uncore.h"
 
 static struct intel_uncore_type *empty_uncore[] = { NULL, };
@@ -21,6 +22,8 @@
 struct event_constraint uncore_constraint_empty =
 	EVENT_CONSTRAINT(0, 0, 0);
 
+MODULE_LICENSE("GPL");
+
 static int uncore_pcibus_to_physid(struct pci_bus *bus)
 {
 	struct pci2phy_map *map;
@@ -754,7 +757,7 @@
 	pmu->registered = false;
 }
 
-static void __init __uncore_exit_boxes(struct intel_uncore_type *type, int cpu)
+static void __uncore_exit_boxes(struct intel_uncore_type *type, int cpu)
 {
 	struct intel_uncore_pmu *pmu = type->pmus;
 	struct intel_uncore_box *box;
@@ -770,7 +773,7 @@
 	}
 }
 
-static void __init uncore_exit_boxes(void *dummy)
+static void uncore_exit_boxes(void *dummy)
 {
 	struct intel_uncore_type **types;
 
@@ -787,7 +790,7 @@
 	kfree(pmu->boxes);
 }
 
-static void __init uncore_type_exit(struct intel_uncore_type *type)
+static void uncore_type_exit(struct intel_uncore_type *type)
 {
 	struct intel_uncore_pmu *pmu = type->pmus;
 	int i;
@@ -804,7 +807,7 @@
 	type->events_group = NULL;
 }
 
-static void __init uncore_types_exit(struct intel_uncore_type **types)
+static void uncore_types_exit(struct intel_uncore_type **types)
 {
 	for (; *types; types++)
 		uncore_type_exit(*types);
@@ -989,46 +992,6 @@
 	size_t size;
 	int ret;
 
-	switch (boot_cpu_data.x86_model) {
-	case 45: /* Sandy Bridge-EP */
-		ret = snbep_uncore_pci_init();
-		break;
-	case 62: /* Ivy Bridge-EP */
-		ret = ivbep_uncore_pci_init();
-		break;
-	case 63: /* Haswell-EP */
-		ret = hswep_uncore_pci_init();
-		break;
-	case 79: /* BDX-EP */
-	case 86: /* BDX-DE */
-		ret = bdx_uncore_pci_init();
-		break;
-	case 42: /* Sandy Bridge */
-		ret = snb_uncore_pci_init();
-		break;
-	case 58: /* Ivy Bridge */
-		ret = ivb_uncore_pci_init();
-		break;
-	case 60: /* Haswell */
-	case 69: /* Haswell Celeron */
-		ret = hsw_uncore_pci_init();
-		break;
-	case 61: /* Broadwell */
-		ret = bdw_uncore_pci_init();
-		break;
-	case 87: /* Knights Landing */
-		ret = knl_uncore_pci_init();
-		break;
-	case 94: /* SkyLake */
-		ret = skl_uncore_pci_init();
-		break;
-	default:
-		return -ENODEV;
-	}
-
-	if (ret)
-		return ret;
-
 	size = max_packages * sizeof(struct pci_extra_dev);
 	uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL);
 	if (!uncore_extra_pci_dev) {
@@ -1060,7 +1023,7 @@
 	return ret;
 }
 
-static void __init uncore_pci_exit(void)
+static void uncore_pci_exit(void)
 {
 	if (pcidrv_registered) {
 		pcidrv_registered = false;
@@ -1287,46 +1250,6 @@
 {
 	int ret;
 
-	switch (boot_cpu_data.x86_model) {
-	case 26: /* Nehalem */
-	case 30:
-	case 37: /* Westmere */
-	case 44:
-		nhm_uncore_cpu_init();
-		break;
-	case 42: /* Sandy Bridge */
-	case 58: /* Ivy Bridge */
-	case 60: /* Haswell */
-	case 69: /* Haswell */
-	case 70: /* Haswell */
-	case 61: /* Broadwell */
-	case 71: /* Broadwell */
-		snb_uncore_cpu_init();
-		break;
-	case 45: /* Sandy Bridge-EP */
-		snbep_uncore_cpu_init();
-		break;
-	case 46: /* Nehalem-EX */
-	case 47: /* Westmere-EX aka. Xeon E7 */
-		nhmex_uncore_cpu_init();
-		break;
-	case 62: /* Ivy Bridge-EP */
-		ivbep_uncore_cpu_init();
-		break;
-	case 63: /* Haswell-EP */
-		hswep_uncore_cpu_init();
-		break;
-	case 79: /* BDX-EP */
-	case 86: /* BDX-DE */
-		bdx_uncore_cpu_init();
-		break;
-	case 87: /* Knights Landing */
-		knl_uncore_cpu_init();
-		break;
-	default:
-		return -ENODEV;
-	}
-
 	ret = uncore_types_init(uncore_msr_uncores, true);
 	if (ret)
 		goto err;
@@ -1376,20 +1299,123 @@
 	return 0;
 }
 
+#define X86_UNCORE_MODEL_MATCH(model, init)	\
+	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init }
+
+struct intel_uncore_init_fun {
+	void	(*cpu_init)(void);
+	int	(*pci_init)(void);
+};
+
+static const struct intel_uncore_init_fun nhm_uncore_init __initconst = {
+	.cpu_init = nhm_uncore_cpu_init,
+};
+
+static const struct intel_uncore_init_fun snb_uncore_init __initconst = {
+	.cpu_init = snb_uncore_cpu_init,
+	.pci_init = snb_uncore_pci_init,
+};
+
+static const struct intel_uncore_init_fun ivb_uncore_init __initconst = {
+	.cpu_init = snb_uncore_cpu_init,
+	.pci_init = ivb_uncore_pci_init,
+};
+
+static const struct intel_uncore_init_fun hsw_uncore_init __initconst = {
+	.cpu_init = snb_uncore_cpu_init,
+	.pci_init = hsw_uncore_pci_init,
+};
+
+static const struct intel_uncore_init_fun bdw_uncore_init __initconst = {
+	.cpu_init = snb_uncore_cpu_init,
+	.pci_init = bdw_uncore_pci_init,
+};
+
+static const struct intel_uncore_init_fun snbep_uncore_init __initconst = {
+	.cpu_init = snbep_uncore_cpu_init,
+	.pci_init = snbep_uncore_pci_init,
+};
+
+static const struct intel_uncore_init_fun nhmex_uncore_init __initconst = {
+	.cpu_init = nhmex_uncore_cpu_init,
+};
+
+static const struct intel_uncore_init_fun ivbep_uncore_init __initconst = {
+	.cpu_init = ivbep_uncore_cpu_init,
+	.pci_init = ivbep_uncore_pci_init,
+};
+
+static const struct intel_uncore_init_fun hswep_uncore_init __initconst = {
+	.cpu_init = hswep_uncore_cpu_init,
+	.pci_init = hswep_uncore_pci_init,
+};
+
+static const struct intel_uncore_init_fun bdx_uncore_init __initconst = {
+	.cpu_init = bdx_uncore_cpu_init,
+	.pci_init = bdx_uncore_pci_init,
+};
+
+static const struct intel_uncore_init_fun knl_uncore_init __initconst = {
+	.cpu_init = knl_uncore_cpu_init,
+	.pci_init = knl_uncore_pci_init,
+};
+
+static const struct intel_uncore_init_fun skl_uncore_init __initconst = {
+	.pci_init = skl_uncore_pci_init,
+};
+
+static const struct x86_cpu_id intel_uncore_match[] __initconst = {
+	X86_UNCORE_MODEL_MATCH(26, nhm_uncore_init),	/* Nehalem */
+	X86_UNCORE_MODEL_MATCH(30, nhm_uncore_init),
+	X86_UNCORE_MODEL_MATCH(37, nhm_uncore_init),	/* Westmere */
+	X86_UNCORE_MODEL_MATCH(44, nhm_uncore_init),
+	X86_UNCORE_MODEL_MATCH(42, snb_uncore_init),	/* Sandy Bridge */
+	X86_UNCORE_MODEL_MATCH(58, ivb_uncore_init),	/* Ivy Bridge */
+	X86_UNCORE_MODEL_MATCH(60, hsw_uncore_init),	/* Haswell */
+	X86_UNCORE_MODEL_MATCH(69, hsw_uncore_init),	/* Haswell Celeron */
+	X86_UNCORE_MODEL_MATCH(70, hsw_uncore_init),	/* Haswell */
+	X86_UNCORE_MODEL_MATCH(61, bdw_uncore_init),	/* Broadwell */
+	X86_UNCORE_MODEL_MATCH(71, bdw_uncore_init),	/* Broadwell */
+	X86_UNCORE_MODEL_MATCH(45, snbep_uncore_init),	/* Sandy Bridge-EP */
+	X86_UNCORE_MODEL_MATCH(46, nhmex_uncore_init),	/* Nehalem-EX */
+	X86_UNCORE_MODEL_MATCH(47, nhmex_uncore_init),	/* Westmere-EX aka. Xeon E7 */
+	X86_UNCORE_MODEL_MATCH(62, ivbep_uncore_init),	/* Ivy Bridge-EP */
+	X86_UNCORE_MODEL_MATCH(63, hswep_uncore_init),	/* Haswell-EP */
+	X86_UNCORE_MODEL_MATCH(79, bdx_uncore_init),	/* BDX-EP */
+	X86_UNCORE_MODEL_MATCH(86, bdx_uncore_init),	/* BDX-DE */
+	X86_UNCORE_MODEL_MATCH(87, knl_uncore_init),	/* Knights Landing */
+	X86_UNCORE_MODEL_MATCH(94, skl_uncore_init),	/* SkyLake */
+	{},
+};
+
+MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match);
+
 static int __init intel_uncore_init(void)
 {
-	int pret, cret, ret;
+	const struct x86_cpu_id *id;
+	struct intel_uncore_init_fun *uncore_init;
+	int pret = 0, cret = 0, ret;
 
-	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+	id = x86_match_cpu(intel_uncore_match);
+	if (!id)
 		return -ENODEV;
 
-	if (cpu_has_hypervisor)
+	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
 		return -ENODEV;
 
 	max_packages = topology_max_packages();
 
-	pret = uncore_pci_init();
-	cret = uncore_cpu_init();
+	uncore_init = (struct intel_uncore_init_fun *)id->driver_data;
+	if (uncore_init->pci_init) {
+		pret = uncore_init->pci_init();
+		if (!pret)
+			pret = uncore_pci_init();
+	}
+
+	if (uncore_init->cpu_init) {
+		uncore_init->cpu_init();
+		cret = uncore_cpu_init();
+	}
 
 	if (cret && pret)
 		return -ENODEV;
@@ -1409,4 +1435,14 @@
 	cpu_notifier_register_done();
 	return ret;
 }
-device_initcall(intel_uncore_init);
+module_init(intel_uncore_init);
+
+static void __exit intel_uncore_exit(void)
+{
+	cpu_notifier_register_begin();
+	__unregister_cpu_notifier(&uncore_cpu_nb);
+	uncore_types_exit(uncore_msr_uncores);
+	uncore_pci_exit();
+	cpu_notifier_register_done();
+}
+module_exit(intel_uncore_exit);
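
This driver and the RAPL one now share the same probe shape: x86_match_cpu() selects the row for the running model, and ->driver_data carries a pointer to a per-family init descriptor, so supporting a new CPU becomes a one-line table entry. Condensed to its essentials, as a sketch that reuses the declarations above with error handling elided:

	/* Sketch: the model-dispatch idiom introduced by this series. */
	static int __init example_probe(void)
	{
		const struct x86_cpu_id *id = x86_match_cpu(intel_uncore_match);
		const struct intel_uncore_init_fun *fn;

		if (!id)
			return -ENODEV;		/* model not in the table */
		fn = (const struct intel_uncore_init_fun *)id->driver_data;
		if (fn->cpu_init)
			fn->cpu_init();		/* MSR-based uncore boxes */
		return fn->pci_init ? fn->pci_init() : 0;
	}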
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index 8bef19f..85ef3c2 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -6,6 +6,8 @@
 	PERF_MSR_MPERF			= 2,
 	PERF_MSR_PPERF			= 3,
 	PERF_MSR_SMI			= 4,
+	PERF_MSR_PTSC			= 5,
+	PERF_MSR_IRPERF			= 6,
 
 	PERF_MSR_EVENT_MAX,
 };
@@ -15,6 +17,16 @@
 	return boot_cpu_has(X86_FEATURE_APERFMPERF);
 }
 
+static bool test_ptsc(int idx)
+{
+	return boot_cpu_has(X86_FEATURE_PTSC);
+}
+
+static bool test_irperf(int idx)
+{
+	return boot_cpu_has(X86_FEATURE_IRPERF);
+}
+
 static bool test_intel(int idx)
 {
 	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
@@ -69,18 +81,22 @@
 	bool	(*test)(int idx);
 };
 
-PMU_EVENT_ATTR_STRING(tsc,   evattr_tsc,   "event=0x00");
-PMU_EVENT_ATTR_STRING(aperf, evattr_aperf, "event=0x01");
-PMU_EVENT_ATTR_STRING(mperf, evattr_mperf, "event=0x02");
-PMU_EVENT_ATTR_STRING(pperf, evattr_pperf, "event=0x03");
-PMU_EVENT_ATTR_STRING(smi,   evattr_smi,   "event=0x04");
+PMU_EVENT_ATTR_STRING(tsc,    evattr_tsc,    "event=0x00");
+PMU_EVENT_ATTR_STRING(aperf,  evattr_aperf,  "event=0x01");
+PMU_EVENT_ATTR_STRING(mperf,  evattr_mperf,  "event=0x02");
+PMU_EVENT_ATTR_STRING(pperf,  evattr_pperf,  "event=0x03");
+PMU_EVENT_ATTR_STRING(smi,    evattr_smi,    "event=0x04");
+PMU_EVENT_ATTR_STRING(ptsc,   evattr_ptsc,   "event=0x05");
+PMU_EVENT_ATTR_STRING(irperf, evattr_irperf, "event=0x06");
 
 static struct perf_msr msr[] = {
-	[PERF_MSR_TSC]   = { 0,			&evattr_tsc,	NULL,		 },
-	[PERF_MSR_APERF] = { MSR_IA32_APERF,	&evattr_aperf,	test_aperfmperf, },
-	[PERF_MSR_MPERF] = { MSR_IA32_MPERF,	&evattr_mperf,	test_aperfmperf, },
-	[PERF_MSR_PPERF] = { MSR_PPERF,		&evattr_pperf,	test_intel,	 },
-	[PERF_MSR_SMI]   = { MSR_SMI_COUNT,	&evattr_smi,	test_intel,	 },
+	[PERF_MSR_TSC]    = { 0,		&evattr_tsc,	NULL,		 },
+	[PERF_MSR_APERF]  = { MSR_IA32_APERF,	&evattr_aperf,	test_aperfmperf, },
+	[PERF_MSR_MPERF]  = { MSR_IA32_MPERF,	&evattr_mperf,	test_aperfmperf, },
+	[PERF_MSR_PPERF]  = { MSR_PPERF,	&evattr_pperf,	test_intel,	 },
+	[PERF_MSR_SMI]    = { MSR_SMI_COUNT,	&evattr_smi,	test_intel,	 },
+	[PERF_MSR_PTSC]   = { MSR_F15H_PTSC,	&evattr_ptsc,	test_ptsc,	 },
+	[PERF_MSR_IRPERF] = { MSR_F17H_IRPERF,	&evattr_irperf,	test_irperf,	 },
 };
 
 static struct attribute *events_attrs[PERF_MSR_EVENT_MAX + 1] = {
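
With the two table entries above, the msr PMU picks up the AMD counters at probe time whenever test_ptsc() or test_irperf() sees the corresponding feature bit. On hardware that sets those bits, the events should be reachable through the PMU's existing name, e.g.:

	perf stat -a -e msr/ptsc/ sleep 1	# Fam15h performance TSC
	perf stat -a -e msr/irperf/ sleep 1	# Fam17h instructions retired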
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index ad4dc7f..8bd764d 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -601,6 +601,7 @@
 	u64		lbr_sel_mask;		   /* LBR_SELECT valid bits */
 	const int	*lbr_sel_map;		   /* lbr_select mappings */
 	bool		lbr_double_abort;	   /* duplicated lbr aborts */
+	bool		lbr_pt_coexist;		   /* LBR may coexist with PT */
 
 	/*
 	 * Intel PT/LBR/BTS are exclusive
@@ -859,6 +860,8 @@
 
 extern struct event_constraint intel_slm_pebs_event_constraints[];
 
+extern struct event_constraint intel_glm_pebs_event_constraints[];
+
 extern struct event_constraint intel_nehalem_pebs_event_constraints[];
 
 extern struct event_constraint intel_westmere_pebs_event_constraints[];
@@ -907,6 +910,8 @@
 
 void intel_pmu_lbr_init_atom(void);
 
+void intel_pmu_lbr_init_slm(void);
+
 void intel_pmu_lbr_init_snb(void);
 
 void intel_pmu_lbr_init_hsw(void);
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 0552884..2f29f4e 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -357,7 +357,7 @@
 		put_user_ex(ptr_to_compat(&frame->uc), &frame->puc);
 
 		/* Create the ucontext.  */
-		if (cpu_has_xsave)
+		if (boot_cpu_has(X86_FEATURE_XSAVE))
 			put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
 		else
 			put_user_ex(0, &frame->uc.uc_flags);
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 99afb66..e77a644 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -1,11 +1,12 @@
 #ifndef _ASM_X86_ALTERNATIVE_H
 #define _ASM_X86_ALTERNATIVE_H
 
+#ifndef __ASSEMBLY__
+
 #include <linux/types.h>
 #include <linux/stddef.h>
 #include <linux/stringify.h>
 #include <asm/asm.h>
-#include <asm/ptrace.h>
 
 /*
  * Alternative inline assembly for SMP.
@@ -233,36 +234,6 @@
  */
 #define ASM_NO_INPUT_CLOBBER(clbr...) "i" (0) : clbr
 
-struct paravirt_patch_site;
-#ifdef CONFIG_PARAVIRT
-void apply_paravirt(struct paravirt_patch_site *start,
-		    struct paravirt_patch_site *end);
-#else
-static inline void apply_paravirt(struct paravirt_patch_site *start,
-				  struct paravirt_patch_site *end)
-{}
-#define __parainstructions	NULL
-#define __parainstructions_end	NULL
-#endif
-
-extern void *text_poke_early(void *addr, const void *opcode, size_t len);
-
-/*
- * Clear and restore the kernel write-protection flag on the local CPU.
- * Allows the kernel to edit read-only pages.
- * Side-effect: any interrupt handler running between save and restore will have
- * the ability to write to read-only pages.
- *
- * Warning:
- * Code patching in the UP case is safe if NMIs and MCE handlers are stopped and
- * no thread can be preempted in the instructions being modified (no iret to an
- * invalid instruction possible) or if the instructions are changed from a
- * consistent state to another consistent state atomically.
- * On the local CPU you need to be protected again NMI or MCE handlers seeing an
- * inconsistent instruction while you patch.
- */
-extern void *text_poke(void *addr, const void *opcode, size_t len);
-extern int poke_int3_handler(struct pt_regs *regs);
-extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler);
+#endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_X86_ALTERNATIVE_H */
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 98f25bb..bc27611 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -239,10 +239,10 @@
 extern void x2apic_setup(void);
 static inline int x2apic_enabled(void)
 {
-	return cpu_has_x2apic && apic_is_x2apic_enabled();
+	return boot_cpu_has(X86_FEATURE_X2APIC) && apic_is_x2apic_enabled();
 }
 
-#define x2apic_supported()	(cpu_has_x2apic)
+#define x2apic_supported()	(boot_cpu_has(X86_FEATURE_X2APIC))
 #else /* !CONFIG_X86_X2APIC */
 static inline void check_x2apic(void) { }
 static inline void x2apic_setup(void) { }
diff --git a/arch/x86/include/asm/bios_ebda.h b/arch/x86/include/asm/bios_ebda.h
index aa6a317..2b00c77 100644
--- a/arch/x86/include/asm/bios_ebda.h
+++ b/arch/x86/include/asm/bios_ebda.h
@@ -17,27 +17,6 @@
 	return address;	/* 0 means none */
 }
 
-/*
- * Return the sanitized length of the EBDA in bytes, if it exists.
- */
-static inline unsigned int get_bios_ebda_length(void)
-{
-	unsigned int address;
-	unsigned int length;
-
-	address = get_bios_ebda();
-	if (!address)
-		return 0;
-
-	/* EBDA length is byte 0 of the EBDA (stored in KiB) */
-	length = *(unsigned char *)phys_to_virt(address);
-	length <<= 10;
-
-	/* Trim the length if it extends beyond 640KiB */
-	length = min_t(unsigned int, (640 * 1024) - address, length);
-	return length;
-}
-
 void reserve_ebda_region(void);
 
 #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
index 6b8d6e8..abd06b1 100644
--- a/arch/x86/include/asm/boot.h
+++ b/arch/x86/include/asm/boot.h
@@ -12,29 +12,46 @@
 
 /* Minimum kernel alignment, as a power of two */
 #ifdef CONFIG_X86_64
-#define MIN_KERNEL_ALIGN_LG2	PMD_SHIFT
+# define MIN_KERNEL_ALIGN_LG2	PMD_SHIFT
 #else
-#define MIN_KERNEL_ALIGN_LG2	(PAGE_SHIFT + THREAD_SIZE_ORDER)
+# define MIN_KERNEL_ALIGN_LG2	(PAGE_SHIFT + THREAD_SIZE_ORDER)
 #endif
 #define MIN_KERNEL_ALIGN	(_AC(1, UL) << MIN_KERNEL_ALIGN_LG2)
 
 #if (CONFIG_PHYSICAL_ALIGN & (CONFIG_PHYSICAL_ALIGN-1)) || \
 	(CONFIG_PHYSICAL_ALIGN < MIN_KERNEL_ALIGN)
-#error "Invalid value for CONFIG_PHYSICAL_ALIGN"
+# error "Invalid value for CONFIG_PHYSICAL_ALIGN"
 #endif
 
 #ifdef CONFIG_KERNEL_BZIP2
-#define BOOT_HEAP_SIZE             0x400000
+# define BOOT_HEAP_SIZE		0x400000
 #else /* !CONFIG_KERNEL_BZIP2 */
-
-#define BOOT_HEAP_SIZE	0x10000
-
-#endif /* !CONFIG_KERNEL_BZIP2 */
+# define BOOT_HEAP_SIZE		 0x10000
+#endif
 
 #ifdef CONFIG_X86_64
-#define BOOT_STACK_SIZE	0x4000
-#else
-#define BOOT_STACK_SIZE	0x1000
+# define BOOT_STACK_SIZE	0x4000
+
+# define BOOT_INIT_PGT_SIZE	(6*4096)
+# ifdef CONFIG_RANDOMIZE_BASE
+/*
+ * Assuming all mappings cross the 512GB boundary:
+ * 1 page for level4
+ * (2+2)*4 pages for kernel, param, cmd_line, and randomized kernel
+ * 2 pages for first 2M (video RAM: CONFIG_X86_VERBOSE_BOOTUP).
+ * Total is 19 pages.
+ */
+#  ifdef CONFIG_X86_VERBOSE_BOOTUP
+#   define BOOT_PGT_SIZE	(19*4096)
+#  else /* !CONFIG_X86_VERBOSE_BOOTUP */
+#   define BOOT_PGT_SIZE	(17*4096)
+#  endif
+# else /* !CONFIG_RANDOMIZE_BASE */
+#  define BOOT_PGT_SIZE		BOOT_INIT_PGT_SIZE
+# endif
+
+#else /* !CONFIG_X86_64 */
+# define BOOT_STACK_SIZE	0x1000
 #endif
 
 #endif /* _ASM_X86_BOOT_H */
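
The 19-page figure in the comment is just the sum of its own inventory, spelled out:

	   1 page		level-4 page table
	+ (2+2)*4 = 16 pages	kernel, param, cmd_line, randomized kernel
	+  2 pages		first 2M of video RAM (X86_VERBOSE_BOOTUP)
	= 19 pages, hence BOOT_PGT_SIZE = 19*4096

Without CONFIG_X86_VERBOSE_BOOTUP the two video-RAM pages drop out, which gives the 17*4096 case.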
diff --git a/arch/x86/include/asm/clocksource.h b/arch/x86/include/asm/clocksource.h
index d194266..eae33c7 100644
--- a/arch/x86/include/asm/clocksource.h
+++ b/arch/x86/include/asm/clocksource.h
@@ -3,11 +3,10 @@
 #ifndef _ASM_X86_CLOCKSOURCE_H
 #define _ASM_X86_CLOCKSOURCE_H
 
-#define VCLOCK_NONE	0  /* No vDSO clock available.	*/
-#define VCLOCK_TSC	1  /* vDSO should use vread_tsc.	*/
-#define VCLOCK_HPET	2  /* vDSO should use vread_hpet.	*/
-#define VCLOCK_PVCLOCK	3 /* vDSO should use vread_pvclock. */
-#define VCLOCK_MAX	3
+#define VCLOCK_NONE	0	/* No vDSO clock available.		*/
+#define VCLOCK_TSC	1	/* vDSO should use vread_tsc.		*/
+#define VCLOCK_PVCLOCK	2	/* vDSO should use vread_pvclock.	*/
+#define VCLOCK_MAX	2
 
 struct arch_clocksource_data {
 	int vclock_mode;
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index ebb102e..5a3b2c1 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -307,7 +307,7 @@
 	return (void __user *)round_down(sp - len, 16);
 }
 
-static inline bool is_x32_task(void)
+static inline bool in_x32_syscall(void)
 {
 #ifdef CONFIG_X86_X32_ABI
 	if (task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT)
@@ -318,7 +318,7 @@
 
 static inline bool in_compat_syscall(void)
 {
-	return is_ia32_task() || is_x32_task();
+	return in_ia32_syscall() || in_x32_syscall();
 }
 #define in_compat_syscall in_compat_syscall	/* override the generic impl */
 
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 3636ec0..25ebb54 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -27,6 +27,7 @@
 	CPUID_6_EAX,
 	CPUID_8000_000A_EDX,
 	CPUID_7_ECX,
+	CPUID_8000_0007_EBX,
 };
 
 #ifdef CONFIG_X86_FEATURE_NAMES
@@ -118,31 +119,6 @@
 	set_bit(bit, (unsigned long *)cpu_caps_set);	\
 } while (0)
 
-#define cpu_has_fpu		boot_cpu_has(X86_FEATURE_FPU)
-#define cpu_has_pse		boot_cpu_has(X86_FEATURE_PSE)
-#define cpu_has_tsc		boot_cpu_has(X86_FEATURE_TSC)
-#define cpu_has_pge		boot_cpu_has(X86_FEATURE_PGE)
-#define cpu_has_apic		boot_cpu_has(X86_FEATURE_APIC)
-#define cpu_has_fxsr		boot_cpu_has(X86_FEATURE_FXSR)
-#define cpu_has_xmm		boot_cpu_has(X86_FEATURE_XMM)
-#define cpu_has_xmm2		boot_cpu_has(X86_FEATURE_XMM2)
-#define cpu_has_aes		boot_cpu_has(X86_FEATURE_AES)
-#define cpu_has_avx		boot_cpu_has(X86_FEATURE_AVX)
-#define cpu_has_avx2		boot_cpu_has(X86_FEATURE_AVX2)
-#define cpu_has_clflush		boot_cpu_has(X86_FEATURE_CLFLUSH)
-#define cpu_has_gbpages		boot_cpu_has(X86_FEATURE_GBPAGES)
-#define cpu_has_arch_perfmon	boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
-#define cpu_has_pat		boot_cpu_has(X86_FEATURE_PAT)
-#define cpu_has_x2apic		boot_cpu_has(X86_FEATURE_X2APIC)
-#define cpu_has_xsave		boot_cpu_has(X86_FEATURE_XSAVE)
-#define cpu_has_xsaves		boot_cpu_has(X86_FEATURE_XSAVES)
-#define cpu_has_osxsave		boot_cpu_has(X86_FEATURE_OSXSAVE)
-#define cpu_has_hypervisor	boot_cpu_has(X86_FEATURE_HYPERVISOR)
-/*
- * Do not add any more of those clumsy macros - use static_cpu_has() for
- * fast paths and boot_cpu_has() otherwise!
- */
-
 #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
 /*
  * Static testing of CPU features.  Used the same as boot_cpu_has().
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 8f9afef..4a41348 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -12,7 +12,7 @@
 /*
  * Defines x86 CPU feature bits
  */
-#define NCAPINTS	17	/* N 32-bit words worth of info */
+#define NCAPINTS	18	/* N 32-bit words worth of info */
 #define NBUGINTS	1	/* N 32-bit bug flags */
 
 /*
@@ -177,6 +177,7 @@
 #define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */
 #define X86_FEATURE_PERFCTR_NB  ( 6*32+24) /* NB performance counter extensions */
 #define X86_FEATURE_BPEXT	( 6*32+26) /* data breakpoint extension */
+#define X86_FEATURE_PTSC	( 6*32+27) /* performance time-stamp counter */
 #define X86_FEATURE_PERFCTR_L2	( 6*32+28) /* L2 performance counter extensions */
 #define X86_FEATURE_MWAITX	( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */
 
@@ -250,6 +251,7 @@
 
 /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
 #define X86_FEATURE_CLZERO	(13*32+0) /* CLZERO instruction */
+#define X86_FEATURE_IRPERF	(13*32+1) /* Instructions Retired Count */
 
 /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
 #define X86_FEATURE_DTHERM	(14*32+ 0) /* Digital Thermal Sensor */
@@ -280,6 +282,11 @@
 #define X86_FEATURE_PKU		(16*32+ 3) /* Protection Keys for Userspace */
 #define X86_FEATURE_OSPKE	(16*32+ 4) /* OS Protection Keys Enable */
 
+/* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */
+#define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */
+#define X86_FEATURE_SUCCOR	(17*32+1) /* Uncorrectable error containment and recovery */
+#define X86_FEATURE_SMCA	(17*32+3) /* Scalable MCA */
+
 /*
  * BUG word(s)
  */
@@ -294,6 +301,9 @@
 #define X86_BUG_FXSAVE_LEAK	X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
 #define X86_BUG_CLFLUSH_MONITOR	X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
 #define X86_BUG_SYSRET_SS_ATTRS	X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
+#define X86_BUG_NULL_SEG	X86_BUG(9) /* Nulling a selector preserves the base */
+#define X86_BUG_SWAPGS_FENCE	X86_BUG(10) /* SWAPGS without input dep on GS */
+
 
 #ifdef CONFIG_X86_32
 /*
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 53748c4..78d1e74 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -3,6 +3,7 @@
 
 #include <asm/fpu/api.h>
 #include <asm/pgtable.h>
+#include <asm/processor-flags.h>
 #include <asm/tlb.h>
 
 /*
@@ -28,33 +29,22 @@
 
 #define MAX_CMDLINE_ADDRESS	UINT_MAX
 
+#define ARCH_EFI_IRQ_FLAGS_MASK	X86_EFLAGS_IF
+
 #ifdef CONFIG_X86_32
 
-
 extern unsigned long asmlinkage efi_call_phys(void *, ...);
 
+#define arch_efi_call_virt_setup()	kernel_fpu_begin()
+#define arch_efi_call_virt_teardown()	kernel_fpu_end()
+
 /*
  * Wrap all the virtual calls in a way that forces the parameters on the stack.
  */
-
-/* Use this macro if your virtual returns a non-void value */
-#define efi_call_virt(f, args...) \
+#define arch_efi_call_virt(f, args...)					\
 ({									\
-	efi_status_t __s;						\
-	kernel_fpu_begin();						\
-	__s = ((efi_##f##_t __attribute__((regparm(0)))*)		\
-		efi.systab->runtime->f)(args);				\
-	kernel_fpu_end();						\
-	__s;								\
-})
-
-/* Use this macro if your virtual call does not return any value */
-#define __efi_call_virt(f, args...) \
-({									\
-	kernel_fpu_begin();						\
 	((efi_##f##_t __attribute__((regparm(0)))*)			\
 		efi.systab->runtime->f)(args);				\
-	kernel_fpu_end();						\
 })
 
 #define efi_ioremap(addr, size, type, attr)	ioremap_cache(addr, size)
@@ -78,10 +68,8 @@
 	u64	phys_stack;
 } __packed;
 
-#define efi_call_virt(f, ...)						\
+#define arch_efi_call_virt_setup()					\
 ({									\
-	efi_status_t __s;						\
-									\
 	efi_sync_low_kernel_mappings();					\
 	preempt_disable();						\
 	__kernel_fpu_begin();						\
@@ -91,9 +79,13 @@
 		write_cr3((unsigned long)efi_scratch.efi_pgt);		\
 		__flush_tlb_all();					\
 	}								\
-									\
-	__s = efi_call((void *)efi.systab->runtime->f, __VA_ARGS__);	\
-									\
+})
+
+#define arch_efi_call_virt(f, args...)					\
+	efi_call((void *)efi.systab->runtime->f, args)
+
+#define arch_efi_call_virt_teardown()					\
+({									\
 	if (efi_scratch.use_pgd) {					\
 		write_cr3(efi_scratch.prev_cr3);			\
 		__flush_tlb_all();					\
@@ -101,15 +93,8 @@
 									\
 	__kernel_fpu_end();						\
 	preempt_enable();						\
-	__s;								\
 })
 
-/*
- * All X86_64 virt calls return non-void values. Thus, use non-void call for
- * virt calls that would be void on X86_32.
- */
-#define __efi_call_virt(f, args...) efi_call_virt(f, args)
-
 extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size,
 					u32 type, u64 attribute);
 
@@ -180,6 +165,8 @@
 extern struct console early_efi_console;
 extern void parse_efi_setup(u64 phys_addr, u32 data_len);
 
+extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt);
+
 #ifdef CONFIG_EFI_MIXED
 extern void efi_thunk_runtime_setup(void);
 extern efi_status_t efi_thunk_set_virtual_address_map(
@@ -225,6 +212,11 @@
 #define efi_call_early(f, ...)						\
 	__efi_early()->call(__efi_early()->f, __VA_ARGS__);
 
+#define __efi_call_early(f, ...)					\
+	__efi_early()->call((unsigned long)f, __VA_ARGS__);
+
+#define efi_is_64bit()		__efi_early()->is64
+
 extern bool efi_reboot_required(void);
 
 #else
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 15340e3..fea7724 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -176,7 +176,7 @@
 	regs->si = regs->di = regs->bp = 0;
 	regs->r8 = regs->r9 = regs->r10 = regs->r11 = 0;
 	regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;
-	t->fs = t->gs = 0;
+	t->fsbase = t->gsbase = 0;
 	t->fsindex = t->gsindex = 0;
 	t->ds = t->es = ds;
 }
@@ -226,8 +226,8 @@
 	(pr_reg)[18] = (regs)->flags;				\
 	(pr_reg)[19] = (regs)->sp;				\
 	(pr_reg)[20] = (regs)->ss;				\
-	(pr_reg)[21] = current->thread.fs;			\
-	(pr_reg)[22] = current->thread.gs;			\
+	(pr_reg)[21] = current->thread.fsbase;			\
+	(pr_reg)[22] = current->thread.gsbase;			\
 	asm("movl %%ds,%0" : "=r" (v)); (pr_reg)[23] = v;	\
 	asm("movl %%es,%0" : "=r" (v)); (pr_reg)[24] = v;	\
 	asm("movl %%fs,%0" : "=r" (v)); (pr_reg)[25] = v;	\
diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h
index e6a8613..3a10616 100644
--- a/arch/x86/include/asm/hugetlb.h
+++ b/arch/x86/include/asm/hugetlb.h
@@ -4,7 +4,7 @@
 #include <asm/page.h>
 #include <asm-generic/hugetlb.h>
 
-#define hugepages_supported() cpu_has_pse
+#define hugepages_supported() boot_cpu_has(X86_FEATURE_PSE)
 
 static inline int is_hugepage_only_range(struct mm_struct *mm,
 					 unsigned long addr,
diff --git a/arch/x86/include/asm/irq_work.h b/arch/x86/include/asm/irq_work.h
index d0afb05..f706041 100644
--- a/arch/x86/include/asm/irq_work.h
+++ b/arch/x86/include/asm/irq_work.h
@@ -5,7 +5,7 @@
 
 static inline bool arch_irq_work_has_interrupt(void)
 {
-	return cpu_has_apic;
+	return boot_cpu_has(X86_FEATURE_APIC);
 }
 
 #endif /* _ASM_IRQ_WORK_H */
diff --git a/arch/x86/include/asm/kgdb.h b/arch/x86/include/asm/kgdb.h
index 332f98c..22a8537 100644
--- a/arch/x86/include/asm/kgdb.h
+++ b/arch/x86/include/asm/kgdb.h
@@ -6,6 +6,8 @@
  * Copyright (C) 2008 Wind River Systems, Inc.
  */
 
+#include <asm/ptrace.h>
+
 /*
 * BUFMAX defines the maximum number of characters in inbound/outbound
 * buffers; at least NUMREGBYTES*2 are needed for register packets.
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index 79327e9..0ccb26d 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -8,40 +8,6 @@
 
 #ifdef CONFIG_X86_32
 #define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0)))
-
-/*
- * Make sure the compiler doesn't do anything stupid with the
- * arguments on the stack - they are owned by the *caller*, not
- * the callee. This just fools gcc into not spilling into them,
- * and keeps it from doing tailcall recursion and/or using the
- * stack slots for temporaries, since they are live and "used"
- * all the way to the end of the function.
- *
- * NOTE! On x86-64, all the arguments are in registers, so this
- * only matters on a 32-bit kernel.
- */
-#define asmlinkage_protect(n, ret, args...) \
-	__asmlinkage_protect##n(ret, ##args)
-#define __asmlinkage_protect_n(ret, args...) \
-	__asm__ __volatile__ ("" : "=r" (ret) : "0" (ret), ##args)
-#define __asmlinkage_protect0(ret) \
-	__asmlinkage_protect_n(ret)
-#define __asmlinkage_protect1(ret, arg1) \
-	__asmlinkage_protect_n(ret, "m" (arg1))
-#define __asmlinkage_protect2(ret, arg1, arg2) \
-	__asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2))
-#define __asmlinkage_protect3(ret, arg1, arg2, arg3) \
-	__asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3))
-#define __asmlinkage_protect4(ret, arg1, arg2, arg3, arg4) \
-	__asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \
-			      "m" (arg4))
-#define __asmlinkage_protect5(ret, arg1, arg2, arg3, arg4, arg5) \
-	__asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \
-			      "m" (arg4), "m" (arg5))
-#define __asmlinkage_protect6(ret, arg1, arg2, arg3, arg4, arg5, arg6) \
-	__asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \
-			      "m" (arg4), "m" (arg5), "m" (arg6))
-
 #endif /* CONFIG_X86_32 */
 
 #ifdef __ASSEMBLY__
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 92b6f65..8bf766e 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -104,13 +104,23 @@
 #define MCE_LOG_SIGNATURE	"MACHINECHECK"
 
 /* AMD Scalable MCA */
+#define MSR_AMD64_SMCA_MC0_CTL		0xc0002000
+#define MSR_AMD64_SMCA_MC0_STATUS	0xc0002001
+#define MSR_AMD64_SMCA_MC0_ADDR		0xc0002002
 #define MSR_AMD64_SMCA_MC0_MISC0	0xc0002003
 #define MSR_AMD64_SMCA_MC0_CONFIG	0xc0002004
 #define MSR_AMD64_SMCA_MC0_IPID		0xc0002005
+#define MSR_AMD64_SMCA_MC0_DESTAT	0xc0002008
+#define MSR_AMD64_SMCA_MC0_DEADDR	0xc0002009
 #define MSR_AMD64_SMCA_MC0_MISC1	0xc000200a
+#define MSR_AMD64_SMCA_MCx_CTL(x)	(MSR_AMD64_SMCA_MC0_CTL + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_STATUS(x)	(MSR_AMD64_SMCA_MC0_STATUS + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_ADDR(x)	(MSR_AMD64_SMCA_MC0_ADDR + 0x10*(x))
 #define MSR_AMD64_SMCA_MCx_MISC(x)	(MSR_AMD64_SMCA_MC0_MISC0 + 0x10*(x))
 #define MSR_AMD64_SMCA_MCx_CONFIG(x)	(MSR_AMD64_SMCA_MC0_CONFIG + 0x10*(x))
 #define MSR_AMD64_SMCA_MCx_IPID(x)	(MSR_AMD64_SMCA_MC0_IPID + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_DESTAT(x)	(MSR_AMD64_SMCA_MC0_DESTAT + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_DEADDR(x)	(MSR_AMD64_SMCA_MC0_DEADDR + 0x10*(x))
 #define MSR_AMD64_SMCA_MCx_MISCy(x, y)	((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x)))
 
 /*
@@ -168,9 +178,18 @@
 
 	      __reserved_0	: 61;
 };
+
+struct mca_msr_regs {
+	u32 (*ctl)	(int bank);
+	u32 (*status)	(int bank);
+	u32 (*addr)	(int bank);
+	u32 (*misc)	(int bank);
+};
+
 extern struct mce_vendor_flags mce_flags;
 
 extern struct mca_config mca_cfg;
+extern struct mca_msr_regs msr_ops;
 extern void mce_register_decode_chain(struct notifier_block *nb);
 extern void mce_unregister_decode_chain(struct notifier_block *nb);
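
The SMCA banks occupy a flat window with a 0x10 stride starting at 0xc0002000, so, for example, MSR_AMD64_SMCA_MCx_STATUS(2) resolves to 0xc0002001 + 0x10*2 = 0xc0002021. The new mca_msr_regs table lets common code pick an address scheme at runtime; a sketch of the two plausible instantiations follows (how the .c side actually wires them up is not part of this hunk):

	/* Sketch: legacy vs. SMCA register-address callbacks. */
	static u32 legacy_status(int bank) { return MSR_IA32_MCx_STATUS(bank); }
	static u32 smca_status(int bank)   { return MSR_AMD64_SMCA_MCx_STATUS(bank); }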
 
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 8428002..39634819 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -115,103 +115,12 @@
 	destroy_context_ldt(mm);
 }
 
-static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
-			     struct task_struct *tsk)
-{
-	unsigned cpu = smp_processor_id();
+extern void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+		      struct task_struct *tsk);
 
-	if (likely(prev != next)) {
-#ifdef CONFIG_SMP
-		this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
-		this_cpu_write(cpu_tlbstate.active_mm, next);
-#endif
-		cpumask_set_cpu(cpu, mm_cpumask(next));
-
-		/*
-		 * Re-load page tables.
-		 *
-		 * This logic has an ordering constraint:
-		 *
-		 *  CPU 0: Write to a PTE for 'next'
-		 *  CPU 0: load bit 1 in mm_cpumask.  if nonzero, send IPI.
-		 *  CPU 1: set bit 1 in next's mm_cpumask
-		 *  CPU 1: load from the PTE that CPU 0 writes (implicit)
-		 *
-		 * We need to prevent an outcome in which CPU 1 observes
-		 * the new PTE value and CPU 0 observes bit 1 clear in
-		 * mm_cpumask.  (If that occurs, then the IPI will never
-		 * be sent, and CPU 0's TLB will contain a stale entry.)
-		 *
-		 * The bad outcome can occur if either CPU's load is
-		 * reordered before that CPU's store, so both CPUs must
-		 * execute full barriers to prevent this from happening.
-		 *
-		 * Thus, switch_mm needs a full barrier between the
-		 * store to mm_cpumask and any operation that could load
-		 * from next->pgd.  TLB fills are special and can happen
-		 * due to instruction fetches or for no reason at all,
-		 * and neither LOCK nor MFENCE orders them.
-		 * Fortunately, load_cr3() is serializing and gives the
-		 * ordering guarantee we need.
-		 *
-		 */
-		load_cr3(next->pgd);
-
-		trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
-
-		/* Stop flush ipis for the previous mm */
-		cpumask_clear_cpu(cpu, mm_cpumask(prev));
-
-		/* Load per-mm CR4 state */
-		load_mm_cr4(next);
-
-#ifdef CONFIG_MODIFY_LDT_SYSCALL
-		/*
-		 * Load the LDT, if the LDT is different.
-		 *
-		 * It's possible that prev->context.ldt doesn't match
-		 * the LDT register.  This can happen if leave_mm(prev)
-		 * was called and then modify_ldt changed
-		 * prev->context.ldt but suppressed an IPI to this CPU.
-		 * In this case, prev->context.ldt != NULL, because we
-		 * never set context.ldt to NULL while the mm still
-		 * exists.  That means that next->context.ldt !=
-		 * prev->context.ldt, because mms never share an LDT.
-		 */
-		if (unlikely(prev->context.ldt != next->context.ldt))
-			load_mm_ldt(next);
-#endif
-	}
-#ifdef CONFIG_SMP
-	  else {
-		this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
-		BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next);
-
-		if (!cpumask_test_cpu(cpu, mm_cpumask(next))) {
-			/*
-			 * On established mms, the mm_cpumask is only changed
-			 * from irq context, from ptep_clear_flush() while in
-			 * lazy tlb mode, and here. Irqs are blocked during
-			 * schedule, protecting us from simultaneous changes.
-			 */
-			cpumask_set_cpu(cpu, mm_cpumask(next));
-
-			/*
-			 * We were in lazy tlb mode and leave_mm disabled
-			 * tlb flush IPI delivery. We must reload CR3
-			 * to make sure to use no freed page tables.
-			 *
-			 * As above, load_cr3() is serializing and orders TLB
-			 * fills with respect to the mm_cpumask write.
-			 */
-			load_cr3(next->pgd);
-			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
-			load_mm_cr4(next);
-			load_mm_ldt(next);
-		}
-	}
-#endif
-}
+extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+			       struct task_struct *tsk);
+#define switch_mm_irqs_off switch_mm_irqs_off
 
 #define activate_mm(prev, next)			\
 do {						\
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 5b3c9a5..5a73a9c 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -89,27 +89,16 @@
 #define MSR_PEBS_LD_LAT_THRESHOLD	0x000003f6
 
 #define MSR_IA32_RTIT_CTL		0x00000570
-#define RTIT_CTL_TRACEEN		BIT(0)
-#define RTIT_CTL_CYCLEACC		BIT(1)
-#define RTIT_CTL_OS			BIT(2)
-#define RTIT_CTL_USR			BIT(3)
-#define RTIT_CTL_CR3EN			BIT(7)
-#define RTIT_CTL_TOPA			BIT(8)
-#define RTIT_CTL_MTC_EN			BIT(9)
-#define RTIT_CTL_TSC_EN			BIT(10)
-#define RTIT_CTL_DISRETC		BIT(11)
-#define RTIT_CTL_BRANCH_EN		BIT(13)
-#define RTIT_CTL_MTC_RANGE_OFFSET	14
-#define RTIT_CTL_MTC_RANGE		(0x0full << RTIT_CTL_MTC_RANGE_OFFSET)
-#define RTIT_CTL_CYC_THRESH_OFFSET	19
-#define RTIT_CTL_CYC_THRESH		(0x0full << RTIT_CTL_CYC_THRESH_OFFSET)
-#define RTIT_CTL_PSB_FREQ_OFFSET	24
-#define RTIT_CTL_PSB_FREQ      		(0x0full << RTIT_CTL_PSB_FREQ_OFFSET)
 #define MSR_IA32_RTIT_STATUS		0x00000571
-#define RTIT_STATUS_CONTEXTEN		BIT(1)
-#define RTIT_STATUS_TRIGGEREN		BIT(2)
-#define RTIT_STATUS_ERROR		BIT(4)
-#define RTIT_STATUS_STOPPED		BIT(5)
+#define MSR_IA32_RTIT_ADDR0_A		0x00000580
+#define MSR_IA32_RTIT_ADDR0_B		0x00000581
+#define MSR_IA32_RTIT_ADDR1_A		0x00000582
+#define MSR_IA32_RTIT_ADDR1_B		0x00000583
+#define MSR_IA32_RTIT_ADDR2_A		0x00000584
+#define MSR_IA32_RTIT_ADDR2_B		0x00000585
+#define MSR_IA32_RTIT_ADDR3_A		0x00000586
+#define MSR_IA32_RTIT_ADDR3_B		0x00000587
 #define MSR_IA32_RTIT_CR3_MATCH		0x00000572
 #define MSR_IA32_RTIT_OUTPUT_BASE	0x00000560
 #define MSR_IA32_RTIT_OUTPUT_MASK	0x00000561
@@ -205,6 +194,8 @@
 #define MSR_CONFIG_TDP_CONTROL		0x0000064B
 #define MSR_TURBO_ACTIVATION_RATIO	0x0000064C
 
+#define MSR_PLATFORM_ENERGY_STATUS	0x0000064D
+
 #define MSR_PKG_WEIGHTED_CORE_C0_RES	0x00000658
 #define MSR_PKG_ANY_CORE_C0_RES		0x00000659
 #define MSR_PKG_ANY_GFXE_C0_RES		0x0000065A
@@ -315,6 +306,9 @@
 #define MSR_AMD64_IBSOPDATA4		0xc001103d
 #define MSR_AMD64_IBS_REG_COUNT_MAX	8 /* includes MSR_AMD64_IBSBRTARGET */
 
+/* Fam 17h MSRs */
+#define MSR_F17H_IRPERF			0xc00000e9
+
 /* Fam 16h MSRs */
 #define MSR_F16H_L2I_PERF_CTL		0xc0010230
 #define MSR_F16H_L2I_PERF_CTR		0xc0010231
@@ -328,6 +322,7 @@
 #define MSR_F15H_PERF_CTR		0xc0010201
 #define MSR_F15H_NB_PERF_CTL		0xc0010240
 #define MSR_F15H_NB_PERF_CTR		0xc0010241
+#define MSR_F15H_PTSC			0xc0010280
 #define MSR_F15H_IC_CFG			0xc0011021
 
 /* Fam 10h MSRs */
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 7a79ee2..7dc1d8f 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -84,7 +84,10 @@
 {
 	DECLARE_ARGS(val, low, high);
 
-	asm volatile("rdmsr" : EAX_EDX_RET(val, low, high) : "c" (msr));
+	asm volatile("1: rdmsr\n"
+		     "2:\n"
+		     _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_rdmsr_unsafe)
+		     : EAX_EDX_RET(val, low, high) : "c" (msr));
 	if (msr_tracepoint_active(__tracepoint_read_msr))
 		do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), 0);
 	return EAX_EDX_VAL(val, low, high);
@@ -98,7 +101,10 @@
 	asm volatile("2: rdmsr ; xor %[err],%[err]\n"
 		     "1:\n\t"
 		     ".section .fixup,\"ax\"\n\t"
-		     "3:  mov %[fault],%[err] ; jmp 1b\n\t"
+		     "3: mov %[fault],%[err]\n\t"
+		     "xorl %%eax, %%eax\n\t"
+		     "xorl %%edx, %%edx\n\t"
+		     "jmp 1b\n\t"
 		     ".previous\n\t"
 		     _ASM_EXTABLE(2b, 3b)
 		     : [err] "=r" (*err), EAX_EDX_RET(val, low, high)
@@ -108,10 +114,14 @@
 	return EAX_EDX_VAL(val, low, high);
 }
 
-static inline void native_write_msr(unsigned int msr,
-				    unsigned low, unsigned high)
+/* Can be uninlined because referenced by paravirt */
+notrace static inline void native_write_msr(unsigned int msr,
+					    unsigned low, unsigned high)
 {
-	asm volatile("wrmsr" : : "c" (msr), "a"(low), "d" (high) : "memory");
+	asm volatile("1: wrmsr\n"
+		     "2:\n"
+		     _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_unsafe)
+		     : : "c" (msr), "a"(low), "d" (high) : "memory");
 	if (msr_tracepoint_active(__tracepoint_read_msr))
 		do_trace_write_msr(msr, ((u64)high << 32 | low), 0);
 }
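
Both accessors now recover through custom exception-table handlers instead of oopsing on a non-existent MSR: the handler warns, fakes a zero result for reads, and resumes at the "2:" label. An approximation of the read-side handler (the real ex_handler_rdmsr_unsafe lives in arch/x86/mm/extable.c):

	/* Sketch: fix up a faulting RDMSR by pretending it returned zero. */
	bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup,
				     struct pt_regs *regs, int trapnr)
	{
		WARN_ONCE(1, "unchecked MSR access error: RDMSR from 0x%x\n",
			  (unsigned int)regs->cx);
		regs->ax = 0;
		regs->dx = 0;
		regs->ip = ex_fixup_addr(fixup);
		return true;
	}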
diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h
index b94f6f6..dbff145 100644
--- a/arch/x86/include/asm/mtrr.h
+++ b/arch/x86/include/asm/mtrr.h
@@ -24,6 +24,7 @@
 #define _ASM_X86_MTRR_H
 
 #include <uapi/asm/mtrr.h>
+#include <asm/pat.h>
 
 
 /*
@@ -83,9 +84,12 @@
 static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi)
 {
 }
+static inline void mtrr_bp_init(void)
+{
+	pat_disable("MTRRs disabled, skipping PAT initialization too.");
+}
 
 #define mtrr_ap_init() do {} while (0)
-#define mtrr_bp_init() do {} while (0)
 #define set_mtrr_aps_delayed_init() do {} while (0)
 #define mtrr_aps_init() do {} while (0)
 #define mtrr_bp_restore() do {} while (0)
diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index 802dde3..cf8f619 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -37,7 +37,10 @@
 	alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
 #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
 
+#ifndef __pa
 #define __pa(x)		__phys_addr((unsigned long)(x))
+#endif
+
 #define __pa_nodebug(x)	__phys_addr_nodebug((unsigned long)(x))
 /* __pa_symbol should be used for C visible symbols.
    This seems to be the official gcc blessed way to do such arithmetic. */
@@ -51,7 +54,9 @@
 #define __pa_symbol(x) \
 	__phys_addr_symbol(__phys_reloc_hide((unsigned long)(x)))
 
+#ifndef __va
 #define __va(x)			((void *)((unsigned long)(x)+PAGE_OFFSET))
+#endif
 
 #define __boot_va(x)		__va(x)
 #define __boot_pa(x)		__pa(x)
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 4928cf0..d5c2f8b 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -47,12 +47,10 @@
  * are fully set up. If kernel ASLR is configured, it can extend the
  * kernel page table mapping, reducing the size of the modules area.
  */
-#define KERNEL_IMAGE_SIZE_DEFAULT      (512 * 1024 * 1024)
-#if defined(CONFIG_RANDOMIZE_BASE) && \
-	CONFIG_RANDOMIZE_BASE_MAX_OFFSET > KERNEL_IMAGE_SIZE_DEFAULT
-#define KERNEL_IMAGE_SIZE   CONFIG_RANDOMIZE_BASE_MAX_OFFSET
+#if defined(CONFIG_RANDOMIZE_BASE)
+#define KERNEL_IMAGE_SIZE	(1024 * 1024 * 1024)
 #else
-#define KERNEL_IMAGE_SIZE      KERNEL_IMAGE_SIZE_DEFAULT
+#define KERNEL_IMAGE_SIZE	(512 * 1024 * 1024)
 #endif
 
 #endif /* _ASM_X86_PAGE_64_DEFS_H */
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 601f1b8..2970d22 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -15,17 +15,6 @@
 #include <linux/cpumask.h>
 #include <asm/frame.h>
 
-static inline int paravirt_enabled(void)
-{
-	return pv_info.paravirt_enabled;
-}
-
-static inline int paravirt_has_feature(unsigned int feature)
-{
-	WARN_ON_ONCE(!pv_info.paravirt_enabled);
-	return (pv_info.features & feature);
-}
-
 static inline void load_sp0(struct tss_struct *tss,
 			     struct thread_struct *thread)
 {
@@ -130,21 +119,31 @@
 
 #define get_kernel_rpl()  (pv_info.kernel_rpl)
 
-static inline u64 paravirt_read_msr(unsigned msr, int *err)
+static inline u64 paravirt_read_msr(unsigned msr)
 {
-	return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err);
+	return PVOP_CALL1(u64, pv_cpu_ops.read_msr, msr);
 }
 
-static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high)
+static inline void paravirt_write_msr(unsigned msr,
+				      unsigned low, unsigned high)
 {
-	return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high);
+	return PVOP_VCALL3(pv_cpu_ops.write_msr, msr, low, high);
 }
 
-/* These should all do BUG_ON(_err), but our headers are too tangled. */
+static inline u64 paravirt_read_msr_safe(unsigned msr, int *err)
+{
+	return PVOP_CALL2(u64, pv_cpu_ops.read_msr_safe, msr, err);
+}
+
+static inline int paravirt_write_msr_safe(unsigned msr,
+					  unsigned low, unsigned high)
+{
+	return PVOP_CALL3(int, pv_cpu_ops.write_msr_safe, msr, low, high);
+}
+
 #define rdmsr(msr, val1, val2)			\
 do {						\
-	int _err;				\
-	u64 _l = paravirt_read_msr(msr, &_err);	\
+	u64 _l = paravirt_read_msr(msr);	\
 	val1 = (u32)_l;				\
 	val2 = _l >> 32;			\
 } while (0)
@@ -156,8 +155,7 @@
 
 #define rdmsrl(msr, val)			\
 do {						\
-	int _err;				\
-	val = paravirt_read_msr(msr, &_err);	\
+	val = paravirt_read_msr(msr);		\
 } while (0)
 
 static inline void wrmsrl(unsigned msr, u64 val)
@@ -165,23 +163,23 @@
 	wrmsr(msr, (u32)val, (u32)(val>>32));
 }
 
-#define wrmsr_safe(msr, a, b)	paravirt_write_msr(msr, a, b)
+#define wrmsr_safe(msr, a, b)	paravirt_write_msr_safe(msr, a, b)
 
 /* rdmsr with exception handling */
-#define rdmsr_safe(msr, a, b)			\
-({						\
-	int _err;				\
-	u64 _l = paravirt_read_msr(msr, &_err);	\
-	(*a) = (u32)_l;				\
-	(*b) = _l >> 32;			\
-	_err;					\
+#define rdmsr_safe(msr, a, b)				\
+({							\
+	int _err;					\
+	u64 _l = paravirt_read_msr_safe(msr, &_err);	\
+	(*a) = (u32)_l;					\
+	(*b) = _l >> 32;				\
+	_err;						\
 })
 
 static inline int rdmsrl_safe(unsigned msr, unsigned long long *p)
 {
 	int err;
 
-	*p = paravirt_read_msr(msr, &err);
+	*p = paravirt_read_msr_safe(msr, &err);
 	return err;
 }
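
The split leaves callers with two idioms: the plain accessors treat a fault as a kernel bug (the extable handler warns and continues with zeroes), while the _safe variants report -EIO. A sketch, using two MSRs from elsewhere in this series purely for illustration:

	/* Sketch: plain vs. _safe MSR reads after the API split. */
	static void example_msr_reads(void)
	{
		u64 val;

		rdmsrl(MSR_PLATFORM_INFO, val);	/* must exist; a fault is a bug */

		if (rdmsrl_safe(MSR_PLATFORM_ENERGY_STATUS, &val))
			pr_info("platform energy MSR not implemented\n");
	}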
 
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index e8c2326..7fa9e77 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -69,15 +69,9 @@
 	u16 extra_user_64bit_cs;  /* __USER_CS if none */
 #endif
 
-	int paravirt_enabled;
-	unsigned int features;	  /* valid only if paravirt_enabled is set */
 	const char *name;
 };
 
-#define paravirt_has(x) paravirt_has_feature(PV_SUPPORTED_##x)
-/* Supported features */
-#define PV_SUPPORTED_RTC        (1<<0)
-
 struct pv_init_ops {
 	/*
 	 * Patch may replace one of the defined code sequences with
@@ -155,10 +149,16 @@
 	void (*cpuid)(unsigned int *eax, unsigned int *ebx,
 		      unsigned int *ecx, unsigned int *edx);
 
-	/* MSR, PMC and TSR operations.
-	   err = 0/-EFAULT.  wrmsr returns 0/-EFAULT. */
-	u64 (*read_msr)(unsigned int msr, int *err);
-	int (*write_msr)(unsigned int msr, unsigned low, unsigned high);
+	/* Unsafe MSR operations.  These will warn or panic on failure. */
+	u64 (*read_msr)(unsigned int msr);
+	void (*write_msr)(unsigned int msr, unsigned low, unsigned high);
+
+	/*
+	 * Safe MSR operations.
+	 * read sets err to 0 or -EIO.  write returns 0 or -EIO.
+	 */
+	u64 (*read_msr_safe)(unsigned int msr, int *err);
+	int (*write_msr_safe)(unsigned int msr, unsigned low, unsigned high);
 
 	u64 (*read_pmc)(int counter);
 
diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h
index ca6c228..0b1ff4c 100644
--- a/arch/x86/include/asm/pat.h
+++ b/arch/x86/include/asm/pat.h
@@ -5,8 +5,8 @@
 #include <asm/pgtable_types.h>
 
 bool pat_enabled(void);
+void pat_disable(const char *reason);
 extern void pat_init(void);
-void pat_init_cache_modes(u64);
 
 extern int reserve_memtype(u64 start, u64 end,
 		enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm);
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 97f3242..f86491a 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -183,7 +183,7 @@
 
 static inline int has_transparent_hugepage(void)
 {
-	return cpu_has_pse;
+	return boot_cpu_has(X86_FEATURE_PSE);
 }
 
 #ifdef __HAVE_ARCH_PTE_DEVMAP
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 9264476..62c6cc3 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -388,9 +388,16 @@
 	unsigned long		ip;
 #endif
 #ifdef CONFIG_X86_64
-	unsigned long		fs;
+	unsigned long		fsbase;
+	unsigned long		gsbase;
+#else
+	/*
+	 * XXX: this could presumably be unsigned short.  Alternatively,
+	 * 32-bit kernels could be taught to use fsindex instead.
+	 */
+	unsigned long fs;
+	unsigned long gs;
 #endif
-	unsigned long		gs;
 
 	/* Save middle states of ptrace breakpoints */
 	struct perf_event	*ptrace_bps[HBP_NUM];
@@ -473,8 +480,6 @@
 #include <asm/paravirt.h>
 #else
 #define __cpuid			native_cpuid
-#define paravirt_enabled()	0
-#define paravirt_has(x) 	0
 
 static inline void load_sp0(struct tss_struct *tss,
 			    struct thread_struct *thread)
diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h
index ceec86eb..453744c 100644
--- a/arch/x86/include/asm/rwsem.h
+++ b/arch/x86/include/asm/rwsem.h
@@ -99,26 +99,36 @@
 /*
  * lock for writing
  */
-static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
-{
-	long tmp;
-	asm volatile("# beginning down_write\n\t"
-		     LOCK_PREFIX "  xadd      %1,(%2)\n\t"
-		     /* adds 0xffff0001, returns the old value */
-		     "  test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t"
-		     /* was the active mask 0 before? */
-		     "  jz        1f\n"
-		     "  call call_rwsem_down_write_failed\n"
-		     "1:\n"
-		     "# ending down_write"
-		     : "+m" (sem->count), "=d" (tmp)
-		     : "a" (sem), "1" (RWSEM_ACTIVE_WRITE_BIAS)
-		     : "memory", "cc");
-}
+#define ____down_write(sem, slow_path)			\
+({							\
+	long tmp;					\
+	struct rw_semaphore *ret;			\
+	asm volatile("# beginning down_write\n\t"	\
+		     LOCK_PREFIX "  xadd      %1,(%3)\n\t"	\
+		     /* adds 0xffff0001, returns the old value */ \
+		     "  test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t" \
+		     /* was the active mask 0 before? */\
+		     "  jz        1f\n"			\
+		     "  call " slow_path "\n"		\
+		     "1:\n"				\
+		     "# ending down_write"		\
+		     : "+m" (sem->count), "=d" (tmp), "=a" (ret)	\
+		     : "a" (sem), "1" (RWSEM_ACTIVE_WRITE_BIAS) \
+		     : "memory", "cc");			\
+	ret;						\
+})
 
 static inline void __down_write(struct rw_semaphore *sem)
 {
-	__down_write_nested(sem, 0);
+	____down_write(sem, "call_rwsem_down_write_failed");
+}
+
+static inline int __down_write_killable(struct rw_semaphore *sem)
+{
+	if (IS_ERR(____down_write(sem, "call_rwsem_down_write_failed_killable")))
+		return -EINTR;
+
+	return 0;
 }
 
 /*
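
The killable variant maps an ERR_PTR() from the slow path to -EINTR; the caller-facing wrapper is down_write_killable(). Typical use, sketched with mm->mmap_sem as an illustrative lock:

	/* Sketch: back out cleanly when a fatal signal interrupts the wait. */
	static int example_write_lock(struct mm_struct *mm)
	{
		if (down_write_killable(&mm->mmap_sem))
			return -EINTR;
		/* ... modify mappings ... */
		up_write(&mm->mmap_sem);
		return 0;
	}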
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index 7d5a192..1549caa0 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -2,6 +2,7 @@
 #define _ASM_X86_SEGMENT_H
 
 #include <linux/const.h>
+#include <asm/alternative.h>
 
 /*
  * Constructor for a conventional segment GDT (or LDT) entry.
@@ -207,13 +208,6 @@
 #define __USER_CS			(GDT_ENTRY_DEFAULT_USER_CS*8 + 3)
 #define __PER_CPU_SEG			(GDT_ENTRY_PER_CPU*8 + 3)
 
-/* TLS indexes for 64-bit - hardcoded in arch_prctl(): */
-#define FS_TLS				0
-#define GS_TLS				1
-
-#define GS_TLS_SEL			((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3)
-#define FS_TLS_SEL			((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3)
-
 #endif
 
 #ifndef CONFIG_PARAVIRT
@@ -249,10 +243,13 @@
 #endif
 
 /*
- * Load a segment. Fall back on loading the zero
- * segment if something goes wrong..
+ * Load a segment. Fall back on loading the zero segment if something goes
+ * wrong.  This variant assumes that loading zero fully clears the segment.
+ * This is always the case on Intel CPUs and, even on 64-bit AMD CPUs, any
+ * failure to fully clear the cached descriptor is only observable for
+ * FS and GS.
  */
-#define loadsegment(seg, value)						\
+#define __loadsegment_simple(seg, value)				\
 do {									\
 	unsigned short __val = (value);					\
 									\
@@ -269,6 +266,38 @@
 		     : "+r" (__val) : : "memory");			\
 } while (0)
 
+#define __loadsegment_ss(value) __loadsegment_simple(ss, (value))
+#define __loadsegment_ds(value) __loadsegment_simple(ds, (value))
+#define __loadsegment_es(value) __loadsegment_simple(es, (value))
+
+#ifdef CONFIG_X86_32
+
+/*
+ * On 32-bit systems, the hidden parts of FS and GS are unobservable if
+ * the selector is NULL, so there's no funny business here.
+ */
+#define __loadsegment_fs(value) __loadsegment_simple(fs, (value))
+#define __loadsegment_gs(value) __loadsegment_simple(gs, (value))
+
+#else
+
+static inline void __loadsegment_fs(unsigned short value)
+{
+	asm volatile("						\n"
+		     "1:	movw %0, %%fs			\n"
+		     "2:					\n"
+
+		     _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_clear_fs)
+
+		     : : "rm" (value) : "memory");
+}
+
+/* __loadsegment_gs is intentionally undefined.  Use load_gs_index instead. */
+
+#endif
+
+#define loadsegment(seg, value) __loadsegment_ ## seg (value)
+
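
Note: loadsegment() now dispatches on the literal register name via token pasting, so the FS case on 64-bit transparently picks up the exception-table-protected variant. Illustrative expansion only (the selector values are examples, not part of this patch):

    loadsegment(ds, __USER_DS);	/* -> __loadsegment_ds(__USER_DS) -> __loadsegment_simple(ds, ...) */
    loadsegment(fs, 0);		/* -> __loadsegment_fs(0), extable-backed on CONFIG_X86_64 */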
 /*
  * Save a segment register away:
  */
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index 11af24e..ac1d5da 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -6,6 +6,7 @@
 #define COMMAND_LINE_SIZE 2048
 
 #include <linux/linkage.h>
+#include <asm/page_types.h>
 
 #ifdef __i386__
 
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 751bf4b..8f321a1 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -39,8 +39,7 @@
 	 */								\
 	unsigned long ebx, ecx, edx, esi, edi;				\
 									\
-	asm volatile("pushfl\n\t"		/* save    flags */	\
-		     "pushl %%ebp\n\t"		/* save    EBP   */	\
+	asm volatile("pushl %%ebp\n\t"		/* save    EBP   */	\
 		     "movl %%esp,%[prev_sp]\n\t"	/* save    ESP   */ \
 		     "movl %[next_sp],%%esp\n\t"	/* restore ESP   */ \
 		     "movl $1f,%[prev_ip]\n\t"	/* save    EIP   */	\
@@ -49,7 +48,6 @@
 		     "jmp __switch_to\n"	/* regparm call  */	\
 		     "1:\t"						\
 		     "popl %%ebp\n\t"		/* restore EBP   */	\
-		     "popfl\n"			/* restore flags */	\
 									\
 		     /* output parameters */				\
 		     : [prev_sp] "=m" (prev->thread.sp),		\
diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
new file mode 100644
index 0000000..9039506
--- /dev/null
+++ b/arch/x86/include/asm/text-patching.h
@@ -0,0 +1,40 @@
+#ifndef _ASM_X86_TEXT_PATCHING_H
+#define _ASM_X86_TEXT_PATCHING_H
+
+#include <linux/types.h>
+#include <linux/stddef.h>
+#include <asm/ptrace.h>
+
+struct paravirt_patch_site;
+#ifdef CONFIG_PARAVIRT
+void apply_paravirt(struct paravirt_patch_site *start,
+		    struct paravirt_patch_site *end);
+#else
+static inline void apply_paravirt(struct paravirt_patch_site *start,
+				  struct paravirt_patch_site *end)
+{}
+#define __parainstructions	NULL
+#define __parainstructions_end	NULL
+#endif
+
+extern void *text_poke_early(void *addr, const void *opcode, size_t len);
+
+/*
+ * Clear and restore the kernel write-protection flag on the local CPU.
+ * Allows the kernel to edit read-only pages.
+ * Side-effect: any interrupt handler running between save and restore will have
+ * the ability to write to read-only pages.
+ *
+ * Warning:
+ * Code patching in the UP case is safe if NMIs and MCE handlers are stopped and
+ * no thread can be preempted in the instructions being modified (no iret to an
+ * invalid instruction possible) or if the instructions are changed from a
+ * consistent state to another consistent state atomically.
+ * On the local CPU you need to be protected against NMI or MCE handlers seeing an
+ * inconsistent instruction while you patch.
+ */
+extern void *text_poke(void *addr, const void *opcode, size_t len);
+extern int poke_int3_handler(struct pt_regs *regs);
+extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler);
+
+#endif /* _ASM_X86_TEXT_PATCHING_H */
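
A hedged usage sketch for text_poke_bp() as declared above, mirroring the NOP->JMP transition pattern used by static keys. 'site' and 'target' are hypothetical addresses; the fourth argument is where a CPU that hits the transient int3 resumes, and site + 5 treats the in-flight instruction as a NOP, which is acceptable for this particular transition:

    unsigned char jmp[5] = { 0xe9, 0, 0, 0, 0 };	/* JMP rel32 */
    s32 rel = (s32)((long)target - ((long)site + 5));

    memcpy(&jmp[1], &rel, 4);			/* little-endian displacement */
    text_poke_bp(site, jmp, 5, site + 5);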
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index ffae84d..30c133a 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -255,7 +255,7 @@
 	return true;
 }
 
-static inline bool is_ia32_task(void)
+static inline bool in_ia32_syscall(void)
 {
 #ifdef CONFIG_X86_32
 	return true;
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 1fde8d5..4e5be94 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -181,7 +181,7 @@
 
 static inline void __flush_tlb_all(void)
 {
-	if (cpu_has_pge)
+	if (static_cpu_has(X86_FEATURE_PGE))
 		__flush_tlb_global();
 	else
 		__flush_tlb();
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 174c421..7428697 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -22,7 +22,7 @@
 static inline cycles_t get_cycles(void)
 {
 #ifndef CONFIG_X86_TSC
-	if (!cpu_has_tsc)
+	if (!boot_cpu_has(X86_FEATURE_TSC))
 		return 0;
 #endif
 
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 2e7513d..12f9653 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -118,7 +118,7 @@
 
 extern int fixup_exception(struct pt_regs *regs, int trapnr);
 extern bool ex_has_fault_handler(unsigned long ip);
-extern int early_fixup_exception(unsigned long *ip);
+extern void early_fixup_exception(struct pt_regs *regs, int trapnr);
 
 /*
  * These are the main single-value transfer routines.  They automatically
diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h
index 71605c7..c852590 100644
--- a/arch/x86/include/asm/uv/bios.h
+++ b/arch/x86/include/asm/uv/bios.h
@@ -51,15 +51,66 @@
 	BIOS_STATUS_UNAVAIL		= -EBUSY
 };
 
+/* Address map parameters */
+struct uv_gam_parameters {
+	u64	mmr_base;
+	u64	gru_base;
+	u8	mmr_shift;	/* Convert PNode to MMR space offset */
+	u8	gru_shift;	/* Convert PNode to GRU space offset */
+	u8	gpa_shift;	/* Size of offset field in GRU phys addr */
+	u8	unused1;
+};
+
+/* UV_TABLE_GAM_RANGE_ENTRY values */
+#define UV_GAM_RANGE_TYPE_UNUSED	0 /* End of table */
+#define UV_GAM_RANGE_TYPE_RAM		1 /* Normal RAM */
+#define UV_GAM_RANGE_TYPE_NVRAM		2 /* Non-volatile memory */
+#define UV_GAM_RANGE_TYPE_NV_WINDOW	3 /* NVMDIMM block window */
+#define UV_GAM_RANGE_TYPE_NV_MAILBOX	4 /* NVMDIMM mailbox */
+#define UV_GAM_RANGE_TYPE_HOLE		5 /* Unused address range */
+#define UV_GAM_RANGE_TYPE_MAX		6
+
+/* The structure stores PA bits 56:26, for 64MB granularity */
+#define UV_GAM_RANGE_SHFT		26		/* 64MB */
+
+struct uv_gam_range_entry {
+	char	type;		/* Entry type: GAM_RANGE_TYPE_UNUSED, etc. */
+	char	unused1;
+	u16	nasid;		/* HNasid */
+	u16	sockid;		/* Socket ID, high bits of APIC ID */
+	u16	pnode;		/* Index to MMR and GRU spaces */
+	u32	pxm;		/* ACPI proximity domain number */
+	u32	limit;		/* PA bits 56:26 (UV_GAM_RANGE_SHFT) */
+};
+
+#define	UV_SYSTAB_SIG			"UVST"
+#define	UV_SYSTAB_VERSION_1		1	/* UV1/2/3 BIOS version */
+#define	UV_SYSTAB_VERSION_UV4		0x400	/* UV4 BIOS base version */
+#define	UV_SYSTAB_VERSION_UV4_1		0x401	/* + gpa_shift */
+#define	UV_SYSTAB_VERSION_UV4_2		0x402	/* + TYPE_NVRAM/WINDOW/MBOX */
+#define	UV_SYSTAB_VERSION_UV4_LATEST	UV_SYSTAB_VERSION_UV4_2
+
+#define	UV_SYSTAB_TYPE_UNUSED		0	/* End of table (offset == 0) */
+#define	UV_SYSTAB_TYPE_GAM_PARAMS	1	/* GAM PARAM conversions */
+#define	UV_SYSTAB_TYPE_GAM_RNG_TBL	2	/* GAM entry table */
+#define	UV_SYSTAB_TYPE_MAX		3
+
 /*
  * The UV system table describes specific firmware
  * capabilities available to the Linux kernel at runtime.
  */
 struct uv_systab {
-	char signature[4];	/* must be "UVST" */
+	char signature[4];	/* must be UV_SYSTAB_SIG */
 	u32 revision;		/* distinguish different firmware revs */
 	u64 function;		/* BIOS runtime callback function ptr */
+	u32 size;		/* systab size (starting with _VERSION_UV4) */
+	struct {
+		u32 type:8;	/* type of entry */
+		u32 offset:24;	/* byte offset from struct start to entry */
+	} entry[1];		/* additional entries follow */
 };
+extern struct uv_systab *uv_systab;
+/* (... end of definitions from UV BIOS ...) */
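
A hedged sketch of walking the extended (UV4+) systab described above: entries are {type, offset} pairs terminated by UV_SYSTAB_TYPE_UNUSED, with offset counted from the start of the table. 'st' is assumed to point at a mapped table with revision >= UV_SYSTAB_VERSION_UV4; the parse_* helpers are hypothetical:

    int i;

    for (i = 0; st->entry[i].type != UV_SYSTAB_TYPE_UNUSED; i++) {
    	void *p = (char *)st + st->entry[i].offset;

    	switch (st->entry[i].type) {
    	case UV_SYSTAB_TYPE_GAM_PARAMS:
    		parse_gam_params(p);	/* struct uv_gam_parameters * */
    		break;
    	case UV_SYSTAB_TYPE_GAM_RNG_TBL:
    		parse_gam_table(p);	/* struct uv_gam_range_entry[] */
    		break;
    	}
    }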
 
 enum {
 	BIOS_FREQ_BASE_PLATFORM = 0,
@@ -99,7 +150,11 @@
 extern s64 uv_bios_reserved_page_pa(u64, u64 *, u64 *, u64 *);
 extern int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus);
 
+#ifdef CONFIG_EFI
 extern void uv_bios_init(void);
+#else
+static inline void uv_bios_init(void) { }
+#endif
 
 extern unsigned long sn_rtc_cycles_per_second;
 extern int uv_type;
@@ -107,7 +162,7 @@
 extern long sn_coherency_id;
 extern long sn_region_size;
 extern long system_serial_number;
-#define partition_coherence_id()	(sn_coherency_id)
+#define uv_partition_coherence_id()	(sn_coherency_id)
 
 extern struct kobject *sgi_uv_kobj;	/* /sys/firmware/sgi_uv */
 
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index fc808b8..cc44d92 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -598,7 +598,7 @@
 	int			timeout_tries;
 	int			ipi_attempts;
 	int			conseccompletes;
-	short			nobau;
+	bool			nobau;
 	short			baudisabled;
 	short			cpu;
 	short			osnode;
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index ea707478..097b80c 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -16,9 +16,11 @@
 #include <linux/percpu.h>
 #include <linux/timer.h>
 #include <linux/io.h>
+#include <linux/topology.h>
 #include <asm/types.h>
 #include <asm/percpu.h>
 #include <asm/uv/uv_mmrs.h>
+#include <asm/uv/bios.h>
 #include <asm/irq_vectors.h>
 #include <asm/io_apic.h>
 
@@ -103,7 +105,6 @@
  *	      processor APICID register.
  */
 
-
 /*
  * Maximum number of bricks in all partitions and in all coherency domains.
  * This is the total number of bricks accessible in the numalink fabric. It
@@ -127,6 +128,7 @@
  */
 #define UV_MAX_NASID_VALUE	(UV_MAX_NUMALINK_BLADES * 2)
 
+/* System Controller Interface Reg info */
 struct uv_scir_s {
 	struct timer_list timer;
 	unsigned long	offset;
@@ -137,71 +139,173 @@
 	unsigned char	enabled;
 };
 
+/* GAM (globally addressed memory) range table */
+struct uv_gam_range_s {
+	u32	limit;		/* PA bits 56:26 (GAM_RANGE_SHFT) */
+	u16	nasid;		/* node's global physical address */
+	s8	base;		/* entry index of node's base addr */
+	u8	reserved;
+};
+
 /*
  * The following defines attributes of the HUB chip. These attributes are
- * frequently referenced and are kept in the per-cpu data areas of each cpu.
- * They are kept together in a struct to minimize cache misses.
+ * frequently referenced and are kept in a common per hub struct.
+ * After setup, the struct is read only, so it should be readily
+ * available in the L3 cache on the cpu socket for the node.
  */
 struct uv_hub_info_s {
 	unsigned long		global_mmr_base;
+	unsigned long		global_mmr_shift;
 	unsigned long		gpa_mask;
-	unsigned int		gnode_extra;
+	unsigned short		*socket_to_node;
+	unsigned short		*socket_to_pnode;
+	unsigned short		*pnode_to_socket;
+	struct uv_gam_range_s	*gr_table;
+	unsigned short		min_socket;
+	unsigned short		min_pnode;
+	unsigned char		m_val;
+	unsigned char		n_val;
+	unsigned char		gr_table_len;
 	unsigned char		hub_revision;
 	unsigned char		apic_pnode_shift;
+	unsigned char		gpa_shift;
 	unsigned char		m_shift;
 	unsigned char		n_lshift;
+	unsigned int		gnode_extra;
 	unsigned long		gnode_upper;
 	unsigned long		lowmem_remap_top;
 	unsigned long		lowmem_remap_base;
+	unsigned long		global_gru_base;
+	unsigned long		global_gru_shift;
 	unsigned short		pnode;
 	unsigned short		pnode_mask;
 	unsigned short		coherency_domain_number;
 	unsigned short		numa_blade_id;
-	unsigned char		blade_processor_id;
-	unsigned char		m_val;
-	unsigned char		n_val;
-	struct uv_scir_s	scir;
+	unsigned short		nr_possible_cpus;
+	unsigned short		nr_online_cpus;
+	short			memory_nid;
 };
 
-DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
-#define uv_hub_info		this_cpu_ptr(&__uv_hub_info)
-#define uv_cpu_hub_info(cpu)	(&per_cpu(__uv_hub_info, cpu))
+/* CPU specific info with a pointer to the hub common info struct */
+struct uv_cpu_info_s {
+	void			*p_uv_hub_info;
+	unsigned char		blade_cpu_id;
+	struct uv_scir_s	scir;
+};
+DECLARE_PER_CPU(struct uv_cpu_info_s, __uv_cpu_info);
+
+#define uv_cpu_info		this_cpu_ptr(&__uv_cpu_info)
+#define uv_cpu_info_per(cpu)	(&per_cpu(__uv_cpu_info, cpu))
+
+#define	uv_scir_info		(&uv_cpu_info->scir)
+#define	uv_cpu_scir_info(cpu)	(&uv_cpu_info_per(cpu)->scir)
+
+/* Node specific hub common info struct */
+extern void **__uv_hub_info_list;
+static inline struct uv_hub_info_s *uv_hub_info_list(int node)
+{
+	return (struct uv_hub_info_s *)__uv_hub_info_list[node];
+}
+
+static inline struct uv_hub_info_s *_uv_hub_info(void)
+{
+	return (struct uv_hub_info_s *)uv_cpu_info->p_uv_hub_info;
+}
+#define	uv_hub_info	_uv_hub_info()
+
+static inline struct uv_hub_info_s *uv_cpu_hub_info(int cpu)
+{
+	return (struct uv_hub_info_s *)uv_cpu_info_per(cpu)->p_uv_hub_info;
+}
+
+#define	UV_HUB_INFO_VERSION	0x7150
+extern int uv_hub_info_version(void);
+static inline int uv_hub_info_check(int version)
+{
+	if (uv_hub_info_version() == version)
+		return 0;
+
+	pr_crit("UV: uv_hub_info version(%x) mismatch, expecting(%x)\n",
+		uv_hub_info_version(), version);
+
+	BUG();	/* Catastrophic - cannot continue on unknown UV system */
+}
+#define	_uv_hub_info_check()	uv_hub_info_check(UV_HUB_INFO_VERSION)
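
A hedged sketch of how a consumer would use the layout check above before touching uv_hub_info; my_uv_driver_init is hypothetical, and is_uv_system() is assumed available from <asm/uv/uv.h>:

    static int __init my_uv_driver_init(void)
    {
    	if (!is_uv_system())
    		return -ENODEV;

    	_uv_hub_info_check();	/* BUG()s if header and kernel disagree */
    	pr_info("UV: pnode %d\n", uv_hub_info->pnode);
    	return 0;
    }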
 
 /*
- * Hub revisions less than UV2_HUB_REVISION_BASE are UV1 hubs. All UV2
- * hubs have revision numbers greater than or equal to UV2_HUB_REVISION_BASE.
+ * HUB revision ranges for each UV HUB architecture.
  * This is a software convention - NOT the hardware revision numbers in
  * the hub chip.
  */
 #define UV1_HUB_REVISION_BASE		1
 #define UV2_HUB_REVISION_BASE		3
 #define UV3_HUB_REVISION_BASE		5
+#define UV4_HUB_REVISION_BASE		7
 
+#ifdef	UV1_HUB_IS_SUPPORTED
 static inline int is_uv1_hub(void)
 {
 	return uv_hub_info->hub_revision < UV2_HUB_REVISION_BASE;
 }
+#else
+static inline int is_uv1_hub(void)
+{
+	return 0;
+}
+#endif
 
+#ifdef	UV2_HUB_IS_SUPPORTED
 static inline int is_uv2_hub(void)
 {
 	return ((uv_hub_info->hub_revision >= UV2_HUB_REVISION_BASE) &&
 		(uv_hub_info->hub_revision < UV3_HUB_REVISION_BASE));
 }
+#else
+static inline int is_uv2_hub(void)
+{
+	return 0;
+}
+#endif
 
+#ifdef	UV3_HUB_IS_SUPPORTED
 static inline int is_uv3_hub(void)
 {
-	return uv_hub_info->hub_revision >= UV3_HUB_REVISION_BASE;
+	return ((uv_hub_info->hub_revision >= UV3_HUB_REVISION_BASE) &&
+		(uv_hub_info->hub_revision < UV4_HUB_REVISION_BASE));
+}
+#else
+static inline int is_uv3_hub(void)
+{
+	return 0;
+}
+#endif
+
+#ifdef	UV4_HUB_IS_SUPPORTED
+static inline int is_uv4_hub(void)
+{
+	return uv_hub_info->hub_revision >= UV4_HUB_REVISION_BASE;
+}
+#else
+static inline int is_uv4_hub(void)
+{
+	return 0;
+}
+#endif
+
+static inline int is_uvx_hub(void)
+{
+	if (uv_hub_info->hub_revision >= UV2_HUB_REVISION_BASE)
+		return uv_hub_info->hub_revision;
+
+	return 0;
 }
 
 static inline int is_uv_hub(void)
 {
+#ifdef	UV1_HUB_IS_SUPPORTED
 	return uv_hub_info->hub_revision;
-}
-
-/* code common to uv2 and uv3 only */
-static inline int is_uvx_hub(void)
-{
-	return uv_hub_info->hub_revision >= UV2_HUB_REVISION_BASE;
+#endif
+	return is_uvx_hub();
 }
 
 union uvh_apicid {
@@ -243,24 +347,42 @@
 #define UV3_LOCAL_MMR_SIZE		(32UL * 1024 * 1024)
 #define UV3_GLOBAL_MMR32_SIZE		(32UL * 1024 * 1024)
 
-#define UV_LOCAL_MMR_BASE		(is_uv1_hub() ? UV1_LOCAL_MMR_BASE : \
-					(is_uv2_hub() ? UV2_LOCAL_MMR_BASE : \
-							UV3_LOCAL_MMR_BASE))
-#define UV_GLOBAL_MMR32_BASE		(is_uv1_hub() ? UV1_GLOBAL_MMR32_BASE :\
-					(is_uv2_hub() ? UV2_GLOBAL_MMR32_BASE :\
-							UV3_GLOBAL_MMR32_BASE))
-#define UV_LOCAL_MMR_SIZE		(is_uv1_hub() ? UV1_LOCAL_MMR_SIZE : \
-					(is_uv2_hub() ? UV2_LOCAL_MMR_SIZE : \
-							UV3_LOCAL_MMR_SIZE))
-#define UV_GLOBAL_MMR32_SIZE		(is_uv1_hub() ? UV1_GLOBAL_MMR32_SIZE :\
-					(is_uv2_hub() ? UV2_GLOBAL_MMR32_SIZE :\
-							UV3_GLOBAL_MMR32_SIZE))
+#define UV4_LOCAL_MMR_BASE		0xfa000000UL
+#define UV4_GLOBAL_MMR32_BASE		0xfc000000UL
+#define UV4_LOCAL_MMR_SIZE		(32UL * 1024 * 1024)
+#define UV4_GLOBAL_MMR32_SIZE		(16UL * 1024 * 1024)
+
+#define UV_LOCAL_MMR_BASE		(				\
+					is_uv1_hub() ? UV1_LOCAL_MMR_BASE : \
+					is_uv2_hub() ? UV2_LOCAL_MMR_BASE : \
+					is_uv3_hub() ? UV3_LOCAL_MMR_BASE : \
+					/*is_uv4_hub*/ UV4_LOCAL_MMR_BASE)
+
+#define UV_GLOBAL_MMR32_BASE		(				\
+					is_uv1_hub() ? UV1_GLOBAL_MMR32_BASE : \
+					is_uv2_hub() ? UV2_GLOBAL_MMR32_BASE : \
+					is_uv3_hub() ? UV3_GLOBAL_MMR32_BASE : \
+					/*is_uv4_hub*/ UV4_GLOBAL_MMR32_BASE)
+
+#define UV_LOCAL_MMR_SIZE		(				\
+					is_uv1_hub() ? UV1_LOCAL_MMR_SIZE : \
+					is_uv2_hub() ? UV2_LOCAL_MMR_SIZE : \
+					is_uv3_hub() ? UV3_LOCAL_MMR_SIZE : \
+					/*is_uv4_hub*/ UV4_LOCAL_MMR_SIZE)
+
+#define UV_GLOBAL_MMR32_SIZE		(				\
+					is_uv1_hub() ? UV1_GLOBAL_MMR32_SIZE : \
+					is_uv2_hub() ? UV2_GLOBAL_MMR32_SIZE : \
+					is_uv3_hub() ? UV3_GLOBAL_MMR32_SIZE : \
+					/*is_uv4_hub*/ UV4_GLOBAL_MMR32_SIZE)
+
 #define UV_GLOBAL_MMR64_BASE		(uv_hub_info->global_mmr_base)
 
 #define UV_GLOBAL_GRU_MMR_BASE		0x4000000
 
 #define UV_GLOBAL_MMR32_PNODE_SHIFT	15
-#define UV_GLOBAL_MMR64_PNODE_SHIFT	26
+#define _UV_GLOBAL_MMR64_PNODE_SHIFT	26
+#define UV_GLOBAL_MMR64_PNODE_SHIFT	(uv_hub_info->global_mmr_shift)
 
 #define UV_GLOBAL_MMR32_PNODE_BITS(p)	((p) << (UV_GLOBAL_MMR32_PNODE_SHIFT))
 
@@ -307,18 +429,74 @@
  *	      between socket virtual and socket physical addresses.
  */
 
+/* global bits offset - number of local address bits in gpa for this UV arch */
+static inline unsigned int uv_gpa_shift(void)
+{
+	return uv_hub_info->gpa_shift;
+}
+#define	_uv_gpa_shift
+
+/* Find node that has the address range that contains global address  */
+static inline struct uv_gam_range_s *uv_gam_range(unsigned long pa)
+{
+	struct uv_gam_range_s *gr = uv_hub_info->gr_table;
+	unsigned long pal = (pa & uv_hub_info->gpa_mask) >> UV_GAM_RANGE_SHFT;
+	int i, num = uv_hub_info->gr_table_len;
+
+	if (gr) {
+		for (i = 0; i < num; i++, gr++) {
+			if (pal < gr->limit)
+				return gr;
+		}
+	}
+	pr_crit("UV: GAM Range for 0x%lx not found at %p!\n", pa, gr);
+	BUG();
+}
+
+/* Return base address of node that contains global address  */
+static inline unsigned long uv_gam_range_base(unsigned long pa)
+{
+	struct uv_gam_range_s *gr = uv_gam_range(pa);
+	int base = gr->base;
+
+	if (base < 0)
+		return 0UL;
+
+	return uv_hub_info->gr_table[base].limit;
+}
+
+/* socket phys RAM --> UV global NASID (UV4+) */
+static inline unsigned long uv_soc_phys_ram_to_nasid(unsigned long paddr)
+{
+	return uv_gam_range(paddr)->nasid;
+}
+#define	_uv_soc_phys_ram_to_nasid
+
+/* socket virtual --> UV global NASID (UV4+) */
+static inline unsigned long uv_gpa_nasid(void *v)
+{
+	return uv_soc_phys_ram_to_nasid(__pa(v));
+}
+
 /* socket phys RAM --> UV global physical address */
 static inline unsigned long uv_soc_phys_ram_to_gpa(unsigned long paddr)
 {
+	unsigned int m_val = uv_hub_info->m_val;
+
 	if (paddr < uv_hub_info->lowmem_remap_top)
 		paddr |= uv_hub_info->lowmem_remap_base;
 	paddr |= uv_hub_info->gnode_upper;
-	paddr = ((paddr << uv_hub_info->m_shift) >> uv_hub_info->m_shift) |
-		((paddr >> uv_hub_info->m_val) << uv_hub_info->n_lshift);
+	if (m_val)
+		paddr = ((paddr << uv_hub_info->m_shift)
+						>> uv_hub_info->m_shift) |
+			((paddr >> uv_hub_info->m_val)
+						<< uv_hub_info->n_lshift);
+	else
+		paddr |= uv_soc_phys_ram_to_nasid(paddr)
+						<< uv_hub_info->gpa_shift;
 	return paddr;
 }
 
-
 /* socket virtual --> UV global physical address */
 static inline unsigned long uv_gpa(void *v)
 {
@@ -338,54 +516,89 @@
 	unsigned long paddr;
 	unsigned long remap_base = uv_hub_info->lowmem_remap_base;
 	unsigned long remap_top =  uv_hub_info->lowmem_remap_top;
+	unsigned int m_val = uv_hub_info->m_val;
 
-	gpa = ((gpa << uv_hub_info->m_shift) >> uv_hub_info->m_shift) |
-		((gpa >> uv_hub_info->n_lshift) << uv_hub_info->m_val);
+	if (m_val)
+		gpa = ((gpa << uv_hub_info->m_shift) >> uv_hub_info->m_shift) |
+			((gpa >> uv_hub_info->n_lshift) << uv_hub_info->m_val);
+
 	paddr = gpa & uv_hub_info->gpa_mask;
 	if (paddr >= remap_base && paddr < remap_base + remap_top)
 		paddr -= remap_base;
 	return paddr;
 }
 
-
-/* gpa -> pnode */
+/* gpa -> gnode */
 static inline unsigned long uv_gpa_to_gnode(unsigned long gpa)
 {
-	return gpa >> uv_hub_info->n_lshift;
+	unsigned int n_lshift = uv_hub_info->n_lshift;
+
+	if (n_lshift)
+		return gpa >> n_lshift;
+
+	return uv_gam_range(gpa)->nasid >> 1;
 }
 
 /* gpa -> pnode */
 static inline int uv_gpa_to_pnode(unsigned long gpa)
 {
-	unsigned long n_mask = (1UL << uv_hub_info->n_val) - 1;
-
-	return uv_gpa_to_gnode(gpa) & n_mask;
+	return uv_gpa_to_gnode(gpa) & uv_hub_info->pnode_mask;
 }
 
-/* gpa -> node offset*/
+/* gpa -> node offset */
 static inline unsigned long uv_gpa_to_offset(unsigned long gpa)
 {
-	return (gpa << uv_hub_info->m_shift) >> uv_hub_info->m_shift;
+	unsigned int m_shift = uv_hub_info->m_shift;
+
+	if (m_shift)
+		return (gpa << m_shift) >> m_shift;
+
+	return (gpa & uv_hub_info->gpa_mask) - uv_gam_range_base(gpa);
+}
+
+/* Convert socket to node */
+static inline int _uv_socket_to_node(int socket, unsigned short *s2nid)
+{
+	return s2nid ? s2nid[socket - uv_hub_info->min_socket] : socket;
+}
+
+static inline int uv_socket_to_node(int socket)
+{
+	return _uv_socket_to_node(socket, uv_hub_info->socket_to_node);
 }
 
 /* pnode, offset --> socket virtual */
 static inline void *uv_pnode_offset_to_vaddr(int pnode, unsigned long offset)
 {
-	return __va(((unsigned long)pnode << uv_hub_info->m_val) | offset);
+	unsigned int m_val = uv_hub_info->m_val;
+	unsigned long base;
+	unsigned short sockid, node, *p2s;
+
+	if (m_val)
+		return __va(((unsigned long)pnode << m_val) | offset);
+
+	p2s = uv_hub_info->pnode_to_socket;
+	sockid = p2s ? p2s[pnode - uv_hub_info->min_pnode] : pnode;
+	node = uv_socket_to_node(sockid);
+
+	/* limit address of previous socket is our base, except node 0 is 0 */
+	if (!node)
+		return __va((unsigned long)offset);
+
+	base = (unsigned long)(uv_hub_info->gr_table[node - 1].limit);
+	return __va(base << UV_GAM_RANGE_SHFT | offset);
 }
 
-
-/*
- * Extract a PNODE from an APICID (full apicid, not processor subset)
- */
+/* Extract/Convert a PNODE from an APICID (full apicid, not processor subset) */
 static inline int uv_apicid_to_pnode(int apicid)
 {
-	return (apicid >> uv_hub_info->apic_pnode_shift);
+	int pnode = apicid >> uv_hub_info->apic_pnode_shift;
+	unsigned short *s2pn = uv_hub_info->socket_to_pnode;
+
+	return s2pn ? s2pn[pnode - uv_hub_info->min_socket] : pnode;
 }
 
-/*
- * Convert an apicid to the socket number on the blade
- */
+/* Convert an apicid to the socket number on the blade */
 static inline int uv_apicid_to_socket(int apicid)
 {
 	if (is_uv1_hub())
@@ -434,16 +647,6 @@
 	return readq(uv_global_mmr64_address(pnode, offset));
 }
 
-/*
- * Global MMR space addresses when referenced by the GRU. (GRU does
- * NOT use socket addressing).
- */
-static inline unsigned long uv_global_gru_mmr_address(int pnode, unsigned long offset)
-{
-	return UV_GLOBAL_GRU_MMR_BASE | offset |
-		((unsigned long)pnode << uv_hub_info->m_val);
-}
-
 static inline void uv_write_global_mmr8(int pnode, unsigned long offset, unsigned char val)
 {
 	writeb(val, uv_global_mmr64_address(pnode, offset));
@@ -483,27 +686,23 @@
 	writeb(val, uv_local_mmr_address(offset));
 }
 
-/*
- * Structures and definitions for converting between cpu, node, pnode, and blade
- * numbers.
- */
-struct uv_blade_info {
-	unsigned short	nr_possible_cpus;
-	unsigned short	nr_online_cpus;
-	unsigned short	pnode;
-	short		memory_nid;
-	spinlock_t	nmi_lock;	/* obsolete, see uv_hub_nmi */
-	unsigned long	nmi_count;	/* obsolete, see uv_hub_nmi */
-};
-extern struct uv_blade_info *uv_blade_info;
-extern short *uv_node_to_blade;
-extern short *uv_cpu_to_blade;
-extern short uv_possible_blades;
-
 /* Blade-local cpu number of current cpu. Numbered 0 .. <# cpus on the blade> */
 static inline int uv_blade_processor_id(void)
 {
-	return uv_hub_info->blade_processor_id;
+	return uv_cpu_info->blade_cpu_id;
+}
+
+/* Blade-local cpu number of cpu N. Numbered 0 .. <# cpus on the blade> */
+static inline int uv_cpu_blade_processor_id(int cpu)
+{
+	return uv_cpu_info_per(cpu)->blade_cpu_id;
+}
+#define _uv_cpu_blade_processor_id 1	/* indicate function available */
+
+/* Blade number to Node number (UV1..UV4 is 1:1) */
+static inline int uv_blade_to_node(int blade)
+{
+	return blade;
 }
 
 /* Blade number of current cpu. Numbered 0 .. <#blades -1> */
@@ -512,55 +711,60 @@
 	return uv_hub_info->numa_blade_id;
 }
 
+/*
+ * Convert linux node number to the UV blade number.
+ * .. Currently for UV1 thru UV4 the node and the blade are identical.
+ * .. If this changes then you MUST check references to this function!
+ */
+static inline int uv_node_to_blade_id(int nid)
+{
+	return nid;
+}
+
 /* Convert a cpu number to the UV blade number */
 static inline int uv_cpu_to_blade_id(int cpu)
 {
-	return uv_cpu_to_blade[cpu];
-}
-
-/* Convert linux node number to the UV blade number */
-static inline int uv_node_to_blade_id(int nid)
-{
-	return uv_node_to_blade[nid];
+	return uv_node_to_blade_id(cpu_to_node(cpu));
 }
 
 /* Convert a blade id to the PNODE of the blade */
 static inline int uv_blade_to_pnode(int bid)
 {
-	return uv_blade_info[bid].pnode;
+	return uv_hub_info_list(uv_blade_to_node(bid))->pnode;
 }
 
 /* Nid of memory node on blade. -1 if no blade-local memory */
 static inline int uv_blade_to_memory_nid(int bid)
 {
-	return uv_blade_info[bid].memory_nid;
+	return uv_hub_info_list(uv_blade_to_node(bid))->memory_nid;
 }
 
 /* Determine the number of possible cpus on a blade */
 static inline int uv_blade_nr_possible_cpus(int bid)
 {
-	return uv_blade_info[bid].nr_possible_cpus;
+	return uv_hub_info_list(uv_blade_to_node(bid))->nr_possible_cpus;
 }
 
 /* Determine the number of online cpus on a blade */
 static inline int uv_blade_nr_online_cpus(int bid)
 {
-	return uv_blade_info[bid].nr_online_cpus;
+	return uv_hub_info_list(uv_blade_to_node(bid))->nr_online_cpus;
 }
 
 /* Convert a cpu id to the PNODE of the blade containing the cpu */
 static inline int uv_cpu_to_pnode(int cpu)
 {
-	return uv_blade_info[uv_cpu_to_blade_id(cpu)].pnode;
+	return uv_cpu_hub_info(cpu)->pnode;
 }
 
 /* Convert a linux node number to the PNODE of the blade */
 static inline int uv_node_to_pnode(int nid)
 {
-	return uv_blade_info[uv_node_to_blade_id(nid)].pnode;
+	return uv_hub_info_list(nid)->pnode;
 }
 
 /* Maximum possible number of blades */
+extern short uv_possible_blades;
 static inline int uv_num_possible_blades(void)
 {
 	return uv_possible_blades;
@@ -578,9 +782,7 @@
 /* Newer SMM NMI handler, not present in all systems */
 #define UVH_NMI_MMRX		UVH_EVENT_OCCURRED0
 #define UVH_NMI_MMRX_CLEAR	UVH_EVENT_OCCURRED0_ALIAS
-#define UVH_NMI_MMRX_SHIFT	(is_uv1_hub() ? \
-					UV1H_EVENT_OCCURRED0_EXTIO_INT0_SHFT :\
-					UVXH_EVENT_OCCURRED0_EXTIO_INT0_SHFT)
+#define UVH_NMI_MMRX_SHIFT	UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT
 #define	UVH_NMI_MMRX_TYPE	"EXTIO_INT0"
 
 /* Non-zero indicates newer SMM NMI handler present */
@@ -622,9 +824,9 @@
 /* Update SCIR state */
 static inline void uv_set_scir_bits(unsigned char value)
 {
-	if (uv_hub_info->scir.state != value) {
-		uv_hub_info->scir.state = value;
-		uv_write_local_mmr8(uv_hub_info->scir.offset, value);
+	if (uv_scir_info->state != value) {
+		uv_scir_info->state = value;
+		uv_write_local_mmr8(uv_scir_info->offset, value);
 	}
 }
 
@@ -635,10 +837,10 @@
 
 static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value)
 {
-	if (uv_cpu_hub_info(cpu)->scir.state != value) {
+	if (uv_cpu_scir_info(cpu)->state != value) {
 		uv_write_global_mmr8(uv_cpu_to_pnode(cpu),
-				uv_cpu_hub_info(cpu)->scir.offset, value);
-		uv_cpu_hub_info(cpu)->scir.state = value;
+				uv_cpu_scir_info(cpu)->offset, value);
+		uv_cpu_scir_info(cpu)->state = value;
 	}
 }
 
@@ -666,10 +868,7 @@
 
 /*
  * Get the minimum revision number of the hub chips within the partition.
- *     1 - UV1 rev 1.0 initial silicon
- *     2 - UV1 rev 2.0 production silicon
- *     3 - UV2 rev 1.0 initial silicon
- *     5 - UV3 rev 1.0 initial silicon
+ * (See UVx_HUB_REVISION_BASE above for specific values.)
  */
 static inline int uv_get_min_hub_revision_id(void)
 {
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h
index ddd8db6..548d684 100644
--- a/arch/x86/include/asm/uv/uv_mmrs.h
+++ b/arch/x86/include/asm/uv/uv_mmrs.h
@@ -5,7 +5,7 @@
  *
  * SGI UV MMR definitions
  *
- * Copyright (C) 2007-2014 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2007-2016 Silicon Graphics, Inc. All rights reserved.
  */
 
 #ifndef _ASM_X86_UV_UV_MMRS_H
@@ -18,10 +18,11 @@
  * grouped by architecture types.
  *
  * UVH  - definitions common to all UV hub types.
- * UVXH - definitions common to all UV eXtended hub types (currently 2 & 3).
+ * UVXH - definitions common to all UV eXtended hub types (currently 2, 3, 4).
  * UV1H - definitions specific to UV type 1 hub.
  * UV2H - definitions specific to UV type 2 hub.
  * UV3H - definitions specific to UV type 3 hub.
+ * UV4H - definitions specific to UV type 4 hub.
  *
  * So in general, MMR addresses and structures are identical on all hub types.
  * These MMRs are identified as:
@@ -32,19 +33,25 @@
  *		} s;
  *	};
  *
- * If the MMR exists on all hub types but have different addresses:
+ * If the MMR exists on all hub types but has different addresses,
+ * use a conditional operator to define the value at runtime.
  *	#define UV1Hxxx	a
  *	#define UV2Hxxx	b
  *	#define UV3Hxxx	c
+ *	#define UV4Hxxx	d
  *	#define UVHxxx	(is_uv1_hub() ? UV1Hxxx :
  *			(is_uv2_hub() ? UV2Hxxx :
- *					UV3Hxxx))
+ *			(is_uv3_hub() ? UV3Hxxx :
+ *					UV4Hxxx))
  *
- * If the MMR exists on all hub types > 1 but have different addresses:
+ * If the MMR exists on all hub types > 1 but has different addresses, the
+ * variation using "UVX" as the prefix exists.
  *	#define UV2Hxxx	b
  *	#define UV3Hxxx	c
- *	#define UVXHxxx (is_uv2_hub() ? UV2Hxxx :
- *					UV3Hxxx))
+ *	#define UV4Hxxx	d
+ *	#define UVXHxxx (is_uv2_hub() ? UV2Hxxx :
+ *			(is_uv3_hub() ? UV3Hxxx :
+ *					UV4Hxxx))
  *
  *	union uvh_xxx {
  *		unsigned long       v;
@@ -56,6 +63,8 @@
  *		} s2;
  *		struct uv3h_xxx_s {	 # Full UV3 definition (*)
  *		} s3;
+ *		struct uv4h_xxx_s {	 # Full UV4 definition (*)
+ *		} s4;
  *	};
  *		(* - if present and different than the common struct)
  *
@@ -73,7 +82,7 @@
  *		} sn;
  *	};
  *
- * (GEN Flags: mflags_opt= undefs=0 UV23=UVXH)
+ * (GEN Flags: mflags_opt= undefs=function UV234=UVXH)
  */
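
A hedged sketch of the access pattern this convention enables: generic code reads through the common UVH_ name and uses the shared struct view, reaching for the sN views only for hub-specific fields. uv_read_local_mmr() is assumed available from <asm/uv/uv_hub.h>:

    union uvh_bau_data_broadcast_u bcast;

    bcast.v = uv_read_local_mmr(UVH_BAU_DATA_BROADCAST);
    if (bcast.s.enable)		/* field common to all hub types */
    	pr_debug("UV: BAU broadcast enabled\n");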
 
 #define UV_MMR_ENABLE		(1UL << 63)
@@ -83,20 +92,36 @@
 #define UV2_HUB_PART_NUMBER_X	0x1111
 #define UV3_HUB_PART_NUMBER	0x9578
 #define UV3_HUB_PART_NUMBER_X	0x4321
+#define UV4_HUB_PART_NUMBER	0x99a1
 
 /* Compat: Indicate which UV Hubs are supported. */
+#define UV1_HUB_IS_SUPPORTED	1
 #define UV2_HUB_IS_SUPPORTED	1
 #define UV3_HUB_IS_SUPPORTED	1
+#define UV4_HUB_IS_SUPPORTED	1
+
+/* Error function to catch undefined references */
+extern unsigned long uv_undefined(char *str);
 
 /* ========================================================================= */
 /*                          UVH_BAU_DATA_BROADCAST                           */
 /* ========================================================================= */
 #define UVH_BAU_DATA_BROADCAST 0x61688UL
-#define UVH_BAU_DATA_BROADCAST_32 0x440
+
+#define UV1H_BAU_DATA_BROADCAST_32 0x440
+#define UV2H_BAU_DATA_BROADCAST_32 0x440
+#define UV3H_BAU_DATA_BROADCAST_32 0x440
+#define UV4H_BAU_DATA_BROADCAST_32 0x360
+#define UVH_BAU_DATA_BROADCAST_32 (					\
+	is_uv1_hub() ? UV1H_BAU_DATA_BROADCAST_32 :			\
+	is_uv2_hub() ? UV2H_BAU_DATA_BROADCAST_32 :			\
+	is_uv3_hub() ? UV3H_BAU_DATA_BROADCAST_32 :			\
+	/*is_uv4_hub*/ UV4H_BAU_DATA_BROADCAST_32)
 
 #define UVH_BAU_DATA_BROADCAST_ENABLE_SHFT		0
 #define UVH_BAU_DATA_BROADCAST_ENABLE_MASK		0x0000000000000001UL
 
+
 union uvh_bau_data_broadcast_u {
 	unsigned long	v;
 	struct uvh_bau_data_broadcast_s {
@@ -109,7 +134,16 @@
 /*                           UVH_BAU_DATA_CONFIG                             */
 /* ========================================================================= */
 #define UVH_BAU_DATA_CONFIG 0x61680UL
-#define UVH_BAU_DATA_CONFIG_32 0x438
+
+#define UV1H_BAU_DATA_CONFIG_32 0x438
+#define UV2H_BAU_DATA_CONFIG_32 0x438
+#define UV3H_BAU_DATA_CONFIG_32 0x438
+#define UV4H_BAU_DATA_CONFIG_32 0x358
+#define UVH_BAU_DATA_CONFIG_32 (					\
+	is_uv1_hub() ? UV1H_BAU_DATA_CONFIG_32 :			\
+	is_uv2_hub() ? UV2H_BAU_DATA_CONFIG_32 :			\
+	is_uv3_hub() ? UV3H_BAU_DATA_CONFIG_32 :			\
+	/*is_uv4_hub*/ UV4H_BAU_DATA_CONFIG_32)
 
 #define UVH_BAU_DATA_CONFIG_VECTOR_SHFT			0
 #define UVH_BAU_DATA_CONFIG_DM_SHFT			8
@@ -128,6 +162,7 @@
 #define UVH_BAU_DATA_CONFIG_M_MASK			0x0000000000010000UL
 #define UVH_BAU_DATA_CONFIG_APIC_ID_MASK		0xffffffff00000000UL
 
+
 union uvh_bau_data_config_u {
 	unsigned long	v;
 	struct uvh_bau_data_config_s {
@@ -266,7 +301,6 @@
 #define UV1H_EVENT_OCCURRED0_BAU_DATA_MASK		0x0080000000000000UL
 #define UV1H_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_MASK	0x0100000000000000UL
 
-#define UVXH_EVENT_OCCURRED0_QP_HCERR_SHFT		1
 #define UVXH_EVENT_OCCURRED0_RH_HCERR_SHFT		2
 #define UVXH_EVENT_OCCURRED0_LH0_HCERR_SHFT		3
 #define UVXH_EVENT_OCCURRED0_LH1_HCERR_SHFT		4
@@ -275,55 +309,11 @@
 #define UVXH_EVENT_OCCURRED0_NI0_HCERR_SHFT		7
 #define UVXH_EVENT_OCCURRED0_NI1_HCERR_SHFT		8
 #define UVXH_EVENT_OCCURRED0_LB_AOERR0_SHFT		9
-#define UVXH_EVENT_OCCURRED0_QP_AOERR0_SHFT		10
 #define UVXH_EVENT_OCCURRED0_LH0_AOERR0_SHFT		12
 #define UVXH_EVENT_OCCURRED0_LH1_AOERR0_SHFT		13
 #define UVXH_EVENT_OCCURRED0_GR0_AOERR0_SHFT		14
 #define UVXH_EVENT_OCCURRED0_GR1_AOERR0_SHFT		15
 #define UVXH_EVENT_OCCURRED0_XB_AOERR0_SHFT		16
-#define UVXH_EVENT_OCCURRED0_RT_AOERR0_SHFT		17
-#define UVXH_EVENT_OCCURRED0_NI0_AOERR0_SHFT		18
-#define UVXH_EVENT_OCCURRED0_NI1_AOERR0_SHFT		19
-#define UVXH_EVENT_OCCURRED0_LB_AOERR1_SHFT		20
-#define UVXH_EVENT_OCCURRED0_QP_AOERR1_SHFT		21
-#define UVXH_EVENT_OCCURRED0_RH_AOERR1_SHFT		22
-#define UVXH_EVENT_OCCURRED0_LH0_AOERR1_SHFT		23
-#define UVXH_EVENT_OCCURRED0_LH1_AOERR1_SHFT		24
-#define UVXH_EVENT_OCCURRED0_GR0_AOERR1_SHFT		25
-#define UVXH_EVENT_OCCURRED0_GR1_AOERR1_SHFT		26
-#define UVXH_EVENT_OCCURRED0_XB_AOERR1_SHFT		27
-#define UVXH_EVENT_OCCURRED0_RT_AOERR1_SHFT		28
-#define UVXH_EVENT_OCCURRED0_NI0_AOERR1_SHFT		29
-#define UVXH_EVENT_OCCURRED0_NI1_AOERR1_SHFT		30
-#define UVXH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT	31
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT		32
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT		33
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT		34
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT		35
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT		36
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT		37
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT		38
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT		39
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT		40
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT		41
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT		42
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT		43
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT		44
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT		45
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT		46
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT		47
-#define UVXH_EVENT_OCCURRED0_L1_NMI_INT_SHFT		48
-#define UVXH_EVENT_OCCURRED0_STOP_CLOCK_SHFT		49
-#define UVXH_EVENT_OCCURRED0_ASIC_TO_L1_SHFT		50
-#define UVXH_EVENT_OCCURRED0_L1_TO_ASIC_SHFT		51
-#define UVXH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT	52
-#define UVXH_EVENT_OCCURRED0_IPI_INT_SHFT		53
-#define UVXH_EVENT_OCCURRED0_EXTIO_INT0_SHFT		54
-#define UVXH_EVENT_OCCURRED0_EXTIO_INT1_SHFT		55
-#define UVXH_EVENT_OCCURRED0_EXTIO_INT2_SHFT		56
-#define UVXH_EVENT_OCCURRED0_EXTIO_INT3_SHFT		57
-#define UVXH_EVENT_OCCURRED0_PROFILE_INT_SHFT		58
-#define UVXH_EVENT_OCCURRED0_QP_HCERR_MASK		0x0000000000000002UL
 #define UVXH_EVENT_OCCURRED0_RH_HCERR_MASK		0x0000000000000004UL
 #define UVXH_EVENT_OCCURRED0_LH0_HCERR_MASK		0x0000000000000008UL
 #define UVXH_EVENT_OCCURRED0_LH1_HCERR_MASK		0x0000000000000010UL
@@ -332,54 +322,294 @@
 #define UVXH_EVENT_OCCURRED0_NI0_HCERR_MASK		0x0000000000000080UL
 #define UVXH_EVENT_OCCURRED0_NI1_HCERR_MASK		0x0000000000000100UL
 #define UVXH_EVENT_OCCURRED0_LB_AOERR0_MASK		0x0000000000000200UL
-#define UVXH_EVENT_OCCURRED0_QP_AOERR0_MASK		0x0000000000000400UL
 #define UVXH_EVENT_OCCURRED0_LH0_AOERR0_MASK		0x0000000000001000UL
 #define UVXH_EVENT_OCCURRED0_LH1_AOERR0_MASK		0x0000000000002000UL
 #define UVXH_EVENT_OCCURRED0_GR0_AOERR0_MASK		0x0000000000004000UL
 #define UVXH_EVENT_OCCURRED0_GR1_AOERR0_MASK		0x0000000000008000UL
 #define UVXH_EVENT_OCCURRED0_XB_AOERR0_MASK		0x0000000000010000UL
-#define UVXH_EVENT_OCCURRED0_RT_AOERR0_MASK		0x0000000000020000UL
-#define UVXH_EVENT_OCCURRED0_NI0_AOERR0_MASK		0x0000000000040000UL
-#define UVXH_EVENT_OCCURRED0_NI1_AOERR0_MASK		0x0000000000080000UL
-#define UVXH_EVENT_OCCURRED0_LB_AOERR1_MASK		0x0000000000100000UL
-#define UVXH_EVENT_OCCURRED0_QP_AOERR1_MASK		0x0000000000200000UL
-#define UVXH_EVENT_OCCURRED0_RH_AOERR1_MASK		0x0000000000400000UL
-#define UVXH_EVENT_OCCURRED0_LH0_AOERR1_MASK		0x0000000000800000UL
-#define UVXH_EVENT_OCCURRED0_LH1_AOERR1_MASK		0x0000000001000000UL
-#define UVXH_EVENT_OCCURRED0_GR0_AOERR1_MASK		0x0000000002000000UL
-#define UVXH_EVENT_OCCURRED0_GR1_AOERR1_MASK		0x0000000004000000UL
-#define UVXH_EVENT_OCCURRED0_XB_AOERR1_MASK		0x0000000008000000UL
-#define UVXH_EVENT_OCCURRED0_RT_AOERR1_MASK		0x0000000010000000UL
-#define UVXH_EVENT_OCCURRED0_NI0_AOERR1_MASK		0x0000000020000000UL
-#define UVXH_EVENT_OCCURRED0_NI1_AOERR1_MASK		0x0000000040000000UL
-#define UVXH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK	0x0000000080000000UL
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK		0x0000000100000000UL
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK		0x0000000200000000UL
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK		0x0000000400000000UL
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK		0x0000000800000000UL
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK		0x0000001000000000UL
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK		0x0000002000000000UL
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK		0x0000004000000000UL
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK		0x0000008000000000UL
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK		0x0000010000000000UL
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK		0x0000020000000000UL
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK		0x0000040000000000UL
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK		0x0000080000000000UL
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK		0x0000100000000000UL
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK		0x0000200000000000UL
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK		0x0000400000000000UL
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK		0x0000800000000000UL
-#define UVXH_EVENT_OCCURRED0_L1_NMI_INT_MASK		0x0001000000000000UL
-#define UVXH_EVENT_OCCURRED0_STOP_CLOCK_MASK		0x0002000000000000UL
-#define UVXH_EVENT_OCCURRED0_ASIC_TO_L1_MASK		0x0004000000000000UL
-#define UVXH_EVENT_OCCURRED0_L1_TO_ASIC_MASK		0x0008000000000000UL
-#define UVXH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK	0x0010000000000000UL
-#define UVXH_EVENT_OCCURRED0_IPI_INT_MASK		0x0020000000000000UL
-#define UVXH_EVENT_OCCURRED0_EXTIO_INT0_MASK		0x0040000000000000UL
-#define UVXH_EVENT_OCCURRED0_EXTIO_INT1_MASK		0x0080000000000000UL
-#define UVXH_EVENT_OCCURRED0_EXTIO_INT2_MASK		0x0100000000000000UL
-#define UVXH_EVENT_OCCURRED0_EXTIO_INT3_MASK		0x0200000000000000UL
-#define UVXH_EVENT_OCCURRED0_PROFILE_INT_MASK		0x0400000000000000UL
+
+#define UV2H_EVENT_OCCURRED0_QP_HCERR_SHFT		1
+#define UV2H_EVENT_OCCURRED0_QP_AOERR0_SHFT		10
+#define UV2H_EVENT_OCCURRED0_RT_AOERR0_SHFT		17
+#define UV2H_EVENT_OCCURRED0_NI0_AOERR0_SHFT		18
+#define UV2H_EVENT_OCCURRED0_NI1_AOERR0_SHFT		19
+#define UV2H_EVENT_OCCURRED0_LB_AOERR1_SHFT		20
+#define UV2H_EVENT_OCCURRED0_QP_AOERR1_SHFT		21
+#define UV2H_EVENT_OCCURRED0_RH_AOERR1_SHFT		22
+#define UV2H_EVENT_OCCURRED0_LH0_AOERR1_SHFT		23
+#define UV2H_EVENT_OCCURRED0_LH1_AOERR1_SHFT		24
+#define UV2H_EVENT_OCCURRED0_GR0_AOERR1_SHFT		25
+#define UV2H_EVENT_OCCURRED0_GR1_AOERR1_SHFT		26
+#define UV2H_EVENT_OCCURRED0_XB_AOERR1_SHFT		27
+#define UV2H_EVENT_OCCURRED0_RT_AOERR1_SHFT		28
+#define UV2H_EVENT_OCCURRED0_NI0_AOERR1_SHFT		29
+#define UV2H_EVENT_OCCURRED0_NI1_AOERR1_SHFT		30
+#define UV2H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT	31
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT		32
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT		33
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT		34
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT		35
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT		36
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT		37
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT		38
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT		39
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT		40
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT		41
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT		42
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT		43
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT		44
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT		45
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT		46
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT		47
+#define UV2H_EVENT_OCCURRED0_L1_NMI_INT_SHFT		48
+#define UV2H_EVENT_OCCURRED0_STOP_CLOCK_SHFT		49
+#define UV2H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT		50
+#define UV2H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT		51
+#define UV2H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT	52
+#define UV2H_EVENT_OCCURRED0_IPI_INT_SHFT		53
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT0_SHFT		54
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT1_SHFT		55
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT2_SHFT		56
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT3_SHFT		57
+#define UV2H_EVENT_OCCURRED0_PROFILE_INT_SHFT		58
+#define UV2H_EVENT_OCCURRED0_QP_HCERR_MASK		0x0000000000000002UL
+#define UV2H_EVENT_OCCURRED0_QP_AOERR0_MASK		0x0000000000000400UL
+#define UV2H_EVENT_OCCURRED0_RT_AOERR0_MASK		0x0000000000020000UL
+#define UV2H_EVENT_OCCURRED0_NI0_AOERR0_MASK		0x0000000000040000UL
+#define UV2H_EVENT_OCCURRED0_NI1_AOERR0_MASK		0x0000000000080000UL
+#define UV2H_EVENT_OCCURRED0_LB_AOERR1_MASK		0x0000000000100000UL
+#define UV2H_EVENT_OCCURRED0_QP_AOERR1_MASK		0x0000000000200000UL
+#define UV2H_EVENT_OCCURRED0_RH_AOERR1_MASK		0x0000000000400000UL
+#define UV2H_EVENT_OCCURRED0_LH0_AOERR1_MASK		0x0000000000800000UL
+#define UV2H_EVENT_OCCURRED0_LH1_AOERR1_MASK		0x0000000001000000UL
+#define UV2H_EVENT_OCCURRED0_GR0_AOERR1_MASK		0x0000000002000000UL
+#define UV2H_EVENT_OCCURRED0_GR1_AOERR1_MASK		0x0000000004000000UL
+#define UV2H_EVENT_OCCURRED0_XB_AOERR1_MASK		0x0000000008000000UL
+#define UV2H_EVENT_OCCURRED0_RT_AOERR1_MASK		0x0000000010000000UL
+#define UV2H_EVENT_OCCURRED0_NI0_AOERR1_MASK		0x0000000020000000UL
+#define UV2H_EVENT_OCCURRED0_NI1_AOERR1_MASK		0x0000000040000000UL
+#define UV2H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK	0x0000000080000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK		0x0000000100000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK		0x0000000200000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK		0x0000000400000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK		0x0000000800000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK		0x0000001000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK		0x0000002000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK		0x0000004000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK		0x0000008000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK		0x0000010000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK		0x0000020000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK		0x0000040000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK		0x0000080000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK		0x0000100000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK		0x0000200000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK		0x0000400000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK		0x0000800000000000UL
+#define UV2H_EVENT_OCCURRED0_L1_NMI_INT_MASK		0x0001000000000000UL
+#define UV2H_EVENT_OCCURRED0_STOP_CLOCK_MASK		0x0002000000000000UL
+#define UV2H_EVENT_OCCURRED0_ASIC_TO_L1_MASK		0x0004000000000000UL
+#define UV2H_EVENT_OCCURRED0_L1_TO_ASIC_MASK		0x0008000000000000UL
+#define UV2H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK	0x0010000000000000UL
+#define UV2H_EVENT_OCCURRED0_IPI_INT_MASK		0x0020000000000000UL
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT0_MASK		0x0040000000000000UL
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT1_MASK		0x0080000000000000UL
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT2_MASK		0x0100000000000000UL
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT3_MASK		0x0200000000000000UL
+#define UV2H_EVENT_OCCURRED0_PROFILE_INT_MASK		0x0400000000000000UL
+
+#define UV3H_EVENT_OCCURRED0_QP_HCERR_SHFT		1
+#define UV3H_EVENT_OCCURRED0_QP_AOERR0_SHFT		10
+#define UV3H_EVENT_OCCURRED0_RT_AOERR0_SHFT		17
+#define UV3H_EVENT_OCCURRED0_NI0_AOERR0_SHFT		18
+#define UV3H_EVENT_OCCURRED0_NI1_AOERR0_SHFT		19
+#define UV3H_EVENT_OCCURRED0_LB_AOERR1_SHFT		20
+#define UV3H_EVENT_OCCURRED0_QP_AOERR1_SHFT		21
+#define UV3H_EVENT_OCCURRED0_RH_AOERR1_SHFT		22
+#define UV3H_EVENT_OCCURRED0_LH0_AOERR1_SHFT		23
+#define UV3H_EVENT_OCCURRED0_LH1_AOERR1_SHFT		24
+#define UV3H_EVENT_OCCURRED0_GR0_AOERR1_SHFT		25
+#define UV3H_EVENT_OCCURRED0_GR1_AOERR1_SHFT		26
+#define UV3H_EVENT_OCCURRED0_XB_AOERR1_SHFT		27
+#define UV3H_EVENT_OCCURRED0_RT_AOERR1_SHFT		28
+#define UV3H_EVENT_OCCURRED0_NI0_AOERR1_SHFT		29
+#define UV3H_EVENT_OCCURRED0_NI1_AOERR1_SHFT		30
+#define UV3H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT	31
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT		32
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT		33
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT		34
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT		35
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT		36
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT		37
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT		38
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT		39
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT		40
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT		41
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT		42
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT		43
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT		44
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT		45
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT		46
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT		47
+#define UV3H_EVENT_OCCURRED0_L1_NMI_INT_SHFT		48
+#define UV3H_EVENT_OCCURRED0_STOP_CLOCK_SHFT		49
+#define UV3H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT		50
+#define UV3H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT		51
+#define UV3H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT	52
+#define UV3H_EVENT_OCCURRED0_IPI_INT_SHFT		53
+#define UV3H_EVENT_OCCURRED0_EXTIO_INT0_SHFT		54
+#define UV3H_EVENT_OCCURRED0_EXTIO_INT1_SHFT		55
+#define UV3H_EVENT_OCCURRED0_EXTIO_INT2_SHFT		56
+#define UV3H_EVENT_OCCURRED0_EXTIO_INT3_SHFT		57
+#define UV3H_EVENT_OCCURRED0_PROFILE_INT_SHFT		58
+#define UV3H_EVENT_OCCURRED0_QP_HCERR_MASK		0x0000000000000002UL
+#define UV3H_EVENT_OCCURRED0_QP_AOERR0_MASK		0x0000000000000400UL
+#define UV3H_EVENT_OCCURRED0_RT_AOERR0_MASK		0x0000000000020000UL
+#define UV3H_EVENT_OCCURRED0_NI0_AOERR0_MASK		0x0000000000040000UL
+#define UV3H_EVENT_OCCURRED0_NI1_AOERR0_MASK		0x0000000000080000UL
+#define UV3H_EVENT_OCCURRED0_LB_AOERR1_MASK		0x0000000000100000UL
+#define UV3H_EVENT_OCCURRED0_QP_AOERR1_MASK		0x0000000000200000UL
+#define UV3H_EVENT_OCCURRED0_RH_AOERR1_MASK		0x0000000000400000UL
+#define UV3H_EVENT_OCCURRED0_LH0_AOERR1_MASK		0x0000000000800000UL
+#define UV3H_EVENT_OCCURRED0_LH1_AOERR1_MASK		0x0000000001000000UL
+#define UV3H_EVENT_OCCURRED0_GR0_AOERR1_MASK		0x0000000002000000UL
+#define UV3H_EVENT_OCCURRED0_GR1_AOERR1_MASK		0x0000000004000000UL
+#define UV3H_EVENT_OCCURRED0_XB_AOERR1_MASK		0x0000000008000000UL
+#define UV3H_EVENT_OCCURRED0_RT_AOERR1_MASK		0x0000000010000000UL
+#define UV3H_EVENT_OCCURRED0_NI0_AOERR1_MASK		0x0000000020000000UL
+#define UV3H_EVENT_OCCURRED0_NI1_AOERR1_MASK		0x0000000040000000UL
+#define UV3H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK	0x0000000080000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK		0x0000000100000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK		0x0000000200000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK		0x0000000400000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK		0x0000000800000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK		0x0000001000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK		0x0000002000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK		0x0000004000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK		0x0000008000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK		0x0000010000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK		0x0000020000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK		0x0000040000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK		0x0000080000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK		0x0000100000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK		0x0000200000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK		0x0000400000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK		0x0000800000000000UL
+#define UV3H_EVENT_OCCURRED0_L1_NMI_INT_MASK		0x0001000000000000UL
+#define UV3H_EVENT_OCCURRED0_STOP_CLOCK_MASK		0x0002000000000000UL
+#define UV3H_EVENT_OCCURRED0_ASIC_TO_L1_MASK		0x0004000000000000UL
+#define UV3H_EVENT_OCCURRED0_L1_TO_ASIC_MASK		0x0008000000000000UL
+#define UV3H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK	0x0010000000000000UL
+#define UV3H_EVENT_OCCURRED0_IPI_INT_MASK		0x0020000000000000UL
+#define UV3H_EVENT_OCCURRED0_EXTIO_INT0_MASK		0x0040000000000000UL
+#define UV3H_EVENT_OCCURRED0_EXTIO_INT1_MASK		0x0080000000000000UL
+#define UV3H_EVENT_OCCURRED0_EXTIO_INT2_MASK		0x0100000000000000UL
+#define UV3H_EVENT_OCCURRED0_EXTIO_INT3_MASK		0x0200000000000000UL
+#define UV3H_EVENT_OCCURRED0_PROFILE_INT_MASK		0x0400000000000000UL
+
+#define UV4H_EVENT_OCCURRED0_KT_HCERR_SHFT		1
+#define UV4H_EVENT_OCCURRED0_KT_AOERR0_SHFT		10
+#define UV4H_EVENT_OCCURRED0_RTQ0_AOERR0_SHFT		17
+#define UV4H_EVENT_OCCURRED0_RTQ1_AOERR0_SHFT		18
+#define UV4H_EVENT_OCCURRED0_RTQ2_AOERR0_SHFT		19
+#define UV4H_EVENT_OCCURRED0_RTQ3_AOERR0_SHFT		20
+#define UV4H_EVENT_OCCURRED0_NI0_AOERR0_SHFT		21
+#define UV4H_EVENT_OCCURRED0_NI1_AOERR0_SHFT		22
+#define UV4H_EVENT_OCCURRED0_LB_AOERR1_SHFT		23
+#define UV4H_EVENT_OCCURRED0_KT_AOERR1_SHFT		24
+#define UV4H_EVENT_OCCURRED0_RH_AOERR1_SHFT		25
+#define UV4H_EVENT_OCCURRED0_LH0_AOERR1_SHFT		26
+#define UV4H_EVENT_OCCURRED0_LH1_AOERR1_SHFT		27
+#define UV4H_EVENT_OCCURRED0_GR0_AOERR1_SHFT		28
+#define UV4H_EVENT_OCCURRED0_GR1_AOERR1_SHFT		29
+#define UV4H_EVENT_OCCURRED0_XB_AOERR1_SHFT		30
+#define UV4H_EVENT_OCCURRED0_RTQ0_AOERR1_SHFT		31
+#define UV4H_EVENT_OCCURRED0_RTQ1_AOERR1_SHFT		32
+#define UV4H_EVENT_OCCURRED0_RTQ2_AOERR1_SHFT		33
+#define UV4H_EVENT_OCCURRED0_RTQ3_AOERR1_SHFT		34
+#define UV4H_EVENT_OCCURRED0_NI0_AOERR1_SHFT		35
+#define UV4H_EVENT_OCCURRED0_NI1_AOERR1_SHFT		36
+#define UV4H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT	37
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT		38
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT		39
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT		40
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT		41
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT		42
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT		43
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT		44
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT		45
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT		46
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT		47
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT		48
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT		49
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT		50
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT		51
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT		52
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT		53
+#define UV4H_EVENT_OCCURRED0_L1_NMI_INT_SHFT		54
+#define UV4H_EVENT_OCCURRED0_STOP_CLOCK_SHFT		55
+#define UV4H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT		56
+#define UV4H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT		57
+#define UV4H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT	58
+#define UV4H_EVENT_OCCURRED0_IPI_INT_SHFT		59
+#define UV4H_EVENT_OCCURRED0_EXTIO_INT0_SHFT		60
+#define UV4H_EVENT_OCCURRED0_EXTIO_INT1_SHFT		61
+#define UV4H_EVENT_OCCURRED0_EXTIO_INT2_SHFT		62
+#define UV4H_EVENT_OCCURRED0_EXTIO_INT3_SHFT		63
+#define UV4H_EVENT_OCCURRED0_KT_HCERR_MASK		0x0000000000000002UL
+#define UV4H_EVENT_OCCURRED0_KT_AOERR0_MASK		0x0000000000000400UL
+#define UV4H_EVENT_OCCURRED0_RTQ0_AOERR0_MASK		0x0000000000020000UL
+#define UV4H_EVENT_OCCURRED0_RTQ1_AOERR0_MASK		0x0000000000040000UL
+#define UV4H_EVENT_OCCURRED0_RTQ2_AOERR0_MASK		0x0000000000080000UL
+#define UV4H_EVENT_OCCURRED0_RTQ3_AOERR0_MASK		0x0000000000100000UL
+#define UV4H_EVENT_OCCURRED0_NI0_AOERR0_MASK		0x0000000000200000UL
+#define UV4H_EVENT_OCCURRED0_NI1_AOERR0_MASK		0x0000000000400000UL
+#define UV4H_EVENT_OCCURRED0_LB_AOERR1_MASK		0x0000000000800000UL
+#define UV4H_EVENT_OCCURRED0_KT_AOERR1_MASK		0x0000000001000000UL
+#define UV4H_EVENT_OCCURRED0_RH_AOERR1_MASK		0x0000000002000000UL
+#define UV4H_EVENT_OCCURRED0_LH0_AOERR1_MASK		0x0000000004000000UL
+#define UV4H_EVENT_OCCURRED0_LH1_AOERR1_MASK		0x0000000008000000UL
+#define UV4H_EVENT_OCCURRED0_GR0_AOERR1_MASK		0x0000000010000000UL
+#define UV4H_EVENT_OCCURRED0_GR1_AOERR1_MASK		0x0000000020000000UL
+#define UV4H_EVENT_OCCURRED0_XB_AOERR1_MASK		0x0000000040000000UL
+#define UV4H_EVENT_OCCURRED0_RTQ0_AOERR1_MASK		0x0000000080000000UL
+#define UV4H_EVENT_OCCURRED0_RTQ1_AOERR1_MASK		0x0000000100000000UL
+#define UV4H_EVENT_OCCURRED0_RTQ2_AOERR1_MASK		0x0000000200000000UL
+#define UV4H_EVENT_OCCURRED0_RTQ3_AOERR1_MASK		0x0000000400000000UL
+#define UV4H_EVENT_OCCURRED0_NI0_AOERR1_MASK		0x0000000800000000UL
+#define UV4H_EVENT_OCCURRED0_NI1_AOERR1_MASK		0x0000001000000000UL
+#define UV4H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK	0x0000002000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK		0x0000004000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK		0x0000008000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK		0x0000010000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK		0x0000020000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK		0x0000040000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK		0x0000080000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK		0x0000100000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK		0x0000200000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK		0x0000400000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK		0x0000800000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK		0x0001000000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK		0x0002000000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK		0x0004000000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK		0x0008000000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK		0x0010000000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK		0x0020000000000000UL
+#define UV4H_EVENT_OCCURRED0_L1_NMI_INT_MASK		0x0040000000000000UL
+#define UV4H_EVENT_OCCURRED0_STOP_CLOCK_MASK		0x0080000000000000UL
+#define UV4H_EVENT_OCCURRED0_ASIC_TO_L1_MASK		0x0100000000000000UL
+#define UV4H_EVENT_OCCURRED0_L1_TO_ASIC_MASK		0x0200000000000000UL
+#define UV4H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK	0x0400000000000000UL
+#define UV4H_EVENT_OCCURRED0_IPI_INT_MASK		0x0800000000000000UL
+#define UV4H_EVENT_OCCURRED0_EXTIO_INT0_MASK		0x1000000000000000UL
+#define UV4H_EVENT_OCCURRED0_EXTIO_INT1_MASK		0x2000000000000000UL
+#define UV4H_EVENT_OCCURRED0_EXTIO_INT2_MASK		0x4000000000000000UL
+#define UV4H_EVENT_OCCURRED0_EXTIO_INT3_MASK		0x8000000000000000UL
+
+#define UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT (				\
+	is_uv1_hub() ? UV1H_EVENT_OCCURRED0_EXTIO_INT0_SHFT :		\
+	is_uv2_hub() ? UV2H_EVENT_OCCURRED0_EXTIO_INT0_SHFT :		\
+	is_uv3_hub() ? UV3H_EVENT_OCCURRED0_EXTIO_INT0_SHFT :		\
+	/*is_uv4_hub*/ UV4H_EVENT_OCCURRED0_EXTIO_INT0_SHFT)
 
 union uvh_event_occurred0_u {
 	unsigned long	v;
@@ -391,7 +621,7 @@
 	} s;
 	struct uvxh_event_occurred0_s {
 		unsigned long	lb_hcerr:1;			/* RW */
-		unsigned long	qp_hcerr:1;			/* RW */
+		unsigned long	rsvd_1:1;
 		unsigned long	rh_hcerr:1;			/* RW */
 		unsigned long	lh0_hcerr:1;			/* RW */
 		unsigned long	lh1_hcerr:1;			/* RW */
@@ -400,25 +630,51 @@
 		unsigned long	ni0_hcerr:1;			/* RW */
 		unsigned long	ni1_hcerr:1;			/* RW */
 		unsigned long	lb_aoerr0:1;			/* RW */
-		unsigned long	qp_aoerr0:1;			/* RW */
+		unsigned long	rsvd_10:1;
 		unsigned long	rh_aoerr0:1;			/* RW */
 		unsigned long	lh0_aoerr0:1;			/* RW */
 		unsigned long	lh1_aoerr0:1;			/* RW */
 		unsigned long	gr0_aoerr0:1;			/* RW */
 		unsigned long	gr1_aoerr0:1;			/* RW */
 		unsigned long	xb_aoerr0:1;			/* RW */
-		unsigned long	rt_aoerr0:1;			/* RW */
+		unsigned long	rsvd_17_63:47;
+	} sx;
+	struct uv4h_event_occurred0_s {
+		unsigned long	lb_hcerr:1;			/* RW */
+		unsigned long	kt_hcerr:1;			/* RW */
+		unsigned long	rh_hcerr:1;			/* RW */
+		unsigned long	lh0_hcerr:1;			/* RW */
+		unsigned long	lh1_hcerr:1;			/* RW */
+		unsigned long	gr0_hcerr:1;			/* RW */
+		unsigned long	gr1_hcerr:1;			/* RW */
+		unsigned long	ni0_hcerr:1;			/* RW */
+		unsigned long	ni1_hcerr:1;			/* RW */
+		unsigned long	lb_aoerr0:1;			/* RW */
+		unsigned long	kt_aoerr0:1;			/* RW */
+		unsigned long	rh_aoerr0:1;			/* RW */
+		unsigned long	lh0_aoerr0:1;			/* RW */
+		unsigned long	lh1_aoerr0:1;			/* RW */
+		unsigned long	gr0_aoerr0:1;			/* RW */
+		unsigned long	gr1_aoerr0:1;			/* RW */
+		unsigned long	xb_aoerr0:1;			/* RW */
+		unsigned long	rtq0_aoerr0:1;			/* RW */
+		unsigned long	rtq1_aoerr0:1;			/* RW */
+		unsigned long	rtq2_aoerr0:1;			/* RW */
+		unsigned long	rtq3_aoerr0:1;			/* RW */
 		unsigned long	ni0_aoerr0:1;			/* RW */
 		unsigned long	ni1_aoerr0:1;			/* RW */
 		unsigned long	lb_aoerr1:1;			/* RW */
-		unsigned long	qp_aoerr1:1;			/* RW */
+		unsigned long	kt_aoerr1:1;			/* RW */
 		unsigned long	rh_aoerr1:1;			/* RW */
 		unsigned long	lh0_aoerr1:1;			/* RW */
 		unsigned long	lh1_aoerr1:1;			/* RW */
 		unsigned long	gr0_aoerr1:1;			/* RW */
 		unsigned long	gr1_aoerr1:1;			/* RW */
 		unsigned long	xb_aoerr1:1;			/* RW */
-		unsigned long	rt_aoerr1:1;			/* RW */
+		unsigned long	rtq0_aoerr1:1;			/* RW */
+		unsigned long	rtq1_aoerr1:1;			/* RW */
+		unsigned long	rtq2_aoerr1:1;			/* RW */
+		unsigned long	rtq3_aoerr1:1;			/* RW */
 		unsigned long	ni0_aoerr1:1;			/* RW */
 		unsigned long	ni1_aoerr1:1;			/* RW */
 		unsigned long	system_shutdown_int:1;		/* RW */
@@ -448,9 +704,7 @@
 		unsigned long	extio_int1:1;			/* RW */
 		unsigned long	extio_int2:1;			/* RW */
 		unsigned long	extio_int3:1;			/* RW */
-		unsigned long	profile_int:1;			/* RW */
-		unsigned long	rsvd_59_63:5;
-	} sx;
+	} s4;
 };
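+
+/*
+ * Usage sketch, assuming uv_read_local_mmr() from <asm/uv/uv_hub.h> and
+ * UVH_EVENT_OCCURRED0 defined earlier in this file: UV4 renames the QP_*
+ * event bits to KT_* and splits RT_AOERR into RTQ0..RTQ3, so hub-neutral
+ * code should test bits through the dispatching UVH_*_SHFT wrappers
+ * rather than a fixed bit number:
+ *
+ *	union uvh_event_occurred0_u ev;
+ *	unsigned long pending;
+ *
+ *	ev.v = uv_read_local_mmr(UVH_EVENT_OCCURRED0);
+ *	pending = ev.v & (1UL << UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT);
+ */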
 
 /* ========================================================================= */
@@ -464,11 +718,21 @@
 /*                         UVH_EXTIO_INT0_BROADCAST                          */
 /* ========================================================================= */
 #define UVH_EXTIO_INT0_BROADCAST 0x61448UL
-#define UVH_EXTIO_INT0_BROADCAST_32 0x3f0
+
+#define UV1H_EXTIO_INT0_BROADCAST_32 0x3f0
+#define UV2H_EXTIO_INT0_BROADCAST_32 0x3f0
+#define UV3H_EXTIO_INT0_BROADCAST_32 0x3f0
+#define UV4H_EXTIO_INT0_BROADCAST_32 0x310
+#define UVH_EXTIO_INT0_BROADCAST_32 (					\
+	is_uv1_hub() ? UV1H_EXTIO_INT0_BROADCAST_32 :			\
+	is_uv2_hub() ? UV2H_EXTIO_INT0_BROADCAST_32 :			\
+	is_uv3_hub() ? UV3H_EXTIO_INT0_BROADCAST_32 :			\
+	/*is_uv4_hub*/ UV4H_EXTIO_INT0_BROADCAST_32)
 
 #define UVH_EXTIO_INT0_BROADCAST_ENABLE_SHFT		0
 #define UVH_EXTIO_INT0_BROADCAST_ENABLE_MASK		0x0000000000000001UL
 
+
 union uvh_extio_int0_broadcast_u {
 	unsigned long	v;
 	struct uvh_extio_int0_broadcast_s {
@@ -499,6 +763,7 @@
 #define UVH_GR0_TLB_INT0_CONFIG_M_MASK			0x0000000000010000UL
 #define UVH_GR0_TLB_INT0_CONFIG_APIC_ID_MASK		0xffffffff00000000UL
 
+
 union uvh_gr0_tlb_int0_config_u {
 	unsigned long	v;
 	struct uvh_gr0_tlb_int0_config_s {
@@ -537,6 +802,7 @@
 #define UVH_GR0_TLB_INT1_CONFIG_M_MASK			0x0000000000010000UL
 #define UVH_GR0_TLB_INT1_CONFIG_APIC_ID_MASK		0xffffffff00000000UL
 
+
 union uvh_gr0_tlb_int1_config_u {
 	unsigned long	v;
 	struct uvh_gr0_tlb_int1_config_s {
@@ -559,19 +825,18 @@
 #define UV1H_GR0_TLB_MMR_CONTROL 0x401080UL
 #define UV2H_GR0_TLB_MMR_CONTROL 0xc01080UL
 #define UV3H_GR0_TLB_MMR_CONTROL 0xc01080UL
-#define UVH_GR0_TLB_MMR_CONTROL						\
-		(is_uv1_hub() ? UV1H_GR0_TLB_MMR_CONTROL :		\
-		(is_uv2_hub() ? UV2H_GR0_TLB_MMR_CONTROL :		\
-				UV3H_GR0_TLB_MMR_CONTROL))
+#define UV4H_GR0_TLB_MMR_CONTROL 0x601080UL
+#define UVH_GR0_TLB_MMR_CONTROL (					\
+	is_uv1_hub() ? UV1H_GR0_TLB_MMR_CONTROL :			\
+	is_uv2_hub() ? UV2H_GR0_TLB_MMR_CONTROL :			\
+	is_uv3_hub() ? UV3H_GR0_TLB_MMR_CONTROL :			\
+	/*is_uv4_hub*/ UV4H_GR0_TLB_MMR_CONTROL)
 
 #define UVH_GR0_TLB_MMR_CONTROL_INDEX_SHFT		0
-#define UVH_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT		12
 #define UVH_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT	16
 #define UVH_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT	20
 #define UVH_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT		30
 #define UVH_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT		31
-#define UVH_GR0_TLB_MMR_CONTROL_INDEX_MASK		0x0000000000000fffUL
-#define UVH_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK		0x0000000000003000UL
 #define UVH_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK	0x0000000000010000UL
 #define UVH_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK	0x0000000000100000UL
 #define UVH_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK		0x0000000040000000UL
@@ -601,14 +866,11 @@
 #define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBLRUV_MASK	0x1000000000000000UL
 
 #define UVXH_GR0_TLB_MMR_CONTROL_INDEX_SHFT		0
-#define UVXH_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT		12
 #define UVXH_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT	16
 #define UVXH_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT	20
 #define UVXH_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT		30
 #define UVXH_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT		31
 #define UVXH_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT	32
-#define UVXH_GR0_TLB_MMR_CONTROL_INDEX_MASK		0x0000000000000fffUL
-#define UVXH_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK		0x0000000000003000UL
 #define UVXH_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK	0x0000000000010000UL
 #define UVXH_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK	0x0000000000100000UL
 #define UVXH_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK		0x0000000040000000UL
@@ -651,12 +913,45 @@
 #define UV3H_GR0_TLB_MMR_CONTROL_MMR_READ_MASK		0x0000000080000000UL
 #define UV3H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_MASK	0x0000000100000000UL
 
+#define UV4H_GR0_TLB_MMR_CONTROL_INDEX_SHFT		0
+#define UV4H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT		13
+#define UV4H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT	16
+#define UV4H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT	20
+#define UV4H_GR0_TLB_MMR_CONTROL_ECC_SEL_SHFT		21
+#define UV4H_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT		30
+#define UV4H_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT		31
+#define UV4H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT	32
+#define UV4H_GR0_TLB_MMR_CONTROL_PAGE_SIZE_SHFT		59
+#define UV4H_GR0_TLB_MMR_CONTROL_INDEX_MASK		0x0000000000001fffUL
+#define UV4H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK		0x0000000000006000UL
+#define UV4H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK	0x0000000000010000UL
+#define UV4H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK	0x0000000000100000UL
+#define UV4H_GR0_TLB_MMR_CONTROL_ECC_SEL_MASK		0x0000000000200000UL
+#define UV4H_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK		0x0000000040000000UL
+#define UV4H_GR0_TLB_MMR_CONTROL_MMR_READ_MASK		0x0000000080000000UL
+#define UV4H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_MASK	0x0000000100000000UL
+#define UV4H_GR0_TLB_MMR_CONTROL_PAGE_SIZE_MASK		0xf800000000000000UL
+
+#define UVH_GR0_TLB_MMR_CONTROL_INDEX_MASK (				\
+	is_uv1_hub() ? UV1H_GR0_TLB_MMR_CONTROL_INDEX_MASK :		\
+	is_uv2_hub() ? UV2H_GR0_TLB_MMR_CONTROL_INDEX_MASK :		\
+	is_uv3_hub() ? UV3H_GR0_TLB_MMR_CONTROL_INDEX_MASK :		\
+	/*is_uv4_hub*/ UV4H_GR0_TLB_MMR_CONTROL_INDEX_MASK)
+#define UVH_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK (				\
+	is_uv1_hub() ? UV1H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK :		\
+	is_uv2_hub() ? UV2H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK :		\
+	is_uv3_hub() ? UV3H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK :		\
+	/*is_uv4_hub*/ UV4H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK)
+#define UVH_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT (				\
+	is_uv1_hub() ? UV1H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT :		\
+	is_uv2_hub() ? UV2H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT :		\
+	is_uv3_hub() ? UV3H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT :		\
+	/*is_uv4_hub*/ UV4H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT)
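+
+/*
+ * Sketch of why INDEX and MEM_SEL left the common UVH_/UVXH_ layer: UV4
+ * widens INDEX to bits 0..12 and moves MEM_SEL to bits 13..14, so a
+ * hub-neutral control word must be built through the dispatching macros
+ * above, e.g. (uv_write_local_mmr() assumed from <asm/uv/uv_hub.h>):
+ *
+ *	unsigned long ctl;
+ *
+ *	ctl  = index & UVH_GR0_TLB_MMR_CONTROL_INDEX_MASK;
+ *	ctl |= (mem_sel << UVH_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT) &
+ *		UVH_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK;
+ *	uv_write_local_mmr(UVH_GR0_TLB_MMR_CONTROL, ctl);
+ */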
+
 union uvh_gr0_tlb_mmr_control_u {
 	unsigned long	v;
 	struct uvh_gr0_tlb_mmr_control_s {
-		unsigned long	index:12;			/* RW */
-		unsigned long	mem_sel:2;			/* RW */
-		unsigned long	rsvd_14_15:2;
+		unsigned long	rsvd_0_15:16;
 		unsigned long	auto_valid_en:1;		/* RW */
 		unsigned long	rsvd_17_19:3;
 		unsigned long	mmr_hash_index_en:1;		/* RW */
@@ -690,9 +985,7 @@
 		unsigned long	rsvd_61_63:3;
 	} s1;
 	struct uvxh_gr0_tlb_mmr_control_s {
-		unsigned long	index:12;			/* RW */
-		unsigned long	mem_sel:2;			/* RW */
-		unsigned long	rsvd_14_15:2;
+		unsigned long	rsvd_0_15:16;
 		unsigned long	auto_valid_en:1;		/* RW */
 		unsigned long	rsvd_17_19:3;
 		unsigned long	mmr_hash_index_en:1;		/* RW */
@@ -703,8 +996,7 @@
 		unsigned long	rsvd_33_47:15;
 		unsigned long	rsvd_48:1;
 		unsigned long	rsvd_49_51:3;
-		unsigned long	rsvd_52:1;
-		unsigned long	rsvd_53_63:11;
+		unsigned long	rsvd_52_63:12;
 	} sx;
 	struct uv2h_gr0_tlb_mmr_control_s {
 		unsigned long	index:12;			/* RW */
@@ -741,6 +1033,24 @@
 		unsigned long	undef_52:1;			/* Undefined */
 		unsigned long	rsvd_53_63:11;
 	} s3;
+	struct uv4h_gr0_tlb_mmr_control_s {
+		unsigned long	index:13;			/* RW */
+		unsigned long	mem_sel:2;			/* RW */
+		unsigned long	rsvd_15:1;
+		unsigned long	auto_valid_en:1;		/* RW */
+		unsigned long	rsvd_17_19:3;
+		unsigned long	mmr_hash_index_en:1;		/* RW */
+		unsigned long	ecc_sel:1;			/* RW */
+		unsigned long	rsvd_22_29:8;
+		unsigned long	mmr_write:1;			/* WP */
+		unsigned long	mmr_read:1;			/* WP */
+		unsigned long	mmr_op_done:1;			/* RW */
+		unsigned long	rsvd_33_47:15;
+		unsigned long	undef_48:1;			/* Undefined */
+		unsigned long	rsvd_49_51:3;
+		unsigned long	rsvd_52_58:7;
+		unsigned long	page_size:5;			/* RW */
+	} s4;
 };
 
 /* ========================================================================= */
@@ -749,19 +1059,14 @@
 #define UV1H_GR0_TLB_MMR_READ_DATA_HI 0x4010a0UL
 #define UV2H_GR0_TLB_MMR_READ_DATA_HI 0xc010a0UL
 #define UV3H_GR0_TLB_MMR_READ_DATA_HI 0xc010a0UL
-#define UVH_GR0_TLB_MMR_READ_DATA_HI					\
-		(is_uv1_hub() ? UV1H_GR0_TLB_MMR_READ_DATA_HI :		\
-		(is_uv2_hub() ? UV2H_GR0_TLB_MMR_READ_DATA_HI :		\
-				UV3H_GR0_TLB_MMR_READ_DATA_HI))
+#define UV4H_GR0_TLB_MMR_READ_DATA_HI 0x6010a0UL
+#define UVH_GR0_TLB_MMR_READ_DATA_HI (					\
+	is_uv1_hub() ? UV1H_GR0_TLB_MMR_READ_DATA_HI :			\
+	is_uv2_hub() ? UV2H_GR0_TLB_MMR_READ_DATA_HI :			\
+	is_uv3_hub() ? UV3H_GR0_TLB_MMR_READ_DATA_HI :			\
+	/*is_uv4_hub*/ UV4H_GR0_TLB_MMR_READ_DATA_HI)
 
 #define UVH_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT		0
-#define UVH_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT		41
-#define UVH_GR0_TLB_MMR_READ_DATA_HI_DIRTY_SHFT		43
-#define UVH_GR0_TLB_MMR_READ_DATA_HI_LARGER_SHFT	44
-#define UVH_GR0_TLB_MMR_READ_DATA_HI_PFN_MASK		0x000001ffffffffffUL
-#define UVH_GR0_TLB_MMR_READ_DATA_HI_GAA_MASK		0x0000060000000000UL
-#define UVH_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK		0x0000080000000000UL
-#define UVH_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK	0x0000100000000000UL
 
 #define UV1H_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT		0
 #define UV1H_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT		41
@@ -773,13 +1078,6 @@
 #define UV1H_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK	0x0000100000000000UL
 
 #define UVXH_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT		0
-#define UVXH_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT		41
-#define UVXH_GR0_TLB_MMR_READ_DATA_HI_DIRTY_SHFT	43
-#define UVXH_GR0_TLB_MMR_READ_DATA_HI_LARGER_SHFT	44
-#define UVXH_GR0_TLB_MMR_READ_DATA_HI_PFN_MASK		0x000001ffffffffffUL
-#define UVXH_GR0_TLB_MMR_READ_DATA_HI_GAA_MASK		0x0000060000000000UL
-#define UVXH_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK	0x0000080000000000UL
-#define UVXH_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK	0x0000100000000000UL
 
 #define UV2H_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT		0
 #define UV2H_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT		41
@@ -803,15 +1101,24 @@
 #define UV3H_GR0_TLB_MMR_READ_DATA_HI_AA_EXT_MASK	0x0000200000000000UL
 #define UV3H_GR0_TLB_MMR_READ_DATA_HI_WAY_ECC_MASK	0xff80000000000000UL
 
+#define UV4H_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT		0
+#define UV4H_GR0_TLB_MMR_READ_DATA_HI_PNID_SHFT		34
+#define UV4H_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT		49
+#define UV4H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_SHFT	51
+#define UV4H_GR0_TLB_MMR_READ_DATA_HI_LARGER_SHFT	52
+#define UV4H_GR0_TLB_MMR_READ_DATA_HI_AA_EXT_SHFT	53
+#define UV4H_GR0_TLB_MMR_READ_DATA_HI_WAY_ECC_SHFT	55
+#define UV4H_GR0_TLB_MMR_READ_DATA_HI_PFN_MASK		0x00000003ffffffffUL
+#define UV4H_GR0_TLB_MMR_READ_DATA_HI_PNID_MASK		0x0001fffc00000000UL
+#define UV4H_GR0_TLB_MMR_READ_DATA_HI_GAA_MASK		0x0006000000000000UL
+#define UV4H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK	0x0008000000000000UL
+#define UV4H_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK	0x0010000000000000UL
+#define UV4H_GR0_TLB_MMR_READ_DATA_HI_AA_EXT_MASK	0x0020000000000000UL
+#define UV4H_GR0_TLB_MMR_READ_DATA_HI_WAY_ECC_MASK	0xff80000000000000UL
+
+
 union uvh_gr0_tlb_mmr_read_data_hi_u {
 	unsigned long	v;
-	struct uvh_gr0_tlb_mmr_read_data_hi_s {
-		unsigned long	pfn:41;				/* RO */
-		unsigned long	gaa:2;				/* RO */
-		unsigned long	dirty:1;			/* RO */
-		unsigned long	larger:1;			/* RO */
-		unsigned long	rsvd_45_63:19;
-	} s;
 	struct uv1h_gr0_tlb_mmr_read_data_hi_s {
 		unsigned long	pfn:41;				/* RO */
 		unsigned long	gaa:2;				/* RO */
@@ -819,13 +1126,6 @@
 		unsigned long	larger:1;			/* RO */
 		unsigned long	rsvd_45_63:19;
 	} s1;
-	struct uvxh_gr0_tlb_mmr_read_data_hi_s {
-		unsigned long	pfn:41;				/* RO */
-		unsigned long	gaa:2;				/* RO */
-		unsigned long	dirty:1;			/* RO */
-		unsigned long	larger:1;			/* RO */
-		unsigned long	rsvd_45_63:19;
-	} sx;
 	struct uv2h_gr0_tlb_mmr_read_data_hi_s {
 		unsigned long	pfn:41;				/* RO */
 		unsigned long	gaa:2;				/* RO */
@@ -842,6 +1142,16 @@
 		unsigned long	undef_46_54:9;			/* Undefined */
 		unsigned long	way_ecc:9;			/* RO */
 	} s3;
+	struct uv4h_gr0_tlb_mmr_read_data_hi_s {
+		unsigned long	pfn:34;				/* RO */
+		unsigned long	pnid:15;			/* RO */
+		unsigned long	gaa:2;				/* RO */
+		unsigned long	dirty:1;			/* RO */
+		unsigned long	larger:1;			/* RO */
+		unsigned long	aa_ext:1;			/* RO */
+		unsigned long	undef_54:1;			/* Undefined */
+		unsigned long	way_ecc:9;			/* RO */
+	} s4;
 };
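+
+/*
+ * Decode sketch: UV4 narrows PFN to bits 0..33 and adds a 15-bit PNID
+ * field, so a reader must pick the per-hub layout at run time.  The
+ * three legacy layouts (s1/s2/s3) share pfn:41 at bit 0, so any of them
+ * works pre-UV4.  Assuming uv_read_local_mmr() from <asm/uv/uv_hub.h>:
+ *
+ *	union uvh_gr0_tlb_mmr_read_data_hi_u hi;
+ *	unsigned long pfn;
+ *
+ *	hi.v = uv_read_local_mmr(UVH_GR0_TLB_MMR_READ_DATA_HI);
+ *	pfn = is_uv4_hub() ? (unsigned long)hi.s4.pfn
+ *			   : (unsigned long)hi.s3.pfn;
+ */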
 
 /* ========================================================================= */
@@ -850,10 +1160,12 @@
 #define UV1H_GR0_TLB_MMR_READ_DATA_LO 0x4010a8UL
 #define UV2H_GR0_TLB_MMR_READ_DATA_LO 0xc010a8UL
 #define UV3H_GR0_TLB_MMR_READ_DATA_LO 0xc010a8UL
-#define UVH_GR0_TLB_MMR_READ_DATA_LO					\
-		(is_uv1_hub() ? UV1H_GR0_TLB_MMR_READ_DATA_LO :		\
-		(is_uv2_hub() ? UV2H_GR0_TLB_MMR_READ_DATA_LO :		\
-				UV3H_GR0_TLB_MMR_READ_DATA_LO))
+#define UV4H_GR0_TLB_MMR_READ_DATA_LO 0x6010a8UL
+#define UVH_GR0_TLB_MMR_READ_DATA_LO (					\
+	is_uv1_hub() ? UV1H_GR0_TLB_MMR_READ_DATA_LO :			\
+	is_uv2_hub() ? UV2H_GR0_TLB_MMR_READ_DATA_LO :			\
+	is_uv3_hub() ? UV3H_GR0_TLB_MMR_READ_DATA_LO :			\
+	/*is_uv4_hub*/ UV4H_GR0_TLB_MMR_READ_DATA_LO)
 
 #define UVH_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT		0
 #define UVH_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT		39
@@ -890,6 +1202,14 @@
 #define UV3H_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK		0x7fffff8000000000UL
 #define UV3H_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK	0x8000000000000000UL
 
+#define UV4H_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT		0
+#define UV4H_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT		39
+#define UV4H_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT	63
+#define UV4H_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK		0x0000007fffffffffUL
+#define UV4H_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK		0x7fffff8000000000UL
+#define UV4H_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK	0x8000000000000000UL
+
+
 union uvh_gr0_tlb_mmr_read_data_lo_u {
 	unsigned long	v;
 	struct uvh_gr0_tlb_mmr_read_data_lo_s {
@@ -917,12 +1237,25 @@
 		unsigned long	asid:24;			/* RO */
 		unsigned long	valid:1;			/* RO */
 	} s3;
+	struct uv4h_gr0_tlb_mmr_read_data_lo_s {
+		unsigned long	vpn:39;				/* RO */
+		unsigned long	asid:24;			/* RO */
+		unsigned long	valid:1;			/* RO */
+	} s4;
 };
 
 /* ========================================================================= */
 /*                         UVH_GR1_TLB_INT0_CONFIG                           */
 /* ========================================================================= */
-#define UVH_GR1_TLB_INT0_CONFIG 0x61f00UL
+#define UV1H_GR1_TLB_INT0_CONFIG 0x61f00UL
+#define UV2H_GR1_TLB_INT0_CONFIG 0x61f00UL
+#define UV3H_GR1_TLB_INT0_CONFIG 0x61f00UL
+#define UV4H_GR1_TLB_INT0_CONFIG 0x62100UL
+#define UVH_GR1_TLB_INT0_CONFIG (					\
+	is_uv1_hub() ? UV1H_GR1_TLB_INT0_CONFIG :			\
+	is_uv2_hub() ? UV2H_GR1_TLB_INT0_CONFIG :			\
+	is_uv3_hub() ? UV3H_GR1_TLB_INT0_CONFIG :			\
+	/*is_uv4_hub*/ UV4H_GR1_TLB_INT0_CONFIG)
 
 #define UVH_GR1_TLB_INT0_CONFIG_VECTOR_SHFT		0
 #define UVH_GR1_TLB_INT0_CONFIG_DM_SHFT			8
@@ -941,6 +1274,7 @@
 #define UVH_GR1_TLB_INT0_CONFIG_M_MASK			0x0000000000010000UL
 #define UVH_GR1_TLB_INT0_CONFIG_APIC_ID_MASK		0xffffffff00000000UL
 
+
 union uvh_gr1_tlb_int0_config_u {
 	unsigned long	v;
 	struct uvh_gr1_tlb_int0_config_s {
@@ -960,7 +1294,15 @@
 /* ========================================================================= */
 /*                         UVH_GR1_TLB_INT1_CONFIG                           */
 /* ========================================================================= */
-#define UVH_GR1_TLB_INT1_CONFIG 0x61f40UL
+#define UV1H_GR1_TLB_INT1_CONFIG 0x61f40UL
+#define UV2H_GR1_TLB_INT1_CONFIG 0x61f40UL
+#define UV3H_GR1_TLB_INT1_CONFIG 0x61f40UL
+#define UV4H_GR1_TLB_INT1_CONFIG 0x62140UL
+#define UVH_GR1_TLB_INT1_CONFIG (					\
+	is_uv1_hub() ? UV1H_GR1_TLB_INT1_CONFIG :			\
+	is_uv2_hub() ? UV2H_GR1_TLB_INT1_CONFIG :			\
+	is_uv3_hub() ? UV3H_GR1_TLB_INT1_CONFIG :			\
+	/*is_uv4_hub*/ UV4H_GR1_TLB_INT1_CONFIG)
 
 #define UVH_GR1_TLB_INT1_CONFIG_VECTOR_SHFT		0
 #define UVH_GR1_TLB_INT1_CONFIG_DM_SHFT			8
@@ -979,6 +1321,7 @@
 #define UVH_GR1_TLB_INT1_CONFIG_M_MASK			0x0000000000010000UL
 #define UVH_GR1_TLB_INT1_CONFIG_APIC_ID_MASK		0xffffffff00000000UL
 
+
 union uvh_gr1_tlb_int1_config_u {
 	unsigned long	v;
 	struct uvh_gr1_tlb_int1_config_s {
@@ -1001,19 +1344,18 @@
 #define UV1H_GR1_TLB_MMR_CONTROL 0x801080UL
 #define UV2H_GR1_TLB_MMR_CONTROL 0x1001080UL
 #define UV3H_GR1_TLB_MMR_CONTROL 0x1001080UL
-#define UVH_GR1_TLB_MMR_CONTROL						\
-		(is_uv1_hub() ? UV1H_GR1_TLB_MMR_CONTROL :		\
-		(is_uv2_hub() ? UV2H_GR1_TLB_MMR_CONTROL :		\
-				UV3H_GR1_TLB_MMR_CONTROL))
+#define UV4H_GR1_TLB_MMR_CONTROL 0x701080UL
+#define UVH_GR1_TLB_MMR_CONTROL (					\
+	is_uv1_hub() ? UV1H_GR1_TLB_MMR_CONTROL :			\
+	is_uv2_hub() ? UV2H_GR1_TLB_MMR_CONTROL :			\
+	is_uv3_hub() ? UV3H_GR1_TLB_MMR_CONTROL :			\
+	/*is_uv4_hub*/ UV4H_GR1_TLB_MMR_CONTROL)
 
 #define UVH_GR1_TLB_MMR_CONTROL_INDEX_SHFT		0
-#define UVH_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT		12
 #define UVH_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT	16
 #define UVH_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT	20
 #define UVH_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT		30
 #define UVH_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT		31
-#define UVH_GR1_TLB_MMR_CONTROL_INDEX_MASK		0x0000000000000fffUL
-#define UVH_GR1_TLB_MMR_CONTROL_MEM_SEL_MASK		0x0000000000003000UL
 #define UVH_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK	0x0000000000010000UL
 #define UVH_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK	0x0000000000100000UL
 #define UVH_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK		0x0000000040000000UL
@@ -1043,14 +1385,11 @@
 #define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBLRUV_MASK	0x1000000000000000UL
 
 #define UVXH_GR1_TLB_MMR_CONTROL_INDEX_SHFT		0
-#define UVXH_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT		12
 #define UVXH_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT	16
 #define UVXH_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT	20
 #define UVXH_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT		30
 #define UVXH_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT		31
 #define UVXH_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT	32
-#define UVXH_GR1_TLB_MMR_CONTROL_INDEX_MASK		0x0000000000000fffUL
-#define UVXH_GR1_TLB_MMR_CONTROL_MEM_SEL_MASK		0x0000000000003000UL
 #define UVXH_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK	0x0000000000010000UL
 #define UVXH_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK	0x0000000000100000UL
 #define UVXH_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK		0x0000000040000000UL
@@ -1093,12 +1432,30 @@
 #define UV3H_GR1_TLB_MMR_CONTROL_MMR_READ_MASK		0x0000000080000000UL
 #define UV3H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_MASK	0x0000000100000000UL
 
+#define UV4H_GR1_TLB_MMR_CONTROL_INDEX_SHFT		0
+#define UV4H_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT		13
+#define UV4H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT	16
+#define UV4H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT	20
+#define UV4H_GR1_TLB_MMR_CONTROL_ECC_SEL_SHFT		21
+#define UV4H_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT		30
+#define UV4H_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT		31
+#define UV4H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT	32
+#define UV4H_GR1_TLB_MMR_CONTROL_PAGE_SIZE_SHFT		59
+#define UV4H_GR1_TLB_MMR_CONTROL_INDEX_MASK		0x0000000000001fffUL
+#define UV4H_GR1_TLB_MMR_CONTROL_MEM_SEL_MASK		0x0000000000006000UL
+#define UV4H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK	0x0000000000010000UL
+#define UV4H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK	0x0000000000100000UL
+#define UV4H_GR1_TLB_MMR_CONTROL_ECC_SEL_MASK		0x0000000000200000UL
+#define UV4H_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK		0x0000000040000000UL
+#define UV4H_GR1_TLB_MMR_CONTROL_MMR_READ_MASK		0x0000000080000000UL
+#define UV4H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_MASK	0x0000000100000000UL
+#define UV4H_GR1_TLB_MMR_CONTROL_PAGE_SIZE_MASK		0xf800000000000000UL
+
+
 union uvh_gr1_tlb_mmr_control_u {
 	unsigned long	v;
 	struct uvh_gr1_tlb_mmr_control_s {
-		unsigned long	index:12;			/* RW */
-		unsigned long	mem_sel:2;			/* RW */
-		unsigned long	rsvd_14_15:2;
+		unsigned long	rsvd_0_15:16;
 		unsigned long	auto_valid_en:1;		/* RW */
 		unsigned long	rsvd_17_19:3;
 		unsigned long	mmr_hash_index_en:1;		/* RW */
@@ -1132,9 +1489,7 @@
 		unsigned long	rsvd_61_63:3;
 	} s1;
 	struct uvxh_gr1_tlb_mmr_control_s {
-		unsigned long	index:12;			/* RW */
-		unsigned long	mem_sel:2;			/* RW */
-		unsigned long	rsvd_14_15:2;
+		unsigned long	rsvd_0_15:16;
 		unsigned long	auto_valid_en:1;		/* RW */
 		unsigned long	rsvd_17_19:3;
 		unsigned long	mmr_hash_index_en:1;		/* RW */
@@ -1145,8 +1500,7 @@
 		unsigned long	rsvd_33_47:15;
 		unsigned long	rsvd_48:1;
 		unsigned long	rsvd_49_51:3;
-		unsigned long	rsvd_52:1;
-		unsigned long	rsvd_53_63:11;
+		unsigned long	rsvd_52_63:12;
 	} sx;
 	struct uv2h_gr1_tlb_mmr_control_s {
 		unsigned long	index:12;			/* RW */
@@ -1183,6 +1537,24 @@
 		unsigned long	undef_52:1;			/* Undefined */
 		unsigned long	rsvd_53_63:11;
 	} s3;
+	struct uv4h_gr1_tlb_mmr_control_s {
+		unsigned long	index:13;			/* RW */
+		unsigned long	mem_sel:2;			/* RW */
+		unsigned long	rsvd_15:1;
+		unsigned long	auto_valid_en:1;		/* RW */
+		unsigned long	rsvd_17_19:3;
+		unsigned long	mmr_hash_index_en:1;		/* RW */
+		unsigned long	ecc_sel:1;			/* RW */
+		unsigned long	rsvd_22_29:8;
+		unsigned long	mmr_write:1;			/* WP */
+		unsigned long	mmr_read:1;			/* WP */
+		unsigned long	mmr_op_done:1;			/* RW */
+		unsigned long	rsvd_33_47:15;
+		unsigned long	undef_48:1;			/* Undefined */
+		unsigned long	rsvd_49_51:3;
+		unsigned long	rsvd_52_58:7;
+		unsigned long	page_size:5;			/* RW */
+	} s4;
 };
 
 /* ========================================================================= */
@@ -1191,19 +1563,14 @@
 #define UV1H_GR1_TLB_MMR_READ_DATA_HI 0x8010a0UL
 #define UV2H_GR1_TLB_MMR_READ_DATA_HI 0x10010a0UL
 #define UV3H_GR1_TLB_MMR_READ_DATA_HI 0x10010a0UL
-#define UVH_GR1_TLB_MMR_READ_DATA_HI					\
-		(is_uv1_hub() ? UV1H_GR1_TLB_MMR_READ_DATA_HI :		\
-		(is_uv2_hub() ? UV2H_GR1_TLB_MMR_READ_DATA_HI :		\
-				UV3H_GR1_TLB_MMR_READ_DATA_HI))
+#define UV4H_GR1_TLB_MMR_READ_DATA_HI 0x7010a0UL
+#define UVH_GR1_TLB_MMR_READ_DATA_HI (					\
+	is_uv1_hub() ? UV1H_GR1_TLB_MMR_READ_DATA_HI :			\
+	is_uv2_hub() ? UV2H_GR1_TLB_MMR_READ_DATA_HI :			\
+	is_uv3_hub() ? UV3H_GR1_TLB_MMR_READ_DATA_HI :			\
+	/*is_uv4_hub*/ UV4H_GR1_TLB_MMR_READ_DATA_HI)
 
 #define UVH_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT		0
-#define UVH_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT		41
-#define UVH_GR1_TLB_MMR_READ_DATA_HI_DIRTY_SHFT		43
-#define UVH_GR1_TLB_MMR_READ_DATA_HI_LARGER_SHFT	44
-#define UVH_GR1_TLB_MMR_READ_DATA_HI_PFN_MASK		0x000001ffffffffffUL
-#define UVH_GR1_TLB_MMR_READ_DATA_HI_GAA_MASK		0x0000060000000000UL
-#define UVH_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK		0x0000080000000000UL
-#define UVH_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK	0x0000100000000000UL
 
 #define UV1H_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT		0
 #define UV1H_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT		41
@@ -1215,13 +1582,6 @@
 #define UV1H_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK	0x0000100000000000UL
 
 #define UVXH_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT		0
-#define UVXH_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT		41
-#define UVXH_GR1_TLB_MMR_READ_DATA_HI_DIRTY_SHFT	43
-#define UVXH_GR1_TLB_MMR_READ_DATA_HI_LARGER_SHFT	44
-#define UVXH_GR1_TLB_MMR_READ_DATA_HI_PFN_MASK		0x000001ffffffffffUL
-#define UVXH_GR1_TLB_MMR_READ_DATA_HI_GAA_MASK		0x0000060000000000UL
-#define UVXH_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK	0x0000080000000000UL
-#define UVXH_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK	0x0000100000000000UL
 
 #define UV2H_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT		0
 #define UV2H_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT		41
@@ -1245,15 +1605,24 @@
 #define UV3H_GR1_TLB_MMR_READ_DATA_HI_AA_EXT_MASK	0x0000200000000000UL
 #define UV3H_GR1_TLB_MMR_READ_DATA_HI_WAY_ECC_MASK	0xff80000000000000UL
 
+#define UV4H_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT		0
+#define UV4H_GR1_TLB_MMR_READ_DATA_HI_PNID_SHFT		34
+#define UV4H_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT		49
+#define UV4H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_SHFT	51
+#define UV4H_GR1_TLB_MMR_READ_DATA_HI_LARGER_SHFT	52
+#define UV4H_GR1_TLB_MMR_READ_DATA_HI_AA_EXT_SHFT	53
+#define UV4H_GR1_TLB_MMR_READ_DATA_HI_WAY_ECC_SHFT	55
+#define UV4H_GR1_TLB_MMR_READ_DATA_HI_PFN_MASK		0x00000003ffffffffUL
+#define UV4H_GR1_TLB_MMR_READ_DATA_HI_PNID_MASK		0x0001fffc00000000UL
+#define UV4H_GR1_TLB_MMR_READ_DATA_HI_GAA_MASK		0x0006000000000000UL
+#define UV4H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK	0x0008000000000000UL
+#define UV4H_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK	0x0010000000000000UL
+#define UV4H_GR1_TLB_MMR_READ_DATA_HI_AA_EXT_MASK	0x0020000000000000UL
+#define UV4H_GR1_TLB_MMR_READ_DATA_HI_WAY_ECC_MASK	0xff80000000000000UL
+
+
 union uvh_gr1_tlb_mmr_read_data_hi_u {
 	unsigned long	v;
-	struct uvh_gr1_tlb_mmr_read_data_hi_s {
-		unsigned long	pfn:41;				/* RO */
-		unsigned long	gaa:2;				/* RO */
-		unsigned long	dirty:1;			/* RO */
-		unsigned long	larger:1;			/* RO */
-		unsigned long	rsvd_45_63:19;
-	} s;
 	struct uv1h_gr1_tlb_mmr_read_data_hi_s {
 		unsigned long	pfn:41;				/* RO */
 		unsigned long	gaa:2;				/* RO */
@@ -1261,13 +1630,6 @@
 		unsigned long	larger:1;			/* RO */
 		unsigned long	rsvd_45_63:19;
 	} s1;
-	struct uvxh_gr1_tlb_mmr_read_data_hi_s {
-		unsigned long	pfn:41;				/* RO */
-		unsigned long	gaa:2;				/* RO */
-		unsigned long	dirty:1;			/* RO */
-		unsigned long	larger:1;			/* RO */
-		unsigned long	rsvd_45_63:19;
-	} sx;
 	struct uv2h_gr1_tlb_mmr_read_data_hi_s {
 		unsigned long	pfn:41;				/* RO */
 		unsigned long	gaa:2;				/* RO */
@@ -1284,6 +1646,16 @@
 		unsigned long	undef_46_54:9;			/* Undefined */
 		unsigned long	way_ecc:9;			/* RO */
 	} s3;
+	struct uv4h_gr1_tlb_mmr_read_data_hi_s {
+		unsigned long	pfn:34;				/* RO */
+		unsigned long	pnid:15;			/* RO */
+		unsigned long	gaa:2;				/* RO */
+		unsigned long	dirty:1;			/* RO */
+		unsigned long	larger:1;			/* RO */
+		unsigned long	aa_ext:1;			/* RO */
+		unsigned long	undef_54:1;			/* Undefined */
+		unsigned long	way_ecc:9;			/* RO */
+	} s4;
 };
 
 /* ========================================================================= */
@@ -1292,10 +1664,12 @@
 #define UV1H_GR1_TLB_MMR_READ_DATA_LO 0x8010a8UL
 #define UV2H_GR1_TLB_MMR_READ_DATA_LO 0x10010a8UL
 #define UV3H_GR1_TLB_MMR_READ_DATA_LO 0x10010a8UL
-#define UVH_GR1_TLB_MMR_READ_DATA_LO					\
-		(is_uv1_hub() ? UV1H_GR1_TLB_MMR_READ_DATA_LO :		\
-		(is_uv2_hub() ? UV2H_GR1_TLB_MMR_READ_DATA_LO :		\
-				UV3H_GR1_TLB_MMR_READ_DATA_LO))
+#define UV4H_GR1_TLB_MMR_READ_DATA_LO 0x7010a8UL
+#define UVH_GR1_TLB_MMR_READ_DATA_LO (					\
+	is_uv1_hub() ? UV1H_GR1_TLB_MMR_READ_DATA_LO :			\
+	is_uv2_hub() ? UV2H_GR1_TLB_MMR_READ_DATA_LO :			\
+	is_uv3_hub() ? UV3H_GR1_TLB_MMR_READ_DATA_LO :			\
+	/*is_uv4_hub*/ UV4H_GR1_TLB_MMR_READ_DATA_LO)
 
 #define UVH_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT		0
 #define UVH_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT		39
@@ -1332,6 +1706,14 @@
 #define UV3H_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK		0x7fffff8000000000UL
 #define UV3H_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK	0x8000000000000000UL
 
+#define UV4H_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT		0
+#define UV4H_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT		39
+#define UV4H_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT	63
+#define UV4H_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK		0x0000007fffffffffUL
+#define UV4H_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK		0x7fffff8000000000UL
+#define UV4H_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK	0x8000000000000000UL
+
+
 union uvh_gr1_tlb_mmr_read_data_lo_u {
 	unsigned long	v;
 	struct uvh_gr1_tlb_mmr_read_data_lo_s {
@@ -1359,6 +1741,11 @@
 		unsigned long	asid:24;			/* RO */
 		unsigned long	valid:1;			/* RO */
 	} s3;
+	struct uv4h_gr1_tlb_mmr_read_data_lo_s {
+		unsigned long	vpn:39;				/* RO */
+		unsigned long	asid:24;			/* RO */
+		unsigned long	valid:1;			/* RO */
+	} s4;
 };
 
 /* ========================================================================= */
@@ -1369,6 +1756,7 @@
 #define UVH_INT_CMPB_REAL_TIME_CMPB_SHFT		0
 #define UVH_INT_CMPB_REAL_TIME_CMPB_MASK		0x00ffffffffffffffUL
 
+
 union uvh_int_cmpb_u {
 	unsigned long	v;
 	struct uvh_int_cmpb_s {
@@ -1382,12 +1770,14 @@
 /* ========================================================================= */
 #define UVH_INT_CMPC 0x22100UL
 
+
 #define UV1H_INT_CMPC_REAL_TIME_CMPC_SHFT		0
 #define UV1H_INT_CMPC_REAL_TIME_CMPC_MASK		0x00ffffffffffffffUL
 
 #define UVXH_INT_CMPC_REAL_TIME_CMP_2_SHFT		0
 #define UVXH_INT_CMPC_REAL_TIME_CMP_2_MASK		0x00ffffffffffffffUL
 
+
 union uvh_int_cmpc_u {
 	unsigned long	v;
 	struct uvh_int_cmpc_s {
@@ -1401,12 +1791,14 @@
 /* ========================================================================= */
 #define UVH_INT_CMPD 0x22180UL
 
+
 #define UV1H_INT_CMPD_REAL_TIME_CMPD_SHFT		0
 #define UV1H_INT_CMPD_REAL_TIME_CMPD_MASK		0x00ffffffffffffffUL
 
 #define UVXH_INT_CMPD_REAL_TIME_CMP_3_SHFT		0
 #define UVXH_INT_CMPD_REAL_TIME_CMP_3_MASK		0x00ffffffffffffffUL
 
+
 union uvh_int_cmpd_u {
 	unsigned long	v;
 	struct uvh_int_cmpd_s {
@@ -1419,7 +1811,16 @@
 /*                               UVH_IPI_INT                                 */
 /* ========================================================================= */
 #define UVH_IPI_INT 0x60500UL
-#define UVH_IPI_INT_32 0x348
+
+#define UV1H_IPI_INT_32 0x348
+#define UV2H_IPI_INT_32 0x348
+#define UV3H_IPI_INT_32 0x348
+#define UV4H_IPI_INT_32 0x268
+#define UVH_IPI_INT_32 (						\
+	is_uv1_hub() ? UV1H_IPI_INT_32 :				\
+	is_uv2_hub() ? UV2H_IPI_INT_32 :				\
+	is_uv3_hub() ? UV3H_IPI_INT_32 :				\
+	/*is_uv4_hub*/ UV4H_IPI_INT_32)
 
 #define UVH_IPI_INT_VECTOR_SHFT				0
 #define UVH_IPI_INT_DELIVERY_MODE_SHFT			8
@@ -1432,6 +1833,7 @@
 #define UVH_IPI_INT_APIC_ID_MASK			0x0000ffffffff0000UL
 #define UVH_IPI_INT_SEND_MASK				0x8000000000000000UL
 
+
 union uvh_ipi_int_u {
 	unsigned long	v;
 	struct uvh_ipi_int_s {
@@ -1448,103 +1850,269 @@
 /* ========================================================================= */
 /*                   UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST                     */
 /* ========================================================================= */
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL
+#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL
+#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL
+#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL
+#define UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST uv_undefined("UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST")
+#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST (				\
+	is_uv1_hub() ? UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST :		\
+	is_uv2_hub() ? UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST :		\
+	is_uv3_hub() ? UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST :		\
+	/*is_uv4_hub*/ UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST)
 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_32 0x9c0
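+
+/*
+ * The BAU INTD payload queue is gone on UV4, so the UVH_ wrapper above
+ * resolves to uv_undefined() there (assumed to warn and return an
+ * invalid value at run time).  Callers therefore guard the access, e.g.
+ * (uv_write_global_mmr64() assumed from <asm/uv/uv_hub.h>):
+ *
+ *	if (!is_uv4_hub())
+ *		uv_write_global_mmr64(pnode,
+ *			UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST, first);
+ */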
 
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_SHFT 49
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_MASK 0x000007fffffffff0UL
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_MASK 0x7ffe000000000000UL
+
+#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4
+#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_SHFT 49
+#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_MASK 0x000007fffffffff0UL
+#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_MASK 0x7ffe000000000000UL
+
+
+#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4
+#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_SHFT 49
+#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_MASK 0x000007fffffffff0UL
+#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_MASK 0x7ffe000000000000UL
+
+#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4
+#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_SHFT 49
+#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_MASK 0x000007fffffffff0UL
+#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_MASK 0x7ffe000000000000UL
+
 
 union uvh_lb_bau_intd_payload_queue_first_u {
 	unsigned long	v;
-	struct uvh_lb_bau_intd_payload_queue_first_s {
+	struct uv1h_lb_bau_intd_payload_queue_first_s {
 		unsigned long	rsvd_0_3:4;
 		unsigned long	address:39;			/* RW */
 		unsigned long	rsvd_43_48:6;
 		unsigned long	node_id:14;			/* RW */
 		unsigned long	rsvd_63:1;
-	} s;
+	} s1;
+	struct uv2h_lb_bau_intd_payload_queue_first_s {
+		unsigned long	rsvd_0_3:4;
+		unsigned long	address:39;			/* RW */
+		unsigned long	rsvd_43_48:6;
+		unsigned long	node_id:14;			/* RW */
+		unsigned long	rsvd_63:1;
+	} s2;
+	struct uv3h_lb_bau_intd_payload_queue_first_s {
+		unsigned long	rsvd_0_3:4;
+		unsigned long	address:39;			/* RW */
+		unsigned long	rsvd_43_48:6;
+		unsigned long	node_id:14;			/* RW */
+		unsigned long	rsvd_63:1;
+	} s3;
 };
 
 /* ========================================================================= */
 /*                    UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST                     */
 /* ========================================================================= */
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL
+#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL
+#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL
+#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL
+#define UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST uv_undefined("UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST")
+#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST (				\
+	is_uv1_hub() ? UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST :		\
+	is_uv2_hub() ? UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST :		\
+	is_uv3_hub() ? UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST :		\
+	/*is_uv4_hub*/ UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST)
 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_32 0x9c8
 
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT	4
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK	0x000007fffffffff0UL
+
+#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4
+#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL
+
+
+#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4
+#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL
+
+#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4
+#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL
+
 
 union uvh_lb_bau_intd_payload_queue_last_u {
 	unsigned long	v;
-	struct uvh_lb_bau_intd_payload_queue_last_s {
+	struct uv1h_lb_bau_intd_payload_queue_last_s {
 		unsigned long	rsvd_0_3:4;
 		unsigned long	address:39;			/* RW */
 		unsigned long	rsvd_43_63:21;
-	} s;
+	} s1;
+	struct uv2h_lb_bau_intd_payload_queue_last_s {
+		unsigned long	rsvd_0_3:4;
+		unsigned long	address:39;			/* RW */
+		unsigned long	rsvd_43_63:21;
+	} s2;
+	struct uv3h_lb_bau_intd_payload_queue_last_s {
+		unsigned long	rsvd_0_3:4;
+		unsigned long	address:39;			/* RW */
+		unsigned long	rsvd_43_63:21;
+	} s3;
 };
 
 /* ========================================================================= */
 /*                    UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL                     */
 /* ========================================================================= */
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL
+#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL
+#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL
+#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL
+#define UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL uv_undefined("UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL")
+#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL (				\
+	is_uv1_hub() ? UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL :		\
+	is_uv2_hub() ? UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL :		\
+	is_uv3_hub() ? UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL :		\
+	/*is_uv4_hub*/ UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL)
 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_32 0x9d0
 
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT	4
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK	0x000007fffffffff0UL
+
+#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4
+#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL
+
+
+#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4
+#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL
+
+#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4
+#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL
+
 
 union uvh_lb_bau_intd_payload_queue_tail_u {
 	unsigned long	v;
-	struct uvh_lb_bau_intd_payload_queue_tail_s {
+	struct uv1h_lb_bau_intd_payload_queue_tail_s {
 		unsigned long	rsvd_0_3:4;
 		unsigned long	address:39;			/* RW */
 		unsigned long	rsvd_43_63:21;
-	} s;
+	} s1;
+	struct uv2h_lb_bau_intd_payload_queue_tail_s {
+		unsigned long	rsvd_0_3:4;
+		unsigned long	address:39;			/* RW */
+		unsigned long	rsvd_43_63:21;
+	} s2;
+	struct uv3h_lb_bau_intd_payload_queue_tail_s {
+		unsigned long	rsvd_0_3:4;
+		unsigned long	address:39;			/* RW */
+		unsigned long	rsvd_43_63:21;
+	} s3;
 };
 
 /* ========================================================================= */
 /*                   UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE                    */
 /* ========================================================================= */
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL
+#define UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE uv_undefined("UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE")
+#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE (				\
+	is_uv1_hub() ? UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE :		\
+	is_uv2_hub() ? UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE :		\
+	is_uv3_hub() ? UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE :		\
+	/*is_uv4_hub*/ UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE)
 #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_32 0xa68
 
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_SHFT 1
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_SHFT 2
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_SHFT 3
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_SHFT 4
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_SHFT 5
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_SHFT 6
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_SHFT 7
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_SHFT 8
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_SHFT 9
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_SHFT 10
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_SHFT 11
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_SHFT 12
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_SHFT 13
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_SHFT 14
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_SHFT 15
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_MASK 0x0000000000000001UL
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_MASK 0x0000000000000002UL
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_MASK 0x0000000000000004UL
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_MASK 0x0000000000000008UL
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_MASK 0x0000000000000010UL
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_MASK 0x0000000000000020UL
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_MASK 0x0000000000000040UL
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_MASK 0x0000000000000080UL
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_MASK 0x0000000000000100UL
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_MASK 0x0000000000000200UL
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_MASK 0x0000000000000400UL
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_MASK 0x0000000000000800UL
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_MASK 0x0000000000001000UL
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_MASK 0x0000000000002000UL
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_MASK 0x0000000000004000UL
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_MASK 0x0000000000008000UL
+
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_SHFT 1
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_SHFT 2
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_SHFT 3
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_SHFT 4
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_SHFT 5
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_SHFT 6
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_SHFT 7
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_SHFT 8
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_SHFT 9
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_SHFT 10
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_SHFT 11
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_SHFT 12
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_SHFT 13
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_SHFT 14
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_SHFT 15
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_MASK 0x0000000000000001UL
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_MASK 0x0000000000000002UL
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_MASK 0x0000000000000004UL
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_MASK 0x0000000000000008UL
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_MASK 0x0000000000000010UL
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_MASK 0x0000000000000020UL
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_MASK 0x0000000000000040UL
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_MASK 0x0000000000000080UL
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_MASK 0x0000000000000100UL
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_MASK 0x0000000000000200UL
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_MASK 0x0000000000000400UL
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_MASK 0x0000000000000800UL
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_MASK 0x0000000000001000UL
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_MASK 0x0000000000002000UL
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_MASK 0x0000000000004000UL
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_MASK 0x0000000000008000UL
+
+
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_SHFT 1
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_SHFT 2
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_SHFT 3
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_SHFT 4
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_SHFT 5
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_SHFT 6
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_SHFT 7
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_SHFT 8
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_SHFT 9
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_SHFT 10
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_SHFT 11
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_SHFT 12
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_SHFT 13
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_SHFT 14
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_SHFT 15
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_MASK 0x0000000000000001UL
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_MASK 0x0000000000000002UL
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_MASK 0x0000000000000004UL
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_MASK 0x0000000000000008UL
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_MASK 0x0000000000000010UL
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_MASK 0x0000000000000020UL
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_MASK 0x0000000000000040UL
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_MASK 0x0000000000000080UL
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_MASK 0x0000000000000100UL
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_MASK 0x0000000000000200UL
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_MASK 0x0000000000000400UL
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_MASK 0x0000000000000800UL
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_MASK 0x0000000000001000UL
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_MASK 0x0000000000002000UL
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_MASK 0x0000000000004000UL
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_MASK 0x0000000000008000UL
+
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_SHFT 1
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_SHFT 2
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_SHFT 3
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_SHFT 4
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_SHFT 5
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_SHFT 6
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_SHFT 7
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_SHFT 8
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_SHFT 9
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_SHFT 10
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_SHFT 11
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_SHFT 12
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_SHFT 13
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_SHFT 14
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_SHFT 15
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_MASK 0x0000000000000001UL
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_MASK 0x0000000000000002UL
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_MASK 0x0000000000000004UL
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_MASK 0x0000000000000008UL
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_MASK 0x0000000000000010UL
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_MASK 0x0000000000000020UL
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_MASK 0x0000000000000040UL
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_MASK 0x0000000000000080UL
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_MASK 0x0000000000000100UL
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_MASK 0x0000000000000200UL
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_MASK 0x0000000000000400UL
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_MASK 0x0000000000000800UL
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_MASK 0x0000000000001000UL
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_MASK 0x0000000000002000UL
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_MASK 0x0000000000004000UL
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_MASK 0x0000000000008000UL
+
 
 union uvh_lb_bau_intd_software_acknowledge_u {
 	unsigned long	v;
-	struct uvh_lb_bau_intd_software_acknowledge_s {
+	struct uv1h_lb_bau_intd_software_acknowledge_s {
 		unsigned long	pending_0:1;			/* RW, W1C */
 		unsigned long	pending_1:1;			/* RW, W1C */
 		unsigned long	pending_2:1;			/* RW, W1C */
@@ -1562,27 +2130,84 @@
 		unsigned long	timeout_6:1;			/* RW, W1C */
 		unsigned long	timeout_7:1;			/* RW, W1C */
 		unsigned long	rsvd_16_63:48;
-	} s;
+	} s1;
+	struct uv2h_lb_bau_intd_software_acknowledge_s {
+		unsigned long	pending_0:1;			/* RW */
+		unsigned long	pending_1:1;			/* RW */
+		unsigned long	pending_2:1;			/* RW */
+		unsigned long	pending_3:1;			/* RW */
+		unsigned long	pending_4:1;			/* RW */
+		unsigned long	pending_5:1;			/* RW */
+		unsigned long	pending_6:1;			/* RW */
+		unsigned long	pending_7:1;			/* RW */
+		unsigned long	timeout_0:1;			/* RW */
+		unsigned long	timeout_1:1;			/* RW */
+		unsigned long	timeout_2:1;			/* RW */
+		unsigned long	timeout_3:1;			/* RW */
+		unsigned long	timeout_4:1;			/* RW */
+		unsigned long	timeout_5:1;			/* RW */
+		unsigned long	timeout_6:1;			/* RW */
+		unsigned long	timeout_7:1;			/* RW */
+		unsigned long	rsvd_16_63:48;
+	} s2;
+	struct uv3h_lb_bau_intd_software_acknowledge_s {
+		unsigned long	pending_0:1;			/* RW */
+		unsigned long	pending_1:1;			/* RW */
+		unsigned long	pending_2:1;			/* RW */
+		unsigned long	pending_3:1;			/* RW */
+		unsigned long	pending_4:1;			/* RW */
+		unsigned long	pending_5:1;			/* RW */
+		unsigned long	pending_6:1;			/* RW */
+		unsigned long	pending_7:1;			/* RW */
+		unsigned long	timeout_0:1;			/* RW */
+		unsigned long	timeout_1:1;			/* RW */
+		unsigned long	timeout_2:1;			/* RW */
+		unsigned long	timeout_3:1;			/* RW */
+		unsigned long	timeout_4:1;			/* RW */
+		unsigned long	timeout_5:1;			/* RW */
+		unsigned long	timeout_6:1;			/* RW */
+		unsigned long	timeout_7:1;			/* RW */
+		unsigned long	rsvd_16_63:48;
+	} s3;
 };
 
 /* ========================================================================= */
 /*                UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS                 */
 /* ========================================================================= */
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x320088UL
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x320088UL
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x320088UL
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x320088UL
+#define UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS uv_undefined("UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS")
+#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS (			\
+	is_uv1_hub() ? UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS :	\
+	is_uv2_hub() ? UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS :	\
+	is_uv3_hub() ? UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS :	\
+	/*is_uv4_hub*/ UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS)
 #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS_32 0xa70
 
 
 /* ========================================================================= */
 /*                         UVH_LB_BAU_MISC_CONTROL                           */
 /* ========================================================================= */
-#define UVH_LB_BAU_MISC_CONTROL 0x320170UL
 #define UV1H_LB_BAU_MISC_CONTROL 0x320170UL
 #define UV2H_LB_BAU_MISC_CONTROL 0x320170UL
 #define UV3H_LB_BAU_MISC_CONTROL 0x320170UL
-#define UVH_LB_BAU_MISC_CONTROL_32 0xa10
-#define UV1H_LB_BAU_MISC_CONTROL_32 0x320170UL
-#define UV2H_LB_BAU_MISC_CONTROL_32 0x320170UL
-#define UV3H_LB_BAU_MISC_CONTROL_32 0x320170UL
+#define UV4H_LB_BAU_MISC_CONTROL 0xc8170UL
+#define UVH_LB_BAU_MISC_CONTROL (					\
+	is_uv1_hub() ? UV1H_LB_BAU_MISC_CONTROL :			\
+	is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL :			\
+	is_uv3_hub() ? UV3H_LB_BAU_MISC_CONTROL :			\
+	/*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL)
+
+#define UV1H_LB_BAU_MISC_CONTROL_32 0xa10
+#define UV2H_LB_BAU_MISC_CONTROL_32 0xa10
+#define UV3H_LB_BAU_MISC_CONTROL_32 0xa10
+#define UV4H_LB_BAU_MISC_CONTROL_32 0xa18
+#define UVH_LB_BAU_MISC_CONTROL_32 (					\
+	is_uv1_hub() ? UV1H_LB_BAU_MISC_CONTROL_32 :			\
+	is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL_32 :			\
+	is_uv3_hub() ? UV3H_LB_BAU_MISC_CONTROL_32 :			\
+	/*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL_32)
 
 #define UVH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT	0
 #define UVH_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT		8
@@ -1590,8 +2215,6 @@
 #define UVH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT	10
 #define UVH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11
 #define UVH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14
-#define UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 15
-#define UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 16
 #define UVH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20
 #define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21
 #define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22
@@ -1606,8 +2229,6 @@
 #define UVH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK	0x0000000000000400UL
 #define UVH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL
 #define UVH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL
-#define UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK 0x0000000000008000UL
-#define UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK 0x00000000000f0000UL
 #define UVH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL
 #define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL
 #define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL
@@ -1656,8 +2277,6 @@
 #define UVXH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT	10
 #define UVXH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11
 #define UVXH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14
-#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 15
-#define UVXH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 16
 #define UVXH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20
 #define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21
 #define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22
@@ -1679,8 +2298,6 @@
 #define UVXH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK	0x0000000000000400UL
 #define UVXH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL
 #define UVXH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL
-#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK 0x0000000000008000UL
-#define UVXH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK 0x00000000000f0000UL
 #define UVXH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL
 #define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL
 #define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL
@@ -1797,6 +2414,88 @@
 #define UV3H_LB_BAU_MISC_CONTROL_THREAD_KILL_TIMEBASE_MASK 0x00003fc000000000UL
 #define UV3H_LB_BAU_MISC_CONTROL_FUN_MASK		0xffff000000000000UL
 
+#define UV4H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT	0
+#define UV4H_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT		8
+#define UV4H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT	9
+#define UV4H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT	10
+#define UV4H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11
+#define UV4H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14
+#define UV4H_LB_BAU_MISC_CONTROL_RESERVED_15_19_SHFT	15
+#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20
+#define UV4H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21
+#define UV4H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22
+#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23
+#define UV4H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24
+#define UV4H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27
+#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28
+#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_SHFT 29
+#define UV4H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_SHFT	30
+#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_SHFT 31
+#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_SHFT 32
+#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT 33
+#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_SHFT 34
+#define UV4H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT 35
+#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_QUIESCE_MSGS_TO_QPI_SHFT 36
+#define UV4H_LB_BAU_MISC_CONTROL_RESERVED_37_SHFT	37
+#define UV4H_LB_BAU_MISC_CONTROL_THREAD_KILL_TIMEBASE_SHFT 38
+#define UV4H_LB_BAU_MISC_CONTROL_ADDRESS_INTERLEAVE_SELECT_SHFT 46
+#define UV4H_LB_BAU_MISC_CONTROL_FUN_SHFT		48
+#define UV4H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK	0x00000000000000ffUL
+#define UV4H_LB_BAU_MISC_CONTROL_APIC_MODE_MASK		0x0000000000000100UL
+#define UV4H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK	0x0000000000000200UL
+#define UV4H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK	0x0000000000000400UL
+#define UV4H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL
+#define UV4H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL
+#define UV4H_LB_BAU_MISC_CONTROL_RESERVED_15_19_MASK	0x00000000000f8000UL
+#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL
+#define UV4H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL
+#define UV4H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL
+#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL
+#define UV4H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL
+#define UV4H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL
+#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL
+#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_MASK 0x0000000020000000UL
+#define UV4H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_MASK	0x0000000040000000UL
+#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_MASK 0x0000000080000000UL
+#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_MASK 0x0000000100000000UL
+#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_MASK 0x0000000200000000UL
+#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_MASK 0x0000000400000000UL
+#define UV4H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_MASK 0x0000000800000000UL
+#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_QUIESCE_MSGS_TO_QPI_MASK 0x0000001000000000UL
+#define UV4H_LB_BAU_MISC_CONTROL_RESERVED_37_MASK	0x0000002000000000UL
+#define UV4H_LB_BAU_MISC_CONTROL_THREAD_KILL_TIMEBASE_MASK 0x00003fc000000000UL
+#define UV4H_LB_BAU_MISC_CONTROL_ADDRESS_INTERLEAVE_SELECT_MASK 0x0000400000000000UL
+#define UV4H_LB_BAU_MISC_CONTROL_FUN_MASK		0xffff000000000000UL
+
+#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK	\
+	uv_undefined("UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK")
+#define UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK (	\
+	is_uv1_hub() ? UV1H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK : \
+	is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK : \
+	is_uv3_hub() ? UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK : \
+	/*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK)
+#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT	\
+	uv_undefined("UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT")
+#define UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT (	\
+	is_uv1_hub() ? UV1H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT : \
+	is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT : \
+	is_uv3_hub() ? UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT : \
+	/*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT)
+#define UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK	\
+	uv_undefined("UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK")
+#define UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK (	\
+	is_uv1_hub() ? UV1H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK : \
+	is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK : \
+	is_uv3_hub() ? UV3H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK : \
+	/*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK)
+#define UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT	\
+	uv_undefined("UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT")
+#define UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT (	\
+	is_uv1_hub() ? UV1H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT : \
+	is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT : \
+	is_uv3_hub() ? UV3H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT : \
+	/*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT)
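
UV4 drops the INTD soft-ack controls outright (bits 15:19 become reserved above), so rather than inventing a bogus UV4 constant the patch routes the UV4H_* arm through uv_undefined(), which presumably lives elsewhere in the UV headers and complains loudly at runtime. A hedged sketch of that guard idea, with uv_undefined() stubbed as an abort and the field names shortened for the example:

/*
 * Sketch of the uv_undefined() guard -- not part of the patch.  The
 * real helper is defined elsewhere in the UV headers; this stub just
 * aborts.  Field names are shortened.
 */
#include <stdio.h>
#include <stdlib.h>

static unsigned long uv_undefined(const char *name)
{
	fprintf(stderr, "%s is undefined on this hub\n", name);
	abort();
}

static int is_uv4_hub(void) { return 1; }	/* pretend UV4 */

#define UV3H_SOFT_ACK_MASK 0x0000000000008000UL
#define UV4H_SOFT_ACK_MASK uv_undefined("UV4H_SOFT_ACK_MASK")
#define UVH_SOFT_ACK_MASK (					\
	is_uv4_hub() ? UV4H_SOFT_ACK_MASK : UV3H_SOFT_ACK_MASK)

int main(void)
{
	/* On UV1..UV3 this prints the mask; on UV4 it aborts, catching
	 * code that still assumes the removed field exists. */
	printf("mask = 0x%lx\n", UVH_SOFT_ACK_MASK);
	return 0;
}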
+
 union uvh_lb_bau_misc_control_u {
 	unsigned long	v;
 	struct uvh_lb_bau_misc_control_s {
@@ -1806,8 +2505,7 @@
 		unsigned long	force_lock_nop:1;		/* RW */
 		unsigned long	qpi_agent_presence_vector:3;	/* RW */
 		unsigned long	descriptor_fetch_mode:1;	/* RW */
-		unsigned long	enable_intd_soft_ack_mode:1;	/* RW */
-		unsigned long	intd_soft_ack_timeout_period:4;	/* RW */
+		unsigned long	rsvd_15_19:5;
 		unsigned long	enable_dual_mapping_mode:1;	/* RW */
 		unsigned long	vga_io_port_decode_enable:1;	/* RW */
 		unsigned long	vga_io_port_16_bit_decode:1;	/* RW */
@@ -1844,8 +2542,7 @@
 		unsigned long	force_lock_nop:1;		/* RW */
 		unsigned long	qpi_agent_presence_vector:3;	/* RW */
 		unsigned long	descriptor_fetch_mode:1;	/* RW */
-		unsigned long	enable_intd_soft_ack_mode:1;	/* RW */
-		unsigned long	intd_soft_ack_timeout_period:4;	/* RW */
+		unsigned long	rsvd_15_19:5;
 		unsigned long	enable_dual_mapping_mode:1;	/* RW */
 		unsigned long	vga_io_port_decode_enable:1;	/* RW */
 		unsigned long	vga_io_port_16_bit_decode:1;	/* RW */
@@ -1918,13 +2615,59 @@
 		unsigned long	rsvd_46_47:2;
 		unsigned long	fun:16;				/* RW */
 	} s3;
+	struct uv4h_lb_bau_misc_control_s {
+		unsigned long	rejection_delay:8;		/* RW */
+		unsigned long	apic_mode:1;			/* RW */
+		unsigned long	force_broadcast:1;		/* RW */
+		unsigned long	force_lock_nop:1;		/* RW */
+		unsigned long	qpi_agent_presence_vector:3;	/* RW */
+		unsigned long	descriptor_fetch_mode:1;	/* RW */
+		unsigned long	rsvd_15_19:5;
+		unsigned long	enable_dual_mapping_mode:1;	/* RW */
+		unsigned long	vga_io_port_decode_enable:1;	/* RW */
+		unsigned long	vga_io_port_16_bit_decode:1;	/* RW */
+		unsigned long	suppress_dest_registration:1;	/* RW */
+		unsigned long	programmed_initial_priority:3;	/* RW */
+		unsigned long	use_incoming_priority:1;	/* RW */
+		unsigned long	enable_programmed_initial_priority:1;/* RW */
+		unsigned long	enable_automatic_apic_mode_selection:1;/* RW */
+		unsigned long	apic_mode_status:1;		/* RO */
+		unsigned long	suppress_interrupts_to_self:1;	/* RW */
+		unsigned long	enable_lock_based_system_flush:1;/* RW */
+		unsigned long	enable_extended_sb_status:1;	/* RW */
+		unsigned long	suppress_int_prio_udt_to_self:1;/* RW */
+		unsigned long	use_legacy_descriptor_formats:1;/* RW */
+		unsigned long	suppress_quiesce_msgs_to_qpi:1;	/* RW */
+		unsigned long	rsvd_37:1;
+		unsigned long	thread_kill_timebase:8;		/* RW */
+		unsigned long	address_interleave_select:1;	/* RW */
+		unsigned long	rsvd_47:1;
+		unsigned long	fun:16;				/* RW */
+	} s4;
 };
 
 /* ========================================================================= */
 /*                     UVH_LB_BAU_SB_ACTIVATION_CONTROL                      */
 /* ========================================================================= */
-#define UVH_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL
-#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9a8
+#define UV1H_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL
+#define UV2H_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL
+#define UV3H_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL
+#define UV4H_LB_BAU_SB_ACTIVATION_CONTROL 0xc8020UL
+#define UVH_LB_BAU_SB_ACTIVATION_CONTROL (				\
+	is_uv1_hub() ? UV1H_LB_BAU_SB_ACTIVATION_CONTROL :		\
+	is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_CONTROL :		\
+	is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_CONTROL :		\
+	/*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_CONTROL)
+
+#define UV1H_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9a8
+#define UV2H_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9a8
+#define UV3H_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9a8
+#define UV4H_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9c8
+#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_32 (				\
+	is_uv1_hub() ? UV1H_LB_BAU_SB_ACTIVATION_CONTROL_32 :		\
+	is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_CONTROL_32 :		\
+	is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_CONTROL_32 :		\
+	/*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_CONTROL_32)
 
 #define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_SHFT	0
 #define UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT	62
@@ -1933,6 +2676,7 @@
 #define UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_MASK	0x4000000000000000UL
 #define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INIT_MASK	0x8000000000000000UL
 
+
 union uvh_lb_bau_sb_activation_control_u {
 	unsigned long	v;
 	struct uvh_lb_bau_sb_activation_control_s {
@@ -1946,12 +2690,30 @@
 /* ========================================================================= */
 /*                    UVH_LB_BAU_SB_ACTIVATION_STATUS_0                      */
 /* ========================================================================= */
-#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL
-#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9b0
+#define UV1H_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL
+#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL
+#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL
+#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_0 0xc8030UL
+#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0 (				\
+	is_uv1_hub() ? UV1H_LB_BAU_SB_ACTIVATION_STATUS_0 :		\
+	is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_0 :		\
+	is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_0 :		\
+	/*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_0)
+
+#define UV1H_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9b0
+#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9b0
+#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9b0
+#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9d0
+#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_32 (				\
+	is_uv1_hub() ? UV1H_LB_BAU_SB_ACTIVATION_STATUS_0_32 :		\
+	is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_0_32 :		\
+	is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_0_32 :		\
+	/*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_0_32)
 
 #define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_SHFT	0
 #define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_MASK	0xffffffffffffffffUL
 
+
 union uvh_lb_bau_sb_activation_status_0_u {
 	unsigned long	v;
 	struct uvh_lb_bau_sb_activation_status_0_s {
@@ -1962,12 +2724,30 @@
 /* ========================================================================= */
 /*                    UVH_LB_BAU_SB_ACTIVATION_STATUS_1                      */
 /* ========================================================================= */
-#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL
-#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9b8
+#define UV1H_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL
+#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL
+#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL
+#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_1 0xc8040UL
+#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1 (				\
+	is_uv1_hub() ? UV1H_LB_BAU_SB_ACTIVATION_STATUS_1 :		\
+	is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_1 :		\
+	is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_1 :		\
+	/*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_1)
+
+#define UV1H_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9b8
+#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9b8
+#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9b8
+#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9d8
+#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_32 (				\
+	is_uv1_hub() ? UV1H_LB_BAU_SB_ACTIVATION_STATUS_1_32 :		\
+	is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_1_32 :		\
+	is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_1_32 :		\
+	/*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_1_32)
 
 #define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_SHFT	0
 #define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_MASK	0xffffffffffffffffUL
 
+
 union uvh_lb_bau_sb_activation_status_1_u {
 	unsigned long	v;
 	struct uvh_lb_bau_sb_activation_status_1_s {
@@ -1978,23 +2758,55 @@
 /* ========================================================================= */
 /*                      UVH_LB_BAU_SB_DESCRIPTOR_BASE                        */
 /* ========================================================================= */
-#define UVH_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL
-#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9a0
+#define UV1H_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL
+#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL
+#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL
+#define UV4H_LB_BAU_SB_DESCRIPTOR_BASE 0xc8010UL
+#define UVH_LB_BAU_SB_DESCRIPTOR_BASE (					\
+	is_uv1_hub() ? UV1H_LB_BAU_SB_DESCRIPTOR_BASE :			\
+	is_uv2_hub() ? UV2H_LB_BAU_SB_DESCRIPTOR_BASE :			\
+	is_uv3_hub() ? UV3H_LB_BAU_SB_DESCRIPTOR_BASE :			\
+	/*is_uv4_hub*/ UV4H_LB_BAU_SB_DESCRIPTOR_BASE)
+
+#define UV1H_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9a0
+#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9a0
+#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9a0
+#define UV4H_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9c0
+#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_32 (				\
+	is_uv1_hub() ? UV1H_LB_BAU_SB_DESCRIPTOR_BASE_32 :		\
+	is_uv2_hub() ? UV2H_LB_BAU_SB_DESCRIPTOR_BASE_32 :		\
+	is_uv3_hub() ? UV3H_LB_BAU_SB_DESCRIPTOR_BASE_32 :		\
+	/*is_uv4_hub*/ UV4H_LB_BAU_SB_DESCRIPTOR_BASE_32)
 
 #define UVH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_SHFT	12
 #define UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT	49
-#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK	0x000007fffffff000UL
 #define UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK	0x7ffe000000000000UL
 
+#define UV1H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL
+
+
+#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL
+
+#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL
+
+#define UV4H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x00003ffffffff000UL
+
+
 union uvh_lb_bau_sb_descriptor_base_u {
 	unsigned long	v;
 	struct uvh_lb_bau_sb_descriptor_base_s {
 		unsigned long	rsvd_0_11:12;
-		unsigned long	page_address:31;		/* RW */
-		unsigned long	rsvd_43_48:6;
+		unsigned long	rsvd_12_48:37;
 		unsigned long	node_id:14;			/* RW */
 		unsigned long	rsvd_63:1;
 	} s;
+	struct uv4h_lb_bau_sb_descriptor_base_s {
+		unsigned long	rsvd_0_11:12;
+		unsigned long	page_address:34;		/* RW */
+		unsigned long	rsvd_46_48:3;
+		unsigned long	node_id:14;			/* RW */
+		unsigned long	rsvd_63:1;
+	} s4;
 };
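
The unions keep the usual uv_mmrs.h shape: .v holds the raw 64-bit MMR value and the s/s1..s4 structs name each hub's bitfields. UV4 widens page_address to 34 bits, which no longer fits the old common layout, so the generic s view degrades to a reserved span and only the per-hub views expose the field. A self-contained sketch of reading through such a union; the register value is fabricated rather than read from hardware:

/*
 * Sketch of the union-over-bitfields access pattern -- not part of the
 * patch.  The value stands in for an MMR read; the s4 layout matches
 * the UV4 struct added above.
 */
#include <stdio.h>

union sb_descriptor_base_u {
	unsigned long v;
	struct {
		unsigned long rsvd_0_11:12;
		unsigned long page_address:34;	/* RW, UV4-wide */
		unsigned long rsvd_46_48:3;
		unsigned long node_id:14;	/* RW */
		unsigned long rsvd_63:1;
	} s4;
};

int main(void)
{
	union sb_descriptor_base_u desc;

	desc.v = 0x0002000123456000UL;	/* pretend uv_read_local_mmr() */
	printf("page_address=0x%lx node_id=%lu\n",
	       (unsigned long)desc.s4.page_address,
	       (unsigned long)desc.s4.node_id);
	return 0;
}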
 
 /* ========================================================================= */
@@ -2004,6 +2816,7 @@
 #define UV1H_NODE_ID 0x0UL
 #define UV2H_NODE_ID 0x0UL
 #define UV3H_NODE_ID 0x0UL
+#define UV4H_NODE_ID 0x0UL
 
 #define UVH_NODE_ID_FORCE1_SHFT				0
 #define UVH_NODE_ID_MANUFACTURER_SHFT			1
@@ -2080,6 +2893,26 @@
 #define UV3H_NODE_ID_NODES_PER_BIT_MASK			0x01fc000000000000UL
 #define UV3H_NODE_ID_NI_PORT_MASK			0x3e00000000000000UL
 
+#define UV4H_NODE_ID_FORCE1_SHFT			0
+#define UV4H_NODE_ID_MANUFACTURER_SHFT			1
+#define UV4H_NODE_ID_PART_NUMBER_SHFT			12
+#define UV4H_NODE_ID_REVISION_SHFT			28
+#define UV4H_NODE_ID_NODE_ID_SHFT			32
+#define UV4H_NODE_ID_ROUTER_SELECT_SHFT			48
+#define UV4H_NODE_ID_RESERVED_2_SHFT			49
+#define UV4H_NODE_ID_NODES_PER_BIT_SHFT			50
+#define UV4H_NODE_ID_NI_PORT_SHFT			57
+#define UV4H_NODE_ID_FORCE1_MASK			0x0000000000000001UL
+#define UV4H_NODE_ID_MANUFACTURER_MASK			0x0000000000000ffeUL
+#define UV4H_NODE_ID_PART_NUMBER_MASK			0x000000000ffff000UL
+#define UV4H_NODE_ID_REVISION_MASK			0x00000000f0000000UL
+#define UV4H_NODE_ID_NODE_ID_MASK			0x00007fff00000000UL
+#define UV4H_NODE_ID_ROUTER_SELECT_MASK			0x0001000000000000UL
+#define UV4H_NODE_ID_RESERVED_2_MASK			0x0002000000000000UL
+#define UV4H_NODE_ID_NODES_PER_BIT_MASK			0x01fc000000000000UL
+#define UV4H_NODE_ID_NI_PORT_MASK			0x3e00000000000000UL
+
+
 union uvh_node_id_u {
 	unsigned long	v;
 	struct uvh_node_id_s {
@@ -2137,17 +2970,40 @@
 		unsigned long	ni_port:5;			/* RO */
 		unsigned long	rsvd_62_63:2;
 	} s3;
+	struct uv4h_node_id_s {
+		unsigned long	force1:1;			/* RO */
+		unsigned long	manufacturer:11;		/* RO */
+		unsigned long	part_number:16;			/* RO */
+		unsigned long	revision:4;			/* RO */
+		unsigned long	node_id:15;			/* RW */
+		unsigned long	rsvd_47:1;
+		unsigned long	router_select:1;		/* RO */
+		unsigned long	rsvd_49:1;
+		unsigned long	nodes_per_bit:7;		/* RO */
+		unsigned long	ni_port:5;			/* RO */
+		unsigned long	rsvd_62_63:2;
+	} s4;
 };
 
 /* ========================================================================= */
 /*                          UVH_NODE_PRESENT_TABLE                           */
 /* ========================================================================= */
 #define UVH_NODE_PRESENT_TABLE 0x1400UL
-#define UVH_NODE_PRESENT_TABLE_DEPTH 16
+
+#define UV1H_NODE_PRESENT_TABLE_DEPTH 16
+#define UV2H_NODE_PRESENT_TABLE_DEPTH 16
+#define UV3H_NODE_PRESENT_TABLE_DEPTH 16
+#define UV4H_NODE_PRESENT_TABLE_DEPTH 4
+#define UVH_NODE_PRESENT_TABLE_DEPTH (					\
+	is_uv1_hub() ? UV1H_NODE_PRESENT_TABLE_DEPTH :			\
+	is_uv2_hub() ? UV2H_NODE_PRESENT_TABLE_DEPTH :			\
+	is_uv3_hub() ? UV3H_NODE_PRESENT_TABLE_DEPTH :			\
+	/*is_uv4_hub*/ UV4H_NODE_PRESENT_TABLE_DEPTH)
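
UV4 also shrinks the node-present table from 16 words to 4, so the depth itself must dispatch; any loop bounded by UVH_NODE_PRESENT_TABLE_DEPTH then stays correct on every hub. A sketch of such a loop, with the MMR reads replaced by a stub array and __builtin_popcountl (a GCC/Clang builtin) standing in for the kernel's bit counting:

/*
 * Sketch: walking the node-present table with a per-hub depth -- not
 * part of the patch.  Table words are stubbed; a real caller would
 * read the MMR at UVH_NODE_PRESENT_TABLE + 8 * i.
 */
#include <stdio.h>

static int is_uv4_hub(void) { return 1; }	/* pretend UV4 */

#define UV3H_NODE_PRESENT_TABLE_DEPTH 16
#define UV4H_NODE_PRESENT_TABLE_DEPTH 4
#define UVH_NODE_PRESENT_TABLE_DEPTH (				\
	is_uv4_hub() ? UV4H_NODE_PRESENT_TABLE_DEPTH		\
		     : UV3H_NODE_PRESENT_TABLE_DEPTH)

int main(void)
{
	unsigned long table[16] = { 0x3UL };	/* stub: nodes 0,1 present */
	int i, nodes = 0;

	for (i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++)
		nodes += __builtin_popcountl(table[i]);
	printf("%d nodes present in %d words\n",
	       nodes, UVH_NODE_PRESENT_TABLE_DEPTH);
	return 0;
}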
 
 #define UVH_NODE_PRESENT_TABLE_NODES_SHFT		0
 #define UVH_NODE_PRESENT_TABLE_NODES_MASK		0xffffffffffffffffUL
 
+
 union uvh_node_present_table_u {
 	unsigned long	v;
 	struct uvh_node_present_table_s {
@@ -2158,7 +3014,15 @@
 /* ========================================================================= */
 /*                 UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR                  */
 /* ========================================================================= */
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x16000c8UL
+#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x16000c8UL
+#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x16000c8UL
+#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x16000c8UL
+#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x4800c8UL
+#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR (			\
+	is_uv1_hub() ? UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR :	\
+	is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR :	\
+	is_uv3_hub() ? UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR :	\
+	/*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR)
 
 #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24
 #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48
@@ -2167,6 +3031,7 @@
 #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL
 #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL
 
+
 union uvh_rh_gam_alias210_overlay_config_0_mmr_u {
 	unsigned long	v;
 	struct uvh_rh_gam_alias210_overlay_config_0_mmr_s {
@@ -2182,7 +3047,15 @@
 /* ========================================================================= */
 /*                 UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR                  */
 /* ========================================================================= */
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x16000d8UL
+#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x16000d8UL
+#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x16000d8UL
+#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x16000d8UL
+#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x4800d8UL
+#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR (			\
+	is_uv1_hub() ? UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR :	\
+	is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR :	\
+	is_uv3_hub() ? UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR :	\
+	/*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR)
 
 #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24
 #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48
@@ -2191,6 +3064,7 @@
 #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL
 #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL
 
+
 union uvh_rh_gam_alias210_overlay_config_1_mmr_u {
 	unsigned long	v;
 	struct uvh_rh_gam_alias210_overlay_config_1_mmr_s {
@@ -2206,7 +3080,15 @@
 /* ========================================================================= */
 /*                 UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR                  */
 /* ========================================================================= */
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x16000e8UL
+#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x16000e8UL
+#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x16000e8UL
+#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x16000e8UL
+#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x4800e8UL
+#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR (			\
+	is_uv1_hub() ? UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR :	\
+	is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR :	\
+	is_uv3_hub() ? UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR :	\
+	/*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR)
 
 #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24
 #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48
@@ -2215,6 +3097,7 @@
 #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL
 #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL
 
+
 union uvh_rh_gam_alias210_overlay_config_2_mmr_u {
 	unsigned long	v;
 	struct uvh_rh_gam_alias210_overlay_config_2_mmr_s {
@@ -2230,11 +3113,20 @@
 /* ========================================================================= */
 /*                UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR                  */
 /* ========================================================================= */
-#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL
+#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL
+#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL
+#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL
+#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x4800d0UL
+#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR (			\
+	is_uv1_hub() ? UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR :	\
+	is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR :	\
+	is_uv3_hub() ? UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR :	\
+	/*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR)
 
 #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24
 #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL
 
+
 union uvh_rh_gam_alias210_redirect_config_0_mmr_u {
 	unsigned long	v;
 	struct uvh_rh_gam_alias210_redirect_config_0_mmr_s {
@@ -2247,11 +3139,20 @@
 /* ========================================================================= */
 /*                UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR                  */
 /* ========================================================================= */
-#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL
+#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL
+#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL
+#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL
+#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x4800e0UL
+#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR (			\
+	is_uv1_hub() ? UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR :	\
+	is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR :	\
+	is_uv3_hub() ? UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR :	\
+	/*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR)
 
 #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24
 #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL
 
+
 union uvh_rh_gam_alias210_redirect_config_1_mmr_u {
 	unsigned long	v;
 	struct uvh_rh_gam_alias210_redirect_config_1_mmr_s {
@@ -2264,11 +3165,20 @@
 /* ========================================================================= */
 /*                UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR                  */
 /* ========================================================================= */
-#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL
+#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL
+#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL
+#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL
+#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x4800f0UL
+#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR (			\
+	is_uv1_hub() ? UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR :	\
+	is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR :	\
+	is_uv3_hub() ? UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR :	\
+	/*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR)
 
 #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24
 #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL
 
+
 union uvh_rh_gam_alias210_redirect_config_2_mmr_u {
 	unsigned long	v;
 	struct uvh_rh_gam_alias210_redirect_config_2_mmr_s {
@@ -2281,14 +3191,17 @@
 /* ========================================================================= */
 /*                          UVH_RH_GAM_CONFIG_MMR                            */
 /* ========================================================================= */
-#define UVH_RH_GAM_CONFIG_MMR 0x1600000UL
 #define UV1H_RH_GAM_CONFIG_MMR 0x1600000UL
 #define UV2H_RH_GAM_CONFIG_MMR 0x1600000UL
 #define UV3H_RH_GAM_CONFIG_MMR 0x1600000UL
+#define UV4H_RH_GAM_CONFIG_MMR 0x480000UL
+#define UVH_RH_GAM_CONFIG_MMR (						\
+	is_uv1_hub() ? UV1H_RH_GAM_CONFIG_MMR :				\
+	is_uv2_hub() ? UV2H_RH_GAM_CONFIG_MMR :				\
+	is_uv3_hub() ? UV3H_RH_GAM_CONFIG_MMR :				\
+	/*is_uv4_hub*/ UV4H_RH_GAM_CONFIG_MMR)
 
-#define UVH_RH_GAM_CONFIG_MMR_M_SKT_SHFT		0
 #define UVH_RH_GAM_CONFIG_MMR_N_SKT_SHFT		6
-#define UVH_RH_GAM_CONFIG_MMR_M_SKT_MASK		0x000000000000003fUL
 #define UVH_RH_GAM_CONFIG_MMR_N_SKT_MASK		0x00000000000003c0UL
 
 #define UV1H_RH_GAM_CONFIG_MMR_M_SKT_SHFT		0
@@ -2298,9 +3211,7 @@
 #define UV1H_RH_GAM_CONFIG_MMR_N_SKT_MASK		0x00000000000003c0UL
 #define UV1H_RH_GAM_CONFIG_MMR_MMIOL_CFG_MASK		0x0000000000001000UL
 
-#define UVXH_RH_GAM_CONFIG_MMR_M_SKT_SHFT		0
 #define UVXH_RH_GAM_CONFIG_MMR_N_SKT_SHFT		6
-#define UVXH_RH_GAM_CONFIG_MMR_M_SKT_MASK		0x000000000000003fUL
 #define UVXH_RH_GAM_CONFIG_MMR_N_SKT_MASK		0x00000000000003c0UL
 
 #define UV2H_RH_GAM_CONFIG_MMR_M_SKT_SHFT		0
@@ -2313,10 +3224,14 @@
 #define UV3H_RH_GAM_CONFIG_MMR_M_SKT_MASK		0x000000000000003fUL
 #define UV3H_RH_GAM_CONFIG_MMR_N_SKT_MASK		0x00000000000003c0UL
 
+#define UV4H_RH_GAM_CONFIG_MMR_N_SKT_SHFT		6
+#define UV4H_RH_GAM_CONFIG_MMR_N_SKT_MASK		0x00000000000003c0UL
+
+
 union uvh_rh_gam_config_mmr_u {
 	unsigned long	v;
 	struct uvh_rh_gam_config_mmr_s {
-		unsigned long	m_skt:6;			/* RW */
+		unsigned long	rsvd_0_5:6;
 		unsigned long	n_skt:4;			/* RW */
 		unsigned long	rsvd_10_63:54;
 	} s;
@@ -2328,7 +3243,7 @@
 		unsigned long	rsvd_13_63:51;
 	} s1;
 	struct uvxh_rh_gam_config_mmr_s {
-		unsigned long	m_skt:6;			/* RW */
+		unsigned long	rsvd_0_5:6;
 		unsigned long	n_skt:4;			/* RW */
 		unsigned long	rsvd_10_63:54;
 	} sx;
@@ -2342,20 +3257,28 @@
 		unsigned long	n_skt:4;			/* RW */
 		unsigned long	rsvd_10_63:54;
 	} s3;
+	struct uv4h_rh_gam_config_mmr_s {
+		unsigned long	rsvd_0_5:6;
+		unsigned long	n_skt:4;			/* RW */
+		unsigned long	rsvd_10_63:54;
+	} s4;
 };
 
 /* ========================================================================= */
 /*                    UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR                      */
 /* ========================================================================= */
-#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL
 #define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL
 #define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL
 #define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL
+#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x480010UL
+#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR (				\
+	is_uv1_hub() ? UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR :		\
+	is_uv2_hub() ? UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR :		\
+	is_uv3_hub() ? UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR :		\
+	/*is_uv4_hub*/ UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR)
 
-#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT	28
 #define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT	52
 #define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT	63
-#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK	0x00003ffff0000000UL
 #define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK	0x00f0000000000000UL
 #define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK	0x8000000000000000UL
 
@@ -2368,10 +3291,8 @@
 #define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK	0x00f0000000000000UL
 #define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK	0x8000000000000000UL
 
-#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT	28
 #define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT	52
 #define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT	63
-#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK	0x00003ffff0000000UL
 #define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK	0x00f0000000000000UL
 #define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK	0x8000000000000000UL
 
@@ -2391,12 +3312,28 @@
 #define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_MODE_MASK	0x4000000000000000UL
 #define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK	0x8000000000000000UL
 
+#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT	26
+#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT	52
+#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT	63
+#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK	0x00003ffffc000000UL
+#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK	0x00f0000000000000UL
+#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK	0x8000000000000000UL
+
+#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK (			\
+	is_uv1_hub() ? UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK :	\
+	is_uv2_hub() ? UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK :	\
+	is_uv3_hub() ? UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK :	\
+	/*is_uv4_hub*/ UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK)
+#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT (			\
+	is_uv1_hub() ? UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT :	\
+	is_uv2_hub() ? UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT :	\
+	is_uv3_hub() ? UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT :	\
+	/*is_uv4_hub*/ UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT)
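
Here the field geometry itself moves: UV4 places the GRU overlay base at shift 26 with a two-bit-wider mask, while UV1..UV3 keep shift 28, so both the SHFT and the MASK become dispatching macros and the usual (v & MASK) >> SHFT extraction works unchanged on every hub. A sketch of that extraction with a stubbed predicate and a fabricated register value:

/*
 * Sketch of per-hub field extraction -- not part of the patch.  The
 * shift/mask pairs are the GRU overlay base geometry from this hunk;
 * everything else is stubbed.
 */
#include <stdio.h>

static int is_uv4_hub(void) { return 1; }	/* pretend UV4 */

#define UV3H_GRU_BASE_SHFT 28
#define UV3H_GRU_BASE_MASK 0x00003ffff0000000UL
#define UV4H_GRU_BASE_SHFT 26
#define UV4H_GRU_BASE_MASK 0x00003ffffc000000UL
#define UVH_GRU_BASE_SHFT (is_uv4_hub() ? UV4H_GRU_BASE_SHFT : UV3H_GRU_BASE_SHFT)
#define UVH_GRU_BASE_MASK (is_uv4_hub() ? UV4H_GRU_BASE_MASK : UV3H_GRU_BASE_MASK)

int main(void)
{
	unsigned long v = 0x8000000100000000UL;	/* fabricated MMR value */

	/* Same expression on every hub; the macros supply the geometry. */
	printf("gru base field = 0x%lx\n",
	       (v & UVH_GRU_BASE_MASK) >> UVH_GRU_BASE_SHFT);
	return 0;
}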
+
 union uvh_rh_gam_gru_overlay_config_mmr_u {
 	unsigned long	v;
 	struct uvh_rh_gam_gru_overlay_config_mmr_s {
-		unsigned long	rsvd_0_27:28;
-		unsigned long	base:18;			/* RW */
-		unsigned long	rsvd_46_51:6;
+		unsigned long	rsvd_0_51:52;
 		unsigned long	n_gru:4;			/* RW */
 		unsigned long	rsvd_56_62:7;
 		unsigned long	enable:1;			/* RW */
@@ -2412,8 +3349,7 @@
 		unsigned long	enable:1;			/* RW */
 	} s1;
 	struct uvxh_rh_gam_gru_overlay_config_mmr_s {
-		unsigned long	rsvd_0_27:28;
-		unsigned long	base:18;			/* RW */
+		unsigned long	rsvd_0_45:46;
 		unsigned long	rsvd_46_51:6;
 		unsigned long	n_gru:4;			/* RW */
 		unsigned long	rsvd_56_62:7;
@@ -2436,6 +3372,15 @@
 		unsigned long	mode:1;				/* RW */
 		unsigned long	enable:1;			/* RW */
 	} s3;
+	struct uv4h_rh_gam_gru_overlay_config_mmr_s {
+		unsigned long	rsvd_0_24:25;
+		unsigned long	undef_25:1;			/* Undefined */
+		unsigned long	base:20;			/* RW */
+		unsigned long	rsvd_46_51:6;
+		unsigned long	n_gru:4;			/* RW */
+		unsigned long	rsvd_56_62:7;
+		unsigned long	enable:1;			/* RW */
+	} s4;
 };
 
 /* ========================================================================= */
@@ -2443,6 +3388,14 @@
 /* ========================================================================= */
 #define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR 0x1600030UL
 #define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR 0x1600030UL
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR uv_undefined("UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR")
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR uv_undefined("UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR")
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR (				\
+	is_uv1_hub() ? UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR :		\
+	is_uv2_hub() ? UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR :		\
+	is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR :		\
+	/*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR)
+
 
 #define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT	30
 #define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_SHFT	46
@@ -2453,6 +3406,7 @@
 #define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_MASK	0x00f0000000000000UL
 #define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
 
+
 #define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT	27
 #define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_SHFT	46
 #define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_SHFT	52
@@ -2462,6 +3416,7 @@
 #define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_MASK	0x00f0000000000000UL
 #define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
 
+
 union uvh_rh_gam_mmioh_overlay_config_mmr_u {
 	unsigned long	v;
 	struct uv1h_rh_gam_mmioh_overlay_config_mmr_s {
@@ -2485,10 +3440,15 @@
 /* ========================================================================= */
 /*                    UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR                      */
 /* ========================================================================= */
-#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL
 #define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL
 #define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL
 #define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL
+#define UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x480028UL
+#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR (				\
+	is_uv1_hub() ? UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR :		\
+	is_uv2_hub() ? UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR :		\
+	is_uv3_hub() ? UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR :		\
+	/*is_uv4_hub*/ UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR)
 
 #define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT	26
 #define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT	63
@@ -2517,6 +3477,12 @@
 #define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK	0x00003ffffc000000UL
 #define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK	0x8000000000000000UL
 
+#define UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT	26
+#define UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT	63
+#define UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK	0x00003ffffc000000UL
+#define UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK	0x8000000000000000UL
+
+
 union uvh_rh_gam_mmr_overlay_config_mmr_u {
 	unsigned long	v;
 	struct uvh_rh_gam_mmr_overlay_config_mmr_s {
@@ -2550,16 +3516,31 @@
 		unsigned long	rsvd_46_62:17;
 		unsigned long	enable:1;			/* RW */
 	} s3;
+	struct uv4h_rh_gam_mmr_overlay_config_mmr_s {
+		unsigned long	rsvd_0_25:26;
+		unsigned long	base:20;			/* RW */
+		unsigned long	rsvd_46_62:17;
+		unsigned long	enable:1;			/* RW */
+	} s4;
 };
 
 /* ========================================================================= */
 /*                                 UVH_RTC                                   */
 /* ========================================================================= */
-#define UVH_RTC 0x340000UL
+#define UV1H_RTC 0x340000UL
+#define UV2H_RTC 0x340000UL
+#define UV3H_RTC 0x340000UL
+#define UV4H_RTC 0xe0000UL
+#define UVH_RTC (							\
+	is_uv1_hub() ? UV1H_RTC :					\
+	is_uv2_hub() ? UV2H_RTC :					\
+	is_uv3_hub() ? UV3H_RTC :					\
+	/*is_uv4_hub*/ UV4H_RTC)
 
 #define UVH_RTC_REAL_TIME_CLOCK_SHFT			0
 #define UVH_RTC_REAL_TIME_CLOCK_MASK			0x00ffffffffffffffUL
 
+
 union uvh_rtc_u {
 	unsigned long	v;
 	struct uvh_rtc_s {
@@ -2590,6 +3571,7 @@
 #define UVH_RTC1_INT_CONFIG_M_MASK			0x0000000000010000UL
 #define UVH_RTC1_INT_CONFIG_APIC_ID_MASK		0xffffffff00000000UL
 
+
 union uvh_rtc1_int_config_u {
 	unsigned long	v;
 	struct uvh_rtc1_int_config_s {
@@ -2609,12 +3591,30 @@
 /* ========================================================================= */
 /*                               UVH_SCRATCH5                                */
 /* ========================================================================= */
-#define UVH_SCRATCH5 0x2d0200UL
-#define UVH_SCRATCH5_32 0x778
+#define UV1H_SCRATCH5 0x2d0200UL
+#define UV2H_SCRATCH5 0x2d0200UL
+#define UV3H_SCRATCH5 0x2d0200UL
+#define UV4H_SCRATCH5 0xb0200UL
+#define UVH_SCRATCH5 (							\
+	is_uv1_hub() ? UV1H_SCRATCH5 :					\
+	is_uv2_hub() ? UV2H_SCRATCH5 :					\
+	is_uv3_hub() ? UV3H_SCRATCH5 :					\
+	/*is_uv4_hub*/ UV4H_SCRATCH5)
+
+#define UV1H_SCRATCH5_32 0x778
+#define UV2H_SCRATCH5_32 0x778
+#define UV3H_SCRATCH5_32 0x778
+#define UV4H_SCRATCH5_32 0x798
+#define UVH_SCRATCH5_32 (						\
+	is_uv1_hub() ? UV1H_SCRATCH5_32 :				\
+	is_uv2_hub() ? UV2H_SCRATCH5_32 :				\
+	is_uv3_hub() ? UV3H_SCRATCH5_32 :				\
+	/*is_uv4_hub*/ UV4H_SCRATCH5_32)
 
 #define UVH_SCRATCH5_SCRATCH5_SHFT			0
 #define UVH_SCRATCH5_SCRATCH5_MASK			0xffffffffffffffffUL
 
+
 union uvh_scratch5_u {
 	unsigned long	v;
 	struct uvh_scratch5_s {
@@ -2625,14 +3625,39 @@
 /* ========================================================================= */
 /*                            UVH_SCRATCH5_ALIAS                             */
 /* ========================================================================= */
-#define UVH_SCRATCH5_ALIAS 0x2d0208UL
-#define UVH_SCRATCH5_ALIAS_32 0x780
+#define UV1H_SCRATCH5_ALIAS 0x2d0208UL
+#define UV2H_SCRATCH5_ALIAS 0x2d0208UL
+#define UV3H_SCRATCH5_ALIAS 0x2d0208UL
+#define UV4H_SCRATCH5_ALIAS 0xb0208UL
+#define UVH_SCRATCH5_ALIAS (						\
+	is_uv1_hub() ? UV1H_SCRATCH5_ALIAS :				\
+	is_uv2_hub() ? UV2H_SCRATCH5_ALIAS :				\
+	is_uv3_hub() ? UV3H_SCRATCH5_ALIAS :				\
+	/*is_uv4_hub*/ UV4H_SCRATCH5_ALIAS)
+
+#define UV1H_SCRATCH5_ALIAS_32 0x780
+#define UV2H_SCRATCH5_ALIAS_32 0x780
+#define UV3H_SCRATCH5_ALIAS_32 0x780
+#define UV4H_SCRATCH5_ALIAS_32 0x7a0
+#define UVH_SCRATCH5_ALIAS_32 (						\
+	is_uv1_hub() ? UV1H_SCRATCH5_ALIAS_32 :				\
+	is_uv2_hub() ? UV2H_SCRATCH5_ALIAS_32 :				\
+	is_uv3_hub() ? UV3H_SCRATCH5_ALIAS_32 :				\
+	/*is_uv4_hub*/ UV4H_SCRATCH5_ALIAS_32)
 
 
 /* ========================================================================= */
 /*                           UVH_SCRATCH5_ALIAS_2                            */
 /* ========================================================================= */
-#define UVH_SCRATCH5_ALIAS_2 0x2d0210UL
+#define UV1H_SCRATCH5_ALIAS_2 0x2d0210UL
+#define UV2H_SCRATCH5_ALIAS_2 0x2d0210UL
+#define UV3H_SCRATCH5_ALIAS_2 0x2d0210UL
+#define UV4H_SCRATCH5_ALIAS_2 0xb0210UL
+#define UVH_SCRATCH5_ALIAS_2 (						\
+	is_uv1_hub() ? UV1H_SCRATCH5_ALIAS_2 :				\
+	is_uv2_hub() ? UV2H_SCRATCH5_ALIAS_2 :				\
+	is_uv3_hub() ? UV3H_SCRATCH5_ALIAS_2 :				\
+	/*is_uv4_hub*/ UV4H_SCRATCH5_ALIAS_2)
 #define UVH_SCRATCH5_ALIAS_2_32 0x788
 
 
@@ -2640,76 +3665,255 @@
 /*                          UVXH_EVENT_OCCURRED2                             */
 /* ========================================================================= */
 #define UVXH_EVENT_OCCURRED2 0x70100UL
-#define UVXH_EVENT_OCCURRED2_32 0xb68
 
-#define UVXH_EVENT_OCCURRED2_RTC_0_SHFT			0
-#define UVXH_EVENT_OCCURRED2_RTC_1_SHFT			1
-#define UVXH_EVENT_OCCURRED2_RTC_2_SHFT			2
-#define UVXH_EVENT_OCCURRED2_RTC_3_SHFT			3
-#define UVXH_EVENT_OCCURRED2_RTC_4_SHFT			4
-#define UVXH_EVENT_OCCURRED2_RTC_5_SHFT			5
-#define UVXH_EVENT_OCCURRED2_RTC_6_SHFT			6
-#define UVXH_EVENT_OCCURRED2_RTC_7_SHFT			7
-#define UVXH_EVENT_OCCURRED2_RTC_8_SHFT			8
-#define UVXH_EVENT_OCCURRED2_RTC_9_SHFT			9
-#define UVXH_EVENT_OCCURRED2_RTC_10_SHFT		10
-#define UVXH_EVENT_OCCURRED2_RTC_11_SHFT		11
-#define UVXH_EVENT_OCCURRED2_RTC_12_SHFT		12
-#define UVXH_EVENT_OCCURRED2_RTC_13_SHFT		13
-#define UVXH_EVENT_OCCURRED2_RTC_14_SHFT		14
-#define UVXH_EVENT_OCCURRED2_RTC_15_SHFT		15
-#define UVXH_EVENT_OCCURRED2_RTC_16_SHFT		16
-#define UVXH_EVENT_OCCURRED2_RTC_17_SHFT		17
-#define UVXH_EVENT_OCCURRED2_RTC_18_SHFT		18
-#define UVXH_EVENT_OCCURRED2_RTC_19_SHFT		19
-#define UVXH_EVENT_OCCURRED2_RTC_20_SHFT		20
-#define UVXH_EVENT_OCCURRED2_RTC_21_SHFT		21
-#define UVXH_EVENT_OCCURRED2_RTC_22_SHFT		22
-#define UVXH_EVENT_OCCURRED2_RTC_23_SHFT		23
-#define UVXH_EVENT_OCCURRED2_RTC_24_SHFT		24
-#define UVXH_EVENT_OCCURRED2_RTC_25_SHFT		25
-#define UVXH_EVENT_OCCURRED2_RTC_26_SHFT		26
-#define UVXH_EVENT_OCCURRED2_RTC_27_SHFT		27
-#define UVXH_EVENT_OCCURRED2_RTC_28_SHFT		28
-#define UVXH_EVENT_OCCURRED2_RTC_29_SHFT		29
-#define UVXH_EVENT_OCCURRED2_RTC_30_SHFT		30
-#define UVXH_EVENT_OCCURRED2_RTC_31_SHFT		31
-#define UVXH_EVENT_OCCURRED2_RTC_0_MASK			0x0000000000000001UL
-#define UVXH_EVENT_OCCURRED2_RTC_1_MASK			0x0000000000000002UL
-#define UVXH_EVENT_OCCURRED2_RTC_2_MASK			0x0000000000000004UL
-#define UVXH_EVENT_OCCURRED2_RTC_3_MASK			0x0000000000000008UL
-#define UVXH_EVENT_OCCURRED2_RTC_4_MASK			0x0000000000000010UL
-#define UVXH_EVENT_OCCURRED2_RTC_5_MASK			0x0000000000000020UL
-#define UVXH_EVENT_OCCURRED2_RTC_6_MASK			0x0000000000000040UL
-#define UVXH_EVENT_OCCURRED2_RTC_7_MASK			0x0000000000000080UL
-#define UVXH_EVENT_OCCURRED2_RTC_8_MASK			0x0000000000000100UL
-#define UVXH_EVENT_OCCURRED2_RTC_9_MASK			0x0000000000000200UL
-#define UVXH_EVENT_OCCURRED2_RTC_10_MASK		0x0000000000000400UL
-#define UVXH_EVENT_OCCURRED2_RTC_11_MASK		0x0000000000000800UL
-#define UVXH_EVENT_OCCURRED2_RTC_12_MASK		0x0000000000001000UL
-#define UVXH_EVENT_OCCURRED2_RTC_13_MASK		0x0000000000002000UL
-#define UVXH_EVENT_OCCURRED2_RTC_14_MASK		0x0000000000004000UL
-#define UVXH_EVENT_OCCURRED2_RTC_15_MASK		0x0000000000008000UL
-#define UVXH_EVENT_OCCURRED2_RTC_16_MASK		0x0000000000010000UL
-#define UVXH_EVENT_OCCURRED2_RTC_17_MASK		0x0000000000020000UL
-#define UVXH_EVENT_OCCURRED2_RTC_18_MASK		0x0000000000040000UL
-#define UVXH_EVENT_OCCURRED2_RTC_19_MASK		0x0000000000080000UL
-#define UVXH_EVENT_OCCURRED2_RTC_20_MASK		0x0000000000100000UL
-#define UVXH_EVENT_OCCURRED2_RTC_21_MASK		0x0000000000200000UL
-#define UVXH_EVENT_OCCURRED2_RTC_22_MASK		0x0000000000400000UL
-#define UVXH_EVENT_OCCURRED2_RTC_23_MASK		0x0000000000800000UL
-#define UVXH_EVENT_OCCURRED2_RTC_24_MASK		0x0000000001000000UL
-#define UVXH_EVENT_OCCURRED2_RTC_25_MASK		0x0000000002000000UL
-#define UVXH_EVENT_OCCURRED2_RTC_26_MASK		0x0000000004000000UL
-#define UVXH_EVENT_OCCURRED2_RTC_27_MASK		0x0000000008000000UL
-#define UVXH_EVENT_OCCURRED2_RTC_28_MASK		0x0000000010000000UL
-#define UVXH_EVENT_OCCURRED2_RTC_29_MASK		0x0000000020000000UL
-#define UVXH_EVENT_OCCURRED2_RTC_30_MASK		0x0000000040000000UL
-#define UVXH_EVENT_OCCURRED2_RTC_31_MASK		0x0000000080000000UL
+#define UV2H_EVENT_OCCURRED2_32 0xb68
+#define UV3H_EVENT_OCCURRED2_32 0xb68
+#define UV4H_EVENT_OCCURRED2_32 0x608
+#define UVH_EVENT_OCCURRED2_32 (					\
+	is_uv2_hub() ? UV2H_EVENT_OCCURRED2_32 :			\
+	is_uv3_hub() ? UV3H_EVENT_OCCURRED2_32 :			\
+	/*is_uv4_hub*/ UV4H_EVENT_OCCURRED2_32)
 
-union uvxh_event_occurred2_u {
+
+#define UV2H_EVENT_OCCURRED2_RTC_0_SHFT			0
+#define UV2H_EVENT_OCCURRED2_RTC_1_SHFT			1
+#define UV2H_EVENT_OCCURRED2_RTC_2_SHFT			2
+#define UV2H_EVENT_OCCURRED2_RTC_3_SHFT			3
+#define UV2H_EVENT_OCCURRED2_RTC_4_SHFT			4
+#define UV2H_EVENT_OCCURRED2_RTC_5_SHFT			5
+#define UV2H_EVENT_OCCURRED2_RTC_6_SHFT			6
+#define UV2H_EVENT_OCCURRED2_RTC_7_SHFT			7
+#define UV2H_EVENT_OCCURRED2_RTC_8_SHFT			8
+#define UV2H_EVENT_OCCURRED2_RTC_9_SHFT			9
+#define UV2H_EVENT_OCCURRED2_RTC_10_SHFT		10
+#define UV2H_EVENT_OCCURRED2_RTC_11_SHFT		11
+#define UV2H_EVENT_OCCURRED2_RTC_12_SHFT		12
+#define UV2H_EVENT_OCCURRED2_RTC_13_SHFT		13
+#define UV2H_EVENT_OCCURRED2_RTC_14_SHFT		14
+#define UV2H_EVENT_OCCURRED2_RTC_15_SHFT		15
+#define UV2H_EVENT_OCCURRED2_RTC_16_SHFT		16
+#define UV2H_EVENT_OCCURRED2_RTC_17_SHFT		17
+#define UV2H_EVENT_OCCURRED2_RTC_18_SHFT		18
+#define UV2H_EVENT_OCCURRED2_RTC_19_SHFT		19
+#define UV2H_EVENT_OCCURRED2_RTC_20_SHFT		20
+#define UV2H_EVENT_OCCURRED2_RTC_21_SHFT		21
+#define UV2H_EVENT_OCCURRED2_RTC_22_SHFT		22
+#define UV2H_EVENT_OCCURRED2_RTC_23_SHFT		23
+#define UV2H_EVENT_OCCURRED2_RTC_24_SHFT		24
+#define UV2H_EVENT_OCCURRED2_RTC_25_SHFT		25
+#define UV2H_EVENT_OCCURRED2_RTC_26_SHFT		26
+#define UV2H_EVENT_OCCURRED2_RTC_27_SHFT		27
+#define UV2H_EVENT_OCCURRED2_RTC_28_SHFT		28
+#define UV2H_EVENT_OCCURRED2_RTC_29_SHFT		29
+#define UV2H_EVENT_OCCURRED2_RTC_30_SHFT		30
+#define UV2H_EVENT_OCCURRED2_RTC_31_SHFT		31
+#define UV2H_EVENT_OCCURRED2_RTC_0_MASK			0x0000000000000001UL
+#define UV2H_EVENT_OCCURRED2_RTC_1_MASK			0x0000000000000002UL
+#define UV2H_EVENT_OCCURRED2_RTC_2_MASK			0x0000000000000004UL
+#define UV2H_EVENT_OCCURRED2_RTC_3_MASK			0x0000000000000008UL
+#define UV2H_EVENT_OCCURRED2_RTC_4_MASK			0x0000000000000010UL
+#define UV2H_EVENT_OCCURRED2_RTC_5_MASK			0x0000000000000020UL
+#define UV2H_EVENT_OCCURRED2_RTC_6_MASK			0x0000000000000040UL
+#define UV2H_EVENT_OCCURRED2_RTC_7_MASK			0x0000000000000080UL
+#define UV2H_EVENT_OCCURRED2_RTC_8_MASK			0x0000000000000100UL
+#define UV2H_EVENT_OCCURRED2_RTC_9_MASK			0x0000000000000200UL
+#define UV2H_EVENT_OCCURRED2_RTC_10_MASK		0x0000000000000400UL
+#define UV2H_EVENT_OCCURRED2_RTC_11_MASK		0x0000000000000800UL
+#define UV2H_EVENT_OCCURRED2_RTC_12_MASK		0x0000000000001000UL
+#define UV2H_EVENT_OCCURRED2_RTC_13_MASK		0x0000000000002000UL
+#define UV2H_EVENT_OCCURRED2_RTC_14_MASK		0x0000000000004000UL
+#define UV2H_EVENT_OCCURRED2_RTC_15_MASK		0x0000000000008000UL
+#define UV2H_EVENT_OCCURRED2_RTC_16_MASK		0x0000000000010000UL
+#define UV2H_EVENT_OCCURRED2_RTC_17_MASK		0x0000000000020000UL
+#define UV2H_EVENT_OCCURRED2_RTC_18_MASK		0x0000000000040000UL
+#define UV2H_EVENT_OCCURRED2_RTC_19_MASK		0x0000000000080000UL
+#define UV2H_EVENT_OCCURRED2_RTC_20_MASK		0x0000000000100000UL
+#define UV2H_EVENT_OCCURRED2_RTC_21_MASK		0x0000000000200000UL
+#define UV2H_EVENT_OCCURRED2_RTC_22_MASK		0x0000000000400000UL
+#define UV2H_EVENT_OCCURRED2_RTC_23_MASK		0x0000000000800000UL
+#define UV2H_EVENT_OCCURRED2_RTC_24_MASK		0x0000000001000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_25_MASK		0x0000000002000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_26_MASK		0x0000000004000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_27_MASK		0x0000000008000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_28_MASK		0x0000000010000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_29_MASK		0x0000000020000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_30_MASK		0x0000000040000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_31_MASK		0x0000000080000000UL
+
+#define UV3H_EVENT_OCCURRED2_RTC_0_SHFT			0
+#define UV3H_EVENT_OCCURRED2_RTC_1_SHFT			1
+#define UV3H_EVENT_OCCURRED2_RTC_2_SHFT			2
+#define UV3H_EVENT_OCCURRED2_RTC_3_SHFT			3
+#define UV3H_EVENT_OCCURRED2_RTC_4_SHFT			4
+#define UV3H_EVENT_OCCURRED2_RTC_5_SHFT			5
+#define UV3H_EVENT_OCCURRED2_RTC_6_SHFT			6
+#define UV3H_EVENT_OCCURRED2_RTC_7_SHFT			7
+#define UV3H_EVENT_OCCURRED2_RTC_8_SHFT			8
+#define UV3H_EVENT_OCCURRED2_RTC_9_SHFT			9
+#define UV3H_EVENT_OCCURRED2_RTC_10_SHFT		10
+#define UV3H_EVENT_OCCURRED2_RTC_11_SHFT		11
+#define UV3H_EVENT_OCCURRED2_RTC_12_SHFT		12
+#define UV3H_EVENT_OCCURRED2_RTC_13_SHFT		13
+#define UV3H_EVENT_OCCURRED2_RTC_14_SHFT		14
+#define UV3H_EVENT_OCCURRED2_RTC_15_SHFT		15
+#define UV3H_EVENT_OCCURRED2_RTC_16_SHFT		16
+#define UV3H_EVENT_OCCURRED2_RTC_17_SHFT		17
+#define UV3H_EVENT_OCCURRED2_RTC_18_SHFT		18
+#define UV3H_EVENT_OCCURRED2_RTC_19_SHFT		19
+#define UV3H_EVENT_OCCURRED2_RTC_20_SHFT		20
+#define UV3H_EVENT_OCCURRED2_RTC_21_SHFT		21
+#define UV3H_EVENT_OCCURRED2_RTC_22_SHFT		22
+#define UV3H_EVENT_OCCURRED2_RTC_23_SHFT		23
+#define UV3H_EVENT_OCCURRED2_RTC_24_SHFT		24
+#define UV3H_EVENT_OCCURRED2_RTC_25_SHFT		25
+#define UV3H_EVENT_OCCURRED2_RTC_26_SHFT		26
+#define UV3H_EVENT_OCCURRED2_RTC_27_SHFT		27
+#define UV3H_EVENT_OCCURRED2_RTC_28_SHFT		28
+#define UV3H_EVENT_OCCURRED2_RTC_29_SHFT		29
+#define UV3H_EVENT_OCCURRED2_RTC_30_SHFT		30
+#define UV3H_EVENT_OCCURRED2_RTC_31_SHFT		31
+#define UV3H_EVENT_OCCURRED2_RTC_0_MASK			0x0000000000000001UL
+#define UV3H_EVENT_OCCURRED2_RTC_1_MASK			0x0000000000000002UL
+#define UV3H_EVENT_OCCURRED2_RTC_2_MASK			0x0000000000000004UL
+#define UV3H_EVENT_OCCURRED2_RTC_3_MASK			0x0000000000000008UL
+#define UV3H_EVENT_OCCURRED2_RTC_4_MASK			0x0000000000000010UL
+#define UV3H_EVENT_OCCURRED2_RTC_5_MASK			0x0000000000000020UL
+#define UV3H_EVENT_OCCURRED2_RTC_6_MASK			0x0000000000000040UL
+#define UV3H_EVENT_OCCURRED2_RTC_7_MASK			0x0000000000000080UL
+#define UV3H_EVENT_OCCURRED2_RTC_8_MASK			0x0000000000000100UL
+#define UV3H_EVENT_OCCURRED2_RTC_9_MASK			0x0000000000000200UL
+#define UV3H_EVENT_OCCURRED2_RTC_10_MASK		0x0000000000000400UL
+#define UV3H_EVENT_OCCURRED2_RTC_11_MASK		0x0000000000000800UL
+#define UV3H_EVENT_OCCURRED2_RTC_12_MASK		0x0000000000001000UL
+#define UV3H_EVENT_OCCURRED2_RTC_13_MASK		0x0000000000002000UL
+#define UV3H_EVENT_OCCURRED2_RTC_14_MASK		0x0000000000004000UL
+#define UV3H_EVENT_OCCURRED2_RTC_15_MASK		0x0000000000008000UL
+#define UV3H_EVENT_OCCURRED2_RTC_16_MASK		0x0000000000010000UL
+#define UV3H_EVENT_OCCURRED2_RTC_17_MASK		0x0000000000020000UL
+#define UV3H_EVENT_OCCURRED2_RTC_18_MASK		0x0000000000040000UL
+#define UV3H_EVENT_OCCURRED2_RTC_19_MASK		0x0000000000080000UL
+#define UV3H_EVENT_OCCURRED2_RTC_20_MASK		0x0000000000100000UL
+#define UV3H_EVENT_OCCURRED2_RTC_21_MASK		0x0000000000200000UL
+#define UV3H_EVENT_OCCURRED2_RTC_22_MASK		0x0000000000400000UL
+#define UV3H_EVENT_OCCURRED2_RTC_23_MASK		0x0000000000800000UL
+#define UV3H_EVENT_OCCURRED2_RTC_24_MASK		0x0000000001000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_25_MASK		0x0000000002000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_26_MASK		0x0000000004000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_27_MASK		0x0000000008000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_28_MASK		0x0000000010000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_29_MASK		0x0000000020000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_30_MASK		0x0000000040000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_31_MASK		0x0000000080000000UL
+
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT0_SHFT 0
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT1_SHFT 1
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT2_SHFT 2
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT3_SHFT 3
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT4_SHFT 4
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT5_SHFT 5
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT6_SHFT 6
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT7_SHFT 7
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT8_SHFT 8
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT9_SHFT 9
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT10_SHFT 10
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT11_SHFT 11
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT12_SHFT 12
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT13_SHFT 13
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT14_SHFT 14
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT15_SHFT 15
+#define UV4H_EVENT_OCCURRED2_RTC_INTERVAL_INT_SHFT	16
+#define UV4H_EVENT_OCCURRED2_BAU_DASHBOARD_INT_SHFT	17
+#define UV4H_EVENT_OCCURRED2_RTC_0_SHFT			18
+#define UV4H_EVENT_OCCURRED2_RTC_1_SHFT			19
+#define UV4H_EVENT_OCCURRED2_RTC_2_SHFT			20
+#define UV4H_EVENT_OCCURRED2_RTC_3_SHFT			21
+#define UV4H_EVENT_OCCURRED2_RTC_4_SHFT			22
+#define UV4H_EVENT_OCCURRED2_RTC_5_SHFT			23
+#define UV4H_EVENT_OCCURRED2_RTC_6_SHFT			24
+#define UV4H_EVENT_OCCURRED2_RTC_7_SHFT			25
+#define UV4H_EVENT_OCCURRED2_RTC_8_SHFT			26
+#define UV4H_EVENT_OCCURRED2_RTC_9_SHFT			27
+#define UV4H_EVENT_OCCURRED2_RTC_10_SHFT		28
+#define UV4H_EVENT_OCCURRED2_RTC_11_SHFT		29
+#define UV4H_EVENT_OCCURRED2_RTC_12_SHFT		30
+#define UV4H_EVENT_OCCURRED2_RTC_13_SHFT		31
+#define UV4H_EVENT_OCCURRED2_RTC_14_SHFT		32
+#define UV4H_EVENT_OCCURRED2_RTC_15_SHFT		33
+#define UV4H_EVENT_OCCURRED2_RTC_16_SHFT		34
+#define UV4H_EVENT_OCCURRED2_RTC_17_SHFT		35
+#define UV4H_EVENT_OCCURRED2_RTC_18_SHFT		36
+#define UV4H_EVENT_OCCURRED2_RTC_19_SHFT		37
+#define UV4H_EVENT_OCCURRED2_RTC_20_SHFT		38
+#define UV4H_EVENT_OCCURRED2_RTC_21_SHFT		39
+#define UV4H_EVENT_OCCURRED2_RTC_22_SHFT		40
+#define UV4H_EVENT_OCCURRED2_RTC_23_SHFT		41
+#define UV4H_EVENT_OCCURRED2_RTC_24_SHFT		42
+#define UV4H_EVENT_OCCURRED2_RTC_25_SHFT		43
+#define UV4H_EVENT_OCCURRED2_RTC_26_SHFT		44
+#define UV4H_EVENT_OCCURRED2_RTC_27_SHFT		45
+#define UV4H_EVENT_OCCURRED2_RTC_28_SHFT		46
+#define UV4H_EVENT_OCCURRED2_RTC_29_SHFT		47
+#define UV4H_EVENT_OCCURRED2_RTC_30_SHFT		48
+#define UV4H_EVENT_OCCURRED2_RTC_31_SHFT		49
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT0_MASK 0x0000000000000001UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT1_MASK 0x0000000000000002UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT2_MASK 0x0000000000000004UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT3_MASK 0x0000000000000008UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT4_MASK 0x0000000000000010UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT5_MASK 0x0000000000000020UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT6_MASK 0x0000000000000040UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT7_MASK 0x0000000000000080UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT8_MASK 0x0000000000000100UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT9_MASK 0x0000000000000200UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT10_MASK 0x0000000000000400UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT11_MASK 0x0000000000000800UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT12_MASK 0x0000000000001000UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT13_MASK 0x0000000000002000UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT14_MASK 0x0000000000004000UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT15_MASK 0x0000000000008000UL
+#define UV4H_EVENT_OCCURRED2_RTC_INTERVAL_INT_MASK	0x0000000000010000UL
+#define UV4H_EVENT_OCCURRED2_BAU_DASHBOARD_INT_MASK	0x0000000000020000UL
+#define UV4H_EVENT_OCCURRED2_RTC_0_MASK			0x0000000000040000UL
+#define UV4H_EVENT_OCCURRED2_RTC_1_MASK			0x0000000000080000UL
+#define UV4H_EVENT_OCCURRED2_RTC_2_MASK			0x0000000000100000UL
+#define UV4H_EVENT_OCCURRED2_RTC_3_MASK			0x0000000000200000UL
+#define UV4H_EVENT_OCCURRED2_RTC_4_MASK			0x0000000000400000UL
+#define UV4H_EVENT_OCCURRED2_RTC_5_MASK			0x0000000000800000UL
+#define UV4H_EVENT_OCCURRED2_RTC_6_MASK			0x0000000001000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_7_MASK			0x0000000002000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_8_MASK			0x0000000004000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_9_MASK			0x0000000008000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_10_MASK		0x0000000010000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_11_MASK		0x0000000020000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_12_MASK		0x0000000040000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_13_MASK		0x0000000080000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_14_MASK		0x0000000100000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_15_MASK		0x0000000200000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_16_MASK		0x0000000400000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_17_MASK		0x0000000800000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_18_MASK		0x0000001000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_19_MASK		0x0000002000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_20_MASK		0x0000004000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_21_MASK		0x0000008000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_22_MASK		0x0000010000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_23_MASK		0x0000020000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_24_MASK		0x0000040000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_25_MASK		0x0000080000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_26_MASK		0x0000100000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_27_MASK		0x0000200000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_28_MASK		0x0000400000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_29_MASK		0x0000800000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_30_MASK		0x0001000000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_31_MASK		0x0002000000000000UL
+
+#define UVXH_EVENT_OCCURRED2_RTC_1_MASK (				\
+	is_uv2_hub() ? UV2H_EVENT_OCCURRED2_RTC_1_MASK :		\
+	is_uv3_hub() ? UV3H_EVENT_OCCURRED2_RTC_1_MASK :		\
+	/*is_uv4_hub*/ UV4H_EVENT_OCCURRED2_RTC_1_MASK)
+
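The per-hub dispatch macro above is what lets generic code test an EVENT_OCCURRED2 bit without knowing which hub generation it is running on. A minimal sketch of the intended usage, assuming the UVXH_EVENT_OCCURRED2 register offset defined earlier in this header and the existing uv_read_local_mmr() accessor (the helper name is illustrative):

static inline int uvh_rtc_1_event_pending(void)
{
	union uvh_event_occurred2_u ev;

	/* read the hub-local EVENT_OCCURRED2 register */
	ev.v = uv_read_local_mmr(UVXH_EVENT_OCCURRED2);
	/* the macro expands to the UV2/UV3/UV4 mask for the running hub */
	return !!(ev.v & UVXH_EVENT_OCCURRED2_RTC_1_MASK);
}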
+union uvh_event_occurred2_u {
 	unsigned long	v;
-	struct uvxh_event_occurred2_s {
+	struct uv2h_event_occurred2_s {
 		unsigned long	rtc_0:1;			/* RW */
 		unsigned long	rtc_1:1;			/* RW */
 		unsigned long	rtc_2:1;			/* RW */
@@ -2743,25 +3947,129 @@
 		unsigned long	rtc_30:1;			/* RW */
 		unsigned long	rtc_31:1;			/* RW */
 		unsigned long	rsvd_32_63:32;
-	} sx;
+	} s2;
+	struct uv3h_event_occurred2_s {
+		unsigned long	rtc_0:1;			/* RW */
+		unsigned long	rtc_1:1;			/* RW */
+		unsigned long	rtc_2:1;			/* RW */
+		unsigned long	rtc_3:1;			/* RW */
+		unsigned long	rtc_4:1;			/* RW */
+		unsigned long	rtc_5:1;			/* RW */
+		unsigned long	rtc_6:1;			/* RW */
+		unsigned long	rtc_7:1;			/* RW */
+		unsigned long	rtc_8:1;			/* RW */
+		unsigned long	rtc_9:1;			/* RW */
+		unsigned long	rtc_10:1;			/* RW */
+		unsigned long	rtc_11:1;			/* RW */
+		unsigned long	rtc_12:1;			/* RW */
+		unsigned long	rtc_13:1;			/* RW */
+		unsigned long	rtc_14:1;			/* RW */
+		unsigned long	rtc_15:1;			/* RW */
+		unsigned long	rtc_16:1;			/* RW */
+		unsigned long	rtc_17:1;			/* RW */
+		unsigned long	rtc_18:1;			/* RW */
+		unsigned long	rtc_19:1;			/* RW */
+		unsigned long	rtc_20:1;			/* RW */
+		unsigned long	rtc_21:1;			/* RW */
+		unsigned long	rtc_22:1;			/* RW */
+		unsigned long	rtc_23:1;			/* RW */
+		unsigned long	rtc_24:1;			/* RW */
+		unsigned long	rtc_25:1;			/* RW */
+		unsigned long	rtc_26:1;			/* RW */
+		unsigned long	rtc_27:1;			/* RW */
+		unsigned long	rtc_28:1;			/* RW */
+		unsigned long	rtc_29:1;			/* RW */
+		unsigned long	rtc_30:1;			/* RW */
+		unsigned long	rtc_31:1;			/* RW */
+		unsigned long	rsvd_32_63:32;
+	} s3;
+	struct uv4h_event_occurred2_s {
+		unsigned long	message_accelerator_int0:1;	/* RW */
+		unsigned long	message_accelerator_int1:1;	/* RW */
+		unsigned long	message_accelerator_int2:1;	/* RW */
+		unsigned long	message_accelerator_int3:1;	/* RW */
+		unsigned long	message_accelerator_int4:1;	/* RW */
+		unsigned long	message_accelerator_int5:1;	/* RW */
+		unsigned long	message_accelerator_int6:1;	/* RW */
+		unsigned long	message_accelerator_int7:1;	/* RW */
+		unsigned long	message_accelerator_int8:1;	/* RW */
+		unsigned long	message_accelerator_int9:1;	/* RW */
+		unsigned long	message_accelerator_int10:1;	/* RW */
+		unsigned long	message_accelerator_int11:1;	/* RW */
+		unsigned long	message_accelerator_int12:1;	/* RW */
+		unsigned long	message_accelerator_int13:1;	/* RW */
+		unsigned long	message_accelerator_int14:1;	/* RW */
+		unsigned long	message_accelerator_int15:1;	/* RW */
+		unsigned long	rtc_interval_int:1;		/* RW */
+		unsigned long	bau_dashboard_int:1;		/* RW */
+		unsigned long	rtc_0:1;			/* RW */
+		unsigned long	rtc_1:1;			/* RW */
+		unsigned long	rtc_2:1;			/* RW */
+		unsigned long	rtc_3:1;			/* RW */
+		unsigned long	rtc_4:1;			/* RW */
+		unsigned long	rtc_5:1;			/* RW */
+		unsigned long	rtc_6:1;			/* RW */
+		unsigned long	rtc_7:1;			/* RW */
+		unsigned long	rtc_8:1;			/* RW */
+		unsigned long	rtc_9:1;			/* RW */
+		unsigned long	rtc_10:1;			/* RW */
+		unsigned long	rtc_11:1;			/* RW */
+		unsigned long	rtc_12:1;			/* RW */
+		unsigned long	rtc_13:1;			/* RW */
+		unsigned long	rtc_14:1;			/* RW */
+		unsigned long	rtc_15:1;			/* RW */
+		unsigned long	rtc_16:1;			/* RW */
+		unsigned long	rtc_17:1;			/* RW */
+		unsigned long	rtc_18:1;			/* RW */
+		unsigned long	rtc_19:1;			/* RW */
+		unsigned long	rtc_20:1;			/* RW */
+		unsigned long	rtc_21:1;			/* RW */
+		unsigned long	rtc_22:1;			/* RW */
+		unsigned long	rtc_23:1;			/* RW */
+		unsigned long	rtc_24:1;			/* RW */
+		unsigned long	rtc_25:1;			/* RW */
+		unsigned long	rtc_26:1;			/* RW */
+		unsigned long	rtc_27:1;			/* RW */
+		unsigned long	rtc_28:1;			/* RW */
+		unsigned long	rtc_29:1;			/* RW */
+		unsigned long	rtc_30:1;			/* RW */
+		unsigned long	rtc_31:1;			/* RW */
+		unsigned long	rsvd_50_63:14;
+	} s4;
 };
 
 /* ========================================================================= */
 /*                       UVXH_EVENT_OCCURRED2_ALIAS                          */
 /* ========================================================================= */
 #define UVXH_EVENT_OCCURRED2_ALIAS 0x70108UL
-#define UVXH_EVENT_OCCURRED2_ALIAS_32 0xb70
+
+#define UV2H_EVENT_OCCURRED2_ALIAS_32 0xb70
+#define UV3H_EVENT_OCCURRED2_ALIAS_32 0xb70
+#define UV4H_EVENT_OCCURRED2_ALIAS_32 0x610
+#define UVH_EVENT_OCCURRED2_ALIAS_32 (					\
+	is_uv2_hub() ? UV2H_EVENT_OCCURRED2_ALIAS_32 :			\
+	is_uv3_hub() ? UV3H_EVENT_OCCURRED2_ALIAS_32 :			\
+	/*is_uv4_hub*/ UV4H_EVENT_OCCURRED2_ALIAS_32)
 
 
 /* ========================================================================= */
 /*                   UVXH_LB_BAU_SB_ACTIVATION_STATUS_2                      */
 /* ========================================================================= */
-#define UVXH_LB_BAU_SB_ACTIVATION_STATUS_2 0x320130UL
 #define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2 0x320130UL
 #define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2 0x320130UL
-#define UVXH_LB_BAU_SB_ACTIVATION_STATUS_2_32 0x9f0
-#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0x320130UL
-#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0x320130UL
+#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_2 0xc8130UL
+#define UVH_LB_BAU_SB_ACTIVATION_STATUS_2 (				\
+	is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_2 :		\
+	is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_2 :		\
+	/*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_2)
+
+#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0x9f0
+#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0x9f0
+#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0xa10
+#define UVH_LB_BAU_SB_ACTIVATION_STATUS_2_32 (				\
+	is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_32 :		\
+	is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_32 :		\
+	/*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_2_32)
 
 #define UVXH_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0
 #define UVXH_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL
@@ -2772,6 +4080,10 @@
 #define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0
 #define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL
 
+#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0
+#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL
+
+
 union uvxh_lb_bau_sb_activation_status_2_u {
 	unsigned long	v;
 	struct uvxh_lb_bau_sb_activation_status_2_s {
@@ -2783,6 +4095,9 @@
 	struct uv3h_lb_bau_sb_activation_status_2_s {
 		unsigned long	aux_error:64;			/* RW */
 	} s3;
+	struct uv4h_lb_bau_sb_activation_status_2_s {
+		unsigned long	aux_error:64;			/* RW */
+	} s4;
 };
 
 /* ========================================================================= */
@@ -2823,26 +4138,6 @@
 };
 
 /* ========================================================================= */
-/*                          UV3H_GR1_GAM_GR_CONFIG                           */
-/* ========================================================================= */
-#define UV3H_GR1_GAM_GR_CONFIG				0x1000028UL
-
-#define UV3H_GR1_GAM_GR_CONFIG_M_SKT_SHFT		0
-#define UV3H_GR1_GAM_GR_CONFIG_SUBSPACE_SHFT		10
-#define UV3H_GR1_GAM_GR_CONFIG_M_SKT_MASK		0x000000000000003fUL
-#define UV3H_GR1_GAM_GR_CONFIG_SUBSPACE_MASK		0x0000000000000400UL
-
-union uv3h_gr1_gam_gr_config_u {
-	unsigned long	v;
-	struct uv3h_gr1_gam_gr_config_s {
-		unsigned long	m_skt:6;			/* RW */
-		unsigned long	undef_6_9:4;			/* Undefined */
-		unsigned long	subspace:1;			/* RW */
-		unsigned long	reserved:53;
-	} s3;
-};
-
-/* ========================================================================= */
 /*                   UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR                   */
 /* ========================================================================= */
 #define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR		0x1603000UL
@@ -2924,5 +4219,67 @@
 	} s3;
 };
 
+/* ========================================================================= */
+/*                       UV4H_LB_PROC_INTD_QUEUE_FIRST                       */
+/* ========================================================================= */
+#define UV4H_LB_PROC_INTD_QUEUE_FIRST			0xa4100UL
+
+#define UV4H_LB_PROC_INTD_QUEUE_FIRST_FIRST_PAYLOAD_ADDRESS_SHFT 6
+#define UV4H_LB_PROC_INTD_QUEUE_FIRST_FIRST_PAYLOAD_ADDRESS_MASK 0x00003fffffffffc0UL
+
+union uv4h_lb_proc_intd_queue_first_u {
+	unsigned long	v;
+	struct uv4h_lb_proc_intd_queue_first_s {
+		unsigned long	undef_0_5:6;			/* Undefined */
+		unsigned long	first_payload_address:40;	/* RW */
+	} s4;
+};
+
+/* ========================================================================= */
+/*                       UV4H_LB_PROC_INTD_QUEUE_LAST                        */
+/* ========================================================================= */
+#define UV4H_LB_PROC_INTD_QUEUE_LAST			0xa4108UL
+
+#define UV4H_LB_PROC_INTD_QUEUE_LAST_LAST_PAYLOAD_ADDRESS_SHFT 5
+#define UV4H_LB_PROC_INTD_QUEUE_LAST_LAST_PAYLOAD_ADDRESS_MASK 0x00003fffffffffe0UL
+
+union uv4h_lb_proc_intd_queue_last_u {
+	unsigned long	v;
+	struct uv4h_lb_proc_intd_queue_last_s {
+		unsigned long	undef_0_4:5;			/* Undefined */
+		unsigned long	last_payload_address:41;	/* RW */
+	} s4;
+};
+
+/* ========================================================================= */
+/*                     UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR                      */
+/* ========================================================================= */
+#define UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR		0xa4118UL
+
+#define UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR_SOFT_ACK_PENDING_FLAGS_SHFT 0
+#define UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR_SOFT_ACK_PENDING_FLAGS_MASK 0x00000000000000ffUL
+
+union uv4h_lb_proc_intd_soft_ack_clear_u {
+	unsigned long	v;
+	struct uv4h_lb_proc_intd_soft_ack_clear_s {
+		unsigned long	soft_ack_pending_flags:8;	/* WP */
+	} s4;
+};
+
+/* ========================================================================= */
+/*                    UV4H_LB_PROC_INTD_SOFT_ACK_PENDING                     */
+/* ========================================================================= */
+#define UV4H_LB_PROC_INTD_SOFT_ACK_PENDING		0xa4110UL
+
+#define UV4H_LB_PROC_INTD_SOFT_ACK_PENDING_SOFT_ACK_FLAGS_SHFT 0
+#define UV4H_LB_PROC_INTD_SOFT_ACK_PENDING_SOFT_ACK_FLAGS_MASK 0x00000000000000ffUL
+
+union uv4h_lb_proc_intd_soft_ack_pending_u {
+	unsigned long	v;
+	struct uv4h_lb_proc_intd_soft_ack_pending_s {
+		unsigned long	soft_ack_flags:8;		/* RW */
+	} s4;
+};
+
 
 #endif /* _ASM_X86_UV_UV_MMRS_H */
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 1ae89a2..4dcdf74 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -142,6 +142,44 @@
 struct timespec;
 
 /**
+ * struct x86_legacy_devices - legacy x86 devices
+ *
+ * @pnpbios: this platform can have a PNPBIOS. If this is disabled the platform
+ * 	is known to never have a PNPBIOS.
+ *
+ * These are devices known to require LPC or ISA bus. The definition of legacy
+ * devices adheres to the ACPI 5.2.9.3 IA-PC Boot Architecture flag
+ * ACPI_FADT_LEGACY_DEVICES. These devices consist of user visible devices on
+ * the LPC or ISA bus. User visible devices are devices that have end-user
+ * accessible connectors (for example, LPT parallel port). Legacy devices on
+ * the LPC bus include, for example, serial and parallel ports, PS/2 keyboard
+ * / mouse, and the floppy disk controller. A system that lacks all known
+ * legacy devices can assume all devices can be detected exclusively via
+ * standard device enumeration mechanisms including the ACPI namespace.
+ *
+ * A system that does not have ACPI_FADT_LEGACY_DEVICES enabled must not
+ * have any of the legacy devices enumerated below present.
+ */
+struct x86_legacy_devices {
+	int pnpbios;
+};
+
+/**
+ * struct x86_legacy_features - legacy x86 features
+ *
+ * @rtc: this device has a CMOS real-time clock present
+ * @ebda_search: it's safe to search for the EBDA signature in the hardware's
+ * 	low RAM
+ * @devices: legacy x86 devices, refer to struct x86_legacy_devices
+ * 	documentation for further details.
+ */
+struct x86_legacy_features {
+	int rtc;
+	int ebda_search;
+	struct x86_legacy_devices devices;
+};
+
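A hedged consumer sketch for the new fields: quirk code (such as the FADT parsing later in this series) clears a flag, and legacy device registration checks it before probing. maybe_setup_pnpbios() and pnpbios_probe() are hypothetical names:

static int __init maybe_setup_pnpbios(void)
{
	/* FADT reported no user-visible LPC/ISA devices */
	if (!x86_platform.legacy.devices.pnpbios)
		return -ENODEV;

	return pnpbios_probe();	/* hypothetical probe entry point */
}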
+/**
  * struct x86_platform_ops - platform specific runtime functions
  * @calibrate_tsc:		calibrate TSC
  * @get_wallclock:		get time from HW clock like RTC etc.
@@ -152,6 +190,14 @@
  * @save_sched_clock_state:	save state for sched_clock() on suspend
  * @restore_sched_clock_state:	restore state for sched_clock() on resume
 * @apic_post_init:		adjust apic if needed
+ * @legacy:			legacy features
+ * @set_legacy_features:	override legacy features. Use of this callback
+ * 				is highly discouraged. You should only need
+ * 				this if your hardware platform requires further
+ * 				custom fine tuning far beyond what may be
+ * 				possible in x86_early_init_platform_quirks() by
+ * 				only using the current x86_hardware_subarch
+ * 				semantics.
  */
 struct x86_platform_ops {
 	unsigned long (*calibrate_tsc)(void);
@@ -165,6 +211,8 @@
 	void (*save_sched_clock_state)(void);
 	void (*restore_sched_clock_state)(void);
 	void (*apic_post_init)(void);
+	struct x86_legacy_features legacy;
+	void (*set_legacy_features)(void);
 };
 
 struct pci_dev;
@@ -186,6 +234,8 @@
 extern struct x86_platform_ops x86_platform;
 extern struct x86_msi_ops x86_msi;
 extern struct x86_io_apic_ops x86_io_apic_ops;
+
+extern void x86_early_init_platform_quirks(void);
 extern void x86_init_noop(void);
 extern void x86_init_uint_noop(unsigned int unused);
 
diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h
index c54beb4..635eac5 100644
--- a/arch/x86/include/asm/xor_32.h
+++ b/arch/x86/include/asm/xor_32.h
@@ -550,7 +550,7 @@
 #define XOR_TRY_TEMPLATES				\
 do {							\
 	AVX_XOR_SPEED;					\
-	if (cpu_has_xmm) {				\
+	if (boot_cpu_has(X86_FEATURE_XMM)) {		\
 		xor_speed(&xor_block_pIII_sse);		\
 		xor_speed(&xor_block_sse_pf64);		\
 	} else if (boot_cpu_has(X86_FEATURE_MMX)) {	\
diff --git a/arch/x86/include/asm/xor_avx.h b/arch/x86/include/asm/xor_avx.h
index 7c0a517..22a7b18 100644
--- a/arch/x86/include/asm/xor_avx.h
+++ b/arch/x86/include/asm/xor_avx.h
@@ -167,12 +167,12 @@
 
 #define AVX_XOR_SPEED \
 do { \
-	if (cpu_has_avx && cpu_has_osxsave) \
+	if (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE)) \
 		xor_speed(&xor_block_avx); \
 } while (0)
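Both xor_32.h and xor_avx.h follow the same conversion pattern as the rest of this series: the ad-hoc cpu_has_* wrappers give way to boot_cpu_has(X86_FEATURE_*), which tests the boot CPU's capability bitmap directly. A hypothetical helper showing the resulting idiom:

static bool have_avx_xor(void)
{
	/* replaces the removed cpu_has_avx/cpu_has_osxsave wrappers */
	return boot_cpu_has(X86_FEATURE_AVX) &&
	       boot_cpu_has(X86_FEATURE_OSXSAVE);
}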
 
 #define AVX_SELECT(FASTEST) \
-	(cpu_has_avx && cpu_has_osxsave ? &xor_block_avx : FASTEST)
+	(boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE) ? &xor_block_avx : FASTEST)
 
 #else
 
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index 3292543..c18ce67 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -157,7 +157,46 @@
 	__u8  _pad9[276];				/* 0xeec */
 } __attribute__((packed));
 
-enum {
+/**
+ * enum x86_hardware_subarch - x86 hardware subarchitecture
+ *
+ * The x86 hardware_subarch and hardware_subarch_data were added as of the x86
+ * boot protocol 2.07 to help distinguish and support custom x86 boot
+ * sequences. This enum represents accepted values for the x86
+ * hardware_subarch.  Custom x86 boot sequences (not X86_SUBARCH_PC) do not
+ * have, or simply cannot make use of, natural stubs like BIOS or EFI; for
+ * these, hardware_subarch can be used on the Linux entry path to revector to a
+ * subarchitecture stub when needed. This subarchitecture stub can be used to
+ * set up Linux boot parameters or for special care to account for nonstandard
+ * handling of page tables.
+ *
+ * This enum should only ever be used by x86 code, and the code that uses
+ * it should be well contained and compartmentalized.
+ *
+ * KVM and Xen HVM do not have a subarch as these are expected to follow
+ * standard x86 boot entries. If there is a genuine need for a "hypervisor"
+ * type, it should be considered separately in the future. Future guest types
+ * should seriously consider working with standard x86 boot stubs such as
+ * the BIOS or EFI boot stubs.
+ *
+ * WARNING: this enum is only used for legacy hacks, for platform features that
+ *	    are not easily enumerated or discoverable. You should not ever use
+ *	    this for new features.
+ *
+ * @X86_SUBARCH_PC: Should be used if the hardware is enumerable using standard
+ *	PC mechanisms (PCI, ACPI) and doesn't need a special boot flow.
+ * @X86_SUBARCH_LGUEST: Used for x86 hypervisor demo, lguest
+ * @X86_SUBARCH_XEN: Used for Xen guest types which follow the PV boot path,
+ * 	which start at asm startup_xen() entry point and later jump to the C
+ * 	xen_start_kernel() entry point. Both domU and dom0 guest types are
+ * 	currently supported through this PV boot path.
+ * @X86_SUBARCH_INTEL_MID: Used for Intel MID (Mobile Internet Device) platform
+ *	systems which do not have the PCI legacy interfaces.
+ * @X86_SUBARCH_CE4100: Used for the Intel CE media processor (CE4100) SoC
+ * 	for set-top boxes and media devices; the use of a subarch for CE4100
+ * 	is more of a hack...
+ */
+enum x86_hardware_subarch {
 	X86_SUBARCH_PC = 0,
 	X86_SUBARCH_LGUEST,
 	X86_SUBARCH_XEN,
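A sketch of how an early entry path can key off this enum; the hook named here is an assumption standing in for whatever setup a given subarchitecture provides:

static void __init dispatch_subarch(const struct boot_params *bp)
{
	switch (bp->hdr.hardware_subarch) {
	case X86_SUBARCH_INTEL_MID:	/* no PCI legacy interfaces */
	case X86_SUBARCH_CE4100:	/* CE4100 set-top quirks */
		early_subarch_setup(bp);	/* hypothetical hook */
		break;
	default:			/* X86_SUBARCH_PC and friends */
		break;			/* standard boot flow */
	}
}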
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 616ebd2..9abf855 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -2,7 +2,11 @@
 # Makefile for the linux kernel.
 #
 
-extra-y                := head_$(BITS).o head$(BITS).o head.o vmlinux.lds
+extra-y	:= head_$(BITS).o
+extra-y	+= head$(BITS).o
+extra-y	+= ebda.o
+extra-y	+= platform-quirks.o
+extra-y	+= vmlinux.lds
 
 CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
 
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 8c2f1ef..9414f84 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -136,7 +136,7 @@
 {
 	struct acpi_table_madt *madt = NULL;
 
-	if (!cpu_has_apic)
+	if (!boot_cpu_has(X86_FEATURE_APIC))
 		return -EINVAL;
 
 	madt = (struct acpi_table_madt *)table;
@@ -445,7 +445,6 @@
 		polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK;
 
 	mp_override_legacy_irq(bus_irq, polarity, trigger, gsi);
-	acpi_penalize_sci_irq(bus_irq, trigger, polarity);
 
 	/*
 	 * stash over-ride to indicate we've been here
@@ -913,6 +912,15 @@
 
 static int __init acpi_parse_fadt(struct acpi_table_header *table)
 {
+	if (!(acpi_gbl_FADT.boot_flags & ACPI_FADT_LEGACY_DEVICES)) {
+		pr_debug("ACPI: no legacy devices present\n");
+		x86_platform.legacy.devices.pnpbios = 0;
+	}
+
+	if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_CMOS_RTC) {
+		pr_debug("ACPI: not registering RTC platform device\n");
+		x86_platform.legacy.rtc = 0;
+	}
 
 #ifdef CONFIG_X86_PM_TIMER
 	/* detect the location of the ACPI PM Timer */
@@ -951,7 +959,7 @@
 {
 	int count;
 
-	if (!cpu_has_apic)
+	if (!boot_cpu_has(X86_FEATURE_APIC))
 		return -ENODEV;
 
 	/*
@@ -979,7 +987,7 @@
 	int ret;
 	struct acpi_subtable_proc madt_proc[2];
 
-	if (!cpu_has_apic)
+	if (!boot_cpu_has(X86_FEATURE_APIC))
 		return -ENODEV;
 
 	/*
@@ -1125,7 +1133,7 @@
 	if (acpi_disabled || acpi_noirq)
 		return -ENODEV;
 
-	if (!cpu_has_apic)
+	if (!boot_cpu_has(X86_FEATURE_APIC))
 		return -ENODEV;
 
 	/*
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 25f9093..5cb272a 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -11,6 +11,7 @@
 #include <linux/stop_machine.h>
 #include <linux/slab.h>
 #include <linux/kdebug.h>
+#include <asm/text-patching.h>
 #include <asm/alternative.h>
 #include <asm/sections.h>
 #include <asm/pgtable.h>
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index d356987..60078a6 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -607,7 +607,7 @@
 	long tapic = apic_read(APIC_TMCCT);
 	unsigned long pm = acpi_pm_read_early();
 
-	if (cpu_has_tsc)
+	if (boot_cpu_has(X86_FEATURE_TSC))
 		tsc = rdtsc();
 
 	switch (lapic_cal_loops++) {
@@ -668,7 +668,7 @@
 	*delta = (long)res;
 
 	/* Correct the tsc counter value */
-	if (cpu_has_tsc) {
+	if (boot_cpu_has(X86_FEATURE_TSC)) {
 		res = (((u64)(*deltatsc)) * pm_100ms);
 		do_div(res, deltapm);
 		apic_printk(APIC_VERBOSE, "TSC delta adjusted to "
@@ -760,7 +760,7 @@
 	apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
 		    lapic_timer_frequency);
 
-	if (cpu_has_tsc) {
+	if (boot_cpu_has(X86_FEATURE_TSC)) {
 		apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
 			    "%ld.%04ld MHz.\n",
 			    (deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ),
@@ -1085,7 +1085,7 @@
 {
 	unsigned long flags;
 
-	if (!cpu_has_apic && !apic_from_smp_config())
+	if (!boot_cpu_has(X86_FEATURE_APIC) && !apic_from_smp_config())
 		return;
 
 	local_irq_save(flags);
@@ -1134,7 +1134,7 @@
 	 * Don't do the setup now if we have a SMP BIOS as the
 	 * through-I/O-APIC virtual wire mode might be active.
 	 */
-	if (smp_found_config || !cpu_has_apic)
+	if (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC))
 		return;
 
 	/*
@@ -1227,7 +1227,7 @@
 	unsigned long long tsc = 0, ntsc;
 	long long max_loops = cpu_khz ? cpu_khz : 1000000;
 
-	if (cpu_has_tsc)
+	if (boot_cpu_has(X86_FEATURE_TSC))
 		tsc = rdtsc();
 
 	if (disable_apic) {
@@ -1311,7 +1311,7 @@
 			break;
 		}
 		if (queued) {
-			if (cpu_has_tsc && cpu_khz) {
+			if (boot_cpu_has(X86_FEATURE_TSC) && cpu_khz) {
 				ntsc = rdtsc();
 				max_loops = (cpu_khz << 10) - (ntsc - tsc);
 			} else
@@ -1445,7 +1445,7 @@
 {
 	u64 msr;
 
-	if (!cpu_has_apic)
+	if (!boot_cpu_has(X86_FEATURE_APIC))
 		return;
 
 	rdmsrl(MSR_IA32_APICBASE, msr);
@@ -1561,7 +1561,7 @@
 		pr_info("x2apic: enabled by BIOS, switching to x2apic ops\n");
 		x2apic_mode = 1;
 		x2apic_state = X2APIC_ON;
-	} else if (!cpu_has_x2apic) {
+	} else if (!boot_cpu_has(X86_FEATURE_X2APIC)) {
 		x2apic_state = X2APIC_DISABLED;
 	}
 }
@@ -1632,7 +1632,7 @@
  */
 static int __init detect_init_APIC(void)
 {
-	if (!cpu_has_apic) {
+	if (!boot_cpu_has(X86_FEATURE_APIC)) {
 		pr_info("No local APIC present\n");
 		return -1;
 	}
@@ -1711,14 +1711,14 @@
 		goto no_apic;
 	case X86_VENDOR_INTEL:
 		if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 ||
-		    (boot_cpu_data.x86 == 5 && cpu_has_apic))
+		    (boot_cpu_data.x86 == 5 && boot_cpu_has(X86_FEATURE_APIC)))
 			break;
 		goto no_apic;
 	default:
 		goto no_apic;
 	}
 
-	if (!cpu_has_apic) {
+	if (!boot_cpu_has(X86_FEATURE_APIC)) {
 		/*
 		 * Over-ride BIOS and try to enable the local APIC only if
 		 * "lapic" specified.
@@ -2233,19 +2233,19 @@
 		return -1;
 	}
 #ifdef CONFIG_X86_64
-	if (!cpu_has_apic) {
+	if (!boot_cpu_has(X86_FEATURE_APIC)) {
 		disable_apic = 1;
 		pr_info("Apic disabled by BIOS\n");
 		return -1;
 	}
 #else
-	if (!smp_found_config && !cpu_has_apic)
+	if (!smp_found_config && !boot_cpu_has(X86_FEATURE_APIC))
 		return -1;
 
 	/*
 	 * Complain if the BIOS pretends there is one.
 	 */
-	if (!cpu_has_apic &&
+	if (!boot_cpu_has(X86_FEATURE_APIC) &&
 	    APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
 		pr_err("BIOS bug, local APIC 0x%x not detected!...\n",
 			boot_cpu_physical_apicid);
@@ -2426,7 +2426,7 @@
 static int __init init_lapic_sysfs(void)
 {
 	/* XXX: remove suspend/resume procs if !apic_pm_state.active? */
-	if (cpu_has_apic)
+	if (boot_cpu_has(X86_FEATURE_APIC))
 		register_syscore_ops(&lapic_syscore_ops);
 
 	return 0;
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index 331a7a0..13d19ed 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -100,13 +100,13 @@
 
 static u32 noop_apic_read(u32 reg)
 {
-	WARN_ON_ONCE((cpu_has_apic && !disable_apic));
+	WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic);
 	return 0;
 }
 
 static void noop_apic_write(u32 reg, u32 v)
 {
-	WARN_ON_ONCE(cpu_has_apic && !disable_apic);
+	WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic);
 }
 
 struct apic apic_noop = {
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index fdb0fbf..84e33ff 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1454,7 +1454,7 @@
 		ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
 	}
 
-	if (cpu_has_apic || apic_from_smp_config())
+	if (boot_cpu_has(X86_FEATURE_APIC) || apic_from_smp_config())
 		disconnect_bsp_APIC(ioapic_i8259.pin != -1);
 }
 
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index 28bde88..2a0f225 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -230,7 +230,7 @@
 {
 	int apicid, cpuid;
 
-	if (!cpu_has_apic)
+	if (!boot_cpu_has(X86_FEATURE_APIC))
 		return 0;
 
 	apicid = hard_smp_processor_id();
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index ef49551..a5e400a 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -944,7 +944,7 @@
 	print_PIC();
 
 	/* don't print out if apic is not there */
-	if (!cpu_has_apic && !apic_from_smp_config())
+	if (!boot_cpu_has(X86_FEATURE_APIC) && !apic_from_smp_config())
 		return 0;
 
 	print_local_APICs(show_lapic);
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index d7ce96a..2900315 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -48,12 +48,35 @@
 static u64 gru_dist_base, gru_first_node_paddr = -1LL, gru_last_node_paddr;
 static u64 gru_dist_lmask, gru_dist_umask;
 static union uvh_apicid uvh_apicid;
+
+/* info derived from CPUID */
+static struct {
+	unsigned int apicid_shift;
+	unsigned int apicid_mask;
+	unsigned int socketid_shift;	/* aka pnode_shift for UV1/2/3 */
+	unsigned int pnode_mask;
+	unsigned int gpa_shift;
+} uv_cpuid;
+
 int uv_min_hub_revision_id;
 EXPORT_SYMBOL_GPL(uv_min_hub_revision_id);
 unsigned int uv_apicid_hibits;
 EXPORT_SYMBOL_GPL(uv_apicid_hibits);
 
 static struct apic apic_x2apic_uv_x;
+static struct uv_hub_info_s uv_hub_info_node0;
+
+/* Set this to use hardware error handler instead of kernel panic */
+static int disable_uv_undefined_panic = 1;
+unsigned long uv_undefined(char *str)
+{
+	if (likely(!disable_uv_undefined_panic))
+		panic("UV: error: undefined MMR: %s\n", str);
+	else
+		pr_crit("UV: error: undefined MMR: %s\n", str);
+	return ~0ul;	/* cause a machine fault */
+}
+EXPORT_SYMBOL(uv_undefined);
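Per-hub dispatch macros in uv_mmrs.h are expected to route accesses to registers that a given hub generation lacks through this handler; a sketch of that shape, with a made-up register name and offset:

#define UV4H_ONLY_EXAMPLE_MMR (					\
	is_uv4_hub() ? 0xdead00UL /* hypothetical UV4 offset */ :	\
	uv_undefined("UV4H_ONLY_EXAMPLE_MMR"))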
 
 static unsigned long __init uv_early_read_mmr(unsigned long addr)
 {
@@ -108,21 +131,71 @@
 	case UV3_HUB_PART_NUMBER_X:
 		uv_min_hub_revision_id += UV3_HUB_REVISION_BASE;
 		break;
+	case UV4_HUB_PART_NUMBER:
+		uv_min_hub_revision_id += UV4_HUB_REVISION_BASE - 1;
+		break;
 	}
 
 	uv_hub_info->hub_revision = uv_min_hub_revision_id;
-	pnode = (node_id.s.node_id >> 1) & ((1 << m_n_config.s.n_skt) - 1);
+	uv_cpuid.pnode_mask = (1 << m_n_config.s.n_skt) - 1;
+	pnode = (node_id.s.node_id >> 1) & uv_cpuid.pnode_mask;
+	uv_cpuid.gpa_shift = 46;	/* default unless changed */
+
+	pr_info("UV: rev:%d part#:%x nodeid:%04x n_skt:%d pnmsk:%x pn:%x\n",
+		node_id.s.revision, node_id.s.part_number, node_id.s.node_id,
+		m_n_config.s.n_skt, uv_cpuid.pnode_mask, pnode);
 	return pnode;
 }
 
-static void __init early_get_apic_pnode_shift(void)
+/* [copied from arch/x86/kernel/cpu/topology.c:detect_extended_topology()] */
+#define SMT_LEVEL	0	/* leaf 0xb SMT level */
+#define INVALID_TYPE	0	/* leaf 0xb sub-leaf types */
+#define SMT_TYPE	1
+#define CORE_TYPE	2
+#define LEAFB_SUBTYPE(ecx)		(((ecx) >> 8) & 0xff)
+#define BITS_SHIFT_NEXT_LEVEL(eax)	((eax) & 0x1f)
+
+static void set_x2apic_bits(void)
 {
-	uvh_apicid.v = uv_early_read_mmr(UVH_APICID);
-	if (!uvh_apicid.v)
-		/*
-		 * Old bios, use default value
-		 */
-		uvh_apicid.s.pnode_shift = UV_APIC_PNODE_SHIFT;
+	unsigned int eax, ebx, ecx, edx, sub_index;
+	unsigned int sid_shift;
+
+	cpuid(0, &eax, &ebx, &ecx, &edx);
+	if (eax < 0xb) {
+		pr_info("UV: CPU does not have CPUID.11\n");
+		return;
+	}
+	cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
+	if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE)) {
+		pr_info("UV: CPUID.11 not implemented\n");
+		return;
+	}
+	sid_shift = BITS_SHIFT_NEXT_LEVEL(eax);
+	sub_index = 1;
+	do {
+		cpuid_count(0xb, sub_index, &eax, &ebx, &ecx, &edx);
+		if (LEAFB_SUBTYPE(ecx) == CORE_TYPE) {
+			sid_shift = BITS_SHIFT_NEXT_LEVEL(eax);
+			break;
+		}
+		sub_index++;
+	} while (LEAFB_SUBTYPE(ecx) != INVALID_TYPE);
+	uv_cpuid.apicid_shift = 0;
+	uv_cpuid.apicid_mask = (~(-1 << sid_shift));
+	uv_cpuid.socketid_shift = sid_shift;
+}
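To make the CPUID.0xB walk concrete: if sub-leaf 0 (SMT) reports a next-level shift of 1 and sub-leaf 1 (CORE) reports 5, the loop settles on sid_shift = 5, so apicid_mask becomes 0x1f and a socket id is the APIC id shifted right by 5. A sketch of the derived conversion (helper name is illustrative):

static inline unsigned int uv_apicid_to_socket(unsigned int apicid)
{
	/* e.g. socketid_shift == 5: apicid 0x47 maps to socket 2 */
	return apicid >> uv_cpuid.socketid_shift;
}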
+
+static void __init early_get_apic_socketid_shift(void)
+{
+	if (is_uv2_hub() || is_uv3_hub())
+		uvh_apicid.v = uv_early_read_mmr(UVH_APICID);
+
+	set_x2apic_bits();
+
+	pr_info("UV: apicid_shift:%d apicid_mask:0x%x\n",
+		uv_cpuid.apicid_shift, uv_cpuid.apicid_mask);
+	pr_info("UV: socketid_shift:%d pnode_mask:0x%x\n",
+		uv_cpuid.socketid_shift, uv_cpuid.pnode_mask);
 }
 
 /*
@@ -150,13 +223,18 @@
 	if (strncmp(oem_id, "SGI", 3) != 0)
 		return 0;
 
+	/* Setup early hub type field in uv_hub_info for Node 0 */
+	uv_cpu_info->p_uv_hub_info = &uv_hub_info_node0;
+
 	/*
 	 * Determine UV arch type.
 	 *   SGI: UV100/1000
 	 *   SGI2: UV2000/3000
 	 *   SGI3: UV300 (truncated to 4 chars because of different varieties)
+	 *   SGI4: UV400 (truncated to 4 chars because of different varieties)
 	 */
 	uv_hub_info->hub_revision =
+		!strncmp(oem_id, "SGI4", 4) ? UV4_HUB_REVISION_BASE :
 		!strncmp(oem_id, "SGI3", 4) ? UV3_HUB_REVISION_BASE :
 		!strcmp(oem_id, "SGI2") ? UV2_HUB_REVISION_BASE :
 		!strcmp(oem_id, "SGI") ? UV1_HUB_REVISION_BASE : 0;
@@ -165,7 +243,7 @@
 		goto badbios;
 
 	pnodeid = early_get_pnodeid();
-	early_get_apic_pnode_shift();
+	early_get_apic_socketid_shift();
 	x86_platform.is_untracked_pat_range =  uv_is_untracked_pat_range;
 	x86_platform.nmi_init = uv_nmi_init;
 
@@ -211,17 +289,11 @@
 }
 EXPORT_SYMBOL_GPL(is_uv_system);
 
-DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
-EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info);
+void **__uv_hub_info_list;
+EXPORT_SYMBOL_GPL(__uv_hub_info_list);
 
-struct uv_blade_info *uv_blade_info;
-EXPORT_SYMBOL_GPL(uv_blade_info);
-
-short *uv_node_to_blade;
-EXPORT_SYMBOL_GPL(uv_node_to_blade);
-
-short *uv_cpu_to_blade;
-EXPORT_SYMBOL_GPL(uv_cpu_to_blade);
+DEFINE_PER_CPU(struct uv_cpu_info_s, __uv_cpu_info);
+EXPORT_PER_CPU_SYMBOL_GPL(__uv_cpu_info);
 
 short uv_possible_blades;
 EXPORT_SYMBOL_GPL(uv_possible_blades);
@@ -229,6 +301,115 @@
 unsigned long sn_rtc_cycles_per_second;
 EXPORT_SYMBOL(sn_rtc_cycles_per_second);
 
+/* the following values are used for the per node hub info struct */
+static __initdata unsigned short *_node_to_pnode;
+static __initdata unsigned short _min_socket, _max_socket;
+static __initdata unsigned short _min_pnode, _max_pnode, _gr_table_len;
+static __initdata struct uv_gam_range_entry *uv_gre_table;
+static __initdata struct uv_gam_parameters *uv_gp_table;
+static __initdata unsigned short *_socket_to_node;
+static __initdata unsigned short *_socket_to_pnode;
+static __initdata unsigned short *_pnode_to_socket;
+static __initdata struct uv_gam_range_s *_gr_table;
+#define	SOCK_EMPTY	((unsigned short)~0)
+
+int uv_hub_info_version(void)
+{
+	return UV_HUB_INFO_VERSION;
+}
+EXPORT_SYMBOL(uv_hub_info_version);
+
+/* Build GAM range lookup table */
+static __init void build_uv_gr_table(void)
+{
+	struct uv_gam_range_entry *gre = uv_gre_table;
+	struct uv_gam_range_s *grt;
+	unsigned long last_limit = 0, ram_limit = 0;
+	int bytes, i, sid, lsid = -1;
+
+	if (!gre)
+		return;
+
+	bytes = _gr_table_len * sizeof(struct uv_gam_range_s);
+	grt = kzalloc(bytes, GFP_KERNEL);
+	BUG_ON(!grt);
+	_gr_table = grt;
+
+	for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
+		if (gre->type == UV_GAM_RANGE_TYPE_HOLE) {
+			if (!ram_limit) {   /* mark hole between ram/non-ram */
+				ram_limit = last_limit;
+				last_limit = gre->limit;
+				lsid++;
+				continue;
+			}
+			last_limit = gre->limit;
+			pr_info("UV: extra hole in GAM RE table @%d\n",
+				(int)(gre - uv_gre_table));
+			continue;
+		}
+		if (_max_socket < gre->sockid) {
+			pr_err("UV: GAM table sockid(%d) too large(>%d) @%d\n",
+				gre->sockid, _max_socket,
+				(int)(gre - uv_gre_table));
+			continue;
+		}
+		sid = gre->sockid - _min_socket;
+		if (lsid < sid) {		/* new range */
+			grt = &_gr_table[sid];
+			grt->base = lsid;
+			grt->nasid = gre->nasid;
+			grt->limit = last_limit = gre->limit;
+			lsid = sid;
+			continue;
+		}
+		if (lsid == sid && !ram_limit) {	/* update range */
+			if (grt->limit == last_limit) {	/* .. if contiguous */
+				grt->limit = last_limit = gre->limit;
+				continue;
+			}
+		}
+		if (!ram_limit) {		/* non-contiguous ram range */
+			grt++;
+			grt->base = sid - 1;
+			grt->nasid = gre->nasid;
+			grt->limit = last_limit = gre->limit;
+			continue;
+		}
+		grt++;				/* non-contiguous/non-ram */
+		grt->base = grt - _gr_table;	/* base is this entry */
+		grt->nasid = gre->nasid;
+		grt->limit = last_limit = gre->limit;
+		lsid++;
+	}
+
+	/* shorten table if possible */
+	grt++;
+	i = grt - _gr_table;
+	if (i < _gr_table_len) {
+		void *ret;
+
+		bytes = i * sizeof(struct uv_gam_range_s);
+		ret = krealloc(_gr_table, bytes, GFP_KERNEL);
+		if (ret) {
+			_gr_table = ret;
+			_gr_table_len = i;
+		}
+	}
+
+	/* display resultant gam range table */
+	for (i = 0, grt = _gr_table; i < _gr_table_len; i++, grt++) {
+		int gb = grt->base;
+		unsigned long start = gb < 0 ?  0 :
+			(unsigned long)_gr_table[gb].limit << UV_GAM_RANGE_SHFT;
+		unsigned long end =
+			(unsigned long)grt->limit << UV_GAM_RANGE_SHFT;
+
+		pr_info("UV: GAM Range %2d %04x 0x%013lx-0x%013lx (%d)\n",
+			i, grt->nasid, start, end, gb);
+	}
+}
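A hedged sketch of how the table built above can be consulted; the real lookups live in the UV headers, and this helper exists only to illustrate the data layout:

static int gam_gpa_to_nasid(unsigned long gpa)
{
	struct uv_gam_range_s *grt = _gr_table;
	int i;

	/* entries are sorted by ascending limit (in 64M units) */
	for (i = 0; i < _gr_table_len; i++, grt++)
		if ((gpa >> UV_GAM_RANGE_SHFT) < grt->limit)
			return grt->nasid;
	return -1;	/* address beyond all configured ranges */
}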
+
 static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip)
 {
 	unsigned long val;
@@ -355,7 +536,6 @@
 
 static unsigned int uv_read_apic_id(void)
 {
-
 	return x2apic_get_apic_id(apic_read(APIC_ID));
 }
 
@@ -430,58 +610,38 @@
 	__this_cpu_write(x2apic_extra_bits, pnode << uvh_apicid.s.pnode_shift);
 }
 
-/*
- * Called on boot cpu.
- */
-static __init int boot_pnode_to_blade(int pnode)
-{
-	int blade;
-
-	for (blade = 0; blade < uv_num_possible_blades(); blade++)
-		if (pnode == uv_blade_info[blade].pnode)
-			return blade;
-	BUG();
-}
-
-struct redir_addr {
-	unsigned long redirect;
-	unsigned long alias;
-};
-
+#define	UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_LENGTH	3
 #define DEST_SHIFT UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT
 
-static __initdata struct redir_addr redir_addrs[] = {
-	{UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR, UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR},
-	{UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR, UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR},
-	{UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR, UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR},
-};
-
-static unsigned char get_n_lshift(int m_val)
-{
-	union uv3h_gr0_gam_gr_config_u m_gr_config;
-
-	if (is_uv1_hub())
-		return m_val;
-
-	if (is_uv2_hub())
-		return m_val == 40 ? 40 : 39;
-
-	m_gr_config.v = uv_read_local_mmr(UV3H_GR0_GAM_GR_CONFIG);
-	return m_gr_config.s3.m_skt;
-}
-
 static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size)
 {
 	union uvh_rh_gam_alias210_overlay_config_2_mmr_u alias;
 	union uvh_rh_gam_alias210_redirect_config_2_mmr_u redirect;
+	unsigned long m_redirect;
+	unsigned long m_overlay;
 	int i;
 
-	for (i = 0; i < ARRAY_SIZE(redir_addrs); i++) {
-		alias.v = uv_read_local_mmr(redir_addrs[i].alias);
+	for (i = 0; i < UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_LENGTH; i++) {
+		switch (i) {
+		case 0:
+			m_redirect = UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR;
+			m_overlay = UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR;
+			break;
+		case 1:
+			m_redirect = UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR;
+			m_overlay = UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR;
+			break;
+		case 2:
+			m_redirect = UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR;
+			m_overlay = UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR;
+			break;
+		}
+		alias.v = uv_read_local_mmr(m_overlay);
 		if (alias.s.enable && alias.s.base == 0) {
 			*size = (1UL << alias.s.m_alias);
-			redirect.v = uv_read_local_mmr(redir_addrs[i].redirect);
-			*base = (unsigned long)redirect.s.dest_base << DEST_SHIFT;
+			redirect.v = uv_read_local_mmr(m_redirect);
+			*base = (unsigned long)redirect.s.dest_base
+							<< DEST_SHIFT;
 			return;
 		}
 	}
@@ -544,6 +704,8 @@
 {
 	union uvh_rh_gam_gru_overlay_config_mmr_u gru;
 	int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT;
+	unsigned long mask = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK;
+	unsigned long base;
 
 	gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR);
 	if (!gru.s.enable) {
@@ -555,8 +717,9 @@
 		map_gru_distributed(gru.v);
 		return;
 	}
-	map_high("GRU", gru.s.base, shift, shift, max_pnode, map_wb);
-	gru_start_paddr = ((u64)gru.s.base << shift);
+	base = (gru.v & mask) >> shift;
+	map_high("GRU", base, shift, shift, max_pnode, map_wb);
+	gru_start_paddr = ((u64)base << shift);
 	gru_end_paddr = gru_start_paddr + (1UL << shift) * (max_pnode + 1);
 }
 
@@ -595,6 +758,7 @@
 	},
 };
 
+/* UV3 & UV4 have identical MMIOH overlay configs */
 static __init void map_mmioh_high_uv3(int index, int min_pnode, int max_pnode)
 {
 	union uv3h_rh_gam_mmioh_overlay_config0_mmr_u overlay;
@@ -674,7 +838,7 @@
 	unsigned long mmr, base;
 	int shift, enable, m_io, n_io;
 
-	if (is_uv3_hub()) {
+	if (is_uv3_hub() || is_uv4_hub()) {
 		/* Map both MMIOH Regions */
 		map_mmioh_high_uv3(0, min_pnode, max_pnode);
 		map_mmioh_high_uv3(1, min_pnode, max_pnode);
@@ -739,8 +903,8 @@
  */
 static void uv_heartbeat(unsigned long ignored)
 {
-	struct timer_list *timer = &uv_hub_info->scir.timer;
-	unsigned char bits = uv_hub_info->scir.state;
+	struct timer_list *timer = &uv_scir_info->timer;
+	unsigned char bits = uv_scir_info->state;
 
 	/* flip heartbeat bit */
 	bits ^= SCIR_CPU_HEARTBEAT;
@@ -760,14 +924,14 @@
 
 static void uv_heartbeat_enable(int cpu)
 {
-	while (!uv_cpu_hub_info(cpu)->scir.enabled) {
-		struct timer_list *timer = &uv_cpu_hub_info(cpu)->scir.timer;
+	while (!uv_cpu_scir_info(cpu)->enabled) {
+		struct timer_list *timer = &uv_cpu_scir_info(cpu)->timer;
 
 		uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY);
 		setup_timer(timer, uv_heartbeat, cpu);
 		timer->expires = jiffies + SCIR_CPU_HB_INTERVAL;
 		add_timer_on(timer, cpu);
-		uv_cpu_hub_info(cpu)->scir.enabled = 1;
+		uv_cpu_scir_info(cpu)->enabled = 1;
 
 		/* also ensure that boot cpu is enabled */
 		cpu = 0;
@@ -777,9 +941,9 @@
 #ifdef CONFIG_HOTPLUG_CPU
 static void uv_heartbeat_disable(int cpu)
 {
-	if (uv_cpu_hub_info(cpu)->scir.enabled) {
-		uv_cpu_hub_info(cpu)->scir.enabled = 0;
-		del_timer(&uv_cpu_hub_info(cpu)->scir.timer);
+	if (uv_cpu_scir_info(cpu)->enabled) {
+		uv_cpu_scir_info(cpu)->enabled = 0;
+		del_timer(&uv_cpu_scir_info(cpu)->timer);
 	}
 	uv_set_cpu_scir_bits(cpu, 0xff);
 }
@@ -862,28 +1026,368 @@
 void uv_cpu_init(void)
 {
 	/* CPU 0 initialization will be done via uv_system_init. */
-	if (!uv_blade_info)
+	if (smp_processor_id() == 0)
 		return;
 
-	uv_blade_info[uv_numa_blade_id()].nr_online_cpus++;
+	uv_hub_info->nr_online_cpus++;
 
 	if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
 		set_x2apic_extra_bits(uv_hub_info->pnode);
 }
 
+struct mn {
+	unsigned char	m_val;
+	unsigned char	n_val;
+	unsigned char	m_shift;
+	unsigned char	n_lshift;
+};
+
+static void get_mn(struct mn *mnp)
+{
+	union uvh_rh_gam_config_mmr_u m_n_config;
+	union uv3h_gr0_gam_gr_config_u m_gr_config;
+
+	m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR);
+	mnp->n_val = m_n_config.s.n_skt;
+	if (is_uv4_hub()) {
+		mnp->m_val = 0;
+		mnp->n_lshift = 0;
+	} else if (is_uv3_hub()) {
+		mnp->m_val = m_n_config.s3.m_skt;
+		m_gr_config.v = uv_read_local_mmr(UV3H_GR0_GAM_GR_CONFIG);
+		mnp->n_lshift = m_gr_config.s3.m_skt;
+	} else if (is_uv2_hub()) {
+		mnp->m_val = m_n_config.s2.m_skt;
+		mnp->n_lshift = mnp->m_val == 40 ? 40 : 39;
+	} else if (is_uv1_hub()) {
+		mnp->m_val = m_n_config.s1.m_skt;
+		mnp->n_lshift = mnp->m_val;
+	}
+	mnp->m_shift = mnp->m_val ? 64 - mnp->m_val : 0;
+}
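On pre-UV4 hubs the M/N geometry read here splits a global address into a node id (n_val bits) above a node offset (m_val bits). A minimal sketch under that assumption:

static inline unsigned long gnode_to_gpa_base(const struct mn *mnp, int gnode)
{
	/* e.g. m_val == 40: gnode 3 owns addresses starting at 3UL << 40 */
	return (unsigned long)gnode << mnp->m_val;
}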
+
+void __init uv_init_hub_info(struct uv_hub_info_s *hub_info)
+{
+	struct mn mn = {0};	/* avoid uninitialized warnings */
+	union uvh_node_id_u node_id;
+
+	get_mn(&mn);
+	hub_info->m_val = mn.m_val;
+	hub_info->n_val = mn.n_val;
+	hub_info->m_shift = mn.m_shift;
+	hub_info->n_lshift = mn.n_lshift ? mn.n_lshift : 0;
+
+	hub_info->hub_revision = uv_hub_info->hub_revision;
+	hub_info->pnode_mask = uv_cpuid.pnode_mask;
+	hub_info->min_pnode = _min_pnode;
+	hub_info->min_socket = _min_socket;
+	hub_info->pnode_to_socket = _pnode_to_socket;
+	hub_info->socket_to_node = _socket_to_node;
+	hub_info->socket_to_pnode = _socket_to_pnode;
+	hub_info->gr_table_len = _gr_table_len;
+	hub_info->gr_table = _gr_table;
+	hub_info->gpa_mask = mn.m_val ?
+		(1UL << (mn.m_val + mn.n_val)) - 1 :
+		(1UL << uv_cpuid.gpa_shift) - 1;
+
+	node_id.v = uv_read_local_mmr(UVH_NODE_ID);
+	hub_info->gnode_extra =
+		(node_id.s.node_id & ~((1 << mn.n_val) - 1)) >> 1;
+
+	hub_info->gnode_upper =
+		((unsigned long)hub_info->gnode_extra << mn.m_val);
+
+	if (uv_gp_table) {
+		hub_info->global_mmr_base = uv_gp_table->mmr_base;
+		hub_info->global_mmr_shift = uv_gp_table->mmr_shift;
+		hub_info->global_gru_base = uv_gp_table->gru_base;
+		hub_info->global_gru_shift = uv_gp_table->gru_shift;
+		hub_info->gpa_shift = uv_gp_table->gpa_shift;
+		hub_info->gpa_mask = (1UL << hub_info->gpa_shift) - 1;
+	} else {
+		hub_info->global_mmr_base =
+			uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) &
+					~UV_MMR_ENABLE;
+		hub_info->global_mmr_shift = _UV_GLOBAL_MMR64_PNODE_SHIFT;
+	}
+
+	get_lowmem_redirect(
+		&hub_info->lowmem_remap_base, &hub_info->lowmem_remap_top);
+
+	hub_info->apic_pnode_shift = uv_cpuid.socketid_shift;
+
+	/* show system specific info */
+	pr_info("UV: N:%d M:%d m_shift:%d n_lshift:%d\n",
+		hub_info->n_val, hub_info->m_val,
+		hub_info->m_shift, hub_info->n_lshift);
+
+	pr_info("UV: gpa_mask/shift:0x%lx/%d pnode_mask:0x%x apic_pns:%d\n",
+		hub_info->gpa_mask, hub_info->gpa_shift,
+		hub_info->pnode_mask, hub_info->apic_pnode_shift);
+
+	pr_info("UV: mmr_base/shift:0x%lx/%ld gru_base/shift:0x%lx/%ld\n",
+		hub_info->global_mmr_base, hub_info->global_mmr_shift,
+		hub_info->global_gru_base, hub_info->global_gru_shift);
+
+	pr_info("UV: gnode_upper:0x%lx gnode_extra:0x%x\n",
+		hub_info->gnode_upper, hub_info->gnode_extra);
+}
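An illustrative range check against the geometry initialized above (not part of the patch): with m_val=40 and n_val=6, gpa_mask covers 46 address bits; on UV4 (m_val == 0) the CPUID-derived gpa_shift is used instead.

static inline bool uv_gpa_is_valid(const struct uv_hub_info_s *hi,
				   unsigned long gpa)
{
	/* gpa_mask = (1UL << (m_val + n_val)) - 1, or CPUID-derived on UV4 */
	return (gpa & ~hi->gpa_mask) == 0;
}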
+
+static void __init decode_gam_params(unsigned long ptr)
+{
+	uv_gp_table = (struct uv_gam_parameters *)ptr;
+
+	pr_info("UV: GAM Params...\n");
+	pr_info("UV: mmr_base/shift:0x%llx/%d gru_base/shift:0x%llx/%d gpa_shift:%d\n",
+		uv_gp_table->mmr_base, uv_gp_table->mmr_shift,
+		uv_gp_table->gru_base, uv_gp_table->gru_shift,
+		uv_gp_table->gpa_shift);
+}
+
+static void __init decode_gam_rng_tbl(unsigned long ptr)
+{
+	struct uv_gam_range_entry *gre = (struct uv_gam_range_entry *)ptr;
+	unsigned long lgre = 0;
+	int index = 0;
+	int sock_min = 999999, pnode_min = 99999;
+	int sock_max = -1, pnode_max = -1;
+
+	uv_gre_table = gre;
+	for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
+		if (!index) {
+			pr_info("UV: GAM Range Table...\n");
+			pr_info("UV:  # %20s %14s %5s %4s %5s %3s %2s %3s\n",
+				"Range", "", "Size", "Type", "NASID",
+				"SID", "PN", "PXM");
+		}
+		pr_info(
+		"UV: %2d: 0x%014lx-0x%014lx %5luG %3d   %04x  %02x %02x %3d\n",
+			index++,
+			(unsigned long)lgre << UV_GAM_RANGE_SHFT,
+			(unsigned long)gre->limit << UV_GAM_RANGE_SHFT,
+			((unsigned long)(gre->limit - lgre)) >>
+				(30 - UV_GAM_RANGE_SHFT), /* 64M -> 1G */
+			gre->type, gre->nasid, gre->sockid,
+			gre->pnode, gre->pxm);
+
+		lgre = gre->limit;
+		if (sock_min > gre->sockid)
+			sock_min = gre->sockid;
+		if (sock_max < gre->sockid)
+			sock_max = gre->sockid;
+		if (pnode_min > gre->pnode)
+			pnode_min = gre->pnode;
+		if (pnode_max < gre->pnode)
+			pnode_max = gre->pnode;
+	}
+	_min_socket = sock_min;
+	_max_socket = sock_max;
+	_min_pnode = pnode_min;
+	_max_pnode = pnode_max;
+	_gr_table_len = index;
+	pr_info(
+	"UV: GRT: %d entries, sockets(min:%x,max:%x) pnodes(min:%x,max:%x)\n",
+		index, _min_socket, _max_socket, _min_pnode, _max_pnode);
+}
+
+static void __init decode_uv_systab(void)
+{
+	struct uv_systab *st;
+	int i;
+
+	st = uv_systab;
+	if (!st || st->revision < UV_SYSTAB_VERSION_UV4) {
+		if (!is_uv4_hub())
+			return;	/* UVsystab only required as of UV4 */
+		pr_crit("UV: BIOS UVsystab missing or predates UV4!\n");
+		BUG();
+	}
+	if (st->revision != UV_SYSTAB_VERSION_UV4_LATEST) {
+		pr_crit(
+		"UV: BIOS UVsystab version(%x) mismatch, expecting(%x)\n",
+			st->revision, UV_SYSTAB_VERSION_UV4_LATEST);
+		BUG();
+	}
+
+	for (i = 0; st->entry[i].type != UV_SYSTAB_TYPE_UNUSED; i++) {
+		unsigned long ptr = st->entry[i].offset;
+
+		if (!ptr)
+			continue;
+
+		ptr = ptr + (unsigned long)st;
+
+		switch (st->entry[i].type) {
+		case UV_SYSTAB_TYPE_GAM_PARAMS:
+			decode_gam_params(ptr);
+			break;
+
+		case UV_SYSTAB_TYPE_GAM_RNG_TBL:
+			decode_gam_rng_tbl(ptr);
+			break;
+		}
+	}
+}
+
+/*
+ * Setup physical blade translations from UVH_NODE_PRESENT_TABLE
+ * .. NB: UVH_NODE_PRESENT_TABLE is going away,
+ * .. being replaced by GAM Range Table
+ */
+static __init void boot_init_possible_blades(struct uv_hub_info_s *hub_info)
+{
+	int i, uv_pb = 0;
+
+	pr_info("UV: NODE_PRESENT_DEPTH = %d\n", UVH_NODE_PRESENT_TABLE_DEPTH);
+	for (i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) {
+		unsigned long np;
+
+		np = uv_read_local_mmr(UVH_NODE_PRESENT_TABLE + i * 8);
+		if (np)
+			pr_info("UV: NODE_PRESENT(%d) = 0x%016lx\n", i, np);
+
+		uv_pb += hweight64(np);
+	}
+	if (uv_possible_blades != uv_pb)
+		uv_possible_blades = uv_pb;
+}
+
+static void __init build_socket_tables(void)
+{
+	struct uv_gam_range_entry *gre = uv_gre_table;
+	int num, nump;
+	int cpu, i, lnid;
+	int minsock = _min_socket;
+	int maxsock = _max_socket;
+	int minpnode = _min_pnode;
+	int maxpnode = _max_pnode;
+	size_t bytes;
+
+	if (!gre) {
+		if (is_uv1_hub() || is_uv2_hub() || is_uv3_hub()) {
+			pr_info("UV: No UVsystab socket table, ignoring\n");
+			return;		/* not required */
+		}
+		pr_crit(
+		"UV: Error: UVsystab address translations not available!\n");
+		BUG();
+	}
+
+	/* build socket id -> node id, pnode */
+	num = maxsock - minsock + 1;
+	bytes = num * sizeof(_socket_to_node[0]);
+	_socket_to_node = kmalloc(bytes, GFP_KERNEL);
+	_socket_to_pnode = kmalloc(bytes, GFP_KERNEL);
+
+	nump = maxpnode - minpnode + 1;
+	bytes = nump * sizeof(_pnode_to_socket[0]);
+	_pnode_to_socket = kmalloc(bytes, GFP_KERNEL);
+	BUG_ON(!_socket_to_node || !_socket_to_pnode || !_pnode_to_socket);
+
+	for (i = 0; i < num; i++)
+		_socket_to_node[i] = _socket_to_pnode[i] = SOCK_EMPTY;
+
+	for (i = 0; i < nump; i++)
+		_pnode_to_socket[i] = SOCK_EMPTY;
+
+	/* fill in pnode/node/addr conversion list values */
+	pr_info("UV: GAM Building socket/pnode/pxm conversion tables\n");
+	for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
+		if (gre->type == UV_GAM_RANGE_TYPE_HOLE)
+			continue;
+		i = gre->sockid - minsock;
+		if (_socket_to_pnode[i] != SOCK_EMPTY)
+			continue;	/* duplicate */
+		_socket_to_pnode[i] = gre->pnode;
+		_socket_to_node[i] = gre->pxm;
+
+		i = gre->pnode - minpnode;
+		_pnode_to_socket[i] = gre->sockid;
+
+		pr_info(
+		"UV: sid:%02x type:%d nasid:%04x pn:%02x pxm:%2d pn2s:%2x\n",
+			gre->sockid, gre->type, gre->nasid,
+			_socket_to_pnode[gre->sockid - minsock],
+			_socket_to_node[gre->sockid - minsock],
+			_pnode_to_socket[gre->pnode - minpnode]);
+	}
+
+	/* check socket -> node values */
+	lnid = -1;
+	for_each_present_cpu(cpu) {
+		int nid = cpu_to_node(cpu);
+		int apicid, sockid;
+
+		if (lnid == nid)
+			continue;
+		lnid = nid;
+		apicid = per_cpu(x86_cpu_to_apicid, cpu);
+		sockid = apicid >> uv_cpuid.socketid_shift;
+		i = sockid - minsock;
+
+		if (nid != _socket_to_node[i]) {
+			pr_warn(
+			"UV: %02x: type:%d socket:%02x PXM:%02x != node:%2d\n",
+				i, gre->type, sockid, _socket_to_node[i], nid);
+			_socket_to_node[i] = nid;
+		}
+	}
+
+	/* Setup physical blade to pnode translation from GAM Range Table */
+	bytes = num_possible_nodes() * sizeof(_node_to_pnode[0]);
+	_node_to_pnode = kmalloc(bytes, GFP_KERNEL);
+	BUG_ON(!_node_to_pnode);
+
+	for (lnid = 0; lnid < num_possible_nodes(); lnid++) {
+		unsigned short sockid;
+
+		for (sockid = minsock; sockid <= maxsock; sockid++) {
+			if (lnid == _socket_to_node[sockid - minsock]) {
+				_node_to_pnode[lnid] =
+					_socket_to_pnode[sockid - minsock];
+				break;
+			}
+		}
+		if (sockid > maxsock) {
+			pr_err("UV: socket for node %d not found!\n", lnid);
+			BUG();
+		}
+	}
+
+	/*
+	 * If socket id == pnode or socket id == node for all nodes,
+	 *   system runs faster by removing corresponding conversion table.
+	 */
+	pr_info("UV: Checking socket->node/pnode for identity maps\n");
+	if (minsock == 0) {
+		for (i = 0; i < num; i++)
+			if (_socket_to_node[i] == SOCK_EMPTY ||
+				i != _socket_to_node[i])
+				break;
+		if (i >= num) {
+			kfree(_socket_to_node);
+			_socket_to_node = NULL;
+			pr_info("UV: 1:1 socket_to_node table removed\n");
+		}
+	}
+	if (minsock == minpnode) {
+		for (i = 0; i < num; i++)
+			if (_socket_to_pnode[i] != SOCK_EMPTY &&
+				_socket_to_pnode[i] != i + minpnode)
+				break;
+		if (i >= num) {
+			kfree(_socket_to_pnode);
+			_socket_to_pnode = NULL;
+			pr_info("UV: 1:1 socket_to_pnode table removed\n");
+		}
+	}
+}
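The identity-map shortcut above assumes consumers treat a dropped (NULL) table as 1:1; a sketch of the matching lookup side, with a hypothetical helper name:

static inline int socket_to_node_sketch(int sockid)
{
	/* a dropped table means socket ids and node ids already coincide */
	return _socket_to_node ? _socket_to_node[sockid - _min_socket]
			       : sockid;
}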
+
 void __init uv_system_init(void)
 {
-	union uvh_rh_gam_config_mmr_u  m_n_config;
-	union uvh_node_id_u node_id;
-	unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size;
-	int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val;
-	int gnode_extra, min_pnode = 999999, max_pnode = -1;
-	unsigned long mmr_base, present, paddr;
-	unsigned short pnode_mask;
-	unsigned char n_lshift;
-	char *hub = (is_uv1_hub() ? "UV100/1000" :
-		    (is_uv2_hub() ? "UV2000/3000" :
-		    (is_uv3_hub() ? "UV300" : NULL)));
+	struct uv_hub_info_s hub_info = {0};
+	int bytes, cpu, nodeid;
+	unsigned short min_pnode = 9999, max_pnode = 0;
+	char *hub = is_uv4_hub() ? "UV400" :
+		    is_uv3_hub() ? "UV300" :
+		    is_uv2_hub() ? "UV2000/3000" :
+		    is_uv1_hub() ? "UV100/1000" : NULL;
 
 	if (!hub) {
 		pr_err("UV: Unknown/unsupported UV hub\n");
@@ -893,124 +1397,104 @@
 
 	map_low_mmrs();
 
-	m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR );
-	m_val = m_n_config.s.m_skt;
-	n_val = m_n_config.s.n_skt;
-	pnode_mask = (1 << n_val) - 1;
-	n_lshift = get_n_lshift(m_val);
-	mmr_base =
-	    uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) &
-	    ~UV_MMR_ENABLE;
-
-	node_id.v = uv_read_local_mmr(UVH_NODE_ID);
-	gnode_extra = (node_id.s.node_id & ~((1 << n_val) - 1)) >> 1;
-	gnode_upper = ((unsigned long)gnode_extra  << m_val);
-	pr_info("UV: N:%d M:%d pnode_mask:0x%x gnode_upper/extra:0x%lx/0x%x n_lshift 0x%x\n",
-			n_val, m_val, pnode_mask, gnode_upper, gnode_extra,
-			n_lshift);
-
-	pr_info("UV: global MMR base 0x%lx\n", mmr_base);
-
-	for(i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++)
-		uv_possible_blades +=
-		  hweight64(uv_read_local_mmr( UVH_NODE_PRESENT_TABLE + i * 8));
+	uv_bios_init();			/* get uv_systab for decoding */
+	decode_uv_systab();
+	build_socket_tables();
+	build_uv_gr_table();
+	uv_init_hub_info(&hub_info);
+	uv_possible_blades = num_possible_nodes();
+	if (!_node_to_pnode)
+		boot_init_possible_blades(&hub_info);
 
 	/* uv_num_possible_blades() is really the hub count */
-	pr_info("UV: Found %d blades, %d hubs\n",
-			is_uv1_hub() ? uv_num_possible_blades() :
-			(uv_num_possible_blades() + 1) / 2,
-			uv_num_possible_blades());
+	pr_info("UV: Found %d hubs, %d nodes, %d cpus\n",
+			uv_num_possible_blades(),
+			num_possible_nodes(),
+			num_possible_cpus());
 
-	bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
-	uv_blade_info = kzalloc(bytes, GFP_KERNEL);
-	BUG_ON(!uv_blade_info);
-
-	for (blade = 0; blade < uv_num_possible_blades(); blade++)
-		uv_blade_info[blade].memory_nid = -1;
-
-	get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size);
-
-	bytes = sizeof(uv_node_to_blade[0]) * num_possible_nodes();
-	uv_node_to_blade = kmalloc(bytes, GFP_KERNEL);
-	BUG_ON(!uv_node_to_blade);
-	memset(uv_node_to_blade, 255, bytes);
-
-	bytes = sizeof(uv_cpu_to_blade[0]) * num_possible_cpus();
-	uv_cpu_to_blade = kmalloc(bytes, GFP_KERNEL);
-	BUG_ON(!uv_cpu_to_blade);
-	memset(uv_cpu_to_blade, 255, bytes);
-
-	blade = 0;
-	for (i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) {
-		present = uv_read_local_mmr(UVH_NODE_PRESENT_TABLE + i * 8);
-		for (j = 0; j < 64; j++) {
-			if (!test_bit(j, &present))
-				continue;
-			pnode = (i * 64 + j) & pnode_mask;
-			uv_blade_info[blade].pnode = pnode;
-			uv_blade_info[blade].nr_possible_cpus = 0;
-			uv_blade_info[blade].nr_online_cpus = 0;
-			spin_lock_init(&uv_blade_info[blade].nmi_lock);
-			min_pnode = min(pnode, min_pnode);
-			max_pnode = max(pnode, max_pnode);
-			blade++;
-		}
-	}
-
-	uv_bios_init();
 	uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, &sn_coherency_id,
 			    &sn_region_size, &system_serial_number);
+	hub_info.coherency_domain_number = sn_coherency_id;
 	uv_rtc_init();
 
-	for_each_present_cpu(cpu) {
+	bytes = sizeof(void *) * uv_num_possible_blades();
+	__uv_hub_info_list = kzalloc(bytes, GFP_KERNEL);
+	BUG_ON(!__uv_hub_info_list);
+
+	bytes = sizeof(struct uv_hub_info_s);
+	for_each_node(nodeid) {
+		struct uv_hub_info_s *new_hub;
+
+		if (__uv_hub_info_list[nodeid]) {
+			pr_err("UV: Node %d UV HUB already initialized!?\n",
+				nodeid);
+			BUG();
+		}
+
+		/* Allocate new per hub info list */
+		new_hub = (nodeid == 0) ?
+			&uv_hub_info_node0 :
+			kzalloc_node(bytes, GFP_KERNEL, nodeid);
+		BUG_ON(!new_hub);
+		__uv_hub_info_list[nodeid] = new_hub;
+		new_hub = uv_hub_info_list(nodeid);
+		BUG_ON(!new_hub);
+		*new_hub = hub_info;
+
+		/* Use information from GAM table if available */
+		if (_node_to_pnode)
+			new_hub->pnode = _node_to_pnode[nodeid];
+		else	/* Fill in during cpu loop */
+			new_hub->pnode = 0xffff;
+		new_hub->numa_blade_id = uv_node_to_blade_id(nodeid);
+		new_hub->memory_nid = -1;
+		new_hub->nr_possible_cpus = 0;
+		new_hub->nr_online_cpus = 0;
+	}
+
+	/* Initialize per cpu info */
+	for_each_possible_cpu(cpu) {
 		int apicid = per_cpu(x86_cpu_to_apicid, cpu);
+		int numa_node_id;
+		unsigned short pnode;
 
-		nid = cpu_to_node(cpu);
-		/*
-		 * apic_pnode_shift must be set before calling uv_apicid_to_pnode();
-		 */
-		uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask;
-		uv_cpu_hub_info(cpu)->apic_pnode_shift = uvh_apicid.s.pnode_shift;
-		uv_cpu_hub_info(cpu)->hub_revision = uv_hub_info->hub_revision;
-
-		uv_cpu_hub_info(cpu)->m_shift = 64 - m_val;
-		uv_cpu_hub_info(cpu)->n_lshift = n_lshift;
-
+		nodeid = cpu_to_node(cpu);
+		numa_node_id = numa_cpu_node(cpu);
 		pnode = uv_apicid_to_pnode(apicid);
-		blade = boot_pnode_to_blade(pnode);
-		lcpu = uv_blade_info[blade].nr_possible_cpus;
-		uv_blade_info[blade].nr_possible_cpus++;
 
-		/* Any node on the blade, else will contain -1. */
-		uv_blade_info[blade].memory_nid = nid;
-
-		uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base;
-		uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size;
-		uv_cpu_hub_info(cpu)->m_val = m_val;
-		uv_cpu_hub_info(cpu)->n_val = n_val;
-		uv_cpu_hub_info(cpu)->numa_blade_id = blade;
-		uv_cpu_hub_info(cpu)->blade_processor_id = lcpu;
-		uv_cpu_hub_info(cpu)->pnode = pnode;
-		uv_cpu_hub_info(cpu)->gpa_mask = (1UL << (m_val + n_val)) - 1;
-		uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
-		uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra;
-		uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
-		uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id;
-		uv_cpu_hub_info(cpu)->scir.offset = uv_scir_offset(apicid);
-		uv_node_to_blade[nid] = blade;
-		uv_cpu_to_blade[cpu] = blade;
+		uv_cpu_info_per(cpu)->p_uv_hub_info = uv_hub_info_list(nodeid);
+		uv_cpu_info_per(cpu)->blade_cpu_id =
+			uv_cpu_hub_info(cpu)->nr_possible_cpus++;
+		if (uv_cpu_hub_info(cpu)->memory_nid == -1)
+			uv_cpu_hub_info(cpu)->memory_nid = cpu_to_node(cpu);
+		if (nodeid != numa_node_id &&	/* init memoryless node */
+		    uv_hub_info_list(numa_node_id)->pnode == 0xffff)
+			uv_hub_info_list(numa_node_id)->pnode = pnode;
+		else if (uv_cpu_hub_info(cpu)->pnode == 0xffff)
+			uv_cpu_hub_info(cpu)->pnode = pnode;
+		uv_cpu_scir_info(cpu)->offset = uv_scir_offset(apicid);
 	}
 
-	/* Add blade/pnode info for nodes without cpus */
-	for_each_online_node(nid) {
-		if (uv_node_to_blade[nid] >= 0)
-			continue;
-		paddr = node_start_pfn(nid) << PAGE_SHIFT;
-		pnode = uv_gpa_to_pnode(uv_soc_phys_ram_to_gpa(paddr));
-		blade = boot_pnode_to_blade(pnode);
-		uv_node_to_blade[nid] = blade;
+	for_each_node(nodeid) {
+		unsigned short pnode = uv_hub_info_list(nodeid)->pnode;
+
+		/* Add pnode info for pre-GAM list nodes without cpus */
+		if (pnode == 0xffff) {
+			unsigned long paddr;
+
+			paddr = node_start_pfn(nodeid) << PAGE_SHIFT;
+			pnode = uv_gpa_to_pnode(uv_soc_phys_ram_to_gpa(paddr));
+			uv_hub_info_list(nodeid)->pnode = pnode;
+		}
+		min_pnode = min(pnode, min_pnode);
+		max_pnode = max(pnode, max_pnode);
+		pr_info("UV: UVHUB node:%2d pn:%02x nrcpus:%d\n",
+			nodeid,
+			uv_hub_info_list(nodeid)->pnode,
+			uv_hub_info_list(nodeid)->nr_possible_cpus);
 	}
 
+	pr_info("UV: min_pnode:%02x max_pnode:%02x\n", min_pnode, max_pnode);
 	map_gru_high(max_pnode);
 	map_mmr_high(max_pnode);
 	map_mmioh_high(min_pnode, max_pnode);
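
The hunk above replaces the old per-blade table with a per-node list of hub-info structures: one pointer per node, node 0 backed by static storage (it is needed before the allocator is fully usable), the rest allocated node-locally, all seeded from a boot-time template and left with a 0xffff pnode sentinel until the CPU loop or the pre-GAM fallback fills it in. A minimal userspace sketch of that allocation pattern, with the UV helpers replaced by plain arrays and an invented node count:

#include <stdio.h>
#include <stdlib.h>

#define NR_NODES 4			/* invented; the kernel asks NUMA */

struct hub_info {
	unsigned short pnode;
	int nr_possible_cpus;
};

static struct hub_info hub_info_node0;	/* static storage for node 0 */
static struct hub_info *hub_list[NR_NODES];

int main(void)
{
	struct hub_info template = { .pnode = 0xffff };	/* "unset" sentinel */

	for (int node = 0; node < NR_NODES; node++) {
		struct hub_info *hub = (node == 0)
			? &hub_info_node0
			: calloc(1, sizeof(*hub));	/* kzalloc_node() stand-in */

		if (!hub)
			abort();		/* the kernel BUG()s here */
		*hub = template;		/* seed from the boot-time template */
		hub_list[node] = hub;
	}

	printf("node 1 pnode: %#x (still the sentinel)\n",
	       (unsigned)hub_list[1]->pnode);
	return 0;
}
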
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 9307f18..c7364bd 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -2267,7 +2267,7 @@
 
 	dmi_check_system(apm_dmi_table);
 
-	if (apm_info.bios.version == 0 || paravirt_enabled() || machine_is_olpc()) {
+	if (apm_info.bios.version == 0 || machine_is_olpc()) {
 		printk(KERN_INFO "apm: BIOS not found.\n");
 		return -ENODEV;
 	}
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 5c04246..674134e 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -80,6 +80,7 @@
 	OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
 	OFFSET(BP_version, boot_params, hdr.version);
 	OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
+	OFFSET(BP_init_size, boot_params, hdr.init_size);
 	OFFSET(BP_pref_address, boot_params, hdr.pref_address);
 	OFFSET(BP_code32_start, boot_params, hdr.code32_start);
 
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 7b76eb6..c343a54 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -565,14 +565,17 @@
 	 * can safely set X86_FEATURE_EXTD_APICID unconditionally for families
 	 * after 16h.
 	 */
-	if (cpu_has_apic && c->x86 > 0x16) {
-		set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
-	} else if (cpu_has_apic && c->x86 >= 0xf) {
-		/* check CPU config space for extended APIC ID */
-		unsigned int val;
-		val = read_pci_config(0, 24, 0, 0x68);
-		if ((val & ((1 << 17) | (1 << 18))) == ((1 << 17) | (1 << 18)))
+	if (boot_cpu_has(X86_FEATURE_APIC)) {
+		if (c->x86 > 0x16)
 			set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
+		else if (c->x86 >= 0xf) {
+			/* check CPU config space for extended APIC ID */
+			unsigned int val;
+
+			val = read_pci_config(0, 24, 0, 0x68);
+			if ((val >> 17 & 0x3) == 0x3)
+				set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
+		}
 	}
 #endif
 
@@ -628,6 +631,7 @@
 	 */
 	msr_set_bit(MSR_K7_HWCR, 6);
 #endif
+	set_cpu_bug(c, X86_BUG_SWAPGS_FENCE);
 }
 
 static void init_amd_gh(struct cpuinfo_x86 *c)
@@ -746,7 +750,7 @@
 	if (c->x86 >= 0xf)
 		set_cpu_cap(c, X86_FEATURE_K8);
 
-	if (cpu_has_xmm2) {
+	if (cpu_has(c, X86_FEATURE_XMM2)) {
 		/* MFENCE stops RDTSC speculation */
 		set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
 	}
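
A quick sanity check, outside the kernel, that the rewritten extended-APIC-ID test above is equivalent to the mask-and-compare it replaces (both ask whether bits 17 and 18 of the PCI config word are set):

#include <assert.h>
#include <stdio.h>

int main(void)
{
	/* 0x12345 is arbitrary noise with bits 17 and 18 clear */
	for (int b17 = 0; b17 <= 1; b17++) {
		for (int b18 = 0; b18 <= 1; b18++) {
			unsigned int val = (b17 << 17) | (b18 << 18) | 0x12345;
			int old_test = (val & ((1 << 17) | (1 << 18))) ==
				       ((1 << 17) | (1 << 18));
			int new_test = (val >> 17 & 0x3) == 0x3;

			assert(old_test == new_test);
		}
	}
	puts("old and new tests agree on all four bit combinations");
	return 0;
}
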
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 8394b3d..6ef6ed9 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -430,7 +430,7 @@
 #ifdef CONFIG_X86_32
 	loadsegment(fs, __KERNEL_PERCPU);
 #else
-	loadsegment(gs, 0);
+	__loadsegment_simple(gs, 0);
 	wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
 #endif
 	load_stack_canary_segment();
@@ -717,6 +717,13 @@
 		}
 	}
 
+	if (c->extended_cpuid_level >= 0x80000007) {
+		cpuid(0x80000007, &eax, &ebx, &ecx, &edx);
+
+		c->x86_capability[CPUID_8000_0007_EBX] = ebx;
+		c->x86_power = edx;
+	}
+
 	if (c->extended_cpuid_level >= 0x80000008) {
 		cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
 
@@ -729,9 +736,6 @@
 		c->x86_phys_bits = 36;
 #endif
 
-	if (c->extended_cpuid_level >= 0x80000007)
-		c->x86_power = cpuid_edx(0x80000007);
-
 	if (c->extended_cpuid_level >= 0x8000000a)
 		c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a);
 
@@ -862,30 +866,34 @@
 #else
 	set_cpu_cap(c, X86_FEATURE_NOPL);
 #endif
+}
 
+static void detect_null_seg_behavior(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_X86_64
 	/*
-	 * ESPFIX is a strange bug.  All real CPUs have it.  Paravirt
-	 * systems that run Linux at CPL > 0 may or may not have the
-	 * issue, but, even if they have the issue, there's absolutely
-	 * nothing we can do about it because we can't use the real IRET
-	 * instruction.
+	 * Empirically, writing zero to a segment selector on AMD does
+	 * not clear the base, whereas writing zero to a segment
+	 * selector on Intel does clear the base.  Intel's behavior
+	 * allows slightly faster context switches in the common case
+	 * where GS is unused by the prev and next threads.
 	 *
-	 * NB: For the time being, only 32-bit kernels support
-	 * X86_BUG_ESPFIX as such.  64-bit kernels directly choose
-	 * whether to apply espfix using paravirt hooks.  If any
-	 * non-paravirt system ever shows up that does *not* have the
-	 * ESPFIX issue, we can change this.
+	 * Since neither vendor documents this anywhere that I can see,
+	 * detect it directly instead of hardcoding the choice by
+	 * vendor.
+	 *
+	 * I've designated AMD's behavior as the "bug" because it's
+	 * counterintuitive and less friendly.
 	 */
-#ifdef CONFIG_X86_32
-#ifdef CONFIG_PARAVIRT
-	do {
-		extern void native_iret(void);
-		if (pv_cpu_ops.iret == native_iret)
-			set_cpu_bug(c, X86_BUG_ESPFIX);
-	} while (0);
-#else
-	set_cpu_bug(c, X86_BUG_ESPFIX);
-#endif
+
+	unsigned long old_base, tmp;
+	rdmsrl(MSR_FS_BASE, old_base);
+	wrmsrl(MSR_FS_BASE, 1);
+	loadsegment(fs, 0);
+	rdmsrl(MSR_FS_BASE, tmp);
+	if (tmp != 0)
+		set_cpu_bug(c, X86_BUG_NULL_SEG);
+	wrmsrl(MSR_FS_BASE, old_base);
 #endif
 }
 
@@ -921,6 +929,33 @@
 	get_model_name(c); /* Default name */
 
 	detect_nopl(c);
+
+	detect_null_seg_behavior(c);
+
+	/*
+	 * ESPFIX is a strange bug.  All real CPUs have it.  Paravirt
+	 * systems that run Linux at CPL > 0 may or may not have the
+	 * issue, but, even if they have the issue, there's absolutely
+	 * nothing we can do about it because we can't use the real IRET
+	 * instruction.
+	 *
+	 * NB: For the time being, only 32-bit kernels support
+	 * X86_BUG_ESPFIX as such.  64-bit kernels directly choose
+	 * whether to apply espfix using paravirt hooks.  If any
+	 * non-paravirt system ever shows up that does *not* have the
+	 * ESPFIX issue, we can change this.
+	 */
+#ifdef CONFIG_X86_32
+# ifdef CONFIG_PARAVIRT
+	do {
+		extern void native_iret(void);
+		if (pv_cpu_ops.iret == native_iret)
+			set_cpu_bug(c, X86_BUG_ESPFIX);
+	} while (0);
+# else
+	set_cpu_bug(c, X86_BUG_ESPFIX);
+# endif
+#endif
 }
 
 static void x86_init_cache_qos(struct cpuinfo_x86 *c)
@@ -1076,12 +1111,12 @@
 	struct tss_struct *tss;
 	int cpu;
 
+	if (!boot_cpu_has(X86_FEATURE_SEP))
+		return;
+
 	cpu = get_cpu();
 	tss = &per_cpu(cpu_tss, cpu);
 
-	if (!boot_cpu_has(X86_FEATURE_SEP))
-		goto out;
-
 	/*
 	 * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
 	 * see the big comment in struct x86_hw_tss's definition.
@@ -1096,7 +1131,6 @@
 
 	wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
 
-out:
 	put_cpu();
 }
 #endif
@@ -1528,7 +1562,7 @@
 	pr_info("Initializing CPU#%d\n", cpu);
 
 	if (cpu_feature_enabled(X86_FEATURE_VME) ||
-	    cpu_has_tsc ||
+	    boot_cpu_has(X86_FEATURE_TSC) ||
 	    boot_cpu_has(X86_FEATURE_DE))
 		cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
 
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index 6adef9c..bd9dcd6 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -333,7 +333,7 @@
 		switch (dir0_lsn) {
 		case 0xd:  /* either a 486SLC or DLC w/o DEVID */
 			dir0_msn = 0;
-			p = Cx486_name[(cpu_has_fpu ? 1 : 0)];
+			p = Cx486_name[!!boot_cpu_has(X86_FEATURE_FPU)];
 			break;
 
 		case 0xe:  /* a 486S A step */
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index e4393bf..8dae51f 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -152,9 +152,9 @@
 	 *  the TLB when any changes are made to any of the page table entries.
 	 *  The operating system must reload CR3 to cause the TLB to be flushed"
 	 *
-	 * As a result cpu_has_pge() in arch/x86/include/asm/tlbflush.h should
-	 * be false so that __flush_tlb_all() causes CR3 insted of CR4.PGE
-	 * to be modified
+	 * As a result, boot_cpu_has(X86_FEATURE_PGE) in arch/x86/include/asm/tlbflush.h
+	 * should be false so that __flush_tlb_all() causes CR3 instead of CR4.PGE
+	 * to be modified.
 	 */
 	if (c->x86 == 5 && c->x86_model == 9) {
 		pr_info("Disabling PGE capability bit\n");
@@ -233,7 +233,7 @@
 	 * The Quark is also family 5, but does not have the same bug.
 	 */
 	clear_cpu_bug(c, X86_BUG_F00F);
-	if (!paravirt_enabled() && c->x86 == 5 && c->x86_model < 9) {
+	if (c->x86 == 5 && c->x86_model < 9) {
 		static int f00f_workaround_enabled;
 
 		set_cpu_bug(c, X86_BUG_F00F);
@@ -281,7 +281,7 @@
 	 * integrated APIC (see 11AP erratum in "Pentium Processor
 	 * Specification Update").
 	 */
-	if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
+	if (boot_cpu_has(X86_FEATURE_APIC) && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
 	    (c->x86_mask < 0x6 || c->x86_mask == 0xb))
 		set_cpu_bug(c, X86_BUG_11AP);
 
@@ -456,7 +456,7 @@
 			set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
 	}
 
-	if (cpu_has_xmm2)
+	if (cpu_has(c, X86_FEATURE_XMM2))
 		set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
 
 	if (boot_cpu_has(X86_FEATURE_DS)) {
@@ -468,7 +468,7 @@
 			set_cpu_cap(c, X86_FEATURE_PEBS);
 	}
 
-	if (c->x86 == 6 && cpu_has_clflush &&
+	if (c->x86 == 6 && boot_cpu_has(X86_FEATURE_CLFLUSH) &&
 	    (c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47))
 		set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR);
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce-genpool.c b/arch/x86/kernel/cpu/mcheck/mce-genpool.c
index 2658e2a..93d824e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-genpool.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-genpool.c
@@ -26,6 +26,52 @@
 static LLIST_HEAD(mce_event_llist);
 static char gen_pool_buf[MCE_POOLSZ];
 
+/*
+ * Compare the record "t" with each of the records on list "l" to see if
+ * an equivalent one is present in the list.
+ */
+static bool is_duplicate_mce_record(struct mce_evt_llist *t, struct mce_evt_llist *l)
+{
+	struct mce_evt_llist *node;
+	struct mce *m1, *m2;
+
+	m1 = &t->mce;
+
+	llist_for_each_entry(node, &l->llnode, llnode) {
+		m2 = &node->mce;
+
+		if (!mce_cmp(m1, m2))
+			return true;
+	}
+	return false;
+}
+
+/*
+ * The system has panicked - we'd like to peruse the list of MCE records
+ * that have been queued, but not seen by anyone yet.  The list is in
+ * reverse time order, so we need to reverse it. While doing that we can
+ * also drop duplicate records (these were logged because some banks are
+ * shared between cores or by all threads on a socket).
+ */
+struct llist_node *mce_gen_pool_prepare_records(void)
+{
+	struct llist_node *head;
+	LLIST_HEAD(new_head);
+	struct mce_evt_llist *node, *t;
+
+	head = llist_del_all(&mce_event_llist);
+	if (!head)
+		return NULL;
+
+	/* squeeze out duplicates while reversing order */
+	llist_for_each_entry_safe(node, t, head, llnode) {
+		if (!is_duplicate_mce_record(node, t))
+			llist_add(&node->llnode, &new_head);
+	}
+
+	return new_head.first;
+}
+
 void mce_gen_pool_process(void)
 {
 	struct llist_node *head;
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 547720e..cd74a3f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -35,6 +35,7 @@
 bool mce_gen_pool_empty(void);
 int mce_gen_pool_add(struct mce *mce);
 int mce_gen_pool_init(void);
+struct llist_node *mce_gen_pool_prepare_records(void);
 
 extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool is_excp);
 struct dentry *mce_get_debugfs_dir(void);
@@ -81,3 +82,17 @@
 #endif
 
 void mce_inject_log(struct mce *m);
+
+/*
+ * We consider records to be equivalent if bank+status+addr+misc all match.
+ * This is only used when the system is going down because of a fatal error
+ * to avoid cluttering the console log with essentially repeated information.
+ * In normal processing all errors seen are logged.
+ */
+static inline bool mce_cmp(struct mce *m1, struct mce *m2)
+{
+	return m1->bank != m2->bank ||
+		m1->status != m2->status ||
+		m1->addr != m2->addr ||
+		m1->misc != m2->misc;
+}
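
The two hunks above cooperate: mce_cmp() defines record equivalence, and mce_gen_pool_prepare_records() walks the newest-first list once, keeping a record only if no equivalent one follows it, and pushes the keepers onto a new head, which restores forward time order. A compilable single-threaded sketch of the same pass over a plain singly linked list (the lock-free llist details are dropped):

#include <stdbool.h>
#include <stdio.h>

struct rec {
	unsigned int bank;
	unsigned long long status, addr, misc;
	struct rec *next;
};

/* Nonzero when the records differ, mirroring mce_cmp() above. */
static bool rec_differ(const struct rec *a, const struct rec *b)
{
	return a->bank != b->bank || a->status != b->status ||
	       a->addr != b->addr || a->misc != b->misc;
}

static bool duplicate_follows(const struct rec *r, const struct rec *rest)
{
	for (; rest; rest = rest->next)
		if (!rec_differ(r, rest))
			return true;
	return false;
}

/* Reverse the newest-first list while squeezing out duplicates. */
static struct rec *prepare_records(struct rec *head)
{
	struct rec *new_head = NULL;

	while (head) {
		struct rec *rest = head->next;

		if (!duplicate_follows(head, rest)) {
			head->next = new_head;	/* push: reverses the order */
			new_head = head;
		}
		head = rest;
	}
	return new_head;
}

int main(void)
{
	/* newest first: c, then a duplicate of a, then a itself */
	struct rec a = { .bank = 1, .status = 7 };
	struct rec dup_a = { .bank = 1, .status = 7, .next = &a };
	struct rec c = { .bank = 2, .status = 9, .next = &dup_a };

	for (struct rec *r = prepare_records(&c); r; r = r->next)
		printf("bank %u status %llu\n", r->bank, r->status);
	return 0;	/* prints a (oldest) first, then c; the dup is dropped */
}
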
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 5119766..631356c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -204,6 +204,33 @@
 	return IN_KERNEL;
 }
 
+static int mce_severity_amd_smca(struct mce *m, int err_ctx)
+{
+	u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
+	u32 low, high;
+
+	/*
+	 * We need to look at the following bits:
+	 * - "succor" bit (data poisoning support), and
+	 * - TCC bit (Task Context Corrupt)
+	 * in MCi_STATUS to determine error severity.
+	 */
+	if (!mce_flags.succor)
+		return MCE_PANIC_SEVERITY;
+
+	if (rdmsr_safe(addr, &low, &high))
+		return MCE_PANIC_SEVERITY;
+
+	/* TCC (Task context corrupt). If set and if IN_KERNEL, panic. */
+	if ((low & MCI_CONFIG_MCAX) &&
+	    (m->status & MCI_STATUS_TCC) &&
+	    (err_ctx == IN_KERNEL))
+		return MCE_PANIC_SEVERITY;
+
+	 /* ...otherwise invoke hwpoison handler. */
+	return MCE_AR_SEVERITY;
+}
+
 /*
  * See AMD Error Scope Hierarchy table in a newer BKDG. For example
  * 49125_15h_Models_30h-3Fh_BKDG.pdf, section "RAS Features"
@@ -225,6 +252,9 @@
 		 * to at least kill process to prolong system operation.
 		 */
 		if (mce_flags.overflow_recov) {
+			if (mce_flags.smca)
+				return mce_severity_amd_smca(m, ctx);
+
 			/* software can try to contain */
 			if (!(m->mcgstatus & MCG_STATUS_RIPV) && (ctx == IN_KERNEL))
 				return MCE_PANIC_SEVERITY;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index f0c921b..92e5e37 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -161,7 +161,6 @@
 	if (!mce_gen_pool_add(mce))
 		irq_work_queue(&mce_irq_work);
 
-	mce->finished = 0;
 	wmb();
 	for (;;) {
 		entry = mce_log_get_idx_check(mcelog.next);
@@ -194,7 +193,6 @@
 	mcelog.entry[entry].finished = 1;
 	wmb();
 
-	mce->finished = 1;
 	set_bit(0, &mce_need_notify);
 }
 
@@ -224,6 +222,53 @@
 }
 EXPORT_SYMBOL_GPL(mce_unregister_decode_chain);
 
+static inline u32 ctl_reg(int bank)
+{
+	return MSR_IA32_MCx_CTL(bank);
+}
+
+static inline u32 status_reg(int bank)
+{
+	return MSR_IA32_MCx_STATUS(bank);
+}
+
+static inline u32 addr_reg(int bank)
+{
+	return MSR_IA32_MCx_ADDR(bank);
+}
+
+static inline u32 misc_reg(int bank)
+{
+	return MSR_IA32_MCx_MISC(bank);
+}
+
+static inline u32 smca_ctl_reg(int bank)
+{
+	return MSR_AMD64_SMCA_MCx_CTL(bank);
+}
+
+static inline u32 smca_status_reg(int bank)
+{
+	return MSR_AMD64_SMCA_MCx_STATUS(bank);
+}
+
+static inline u32 smca_addr_reg(int bank)
+{
+	return MSR_AMD64_SMCA_MCx_ADDR(bank);
+}
+
+static inline u32 smca_misc_reg(int bank)
+{
+	return MSR_AMD64_SMCA_MCx_MISC(bank);
+}
+
+struct mca_msr_regs msr_ops = {
+	.ctl	= ctl_reg,
+	.status	= status_reg,
+	.addr	= addr_reg,
+	.misc	= misc_reg
+};
+
 static void print_mce(struct mce *m)
 {
 	int ret = 0;
@@ -290,7 +335,9 @@
 
 static void mce_panic(const char *msg, struct mce *final, char *exp)
 {
-	int i, apei_err = 0;
+	int apei_err = 0;
+	struct llist_node *pending;
+	struct mce_evt_llist *l;
 
 	if (!fake_panic) {
 		/*
@@ -307,11 +354,10 @@
 		if (atomic_inc_return(&mce_fake_panicked) > 1)
 			return;
 	}
+	pending = mce_gen_pool_prepare_records();
 	/* First print corrected ones that are still unlogged */
-	for (i = 0; i < MCE_LOG_LEN; i++) {
-		struct mce *m = &mcelog.entry[i];
-		if (!(m->status & MCI_STATUS_VAL))
-			continue;
+	llist_for_each_entry(l, pending, llnode) {
+		struct mce *m = &l->mce;
 		if (!(m->status & MCI_STATUS_UC)) {
 			print_mce(m);
 			if (!apei_err)
@@ -319,13 +365,11 @@
 		}
 	}
 	/* Now print uncorrected but with the final one last */
-	for (i = 0; i < MCE_LOG_LEN; i++) {
-		struct mce *m = &mcelog.entry[i];
-		if (!(m->status & MCI_STATUS_VAL))
-			continue;
+	llist_for_each_entry(l, pending, llnode) {
+		struct mce *m = &l->mce;
 		if (!(m->status & MCI_STATUS_UC))
 			continue;
-		if (!final || memcmp(m, final, sizeof(struct mce))) {
+		if (!final || mce_cmp(m, final)) {
 			print_mce(m);
 			if (!apei_err)
 				apei_err = apei_write_mce(m);
@@ -356,11 +400,11 @@
 
 	if (msr == mca_cfg.rip_msr)
 		return offsetof(struct mce, ip);
-	if (msr == MSR_IA32_MCx_STATUS(bank))
+	if (msr == msr_ops.status(bank))
 		return offsetof(struct mce, status);
-	if (msr == MSR_IA32_MCx_ADDR(bank))
+	if (msr == msr_ops.addr(bank))
 		return offsetof(struct mce, addr);
-	if (msr == MSR_IA32_MCx_MISC(bank))
+	if (msr == msr_ops.misc(bank))
 		return offsetof(struct mce, misc);
 	if (msr == MSR_IA32_MCG_STATUS)
 		return offsetof(struct mce, mcgstatus);
@@ -523,9 +567,9 @@
 static void mce_read_aux(struct mce *m, int i)
 {
 	if (m->status & MCI_STATUS_MISCV)
-		m->misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
+		m->misc = mce_rdmsrl(msr_ops.misc(i));
 	if (m->status & MCI_STATUS_ADDRV) {
-		m->addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
+		m->addr = mce_rdmsrl(msr_ops.addr(i));
 
 		/*
 		 * Mask the reported address by the reported granularity.
@@ -607,7 +651,7 @@
 		m.tsc = 0;
 
 		barrier();
-		m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
+		m.status = mce_rdmsrl(msr_ops.status(i));
 		if (!(m.status & MCI_STATUS_VAL))
 			continue;
 
@@ -654,7 +698,7 @@
 		/*
 		 * Clear state for this bank.
 		 */
-		mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
+		mce_wrmsrl(msr_ops.status(i), 0);
 	}
 
 	/*
@@ -679,7 +723,7 @@
 	char *tmp;
 
 	for (i = 0; i < mca_cfg.banks; i++) {
-		m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
+		m->status = mce_rdmsrl(msr_ops.status(i));
 		if (m->status & MCI_STATUS_VAL) {
 			__set_bit(i, validp);
 			if (quirk_no_way_out)
@@ -830,9 +874,9 @@
 
 	atomic_add(*no_way_out, &global_nwo);
 	/*
-	 * global_nwo should be updated before mce_callin
+	 * Rely on the implied barrier below, such that global_nwo
+	 * is updated before mce_callin.
 	 */
-	smp_wmb();
 	order = atomic_inc_return(&mce_callin);
 
 	/*
@@ -957,7 +1001,7 @@
 
 	for (i = 0; i < mca_cfg.banks; i++) {
 		if (test_bit(i, toclear))
-			mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
+			mce_wrmsrl(msr_ops.status(i), 0);
 	}
 }
 
@@ -994,11 +1038,12 @@
 	int i;
 	int worst = 0;
 	int severity;
+
 	/*
 	 * Establish sequential order between the CPUs entering the machine
 	 * check handler.
 	 */
-	int order;
+	int order = -1;
 	/*
 	 * If no_way_out gets set, there is no safe way to recover from this
 	 * MCE.  If mca_cfg.tolerant is cranked up, we'll try anyway.
@@ -1012,7 +1057,12 @@
 	DECLARE_BITMAP(toclear, MAX_NR_BANKS);
 	DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
 	char *msg = "Unknown";
-	int lmce = 0;
+
+	/*
+	 * MCEs are always local on AMD. On Intel, locality is determined
+	 * by MCG_STATUS_LMCES.
+	 */
+	int lmce = 1;
 
 	/* If this CPU is offline, just bail out. */
 	if (cpu_is_offline(smp_processor_id())) {
@@ -1051,19 +1101,20 @@
 		kill_it = 1;
 
 	/*
-	 * Check if this MCE is signaled to only this logical processor
+	 * Check if this MCE is signaled to only this logical processor,
+	 * on Intel only.
 	 */
-	if (m.mcgstatus & MCG_STATUS_LMCES)
-		lmce = 1;
-	else {
-		/*
-		 * Go through all the banks in exclusion of the other CPUs.
-		 * This way we don't report duplicated events on shared banks
-		 * because the first one to see it will clear it.
-		 * If this is a Local MCE, then no need to perform rendezvous.
-		 */
+	if (m.cpuvendor == X86_VENDOR_INTEL)
+		lmce = m.mcgstatus & MCG_STATUS_LMCES;
+
+	/*
+	 * Go through all banks in exclusion of the other CPUs. This way we
+	 * don't report duplicated events on shared banks because the first one
+	 * to see it will clear it. If this is a Local MCE, then no need to
+	 * perform rendezvous.
+	 */
+	if (!lmce)
 		order = mce_start(&no_way_out);
-	}
 
 	for (i = 0; i < cfg->banks; i++) {
 		__clear_bit(i, toclear);
@@ -1076,7 +1127,7 @@
 		m.addr = 0;
 		m.bank = i;
 
-		m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
+		m.status = mce_rdmsrl(msr_ops.status(i));
 		if ((m.status & MCI_STATUS_VAL) == 0)
 			continue;
 
@@ -1420,7 +1471,6 @@
 	enum mcp_flags m_fl = 0;
 	mce_banks_t all_banks;
 	u64 cap;
-	int i;
 
 	if (!mca_cfg.bootlog)
 		m_fl = MCP_DONTLOG;
@@ -1436,14 +1486,19 @@
 	rdmsrl(MSR_IA32_MCG_CAP, cap);
 	if (cap & MCG_CTL_P)
 		wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
+}
+
+static void __mcheck_cpu_init_clear_banks(void)
+{
+	int i;
 
 	for (i = 0; i < mca_cfg.banks; i++) {
 		struct mce_bank *b = &mce_banks[i];
 
 		if (!b->init)
 			continue;
-		wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
-		wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
+		wrmsrl(msr_ops.ctl(i), b->ctl);
+		wrmsrl(msr_ops.status(i), 0);
 	}
 }
 
@@ -1495,7 +1550,7 @@
 			 */
 			clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
 		}
-		if (c->x86 <= 17 && cfg->bootlog < 0) {
+		if (c->x86 < 17 && cfg->bootlog < 0) {
 			/*
 			 * Lots of broken BIOS around that don't clear them
 			 * by default and leave crap in there. Don't log:
@@ -1628,11 +1683,19 @@
 		break;
 
 	case X86_VENDOR_AMD: {
-		u32 ebx = cpuid_ebx(0x80000007);
+		mce_flags.overflow_recov = !!cpu_has(c, X86_FEATURE_OVERFLOW_RECOV);
+		mce_flags.succor	 = !!cpu_has(c, X86_FEATURE_SUCCOR);
+		mce_flags.smca		 = !!cpu_has(c, X86_FEATURE_SMCA);
 
-		mce_flags.overflow_recov = !!(ebx & BIT(0));
-		mce_flags.succor	 = !!(ebx & BIT(1));
-		mce_flags.smca		 = !!(ebx & BIT(3));
+		/*
+		 * Install proper ops for Scalable MCA enabled processors.
+		 */
+		if (mce_flags.smca) {
+			msr_ops.ctl	= smca_ctl_reg;
+			msr_ops.status	= smca_status_reg;
+			msr_ops.addr	= smca_addr_reg;
+			msr_ops.misc	= smca_misc_reg;
+		}
 		mce_amd_feature_init(c);
 
 		break;
@@ -1717,6 +1780,7 @@
 
 	__mcheck_cpu_init_generic();
 	__mcheck_cpu_init_vendor(c);
+	__mcheck_cpu_init_clear_banks();
 	__mcheck_cpu_init_timer();
 }
 
@@ -2082,7 +2146,7 @@
 		struct mce_bank *b = &mce_banks[i];
 
 		if (b->init)
-			wrmsrl(MSR_IA32_MCx_CTL(i), 0);
+			wrmsrl(msr_ops.ctl(i), 0);
 	}
 	return;
 }
@@ -2121,6 +2185,7 @@
 {
 	__mcheck_cpu_init_generic();
 	__mcheck_cpu_init_vendor(raw_cpu_ptr(&cpu_info));
+	__mcheck_cpu_init_clear_banks();
 }
 
 static struct syscore_ops mce_syscore_ops = {
@@ -2138,6 +2203,7 @@
 	if (!mce_available(raw_cpu_ptr(&cpu_info)))
 		return;
 	__mcheck_cpu_init_generic();
+	__mcheck_cpu_init_clear_banks();
 	__mcheck_cpu_init_timer();
 }
 
@@ -2413,7 +2479,7 @@
 		struct mce_bank *b = &mce_banks[i];
 
 		if (b->init)
-			wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
+			wrmsrl(msr_ops.ctl(i), b->ctl);
 	}
 }
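
The msr_ops table added in this patch is a plain function-pointer vtable: everything that needs a per-bank register number calls through it, and the vendor-init code swaps in the SMCA accessors once instead of testing mce_flags.smca at every call site. A userspace sketch of the pattern (the legacy numbers follow the architectural 0x400 + 4*bank layout; treat the SMCA base here as illustrative):

#include <stdio.h>

typedef unsigned int u32;

struct mca_msr_regs {
	u32 (*ctl)(int bank);
	u32 (*status)(int bank);
};

/* Legacy layout: four MSRs per bank starting at 0x400. */
static u32 ctl_reg(int bank)         { return 0x400 + 4 * bank; }
static u32 status_reg(int bank)      { return 0x401 + 4 * bank; }

/* SMCA layout: a separate range with a larger per-bank stride. */
static u32 smca_ctl_reg(int bank)    { return 0xc0002000u + 0x10 * bank; }
static u32 smca_status_reg(int bank) { return 0xc0002001u + 0x10 * bank; }

static struct mca_msr_regs msr_ops = {
	.ctl	= ctl_reg,
	.status	= status_reg,
};

int main(void)
{
	int smca = 1;	/* pretend CPUID reported X86_FEATURE_SMCA */

	if (smca) {
		msr_ops.ctl	= smca_ctl_reg;
		msr_ops.status	= smca_status_reg;
	}
	/* Call sites stay layout-agnostic: */
	printf("bank 2 status MSR: %#x\n", msr_ops.status(2));
	return 0;
}
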
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 9d656fd..10b0661 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -54,14 +54,6 @@
 /* Threshold LVT offset is at MSR0xC0000410[15:12] */
 #define SMCA_THR_LVT_OFF	0xF000
 
-/*
- * OS is required to set the MCAX bit to acknowledge that it is now using the
- * new MSR ranges and new registers under each bank. It also means that the OS
- * will configure deferred errors in the new MCx_CONFIG register. If the bit is
- * not set, uncorrectable errors will cause a system panic.
- */
-#define SMCA_MCAX_EN_OFF	0x1
-
 static const char * const th_names[] = {
 	"load_store",
 	"insn_fetch",
@@ -333,7 +325,7 @@
 	/* Fall back to method we used for older processors: */
 	switch (block) {
 	case 0:
-		addr = MSR_IA32_MCx_MISC(bank);
+		addr = msr_ops.misc(bank);
 		break;
 	case 1:
 		offset = ((low & MASK_BLKPTR_LO) >> 21);
@@ -351,6 +343,7 @@
 			int offset, u32 misc_high)
 {
 	unsigned int cpu = smp_processor_id();
+	u32 smca_low, smca_high, smca_addr;
 	struct threshold_block b;
 	int new;
 
@@ -369,24 +362,49 @@
 
 	b.interrupt_enable = 1;
 
-	if (mce_flags.smca) {
-		u32 smca_low, smca_high;
-		u32 smca_addr = MSR_AMD64_SMCA_MCx_CONFIG(bank);
-
-		if (!rdmsr_safe(smca_addr, &smca_low, &smca_high)) {
-			smca_high |= SMCA_MCAX_EN_OFF;
-			wrmsr(smca_addr, smca_low, smca_high);
-		}
-
-		/* Gather LVT offset for thresholding: */
-		if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high))
-			goto out;
-
-		new = (smca_low & SMCA_THR_LVT_OFF) >> 12;
-	} else {
+	if (!mce_flags.smca) {
 		new = (misc_high & MASK_LVTOFF_HI) >> 20;
+		goto set_offset;
 	}
 
+	smca_addr = MSR_AMD64_SMCA_MCx_CONFIG(bank);
+
+	if (!rdmsr_safe(smca_addr, &smca_low, &smca_high)) {
+		/*
+		 * OS is required to set the MCAX bit to acknowledge that it is
+		 * now using the new MSR ranges and new registers under each
+		 * bank. It also means that the OS will configure deferred
+		 * errors in the new MCx_CONFIG register. If the bit is not set,
+		 * uncorrectable errors will cause a system panic.
+		 *
+		 * MCA_CONFIG[MCAX] is bit 32 (bit 0 in the high portion of the MSR).
+		 */
+		smca_high |= BIT(0);
+
+		/*
+		 * SMCA logs Deferred Error information in MCA_DE{STAT,ADDR}
+		 * registers with the option of additionally logging to
+		 * MCA_{STATUS,ADDR} if MCA_CONFIG[LogDeferredInMcaStat] is set.
+		 *
+		 * This bit is usually set by BIOS to retain the old behavior
+		 * for OSes that don't use the new registers. Linux supports the
+		 * new registers so let's disable that additional logging here.
+		 *
+		 * MCA_CONFIG[LogDeferredInMcaStat] is bit 34 (bit 2 in the high
+		 * portion of the MSR).
+		 */
+		smca_high &= ~BIT(2);
+
+		wrmsr(smca_addr, smca_low, smca_high);
+	}
+
+	/* Gather LVT offset for thresholding: */
+	if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high))
+		goto out;
+
+	new = (smca_low & SMCA_THR_LVT_OFF) >> 12;
+
+set_offset:
 	offset = setup_APIC_mce_threshold(offset, new);
 
 	if ((offset == new) && (mce_threshold_vector != amd_threshold_interrupt))
@@ -430,12 +448,23 @@
 		deferred_error_interrupt_enable(c);
 }
 
-static void __log_error(unsigned int bank, bool threshold_err, u64 misc)
+static void
+__log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc)
 {
+	u32 msr_status = msr_ops.status(bank);
+	u32 msr_addr = msr_ops.addr(bank);
 	struct mce m;
 	u64 status;
 
-	rdmsrl(MSR_IA32_MCx_STATUS(bank), status);
+	WARN_ON_ONCE(deferred_err && threshold_err);
+
+	if (deferred_err && mce_flags.smca) {
+		msr_status = MSR_AMD64_SMCA_MCx_DESTAT(bank);
+		msr_addr = MSR_AMD64_SMCA_MCx_DEADDR(bank);
+	}
+
+	rdmsrl(msr_status, status);
+
 	if (!(status & MCI_STATUS_VAL))
 		return;
 
@@ -448,10 +477,11 @@
 		m.misc = misc;
 
 	if (m.status & MCI_STATUS_ADDRV)
-		rdmsrl(MSR_IA32_MCx_ADDR(bank), m.addr);
+		rdmsrl(msr_addr, m.addr);
 
 	mce_log(&m);
-	wrmsrl(MSR_IA32_MCx_STATUS(bank), 0);
+
+	wrmsrl(msr_status, 0);
 }
 
 static inline void __smp_deferred_error_interrupt(void)
@@ -479,17 +509,21 @@
 /* APIC interrupt handler for deferred errors */
 static void amd_deferred_error_interrupt(void)
 {
-	u64 status;
 	unsigned int bank;
+	u32 msr_status;
+	u64 status;
 
 	for (bank = 0; bank < mca_cfg.banks; ++bank) {
-		rdmsrl(MSR_IA32_MCx_STATUS(bank), status);
+		msr_status = (mce_flags.smca) ? MSR_AMD64_SMCA_MCx_DESTAT(bank)
+					      : msr_ops.status(bank);
+
+		rdmsrl(msr_status, status);
 
 		if (!(status & MCI_STATUS_VAL) ||
 		    !(status & MCI_STATUS_DEFERRED))
 			continue;
 
-		__log_error(bank, false, 0);
+		__log_error(bank, true, false, 0);
 		break;
 	}
 }
@@ -544,7 +578,7 @@
 	return;
 
 log:
-	__log_error(bank, true, ((u64)high << 32) | low);
+	__log_error(bank, false, true, ((u64)high << 32) | low);
 }
 
 /*
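
The MCA_CONFIG fixup above is a read-modify-write on a 64-bit MSR that rdmsr/wrmsr expose as two 32-bit halves, so MSR bit 32 is BIT(0) of the high word and bit 34 is BIT(2). A standalone check of that arithmetic, with the MSR replaced by a plain variable:

#include <stdint.h>
#include <stdio.h>

#define BIT(n) (1u << (n))

int main(void)
{
	/* Pretend the BIOS left LogDeferredInMcaStat (MSR bit 34) set. */
	uint64_t mca_config = 1ull << 34;
	uint32_t low  = (uint32_t)mca_config;
	uint32_t high = (uint32_t)(mca_config >> 32);

	high |= BIT(0);		/* MCAX, MSR bit 32: ack the new layout   */
	high &= ~BIT(2);	/* LogDeferredInMcaStat, MSR bit 34: clear */

	mca_config = ((uint64_t)high << 32) | low;
	printf("MCA_CONFIG = %#llx\n", (unsigned long long)mca_config);
	return 0;		/* 0x100000000: bit 32 set, bit 34 clear  */
}
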
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 1e8bb6c..1defb8e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -84,7 +84,7 @@
 	 */
 	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
 		return 0;
-	if (!cpu_has_apic || lapic_get_maxlvt() < 6)
+	if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6)
 		return 0;
 	rdmsrl(MSR_IA32_MCG_CAP, cap);
 	*banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index ac780ca..6b9dc4d 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -450,7 +450,7 @@
 /* Thermal monitoring depends on APIC, ACPI and clock modulation */
 static int intel_thermal_supported(struct cpuinfo_x86 *c)
 {
-	if (!cpu_has_apic)
+	if (!boot_cpu_has(X86_FEATURE_APIC))
 		return 0;
 	if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
 		return 0;
diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c
index f8c81ba..b1086f7 100644
--- a/arch/x86/kernel/cpu/mtrr/cyrix.c
+++ b/arch/x86/kernel/cpu/mtrr/cyrix.c
@@ -137,7 +137,7 @@
 	u32 cr0;
 
 	/*  Save value of CR4 and clear Page Global Enable (bit 7)  */
-	if (cpu_has_pge) {
+	if (boot_cpu_has(X86_FEATURE_PGE)) {
 		cr4 = __read_cr4();
 		__write_cr4(cr4 & ~X86_CR4_PGE);
 	}
@@ -170,7 +170,7 @@
 	write_cr0(read_cr0() & ~X86_CR0_CD);
 
 	/* Restore value of CR4 */
-	if (cpu_has_pge)
+	if (boot_cpu_has(X86_FEATURE_PGE))
 		__write_cr4(cr4);
 }
 
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 19f5736..16e37a25 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -444,11 +444,24 @@
 		pr_debug("TOM2: %016llx aka %lldM\n", mtrr_tom2, mtrr_tom2>>20);
 }
 
+/* PAT setup for BP (the boot processor). We need to go through sync steps here. */
+void __init mtrr_bp_pat_init(void)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	prepare_set();
+
+	pat_init();
+
+	post_set();
+	local_irq_restore(flags);
+}
+
 /* Grab all of the MTRR state for this CPU into *state */
 bool __init get_mtrr_state(void)
 {
 	struct mtrr_var_range *vrs;
-	unsigned long flags;
 	unsigned lo, dummy;
 	unsigned int i;
 
@@ -481,15 +494,6 @@
 
 	mtrr_state_set = 1;
 
-	/* PAT setup for BP. We need to go through sync steps here */
-	local_irq_save(flags);
-	prepare_set();
-
-	pat_init();
-
-	post_set();
-	local_irq_restore(flags);
-
 	return !!(mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED);
 }
 
@@ -741,7 +745,7 @@
 	wbinvd();
 
 	/* Save value of CR4 and clear Page Global Enable (bit 7) */
-	if (cpu_has_pge) {
+	if (boot_cpu_has(X86_FEATURE_PGE)) {
 		cr4 = __read_cr4();
 		__write_cr4(cr4 & ~X86_CR4_PGE);
 	}
@@ -771,7 +775,7 @@
 	write_cr0(read_cr0() & ~X86_CR0_CD);
 
 	/* Restore value of CR4 */
-	if (cpu_has_pge)
+	if (boot_cpu_has(X86_FEATURE_PGE))
 		__write_cr4(cr4);
 	raw_spin_unlock(&set_atomicity_lock);
 }
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 10f8d47..7d393ec 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -752,6 +752,9 @@
 			/* BIOS may override */
 			__mtrr_enabled = get_mtrr_state();
 
+			if (mtrr_enabled())
+				mtrr_bp_pat_init();
+
 			if (mtrr_cleanup(phys_addr)) {
 				changed_by_mtrr_cleanup = 1;
 				mtrr_if->set_all();
@@ -759,8 +762,16 @@
 		}
 	}
 
-	if (!mtrr_enabled())
+	if (!mtrr_enabled()) {
 		pr_info("MTRR: Disabled\n");
+
+		/*
+		 * PAT initialization relies on MTRR's rendezvous handler.
+		 * Skip PAT init until the handler can initialize both
+		 * features independently.
+		 */
+		pat_disable("MTRRs disabled, skipping PAT initialization too.");
+	}
 }
 
 void mtrr_ap_init(void)
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 951884d..6c7ced0 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -52,6 +52,7 @@
 void fill_mtrr_var_range(unsigned int index,
 		u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
 bool get_mtrr_state(void);
+void mtrr_bp_pat_init(void);
 
 extern void set_mtrr_ops(const struct mtrr_ops *ops);
 
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 364e583..8cac429 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -94,7 +94,7 @@
  */
 static uint32_t __init vmware_platform(void)
 {
-	if (cpu_has_hypervisor) {
+	if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
 		unsigned int eax;
 		unsigned int hyper_vendor_id[3];
 
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 1f4acd6..3fe45f8 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -151,7 +151,7 @@
 		return;
 
 	/* Did the boot loader setup the local APIC ? */
-	if (!cpu_has_apic) {
+	if (!boot_cpu_has(X86_FEATURE_APIC)) {
 		if (apic_force_enable(r.start))
 			return;
 	}
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 8efa57a..2bb25c3f 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -260,19 +260,12 @@
 	unsigned long sp;
 #endif
 	printk(KERN_DEFAULT
-	       "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
-#ifdef CONFIG_PREEMPT
-	printk("PREEMPT ");
-#endif
-#ifdef CONFIG_SMP
-	printk("SMP ");
-#endif
-	if (debug_pagealloc_enabled())
-		printk("DEBUG_PAGEALLOC ");
-#ifdef CONFIG_KASAN
-	printk("KASAN");
-#endif
-	printk("\n");
+	       "%s: %04lx [#%d]%s%s%s%s\n", str, err & 0xffff, ++die_counter,
+	       IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT"         : "",
+	       IS_ENABLED(CONFIG_SMP)     ? " SMP"             : "",
+	       debug_pagealloc_enabled()  ? " DEBUG_PAGEALLOC" : "",
+	       IS_ENABLED(CONFIG_KASAN)   ? " KASAN"           : "");
+
 	if (notify_die(DIE_OOPS, str, regs, err,
 			current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP)
 		return 1;
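
The oops-header rewrite above leans on IS_ENABLED() evaluating to a compile-time 0 or 1, which lets one printk() format string replace a chain of #ifdef blocks. The macro trick is reproduced (abbreviated) below so it can be tried outside the kernel; CONFIG_PREEMPT being defined, and CONFIG_SMP not, stands in for a real Kconfig result:

#include <stdio.h>

/* The kernel's IS_ENABLED() machinery, abbreviated: it expands to 1
 * when the option is defined to 1 and to 0 when it is undefined. */
#define __ARG_PLACEHOLDER_1 0,
#define __take_second_arg(__ignored, val, ...) val
#define ____is_defined(arg1_or_junk) __take_second_arg(arg1_or_junk 1, 0)
#define ___is_defined(val) ____is_defined(__ARG_PLACEHOLDER_##val)
#define __is_defined(x) ___is_defined(x)
#define IS_ENABLED(option) __is_defined(option)

#define CONFIG_PREEMPT 1	/* pretend Kconfig turned this on */
/* CONFIG_SMP deliberately left undefined */

int main(void)
{
	printf("Oops: 0002 [#1]%s%s\n",
	       IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "",
	       IS_ENABLED(CONFIG_SMP)     ? " SMP"     : "");
	return 0;	/* prints "Oops: 0002 [#1] PREEMPT" */
}
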
diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/ebda.c
similarity index 97%
rename from arch/x86/kernel/head.c
rename to arch/x86/kernel/ebda.c
index 992f442..afe65df 100644
--- a/arch/x86/kernel/head.c
+++ b/arch/x86/kernel/ebda.c
@@ -38,7 +38,7 @@
 	 * that the paravirt case can handle memory setup
 	 * correctly, without our help.
 	 */
-	if (paravirt_enabled())
+	if (!x86_platform.legacy.ebda_search)
 		return;
 
 	/* end of low (conventional) memory */
diff --git a/arch/x86/kernel/fpu/bugs.c b/arch/x86/kernel/fpu/bugs.c
index dd9ca9b6..aad34aa 100644
--- a/arch/x86/kernel/fpu/bugs.c
+++ b/arch/x86/kernel/fpu/bugs.c
@@ -21,11 +21,15 @@
  * We should really only care about bugs here
  * anyway. Not features.
  */
-static void __init check_fpu(void)
+void __init fpu__init_check_bugs(void)
 {
 	u32 cr0_saved;
 	s32 fdiv_bug;
 
+	/* kernel_fpu_begin/end() relies on patched alternative instructions. */
+	if (!boot_cpu_has(X86_FEATURE_FPU))
+		return;
+
 	/* We might have CR0::TS set already, clear it: */
 	cr0_saved = read_cr0();
 	write_cr0(cr0_saved & ~X86_CR0_TS);
@@ -59,13 +63,3 @@
 		pr_warn("Hmm, FPU with FDIV bug\n");
 	}
 }
-
-void __init fpu__init_check_bugs(void)
-{
-	/*
-	 * kernel_fpu_begin/end() in check_fpu() relies on the patched
-	 * alternative instructions.
-	 */
-	if (cpu_has_fpu)
-		check_fpu();
-}
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 8e37cc8..9702754 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -217,14 +217,14 @@
 
 void fpstate_init(union fpregs_state *state)
 {
-	if (!cpu_has_fpu) {
+	if (!static_cpu_has(X86_FEATURE_FPU)) {
 		fpstate_init_soft(&state->soft);
 		return;
 	}
 
 	memset(state, 0, xstate_size);
 
-	if (cpu_has_fxsr)
+	if (static_cpu_has(X86_FEATURE_FXSR))
 		fpstate_init_fxstate(&state->fxsave);
 	else
 		fpstate_init_fstate(&state->fsave);
@@ -237,7 +237,7 @@
 	dst_fpu->fpregs_active = 0;
 	dst_fpu->last_cpu = -1;
 
-	if (!src_fpu->fpstate_active || !cpu_has_fpu)
+	if (!src_fpu->fpstate_active || !static_cpu_has(X86_FEATURE_FPU))
 		return 0;
 
 	WARN_ON_FPU(src_fpu != &current->thread.fpu);
@@ -506,33 +506,6 @@
  * x87 math exception handling:
  */
 
-static inline unsigned short get_fpu_cwd(struct fpu *fpu)
-{
-	if (cpu_has_fxsr) {
-		return fpu->state.fxsave.cwd;
-	} else {
-		return (unsigned short)fpu->state.fsave.cwd;
-	}
-}
-
-static inline unsigned short get_fpu_swd(struct fpu *fpu)
-{
-	if (cpu_has_fxsr) {
-		return fpu->state.fxsave.swd;
-	} else {
-		return (unsigned short)fpu->state.fsave.swd;
-	}
-}
-
-static inline unsigned short get_fpu_mxcsr(struct fpu *fpu)
-{
-	if (cpu_has_xmm) {
-		return fpu->state.fxsave.mxcsr;
-	} else {
-		return MXCSR_DEFAULT;
-	}
-}
-
 int fpu__exception_code(struct fpu *fpu, int trap_nr)
 {
 	int err;
@@ -547,10 +520,15 @@
 		 * so if this combination doesn't produce any single exception,
 		 * then we have a bad program that isn't synchronizing its FPU usage
 		 * and it will suffer the consequences since we won't be able to
-		 * fully reproduce the context of the exception
+		 * fully reproduce the context of the exception.
 		 */
-		cwd = get_fpu_cwd(fpu);
-		swd = get_fpu_swd(fpu);
+		if (boot_cpu_has(X86_FEATURE_FXSR)) {
+			cwd = fpu->state.fxsave.cwd;
+			swd = fpu->state.fxsave.swd;
+		} else {
+			cwd = (unsigned short)fpu->state.fsave.cwd;
+			swd = (unsigned short)fpu->state.fsave.swd;
+		}
 
 		err = swd & ~cwd;
 	} else {
@@ -560,7 +538,11 @@
 		 * unmasked exception was caught we must mask the exception mask bits
 		 * at 0x1f80, and then use these to mask the exception bits at 0x3f.
 		 */
-		unsigned short mxcsr = get_fpu_mxcsr(fpu);
+		unsigned short mxcsr = MXCSR_DEFAULT;
+
+		if (boot_cpu_has(X86_FEATURE_XMM))
+			mxcsr = fpu->state.fxsave.mxcsr;
+
 		err = ~(mxcsr >> 7) & mxcsr;
 	}
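
The MXCSR decode above is worth a worked example: exception flags live in bits 0-5 and their mask bits in bits 7-12, so ~(mxcsr >> 7) & mxcsr keeps exactly the flagged-and-unmasked exceptions (only the low six bits of the result are consulted afterwards):

#include <stdio.h>

int main(void)
{
	/* invalid-op flagged (bit 0) and unmasked; divide-by-zero
	 * flagged (bit 2) but masked (bit 9) */
	unsigned short mxcsr = (1 << 0) | (1 << 2) | (1 << 9);
	unsigned short err = ~(mxcsr >> 7) & mxcsr;

	printf("unmasked exceptions: %#x\n", err & 0x3f);
	return 0;	/* 0x1: only the invalid-op exception survives */
}
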
 
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 54c86ff..aacfd7a 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -29,22 +29,22 @@
 	unsigned long cr0;
 	unsigned long cr4_mask = 0;
 
-	if (cpu_has_fxsr)
+	if (boot_cpu_has(X86_FEATURE_FXSR))
 		cr4_mask |= X86_CR4_OSFXSR;
-	if (cpu_has_xmm)
+	if (boot_cpu_has(X86_FEATURE_XMM))
 		cr4_mask |= X86_CR4_OSXMMEXCPT;
 	if (cr4_mask)
 		cr4_set_bits(cr4_mask);
 
 	cr0 = read_cr0();
 	cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */
-	if (!cpu_has_fpu)
+	if (!boot_cpu_has(X86_FEATURE_FPU))
 		cr0 |= X86_CR0_EM;
 	write_cr0(cr0);
 
 	/* Flush out any pending x87 state: */
 #ifdef CONFIG_MATH_EMULATION
-	if (!cpu_has_fpu)
+	if (!boot_cpu_has(X86_FEATURE_FPU))
 		fpstate_init_soft(&current->thread.fpu.state.soft);
 	else
 #endif
@@ -89,7 +89,7 @@
 	}
 
 #ifndef CONFIG_MATH_EMULATION
-	if (!cpu_has_fpu) {
+	if (!boot_cpu_has(X86_FEATURE_FPU)) {
 		pr_emerg("x86/fpu: Giving up, no FPU found and no math emulation present\n");
 		for (;;)
 			asm volatile("hlt");
@@ -106,7 +106,7 @@
 {
 	unsigned int mask = 0;
 
-	if (cpu_has_fxsr) {
+	if (boot_cpu_has(X86_FEATURE_FXSR)) {
 		/* Static because GCC does not get 16-byte stack alignment right: */
 		static struct fxregs_state fxregs __initdata;
 
@@ -212,7 +212,7 @@
 	 * fpu__init_system_xstate().
 	 */
 
-	if (!cpu_has_fpu) {
+	if (!boot_cpu_has(X86_FEATURE_FPU)) {
 		/*
 		 * Disable xsave as we do not support it if i387
 		 * emulation is enabled.
@@ -221,7 +221,7 @@
 		setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
 		xstate_size = sizeof(struct swregs_state);
 	} else {
-		if (cpu_has_fxsr)
+		if (boot_cpu_has(X86_FEATURE_FXSR))
 			xstate_size = sizeof(struct fxregs_state);
 		else
 			xstate_size = sizeof(struct fregs_state);
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index 8bd1c00..81422df 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -21,7 +21,10 @@
 {
 	struct fpu *target_fpu = &target->thread.fpu;
 
-	return (cpu_has_fxsr && target_fpu->fpstate_active) ? regset->n : 0;
+	if (boot_cpu_has(X86_FEATURE_FXSR) && target_fpu->fpstate_active)
+		return regset->n;
+	else
+		return 0;
 }
 
 int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
@@ -30,7 +33,7 @@
 {
 	struct fpu *fpu = &target->thread.fpu;
 
-	if (!cpu_has_fxsr)
+	if (!boot_cpu_has(X86_FEATURE_FXSR))
 		return -ENODEV;
 
 	fpu__activate_fpstate_read(fpu);
@@ -47,7 +50,7 @@
 	struct fpu *fpu = &target->thread.fpu;
 	int ret;
 
-	if (!cpu_has_fxsr)
+	if (!boot_cpu_has(X86_FEATURE_FXSR))
 		return -ENODEV;
 
 	fpu__activate_fpstate_write(fpu);
@@ -65,7 +68,7 @@
 	 * update the header bits in the xsave header, indicating the
 	 * presence of FP and SSE state.
 	 */
-	if (cpu_has_xsave)
+	if (boot_cpu_has(X86_FEATURE_XSAVE))
 		fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FPSSE;
 
 	return ret;
@@ -79,7 +82,7 @@
 	struct xregs_state *xsave;
 	int ret;
 
-	if (!cpu_has_xsave)
+	if (!boot_cpu_has(X86_FEATURE_XSAVE))
 		return -ENODEV;
 
 	fpu__activate_fpstate_read(fpu);
@@ -108,7 +111,7 @@
 	struct xregs_state *xsave;
 	int ret;
 
-	if (!cpu_has_xsave)
+	if (!boot_cpu_has(X86_FEATURE_XSAVE))
 		return -ENODEV;
 
 	fpu__activate_fpstate_write(fpu);
@@ -275,10 +278,10 @@
 
 	fpu__activate_fpstate_read(fpu);
 
-	if (!static_cpu_has(X86_FEATURE_FPU))
+	if (!boot_cpu_has(X86_FEATURE_FPU))
 		return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);
 
-	if (!cpu_has_fxsr)
+	if (!boot_cpu_has(X86_FEATURE_FXSR))
 		return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
 					   &fpu->state.fsave, 0,
 					   -1);
@@ -306,10 +309,10 @@
 	fpu__activate_fpstate_write(fpu);
 	fpstate_sanitize_xstate(fpu);
 
-	if (!static_cpu_has(X86_FEATURE_FPU))
+	if (!boot_cpu_has(X86_FEATURE_FPU))
 		return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);
 
-	if (!cpu_has_fxsr)
+	if (!boot_cpu_has(X86_FEATURE_FXSR))
 		return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
 					  &fpu->state.fsave, 0,
 					  -1);
@@ -325,7 +328,7 @@
 	 * update the header bit in the xsave header, indicating the
 	 * presence of FP.
 	 */
-	if (cpu_has_xsave)
+	if (boot_cpu_has(X86_FEATURE_XSAVE))
 		fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FP;
 	return ret;
 }
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index b48ef35..4ea2a59 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -190,7 +190,7 @@
  */
 void fpu__init_cpu_xstate(void)
 {
-	if (!cpu_has_xsave || !xfeatures_mask)
+	if (!boot_cpu_has(X86_FEATURE_XSAVE) || !xfeatures_mask)
 		return;
 
 	cr4_set_bits(X86_CR4_OSXSAVE);
@@ -280,7 +280,7 @@
 	xstate_comp_offsets[0] = 0;
 	xstate_comp_offsets[1] = offsetof(struct fxregs_state, xmm_space);
 
-	if (!cpu_has_xsaves) {
+	if (!boot_cpu_has(X86_FEATURE_XSAVES)) {
 		for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
 			if (xfeature_enabled(i)) {
 				xstate_comp_offsets[i] = xstate_offsets[i];
@@ -316,13 +316,13 @@
 	WARN_ON_FPU(!on_boot_cpu);
 	on_boot_cpu = 0;
 
-	if (!cpu_has_xsave)
+	if (!boot_cpu_has(X86_FEATURE_XSAVE))
 		return;
 
 	setup_xstate_features();
 	print_xstate_features();
 
-	if (cpu_has_xsaves) {
+	if (boot_cpu_has(X86_FEATURE_XSAVES)) {
 		init_fpstate.xsave.header.xcomp_bv = (u64)1 << 63 | xfeatures_mask;
 		init_fpstate.xsave.header.xfeatures = xfeatures_mask;
 	}
@@ -417,7 +417,7 @@
  */
 static int using_compacted_format(void)
 {
-	return cpu_has_xsaves;
+	return boot_cpu_has(X86_FEATURE_XSAVES);
 }
 
 static void __xstate_dump_leaves(void)
@@ -549,7 +549,7 @@
 	unsigned int eax, ebx, ecx, edx;
 	unsigned int calculated_xstate_size;
 
-	if (!cpu_has_xsaves) {
+	if (!boot_cpu_has(X86_FEATURE_XSAVES)) {
 		/*
 		 * - CPUID function 0DH, sub-function 0:
 		 *    EBX enumerates the size (in bytes) required by
@@ -630,7 +630,7 @@
 	WARN_ON_FPU(!on_boot_cpu);
 	on_boot_cpu = 0;
 
-	if (!cpu_has_xsave) {
+	if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
 		pr_info("x86/fpu: Legacy x87 FPU detected.\n");
 		return;
 	}
@@ -667,7 +667,7 @@
 	pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
 		xfeatures_mask,
 		xstate_size,
-		cpu_has_xsaves ? "compacted" : "standard");
+		boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard");
 }
 
 /*
@@ -678,7 +678,7 @@
 	/*
 	 * Restore XCR0 on xsave capable CPUs:
 	 */
-	if (cpu_has_xsave)
+	if (boot_cpu_has(X86_FEATURE_XSAVE))
 		xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask);
 }
 
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 2911ef3..d784bb5 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -34,6 +34,8 @@
 	cr4_init_shadow();
 	sanitize_boot_params(&boot_params);
 
+	x86_early_init_platform_quirks();
+
 	/* Call the subarch specific early setup function */
 	switch (boot_params.hdr.hardware_subarch) {
 	case X86_SUBARCH_INTEL_MID:
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 1f4422d..b72fb0b 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -182,6 +182,7 @@
 	if (!boot_params.hdr.version)
 		copy_bootdata(__va(real_mode_data));
 
+	x86_early_init_platform_quirks();
 	reserve_ebda_region();
 
 	switch (boot_params.hdr.hardware_subarch) {
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index af11129..6f8902b 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -555,62 +555,53 @@
 	 */
 	cld
 
-	cmpl $2,(%esp)		# X86_TRAP_NMI
-	je .Lis_nmi		# Ignore NMI
-
-	cmpl $2,%ss:early_recursion_flag
-	je hlt_loop
 	incl %ss:early_recursion_flag
 
-	push %eax		# 16(%esp)
-	push %ecx		# 12(%esp)
-	push %edx		#  8(%esp)
-	push %ds		#  4(%esp)
-	push %es		#  0(%esp)
-	movl $(__KERNEL_DS),%eax
-	movl %eax,%ds
-	movl %eax,%es
+	/* The vector number is in pt_regs->gs */
 
-	cmpl $(__KERNEL_CS),32(%esp)
-	jne 10f
+	cld
+	pushl	%fs		/* pt_regs->fs */
+	movw	$0, 2(%esp)	/* clear high bits (some CPUs leave garbage) */
+	pushl	%es		/* pt_regs->es */
+	movw	$0, 2(%esp)	/* clear high bits (some CPUs leave garbage) */
+	pushl	%ds		/* pt_regs->ds */
+	movw	$0, 2(%esp)	/* clear high bits (some CPUs leave garbage) */
+	pushl	%eax		/* pt_regs->ax */
+	pushl	%ebp		/* pt_regs->bp */
+	pushl	%edi		/* pt_regs->di */
+	pushl	%esi		/* pt_regs->si */
+	pushl	%edx		/* pt_regs->dx */
+	pushl	%ecx		/* pt_regs->cx */
+	pushl	%ebx		/* pt_regs->bx */
 
-	leal 28(%esp),%eax	# Pointer to %eip
-	call early_fixup_exception
-	andl %eax,%eax
-	jnz ex_entry		/* found an exception entry */
+	/* Fix up DS and ES */
+	movl	$(__KERNEL_DS), %ecx
+	movl	%ecx, %ds
+	movl	%ecx, %es
 
-10:
-#ifdef CONFIG_PRINTK
-	xorl %eax,%eax
-	movw %ax,2(%esp)	/* clean up the segment values on some cpus */
-	movw %ax,6(%esp)
-	movw %ax,34(%esp)
-	leal  40(%esp),%eax
-	pushl %eax		/* %esp before the exception */
-	pushl %ebx
-	pushl %ebp
-	pushl %esi
-	pushl %edi
-	movl %cr2,%eax
-	pushl %eax
-	pushl (20+6*4)(%esp)	/* trapno */
-	pushl $fault_msg
-	call printk
-#endif
-	call dump_stack
-hlt_loop:
-	hlt
-	jmp hlt_loop
+	/* Load the vector number into EDX */
+	movl	PT_GS(%esp), %edx
 
-ex_entry:
-	pop %es
-	pop %ds
-	pop %edx
-	pop %ecx
-	pop %eax
-	decl %ss:early_recursion_flag
-.Lis_nmi:
-	addl $8,%esp		/* drop vector number and error code */
+	/* Load GS into pt_regs->gs and clear high bits */
+	movw	%gs, PT_GS(%esp)
+	movw	$0, PT_GS+2(%esp)
+
+	movl	%esp, %eax	/* args are pt_regs (EAX), trapnr (EDX) */
+	call	early_fixup_exception
+
+	popl	%ebx		/* pt_regs->bx */
+	popl	%ecx		/* pt_regs->cx */
+	popl	%edx		/* pt_regs->dx */
+	popl	%esi		/* pt_regs->si */
+	popl	%edi		/* pt_regs->di */
+	popl	%ebp		/* pt_regs->bp */
+	popl	%eax		/* pt_regs->ax */
+	popl	%ds		/* pt_regs->ds */
+	popl	%es		/* pt_regs->es */
+	popl	%fs		/* pt_regs->fs */
+	popl	%gs		/* pt_regs->gs */
+	decl	%ss:early_recursion_flag
+	addl	$4, %esp	/* pop pt_regs->orig_ax */
 	iret
 ENDPROC(early_idt_handler_common)
 
@@ -647,10 +638,14 @@
 	popl %eax
 #endif
 	iret
+
+hlt_loop:
+	hlt
+	jmp hlt_loop
 ENDPROC(ignore_int)
 __INITDATA
 	.align 4
-early_recursion_flag:
+GLOBAL(early_recursion_flag)
 	.long 0
 
 __REFDATA
@@ -715,19 +710,6 @@
 int_msg:
 	.asciz "Unknown interrupt or fault at: %p %p %p\n"
 
-fault_msg:
-/* fault info: */
-	.ascii "BUG: Int %d: CR2 %p\n"
-/* regs pushed in early_idt_handler: */
-	.ascii "     EDI %p  ESI %p  EBP %p  EBX %p\n"
-	.ascii "     ESP %p   ES %p   DS %p\n"
-	.ascii "     EDX %p  ECX %p  EAX %p\n"
-/* fault frame: */
-	.ascii "     vec %p  err %p  EIP %p   CS %p  flg %p\n"
-	.ascii "Stack: %p %p %p %p %p %p %p %p\n"
-	.ascii "       %p %p %p %p %p %p %p %p\n"
-	.asciz "       %p %p %p %p %p %p %p %p\n"
-
 #include "../../x86/xen/xen-head.S"
 
 /*
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 22fbf9d..5df831e 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -20,6 +20,7 @@
 #include <asm/processor-flags.h>
 #include <asm/percpu.h>
 #include <asm/nops.h>
+#include "../entry/calling.h"
 
 #ifdef CONFIG_PARAVIRT
 #include <asm/asm-offsets.h>
@@ -64,6 +65,14 @@
 	 * tables and then reload them.
 	 */
 
+	/*
+	 * Set up the stack for verify_cpu(). "-8" because stack_start is defined
+	 * this way, see below. Our best guess is a NULL ptr for stack
+	 * termination heuristics and we don't want to break anything which
+	 * might depend on it (kgdb, ...).
+	 */
+	leaq	(__end_init_task - 8)(%rip), %rsp
+
 	/* Sanitize CPU configuration */
 	call verify_cpu
 
@@ -350,90 +359,48 @@
 	 */
 	cld
 
-	cmpl $2,(%rsp)		# X86_TRAP_NMI
-	je .Lis_nmi		# Ignore NMI
-
-	cmpl $2,early_recursion_flag(%rip)
-	jz  1f
 	incl early_recursion_flag(%rip)
 
-	pushq %rax		# 64(%rsp)
-	pushq %rcx		# 56(%rsp)
-	pushq %rdx		# 48(%rsp)
-	pushq %rsi		# 40(%rsp)
-	pushq %rdi		# 32(%rsp)
-	pushq %r8		# 24(%rsp)
-	pushq %r9		# 16(%rsp)
-	pushq %r10		#  8(%rsp)
-	pushq %r11		#  0(%rsp)
+	/* The vector number is currently in the pt_regs->di slot. */
+	pushq %rsi				/* pt_regs->si */
+	movq 8(%rsp), %rsi			/* RSI = vector number */
+	movq %rdi, 8(%rsp)			/* pt_regs->di = RDI */
+	pushq %rdx				/* pt_regs->dx */
+	pushq %rcx				/* pt_regs->cx */
+	pushq %rax				/* pt_regs->ax */
+	pushq %r8				/* pt_regs->r8 */
+	pushq %r9				/* pt_regs->r9 */
+	pushq %r10				/* pt_regs->r10 */
+	pushq %r11				/* pt_regs->r11 */
+	pushq %rbx				/* pt_regs->bx */
+	pushq %rbp				/* pt_regs->bp */
+	pushq %r12				/* pt_regs->r12 */
+	pushq %r13				/* pt_regs->r13 */
+	pushq %r14				/* pt_regs->r14 */
+	pushq %r15				/* pt_regs->r15 */
 
-	cmpl $__KERNEL_CS,96(%rsp)
-	jne 11f
-
-	cmpl $14,72(%rsp)	# Page fault?
+	cmpq $14,%rsi		/* Page fault? */
 	jnz 10f
-	GET_CR2_INTO(%rdi)	# can clobber any volatile register if pv
+	GET_CR2_INTO(%rdi)	/* Can clobber any volatile register if pv */
 	call early_make_pgtable
 	andl %eax,%eax
-	jz 20f			# All good
+	jz 20f			/* All good */
 
 10:
-	leaq 88(%rsp),%rdi	# Pointer to %rip
+	movq %rsp,%rdi		/* RDI = pt_regs; RSI is already trapnr */
 	call early_fixup_exception
-	andl %eax,%eax
-	jnz 20f			# Found an exception entry
 
-11:
-#ifdef CONFIG_EARLY_PRINTK
-	GET_CR2_INTO(%r9)	# can clobber any volatile register if pv
-	movl 80(%rsp),%r8d	# error code
-	movl 72(%rsp),%esi	# vector number
-	movl 96(%rsp),%edx	# %cs
-	movq 88(%rsp),%rcx	# %rip
-	xorl %eax,%eax
-	leaq early_idt_msg(%rip),%rdi
-	call early_printk
-	cmpl $2,early_recursion_flag(%rip)
-	jz  1f
-	call dump_stack
-#ifdef CONFIG_KALLSYMS	
-	leaq early_idt_ripmsg(%rip),%rdi
-	movq 40(%rsp),%rsi	# %rip again
-	call __print_symbol
-#endif
-#endif /* EARLY_PRINTK */
-1:	hlt
-	jmp 1b
-
-20:	# Exception table entry found or page table generated
-	popq %r11
-	popq %r10
-	popq %r9
-	popq %r8
-	popq %rdi
-	popq %rsi
-	popq %rdx
-	popq %rcx
-	popq %rax
+20:
 	decl early_recursion_flag(%rip)
-.Lis_nmi:
-	addq $16,%rsp		# drop vector number and error code
-	INTERRUPT_RETURN
+	jmp restore_regs_and_iret
 ENDPROC(early_idt_handler_common)
 
 	__INITDATA
 
 	.balign 4
-early_recursion_flag:
+GLOBAL(early_recursion_flag)
 	.long 0
 
-#ifdef CONFIG_EARLY_PRINTK
-early_idt_msg:
-	.asciz "PANIC: early exception %02lx rip %lx:%lx error %lx cr2 %lx\n"
-early_idt_ripmsg:
-	.asciz "RIP %s\n"
-#endif /* CONFIG_EARLY_PRINTK */
-
 #define NEXT_PAGE(name) \
 	.balign	PAGE_SIZE; \
 GLOBAL(name)
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index a1f0e4a..f112af7 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -54,7 +54,7 @@
 	char				name[10];
 };
 
-inline struct hpet_dev *EVT_TO_HPET_DEV(struct clock_event_device *evtdev)
+static inline struct hpet_dev *EVT_TO_HPET_DEV(struct clock_event_device *evtdev)
 {
 	return container_of(evtdev, struct hpet_dev, evt);
 }
@@ -773,7 +773,6 @@
 	.mask		= HPET_MASK,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 	.resume		= hpet_resume_counter,
-	.archdata	= { .vclock_mode = VCLOCK_HPET },
 };
 
 static int hpet_clocksource_register(void)
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index e565e0e..fc25f69 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -13,6 +13,7 @@
 #include <linux/cpu.h>
 #include <asm/kprobes.h>
 #include <asm/alternative.h>
+#include <asm/text-patching.h>
 
 #ifdef HAVE_JUMP_LABEL
 
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 2da6ee9..04cde52 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -45,6 +45,7 @@
 #include <linux/uaccess.h>
 #include <linux/memory.h>
 
+#include <asm/text-patching.h>
 #include <asm/debugreg.h>
 #include <asm/apicdef.h>
 #include <asm/apic.h>
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index ae703ac..38cf7a7 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -51,6 +51,7 @@
 #include <linux/ftrace.h>
 #include <linux/frame.h>
 
+#include <asm/text-patching.h>
 #include <asm/cacheflush.h>
 #include <asm/desc.h>
 #include <asm/pgtable.h>
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 7b3b9d1..4425f59 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -29,6 +29,7 @@
 #include <linux/kallsyms.h>
 #include <linux/ftrace.h>
 
+#include <asm/text-patching.h>
 #include <asm/cacheflush.h>
 #include <asm/desc.h>
 #include <asm/pgtable.h>
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 8079508..eea2a6f 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -285,14 +285,6 @@
 {
 	pv_info.name = "KVM";
 
-	/*
-	 * KVM isn't paravirt in the sense of paravirt_enabled.  A KVM
-	 * guest kernel works like a bare metal kernel with additional
-	 * features, and paravirt_enabled is about features that are
-	 * missing.
-	 */
-	pv_info.paravirt_enabled = 0;
-
 	if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
 		pv_cpu_ops.io_delay = kvm_io_delay;
 
@@ -522,7 +514,7 @@
 	if (boot_cpu_data.cpuid_level < 0)
 		return 0;	/* So we don't blow up on old processors */
 
-	if (cpu_has_hypervisor)
+	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
 		return hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0);
 
 	return 0;
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 005c03e..477ae80 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -31,6 +31,7 @@
 #include <linux/jump_label.h>
 #include <linux/random.h>
 
+#include <asm/text-patching.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/setup.h>
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index f08ac28..7b3b3f2 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -294,7 +294,6 @@
 
 struct pv_info pv_info = {
 	.name = "bare hardware",
-	.paravirt_enabled = 0,
 	.kernel_rpl = 0,
 	.shared_kernel_pmd = 1,	/* Only used when CONFIG_X86_PAE is set */
 
@@ -339,8 +338,10 @@
 	.write_cr8 = native_write_cr8,
 #endif
 	.wbinvd = native_wbinvd,
-	.read_msr = native_read_msr_safe,
-	.write_msr = native_write_msr_safe,
+	.read_msr = native_read_msr,
+	.write_msr = native_write_msr,
+	.read_msr_safe = native_read_msr_safe,
+	.write_msr_safe = native_write_msr_safe,
 	.read_pmc = native_read_pmc,
 	.load_tr_desc = native_load_tr_desc,
 	.set_ldt = native_set_ldt,
diff --git a/arch/x86/kernel/pci-iommu_table.c b/arch/x86/kernel/pci-iommu_table.c
index 35ccf75..f712dfd 100644
--- a/arch/x86/kernel/pci-iommu_table.c
+++ b/arch/x86/kernel/pci-iommu_table.c
@@ -72,7 +72,7 @@
 	}
 }
 #else
-inline void check_iommu_entries(struct iommu_table_entry *start,
+void __init check_iommu_entries(struct iommu_table_entry *start,
 				       struct iommu_table_entry *finish)
 {
 }
diff --git a/arch/x86/kernel/platform-quirks.c b/arch/x86/kernel/platform-quirks.c
new file mode 100644
index 0000000..b2f8a33
--- /dev/null
+++ b/arch/x86/kernel/platform-quirks.c
@@ -0,0 +1,35 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+
+#include <asm/setup.h>
+#include <asm/bios_ebda.h>
+
+void __init x86_early_init_platform_quirks(void)
+{
+	x86_platform.legacy.rtc = 1;
+	x86_platform.legacy.ebda_search = 0;
+	x86_platform.legacy.devices.pnpbios = 1;
+
+	switch (boot_params.hdr.hardware_subarch) {
+	case X86_SUBARCH_PC:
+		x86_platform.legacy.ebda_search = 1;
+		break;
+	case X86_SUBARCH_XEN:
+	case X86_SUBARCH_LGUEST:
+	case X86_SUBARCH_INTEL_MID:
+	case X86_SUBARCH_CE4100:
+		x86_platform.legacy.devices.pnpbios = 0;
+		x86_platform.legacy.rtc = 0;
+		break;
+	}
+
+	if (x86_platform.set_legacy_features)
+		x86_platform.set_legacy_features();
+}
+
+#if defined(CONFIG_PNPBIOS)
+bool __init arch_pnpbios_disabled(void)
+{
+	return x86_platform.legacy.devices.pnpbios == 0;
+}
+#endif
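
With these flags in place, later boot code can gate legacy assumptions on x86_platform.legacy instead of paravirt_enabled(); the rtc.c hunk below does exactly that for the RTC. A hypothetical consumer of the PNPBIOS flag (legacy_widget_init() is made up for illustration):

static int __init legacy_widget_init(void)
{
	/* Subarchs such as Xen, lguest and Intel MID have no PNPBIOS. */
	if (!x86_platform.legacy.devices.pnpbios)
		return -ENODEV;
	/* ... probe the legacy device ... */
	return 0;
}
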
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 6cbab31..6b16c36 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -136,25 +136,6 @@
 	}
 }
 
-static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
-{
-	struct user_desc ud = {
-		.base_addr = addr,
-		.limit = 0xfffff,
-		.seg_32bit = 1,
-		.limit_in_pages = 1,
-		.useable = 1,
-	};
-	struct desc_struct *desc = t->thread.tls_array;
-	desc += tls;
-	fill_ldt(desc, &ud);
-}
-
-static inline u32 read_32bit_tls(struct task_struct *t, int tls)
-{
-	return get_desc_base(&t->thread.tls_array[tls]);
-}
-
 int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
 		unsigned long arg, struct task_struct *p, unsigned long tls)
 {
@@ -169,9 +150,9 @@
 	p->thread.io_bitmap_ptr = NULL;
 
 	savesegment(gs, p->thread.gsindex);
-	p->thread.gs = p->thread.gsindex ? 0 : me->thread.gs;
+	p->thread.gsbase = p->thread.gsindex ? 0 : me->thread.gsbase;
 	savesegment(fs, p->thread.fsindex);
-	p->thread.fs = p->thread.fsindex ? 0 : me->thread.fs;
+	p->thread.fsbase = p->thread.fsindex ? 0 : me->thread.fsbase;
 	savesegment(es, p->thread.es);
 	savesegment(ds, p->thread.ds);
 	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
@@ -210,7 +191,7 @@
 	 */
 	if (clone_flags & CLONE_SETTLS) {
 #ifdef CONFIG_IA32_EMULATION
-		if (is_ia32_task())
+		if (in_ia32_syscall())
 			err = do_set_thread_area(p, -1,
 				(struct user_desc __user *)tls, 0);
 		else
@@ -282,7 +263,7 @@
 	struct fpu *next_fpu = &next->fpu;
 	int cpu = smp_processor_id();
 	struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
-	unsigned fsindex, gsindex;
+	unsigned prev_fsindex, prev_gsindex;
 	fpu_switch_t fpu_switch;
 
 	fpu_switch = switch_fpu_prepare(prev_fpu, next_fpu, cpu);
@@ -292,8 +273,8 @@
 	 *
 	 * (e.g. xen_load_tls())
 	 */
-	savesegment(fs, fsindex);
-	savesegment(gs, gsindex);
+	savesegment(fs, prev_fsindex);
+	savesegment(gs, prev_gsindex);
 
 	/*
 	 * Load TLS before restoring any segments so that segment loads
@@ -336,66 +317,104 @@
 	 * Switch FS and GS.
 	 *
 	 * These are even more complicated than DS and ES: they have
-	 * 64-bit bases are that controlled by arch_prctl.  Those bases
-	 * only differ from the values in the GDT or LDT if the selector
-	 * is 0.
+	 * 64-bit bases that are controlled by arch_prctl.  The bases
+	 * don't necessarily match the selectors, as user code can do
+	 * any number of things to cause them to be inconsistent.
 	 *
-	 * Loading the segment register resets the hidden base part of
-	 * the register to 0 or the value from the GDT / LDT.  If the
-	 * next base address zero, writing 0 to the segment register is
-	 * much faster than using wrmsr to explicitly zero the base.
+	 * We don't promise to preserve the bases if the selectors are
+	 * nonzero.  We also don't promise to preserve the base if the
+	 * selector is zero and the base doesn't match whatever was
+	 * most recently passed to ARCH_SET_FS/GS.  (If/when the
+	 * FSGSBASE instructions are enabled, we'll need to offer
+	 * stronger guarantees.)
 	 *
-	 * The thread_struct.fs and thread_struct.gs values are 0
-	 * if the fs and gs bases respectively are not overridden
-	 * from the values implied by fsindex and gsindex.  They
-	 * are nonzero, and store the nonzero base addresses, if
-	 * the bases are overridden.
-	 *
-	 * (fs != 0 && fsindex != 0) || (gs != 0 && gsindex != 0) should
-	 * be impossible.
-	 *
-	 * Therefore we need to reload the segment registers if either
-	 * the old or new selector is nonzero, and we need to override
-	 * the base address if next thread expects it to be overridden.
-	 *
-	 * This code is unnecessarily slow in the case where the old and
-	 * new indexes are zero and the new base is nonzero -- it will
-	 * unnecessarily write 0 to the selector before writing the new
-	 * base address.
-	 *
-	 * Note: This all depends on arch_prctl being the only way that
-	 * user code can override the segment base.  Once wrfsbase and
-	 * wrgsbase are enabled, most of this code will need to change.
+	 * As an invariant,
+	 * (fsbase != 0 && fsindex != 0) || (gsbase != 0 && gsindex != 0) is
+	 * impossible.
 	 */
-	if (unlikely(fsindex | next->fsindex | prev->fs)) {
+	if (next->fsindex) {
+		/* Loading a nonzero value into FS sets the index and base. */
 		loadsegment(fs, next->fsindex);
-
-		/*
-		 * If user code wrote a nonzero value to FS, then it also
-		 * cleared the overridden base address.
-		 *
-		 * XXX: if user code wrote 0 to FS and cleared the base
-		 * address itself, we won't notice and we'll incorrectly
-		 * restore the prior base address next time we reschdule
-		 * the process.
-		 */
-		if (fsindex)
-			prev->fs = 0;
+	} else {
+		if (next->fsbase) {
+			/* Next index is zero but next base is nonzero. */
+			if (prev_fsindex)
+				loadsegment(fs, 0);
+			wrmsrl(MSR_FS_BASE, next->fsbase);
+		} else {
+			/* Next base and index are both zero. */
+			if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
+				/*
+				 * We don't know the previous base and can't
+				 * find out without RDMSR.  Forcibly clear it.
+				 */
+				loadsegment(fs, __USER_DS);
+				loadsegment(fs, 0);
+			} else {
+				/*
+				 * If the previous index is zero and ARCH_SET_FS
+				 * didn't change the base, then the base is
+				 * also zero and we don't need to do anything.
+				 */
+				if (prev->fsbase || prev_fsindex)
+					loadsegment(fs, 0);
+			}
+		}
 	}
-	if (next->fs)
-		wrmsrl(MSR_FS_BASE, next->fs);
-	prev->fsindex = fsindex;
+	/*
+	 * Save the old state and preserve the invariant.
+	 * NB: if prev_fsindex == 0, then we can't reliably learn the base
+	 * without RDMSR because Intel user code can zero it without telling
+	 * us and AMD user code can program any 32-bit value without telling
+	 * us.
+	 */
+	if (prev_fsindex)
+		prev->fsbase = 0;
+	prev->fsindex = prev_fsindex;
 
-	if (unlikely(gsindex | next->gsindex | prev->gs)) {
+	if (next->gsindex) {
+		/* Loading a nonzero value into GS sets the index and base. */
 		load_gs_index(next->gsindex);
-
-		/* This works (and fails) the same way as fsindex above. */
-		if (gsindex)
-			prev->gs = 0;
+	} else {
+		if (next->gsbase) {
+			/* Next index is zero but next base is nonzero. */
+			if (prev_gsindex)
+				load_gs_index(0);
+			wrmsrl(MSR_KERNEL_GS_BASE, next->gsbase);
+		} else {
+			/* Next base and index are both zero. */
+			if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
+				/*
+				 * We don't know the previous base and can't
+				 * find out without RDMSR.  Forcibly clear it.
+				 *
+				 * This contains a pointless SWAPGS pair.
+				 * Fixing it would involve an explicit check
+				 * for Xen or a new pvop.
+				 */
+				load_gs_index(__USER_DS);
+				load_gs_index(0);
+			} else {
+				/*
+				 * If the previous index is zero and ARCH_SET_GS
+				 * didn't change the base, then the base is
+				 * also zero and we don't need to do anything.
+				 */
+				if (prev->gsbase || prev_gsindex)
+					load_gs_index(0);
+			}
+		}
 	}
-	if (next->gs)
-		wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
-	prev->gsindex = gsindex;
+	/*
+	 * Save the old state and preserve the invariant.
+	 * NB: if prev_gsindex == 0, then we can't reliably learn the base
+	 * without RDMSR because Intel user code can zero it without telling
+	 * us and AMD user code can program any 32-bit value without telling
+	 * us.
+	 */
+	if (prev_gsindex)
+		prev->gsbase = 0;
+	prev->gsindex = prev_gsindex;
 
 	switch_fpu_finish(next_fpu, fpu_switch);
 
@@ -516,23 +535,11 @@
 		if (addr >= TASK_SIZE_OF(task))
 			return -EPERM;
 		cpu = get_cpu();
-		/* handle small bases via the GDT because that's faster to
-		   switch. */
-		if (addr <= 0xffffffff) {
-			set_32bit_tls(task, GS_TLS, addr);
-			if (doit) {
-				load_TLS(&task->thread, cpu);
-				load_gs_index(GS_TLS_SEL);
-			}
-			task->thread.gsindex = GS_TLS_SEL;
-			task->thread.gs = 0;
-		} else {
-			task->thread.gsindex = 0;
-			task->thread.gs = addr;
-			if (doit) {
-				load_gs_index(0);
-				ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
-			}
+		task->thread.gsindex = 0;
+		task->thread.gsbase = addr;
+		if (doit) {
+			load_gs_index(0);
+			ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
 		}
 		put_cpu();
 		break;
@@ -542,52 +549,30 @@
 		if (addr >= TASK_SIZE_OF(task))
 			return -EPERM;
 		cpu = get_cpu();
-		/* handle small bases via the GDT because that's faster to
-		   switch. */
-		if (addr <= 0xffffffff) {
-			set_32bit_tls(task, FS_TLS, addr);
-			if (doit) {
-				load_TLS(&task->thread, cpu);
-				loadsegment(fs, FS_TLS_SEL);
-			}
-			task->thread.fsindex = FS_TLS_SEL;
-			task->thread.fs = 0;
-		} else {
-			task->thread.fsindex = 0;
-			task->thread.fs = addr;
-			if (doit) {
-				/* set the selector to 0 to not confuse
-				   __switch_to */
-				loadsegment(fs, 0);
-				ret = wrmsrl_safe(MSR_FS_BASE, addr);
-			}
+		task->thread.fsindex = 0;
+		task->thread.fsbase = addr;
+		if (doit) {
+			/* set the selector to 0 to not confuse __switch_to */
+			loadsegment(fs, 0);
+			ret = wrmsrl_safe(MSR_FS_BASE, addr);
 		}
 		put_cpu();
 		break;
 	case ARCH_GET_FS: {
 		unsigned long base;
-		if (task->thread.fsindex == FS_TLS_SEL)
-			base = read_32bit_tls(task, FS_TLS);
-		else if (doit)
+		if (doit)
 			rdmsrl(MSR_FS_BASE, base);
 		else
-			base = task->thread.fs;
+			base = task->thread.fsbase;
 		ret = put_user(base, (unsigned long __user *)addr);
 		break;
 	}
 	case ARCH_GET_GS: {
 		unsigned long base;
-		unsigned gsindex;
-		if (task->thread.gsindex == GS_TLS_SEL)
-			base = read_32bit_tls(task, GS_TLS);
-		else if (doit) {
-			savesegment(gs, gsindex);
-			if (gsindex)
-				rdmsrl(MSR_KERNEL_GS_BASE, base);
-			else
-				base = task->thread.gs;
-		} else
-			base = task->thread.gs;
+		if (doit)
+			rdmsrl(MSR_KERNEL_GS_BASE, base);
+		else
+			base = task->thread.gsbase;
 		ret = put_user(base, (unsigned long __user *)addr);
 		break;
 	}
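
The rewritten ARCH_SET_GS/ARCH_GET_GS paths can be exercised from userspace. A minimal runnable sketch, assuming x86-64 Linux with glibc (the arch_prctl codes below are the kernel ABI values from asm/prctl.h):

#include <stdint.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#define ARCH_SET_GS 0x1001
#define ARCH_GET_GS 0x1004

int main(void)
{
	uint64_t scratch = 0, base = 0;

	/* The kernel zeroes the %gs selector and writes MSR_KERNEL_GS_BASE. */
	if (syscall(SYS_arch_prctl, ARCH_SET_GS, (uint64_t)&scratch))
		return 1;
	syscall(SYS_arch_prctl, ARCH_GET_GS, &base);
	printf("gsbase=%#llx expected=%#llx\n",
	       (unsigned long long)base, (unsigned long long)&scratch);
	return 0;
}
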
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 32e9d9c..e60ef91 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -303,29 +303,11 @@
 
 	switch (offset) {
 	case offsetof(struct user_regs_struct,fs):
-		/*
-		 * If this is setting fs as for normal 64-bit use but
-		 * setting fs_base has implicitly changed it, leave it.
-		 */
-		if ((value == FS_TLS_SEL && task->thread.fsindex == 0 &&
-		     task->thread.fs != 0) ||
-		    (value == 0 && task->thread.fsindex == FS_TLS_SEL &&
-		     task->thread.fs == 0))
-			break;
 		task->thread.fsindex = value;
 		if (task == current)
 			loadsegment(fs, task->thread.fsindex);
 		break;
 	case offsetof(struct user_regs_struct,gs):
-		/*
-		 * If this is setting gs as for normal 64-bit use but
-		 * setting gs_base has implicitly changed it, leave it.
-		 */
-		if ((value == GS_TLS_SEL && task->thread.gsindex == 0 &&
-		     task->thread.gs != 0) ||
-		    (value == 0 && task->thread.gsindex == GS_TLS_SEL &&
-		     task->thread.gs == 0))
-			break;
 		task->thread.gsindex = value;
 		if (task == current)
 			load_gs_index(task->thread.gsindex);
@@ -417,7 +399,7 @@
 		 * to set either thread.fs or thread.fsindex and the
 		 * corresponding GDT slot.
 		 */
-		if (child->thread.fs != value)
+		if (child->thread.fsbase != value)
 			return do_arch_prctl(child, ARCH_SET_FS, value);
 		return 0;
 	case offsetof(struct user_regs_struct,gs_base):
@@ -426,7 +408,7 @@
 		 */
 		if (value >= TASK_SIZE_OF(child))
 			return -EIO;
-		if (child->thread.gs != value)
+		if (child->thread.gsbase != value)
 			return do_arch_prctl(child, ARCH_SET_GS, value);
 		return 0;
 #endif
@@ -453,31 +435,17 @@
 #ifdef CONFIG_X86_64
 	case offsetof(struct user_regs_struct, fs_base): {
 		/*
-		 * do_arch_prctl may have used a GDT slot instead of
-		 * the MSR.  To userland, it appears the same either
-		 * way, except the %fs segment selector might not be 0.
+		 * XXX: This will not behave as expected if called on
+		 * current or if fsindex != 0.
 		 */
-		unsigned int seg = task->thread.fsindex;
-		if (task->thread.fs != 0)
-			return task->thread.fs;
-		if (task == current)
-			asm("movl %%fs,%0" : "=r" (seg));
-		if (seg != FS_TLS_SEL)
-			return 0;
-		return get_desc_base(&task->thread.tls_array[FS_TLS]);
+		return task->thread.fsbase;
 	}
 	case offsetof(struct user_regs_struct, gs_base): {
 		/*
-		 * Exactly the same here as the %fs handling above.
+		 * XXX: This will not behave as expected if called on
+		 * current or if gsindex != 0.
 		 */
-		unsigned int seg = task->thread.gsindex;
-		if (task->thread.gs != 0)
-			return task->thread.gs;
-		if (task == current)
-			asm("movl %%gs,%0" : "=r" (seg));
-		if (seg != GS_TLS_SEL)
-			return 0;
-		return get_desc_base(&task->thread.tls_array[GS_TLS]);
+		return task->thread.gsbase;
 	}
 #endif
 	}
@@ -1266,7 +1234,7 @@
 			compat_ulong_t caddr, compat_ulong_t cdata)
 {
 #ifdef CONFIG_X86_X32_ABI
-	if (!is_ia32_task())
+	if (!in_ia32_syscall())
 		return x32_arch_ptrace(child, request, caddr, cdata);
 #endif
 #ifdef CONFIG_IA32_EMULATION
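
The simplified fs_base/gs_base read-out above is what PTRACE_PEEKUSER returns. A self-contained sketch, assuming x86-64 Linux (per the XXX comments, the value mirrors thread.fsbase rather than live register state):

#include <stddef.h>
#include <stdio.h>
#include <signal.h>
#include <sys/ptrace.h>
#include <sys/user.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	pid_t child = fork();

	if (child == 0) {
		ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		raise(SIGSTOP);		/* let the parent inspect us */
		_exit(0);
	}
	waitpid(child, NULL, 0);
	printf("child fs_base = %#lx\n",
	       ptrace(PTRACE_PEEKUSER, child,
		      offsetof(struct user_regs_struct, fs_base), NULL));
	kill(child, SIGKILL);
	return 0;
}
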
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index ab0adc0..a9b31eb 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -535,6 +535,15 @@
 	mode = reboot_mode == REBOOT_WARM ? 0x1234 : 0;
 	*((unsigned short *)__va(0x472)) = mode;
 
+	/*
+	 * If an EFI capsule has been registered with the firmware then
+	 * override the reboot= parameter.
+	 */
+	if (efi_capsule_pending(NULL)) {
+		pr_info("EFI capsule is pending, forcing EFI reboot.\n");
+		reboot_type = BOOT_EFI;
+	}
+
 	for (;;) {
 		/* Could also try the reset bit in the Hammer NB */
 		switch (reboot_type) {
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 4af8d06..eceaa08 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -14,6 +14,7 @@
 #include <asm/time.h>
 #include <asm/intel-mid.h>
 #include <asm/rtc.h>
+#include <asm/setup.h>
 
 #ifdef CONFIG_X86_32
 /*
@@ -185,22 +186,7 @@
 		}
 	}
 #endif
-	if (of_have_populated_dt())
-		return 0;
-
-	/* Intel MID platforms don't have ioport rtc */
-	if (intel_mid_identify_cpu())
-		return -ENODEV;
-
-#ifdef CONFIG_ACPI
-	if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_CMOS_RTC) {
-		/* This warning can likely go away again in a year or two. */
-		pr_info("ACPI: not registering RTC platform device\n");
-		return -ENODEV;
-	}
-#endif
-
-	if (paravirt_enabled() && !paravirt_has(RTC))
+	if (!x86_platform.legacy.rtc)
 		return -ENODEV;
 
 	platform_device_register(&rtc_device);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 2367ae0..c4e7b39 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -398,6 +398,11 @@
 
 	memblock_free(ramdisk_image, ramdisk_end - ramdisk_image);
 }
+
+static void __init early_initrd_acpi_init(void)
+{
+	early_acpi_table_init((void *)initrd_start, initrd_end - initrd_start);
+}
 #else
 static void __init early_reserve_initrd(void)
 {
@@ -405,6 +410,9 @@
 static void __init reserve_initrd(void)
 {
 }
+static void __init early_initrd_acpi_init(void)
+{
+}
 #endif /* CONFIG_BLK_DEV_INITRD */
 
 static void __init parse_setup_data(void)
@@ -1138,9 +1146,7 @@
 
 	reserve_initrd();
 
-#if defined(CONFIG_ACPI) && defined(CONFIG_BLK_DEV_INITRD)
-	acpi_initrd_override((void *)initrd_start, initrd_end - initrd_start);
-#endif
+	early_initrd_acpi_init();
 
 	vsmp_init();
 
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 548ddf7..22cc2f9 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -248,18 +248,17 @@
 	if (config_enabled(CONFIG_X86_64))
 		sp -= 128;
 
-	if (!onsigstack) {
-		/* This is the X/Open sanctioned signal stack switching.  */
-		if (ka->sa.sa_flags & SA_ONSTACK) {
-			if (current->sas_ss_size)
-				sp = current->sas_ss_sp + current->sas_ss_size;
-		} else if (config_enabled(CONFIG_X86_32) &&
-			   (regs->ss & 0xffff) != __USER_DS &&
-			   !(ka->sa.sa_flags & SA_RESTORER) &&
-			   ka->sa.sa_restorer) {
-				/* This is the legacy signal stack switching. */
-				sp = (unsigned long) ka->sa.sa_restorer;
-		}
+	/* This is the X/Open sanctioned signal stack switching.  */
+	if (ka->sa.sa_flags & SA_ONSTACK) {
+		if (sas_ss_flags(sp) == 0)
+			sp = current->sas_ss_sp + current->sas_ss_size;
+	} else if (config_enabled(CONFIG_X86_32) &&
+		   !onsigstack &&
+		   (regs->ss & 0xffff) != __USER_DS &&
+		   !(ka->sa.sa_flags & SA_RESTORER) &&
+		   ka->sa.sa_restorer) {
+		/* This is the legacy signal stack switching. */
+		sp = (unsigned long) ka->sa.sa_restorer;
 	}
 
 	if (fpu->fpstate_active) {
@@ -391,7 +390,7 @@
 		put_user_ex(&frame->uc, &frame->puc);
 
 		/* Create the ucontext.  */
-		if (cpu_has_xsave)
+		if (boot_cpu_has(X86_FEATURE_XSAVE))
 			put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
 		else
 			put_user_ex(0, &frame->uc.uc_flags);
@@ -442,7 +441,7 @@
 {
 	unsigned long flags;
 
-	if (cpu_has_xsave)
+	if (boot_cpu_has(X86_FEATURE_XSAVE))
 		flags = UC_FP_XSTATE | UC_SIGCONTEXT_SS;
 	else
 		flags = UC_SIGCONTEXT_SS;
@@ -762,7 +761,7 @@
 static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
 {
 #ifdef CONFIG_X86_64
-	if (is_ia32_task())
+	if (in_ia32_syscall())
 		return __NR_ia32_restart_syscall;
 #endif
 #ifdef CONFIG_X86_X32_ABI
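
The sas_ss_flags() change above means a handler that is already running on the alternate stack keeps its current stack pointer instead of being rewound to the stack's base. A runnable sketch of the underlying sigaltstack mechanics (size is arbitrary; printf in a handler is fine for a demo but not async-signal-safe in general):

#include <signal.h>
#include <stdio.h>

#define ALT_SZ (64 * 1024)
static char altstack[ALT_SZ];

static void handler(int sig)
{
	char probe;

	/* &probe lands inside altstack[] because of SA_ONSTACK. */
	printf("handler sp ~ %p, altstack [%p, %p)\n",
	       (void *)&probe, (void *)altstack, (void *)(altstack + ALT_SZ));
}

int main(void)
{
	stack_t ss = { .ss_sp = altstack, .ss_size = ALT_SZ };
	struct sigaction sa = { .sa_handler = handler, .sa_flags = SA_ONSTACK };

	sigaltstack(&ss, NULL);
	sigaction(SIGUSR1, &sa, NULL);
	raise(SIGUSR1);
	return 0;
}
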
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 0e4329e..fafe8b9 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1236,7 +1236,7 @@
 	 * If we couldn't find a local APIC, then get out of here now!
 	 */
 	if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) &&
-	    !cpu_has_apic) {
+	    !boot_cpu_has(X86_FEATURE_APIC)) {
 		if (!disable_apic) {
 			pr_err("BIOS bug, local APIC #%d not detected!...\n",
 				boot_cpu_physical_apicid);
diff --git a/arch/x86/kernel/sysfb_efi.c b/arch/x86/kernel/sysfb_efi.c
index 5da924b..623965e 100644
--- a/arch/x86/kernel/sysfb_efi.c
+++ b/arch/x86/kernel/sysfb_efi.c
@@ -68,6 +68,21 @@
 	[M_UNKNOWN] = { NULL, 0, 0, 0, 0, OVERRIDE_NONE }
 };
 
+void efifb_setup_from_dmi(struct screen_info *si, const char *opt)
+{
+	int i;
+
+	for (i = 0; i < M_UNKNOWN; i++) {
+		if (efifb_dmi_list[i].base != 0 &&
+		    !strcmp(opt, efifb_dmi_list[i].optname)) {
+			si->lfb_base = efifb_dmi_list[i].base;
+			si->lfb_linelength = efifb_dmi_list[i].stride;
+			si->lfb_width = efifb_dmi_list[i].width;
+			si->lfb_height = efifb_dmi_list[i].height;
+		}
+	}
+}
+
 #define choose_value(dmivalue, fwvalue, field, flags) ({	\
 		typeof(fwvalue) _ret_ = fwvalue;		\
 		if ((flags) & (field))				\
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index e72a07f..9b0185f 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -74,12 +74,6 @@
 		return;
 	}
 
-	/* only a natively booted kernel should be using TXT */
-	if (paravirt_enabled()) {
-		pr_warning("non-0 tboot_addr but pv_ops is enabled\n");
-		return;
-	}
-
 	/* Map and check for tboot UUID. */
 	set_fixmap(FIX_TBOOT_BASE, boot_params.tboot_addr);
 	tboot = (struct tboot *)fix_to_virt(FIX_TBOOT_BASE);
diff --git a/arch/x86/kernel/tce_64.c b/arch/x86/kernel/tce_64.c
index ab40954..f386bad 100644
--- a/arch/x86/kernel/tce_64.c
+++ b/arch/x86/kernel/tce_64.c
@@ -40,7 +40,7 @@
 static inline void flush_tce(void* tceaddr)
 {
 	/* a single tce can't cross a cache line */
-	if (cpu_has_clflush)
+	if (boot_cpu_has(X86_FEATURE_CLFLUSH))
 		clflush(tceaddr);
 	else
 		wbinvd();
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index 7fc5e84..9692a5e 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -114,6 +114,7 @@
 		       int can_allocate)
 {
 	struct user_desc info;
+	unsigned short __maybe_unused sel, modified_sel;
 
 	if (copy_from_user(&info, u_info, sizeof(info)))
 		return -EFAULT;
@@ -141,6 +142,47 @@
 
 	set_tls_desc(p, idx, &info, 1);
 
+	/*
+	 * If DS, ES, FS, or GS points to the modified segment, forcibly
+	 * refresh it.  Only needed on x86_64 because x86_32 reloads them
+	 * on return to user mode.
+	 */
+	modified_sel = (idx << 3) | 3;
+
+	if (p == current) {
+#ifdef CONFIG_X86_64
+		savesegment(ds, sel);
+		if (sel == modified_sel)
+			loadsegment(ds, sel);
+
+		savesegment(es, sel);
+		if (sel == modified_sel)
+			loadsegment(es, sel);
+
+		savesegment(fs, sel);
+		if (sel == modified_sel)
+			loadsegment(fs, sel);
+
+		savesegment(gs, sel);
+		if (sel == modified_sel)
+			load_gs_index(sel);
+#endif
+
+#ifdef CONFIG_X86_32_LAZY_GS
+		savesegment(gs, sel);
+		if (sel == modified_sel)
+			loadsegment(gs, sel);
+#endif
+	} else {
+#ifdef CONFIG_X86_64
+		if (p->thread.fsindex == modified_sel)
+			p->thread.fsbase = info.base_addr;
+
+		if (p->thread.gsindex == modified_sel)
+			p->thread.gsbase = info.base_addr;
+#endif
+	}
+
 	return 0;
 }
 
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 06cbe25..d159048 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -51,6 +51,7 @@
 #include <asm/processor.h>
 #include <asm/debugreg.h>
 #include <linux/atomic.h>
+#include <asm/text-patching.h>
 #include <asm/ftrace.h>
 #include <asm/traps.h>
 #include <asm/desc.h>
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index c9c4c7c..38ba6de 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -36,7 +36,7 @@
 
 /* native_sched_clock() is called before tsc_init(), so
    we must start with the TSC soft disabled to prevent
-   erroneous rdtsc usage on !cpu_has_tsc processors */
+   erroneous rdtsc usage on !boot_cpu_has(X86_FEATURE_TSC) processors */
 static int __read_mostly tsc_disabled = -1;
 
 static DEFINE_STATIC_KEY_FALSE(__use_tsc);
@@ -834,15 +834,15 @@
 #ifndef CONFIG_SMP
 	unsigned long cpu_khz_old = cpu_khz;
 
-	if (cpu_has_tsc) {
-		tsc_khz = x86_platform.calibrate_tsc();
-		cpu_khz = tsc_khz;
-		cpu_data(0).loops_per_jiffy =
-			cpufreq_scale(cpu_data(0).loops_per_jiffy,
-					cpu_khz_old, cpu_khz);
-		return 0;
-	} else
+	if (!boot_cpu_has(X86_FEATURE_TSC))
 		return -ENODEV;
+
+	tsc_khz = x86_platform.calibrate_tsc();
+	cpu_khz = tsc_khz;
+	cpu_data(0).loops_per_jiffy = cpufreq_scale(cpu_data(0).loops_per_jiffy,
+						    cpu_khz_old, cpu_khz);
+
+	return 0;
 #else
 	return -ENODEV;
 #endif
@@ -922,9 +922,6 @@
 	struct cpufreq_freqs *freq = data;
 	unsigned long *lpj;
 
-	if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC))
-		return 0;
-
 	lpj = &boot_cpu_data.loops_per_jiffy;
 #ifdef CONFIG_SMP
 	if (!(freq->flags & CPUFREQ_CONST_LOOPS))
@@ -954,9 +951,9 @@
 	.notifier_call  = time_cpufreq_notifier
 };
 
-static int __init cpufreq_tsc(void)
+static int __init cpufreq_register_tsc_scaling(void)
 {
-	if (!cpu_has_tsc)
+	if (!boot_cpu_has(X86_FEATURE_TSC))
 		return 0;
 	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
 		return 0;
@@ -965,7 +962,7 @@
 	return 0;
 }
 
-core_initcall(cpufreq_tsc);
+core_initcall(cpufreq_register_tsc_scaling);
 
 #endif /* CONFIG_CPU_FREQ */
 
@@ -1081,7 +1078,7 @@
  */
 int unsynchronized_tsc(void)
 {
-	if (!cpu_has_tsc || tsc_unstable)
+	if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_unstable)
 		return 1;
 
 #ifdef CONFIG_SMP
@@ -1205,7 +1202,7 @@
 
 static int __init init_tsc_clocksource(void)
 {
-	if (!cpu_has_tsc || tsc_disabled > 0 || !tsc_khz)
+	if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_disabled > 0 || !tsc_khz)
 		return 0;
 
 	if (tsc_clocksource_reliable)
@@ -1242,7 +1239,7 @@
 	u64 lpj;
 	int cpu;
 
-	if (!cpu_has_tsc) {
+	if (!boot_cpu_has(X86_FEATURE_TSC)) {
 		setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
 		return;
 	}
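
All of the cpu_has_* conversions in this series test the same CPUID-derived capability bits, just via boot_cpu_has()/static_cpu_has() on boot_cpu_data. For orientation, the raw bits can be read from userspace; a sketch assuming GCC/Clang's <cpuid.h> (bit positions per the SDM: TSC is CPUID.01H:EDX[4], APIC EDX[9], XSAVE ECX[26]):

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
		return 1;
	printf("TSC:   %s\n", (edx & (1u << 4))  ? "yes" : "no");
	printf("APIC:  %s\n", (edx & (1u << 9))  ? "yes" : "no");
	printf("XSAVE: %s\n", (ecx & (1u << 26)) ? "yes" : "no");
	return 0;
}
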
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index bf4db6e..6c1ff31 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -516,7 +516,7 @@
 
 static inline int sizeof_long(void)
 {
-	return is_ia32_task() ? 4 : 8;
+	return in_ia32_syscall() ? 4 : 8;
 }
 
 static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
@@ -578,7 +578,7 @@
 	riprel_post_xol(auprobe, regs);
 }
 
-static struct uprobe_xol_ops default_xol_ops = {
+static const struct uprobe_xol_ops default_xol_ops = {
 	.pre_xol  = default_pre_xol_op,
 	.post_xol = default_post_xol_op,
 	.abort	  = default_abort_op,
@@ -695,7 +695,7 @@
 		0, insn->immediate.nbytes);
 }
 
-static struct uprobe_xol_ops branch_xol_ops = {
+static const struct uprobe_xol_ops branch_xol_ops = {
 	.emulate  = branch_emulate_op,
 	.post_xol = branch_post_xol_op,
 };
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 4c941f8..9297a00 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -334,7 +334,7 @@
 		__brk_limit = .;
 	}
 
-	. = ALIGN(PAGE_SIZE);
+	. = ALIGN(PAGE_SIZE);		/* keep VO_INIT_SIZE page aligned */
 	_end = .;
 
         STABS_DEBUG
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index bbbaa802..769af90 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -75,7 +75,7 @@
 		return 0;
 
 	/* Update OSXSAVE bit */
-	if (cpu_has_xsave && best->function == 0x1) {
+	if (boot_cpu_has(X86_FEATURE_XSAVE) && best->function == 0x1) {
 		best->ecx &= ~F(OSXSAVE);
 		if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE))
 			best->ecx |= F(OSXSAVE);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index b6f50e8..38c0c32 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3844,7 +3844,8 @@
 		__reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check,
 					boot_cpu_data.x86_phys_bits,
 					context->shadow_root_level, false,
-					cpu_has_gbpages, true, true);
+					boot_cpu_has(X86_FEATURE_GBPAGES),
+					true, true);
 	else
 		__reset_rsvds_bits_mask_ept(&context->shadow_zero_check,
 					    boot_cpu_data.x86_phys_bits,
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 31346a3..fafd720 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1254,7 +1254,7 @@
 	kvm_load_ldt(svm->host.ldt);
 #ifdef CONFIG_X86_64
 	loadsegment(fs, svm->host.fs);
-	wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs);
+	wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gsbase);
 	load_gs_index(svm->host.gs);
 #else
 #ifdef CONFIG_X86_32_LAZY_GS
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 2f1ea2f..b72743c 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -809,8 +809,7 @@
 
 #define host_clocks					\
 	{VCLOCK_NONE, "none"},				\
-	{VCLOCK_TSC,  "tsc"},				\
-	{VCLOCK_HPET, "hpet"}				\
+	{VCLOCK_TSC,  "tsc"}				\
 
 TRACE_EVENT(kvm_update_master_clock,
 	TP_PROTO(bool use_master_clock, unsigned int host_clock, bool offset_matched),
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 133679d..cb47fe3 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3390,7 +3390,7 @@
 		}
 	}
 
-	if (cpu_has_xsaves)
+	if (boot_cpu_has(X86_FEATURE_XSAVES))
 		rdmsrl(MSR_IA32_XSS, host_xss);
 
 	return 0;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9b7798c..12f33e6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2611,7 +2611,7 @@
 		r = KVM_MAX_MCE_BANKS;
 		break;
 	case KVM_CAP_XCRS:
-		r = cpu_has_xsave;
+		r = boot_cpu_has(X86_FEATURE_XSAVE);
 		break;
 	case KVM_CAP_TSC_CONTROL:
 		r = kvm_has_tsc_control;
@@ -3094,7 +3094,7 @@
 
 	/* Set XSTATE_BV and possibly XCOMP_BV.  */
 	xsave->header.xfeatures = xstate_bv;
-	if (cpu_has_xsaves)
+	if (boot_cpu_has(X86_FEATURE_XSAVES))
 		xsave->header.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED;
 
 	/*
@@ -3121,7 +3121,7 @@
 static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
 					 struct kvm_xsave *guest_xsave)
 {
-	if (cpu_has_xsave) {
+	if (boot_cpu_has(X86_FEATURE_XSAVE)) {
 		memset(guest_xsave, 0, sizeof(struct kvm_xsave));
 		fill_xsave((u8 *) guest_xsave->region, vcpu);
 	} else {
@@ -3139,7 +3139,7 @@
 	u64 xstate_bv =
 		*(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
 
-	if (cpu_has_xsave) {
+	if (boot_cpu_has(X86_FEATURE_XSAVE)) {
 		/*
 		 * Here we allow setting states that are not present in
 		 * CPUID leaf 0xD, index 0, EDX:EAX.  This is for compatibility
@@ -3160,7 +3160,7 @@
 static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu,
 					struct kvm_xcrs *guest_xcrs)
 {
-	if (!cpu_has_xsave) {
+	if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
 		guest_xcrs->nr_xcrs = 0;
 		return;
 	}
@@ -3176,7 +3176,7 @@
 {
 	int i, r = 0;
 
-	if (!cpu_has_xsave)
+	if (!boot_cpu_has(X86_FEATURE_XSAVE))
 		return -EINVAL;
 
 	if (guest_xcrs->nr_xcrs > KVM_MAX_XCRS || guest_xcrs->flags)
@@ -5865,7 +5865,7 @@
 
 	perf_register_guest_info_callbacks(&kvm_guest_cbs);
 
-	if (cpu_has_xsave)
+	if (boot_cpu_has(X86_FEATURE_XSAVE))
 		host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
 
 	kvm_lapic_init();
@@ -7293,7 +7293,7 @@
 static void fx_init(struct kvm_vcpu *vcpu)
 {
 	fpstate_init(&vcpu->arch.guest_fpu.state);
-	if (cpu_has_xsaves)
+	if (boot_cpu_has(X86_FEATURE_XSAVES))
 		vcpu->arch.guest_fpu.state.xsave.header.xcomp_bv =
 			host_xcr0 | XSTATE_COMPACTION_ENABLED;
 
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index fd57d3a..3847e73 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -1408,13 +1408,10 @@
 {
 	/* We're under lguest. */
 	pv_info.name = "lguest";
-	/* Paravirt is enabled. */
-	pv_info.paravirt_enabled = 1;
 	/* We're running at privilege level 1, not 0 as normal. */
 	pv_info.kernel_rpl = 1;
 	/* Everyone except Xen runs with this set. */
 	pv_info.shared_kernel_pmd = 1;
-	pv_info.features = 0;
 
 	/*
 	 * We set up all the lguest overrides for sensitive operations.  These
diff --git a/arch/x86/lib/rwsem.S b/arch/x86/lib/rwsem.S
index be110ef..bf2c607 100644
--- a/arch/x86/lib/rwsem.S
+++ b/arch/x86/lib/rwsem.S
@@ -29,8 +29,10 @@
  * there is contention on the semaphore.
  *
  * %eax contains the semaphore pointer on entry. Save the C-clobbered
- * registers (%eax, %edx and %ecx) except %eax whish is either a return
- * value or just clobbered..
+ * registers (%eax, %edx and %ecx) except %eax which is either a return
+ * value or just gets clobbered. Same is true for %edx so make sure GCC
+ * reloads it after the slow path, by making it hold a temporary, for
+ * example see ____down_write().
  */
 
 #define save_common_regs \
@@ -106,6 +108,16 @@
 	ret
 ENDPROC(call_rwsem_down_write_failed)
 
+ENTRY(call_rwsem_down_write_failed_killable)
+	FRAME_BEGIN
+	save_common_regs
+	movq %rax,%rdi
+	call rwsem_down_write_failed_killable
+	restore_common_regs
+	FRAME_END
+	ret
+ENDPROC(call_rwsem_down_write_failed_killable)
+
 ENTRY(call_rwsem_wake)
 	FRAME_BEGIN
 	/* do nothing if still outstanding active readers */
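
The new call_rwsem_down_write_failed_killable stub is the asm slow path behind down_write_killable(). A hedged sketch of the caller-side contract (widget_update() and the mmap_sem choice are illustrative only):

static int widget_update(struct mm_struct *mm)
{
	if (down_write_killable(&mm->mmap_sem))
		return -EINTR;	/* a fatal signal aborted the wait */
	/* ... write-side critical section ... */
	up_write(&mm->mmap_sem);
	return 0;
}
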
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 91d93b9..b559d92 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -612,7 +612,7 @@
 {
 	stac();
 #ifdef CONFIG_X86_INTEL_USERCOPY
-	if (n > 64 && cpu_has_xmm2)
+	if (n > 64 && static_cpu_has(X86_FEATURE_XMM2))
 		n = __copy_user_zeroing_intel_nocache(to, from, n);
 	else
 		__copy_user_zeroing(to, from, n);
@@ -629,7 +629,7 @@
 {
 	stac();
 #ifdef CONFIG_X86_INTEL_USERCOPY
-	if (n > 64 && cpu_has_xmm2)
+	if (n > 64 && static_cpu_has(X86_FEATURE_XMM2))
 		n = __copy_user_intel_nocache(to, from, n);
 	else
 		__copy_user(to, from, n);
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index f989132..62c0043 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -2,7 +2,7 @@
 KCOV_INSTRUMENT_tlb.o	:= n
 
 obj-y	:=  init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
-	    pat.o pgtable.o physaddr.o gup.o setup_nx.o
+	    pat.o pgtable.o physaddr.o gup.o setup_nx.o tlb.o
 
 # Make sure __phys_addr has no stackprotector
 nostackp := $(call cc-option, -fno-stack-protector)
@@ -12,7 +12,6 @@
 CFLAGS_fault.o := -I$(src)/../include/asm/trace
 
 obj-$(CONFIG_X86_PAT)		+= pat_rbtree.o
-obj-$(CONFIG_SMP)		+= tlb.o
 
 obj-$(CONFIG_X86_32)		+= pgtable_32.o iomap_32.o
 
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 82447b3..4bb53b8 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -1,5 +1,6 @@
 #include <linux/module.h>
 #include <asm/uaccess.h>
+#include <asm/traps.h>
 
 typedef bool (*ex_handler_t)(const struct exception_table_entry *,
 			    struct pt_regs *, int);
@@ -42,6 +43,43 @@
 }
 EXPORT_SYMBOL(ex_handler_ext);
 
+bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup,
+			     struct pt_regs *regs, int trapnr)
+{
+	WARN_ONCE(1, "unchecked MSR access error: RDMSR from 0x%x\n",
+		  (unsigned int)regs->cx);
+
+	/* Pretend that the read succeeded and returned 0. */
+	regs->ip = ex_fixup_addr(fixup);
+	regs->ax = 0;
+	regs->dx = 0;
+	return true;
+}
+EXPORT_SYMBOL(ex_handler_rdmsr_unsafe);
+
+bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup,
+			     struct pt_regs *regs, int trapnr)
+{
+	WARN_ONCE(1, "unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x)\n",
+		  (unsigned int)regs->cx,
+		  (unsigned int)regs->dx, (unsigned int)regs->ax);
+
+	/* Pretend that the write succeeded. */
+	regs->ip = ex_fixup_addr(fixup);
+	return true;
+}
+EXPORT_SYMBOL(ex_handler_wrmsr_unsafe);
+
+bool ex_handler_clear_fs(const struct exception_table_entry *fixup,
+			 struct pt_regs *regs, int trapnr)
+{
+	if (static_cpu_has(X86_BUG_NULL_SEG))
+		asm volatile ("mov %0, %%fs" : : "rm" (__USER_DS));
+	asm volatile ("mov %0, %%fs" : : "rm" (0));
+	return ex_handler_default(fixup, regs, trapnr);
+}
+EXPORT_SYMBOL(ex_handler_clear_fs);
+
 bool ex_has_fault_handler(unsigned long ip)
 {
 	const struct exception_table_entry *e;
@@ -82,24 +120,46 @@
 	return handler(e, regs, trapnr);
 }
 
+extern unsigned int early_recursion_flag;
+
 /* Restricted version used during very early boot */
-int __init early_fixup_exception(unsigned long *ip)
+void __init early_fixup_exception(struct pt_regs *regs, int trapnr)
 {
-	const struct exception_table_entry *e;
-	unsigned long new_ip;
-	ex_handler_t handler;
+	/* Ignore early NMIs. */
+	if (trapnr == X86_TRAP_NMI)
+		return;
 
-	e = search_exception_tables(*ip);
-	if (!e)
-		return 0;
+	if (early_recursion_flag > 2)
+		goto halt_loop;
 
-	new_ip  = ex_fixup_addr(e);
-	handler = ex_fixup_handler(e);
+	if (regs->cs != __KERNEL_CS)
+		goto fail;
 
-	/* special handling not supported during early boot */
-	if (handler != ex_handler_default)
-		return 0;
+	/*
+	 * The full exception fixup machinery is available as soon as
+	 * the early IDT is loaded.  This means that it is the
+	 * responsibility of extable users to either function correctly
+	 * when handlers are invoked early or to simply avoid causing
+	 * exceptions before they're ready to handle them.
+	 *
+	 * This is better than filtering which handlers can be used,
+	 * because refusing to call a handler here is guaranteed to
+	 * result in a hard-to-debug panic.
+	 *
+	 * Keep in mind that not all vectors actually get here.  Early
+	 * page faults, for example, are special.
+	 */
+	if (fixup_exception(regs, trapnr))
+		return;
 
-	*ip = new_ip;
-	return 1;
+fail:
+	early_printk("PANIC: early exception 0x%02x IP %lx:%lx error %lx cr2 0x%lx\n",
+		     (unsigned)trapnr, (unsigned long)regs->cs, regs->ip,
+		     regs->orig_ax, read_cr2());
+
+	show_regs(regs);
+
+halt_loop:
+	while (true)
+		halt();
 }
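
These handlers are attached to the faulting instruction through the exception table. A hedged sketch of the wiring for an unchecked MSR read, modeled on the pattern this series uses in asm/msr.h (treat the exact macro spelling as an assumption):

static inline unsigned long long rdmsr_unsafe(unsigned int msr)
{
	unsigned int low, high;

	asm volatile("1: rdmsr\n"
		     "2:\n"
		     _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_rdmsr_unsafe)
		     : "=a" (low), "=d" (high)
		     : "c" (msr));
	/* On #GP the handler lands us at 2: with low = high = 0. */
	return ((unsigned long long)high << 32) | low;
}
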
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 740d7ac..14a9505 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -162,7 +162,7 @@
 	unsigned long ps = memparse(opt, &opt);
 	if (ps == PMD_SIZE) {
 		hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
-	} else if (ps == PUD_SIZE && cpu_has_gbpages) {
+	} else if (ps == PUD_SIZE && boot_cpu_has(X86_FEATURE_GBPAGES)) {
 		hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
 	} else {
 		printk(KERN_ERR "hugepagesz: Unsupported page size %lu M\n",
@@ -177,7 +177,7 @@
 static __init int gigantic_pages_init(void)
 {
 	/* With compaction or CMA we can allocate gigantic pages at runtime */
-	if (cpu_has_gbpages && !size_to_hstate(1UL << PUD_SHIFT))
+	if (boot_cpu_has(X86_FEATURE_GBPAGES) && !size_to_hstate(1UL << PUD_SHIFT))
 		hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
 	return 0;
 }
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
new file mode 100644
index 0000000..ec21796
--- /dev/null
+++ b/arch/x86/mm/ident_map.c
@@ -0,0 +1,79 @@
+/*
+ * Helper routines for building identity mapping page tables. This is
+ * included by both the compressed kernel and the regular kernel.
+ */
+
+static void ident_pmd_init(unsigned long pmd_flag, pmd_t *pmd_page,
+			   unsigned long addr, unsigned long end)
+{
+	addr &= PMD_MASK;
+	for (; addr < end; addr += PMD_SIZE) {
+		pmd_t *pmd = pmd_page + pmd_index(addr);
+
+		if (!pmd_present(*pmd))
+			set_pmd(pmd, __pmd(addr | pmd_flag));
+	}
+}
+
+static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
+			  unsigned long addr, unsigned long end)
+{
+	unsigned long next;
+
+	for (; addr < end; addr = next) {
+		pud_t *pud = pud_page + pud_index(addr);
+		pmd_t *pmd;
+
+		next = (addr & PUD_MASK) + PUD_SIZE;
+		if (next > end)
+			next = end;
+
+		if (pud_present(*pud)) {
+			pmd = pmd_offset(pud, 0);
+			ident_pmd_init(info->pmd_flag, pmd, addr, next);
+			continue;
+		}
+		pmd = (pmd_t *)info->alloc_pgt_page(info->context);
+		if (!pmd)
+			return -ENOMEM;
+		ident_pmd_init(info->pmd_flag, pmd, addr, next);
+		set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
+	}
+
+	return 0;
+}
+
+int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
+			      unsigned long addr, unsigned long end)
+{
+	unsigned long next;
+	int result;
+	int off = info->kernel_mapping ? pgd_index(__PAGE_OFFSET) : 0;
+
+	for (; addr < end; addr = next) {
+		pgd_t *pgd = pgd_page + pgd_index(addr) + off;
+		pud_t *pud;
+
+		next = (addr & PGDIR_MASK) + PGDIR_SIZE;
+		if (next > end)
+			next = end;
+
+		if (pgd_present(*pgd)) {
+			pud = pud_offset(pgd, 0);
+			result = ident_pud_init(info, pud, addr, next);
+			if (result)
+				return result;
+			continue;
+		}
+
+		pud = (pud_t *)info->alloc_pgt_page(info->context);
+		if (!pud)
+			return -ENOMEM;
+		result = ident_pud_init(info, pud, addr, next);
+		if (result)
+			return result;
+		set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
+	}
+
+	return 0;
+}
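
Callers hand in the page allocator and PMD flags through struct x86_mapping_info. A sketch modeled on the kexec-style usage (alloc_identity_page() and setup_identity_map() are made-up names; the caller owns the top-level table):

static void *alloc_identity_page(void *context)
{
	/* Any zeroed page will do; context is passed through unchanged. */
	return (void *)get_zeroed_page(GFP_KERNEL);
}

static int setup_identity_map(pgd_t *pgd)
{
	struct x86_mapping_info info = {
		.alloc_pgt_page	= alloc_identity_page,
		.context	= NULL,
		.pmd_flag	= __PAGE_KERNEL_LARGE_EXEC,
	};

	/* Identity-map the low ISA range into the caller's page table. */
	return kernel_ident_mapping_init(&info, pgd, 0, ISA_END_ADDRESS);
}
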
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 9d56f27..372aad2 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -157,23 +157,23 @@
 	 * This will simplify cpa(), which otherwise needs to support splitting
 	 * large pages into small in interrupt context, etc.
 	 */
-	if (cpu_has_pse && !debug_pagealloc_enabled())
+	if (boot_cpu_has(X86_FEATURE_PSE) && !debug_pagealloc_enabled())
 		page_size_mask |= 1 << PG_LEVEL_2M;
 #endif
 
 	/* Enable PSE if available */
-	if (cpu_has_pse)
+	if (boot_cpu_has(X86_FEATURE_PSE))
 		cr4_set_bits_and_update_boot(X86_CR4_PSE);
 
 	/* Enable PGE if available */
-	if (cpu_has_pge) {
+	if (boot_cpu_has(X86_FEATURE_PGE)) {
 		cr4_set_bits_and_update_boot(X86_CR4_PGE);
 		__supported_pte_mask |= _PAGE_GLOBAL;
 	} else
 		__supported_pte_mask &= ~_PAGE_GLOBAL;
 
 	/* Enable 1 GB linear kernel mappings if available: */
-	if (direct_gbpages && cpu_has_gbpages) {
+	if (direct_gbpages && boot_cpu_has(X86_FEATURE_GBPAGES)) {
 		printk(KERN_INFO "Using GB pages for direct mapping\n");
 		page_size_mask |= 1 << PG_LEVEL_1G;
 	} else {
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index bd7a9b9..84df150 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -284,7 +284,7 @@
 	 */
 	mapping_iter = 1;
 
-	if (!cpu_has_pse)
+	if (!boot_cpu_has(X86_FEATURE_PSE))
 		use_pse = 0;
 
 repeat:
@@ -804,9 +804,6 @@
 	BUILD_BUG_ON(VMALLOC_START			>= VMALLOC_END);
 #undef high_memory
 #undef __FIXADDR_TOP
-#ifdef CONFIG_RANDOMIZE_BASE
-	BUILD_BUG_ON(CONFIG_RANDOMIZE_BASE_MAX_OFFSET > KERNEL_IMAGE_SIZE);
-#endif
 
 #ifdef CONFIG_HIGHMEM
 	BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE	> FIXADDR_START);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 214afda..bce2e5d 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -58,79 +58,7 @@
 
 #include "mm_internal.h"
 
-static void ident_pmd_init(unsigned long pmd_flag, pmd_t *pmd_page,
-			   unsigned long addr, unsigned long end)
-{
-	addr &= PMD_MASK;
-	for (; addr < end; addr += PMD_SIZE) {
-		pmd_t *pmd = pmd_page + pmd_index(addr);
-
-		if (!pmd_present(*pmd))
-			set_pmd(pmd, __pmd(addr | pmd_flag));
-	}
-}
-static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
-			  unsigned long addr, unsigned long end)
-{
-	unsigned long next;
-
-	for (; addr < end; addr = next) {
-		pud_t *pud = pud_page + pud_index(addr);
-		pmd_t *pmd;
-
-		next = (addr & PUD_MASK) + PUD_SIZE;
-		if (next > end)
-			next = end;
-
-		if (pud_present(*pud)) {
-			pmd = pmd_offset(pud, 0);
-			ident_pmd_init(info->pmd_flag, pmd, addr, next);
-			continue;
-		}
-		pmd = (pmd_t *)info->alloc_pgt_page(info->context);
-		if (!pmd)
-			return -ENOMEM;
-		ident_pmd_init(info->pmd_flag, pmd, addr, next);
-		set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
-	}
-
-	return 0;
-}
-
-int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
-			      unsigned long addr, unsigned long end)
-{
-	unsigned long next;
-	int result;
-	int off = info->kernel_mapping ? pgd_index(__PAGE_OFFSET) : 0;
-
-	for (; addr < end; addr = next) {
-		pgd_t *pgd = pgd_page + pgd_index(addr) + off;
-		pud_t *pud;
-
-		next = (addr & PGDIR_MASK) + PGDIR_SIZE;
-		if (next > end)
-			next = end;
-
-		if (pgd_present(*pgd)) {
-			pud = pud_offset(pgd, 0);
-			result = ident_pud_init(info, pud, addr, next);
-			if (result)
-				return result;
-			continue;
-		}
-
-		pud = (pud_t *)info->alloc_pgt_page(info->context);
-		if (!pud)
-			return -ENOMEM;
-		result = ident_pud_init(info, pud, addr, next);
-		if (result)
-			return result;
-		set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
-	}
-
-	return 0;
-}
+#include "ident_map.c"
 
 /*
  * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the
@@ -1295,7 +1223,7 @@
 	struct vmem_altmap *altmap = to_vmem_altmap(start);
 	int err;
 
-	if (cpu_has_pse)
+	if (boot_cpu_has(X86_FEATURE_PSE))
 		err = vmemmap_populate_hugepages(start, end, node, altmap);
 	else if (altmap) {
 		pr_err_once("%s: no cpu support for altmap allocations\n",
@@ -1338,7 +1266,7 @@
 		}
 		get_page_bootmem(section_nr, pud_page(*pud), MIX_SECTION_INFO);
 
-		if (!cpu_has_pse) {
+		if (!boot_cpu_has(X86_FEATURE_PSE)) {
 			next = (addr + PAGE_SIZE) & PAGE_MASK;
 			pmd = pmd_offset(pud, addr);
 			if (pmd_none(*pmd))
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 0d8d53d..f089491 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -378,7 +378,7 @@
 int __init arch_ioremap_pud_supported(void)
 {
 #ifdef CONFIG_X86_64
-	return cpu_has_gbpages;
+	return boot_cpu_has(X86_FEATURE_GBPAGES);
 #else
 	return 0;
 #endif
@@ -386,7 +386,7 @@
 
 int __init arch_ioremap_pmd_supported(void)
 {
-	return cpu_has_pse;
+	return boot_cpu_has(X86_FEATURE_PSE);
 }
 
 /*
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 01be9ec..7a1f7bb 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -1055,7 +1055,7 @@
 	/*
 	 * Map everything starting from the Gb boundary, possibly with 1G pages
 	 */
-	while (cpu_has_gbpages && end - start >= PUD_SIZE) {
+	while (boot_cpu_has(X86_FEATURE_GBPAGES) && end - start >= PUD_SIZE) {
 		set_pud(pud, __pud(cpa->pfn << PAGE_SHIFT | _PAGE_PSE |
 				   massage_pgprot(pud_pgprot)));
 
@@ -1125,8 +1125,14 @@
 static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr,
 			       int primary)
 {
-	if (cpa->pgd)
+	if (cpa->pgd) {
+		/*
+		 * Right now, we only execute this code path when mapping
+		 * the EFI virtual memory map regions, no other users
+		 * provide a ->pgd value. This may change in the future.
+		 */
 		return populate_pgd(cpa, vaddr);
+	}
 
 	/*
 	 * Ignore all non primary paths.
@@ -1460,7 +1466,7 @@
 	 * error case we fall back to cpa_flush_all (which uses
 	 * WBINVD):
 	 */
-	if (!ret && cpu_has_clflush) {
+	if (!ret && boot_cpu_has(X86_FEATURE_CLFLUSH)) {
 		if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) {
 			cpa_flush_array(addr, numpages, cache,
 					cpa.flags, pages);
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index faec01e..fb0604f 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -40,11 +40,22 @@
 static bool boot_cpu_done;
 
 static int __read_mostly __pat_enabled = IS_ENABLED(CONFIG_X86_PAT);
+static void init_cache_modes(void);
 
-static inline void pat_disable(const char *reason)
+void pat_disable(const char *reason)
 {
+	if (!__pat_enabled)
+		return;
+
+	if (boot_cpu_done) {
+		WARN_ONCE(1, "x86/PAT: PAT cannot be disabled after initialization\n");
+		return;
+	}
+
 	__pat_enabled = 0;
 	pr_info("x86/PAT: %s\n", reason);
+
+	init_cache_modes();
 }
 
 static int __init nopat(char *str)
@@ -181,7 +192,7 @@
  * configuration.
  * Using lower indices is preferred, so we start with highest index.
  */
-void pat_init_cache_modes(u64 pat)
+static void __init_cache_modes(u64 pat)
 {
 	enum page_cache_mode cache;
 	char pat_msg[33];
@@ -202,14 +213,11 @@
 {
 	u64 tmp_pat;
 
-	if (!cpu_has_pat) {
+	if (!boot_cpu_has(X86_FEATURE_PAT)) {
 		pat_disable("PAT not supported by CPU.");
 		return;
 	}
 
-	if (!pat_enabled())
-		goto done;
-
 	rdmsrl(MSR_IA32_CR_PAT, tmp_pat);
 	if (!tmp_pat) {
 		pat_disable("PAT MSR is 0, disabled.");
@@ -218,16 +226,12 @@
 
 	wrmsrl(MSR_IA32_CR_PAT, pat);
 
-done:
-	pat_init_cache_modes(pat);
+	__init_cache_modes(pat);
 }
 
 static void pat_ap_init(u64 pat)
 {
-	if (!pat_enabled())
-		return;
-
-	if (!cpu_has_pat) {
+	if (!boot_cpu_has(X86_FEATURE_PAT)) {
 		/*
 		 * If this happens we are on a secondary CPU, but switched to
 		 * PAT on the boot CPU. We have no way to undo PAT.
@@ -238,18 +242,32 @@
 	wrmsrl(MSR_IA32_CR_PAT, pat);
 }
 
-void pat_init(void)
+static void init_cache_modes(void)
 {
-	u64 pat;
-	struct cpuinfo_x86 *c = &boot_cpu_data;
+	u64 pat = 0;
+	static int init_cm_done;
 
-	if (!pat_enabled()) {
+	if (init_cm_done)
+		return;
+
+	if (boot_cpu_has(X86_FEATURE_PAT)) {
+		/*
+		 * CPU supports PAT. Set PAT table to be consistent with
+		 * PAT MSR. This case supports "nopat" boot option, and
+		 * virtual machine environments which support PAT without
+		 * MTRRs. Specifically, Xen has a unique setup for the PAT MSR.
+		 *
+		 * If the PAT MSR returns 0, it is considered invalid and we
+		 * emulate it as No PAT.
+		 */
+		rdmsrl(MSR_IA32_CR_PAT, pat);
+	}
+
+	if (!pat) {
 		/*
 		 * No PAT. Emulate the PAT table that corresponds to the two
-		 * cache bits, PWT (Write Through) and PCD (Cache Disable). This
-		 * setup is the same as the BIOS default setup when the system
-		 * has PAT but the "nopat" boot option has been specified. This
-		 * emulated PAT table is used when MSR_IA32_CR_PAT returns 0.
+		 * cache bits, PWT (Write Through) and PCD (Cache Disable).
+		 * This setup is also the same as the BIOS default setup.
 		 *
 		 * PTE encoding:
 		 *
@@ -266,10 +284,36 @@
 		 */
 		pat = PAT(0, WB) | PAT(1, WT) | PAT(2, UC_MINUS) | PAT(3, UC) |
 		      PAT(4, WB) | PAT(5, WT) | PAT(6, UC_MINUS) | PAT(7, UC);
+	}
 
-	} else if ((c->x86_vendor == X86_VENDOR_INTEL) &&
-		   (((c->x86 == 0x6) && (c->x86_model <= 0xd)) ||
-		    ((c->x86 == 0xf) && (c->x86_model <= 0x6)))) {
+	__init_cache_modes(pat);
+
+	init_cm_done = 1;
+}
+
+/**
+ * pat_init - Initialize PAT MSR and PAT table
+ *
+ * This function initializes PAT MSR and PAT table with an OS-defined value
+ * to enable additional cache attributes, WC and WT.
+ *
+ * This function must be called on all CPUs using the specific sequence of
+ * operations defined in Intel SDM. mtrr_rendezvous_handler() provides this
+ * procedure for PAT.
+ */
+void pat_init(void)
+{
+	u64 pat;
+	struct cpuinfo_x86 *c = &boot_cpu_data;
+
+	if (!pat_enabled()) {
+		init_cache_modes();
+		return;
+	}
+
+	if ((c->x86_vendor == X86_VENDOR_INTEL) &&
+	    (((c->x86 == 0x6) && (c->x86_model <= 0xd)) ||
+	     ((c->x86 == 0xf) && (c->x86_model <= 0x6)))) {
 		/*
 		 * PAT support with the lower four entries. Intel Pentium 2,
 		 * 3, M, and 4 are affected by PAT errata, which makes the
@@ -734,25 +778,6 @@
 	if (file->f_flags & O_DSYNC)
 		pcm = _PAGE_CACHE_MODE_UC_MINUS;
 
-#ifdef CONFIG_X86_32
-	/*
-	 * On the PPro and successors, the MTRRs are used to set
-	 * memory types for physical addresses outside main memory,
-	 * so blindly setting UC or PWT on those pages is wrong.
-	 * For Pentiums and earlier, the surround logic should disable
-	 * caching for the high addresses through the KEN pin, but
-	 * we maintain the tradition of paranoia in this code.
-	 */
-	if (!pat_enabled() &&
-	    !(boot_cpu_has(X86_FEATURE_MTRR) ||
-	      boot_cpu_has(X86_FEATURE_K6_MTRR) ||
-	      boot_cpu_has(X86_FEATURE_CYRIX_ARR) ||
-	      boot_cpu_has(X86_FEATURE_CENTAUR_MCR)) &&
-	    (pfn << PAGE_SHIFT) >= __pa(high_memory)) {
-		pcm = _PAGE_CACHE_MODE_UC;
-	}
-#endif
-
 	*vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) |
 			     cachemode2protval(pcm));
 	return 1;
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index fe9b9f7..5643fd0 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -28,6 +28,8 @@
  *	Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
  */
 
+#ifdef CONFIG_SMP
+
 struct flush_tlb_info {
 	struct mm_struct *flush_mm;
 	unsigned long flush_start;
@@ -57,6 +59,118 @@
 }
 EXPORT_SYMBOL_GPL(leave_mm);
 
+#endif /* CONFIG_SMP */
+
+void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+	       struct task_struct *tsk)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	switch_mm_irqs_off(prev, next, tsk);
+	local_irq_restore(flags);
+}
+
+void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+			struct task_struct *tsk)
+{
+	unsigned cpu = smp_processor_id();
+
+	if (likely(prev != next)) {
+#ifdef CONFIG_SMP
+		this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+		this_cpu_write(cpu_tlbstate.active_mm, next);
+#endif
+		cpumask_set_cpu(cpu, mm_cpumask(next));
+
+		/*
+		 * Re-load page tables.
+		 *
+		 * This logic has an ordering constraint:
+		 *
+		 *  CPU 0: Write to a PTE for 'next'
+		 *  CPU 0: load bit 1 in mm_cpumask.  if nonzero, send IPI.
+		 *  CPU 1: set bit 1 in next's mm_cpumask
+		 *  CPU 1: load from the PTE that CPU 0 writes (implicit)
+		 *
+		 * We need to prevent an outcome in which CPU 1 observes
+		 * the new PTE value and CPU 0 observes bit 1 clear in
+		 * mm_cpumask.  (If that occurs, then the IPI will never
+		 * be sent, and CPU 0's TLB will contain a stale entry.)
+		 *
+		 * The bad outcome can occur if either CPU's load is
+		 * reordered before that CPU's store, so both CPUs must
+		 * execute full barriers to prevent this from happening.
+		 *
+		 * Thus, switch_mm needs a full barrier between the
+		 * store to mm_cpumask and any operation that could load
+		 * from next->pgd.  TLB fills are special and can happen
+		 * due to instruction fetches or for no reason at all,
+		 * and neither LOCK nor MFENCE orders them.
+		 * Fortunately, load_cr3() is serializing and gives the
+		 * ordering guarantee we need.
+		 *
+		 */
+		load_cr3(next->pgd);
+
+		trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+
+		/* Stop flush ipis for the previous mm */
+		cpumask_clear_cpu(cpu, mm_cpumask(prev));
+
+		/* Load per-mm CR4 state */
+		load_mm_cr4(next);
+
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
+		/*
+		 * Load the LDT, if the LDT is different.
+		 *
+		 * It's possible that prev->context.ldt doesn't match
+		 * the LDT register.  This can happen if leave_mm(prev)
+		 * was called and then modify_ldt changed
+		 * prev->context.ldt but suppressed an IPI to this CPU.
+		 * In this case, prev->context.ldt != NULL, because we
+		 * never set context.ldt to NULL while the mm still
+		 * exists.  That means that next->context.ldt !=
+		 * prev->context.ldt, because mms never share an LDT.
+		 */
+		if (unlikely(prev->context.ldt != next->context.ldt))
+			load_mm_ldt(next);
+#endif
+	}
+#ifdef CONFIG_SMP
+	  else {
+		this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+		BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next);
+
+		if (!cpumask_test_cpu(cpu, mm_cpumask(next))) {
+			/*
+			 * On established mms, the mm_cpumask is only changed
+			 * from irq context, from ptep_clear_flush() while in
+			 * lazy tlb mode, and here. Irqs are blocked during
+			 * schedule, protecting us from simultaneous changes.
+			 */
+			cpumask_set_cpu(cpu, mm_cpumask(next));
+
+			/*
+			 * We were in lazy tlb mode and leave_mm disabled
+			 * tlb flush IPI delivery. We must reload CR3
+			 * to make sure to use no freed page tables.
+			 *
+			 * As above, load_cr3() is serializing and orders TLB
+			 * fills with respect to the mm_cpumask write.
+			 */
+			load_cr3(next->pgd);
+			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+			load_mm_cr4(next);
+			load_mm_ldt(next);
+		}
+	}
+#endif
+}
+
+#ifdef CONFIG_SMP
+
 /*
  * The flush IPI assumes that a thread switch happens in this order:
  * [cpu0: the cpu that switches]
@@ -353,3 +467,5 @@
 	return 0;
 }
 late_initcall(create_tlb_single_page_flush_ceiling);
+
+#endif /* CONFIG_SMP */
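
The ordering comment in switch_mm_irqs_off() describes the classic store-buffering pattern: each CPU's store must become visible before its subsequent load, or both sides can miss each other and the flush IPI is lost. A userspace C11 analogue, with explicit fences standing in for the serializing load_cr3() (a single run is only illustrative; build with -pthread):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int pte, mask;
static int r0, r1;

static void *cpu0(void *arg)
{
	atomic_store_explicit(&pte, 1, memory_order_relaxed);	/* write PTE */
	atomic_thread_fence(memory_order_seq_cst);		/* cf. load_cr3() */
	r0 = atomic_load_explicit(&mask, memory_order_relaxed);/* check cpumask */
	return NULL;
}

static void *cpu1(void *arg)
{
	atomic_store_explicit(&mask, 1, memory_order_relaxed);	/* set cpumask bit */
	atomic_thread_fence(memory_order_seq_cst);		/* cf. load_cr3() */
	r1 = atomic_load_explicit(&pte, memory_order_relaxed);	/* TLB fill */
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, cpu0, NULL);
	pthread_create(&b, NULL, cpu1, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	/* With the fences, r0 == 0 && r1 == 0 cannot happen. */
	printf("r0=%d r1=%d\n", r0, r1);
	return 0;
}
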
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 0e07e09..28c0412 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -636,7 +636,7 @@
 	__u8 cpu_model = boot_cpu_data.x86_model;
 	struct op_x86_model_spec *spec = &op_ppro_spec;	/* default */
 
-	if (force_cpu_type == arch_perfmon && cpu_has_arch_perfmon)
+	if (force_cpu_type == arch_perfmon && boot_cpu_has(X86_FEATURE_ARCH_PERFMON))
 		return 0;
 
 	/*
@@ -700,7 +700,7 @@
 	char *cpu_type = NULL;
 	int ret = 0;
 
-	if (!cpu_has_apic)
+	if (!boot_cpu_has(X86_FEATURE_APIC))
 		return -ENODEV;
 
 	if (force_cpu_type == timer)
@@ -761,7 +761,7 @@
 		if (cpu_type)
 			break;
 
-		if (!cpu_has_arch_perfmon)
+		if (!boot_cpu_has(X86_FEATURE_ARCH_PERFMON))
 			return -ENODEV;
 
 		/* use arch perfmon as fallback */
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index d90528e..350f709 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -75,7 +75,7 @@
 	u64 val;
 	int i;
 
-	if (cpu_has_arch_perfmon) {
+	if (boot_cpu_has(X86_FEATURE_ARCH_PERFMON)) {
 		union cpuid10_eax eax;
 		eax.full = cpuid_eax(0xa);
 
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 3cd6983..b2a4e2a 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -396,7 +396,6 @@
 		return -ENODEV;
 
 	printk(KERN_INFO "PCI: Using ACPI for IRQ routing\n");
-	acpi_irq_penalty_init();
 	pcibios_enable_irq = acpi_pci_irq_enable;
 	pcibios_disable_irq = acpi_pci_irq_disable;
 	x86_init.pci.init_irq = x86_init_noop;
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index beac4df..4bd08b0 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -445,7 +445,7 @@
 		uint32_t eax = cpuid_eax(xen_cpuid_base() + 4);
 
 		if (((eax & XEN_HVM_CPUID_X2APIC_VIRT) && x2apic_mode) ||
-		    ((eax & XEN_HVM_CPUID_APIC_ACCESS_VIRT) && cpu_has_apic))
+		    ((eax & XEN_HVM_CPUID_APIC_ACCESS_VIRT) && boot_cpu_has(X86_FEATURE_APIC)))
 			return;
 	}
 
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 994a7df8..f93545e 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -54,10 +54,6 @@
 #include <asm/rtc.h>
 #include <asm/uv/uv.h>
 
-#define EFI_DEBUG
-
-struct efi_memory_map memmap;
-
 static struct efi efi_phys __initdata;
 static efi_system_table_t efi_systab __initdata;
 
@@ -119,11 +115,10 @@
 
 void __init efi_find_mirror(void)
 {
-	void *p;
+	efi_memory_desc_t *md;
 	u64 mirror_size = 0, total_size = 0;
 
-	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
-		efi_memory_desc_t *md = p;
+	for_each_efi_memory_desc(md) {
 		unsigned long long start = md->phys_addr;
 		unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
 
@@ -146,10 +141,9 @@
 
 static void __init do_add_efi_memmap(void)
 {
-	void *p;
+	efi_memory_desc_t *md;
 
-	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
-		efi_memory_desc_t *md = p;
+	for_each_efi_memory_desc(md) {
 		unsigned long long start = md->phys_addr;
 		unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
 		int e820_type;
@@ -209,47 +203,47 @@
 #else
 	pmap = (e->efi_memmap |	((__u64)e->efi_memmap_hi << 32));
 #endif
-	memmap.phys_map		= pmap;
-	memmap.nr_map		= e->efi_memmap_size /
+	efi.memmap.phys_map	= pmap;
+	efi.memmap.nr_map	= e->efi_memmap_size /
 				  e->efi_memdesc_size;
-	memmap.desc_size	= e->efi_memdesc_size;
-	memmap.desc_version	= e->efi_memdesc_version;
+	efi.memmap.desc_size	= e->efi_memdesc_size;
+	efi.memmap.desc_version	= e->efi_memdesc_version;
 
-	memblock_reserve(pmap, memmap.nr_map * memmap.desc_size);
+	WARN(efi.memmap.desc_version != 1,
+	     "Unexpected EFI_MEMORY_DESCRIPTOR version %ld",
+	     efi.memmap.desc_version);
 
-	efi.memmap = &memmap;
+	memblock_reserve(pmap, efi.memmap.nr_map * efi.memmap.desc_size);
 
 	return 0;
 }
 
 void __init efi_print_memmap(void)
 {
-#ifdef EFI_DEBUG
 	efi_memory_desc_t *md;
-	void *p;
-	int i;
+	int i = 0;
 
-	for (p = memmap.map, i = 0;
-	     p < memmap.map_end;
-	     p += memmap.desc_size, i++) {
+	for_each_efi_memory_desc(md) {
 		char buf[64];
 
-		md = p;
 		pr_info("mem%02u: %s range=[0x%016llx-0x%016llx] (%lluMB)\n",
-			i, efi_md_typeattr_format(buf, sizeof(buf), md),
+			i++, efi_md_typeattr_format(buf, sizeof(buf), md),
 			md->phys_addr,
 			md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1,
 			(md->num_pages >> (20 - EFI_PAGE_SHIFT)));
 	}
-#endif  /*  EFI_DEBUG  */
 }
 
 void __init efi_unmap_memmap(void)
 {
+	unsigned long size;
+
 	clear_bit(EFI_MEMMAP, &efi.flags);
-	if (memmap.map) {
-		early_memunmap(memmap.map, memmap.nr_map * memmap.desc_size);
-		memmap.map = NULL;
+
+	size = efi.memmap.nr_map * efi.memmap.desc_size;
+	if (efi.memmap.map) {
+		early_memunmap(efi.memmap.map, size);
+		efi.memmap.map = NULL;
 	}
 }
 
@@ -352,8 +346,6 @@
 		       efi.systab->hdr.revision >> 16,
 		       efi.systab->hdr.revision & 0xffff);
 
-	set_bit(EFI_SYSTEM_TABLES, &efi.flags);
-
 	return 0;
 }
 
@@ -440,17 +432,22 @@
 
 static int __init efi_memmap_init(void)
 {
+	unsigned long addr, size;
+
 	if (efi_enabled(EFI_PARAVIRT))
 		return 0;
 
 	/* Map the EFI memory map */
-	memmap.map = early_memremap((unsigned long)memmap.phys_map,
-				   memmap.nr_map * memmap.desc_size);
-	if (memmap.map == NULL) {
+	size = efi.memmap.nr_map * efi.memmap.desc_size;
+	addr = (unsigned long)efi.memmap.phys_map;
+
+	efi.memmap.map = early_memremap(addr, size);
+	if (efi.memmap.map == NULL) {
 		pr_err("Could not map the memory map!\n");
 		return -ENOMEM;
 	}
-	memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size);
+
+	efi.memmap.map_end = efi.memmap.map + size;
 
 	if (add_efi_memmap)
 		do_add_efi_memmap();
@@ -552,12 +549,9 @@
 void __init runtime_code_page_mkexec(void)
 {
 	efi_memory_desc_t *md;
-	void *p;
 
 	/* Make EFI runtime service code area executable */
-	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
-		md = p;
-
+	for_each_efi_memory_desc(md) {
 		if (md->type != EFI_RUNTIME_SERVICES_CODE)
 			continue;
 
@@ -604,12 +598,10 @@
 /* Merge contiguous regions of the same type and attribute */
 static void __init efi_merge_regions(void)
 {
-	void *p;
 	efi_memory_desc_t *md, *prev_md = NULL;
 
-	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+	for_each_efi_memory_desc(md) {
 		u64 prev_size;
-		md = p;
 
 		if (!prev_md) {
 			prev_md = md;
@@ -651,30 +643,31 @@
 static void __init save_runtime_map(void)
 {
 #ifdef CONFIG_KEXEC_CORE
+	unsigned long desc_size;
 	efi_memory_desc_t *md;
-	void *tmp, *p, *q = NULL;
+	void *tmp, *q = NULL;
 	int count = 0;
 
 	if (efi_enabled(EFI_OLD_MEMMAP))
 		return;
 
-	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
-		md = p;
+	desc_size = efi.memmap.desc_size;
 
+	for_each_efi_memory_desc(md) {
 		if (!(md->attribute & EFI_MEMORY_RUNTIME) ||
 		    (md->type == EFI_BOOT_SERVICES_CODE) ||
 		    (md->type == EFI_BOOT_SERVICES_DATA))
 			continue;
-		tmp = krealloc(q, (count + 1) * memmap.desc_size, GFP_KERNEL);
+		tmp = krealloc(q, (count + 1) * desc_size, GFP_KERNEL);
 		if (!tmp)
 			goto out;
 		q = tmp;
 
-		memcpy(q + count * memmap.desc_size, md, memmap.desc_size);
+		memcpy(q + count * desc_size, md, desc_size);
 		count++;
 	}
 
-	efi_runtime_map_setup(q, count, memmap.desc_size);
+	efi_runtime_map_setup(q, count, desc_size);
 	return;
 
 out:
@@ -714,10 +707,10 @@
 {
 	/* Initial call */
 	if (!entry)
-		return memmap.map_end - memmap.desc_size;
+		return efi.memmap.map_end - efi.memmap.desc_size;
 
-	entry -= memmap.desc_size;
-	if (entry < memmap.map)
+	entry -= efi.memmap.desc_size;
+	if (entry < efi.memmap.map)
 		return NULL;
 
 	return entry;
@@ -759,10 +752,10 @@
 
 	/* Initial call */
 	if (!entry)
-		return memmap.map;
+		return efi.memmap.map;
 
-	entry += memmap.desc_size;
-	if (entry >= memmap.map_end)
+	entry += efi.memmap.desc_size;
+	if (entry >= efi.memmap.map_end)
 		return NULL;
 
 	return entry;
@@ -776,8 +769,11 @@
 {
 	void *p, *new_memmap = NULL;
 	unsigned long left = 0;
+	unsigned long desc_size;
 	efi_memory_desc_t *md;
 
+	desc_size = efi.memmap.desc_size;
+
 	p = NULL;
 	while ((p = efi_map_next_entry(p))) {
 		md = p;
@@ -792,7 +788,7 @@
 		efi_map_region(md);
 		get_systab_virt_addr(md);
 
-		if (left < memmap.desc_size) {
+		if (left < desc_size) {
 			new_memmap = realloc_pages(new_memmap, *pg_shift);
 			if (!new_memmap)
 				return NULL;
@@ -801,10 +797,9 @@
 			(*pg_shift)++;
 		}
 
-		memcpy(new_memmap + (*count * memmap.desc_size), md,
-		       memmap.desc_size);
+		memcpy(new_memmap + (*count * desc_size), md, desc_size);
 
-		left -= memmap.desc_size;
+		left -= desc_size;
 		(*count)++;
 	}
 
@@ -816,7 +811,6 @@
 #ifdef CONFIG_KEXEC_CORE
 	efi_memory_desc_t *md;
 	unsigned int num_pages;
-	void *p;
 
 	efi.systab = NULL;
 
@@ -840,8 +834,7 @@
 	* Map efi regions which were passed via setup_data. The virt_addr is a
 	* fixed addr which was used in first kernel of a kexec boot.
 	*/
-	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
-		md = p;
+	for_each_efi_memory_desc(md) {
 		efi_map_region_fixed(md); /* FIXME: add error handling */
 		get_systab_virt_addr(md);
 	}
@@ -850,10 +843,10 @@
 
 	BUG_ON(!efi.systab);
 
-	num_pages = ALIGN(memmap.nr_map * memmap.desc_size, PAGE_SIZE);
+	num_pages = ALIGN(efi.memmap.nr_map * efi.memmap.desc_size, PAGE_SIZE);
 	num_pages >>= PAGE_SHIFT;
 
-	if (efi_setup_page_tables(memmap.phys_map, num_pages)) {
+	if (efi_setup_page_tables(efi.memmap.phys_map, num_pages)) {
 		clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
 		return;
 	}
@@ -937,16 +930,16 @@
 
 	if (efi_is_native()) {
 		status = phys_efi_set_virtual_address_map(
-				memmap.desc_size * count,
-				memmap.desc_size,
-				memmap.desc_version,
+				efi.memmap.desc_size * count,
+				efi.memmap.desc_size,
+				efi.memmap.desc_version,
 				(efi_memory_desc_t *)__pa(new_memmap));
 	} else {
 		status = efi_thunk_set_virtual_address_map(
 				efi_phys.set_virtual_address_map,
-				memmap.desc_size * count,
-				memmap.desc_size,
-				memmap.desc_version,
+				efi.memmap.desc_size * count,
+				efi.memmap.desc_size,
+				efi.memmap.desc_version,
 				(efi_memory_desc_t *)__pa(new_memmap));
 	}
 
@@ -1011,13 +1004,11 @@
 u32 efi_mem_type(unsigned long phys_addr)
 {
 	efi_memory_desc_t *md;
-	void *p;
 
 	if (!efi_enabled(EFI_MEMMAP))
 		return 0;
 
-	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
-		md = p;
+	for_each_efi_memory_desc(md) {
 		if ((md->phys_addr <= phys_addr) &&
 		    (phys_addr < (md->phys_addr +
 				  (md->num_pages << EFI_PAGE_SHIFT))))
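
The recurring change in this file replaces every open-coded walk of the raw descriptor buffer (a void * advanced by desc_size) with the for_each_efi_memory_desc() iterator, and moves the formerly file-local memmap into the global efi structure as efi.memmap. A sketch of how such an iterator can be written, assuming a map/map_end/desc_size layout like the one above; the kernel's actual definition lives in include/linux/efi.h and may differ in detail:

	/*
	 * Walk descriptors whose stride is desc_size, which firmware may
	 * set larger than sizeof(efi_memory_desc_t).
	 */
	#define for_each_efi_memory_desc_sketch(m, md)			\
		for ((md) = (m)->map;					\
		     (void *)(md) + (m)->desc_size <= (m)->map_end;	\
		     (md) = (void *)(md) + (m)->desc_size)

Stepping by desc_size rather than by the structure size matters because newer firmware can append fields to each descriptor; the iterator hides that detail from every caller.
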
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 49e4dd4..6e7242b 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -55,14 +55,12 @@
 static void __init early_code_mapping_set_exec(int executable)
 {
 	efi_memory_desc_t *md;
-	void *p;
 
 	if (!(__supported_pte_mask & _PAGE_NX))
 		return;
 
 	/* Make EFI service code area executable */
-	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
-		md = p;
+	for_each_efi_memory_desc(md) {
 		if (md->type == EFI_RUNTIME_SERVICES_CODE ||
 		    md->type == EFI_BOOT_SERVICES_CODE)
 			efi_set_executable(md, executable);
@@ -253,7 +251,7 @@
 	 * Map all of RAM so that we can access arguments in the 1:1
 	 * mapping when making EFI runtime calls.
 	 */
-	for_each_efi_memory_desc(&memmap, md) {
+	for_each_efi_memory_desc(md) {
 		if (md->type != EFI_CONVENTIONAL_MEMORY &&
 		    md->type != EFI_LOADER_DATA &&
 		    md->type != EFI_LOADER_CODE)
@@ -398,7 +396,6 @@
 	unsigned long pfn;
 	pgd_t *pgd = efi_pgd;
 	efi_memory_desc_t *md;
-	void *p;
 
 	if (efi_enabled(EFI_OLD_MEMMAP)) {
 		if (__supported_pte_mask & _PAGE_NX)
@@ -409,9 +406,8 @@
 	if (!efi_enabled(EFI_NX_PE_DATA))
 		return;
 
-	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+	for_each_efi_memory_desc(md) {
 		unsigned long pf = 0;
-		md = p;
 
 		if (!(md->attribute & EFI_MEMORY_RUNTIME))
 			continue;
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index ab50ada..4480c06 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -195,10 +195,9 @@
 */
 void __init efi_reserve_boot_services(void)
 {
-	void *p;
+	efi_memory_desc_t *md;
 
-	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
-		efi_memory_desc_t *md = p;
+	for_each_efi_memory_desc(md) {
 		u64 start = md->phys_addr;
 		u64 size = md->num_pages << EFI_PAGE_SHIFT;
 		bool already_reserved;
@@ -250,10 +249,9 @@
 
 void __init efi_free_boot_services(void)
 {
-	void *p;
+	efi_memory_desc_t *md;
 
-	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
-		efi_memory_desc_t *md = p;
+	for_each_efi_memory_desc(md) {
 		unsigned long long start = md->phys_addr;
 		unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
 
@@ -373,5 +371,5 @@
 
 bool efi_poweroff_required(void)
 {
-	return !!acpi_gbl_reduced_hardware;
+	return acpi_gbl_reduced_hardware || acpi_no_s5;
 }
diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c
index 1584cbe..815fec6 100644
--- a/arch/x86/platform/uv/bios_uv.c
+++ b/arch/x86/platform/uv/bios_uv.c
@@ -21,19 +21,20 @@
 
 #include <linux/efi.h>
 #include <linux/export.h>
+#include <linux/slab.h>
 #include <asm/efi.h>
 #include <linux/io.h>
 #include <asm/uv/bios.h>
 #include <asm/uv/uv_hub.h>
 
-static struct uv_systab uv_systab;
+struct uv_systab *uv_systab;
 
 s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
 {
-	struct uv_systab *tab = &uv_systab;
+	struct uv_systab *tab = uv_systab;
 	s64 ret;
 
-	if (!tab->function)
+	if (!tab || !tab->function)
 		/*
 		 * BIOS does not support UV systab
 		 */
@@ -183,34 +184,31 @@
 }
 EXPORT_SYMBOL_GPL(uv_bios_set_legacy_vga_target);
 
-
 #ifdef CONFIG_EFI
 void uv_bios_init(void)
 {
-	struct uv_systab *tab;
-
-	if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) ||
-	    (efi.uv_systab == (unsigned long)NULL)) {
-		printk(KERN_CRIT "No EFI UV System Table.\n");
-		uv_systab.function = (unsigned long)NULL;
+	uv_systab = NULL;
+	if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || !efi.uv_systab) {
+		pr_crit("UV: UVsystab: missing\n");
 		return;
 	}
 
-	tab = (struct uv_systab *)ioremap(efi.uv_systab,
-					sizeof(struct uv_systab));
-	if (strncmp(tab->signature, "UVST", 4) != 0)
-		printk(KERN_ERR "bad signature in UV system table!");
+	uv_systab = ioremap(efi.uv_systab, sizeof(struct uv_systab));
+	if (!uv_systab || strncmp(uv_systab->signature, UV_SYSTAB_SIG, 4)) {
+		pr_err("UV: UVsystab: bad signature!\n");
+		iounmap(uv_systab);
+		return;
+	}
 
-	/*
-	 * Copy table to permanent spot for later use.
-	 */
-	memcpy(&uv_systab, tab, sizeof(struct uv_systab));
-	iounmap(tab);
-
-	printk(KERN_INFO "EFI UV System Table Revision %d\n",
-					uv_systab.revision);
+	if (uv_systab->revision >= UV_SYSTAB_VERSION_UV4) {
+		int size = uv_systab->size;
+		iounmap(uv_systab);
+		uv_systab = ioremap(efi.uv_systab, size);
+		if (!uv_systab) {
+			pr_err("UV: UVsystab: ioremap(%d) failed!\n", size);
+			return;
+		}
+	}
+	pr_info("UV: UVsystab: Revision:%x\n", uv_systab->revision);
 }
-#else	/* !CONFIG_EFI */
-
-void uv_bios_init(void) { }
 #endif
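
uv_bios_init() above uses a two-pass mapping: ioremap() just the fixed-size header, validate the signature, then, for UV4+ tables whose size field exceeds the header, remap the full length reported by the table itself. The size must be read out before the first mapping is torn down. A condensed sketch of the idiom, with hypothetical struct and function names:

	struct hdr { char sig[4]; int revision; int size; };

	static struct hdr *map_table(unsigned long phys)
	{
		struct hdr *h = ioremap(phys, sizeof(*h));
		int size;

		if (!h || strncmp(h->sig, "UVST", 4)) {
			iounmap(h);	/* iounmap(NULL) is a no-op on x86 */
			return NULL;
		}
		size = h->size;		/* read before the mapping goes away */
		iounmap(h);
		return ioremap(phys, size);
	}
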
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 3b6ec42..fdb4d42 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -37,7 +37,7 @@
 };
 
 static int timeout_us;
-static int nobau;
+static bool nobau = true;
 static int nobau_perm;
 static cycles_t congested_cycles;
 
@@ -106,13 +106,28 @@
 	"enable:   number times use of the BAU was re-enabled"
 };
 
-static int __init
-setup_nobau(char *arg)
+static int __init setup_bau(char *arg)
 {
-	nobau = 1;
+	int result;
+
+	if (!arg)
+		return -EINVAL;
+
+	result = strtobool(arg, &nobau);
+	if (result)
+		return result;
+
+	/* we need to flip the logic here, so that bau=y sets nobau to false */
+	nobau = !nobau;
+
+	if (!nobau)
+		pr_info("UV BAU Enabled\n");
+	else
+		pr_info("UV BAU Disabled\n");
+
 	return 0;
 }
-early_param("nobau", setup_nobau);
+early_param("bau", setup_bau);
 
 /* base pnode in this partition */
 static int uv_base_pnode __read_mostly;
@@ -131,10 +146,10 @@
 		pr_info("BAU not initialized; cannot be turned on\n");
 		return;
 	}
-	nobau = 0;
+	nobau = false;
 	for_each_present_cpu(cpu) {
 		bcp = &per_cpu(bau_control, cpu);
-		bcp->nobau = 0;
+		bcp->nobau = false;
 	}
 	pr_info("BAU turned on\n");
 	return;
@@ -146,10 +161,10 @@
 	int cpu;
 	struct bau_control *bcp;
 
-	nobau = 1;
+	nobau = true;
 	for_each_present_cpu(cpu) {
 		bcp = &per_cpu(bau_control, cpu);
-		bcp->nobau = 1;
+		bcp->nobau = true;
 	}
 	pr_info("BAU turned off\n");
 	return;
@@ -1886,7 +1901,7 @@
 		bcp = &per_cpu(bau_control, cpu);
 		bcp->baudisabled		= 0;
 		if (nobau)
-			bcp->nobau		= 1;
+			bcp->nobau		= true;
 		bcp->statp			= &per_cpu(ptcstats, cpu);
 		/* time interval to catch a hardware stay-busy bug */
 		bcp->timeout_interval		= usec_2_cycles(2*timeout_us);
@@ -2025,7 +2040,8 @@
 			return 1;
 		}
 		bcp->uvhub_master = *hmasterp;
-		bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->blade_processor_id;
+		bcp->uvhub_cpu = uv_cpu_blade_processor_id(cpu);
+
 		if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) {
 			printk(KERN_EMERG "%d cpus per uvhub invalid\n",
 				bcp->uvhub_cpu);
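
The old flag-style nobau parameter becomes a boolean bau= parameter parsed with strtobool(), and since the internal state is still named nobau the parsed value has to be inverted, as the comment in setup_bau() notes. The BAU also now defaults to off (nobau = true) until explicitly enabled. A sketch of the inverted parse, with illustrative names; strtobool() accepts y/Y/1 and n/N/0 style strings:

	static bool nobau_example = true;	/* feature off by default */

	static int parse_bau_example(const char *arg)
	{
		bool enable;
		int ret = strtobool(arg, &enable);

		if (ret)
			return ret;
		nobau_example = !enable;	/* bau=y clears the "no" flag */
		return 0;
	}

Booting with bau=1 enables the BAU; bau=0 keeps it disabled.
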
diff --git a/arch/x86/platform/uv/uv_sysfs.c b/arch/x86/platform/uv/uv_sysfs.c
index 5d4ba30..e9da9eb 100644
--- a/arch/x86/platform/uv/uv_sysfs.c
+++ b/arch/x86/platform/uv/uv_sysfs.c
@@ -34,7 +34,7 @@
 static ssize_t coherence_id_show(struct kobject *kobj,
 			struct kobj_attribute *attr, char *buf)
 {
-	return snprintf(buf, PAGE_SIZE, "%ld\n", partition_coherence_id());
+	return snprintf(buf, PAGE_SIZE, "%ld\n", uv_partition_coherence_id());
 }
 
 static struct kobj_attribute partition_id_attr =
diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c
index 2b158a9..b333fc4 100644
--- a/arch/x86/platform/uv/uv_time.c
+++ b/arch/x86/platform/uv/uv_time.c
@@ -165,7 +165,7 @@
 	for_each_present_cpu(cpu) {
 		int nid = cpu_to_node(cpu);
 		int bid = uv_cpu_to_blade_id(cpu);
-		int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id;
+		int bcpu = uv_cpu_blade_processor_id(cpu);
 		struct uv_rtc_timer_head *head = blade_info[bid];
 
 		if (!head) {
@@ -226,7 +226,7 @@
 	int pnode = uv_cpu_to_pnode(cpu);
 	int bid = uv_cpu_to_blade_id(cpu);
 	struct uv_rtc_timer_head *head = blade_info[bid];
-	int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id;
+	int bcpu = uv_cpu_blade_processor_id(cpu);
 	u64 *t = &head->cpu[bcpu].expires;
 	unsigned long flags;
 	int next_cpu;
@@ -262,7 +262,7 @@
 	int pnode = uv_cpu_to_pnode(cpu);
 	int bid = uv_cpu_to_blade_id(cpu);
 	struct uv_rtc_timer_head *head = blade_info[bid];
-	int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id;
+	int bcpu = uv_cpu_blade_processor_id(cpu);
 	u64 *t = &head->cpu[bcpu].expires;
 	unsigned long flags;
 	int rc = 0;
diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c
index 291226b..9f14bd3 100644
--- a/arch/x86/power/hibernate_32.c
+++ b/arch/x86/power/hibernate_32.c
@@ -106,7 +106,7 @@
 			 * normal page tables.
 			 * NOTE: We can mark everything as executable here
 			 */
-			if (cpu_has_pse) {
+			if (boot_cpu_has(X86_FEATURE_PSE)) {
 				set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC));
 				pfn += PTRS_PER_PTE;
 			} else {
diff --git a/arch/x86/ras/Kconfig b/arch/x86/ras/Kconfig
index df280da..d957d5f 100644
--- a/arch/x86/ras/Kconfig
+++ b/arch/x86/ras/Kconfig
@@ -1,4 +1,4 @@
-config AMD_MCE_INJ
+config MCE_AMD_INJ
 	tristate "Simple MCE injection interface for AMD processors"
 	depends on RAS && EDAC_DECODE_MCE && DEBUG_FS && AMD_NB
 	default n
diff --git a/arch/x86/ras/Makefile b/arch/x86/ras/Makefile
index dd2c98b..5f94546 100644
--- a/arch/x86/ras/Makefile
+++ b/arch/x86/ras/Makefile
@@ -1,2 +1,2 @@
-obj-$(CONFIG_AMD_MCE_INJ)		+= mce_amd_inj.o
+obj-$(CONFIG_MCE_AMD_INJ)		+= mce_amd_inj.o
 
diff --git a/arch/x86/ras/mce_amd_inj.c b/arch/x86/ras/mce_amd_inj.c
index 9e02dca..e69f470 100644
--- a/arch/x86/ras/mce_amd_inj.c
+++ b/arch/x86/ras/mce_amd_inj.c
@@ -290,14 +290,33 @@
 	wrmsr_on_cpu(cpu, MSR_IA32_MCG_STATUS,
 		     (u32)mcg_status, (u32)(mcg_status >> 32));
 
-	wrmsr_on_cpu(cpu, MSR_IA32_MCx_STATUS(b),
-		     (u32)i_mce.status, (u32)(i_mce.status >> 32));
+	if (boot_cpu_has(X86_FEATURE_SMCA)) {
+		if (inj_type == DFR_INT_INJ) {
+			wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_DESTAT(b),
+				     (u32)i_mce.status, (u32)(i_mce.status >> 32));
 
-	wrmsr_on_cpu(cpu, MSR_IA32_MCx_ADDR(b),
-		     (u32)i_mce.addr, (u32)(i_mce.addr >> 32));
+			wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_DEADDR(b),
+				     (u32)i_mce.addr, (u32)(i_mce.addr >> 32));
+		} else {
+			wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_STATUS(b),
+				     (u32)i_mce.status, (u32)(i_mce.status >> 32));
 
-	wrmsr_on_cpu(cpu, MSR_IA32_MCx_MISC(b),
-		     (u32)i_mce.misc, (u32)(i_mce.misc >> 32));
+			wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_ADDR(b),
+				     (u32)i_mce.addr, (u32)(i_mce.addr >> 32));
+		}
+
+		wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(b),
+			     (u32)i_mce.misc, (u32)(i_mce.misc >> 32));
+	} else {
+		wrmsr_on_cpu(cpu, MSR_IA32_MCx_STATUS(b),
+			     (u32)i_mce.status, (u32)(i_mce.status >> 32));
+
+		wrmsr_on_cpu(cpu, MSR_IA32_MCx_ADDR(b),
+			     (u32)i_mce.addr, (u32)(i_mce.addr >> 32));
+
+		wrmsr_on_cpu(cpu, MSR_IA32_MCx_MISC(b),
+			     (u32)i_mce.misc, (u32)(i_mce.misc >> 32));
+	}
 
 	toggle_hw_mce_inject(cpu, false);
 
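
On Scalable MCA (SMCA) systems the per-bank registers move to the MSR_AMD64_SMCA_MCx_* range, and deferred-error injection (DFR_INT_INJ) writes the separate DESTAT/DEADDR pair instead of the architectural status/addr registers. A hypothetical helper expressing the same selection for the status register; the kernel code above writes each MSR inline rather than going through such a function:

	static u32 inj_status_msr(unsigned int bank, bool smca, bool deferred)
	{
		if (!smca)
			return MSR_IA32_MCx_STATUS(bank);
		return deferred ? MSR_AMD64_SMCA_MCx_DESTAT(bank)
				: MSR_AMD64_SMCA_MCx_STATUS(bank);
	}
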
diff --git a/arch/x86/tools/calc_run_size.sh b/arch/x86/tools/calc_run_size.sh
deleted file mode 100644
index 1a4c17b..0000000
--- a/arch/x86/tools/calc_run_size.sh
+++ /dev/null
@@ -1,42 +0,0 @@
-#!/bin/sh
-#
-# Calculate the amount of space needed to run the kernel, including room for
-# the .bss and .brk sections.
-#
-# Usage:
-# objdump -h a.out | sh calc_run_size.sh
-
-NUM='\([0-9a-fA-F]*[ \t]*\)'
-OUT=$(sed -n 's/^[ \t0-9]*.b[sr][sk][ \t]*'"$NUM$NUM$NUM$NUM"'.*/\1\4/p')
-if [ -z "$OUT" ] ; then
-	echo "Never found .bss or .brk file offset" >&2
-	exit 1
-fi
-
-OUT=$(echo ${OUT# })
-sizeA=$(printf "%d" 0x${OUT%% *})
-OUT=${OUT#* }
-offsetA=$(printf "%d" 0x${OUT%% *})
-OUT=${OUT#* }
-sizeB=$(printf "%d" 0x${OUT%% *})
-OUT=${OUT#* }
-offsetB=$(printf "%d" 0x${OUT%% *})
-
-run_size=$(( $offsetA + $sizeA + $sizeB ))
-
-# BFD linker shows the same file offset in ELF.
-if [ "$offsetA" -ne "$offsetB" ] ; then
-	# Gold linker shows them as consecutive.
-	endB=$(( $offsetB + $sizeB ))
-	if [ "$endB" != "$run_size" ] ; then
-		printf "sizeA: 0x%x\n" $sizeA >&2
-		printf "offsetA: 0x%x\n" $offsetA >&2
-		printf "sizeB: 0x%x\n" $sizeB >&2
-		printf "offsetB: 0x%x\n" $offsetB >&2
-		echo ".bss and .brk are non-contiguous" >&2
-		exit 1
-	fi
-fi
-
-printf "%d\n" $run_size
-exit 0
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 880862c..760789a 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -75,7 +75,6 @@
 #include <asm/mach_traps.h>
 #include <asm/mwait.h>
 #include <asm/pci_x86.h>
-#include <asm/pat.h>
 #include <asm/cpu.h>
 
 #ifdef CONFIG_ACPI
@@ -1093,6 +1092,26 @@
 	return ret;
 }
 
+static u64 xen_read_msr(unsigned int msr)
+{
+	/*
+	 * This will silently swallow a #GP from RDMSR.  It may be worth
+	 * changing that.
+	 */
+	int err;
+
+	return xen_read_msr_safe(msr, &err);
+}
+
+static void xen_write_msr(unsigned int msr, unsigned low, unsigned high)
+{
+	/*
+	 * This will silently swallow a #GP from WRMSR.  It may be worth
+	 * changing that.
+	 */
+	xen_write_msr_safe(msr, low, high);
+}
+
 void xen_setup_shared_info(void)
 {
 	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
@@ -1187,13 +1206,11 @@
 }
 
 static const struct pv_info xen_info __initconst = {
-	.paravirt_enabled = 1,
 	.shared_kernel_pmd = 0,
 
 #ifdef CONFIG_X86_64
 	.extra_user_64bit_cs = FLAT_USER_CS64,
 #endif
-	.features = 0,
 	.name = "Xen",
 };
 
@@ -1223,8 +1240,11 @@
 
 	.wbinvd = native_wbinvd,
 
-	.read_msr = xen_read_msr_safe,
-	.write_msr = xen_write_msr_safe,
+	.read_msr = xen_read_msr,
+	.write_msr = xen_write_msr,
+
+	.read_msr_safe = xen_read_msr_safe,
+	.write_msr_safe = xen_write_msr_safe,
 
 	.read_pmc = xen_read_pmc,
 
@@ -1469,10 +1489,10 @@
 	 * For BSP, PSE PGE are set in probe_page_size_mask(), for APs
 	 * set them here. For all, OSFXSR OSXMMEXCPT are set in fpu__init_cpu().
 	*/
-	if (cpu_has_pse)
+	if (boot_cpu_has(X86_FEATURE_PSE))
 		cr4_set_bits_and_update_boot(X86_CR4_PSE);
 
-	if (cpu_has_pge)
+	if (boot_cpu_has(X86_FEATURE_PGE))
 		cr4_set_bits_and_update_boot(X86_CR4_PGE);
 }
 
@@ -1506,12 +1526,16 @@
 }
 #endif    /* CONFIG_XEN_PVH */
 
+static void __init xen_dom0_set_legacy_features(void)
+{
+	x86_platform.legacy.rtc = 1;
+}
+
 /* First C function to be called on Xen boot */
 asmlinkage __visible void __init xen_start_kernel(void)
 {
 	struct physdev_set_iopl set_iopl;
 	unsigned long initrd_start = 0;
-	u64 pat;
 	int rc;
 
 	if (!xen_start_info)
@@ -1527,8 +1551,6 @@
 
 	/* Install Xen paravirt ops */
 	pv_info = xen_info;
-	if (xen_initial_domain())
-		pv_info.features |= PV_SUPPORTED_RTC;
 	pv_init_ops = xen_init_ops;
 	if (!xen_pvh_domain()) {
 		pv_cpu_ops = xen_cpu_ops;
@@ -1618,13 +1640,6 @@
 				   xen_start_info->nr_pages);
 	xen_reserve_special_pages();
 
-	/*
-	 * Modify the cache mode translation tables to match Xen's PAT
-	 * configuration.
-	 */
-	rdmsrl(MSR_IA32_CR_PAT, pat);
-	pat_init_cache_modes(pat);
-
 	/* keep using Xen gdt for now; no urgent need to change it */
 
 #ifdef CONFIG_X86_32
@@ -1670,6 +1685,7 @@
 	boot_params.hdr.ramdisk_image = initrd_start;
 	boot_params.hdr.ramdisk_size = xen_start_info->mod_len;
 	boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line);
+	boot_params.hdr.hardware_subarch = X86_SUBARCH_XEN;
 
 	if (!xen_initial_domain()) {
 		add_preferred_console("xenboot", 0, NULL);
@@ -1687,6 +1703,8 @@
 			.u.firmware_info.type = XEN_FW_KBD_SHIFT_FLAGS,
 		};
 
+		x86_platform.set_legacy_features =
+				xen_dom0_set_legacy_features;
 		xen_init_vga(info, xen_start_info->console.dom0.info_size);
 		xen_start_info->console.domU.mfn = 0;
 		xen_start_info->console.domU.evtchn = 0;
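
The pv_ops MSR interface is split here into a faulting pair (.read_msr/.write_msr, backing rdmsrl()/wrmsrl()) and an error-returning pair (.read_msr_safe/.write_msr_safe, backing rdmsrl_safe()/wrmsrl_safe()). Xen routes both through its safe implementations, so a #GP that native hardware would raise is silently swallowed, which the added comments flag as a possible future change. A caller-side contrast, using a hypothetical MSR_EXAMPLE index:

	static void msr_forms_example(void)
	{
		u64 val;
		int err;

		/* Faulting form: a bad MSR raises #GP on native hardware. */
		rdmsrl(MSR_EXAMPLE, val);

		/* Safe form: the fault comes back as an error code instead. */
		err = rdmsrl_safe(MSR_EXAMPLE, &val);
		if (err)
			pr_warn("MSR read failed: %d\n", err);
	}
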
diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index b56855a..28cf4c5 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -22,6 +22,7 @@
 generic-y += percpu.h
 generic-y += preempt.h
 generic-y += resource.h
+generic-y += rwsem.h
 generic-y += sections.h
 generic-y += siginfo.h
 generic-y += statfs.h
diff --git a/arch/xtensa/include/asm/rwsem.h b/arch/xtensa/include/asm/rwsem.h
deleted file mode 100644
index 249619e..0000000
--- a/arch/xtensa/include/asm/rwsem.h
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * include/asm-xtensa/rwsem.h
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Largely copied from include/asm-ppc/rwsem.h
- *
- * Copyright (C) 2001 - 2005 Tensilica Inc.
- */
-
-#ifndef _XTENSA_RWSEM_H
-#define _XTENSA_RWSEM_H
-
-#ifndef _LINUX_RWSEM_H
-#error "Please don't include <asm/rwsem.h> directly, use <linux/rwsem.h> instead."
-#endif
-
-#define RWSEM_UNLOCKED_VALUE		0x00000000
-#define RWSEM_ACTIVE_BIAS		0x00000001
-#define RWSEM_ACTIVE_MASK		0x0000ffff
-#define RWSEM_WAITING_BIAS		(-0x00010000)
-#define RWSEM_ACTIVE_READ_BIAS		RWSEM_ACTIVE_BIAS
-#define RWSEM_ACTIVE_WRITE_BIAS		(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
-
-/*
- * lock for reading
- */
-static inline void __down_read(struct rw_semaphore *sem)
-{
-	if (atomic_add_return(1,(atomic_t *)(&sem->count)) > 0)
-		smp_wmb();
-	else
-		rwsem_down_read_failed(sem);
-}
-
-static inline int __down_read_trylock(struct rw_semaphore *sem)
-{
-	int tmp;
-
-	while ((tmp = sem->count) >= 0) {
-		if (tmp == cmpxchg(&sem->count, tmp,
-				   tmp + RWSEM_ACTIVE_READ_BIAS)) {
-			smp_wmb();
-			return 1;
-		}
-	}
-	return 0;
-}
-
-/*
- * lock for writing
- */
-static inline void __down_write(struct rw_semaphore *sem)
-{
-	int tmp;
-
-	tmp = atomic_add_return(RWSEM_ACTIVE_WRITE_BIAS,
-				(atomic_t *)(&sem->count));
-	if (tmp == RWSEM_ACTIVE_WRITE_BIAS)
-		smp_wmb();
-	else
-		rwsem_down_write_failed(sem);
-}
-
-static inline int __down_write_trylock(struct rw_semaphore *sem)
-{
-	int tmp;
-
-	tmp = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
-		      RWSEM_ACTIVE_WRITE_BIAS);
-	smp_wmb();
-	return tmp == RWSEM_UNLOCKED_VALUE;
-}
-
-/*
- * unlock after reading
- */
-static inline void __up_read(struct rw_semaphore *sem)
-{
-	int tmp;
-
-	smp_wmb();
-	tmp = atomic_sub_return(1,(atomic_t *)(&sem->count));
-	if (tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0)
-		rwsem_wake(sem);
-}
-
-/*
- * unlock after writing
- */
-static inline void __up_write(struct rw_semaphore *sem)
-{
-	smp_wmb();
-	if (atomic_sub_return(RWSEM_ACTIVE_WRITE_BIAS,
-			      (atomic_t *)(&sem->count)) < 0)
-		rwsem_wake(sem);
-}
-
-/*
- * implement atomic add functionality
- */
-static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem)
-{
-	atomic_add(delta, (atomic_t *)(&sem->count));
-}
-
-/*
- * downgrade write lock to read lock
- */
-static inline void __downgrade_write(struct rw_semaphore *sem)
-{
-	int tmp;
-
-	smp_wmb();
-	tmp = atomic_add_return(-RWSEM_WAITING_BIAS, (atomic_t *)(&sem->count));
-	if (tmp < 0)
-		rwsem_downgrade_wake(sem);
-}
-
-/*
- * implement exchange and add functionality
- */
-static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
-{
-	smp_mb();
-	return atomic_add_return(delta, (atomic_t *)(&sem->count));
-}
-
-#endif	/* _XTENSA_RWSEM_H */
diff --git a/arch/xtensa/kernel/perf_event.c b/arch/xtensa/kernel/perf_event.c
index 54f0118..a6b00b3 100644
--- a/arch/xtensa/kernel/perf_event.c
+++ b/arch/xtensa/kernel/perf_event.c
@@ -332,14 +332,14 @@
 void perf_callchain_kernel(struct perf_callchain_entry *entry,
 			   struct pt_regs *regs)
 {
-	xtensa_backtrace_kernel(regs, PERF_MAX_STACK_DEPTH,
+	xtensa_backtrace_kernel(regs, sysctl_perf_event_max_stack,
 				callchain_trace, NULL, entry);
 }
 
 void perf_callchain_user(struct perf_callchain_entry *entry,
 			 struct pt_regs *regs)
 {
-	xtensa_backtrace_user(regs, PERF_MAX_STACK_DEPTH,
+	xtensa_backtrace_user(regs, sysctl_perf_event_max_stack,
 			      callchain_trace, entry);
 }
 
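
PERF_MAX_STACK_DEPTH was a compile-time cap on callchain depth; these hunks switch the xtensa backtracers to sysctl_perf_event_max_stack, which is tunable at runtime through /proc/sys/kernel/perf_event_max_stack. A sketch of sizing a buffer from the runtime limit instead of the constant; the helper is illustrative:

	static u64 *alloc_callchain_buf(void)
	{
		return kmalloc_array(sysctl_perf_event_max_stack,
				     sizeof(u64), GFP_KERNEL);
	}
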
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 82b96ee..b7e2e77 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -5,10 +5,10 @@
 menuconfig ACPI
 	bool "ACPI (Advanced Configuration and Power Interface) Support"
 	depends on !IA64_HP_SIM
-	depends on IA64 || X86 || (ARM64 && EXPERT)
+	depends on IA64 || X86 || ARM64
 	depends on PCI
 	select PNP
-	default y
+	default y if (IA64 || X86)
 	help
 	  Advanced Configuration and Power Interface (ACPI) support for 
 	  Linux requires an ACPI-compliant platform (hardware/firmware),
@@ -311,12 +311,12 @@
 	bool
 	default ACPI_CUSTOM_DSDT_FILE != ""
 
-config ACPI_INITRD_TABLE_OVERRIDE
-	bool "ACPI tables override via initrd"
+config ACPI_TABLE_UPGRADE
+	bool "Allow upgrading ACPI tables via initrd"
 	depends on BLK_DEV_INITRD && X86
-	default n
+	default y
 	help
-	  This option provides functionality to override arbitrary ACPI tables
+	  This option provides functionality to upgrade arbitrary ACPI tables
 	  via initrd. No functional change if no ACPI tables are passed via
 	  initrd, therefore it's safe to say Y.
 	  See Documentation/acpi/initrd_table_override.txt for details
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index edeb2d1..251ce85 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -18,7 +18,7 @@
 					acpica/
 
 # All the builtin files are in the "acpi." module_param namespace.
-acpi-y				+= osl.o utils.o reboot.o
+acpi-y				+= osi.o osl.o utils.o reboot.o
 acpi-y				+= nvs.o
 
 # Power management related files
@@ -47,6 +47,7 @@
 acpi-y				+= int340x_thermal.o
 acpi-y				+= power.o
 acpi-y				+= event.o
+acpi-$(CONFIG_ACPI_REDUCED_HARDWARE_ONLY) += evged.o
 acpi-y				+= sysfs.o
 acpi-y				+= property.o
 acpi-$(CONFIG_X86)		+= acpi_cmos_rtc.o
diff --git a/drivers/acpi/acpi_amba.c b/drivers/acpi/acpi_amba.c
index 2a61b54..7f77c07 100644
--- a/drivers/acpi/acpi_amba.c
+++ b/drivers/acpi/acpi_amba.c
@@ -35,8 +35,7 @@
 	if (amba_dummy_clk)
 		return;
 
-	amba_dummy_clk = clk_register_fixed_rate(NULL, "apb_pclk", NULL,
-						CLK_IS_ROOT, 0);
+	amba_dummy_clk = clk_register_fixed_rate(NULL, "apb_pclk", NULL, 0, 0);
 	clk_register_clkdev(amba_dummy_clk, "apb_pclk", NULL);
 }
 
diff --git a/drivers/acpi/acpi_apd.c b/drivers/acpi/acpi_apd.c
index f245bf3..1daf9c4 100644
--- a/drivers/acpi/acpi_apd.c
+++ b/drivers/acpi/acpi_apd.c
@@ -62,8 +62,7 @@
 	if (dev_desc->fixed_clk_rate) {
 		clk = clk_register_fixed_rate(&pdata->adev->dev,
 					dev_name(&pdata->adev->dev),
-					NULL, CLK_IS_ROOT,
-					dev_desc->fixed_clk_rate);
+					NULL, 0, dev_desc->fixed_clk_rate);
 		clk_register_clkdev(clk, NULL, dev_name(&pdata->adev->dev));
 		pdata->clk = clk;
 	}
diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c
index 4361bc9..3d5b8a0 100644
--- a/drivers/acpi/acpi_video.c
+++ b/drivers/acpi/acpi_video.c
@@ -191,19 +191,6 @@
 	u8 _DDC:1;		/* Return the EDID for this device */
 };
 
-struct acpi_video_brightness_flags {
-	u8 _BCL_no_ac_battery_levels:1;	/* no AC/Battery levels in _BCL */
-	u8 _BCL_reversed:1;		/* _BCL package is in a reversed order */
-	u8 _BQC_use_index:1;		/* _BQC returns an index value */
-};
-
-struct acpi_video_device_brightness {
-	int curr;
-	int count;
-	int *levels;
-	struct acpi_video_brightness_flags flags;
-};
-
 struct acpi_video_device {
 	unsigned long device_id;
 	struct acpi_video_device_flags flags;
@@ -325,7 +312,7 @@
  */
 
 static int
-acpi_video_device_lcd_query_levels(struct acpi_video_device *device,
+acpi_video_device_lcd_query_levels(acpi_handle handle,
 				   union acpi_object **levels)
 {
 	int status;
@@ -335,7 +322,7 @@
 
 	*levels = NULL;
 
-	status = acpi_evaluate_object(device->dev->handle, "_BCL", NULL, &buffer);
+	status = acpi_evaluate_object(handle, "_BCL", NULL, &buffer);
 	if (!ACPI_SUCCESS(status))
 		return status;
 	obj = (union acpi_object *)buffer.pointer;
@@ -766,36 +753,28 @@
 	return 0;
 }
 
-
-/*
- *  Arg:
- *	device	: video output device (LCD, CRT, ..)
- *
- *  Return Value:
- *	Maximum brightness level
- *
- *  Allocate and initialize device->brightness.
- */
-
-static int
-acpi_video_init_brightness(struct acpi_video_device *device)
+int acpi_video_get_levels(struct acpi_device *device,
+			  struct acpi_video_device_brightness **dev_br)
 {
 	union acpi_object *obj = NULL;
 	int i, max_level = 0, count = 0, level_ac_battery = 0;
-	unsigned long long level, level_old;
 	union acpi_object *o;
 	struct acpi_video_device_brightness *br = NULL;
-	int result = -EINVAL;
+	int result = 0;
 	u32 value;
 
-	if (!ACPI_SUCCESS(acpi_video_device_lcd_query_levels(device, &obj))) {
+	if (!ACPI_SUCCESS(acpi_video_device_lcd_query_levels(device->handle,
+								&obj))) {
 		ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Could not query available "
 						"LCD brightness level\n"));
+		result = -ENODEV;
 		goto out;
 	}
 
-	if (obj->package.count < 2)
+	if (obj->package.count < 2) {
+		result = -EINVAL;
 		goto out;
+	}
 
 	br = kzalloc(sizeof(*br), GFP_KERNEL);
 	if (!br) {
@@ -861,6 +840,38 @@
 			    "Found unordered _BCL package"));
 
 	br->count = count;
+	*dev_br = br;
+
+out:
+	kfree(obj);
+	return result;
+out_free:
+	kfree(br);
+	goto out;
+}
+EXPORT_SYMBOL(acpi_video_get_levels);
+
+/*
+ *  Arg:
+ *	device	: video output device (LCD, CRT, ..)
+ *
+ *  Return Value:
+ *	Maximum brightness level
+ *
+ *  Allocate and initialize device->brightness.
+ */
+
+static int
+acpi_video_init_brightness(struct acpi_video_device *device)
+{
+	int i, max_level = 0;
+	unsigned long long level, level_old;
+	struct acpi_video_device_brightness *br = NULL;
+	int result = -EINVAL;
+
+	result = acpi_video_get_levels(device->dev, &br);
+	if (result)
+		return result;
 	device->brightness = br;
 
 	/* _BQC uses INDEX while _BCL uses VALUE in some laptops */
@@ -903,17 +914,13 @@
 		goto out_free_levels;
 
 	ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-			  "found %d brightness levels\n", count - 2));
-	kfree(obj);
-	return result;
+			  "found %d brightness levels\n", br->count - 2));
+	return 0;
 
 out_free_levels:
 	kfree(br->levels);
-out_free:
 	kfree(br);
-out:
 	device->brightness = NULL;
-	kfree(obj);
 	return result;
 }
 
diff --git a/drivers/acpi/acpica/Makefile b/drivers/acpi/acpica/Makefile
index f682374..227bb7b 100644
--- a/drivers/acpi/acpica/Makefile
+++ b/drivers/acpi/acpica/Makefile
@@ -43,6 +43,7 @@
 	evxfregn.o
 
 acpi-y +=		\
+	exconcat.o	\
 	exconfig.o	\
 	exconvrt.o	\
 	excreate.o	\
@@ -149,6 +150,7 @@
 acpi-y +=		\
 	utaddress.o	\
 	utalloc.o	\
+	utascii.o	\
 	utbuffer.o	\
 	utcopy.o	\
 	utexcep.o	\
diff --git a/drivers/acpi/acpica/acdebug.h b/drivers/acpi/acpica/acdebug.h
index 993af9e..f6404ea 100644
--- a/drivers/acpi/acpica/acdebug.h
+++ b/drivers/acpi/acpica/acdebug.h
@@ -53,7 +53,7 @@
 #define ACPI_DEBUG_BUFFER_SIZE  0x4000	/* 16K buffer for return objects */
 
 struct acpi_db_command_info {
-	char *name;		/* Command Name */
+	const char *name;	/* Command Name */
 	u8 min_args;		/* Minimum arguments required */
 };
 
@@ -64,7 +64,7 @@
 };
 
 struct acpi_db_argument_info {
-	char *name;		/* Argument Name */
+	const char *name;	/* Argument Name */
 };
 
 struct acpi_db_execute_walk {
@@ -196,7 +196,7 @@
 							     acpi_walk_state
 							     *walk_state))
 
- acpi_status acpi_db_display_all_methods(char *display_count_arg);
+acpi_status acpi_db_display_all_methods(char *display_count_arg);
 
 void acpi_db_display_arguments(void);
 
@@ -220,7 +220,7 @@
  * dbexec - debugger control method execution
  */
 void
-acpi_db_execute(char *name, char **args, acpi_object_type * types, u32 flags);
+acpi_db_execute(char *name, char **args, acpi_object_type *types, u32 flags);
 
 void
 acpi_db_create_execution_threads(char *num_threads_arg,
@@ -271,7 +271,7 @@
 acpi_status acpi_db_user_commands(void);
 
 char *acpi_db_get_next_token(char *string,
-			     char **next, acpi_object_type * return_type);
+			     char **next, acpi_object_type *return_type);
 
 /*
  * dbobject
diff --git a/drivers/acpi/acpica/acevents.h b/drivers/acpi/acpica/acevents.h
index 010cf81..77af91c 100644
--- a/drivers/acpi/acpica/acevents.h
+++ b/drivers/acpi/acpica/acevents.h
@@ -72,6 +72,7 @@
 ACPI_HW_DEPENDENT_RETURN_OK(acpi_status
 			    acpi_ev_acquire_global_lock(u16 timeout))
 ACPI_HW_DEPENDENT_RETURN_OK(acpi_status acpi_ev_release_global_lock(void))
+
 acpi_status acpi_ev_remove_global_lock_handler(void);
 
 /*
@@ -198,8 +199,6 @@
 acpi_ev_detach_region(union acpi_operand_object *region_obj,
 		      u8 acpi_ns_is_locked);
 
-void acpi_ev_associate_reg_method(union acpi_operand_object *region_obj);
-
 void
 acpi_ev_execute_reg_methods(struct acpi_namespace_node *node,
 			    acpi_adr_space_type space_id, u32 function);
diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h
index 51b073b..fded776 100644
--- a/drivers/acpi/acpica/acglobal.h
+++ b/drivers/acpi/acpica/acglobal.h
@@ -187,6 +187,8 @@
 extern const char *acpi_gbl_lowest_dstate_names[ACPI_NUM_sx_w_METHODS];
 extern const char *acpi_gbl_highest_dstate_names[ACPI_NUM_sx_d_METHODS];
 extern const char *acpi_gbl_region_types[ACPI_NUM_PREDEFINED_REGIONS];
+extern const char acpi_gbl_lower_hex_digits[];
+extern const char acpi_gbl_upper_hex_digits[];
 extern const struct acpi_opcode_info acpi_gbl_aml_op_info[AML_NUM_OPCODES];
 
 #ifdef ACPI_DBG_TRACK_ALLOCATIONS
@@ -361,6 +363,15 @@
 
 #endif				/* ACPI_DEBUGGER */
 
+#if defined (ACPI_DISASSEMBLER) || defined (ACPI_ASL_COMPILER)
+
+ACPI_GLOBAL(const char, *acpi_gbl_pld_panel_list[]);
+ACPI_GLOBAL(const char, *acpi_gbl_pld_vertical_position_list[]);
+ACPI_GLOBAL(const char, *acpi_gbl_pld_horizontal_position_list[]);
+ACPI_GLOBAL(const char, *acpi_gbl_pld_shape_list[]);
+
+#endif
+
 /*****************************************************************************
  *
  * Application globals
diff --git a/drivers/acpi/acpica/acinterp.h b/drivers/acpi/acpica/acinterp.h
index bae1a35..7ead235 100644
--- a/drivers/acpi/acpica/acinterp.h
+++ b/drivers/acpi/acpica/acinterp.h
@@ -67,7 +67,7 @@
 typedef const struct acpi_exdump_info {
 	u8 opcode;
 	u8 offset;
-	char *name;
+	const char *name;
 
 } acpi_exdump_info;
 
@@ -370,7 +370,7 @@
 acpi_status
 acpi_ex_resolve_multiple(struct acpi_walk_state *walk_state,
 			 union acpi_operand_object *operand,
-			 acpi_object_type * return_type,
+			 acpi_object_type *return_type,
 			 union acpi_operand_object **return_desc);
 
 /*
diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h
index 9562a10..13331d7 100644
--- a/drivers/acpi/acpica/aclocal.h
+++ b/drivers/acpi/acpica/aclocal.h
@@ -278,7 +278,7 @@
 };
 
 typedef
-acpi_status(*acpi_internal_method) (struct acpi_walk_state * walk_state);
+acpi_status (*acpi_internal_method) (struct acpi_walk_state * walk_state);
 
 /*
  * Bitmapped ACPI types. Used internally only
@@ -395,11 +395,12 @@
 
 /* Return object auto-repair info */
 
-typedef acpi_status(*acpi_object_converter) (struct acpi_namespace_node * scope,
-					     union acpi_operand_object
-					     *original_object,
-					     union acpi_operand_object
-					     **converted_object);
+typedef acpi_status (*acpi_object_converter) (struct acpi_namespace_node *
+					      scope,
+					      union acpi_operand_object *
+					      original_object,
+					      union acpi_operand_object **
+					      converted_object);
 
 struct acpi_simple_repair_info {
 	char name[ACPI_NAME_SIZE];
@@ -539,10 +540,10 @@
 	struct acpi_namespace_node *gpe_device;
 };
 
-typedef acpi_status(*acpi_gpe_callback) (struct acpi_gpe_xrupt_info *
-					 gpe_xrupt_info,
-					 struct acpi_gpe_block_info *gpe_block,
-					 void *context);
+typedef acpi_status (*acpi_gpe_callback) (struct acpi_gpe_xrupt_info *
+					  gpe_xrupt_info,
+					  struct acpi_gpe_block_info *
+					  gpe_block, void *context);
 
 /* Information about each particular fixed event */
 
@@ -657,10 +658,11 @@
 };
 
 typedef
-acpi_status(*acpi_parse_downwards) (struct acpi_walk_state * walk_state,
-				    union acpi_parse_object ** out_op);
+acpi_status (*acpi_parse_downwards) (struct acpi_walk_state * walk_state,
+				     union acpi_parse_object ** out_op);
 
-typedef acpi_status(*acpi_parse_upwards) (struct acpi_walk_state * walk_state);
+typedef
+acpi_status (*acpi_parse_upwards) (struct acpi_walk_state * walk_state);
 
 /* Global handlers for AML Notifies */
 
@@ -700,7 +702,8 @@
  *
  ****************************************************************************/
 
-typedef acpi_status(*acpi_execute_op) (struct acpi_walk_state * walk_state);
+typedef
+acpi_status (*acpi_execute_op) (struct acpi_walk_state * walk_state);
 
 /* Address Range info block */
 
@@ -853,24 +856,24 @@
 
 /* Parse object flags */
 
-#define ACPI_PARSEOP_GENERIC            0x01
-#define ACPI_PARSEOP_NAMED              0x02
-#define ACPI_PARSEOP_DEFERRED           0x04
-#define ACPI_PARSEOP_BYTELIST           0x08
-#define ACPI_PARSEOP_IN_STACK           0x10
-#define ACPI_PARSEOP_TARGET             0x20
-#define ACPI_PARSEOP_IN_CACHE           0x80
+#define ACPI_PARSEOP_GENERIC                0x01
+#define ACPI_PARSEOP_NAMED_OBJECT           0x02
+#define ACPI_PARSEOP_DEFERRED               0x04
+#define ACPI_PARSEOP_BYTELIST               0x08
+#define ACPI_PARSEOP_IN_STACK               0x10
+#define ACPI_PARSEOP_TARGET                 0x20
+#define ACPI_PARSEOP_IN_CACHE               0x80
 
 /* Parse object disasm_flags */
 
-#define ACPI_PARSEOP_IGNORE             0x01
-#define ACPI_PARSEOP_PARAMLIST          0x02
-#define ACPI_PARSEOP_EMPTY_TERMLIST     0x04
-#define ACPI_PARSEOP_PREDEF_CHECKED     0x08
-#define ACPI_PARSEOP_CLOSING_PAREN      0x10
-#define ACPI_PARSEOP_COMPOUND           0x20
-#define ACPI_PARSEOP_ASSIGNMENT         0x40
-#define ACPI_PARSEOP_ELSEIF             0x80
+#define ACPI_PARSEOP_IGNORE                 0x01
+#define ACPI_PARSEOP_PARAMETER_LIST         0x02
+#define ACPI_PARSEOP_EMPTY_TERMLIST         0x04
+#define ACPI_PARSEOP_PREDEFINED_CHECKED     0x08
+#define ACPI_PARSEOP_CLOSING_PAREN          0x10
+#define ACPI_PARSEOP_COMPOUND_ASSIGNMENT    0x20
+#define ACPI_PARSEOP_ASSIGNMENT             0x40
+#define ACPI_PARSEOP_ELSEIF                 0x80
 
 /*****************************************************************************
  *
@@ -1096,6 +1099,7 @@
 #define ACPI_EXT_ORIGIN_FROM_FILE           0x02	/* External came from a file */
 #define ACPI_EXT_INTERNAL_PATH_ALLOCATED    0x04	/* Deallocate internal path on completion */
 #define ACPI_EXT_EXTERNAL_EMITTED           0x08	/* External() statement has been emitted */
+#define ACPI_EXT_ORIGIN_FROM_OPCODE         0x10	/* External came from an External() opcode */
 
 struct acpi_external_file {
 	char *path;
diff --git a/drivers/acpi/acpica/acmacros.h b/drivers/acpi/acpica/acmacros.h
index 411c18b..a3b9543 100644
--- a/drivers/acpi/acpica/acmacros.h
+++ b/drivers/acpi/acpica/acmacros.h
@@ -260,14 +260,31 @@
 
 #define ACPI_IS_MISALIGNED(value)           (((acpi_size) value) & (sizeof(acpi_size)-1))
 
+/* Generic (power-of-two) rounding */
+
+#define ACPI_IS_ALIGNED(a, s)               (((a) & ((s) - 1)) == 0)
+#define ACPI_IS_POWER_OF_TWO(a)             ACPI_IS_ALIGNED(a, a)
+
 /*
  * Bitmask creation
  * Bit positions start at zero.
  * MASK_BITS_ABOVE creates a mask starting AT the position and above
  * MASK_BITS_BELOW creates a mask starting one bit BELOW the position
+ * MASK_BITS_ABOVE/BELOW accepts a bit offset to create a mask
+ * MASK_BITS_ABOVE/BELOW_32/64 accepts a bit width to create a mask
+ * Note: The ACPI_INTEGER_BIT_SIZE check avoids shifting by the full
+ * integer width, which is undefined in C and varies across compilers
  */
 #define ACPI_MASK_BITS_ABOVE(position)      (~((ACPI_UINT64_MAX) << ((u32) (position))))
 #define ACPI_MASK_BITS_BELOW(position)      ((ACPI_UINT64_MAX) << ((u32) (position)))
+#define ACPI_MASK_BITS_ABOVE_32(width)      ((u32) ACPI_MASK_BITS_ABOVE(width))
+#define ACPI_MASK_BITS_BELOW_32(width)      ((u32) ACPI_MASK_BITS_BELOW(width))
+#define ACPI_MASK_BITS_ABOVE_64(width)      ((width) == ACPI_INTEGER_BIT_SIZE ? \
+												ACPI_UINT64_MAX : \
+												ACPI_MASK_BITS_ABOVE(width))
+#define ACPI_MASK_BITS_BELOW_64(width)      ((width) == ACPI_INTEGER_BIT_SIZE ? \
+												(u64) 0 : \
+												ACPI_MASK_BITS_BELOW(width))
 
 /* Bitfields within ACPI registers */
 
@@ -283,10 +300,10 @@
 /* Generic bitfield macros and masks */
 
 #define ACPI_GET_BITS(source_ptr, position, mask) \
-	((*source_ptr >> position) & mask)
+	((*(source_ptr) >> (position)) & (mask))
 
 #define ACPI_SET_BITS(target_ptr, position, mask, value) \
-	(*target_ptr |= ((value & mask) << position))
+	(*(target_ptr) |= (((value) & (mask)) << (position)))
 
 #define ACPI_1BIT_MASK      0x00000001
 #define ACPI_2BIT_MASK      0x00000003
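
The new _64 mask variants special-case a full-width shift because shifting a 64-bit value by 64 is undefined behavior in C, and compilers produce different results for it. Illustrative values, assuming the macros above and ACPI_INTEGER_BIT_SIZE == 64:

	u64 all  = ACPI_MASK_BITS_ABOVE_64(64);	/* 0xFFFFFFFFFFFFFFFF */
	u64 none = ACPI_MASK_BITS_BELOW_64(64);	/* 0 */
	u64 low  = ACPI_MASK_BITS_ABOVE_64(8);	/* 0x00000000000000FF */
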
diff --git a/drivers/acpi/acpica/acnamesp.h b/drivers/acpi/acpica/acnamesp.h
index 022d69c..f33a4ba 100644
--- a/drivers/acpi/acpica/acnamesp.h
+++ b/drivers/acpi/acpica/acnamesp.h
@@ -206,9 +206,10 @@
 void acpi_ns_dump_entry(acpi_handle handle, u32 debug_level);
 
 void
-acpi_ns_dump_pathname(acpi_handle handle, char *msg, u32 level, u32 component);
+acpi_ns_dump_pathname(acpi_handle handle,
+		      const char *msg, u32 level, u32 component);
 
-void acpi_ns_print_pathname(u32 num_segments, char *pathname);
+void acpi_ns_print_pathname(u32 num_segments, const char *pathname);
 
 acpi_status
 acpi_ns_dump_one_object(acpi_handle obj_handle,
diff --git a/drivers/acpi/acpica/acparser.h b/drivers/acpi/acpica/acparser.h
index 7da639d..fc30577 100644
--- a/drivers/acpi/acpica/acparser.h
+++ b/drivers/acpi/acpica/acparser.h
@@ -139,7 +139,7 @@
  */
 const struct acpi_opcode_info *acpi_ps_get_opcode_info(u16 opcode);
 
-char *acpi_ps_get_opcode_name(u16 opcode);
+const char *acpi_ps_get_opcode_name(u16 opcode);
 
 u8 acpi_ps_get_argument_count(u32 op_type);
 
diff --git a/drivers/acpi/acpica/acpredef.h b/drivers/acpi/acpica/acpredef.h
index 5faeab4..888440b 100644
--- a/drivers/acpi/acpica/acpredef.h
+++ b/drivers/acpi/acpica/acpredef.h
@@ -129,7 +129,8 @@
 	ACPI_PTYPE2_REV_FIXED = 9,
 	ACPI_PTYPE2_FIX_VAR = 10,
 	ACPI_PTYPE2_VAR_VAR = 11,
-	ACPI_PTYPE2_UUID_PAIR = 12
+	ACPI_PTYPE2_UUID_PAIR = 12,
+	ACPI_PTYPE_CUSTOM = 13
 };
 
 /* Support macros for users of the predefined info table */
@@ -340,7 +341,7 @@
 
 	{{"_BIX", METHOD_0ARGS,
 	  METHOD_RETURNS(ACPI_RTYPE_PACKAGE)}},	/* Fixed-length (16 Int),(4 Str) */
-	PACKAGE_INFO(ACPI_PTYPE1_FIXED, ACPI_RTYPE_INTEGER, 16,
+	PACKAGE_INFO(ACPI_PTYPE_CUSTOM, ACPI_RTYPE_INTEGER, 16,
 		     ACPI_RTYPE_STRING, 4, 0),
 
 	{{"_BLT",
@@ -523,6 +524,9 @@
 	  METHOD_RETURNS(ACPI_RTYPE_PACKAGE)}},	/* Fixed-length (4 Int) */
 	PACKAGE_INFO(ACPI_PTYPE1_FIXED, ACPI_RTYPE_INTEGER, 4, 0, 0, 0),
 
+	{{"_FIT", METHOD_0ARGS,
+	  METHOD_RETURNS(ACPI_RTYPE_BUFFER)}},	/* ACPI 6.0 */
+
 	{{"_FIX", METHOD_0ARGS,
 	  METHOD_RETURNS(ACPI_RTYPE_PACKAGE)}},	/* Variable-length (Ints) */
 	PACKAGE_INFO(ACPI_PTYPE1_VAR, ACPI_RTYPE_INTEGER, 0, 0, 0, 0),
@@ -1053,6 +1057,12 @@
 	  METHOD_RETURNS(ACPI_RTYPE_INTEGER | ACPI_RTYPE_STRING |
 			 ACPI_RTYPE_BUFFER)}},
 
+	{{"_WPC", METHOD_0ARGS,
+	  METHOD_RETURNS(ACPI_RTYPE_INTEGER)}},	/* ACPI 6.1 */
+
+	{{"_WPP", METHOD_0ARGS,
+	  METHOD_RETURNS(ACPI_RTYPE_INTEGER)}},	/* ACPI 6.1 */
+
 	PACKAGE_INFO(0, 0, 0, 0, 0, 0)	/* Table terminator */
 };
 #else
diff --git a/drivers/acpi/acpica/acresrc.h b/drivers/acpi/acpica/acresrc.h
index 5dd58be..63da1e3 100644
--- a/drivers/acpi/acpica/acresrc.h
+++ b/drivers/acpi/acpica/acresrc.h
@@ -124,7 +124,7 @@
 typedef const struct acpi_rsdump_info {
 	u8 opcode;
 	u8 offset;
-	char *name;
+	const char *name;
 	const char **pointer;
 
 } acpi_rsdump_info;
@@ -209,7 +209,7 @@
 
 acpi_status
 acpi_rs_get_method_data(acpi_handle handle,
-			char *path, struct acpi_buffer *ret_buffer);
+			const char *path, struct acpi_buffer *ret_buffer);
 
 acpi_status
 acpi_rs_set_srs_method_data(struct acpi_namespace_node *node,
@@ -223,16 +223,16 @@
  * rscalc
  */
 acpi_status
-acpi_rs_get_list_length(u8 * aml_buffer,
-			u32 aml_buffer_length, acpi_size * size_needed);
+acpi_rs_get_list_length(u8 *aml_buffer,
+			u32 aml_buffer_length, acpi_size *size_needed);
 
 acpi_status
 acpi_rs_get_aml_length(struct acpi_resource *resource_list,
-		       acpi_size resource_list_size, acpi_size * size_needed);
+		       acpi_size resource_list_size, acpi_size *size_needed);
 
 acpi_status
 acpi_rs_get_pci_routing_table_length(union acpi_operand_object *package_object,
-				     acpi_size * buffer_size_needed);
+				     acpi_size *buffer_size_needed);
 
 acpi_status
 acpi_rs_convert_aml_to_resources(u8 * aml,
diff --git a/drivers/acpi/acpica/acstruct.h b/drivers/acpi/acpica/acstruct.h
index b3b386e..6235642 100644
--- a/drivers/acpi/acpica/acstruct.h
+++ b/drivers/acpi/acpica/acstruct.h
@@ -184,7 +184,7 @@
 	/* The first 3 elements are passed by the caller to acpi_ns_evaluate */
 
 	struct acpi_namespace_node *prefix_node;	/* Input: starting node */
-	char *relative_pathname;	/* Input: path relative to prefix_node */
+	const char *relative_pathname;	/* Input: path relative to prefix_node */
 	union acpi_operand_object **parameters;	/* Input: argument list */
 
 	struct acpi_namespace_node *node;	/* Resolved node (prefix_node:relative_pathname) */
diff --git a/drivers/acpi/acpica/actables.h b/drivers/acpi/acpica/actables.h
index 848ad3a..cd5a135 100644
--- a/drivers/acpi/acpica/actables.h
+++ b/drivers/acpi/acpica/actables.h
@@ -161,8 +161,6 @@
 
 acpi_status acpi_tb_parse_root_table(acpi_physical_address rsdp_address);
 
-u8 acpi_is_valid_signature(char *signature);
-
 /*
  * tbxfload
  */
diff --git a/drivers/acpi/acpica/acutils.h b/drivers/acpi/acpica/acutils.h
index e43ab6f..a7dbb2b 100644
--- a/drivers/acpi/acpica/acutils.h
+++ b/drivers/acpi/acpica/acutils.h
@@ -136,16 +136,16 @@
 #define ACPI_SMALL_VARIABLE_LENGTH      3
 
 typedef
-acpi_status(*acpi_walk_aml_callback) (u8 *aml,
-				      u32 length,
-				      u32 offset,
-				      u8 resource_index, void **context);
+acpi_status (*acpi_walk_aml_callback) (u8 *aml,
+				       u32 length,
+				       u32 offset,
+				       u8 resource_index, void **context);
 
 typedef
-acpi_status(*acpi_pkg_callback) (u8 object_type,
-				 union acpi_operand_object *source_object,
-				 union acpi_generic_state * state,
-				 void *context);
+acpi_status (*acpi_pkg_callback) (u8 object_type,
+				  union acpi_operand_object * source_object,
+				  union acpi_generic_state * state,
+				  void *context);
 
 struct acpi_pkg_info {
 	u8 *free_space;
@@ -167,6 +167,15 @@
 #define DB_QWORD_DISPLAY    8
 
 /*
+ * utascii - ASCII utilities
+ */
+u8 acpi_ut_valid_nameseg(char *signature);
+
+u8 acpi_ut_valid_name_char(char character, u32 position);
+
+void acpi_ut_check_and_repair_ascii(u8 *name, char *repaired_name, u32 count);
+
+/*
  * utnonansi - Non-ANSI C library functions
  */
 void acpi_ut_strupr(char *src_string);
@@ -175,7 +184,14 @@
 
 int acpi_ut_stricmp(char *string1, char *string2);
 
-acpi_status acpi_ut_strtoul64(char *string, u32 base, u64 *ret_integer);
+acpi_status
+acpi_ut_strtoul64(char *string,
+		  u32 base, u32 max_integer_byte_width, u64 *ret_integer);
+
+/* Values for max_integer_byte_width above */
+
+#define ACPI_MAX32_BYTE_WIDTH       4
+#define ACPI_MAX64_BYTE_WIDTH       8
 
 /*
  * utglobal - Global data structures and procedures
@@ -266,7 +282,8 @@
 void
 acpi_ut_trace_ptr(u32 line_number,
 		  const char *function_name,
-		  const char *module_name, u32 component_id, void *pointer);
+		  const char *module_name,
+		  u32 component_id, const void *pointer);
 
 void
 acpi_ut_trace_u32(u32 line_number,
@@ -276,7 +293,8 @@
 void
 acpi_ut_trace_str(u32 line_number,
 		  const char *function_name,
-		  const char *module_name, u32 component_id, char *string);
+		  const char *module_name,
+		  u32 component_id, const char *string);
 
 void
 acpi_ut_exit(u32 line_number,
@@ -335,12 +353,12 @@
  */
 acpi_status
 acpi_ut_evaluate_object(struct acpi_namespace_node *prefix_node,
-			char *path,
+			const char *path,
 			u32 expected_return_btypes,
 			union acpi_operand_object **return_desc);
 
 acpi_status
-acpi_ut_evaluate_numeric_object(char *object_name,
+acpi_ut_evaluate_numeric_object(const char *object_name,
 				struct acpi_namespace_node *device_node,
 				u64 *value);
 
@@ -415,7 +433,7 @@
 union acpi_operand_object *acpi_ut_create_string_object(acpi_size string_size);
 
 acpi_status
-acpi_ut_get_object_size(union acpi_operand_object *obj, acpi_size * obj_length);
+acpi_ut_get_object_size(union acpi_operand_object *obj, acpi_size *obj_length);
 
 /*
  * utosi - Support for the _OSI predefined control method
@@ -526,15 +544,15 @@
 void
 acpi_ut_display_init_pathname(u8 type,
 			      struct acpi_namespace_node *obj_handle,
-			      char *path);
+			      const char *path);
 #endif
 
 /*
  * utownerid - Support for Table/Method Owner IDs
  */
-acpi_status acpi_ut_allocate_owner_id(acpi_owner_id * owner_id);
+acpi_status acpi_ut_allocate_owner_id(acpi_owner_id *owner_id);
 
-void acpi_ut_release_owner_id(acpi_owner_id * owner_id);
+void acpi_ut_release_owner_id(acpi_owner_id *owner_id);
 
 /*
  * utresrc
@@ -570,10 +588,6 @@
 void ut_convert_backslashes(char *pathname);
 #endif
 
-u8 acpi_ut_valid_acpi_name(char *name);
-
-u8 acpi_ut_valid_acpi_char(char character, u32 position);
-
 void acpi_ut_repair_name(char *name);
 
 #if defined (ACPI_DEBUGGER) || defined (ACPI_APPLICATION)
@@ -628,7 +642,7 @@
 void acpi_ut_dump_allocations(u32 component, const char *module);
 
 acpi_status
-acpi_ut_create_list(char *list_name,
+acpi_ut_create_list(const char *list_name,
 		    u16 object_size, struct acpi_memory_list **return_cache);
 
 #endif				/* ACPI_DBG_TRACK_ALLOCATIONS */
diff --git a/drivers/acpi/acpica/dbcmds.c b/drivers/acpi/acpica/dbcmds.c
index 772178c..62bd446 100644
--- a/drivers/acpi/acpica/dbcmds.c
+++ b/drivers/acpi/acpica/dbcmds.c
@@ -738,9 +738,9 @@
 	original_aml = return_buffer.pointer;
 
 	acpi_dm_compare_aml_resources(original_aml->buffer.pointer,
-				      (acpi_rsdesc_size) original_aml->buffer.
+				      (acpi_rsdesc_size)original_aml->buffer.
 				      length, new_aml.pointer,
-				      (acpi_rsdesc_size) new_aml.length);
+				      (acpi_rsdesc_size)new_aml.length);
 
 	/* Cleanup and exit */
 
diff --git a/drivers/acpi/acpica/dbconvert.c b/drivers/acpi/acpica/dbconvert.c
index 68f4e0f4..7cd07b2 100644
--- a/drivers/acpi/acpica/dbconvert.c
+++ b/drivers/acpi/acpica/dbconvert.c
@@ -194,7 +194,7 @@
  *
  ******************************************************************************/
 
-acpi_status acpi_db_convert_to_package(char *string, union acpi_object * object)
+acpi_status acpi_db_convert_to_package(char *string, union acpi_object *object)
 {
 	char *this;
 	char *next;
@@ -252,7 +252,7 @@
 
 acpi_status
 acpi_db_convert_to_object(acpi_object_type type,
-			  char *string, union acpi_object * object)
+			  char *string, union acpi_object *object)
 {
 	acpi_status status = AE_OK;
 
@@ -277,7 +277,9 @@
 	default:
 
 		object->type = ACPI_TYPE_INTEGER;
-		status = acpi_ut_strtoul64(string, 16, &object->integer.value);
+		status =
+		    acpi_ut_strtoul64(string, 16, acpi_gbl_integer_byte_width,
+				      &object->integer.value);
 		break;
 	}
 
diff --git a/drivers/acpi/acpica/dbexec.c b/drivers/acpi/acpica/dbexec.c
index c814855..12df291 100644
--- a/drivers/acpi/acpica/dbexec.c
+++ b/drivers/acpi/acpica/dbexec.c
@@ -361,7 +361,7 @@
  ******************************************************************************/
 
 void
-acpi_db_execute(char *name, char **args, acpi_object_type * types, u32 flags)
+acpi_db_execute(char *name, char **args, acpi_object_type *types, u32 flags)
 {
 	acpi_status status;
 	struct acpi_buffer return_obj;
diff --git a/drivers/acpi/acpica/dbinput.c b/drivers/acpi/acpica/dbinput.c
index 417c02a..7cd5d2e 100644
--- a/drivers/acpi/acpica/dbinput.c
+++ b/drivers/acpi/acpica/dbinput.c
@@ -57,12 +57,12 @@
 
 static u32 acpi_db_match_command(char *user_command);
 
-static void acpi_db_display_command_info(char *command, u8 display_all);
+static void acpi_db_display_command_info(const char *command, u8 display_all);
 
 static void acpi_db_display_help(char *command);
 
 static u8
-acpi_db_match_command_help(char *command,
+acpi_db_match_command_help(const char *command,
 			   const struct acpi_db_command_help *help);
 
 /*
@@ -348,7 +348,7 @@
  ******************************************************************************/
 
 static u8
-acpi_db_match_command_help(char *command,
+acpi_db_match_command_help(const char *command,
 			   const struct acpi_db_command_help *help)
 {
 	char *invocation = help->invocation;
@@ -402,7 +402,7 @@
  *
  ******************************************************************************/
 
-static void acpi_db_display_command_info(char *command, u8 display_all)
+static void acpi_db_display_command_info(const char *command, u8 display_all)
 {
 	const struct acpi_db_command_help *next;
 	u8 matched;
@@ -466,7 +466,7 @@
  ******************************************************************************/
 
 char *acpi_db_get_next_token(char *string,
-			     char **next, acpi_object_type * return_type)
+			     char **next, acpi_object_type *return_type)
 {
 	char *start;
 	u32 depth;
@@ -656,8 +656,9 @@
 	}
 
 	for (i = CMD_FIRST_VALID; acpi_gbl_db_commands[i].name; i++) {
-		if (strstr(acpi_gbl_db_commands[i].name, user_command) ==
-		    acpi_gbl_db_commands[i].name) {
+		if (strstr
+		    (ACPI_CAST_PTR(char, acpi_gbl_db_commands[i].name),
+		     user_command) == acpi_gbl_db_commands[i].name) {
 			return (i);
 		}
 	}
@@ -683,8 +684,8 @@
 
 acpi_status
 acpi_db_command_dispatch(char *input_buffer,
-			 struct acpi_walk_state * walk_state,
-			 union acpi_parse_object * op)
+			 struct acpi_walk_state *walk_state,
+			 union acpi_parse_object *op)
 {
 	u32 temp;
 	u32 command_index;
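
The const-qualification introduced above is why command-table names must
now pass through ACPI_CAST_PTR before reaching strstr(), which takes a
plain char *. The macro is essentially a double cast through a
pointer-sized integer, along these lines (paraphrased from ACPICA's
actypes.h, not quoted verbatim):

	#define ACPI_CAST_PTR(t, p)     ((t *) (acpi_uintptr_t) (p))
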
diff --git a/drivers/acpi/acpica/dbnames.c b/drivers/acpi/acpica/dbnames.c
index 3c23b5a..8667f14 100644
--- a/drivers/acpi/acpica/dbnames.c
+++ b/drivers/acpi/acpica/dbnames.c
@@ -285,7 +285,7 @@
 	u32 max_depth = ACPI_UINT32_MAX;
 	acpi_owner_id owner_id;
 
-	owner_id = (acpi_owner_id) strtoul(owner_arg, NULL, 0);
+	owner_id = (acpi_owner_id)strtoul(owner_arg, NULL, 0);
 
 	/* Now we can check for the depth argument */
 
@@ -709,7 +709,7 @@
 		return (AE_OK);
 	}
 
-	if (!acpi_ut_valid_acpi_name(node->name.ascii)) {
+	if (!acpi_ut_valid_nameseg(node->name.ascii)) {
 		acpi_os_printf("Invalid AcpiName for Node %p\n", node);
 		return (AE_OK);
 	}
diff --git a/drivers/acpi/acpica/dbutils.c b/drivers/acpi/acpica/dbutils.c
index b37a2c7..ae80106 100644
--- a/drivers/acpi/acpica/dbutils.c
+++ b/drivers/acpi/acpica/dbutils.c
@@ -56,8 +56,6 @@
 void acpi_db_dump_buffer(u32 address);
 #endif
 
-static char *gbl_hex_to_ascii = "0123456789ABCDEF";
-
 /*******************************************************************************
  *
  * FUNCTION:    acpi_db_match_argument
@@ -82,8 +80,9 @@
 	}
 
 	for (i = 0; arguments[i].name; i++) {
-		if (strstr(arguments[i].name, user_argument) ==
-		    arguments[i].name) {
+		if (strstr(ACPI_CAST_PTR(char, arguments[i].name),
+			   ACPI_CAST_PTR(char,
+					 user_argument)) == arguments[i].name) {
 			return (i);
 		}
 	}
@@ -339,7 +338,7 @@
 	buffer[8] = '\0';
 
 	for (i = 7; i >= 0; i--) {
-		buffer[i] = gbl_hex_to_ascii[value & 0x0F];
+		buffer[i] = acpi_gbl_upper_hex_digits[value & 0x0F];
 		value = value >> 4;
 	}
 }
diff --git a/drivers/acpi/acpica/dbxface.c b/drivers/acpi/acpica/dbxface.c
index e94e0d8..124db23 100644
--- a/drivers/acpi/acpica/dbxface.c
+++ b/drivers/acpi/acpica/dbxface.c
@@ -162,8 +162,8 @@
  ******************************************************************************/
 
 acpi_status
-acpi_db_single_step(struct acpi_walk_state * walk_state,
-		    union acpi_parse_object * op, u32 opcode_class)
+acpi_db_single_step(struct acpi_walk_state *walk_state,
+		    union acpi_parse_object *op, u32 opcode_class)
 {
 	union acpi_parse_object *next;
 	acpi_status status = AE_OK;
diff --git a/drivers/acpi/acpica/dscontrol.c b/drivers/acpi/acpica/dscontrol.c
index c9a663f..4ddcbf1 100644
--- a/drivers/acpi/acpica/dscontrol.c
+++ b/drivers/acpi/acpica/dscontrol.c
@@ -163,8 +163,8 @@
  ******************************************************************************/
 
 acpi_status
-acpi_ds_exec_end_control_op(struct acpi_walk_state * walk_state,
-			    union acpi_parse_object * op)
+acpi_ds_exec_end_control_op(struct acpi_walk_state *walk_state,
+			    union acpi_parse_object *op)
 {
 	acpi_status status = AE_OK;
 	union acpi_generic_state *control_state;
diff --git a/drivers/acpi/acpica/dsinit.c b/drivers/acpi/acpica/dsinit.c
index 5aa1c5f..f1e6dcc 100644
--- a/drivers/acpi/acpica/dsinit.c
+++ b/drivers/acpi/acpica/dsinit.c
@@ -188,7 +188,7 @@
 
 acpi_status
 acpi_ds_initialize_objects(u32 table_index,
-			   struct acpi_namespace_node * start_node)
+			   struct acpi_namespace_node *start_node)
 {
 	acpi_status status;
 	struct acpi_init_walk_info info;
diff --git a/drivers/acpi/acpica/dsmethod.c b/drivers/acpi/acpica/dsmethod.c
index da198b8..47c7b52 100644
--- a/drivers/acpi/acpica/dsmethod.c
+++ b/drivers/acpi/acpica/dsmethod.c
@@ -209,7 +209,7 @@
  ******************************************************************************/
 
 acpi_status
-acpi_ds_method_error(acpi_status status, struct acpi_walk_state * walk_state)
+acpi_ds_method_error(acpi_status status, struct acpi_walk_state *walk_state)
 {
 	u32 aml_offset;
 
diff --git a/drivers/acpi/acpica/dsutils.c b/drivers/acpi/acpica/dsutils.c
index 8ca9416..f393de9 100644
--- a/drivers/acpi/acpica/dsutils.c
+++ b/drivers/acpi/acpica/dsutils.c
@@ -569,7 +569,7 @@
 					/* TBD: May only be temporary */
 
 					obj_desc =
-					    acpi_ut_create_string_object((acpi_size) name_length);
+					    acpi_ut_create_string_object((acpi_size)name_length);
 
 					strncpy(obj_desc->string.pointer,
 						name_string, name_length);
diff --git a/drivers/acpi/acpica/dswload.c b/drivers/acpi/acpica/dswload.c
index d1cedcf..fd34040 100644
--- a/drivers/acpi/acpica/dswload.c
+++ b/drivers/acpi/acpica/dswload.c
@@ -137,8 +137,8 @@
  ******************************************************************************/
 
 acpi_status
-acpi_ds_load1_begin_op(struct acpi_walk_state * walk_state,
-		       union acpi_parse_object ** out_op)
+acpi_ds_load1_begin_op(struct acpi_walk_state *walk_state,
+		       union acpi_parse_object **out_op)
 {
 	union acpi_parse_object *op;
 	struct acpi_namespace_node *node;
diff --git a/drivers/acpi/acpica/dswload2.c b/drivers/acpi/acpica/dswload2.c
index 0bac6e1..762db3f 100644
--- a/drivers/acpi/acpica/dswload2.c
+++ b/drivers/acpi/acpica/dswload2.c
@@ -490,8 +490,8 @@
 
 			status =
 			    acpi_ds_create_index_field(op,
-						       (acpi_handle) arg->
-						       common.node, walk_state);
+						       (acpi_handle)arg->common.
+						       node, walk_state);
 			break;
 
 		case AML_BANK_FIELD_OP:
diff --git a/drivers/acpi/acpica/dswstate.c b/drivers/acpi/acpica/dswstate.c
index 3a26ddb..e333869 100644
--- a/drivers/acpi/acpica/dswstate.c
+++ b/drivers/acpi/acpica/dswstate.c
@@ -143,8 +143,8 @@
  ******************************************************************************/
 
 acpi_status
-acpi_ds_result_push(union acpi_operand_object * object,
-		    struct acpi_walk_state * walk_state)
+acpi_ds_result_push(union acpi_operand_object *object,
+		    struct acpi_walk_state *walk_state)
 {
 	union acpi_generic_state *state;
 	acpi_status status;
@@ -307,7 +307,7 @@
  ******************************************************************************/
 
 acpi_status
-acpi_ds_obj_stack_push(void *object, struct acpi_walk_state * walk_state)
+acpi_ds_obj_stack_push(void *object, struct acpi_walk_state *walk_state)
 {
 	ACPI_FUNCTION_NAME(ds_obj_stack_push);
 
@@ -354,7 +354,7 @@
  ******************************************************************************/
 
 acpi_status
-acpi_ds_obj_stack_pop(u32 pop_count, struct acpi_walk_state * walk_state)
+acpi_ds_obj_stack_pop(u32 pop_count, struct acpi_walk_state *walk_state)
 {
 	u32 i;
 
@@ -411,7 +411,7 @@
 		return;
 	}
 
-	for (i = (s32) pop_count - 1; i >= 0; i--) {
+	for (i = (s32)pop_count - 1; i >= 0; i--) {
 		if (walk_state->num_operands == 0) {
 			return;
 		}
diff --git a/drivers/acpi/acpica/evgpe.c b/drivers/acpi/acpica/evgpe.c
index b47e62aaf..4b4949c 100644
--- a/drivers/acpi/acpica/evgpe.c
+++ b/drivers/acpi/acpica/evgpe.c
@@ -440,7 +440,7 @@
 
 				gpe_event_info =
 				    &gpe_block->
-				    event_info[((acpi_size) i *
+				    event_info[((acpi_size)i *
 						ACPI_GPE_REGISTER_WIDTH) + j];
 				gpe_number =
 				    j + gpe_register_info->base_gpe_number;
@@ -652,7 +652,7 @@
  *
  ******************************************************************************/
 
-acpi_status acpi_ev_finish_gpe(struct acpi_gpe_event_info * gpe_event_info)
+acpi_status acpi_ev_finish_gpe(struct acpi_gpe_event_info *gpe_event_info)
 {
 	acpi_status status;
 
diff --git a/drivers/acpi/acpica/evgpeblk.c b/drivers/acpi/acpica/evgpeblk.c
index 447fa1c..d54014c 100644
--- a/drivers/acpi/acpica/evgpeblk.c
+++ b/drivers/acpi/acpica/evgpeblk.c
@@ -211,7 +211,7 @@
 
 	/* Allocate the GPE register information block */
 
-	gpe_register_info = ACPI_ALLOCATE_ZEROED((acpi_size) gpe_block->
+	gpe_register_info = ACPI_ALLOCATE_ZEROED((acpi_size)gpe_block->
 						 register_count *
 						 sizeof(struct
 							acpi_gpe_register_info));
@@ -225,7 +225,7 @@
 	 * Allocate the GPE event_info block. There are eight distinct GPEs
 	 * per register. Initialization to zeros is sufficient.
 	 */
-	gpe_event_info = ACPI_ALLOCATE_ZEROED((acpi_size) gpe_block->gpe_count *
+	gpe_event_info = ACPI_ALLOCATE_ZEROED((acpi_size)gpe_block->gpe_count *
 					      sizeof(struct
 						     acpi_gpe_event_info));
 	if (!gpe_event_info) {
diff --git a/drivers/acpi/acpica/evgpeutil.c b/drivers/acpi/acpica/evgpeutil.c
index 66c4b5b..3f150d5 100644
--- a/drivers/acpi/acpica/evgpeutil.c
+++ b/drivers/acpi/acpica/evgpeutil.c
@@ -163,7 +163,7 @@
 
 acpi_status
 acpi_ev_get_gpe_xrupt_block(u32 interrupt_number,
-			    struct acpi_gpe_xrupt_info ** gpe_xrupt_block)
+			    struct acpi_gpe_xrupt_info **gpe_xrupt_block)
 {
 	struct acpi_gpe_xrupt_info *next_gpe_xrupt;
 	struct acpi_gpe_xrupt_info *gpe_xrupt;
@@ -320,7 +320,7 @@
 		/* Now look at the individual GPEs in this byte register */
 
 		for (j = 0; j < ACPI_GPE_REGISTER_WIDTH; j++) {
-			gpe_event_info = &gpe_block->event_info[((acpi_size) i *
+			gpe_event_info = &gpe_block->event_info[((acpi_size)i *
 								 ACPI_GPE_REGISTER_WIDTH)
 								+ j];
 
diff --git a/drivers/acpi/acpica/evhandler.c b/drivers/acpi/acpica/evhandler.c
index 0f6be89..24768ca 100644
--- a/drivers/acpi/acpica/evhandler.c
+++ b/drivers/acpi/acpica/evhandler.c
@@ -359,7 +359,7 @@
  ******************************************************************************/
 
 acpi_status
-acpi_ev_install_space_handler(struct acpi_namespace_node * node,
+acpi_ev_install_space_handler(struct acpi_namespace_node *node,
 			      acpi_adr_space_type space_id,
 			      acpi_adr_space_handler handler,
 			      acpi_adr_space_setup setup, void *context)
diff --git a/drivers/acpi/acpica/evmisc.c b/drivers/acpi/acpica/evmisc.c
index c67d78c..f51d43a 100644
--- a/drivers/acpi/acpica/evmisc.c
+++ b/drivers/acpi/acpica/evmisc.c
@@ -99,8 +99,7 @@
  ******************************************************************************/
 
 acpi_status
-acpi_ev_queue_notify_request(struct acpi_namespace_node * node,
-			     u32 notify_value)
+acpi_ev_queue_notify_request(struct acpi_namespace_node *node, u32 notify_value)
 {
 	union acpi_operand_object *obj_desc;
 	union acpi_operand_object *handler_list_head = NULL;
diff --git a/drivers/acpi/acpica/evregion.c b/drivers/acpi/acpica/evregion.c
index 63924d1..4c6f795 100644
--- a/drivers/acpi/acpica/evregion.c
+++ b/drivers/acpi/acpica/evregion.c
@@ -526,52 +526,6 @@
 
 /*******************************************************************************
  *
- * FUNCTION:    acpi_ev_associate_reg_method
- *
- * PARAMETERS:  region_obj          - Region object
- *
- * RETURN:      Status
- *
- * DESCRIPTION: Find and associate _REG method to a region
- *
- ******************************************************************************/
-
-void acpi_ev_associate_reg_method(union acpi_operand_object *region_obj)
-{
-	acpi_name *reg_name_ptr = (acpi_name *) METHOD_NAME__REG;
-	struct acpi_namespace_node *method_node;
-	struct acpi_namespace_node *node;
-	union acpi_operand_object *region_obj2;
-	acpi_status status;
-
-	ACPI_FUNCTION_TRACE(ev_associate_reg_method);
-
-	region_obj2 = acpi_ns_get_secondary_object(region_obj);
-	if (!region_obj2) {
-		return_VOID;
-	}
-
-	node = region_obj->region.node->parent;
-
-	/* Find any "_REG" method associated with this region definition */
-
-	status =
-	    acpi_ns_search_one_scope(*reg_name_ptr, node, ACPI_TYPE_METHOD,
-				     &method_node);
-	if (ACPI_SUCCESS(status)) {
-		/*
-		 * The _REG method is optional and there can be only one per region
-		 * definition. This will be executed when the handler is attached
-		 * or removed
-		 */
-		region_obj2->extra.method_REG = method_node;
-	}
-
-	return_VOID;
-}
-
-/*******************************************************************************
- *
  * FUNCTION:    acpi_ev_execute_reg_method
  *
  * PARAMETERS:  region_obj          - Region object
@@ -589,18 +543,42 @@
 	struct acpi_evaluate_info *info;
 	union acpi_operand_object *args[3];
 	union acpi_operand_object *region_obj2;
+	const acpi_name *reg_name_ptr =
+	    ACPI_CAST_PTR(acpi_name, METHOD_NAME__REG);
+	struct acpi_namespace_node *method_node;
+	struct acpi_namespace_node *node;
 	acpi_status status;
 
 	ACPI_FUNCTION_TRACE(ev_execute_reg_method);
 
+	if (!acpi_gbl_namespace_initialized ||
+	    region_obj->region.handler == NULL) {
+		return_ACPI_STATUS(AE_OK);
+	}
+
 	region_obj2 = acpi_ns_get_secondary_object(region_obj);
 	if (!region_obj2) {
 		return_ACPI_STATUS(AE_NOT_EXIST);
 	}
 
-	if (region_obj2->extra.method_REG == NULL ||
-	    region_obj->region.handler == NULL ||
-	    !acpi_gbl_namespace_initialized) {
+	/*
+	 * Find any "_REG" method associated with this region definition.
+	 * The method should always be updated as this function may be
+	 * invoked after a namespace change.
+	 */
+	node = region_obj->region.node->parent;
+	status =
+	    acpi_ns_search_one_scope(*reg_name_ptr, node, ACPI_TYPE_METHOD,
+				     &method_node);
+	if (ACPI_SUCCESS(status)) {
+		/*
+		 * The _REG method is optional and there can be only one per
+		 * region definition. This will be executed when the handler is
+		 * attached or removed.
+		 */
+		region_obj2->extra.method_REG = method_node;
+	}
+	if (region_obj2->extra.method_REG == NULL) {
 		return_ACPI_STATUS(AE_OK);
 	}
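
The behavioral point of this evregion.c rework: the _REG lookup now runs on
every acpi_ev_execute_reg_method() call instead of once at region
initialization, so a _REG method introduced by a later table load is still
found. A hedged illustration of the ordering this enables (ec_space_handler
is a hypothetical handler, not part of this patch):

	/* Install a handler first; the region's _REG may not exist yet */
	status = acpi_install_address_space_handler(ACPI_ROOT_OBJECT,
						    ACPI_ADR_SPACE_EC,
						    ec_space_handler,
						    NULL, NULL);

	/* A subsequent Load() that defines _REG under the region's parent
	 * scope is now honored, because the lookup is redone at execution
	 * time rather than cached at region creation. */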
 
diff --git a/drivers/acpi/acpica/evrgnini.c b/drivers/acpi/acpica/evrgnini.c
index fda869c..b6ea9c0 100644
--- a/drivers/acpi/acpica/evrgnini.c
+++ b/drivers/acpi/acpica/evrgnini.c
@@ -227,7 +227,7 @@
 
 				/* Install a handler for this PCI root bridge */
 
-				status = acpi_install_address_space_handler((acpi_handle) pci_root_node, ACPI_ADR_SPACE_PCI_CONFIG, ACPI_DEFAULT_HANDLER, NULL, NULL);
+				status = acpi_install_address_space_handler((acpi_handle)pci_root_node, ACPI_ADR_SPACE_PCI_CONFIG, ACPI_DEFAULT_HANDLER, NULL, NULL);
 				if (ACPI_FAILURE(status)) {
 					if (status == AE_SAME_HANDLER) {
 						/*
@@ -518,7 +518,6 @@
 		return_ACPI_STATUS(AE_OK);
 	}
 
-	acpi_ev_associate_reg_method(region_obj);
 	region_obj->common.flags |= AOPOBJ_OBJECT_INITIALIZED;
 
 	node = region_obj->region.node->parent;
diff --git a/drivers/acpi/acpica/evxfgpe.c b/drivers/acpi/acpica/evxfgpe.c
index 9045671..17cfef7 100644
--- a/drivers/acpi/acpica/evxfgpe.c
+++ b/drivers/acpi/acpica/evxfgpe.c
@@ -917,7 +917,7 @@
  *              the FADT-defined gpe blocks. Otherwise, the GPE block device.
  *
  ******************************************************************************/
-acpi_status acpi_get_gpe_device(u32 index, acpi_handle * gpe_device)
+acpi_status acpi_get_gpe_device(u32 index, acpi_handle *gpe_device)
 {
 	struct acpi_gpe_device_info info;
 	acpi_status status;
diff --git a/drivers/acpi/acpica/exconcat.c b/drivers/acpi/acpica/exconcat.c
new file mode 100644
index 0000000..2423fe0
--- /dev/null
+++ b/drivers/acpi/acpica/exconcat.c
@@ -0,0 +1,439 @@
+/******************************************************************************
+ *
+ * Module Name: exconcat - Concatenate-type AML operators
+ *
+ *****************************************************************************/
+
+/*
+ * Copyright (C) 2000 - 2016, Intel Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *    substantially similar to the "NO WARRANTY" disclaimer below
+ *    ("Disclaimer") and any redistribution must be conditioned upon
+ *    including a substantially similar Disclaimer requirement for further
+ *    binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ *    of any contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ */
+
+#include <acpi/acpi.h>
+#include "accommon.h"
+#include "acinterp.h"
+#include "amlresrc.h"
+
+#define _COMPONENT          ACPI_EXECUTER
+ACPI_MODULE_NAME("exconcat")
+
+/* Local Prototypes */
+static acpi_status
+acpi_ex_convert_to_object_type_string(union acpi_operand_object *obj_desc,
+				      union acpi_operand_object **result_desc);
+
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_ex_do_concatenate
+ *
+ * PARAMETERS:  operand0            - First source object
+ *              operand1            - Second source object
+ *              actual_return_desc  - Where to place the return object
+ *              walk_state          - Current walk state
+ *
+ * RETURN:      Status
+ *
+ * DESCRIPTION: Concatenate two objects with the ACPI-defined conversion
+ *              rules as necessary.
+ * NOTE:
+ * Per the ACPI spec (up to 6.1), Concatenate only supports Integer,
+ * String, and Buffer objects. However, we support all objects here
+ * as an extension. This improves the usefulness of both Concatenate
+ * and the Printf/Fprintf macros. The extension returns a string
+ * describing the object type for the other objects.
+ * 02/2016.
+ *
+ ******************************************************************************/
+
+acpi_status
+acpi_ex_do_concatenate(union acpi_operand_object *operand0,
+		       union acpi_operand_object *operand1,
+		       union acpi_operand_object **actual_return_desc,
+		       struct acpi_walk_state *walk_state)
+{
+	union acpi_operand_object *local_operand0 = operand0;
+	union acpi_operand_object *local_operand1 = operand1;
+	union acpi_operand_object *temp_operand1 = NULL;
+	union acpi_operand_object *return_desc;
+	char *buffer;
+	acpi_object_type operand0_type;
+	acpi_object_type operand1_type;
+	acpi_status status;
+
+	ACPI_FUNCTION_TRACE(ex_do_concatenate);
+
+	/* Operand 0 preprocessing */
+
+	switch (operand0->common.type) {
+	case ACPI_TYPE_INTEGER:
+	case ACPI_TYPE_STRING:
+	case ACPI_TYPE_BUFFER:
+
+		operand0_type = operand0->common.type;
+		break;
+
+	default:
+
+		/* For all other types, get the "object type" string */
+
+		status =
+		    acpi_ex_convert_to_object_type_string(operand0,
+							  &local_operand0);
+		if (ACPI_FAILURE(status)) {
+			goto cleanup;
+		}
+
+		operand0_type = ACPI_TYPE_STRING;
+		break;
+	}
+
+	/* Operand 1 preprocessing */
+
+	switch (operand1->common.type) {
+	case ACPI_TYPE_INTEGER:
+	case ACPI_TYPE_STRING:
+	case ACPI_TYPE_BUFFER:
+
+		operand1_type = operand1->common.type;
+		break;
+
+	default:
+
+		/* For all other types, get the "object type" string */
+
+		status =
+		    acpi_ex_convert_to_object_type_string(operand1,
+							  &local_operand1);
+		if (ACPI_FAILURE(status)) {
+			goto cleanup;
+		}
+
+		operand1_type = ACPI_TYPE_STRING;
+		break;
+	}
+
+	/*
+	 * Convert the second operand if necessary. The first operand (0)
+	 * determines the type of the second operand (1) (See the Data Types
+	 * section of the ACPI specification). Both object types are
+	 * guaranteed to be either Integer/String/Buffer by the operand
+	 * resolution mechanism.
+	 */
+	switch (operand0_type) {
+	case ACPI_TYPE_INTEGER:
+
+		status =
+		    acpi_ex_convert_to_integer(local_operand1, &temp_operand1,
+					       16);
+		break;
+
+	case ACPI_TYPE_BUFFER:
+
+		status =
+		    acpi_ex_convert_to_buffer(local_operand1, &temp_operand1);
+		break;
+
+	case ACPI_TYPE_STRING:
+
+		switch (operand1_type) {
+		case ACPI_TYPE_INTEGER:
+		case ACPI_TYPE_STRING:
+		case ACPI_TYPE_BUFFER:
+
+			/* Other types have already been converted to string */
+
+			status =
+			    acpi_ex_convert_to_string(local_operand1,
+						      &temp_operand1,
+						      ACPI_IMPLICIT_CONVERT_HEX);
+			break;
+
+		default:
+
+			status = AE_OK;
+			break;
+		}
+		break;
+
+	default:
+
+		ACPI_ERROR((AE_INFO, "Invalid object type: 0x%X",
+			    operand0->common.type));
+		status = AE_AML_INTERNAL;
+	}
+
+	if (ACPI_FAILURE(status)) {
+		goto cleanup;
+	}
+
+	/* Take care with any newly created operand objects */
+
+	if ((local_operand1 != operand1) && (local_operand1 != temp_operand1)) {
+		acpi_ut_remove_reference(local_operand1);
+	}
+
+	local_operand1 = temp_operand1;
+
+	/*
+	 * Both operands are now known to be the same object type
+	 * (Both are Integer, String, or Buffer), and we can now perform
+	 * the concatenation.
+	 *
+	 * There are three cases to handle, as per the ACPI spec:
+	 *
+	 * 1) Two Integers concatenated to produce a new Buffer
+	 * 2) Two Strings concatenated to produce a new String
+	 * 3) Two Buffers concatenated to produce a new Buffer
+	 */
+	switch (operand0_type) {
+	case ACPI_TYPE_INTEGER:
+
+		/* Result of two Integers is a Buffer */
+		/* Need enough buffer space for two integers */
+
+		return_desc = acpi_ut_create_buffer_object((acpi_size)
+							   ACPI_MUL_2
+							   (acpi_gbl_integer_byte_width));
+		if (!return_desc) {
+			status = AE_NO_MEMORY;
+			goto cleanup;
+		}
+
+		buffer = (char *)return_desc->buffer.pointer;
+
+		/* Copy the first integer, LSB first */
+
+		memcpy(buffer, &operand0->integer.value,
+		       acpi_gbl_integer_byte_width);
+
+		/* Copy the second integer (LSB first) after the first */
+
+		memcpy(buffer + acpi_gbl_integer_byte_width,
+		       &local_operand1->integer.value,
+		       acpi_gbl_integer_byte_width);
+		break;
+
+	case ACPI_TYPE_STRING:
+
+		/* Result of two Strings is a String */
+
+		return_desc = acpi_ut_create_string_object(((acpi_size)
+							    local_operand0->
+							    string.length +
+							    local_operand1->
+							    string.length));
+		if (!return_desc) {
+			status = AE_NO_MEMORY;
+			goto cleanup;
+		}
+
+		buffer = return_desc->string.pointer;
+
+		/* Concatenate the strings */
+
+		strcpy(buffer, local_operand0->string.pointer);
+		strcat(buffer, local_operand1->string.pointer);
+		break;
+
+	case ACPI_TYPE_BUFFER:
+
+		/* Result of two Buffers is a Buffer */
+
+		return_desc = acpi_ut_create_buffer_object(((acpi_size)
+							    operand0->buffer.
+							    length +
+							    local_operand1->
+							    buffer.length));
+		if (!return_desc) {
+			status = AE_NO_MEMORY;
+			goto cleanup;
+		}
+
+		buffer = (char *)return_desc->buffer.pointer;
+
+		/* Concatenate the buffers */
+
+		memcpy(buffer, operand0->buffer.pointer,
+		       operand0->buffer.length);
+		memcpy(buffer + operand0->buffer.length,
+		       local_operand1->buffer.pointer,
+		       local_operand1->buffer.length);
+		break;
+
+	default:
+
+		/* Invalid object type, should not happen here */
+
+		ACPI_ERROR((AE_INFO, "Invalid object type: 0x%X",
+			    operand0->common.type));
+		status = AE_AML_INTERNAL;
+		goto cleanup;
+	}
+
+	*actual_return_desc = return_desc;
+
+cleanup:
+	if (local_operand0 != operand0) {
+		acpi_ut_remove_reference(local_operand0);
+	}
+
+	if (local_operand1 != operand1) {
+		acpi_ut_remove_reference(local_operand1);
+	}
+
+	return_ACPI_STATUS(status);
+}
+
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_ex_convert_to_object_type_string
+ *
+ * PARAMETERS:  obj_desc            - Object to be converted
+ *              return_desc         - Where to place the return object
+ *
+ * RETURN:      Status
+ *
+ * DESCRIPTION: Convert an object of arbitrary type to a string object that
+ *              contains the namestring for the object. Used for the
+ *              concatenate operator.
+ *
+ ******************************************************************************/
+
+static acpi_status
+acpi_ex_convert_to_object_type_string(union acpi_operand_object *obj_desc,
+				      union acpi_operand_object **result_desc)
+{
+	union acpi_operand_object *return_desc;
+	const char *type_string;
+
+	type_string = acpi_ut_get_type_name(obj_desc->common.type);
+
+	return_desc = acpi_ut_create_string_object(((acpi_size)strlen(type_string) + 9));	/* 9 For "[ Object]" */
+	if (!return_desc) {
+		return (AE_NO_MEMORY);
+	}
+
+	strcpy(return_desc->string.pointer, "[");
+	strcat(return_desc->string.pointer, type_string);
+	strcat(return_desc->string.pointer, " Object]");
+
+	*result_desc = return_desc;
+	return (AE_OK);
+}
+
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_ex_concat_template
+ *
+ * PARAMETERS:  operand0            - First source object
+ *              operand1            - Second source object
+ *              actual_return_desc  - Where to place the return object
+ *              walk_state          - Current walk state
+ *
+ * RETURN:      Status
+ *
+ * DESCRIPTION: Concatenate two resource templates
+ *
+ ******************************************************************************/
+
+acpi_status
+acpi_ex_concat_template(union acpi_operand_object *operand0,
+			union acpi_operand_object *operand1,
+			union acpi_operand_object **actual_return_desc,
+			struct acpi_walk_state *walk_state)
+{
+	acpi_status status;
+	union acpi_operand_object *return_desc;
+	u8 *new_buf;
+	u8 *end_tag;
+	acpi_size length0;
+	acpi_size length1;
+	acpi_size new_length;
+
+	ACPI_FUNCTION_TRACE(ex_concat_template);
+
+	/*
+	 * Find the end_tag descriptor in each resource template.
+	 * Note1: returned pointers point TO the end_tag, not past it.
+	 * Note2: zero-length buffers are allowed; treated like one end_tag
+	 */
+
+	/* Get the length of the first resource template */
+
+	status = acpi_ut_get_resource_end_tag(operand0, &end_tag);
+	if (ACPI_FAILURE(status)) {
+		return_ACPI_STATUS(status);
+	}
+
+	length0 = ACPI_PTR_DIFF(end_tag, operand0->buffer.pointer);
+
+	/* Get the length of the second resource template */
+
+	status = acpi_ut_get_resource_end_tag(operand1, &end_tag);
+	if (ACPI_FAILURE(status)) {
+		return_ACPI_STATUS(status);
+	}
+
+	length1 = ACPI_PTR_DIFF(end_tag, operand1->buffer.pointer);
+
+	/* Combine both lengths, minimum size will be 2 for end_tag */
+
+	new_length = length0 + length1 + sizeof(struct aml_resource_end_tag);
+
+	/* Create a new buffer object for the result (with one end_tag) */
+
+	return_desc = acpi_ut_create_buffer_object(new_length);
+	if (!return_desc) {
+		return_ACPI_STATUS(AE_NO_MEMORY);
+	}
+
+	/*
+	 * Copy the templates to the new buffer, 0 first, then 1 follows. One
+	 * end_tag descriptor is copied from Operand1.
+	 */
+	new_buf = return_desc->buffer.pointer;
+	memcpy(new_buf, operand0->buffer.pointer, length0);
+	memcpy(new_buf + length0, operand1->buffer.pointer, length1);
+
+	/* Insert the end_tag and set its checksum to zero, meaning "ignore checksum" */
+
+	new_buf[new_length - 1] = 0;
+	new_buf[new_length - 2] = ACPI_RESOURCE_NAME_END_TAG | 1;
+
+	/* Return the completed resource template */
+
+	*actual_return_desc = return_desc;
+	return_ACPI_STATUS(AE_OK);
+}
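
For reference, the conversion rules the new file implements, shown as a
standalone sketch (not ACPICA code) for the Integer case on a little-endian
host:

	#include <stdint.h>
	#include <string.h>

	/* Integer ++ Integer -> Buffer of 2 * integer width; each value is
	 * copied LSB first, matching the two memcpy() calls in
	 * acpi_ex_do_concatenate() above. */
	static void concat_integers64(uint64_t a, uint64_t b, uint8_t out[16])
	{
		memcpy(out, &a, sizeof(a));
		memcpy(out + sizeof(a), &b, sizeof(b));
	}

String ++ String yields a String (strcpy then strcat), Buffer ++ Buffer a
Buffer (two memcpy calls), and any other operand type is first rendered as
a "[<Type> Object]" string by acpi_ex_convert_to_object_type_string().
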
diff --git a/drivers/acpi/acpica/exconfig.c b/drivers/acpi/acpica/exconfig.c
index f741613..a1d177d 100644
--- a/drivers/acpi/acpica/exconfig.c
+++ b/drivers/acpi/acpica/exconfig.c
@@ -118,7 +118,9 @@
 	/* Execute any module-level code that was found in the table */
 
 	acpi_ex_exit_interpreter();
-	acpi_ns_exec_module_code_list();
+	if (acpi_gbl_group_module_level_code) {
+		acpi_ns_exec_module_code_list();
+	}
 	acpi_ex_enter_interpreter();
 
 	/*
diff --git a/drivers/acpi/acpica/exconvrt.c b/drivers/acpi/acpica/exconvrt.c
index 0b9f2c1..b7e9b3d 100644
--- a/drivers/acpi/acpica/exconvrt.c
+++ b/drivers/acpi/acpica/exconvrt.c
@@ -124,7 +124,9 @@
 		 * of ACPI 3.0) is that the to_integer() operator allows both decimal
 		 * and hexadecimal strings (hex prefixed with "0x").
 		 */
-		status = acpi_ut_strtoul64((char *)pointer, flags, &result);
+		status = acpi_ut_strtoul64((char *)pointer, flags,
+					   acpi_gbl_integer_byte_width,
+					   &result);
 		if (ACPI_FAILURE(status)) {
 			return_ACPI_STATUS(status);
 		}
@@ -439,7 +441,7 @@
 		 * Need enough space for one ASCII integer (plus null terminator)
 		 */
 		return_desc =
-		    acpi_ut_create_string_object((acpi_size) string_length);
+		    acpi_ut_create_string_object((acpi_size)string_length);
 		if (!return_desc) {
 			return_ACPI_STATUS(AE_NO_MEMORY);
 		}
@@ -518,7 +520,7 @@
 		}
 
 		return_desc =
-		    acpi_ut_create_string_object((acpi_size) string_length);
+		    acpi_ut_create_string_object((acpi_size)string_length);
 		if (!return_desc) {
 			return_ACPI_STATUS(AE_NO_MEMORY);
 		}
diff --git a/drivers/acpi/acpica/excreate.c b/drivers/acpi/acpica/excreate.c
index bea9612..613ba6e 100644
--- a/drivers/acpi/acpica/excreate.c
+++ b/drivers/acpi/acpica/excreate.c
@@ -394,7 +394,7 @@
 	obj_desc->processor.proc_id = (u8) operand[1]->integer.value;
 	obj_desc->processor.length = (u8) operand[3]->integer.value;
 	obj_desc->processor.address =
-	    (acpi_io_address) operand[2]->integer.value;
+	    (acpi_io_address)operand[2]->integer.value;
 
 	/* Install the processor object in the parent Node */
 
diff --git a/drivers/acpi/acpica/exdump.c b/drivers/acpi/acpica/exdump.c
index ee30974..fce6b2e 100644
--- a/drivers/acpi/acpica/exdump.c
+++ b/drivers/acpi/acpica/exdump.c
@@ -55,9 +55,9 @@
  */
 #if defined(ACPI_DEBUG_OUTPUT) || defined(ACPI_DEBUGGER)
 /* Local prototypes */
-static void acpi_ex_out_string(char *title, char *value);
+static void acpi_ex_out_string(const char *title, const char *value);
 
-static void acpi_ex_out_pointer(char *title, void *value);
+static void acpi_ex_out_pointer(const char *title, const void *value);
 
 static void
 acpi_ex_dump_object(union acpi_operand_object *obj_desc,
@@ -365,8 +365,7 @@
 		    struct acpi_exdump_info *info)
 {
 	u8 *target;
-	char *name;
-	const char *reference_name;
+	const char *name;
 	u8 count;
 	union acpi_operand_object *start;
 	union acpi_operand_object *data = NULL;
@@ -459,9 +458,9 @@
 
 		case ACPI_EXD_REFERENCE:
 
-			reference_name = acpi_ut_get_reference_name(obj_desc);
 			acpi_ex_out_string("Class Name",
-					   ACPI_CAST_PTR(char, reference_name));
+					   acpi_ut_get_reference_name
+					   (obj_desc));
 			acpi_ex_dump_reference_obj(obj_desc);
 			break;
 
@@ -934,12 +933,12 @@
  *
  ******************************************************************************/
 
-static void acpi_ex_out_string(char *title, char *value)
+static void acpi_ex_out_string(const char *title, const char *value)
 {
 	acpi_os_printf("%20s : %s\n", title, value);
 }
 
-static void acpi_ex_out_pointer(char *title, void *value)
+static void acpi_ex_out_pointer(const char *title, const void *value)
 {
 	acpi_os_printf("%20s : %p\n", title, value);
 }
diff --git a/drivers/acpi/acpica/exfield.c b/drivers/acpi/acpica/exfield.c
index d5d8020..d7d3ee3 100644
--- a/drivers/acpi/acpica/exfield.c
+++ b/drivers/acpi/acpica/exfield.c
@@ -126,7 +126,7 @@
  ******************************************************************************/
 
 acpi_status
-acpi_ex_read_data_from_field(struct acpi_walk_state * walk_state,
+acpi_ex_read_data_from_field(struct acpi_walk_state *walk_state,
 			     union acpi_operand_object *obj_desc,
 			     union acpi_operand_object **ret_buffer_desc)
 {
@@ -233,7 +233,7 @@
 	 * Note: Field.length is in bits.
 	 */
 	length =
-	    (acpi_size) ACPI_ROUND_BITS_UP_TO_BYTES(obj_desc->field.bit_length);
+	    (acpi_size)ACPI_ROUND_BITS_UP_TO_BYTES(obj_desc->field.bit_length);
 
 	if (length > acpi_gbl_integer_byte_width) {
 
diff --git a/drivers/acpi/acpica/exfldio.c b/drivers/acpi/acpica/exfldio.c
index f0c5ed0..ee76d29 100644
--- a/drivers/acpi/acpica/exfldio.c
+++ b/drivers/acpi/acpica/exfldio.c
@@ -164,7 +164,7 @@
 			if (ACPI_ROUND_UP(rgn_desc->region.length,
 					  obj_desc->common_field.
 					  access_byte_width) >=
-			    ((acpi_size) obj_desc->common_field.
+			    ((acpi_size)obj_desc->common_field.
 			     base_byte_offset +
 			     obj_desc->common_field.access_byte_width +
 			     field_datum_byte_offset)) {
@@ -897,17 +897,9 @@
 
 	access_bit_width = ACPI_MUL_8(obj_desc->common_field.access_byte_width);
 
-	/*
-	 * Create the bitmasks used for bit insertion.
-	 * Note: This if/else is used to bypass compiler differences with the
-	 * shift operator
-	 */
-	if (access_bit_width == ACPI_INTEGER_BIT_SIZE) {
-		width_mask = ACPI_UINT64_MAX;
-	} else {
-		width_mask = ACPI_MASK_BITS_ABOVE(access_bit_width);
-	}
+	/* Create the bitmasks used for bit insertion */
 
+	width_mask = ACPI_MASK_BITS_ABOVE_64(access_bit_width);
 	mask = width_mask &
 	    ACPI_MASK_BITS_BELOW(obj_desc->common_field.start_field_bit_offset);
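
The deleted if/else existed because shifting a 64-bit value by 64 bits is
undefined behavior in C, so the full-width case had to be special-cased.
ACPI_MASK_BITS_ABOVE_64 folds that guard into the macro itself; its
definition is presumably along these lines (paraphrased, not verbatim):

	#define ACPI_MASK_BITS_ABOVE(position) \
		(~(ACPI_UINT64_MAX << (u32) (position)))
	#define ACPI_MASK_BITS_ABOVE_64(width) \
		((width) == ACPI_INTEGER_BIT_SIZE ? \
			ACPI_UINT64_MAX : ACPI_MASK_BITS_ABOVE(width))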
 
diff --git a/drivers/acpi/acpica/exmisc.c b/drivers/acpi/acpica/exmisc.c
index db30ae4..4f7e667 100644
--- a/drivers/acpi/acpica/exmisc.c
+++ b/drivers/acpi/acpica/exmisc.c
@@ -45,7 +45,6 @@
 #include "accommon.h"
 #include "acinterp.h"
 #include "amlcode.h"
-#include "amlresrc.h"
 
 #define _COMPONENT          ACPI_EXECUTER
 ACPI_MODULE_NAME("exmisc")
@@ -140,295 +139,6 @@
 
 /*******************************************************************************
  *
- * FUNCTION:    acpi_ex_concat_template
- *
- * PARAMETERS:  operand0            - First source object
- *              operand1            - Second source object
- *              actual_return_desc  - Where to place the return object
- *              walk_state          - Current walk state
- *
- * RETURN:      Status
- *
- * DESCRIPTION: Concatenate two resource templates
- *
- ******************************************************************************/
-
-acpi_status
-acpi_ex_concat_template(union acpi_operand_object *operand0,
-			union acpi_operand_object *operand1,
-			union acpi_operand_object **actual_return_desc,
-			struct acpi_walk_state *walk_state)
-{
-	acpi_status status;
-	union acpi_operand_object *return_desc;
-	u8 *new_buf;
-	u8 *end_tag;
-	acpi_size length0;
-	acpi_size length1;
-	acpi_size new_length;
-
-	ACPI_FUNCTION_TRACE(ex_concat_template);
-
-	/*
-	 * Find the end_tag descriptor in each resource template.
-	 * Note1: returned pointers point TO the end_tag, not past it.
-	 * Note2: zero-length buffers are allowed; treated like one end_tag
-	 */
-
-	/* Get the length of the first resource template */
-
-	status = acpi_ut_get_resource_end_tag(operand0, &end_tag);
-	if (ACPI_FAILURE(status)) {
-		return_ACPI_STATUS(status);
-	}
-
-	length0 = ACPI_PTR_DIFF(end_tag, operand0->buffer.pointer);
-
-	/* Get the length of the second resource template */
-
-	status = acpi_ut_get_resource_end_tag(operand1, &end_tag);
-	if (ACPI_FAILURE(status)) {
-		return_ACPI_STATUS(status);
-	}
-
-	length1 = ACPI_PTR_DIFF(end_tag, operand1->buffer.pointer);
-
-	/* Combine both lengths, minimum size will be 2 for end_tag */
-
-	new_length = length0 + length1 + sizeof(struct aml_resource_end_tag);
-
-	/* Create a new buffer object for the result (with one end_tag) */
-
-	return_desc = acpi_ut_create_buffer_object(new_length);
-	if (!return_desc) {
-		return_ACPI_STATUS(AE_NO_MEMORY);
-	}
-
-	/*
-	 * Copy the templates to the new buffer, 0 first, then 1 follows. One
-	 * end_tag descriptor is copied from Operand1.
-	 */
-	new_buf = return_desc->buffer.pointer;
-	memcpy(new_buf, operand0->buffer.pointer, length0);
-	memcpy(new_buf + length0, operand1->buffer.pointer, length1);
-
-	/* Insert end_tag and set the checksum to zero, means "ignore checksum" */
-
-	new_buf[new_length - 1] = 0;
-	new_buf[new_length - 2] = ACPI_RESOURCE_NAME_END_TAG | 1;
-
-	/* Return the completed resource template */
-
-	*actual_return_desc = return_desc;
-	return_ACPI_STATUS(AE_OK);
-}
-
-/*******************************************************************************
- *
- * FUNCTION:    acpi_ex_do_concatenate
- *
- * PARAMETERS:  operand0            - First source object
- *              operand1            - Second source object
- *              actual_return_desc  - Where to place the return object
- *              walk_state          - Current walk state
- *
- * RETURN:      Status
- *
- * DESCRIPTION: Concatenate two objects OF THE SAME TYPE.
- *
- ******************************************************************************/
-
-acpi_status
-acpi_ex_do_concatenate(union acpi_operand_object *operand0,
-		       union acpi_operand_object *operand1,
-		       union acpi_operand_object **actual_return_desc,
-		       struct acpi_walk_state *walk_state)
-{
-	union acpi_operand_object *local_operand1 = operand1;
-	union acpi_operand_object *return_desc;
-	char *new_buf;
-	const char *type_string;
-	acpi_status status;
-
-	ACPI_FUNCTION_TRACE(ex_do_concatenate);
-
-	/*
-	 * Convert the second operand if necessary. The first operand
-	 * determines the type of the second operand, (See the Data Types
-	 * section of the ACPI specification.)  Both object types are
-	 * guaranteed to be either Integer/String/Buffer by the operand
-	 * resolution mechanism.
-	 */
-	switch (operand0->common.type) {
-	case ACPI_TYPE_INTEGER:
-
-		status =
-		    acpi_ex_convert_to_integer(operand1, &local_operand1, 16);
-		break;
-
-	case ACPI_TYPE_STRING:
-		/*
-		 * Per the ACPI spec, Concatenate only supports int/str/buf.
-		 * However, we support all objects here as an extension.
-		 * This improves the usefulness of the Printf() macro.
-		 * 12/2015.
-		 */
-		switch (operand1->common.type) {
-		case ACPI_TYPE_INTEGER:
-		case ACPI_TYPE_STRING:
-		case ACPI_TYPE_BUFFER:
-
-			status =
-			    acpi_ex_convert_to_string(operand1, &local_operand1,
-						      ACPI_IMPLICIT_CONVERT_HEX);
-			break;
-
-		default:
-			/*
-			 * Just emit a string containing the object type.
-			 */
-			type_string =
-			    acpi_ut_get_type_name(operand1->common.type);
-
-			local_operand1 = acpi_ut_create_string_object(((acpi_size) strlen(type_string) + 9));	/* 9 For "[Object]" */
-			if (!local_operand1) {
-				status = AE_NO_MEMORY;
-				goto cleanup;
-			}
-
-			strcpy(local_operand1->string.pointer, "[");
-			strcat(local_operand1->string.pointer, type_string);
-			strcat(local_operand1->string.pointer, " Object]");
-			status = AE_OK;
-			break;
-		}
-		break;
-
-	case ACPI_TYPE_BUFFER:
-
-		status = acpi_ex_convert_to_buffer(operand1, &local_operand1);
-		break;
-
-	default:
-
-		ACPI_ERROR((AE_INFO, "Invalid object type: 0x%X",
-			    operand0->common.type));
-		status = AE_AML_INTERNAL;
-	}
-
-	if (ACPI_FAILURE(status)) {
-		goto cleanup;
-	}
-
-	/*
-	 * Both operands are now known to be the same object type
-	 * (Both are Integer, String, or Buffer), and we can now perform the
-	 * concatenation.
-	 */
-
-	/*
-	 * There are three cases to handle:
-	 *
-	 * 1) Two Integers concatenated to produce a new Buffer
-	 * 2) Two Strings concatenated to produce a new String
-	 * 3) Two Buffers concatenated to produce a new Buffer
-	 */
-	switch (operand0->common.type) {
-	case ACPI_TYPE_INTEGER:
-
-		/* Result of two Integers is a Buffer */
-		/* Need enough buffer space for two integers */
-
-		return_desc = acpi_ut_create_buffer_object((acpi_size)
-							   ACPI_MUL_2
-							   (acpi_gbl_integer_byte_width));
-		if (!return_desc) {
-			status = AE_NO_MEMORY;
-			goto cleanup;
-		}
-
-		new_buf = (char *)return_desc->buffer.pointer;
-
-		/* Copy the first integer, LSB first */
-
-		memcpy(new_buf, &operand0->integer.value,
-		       acpi_gbl_integer_byte_width);
-
-		/* Copy the second integer (LSB first) after the first */
-
-		memcpy(new_buf + acpi_gbl_integer_byte_width,
-		       &local_operand1->integer.value,
-		       acpi_gbl_integer_byte_width);
-		break;
-
-	case ACPI_TYPE_STRING:
-
-		/* Result of two Strings is a String */
-
-		return_desc = acpi_ut_create_string_object(((acpi_size)
-							    operand0->string.
-							    length +
-							    local_operand1->
-							    string.length));
-		if (!return_desc) {
-			status = AE_NO_MEMORY;
-			goto cleanup;
-		}
-
-		new_buf = return_desc->string.pointer;
-
-		/* Concatenate the strings */
-
-		strcpy(new_buf, operand0->string.pointer);
-		strcat(new_buf, local_operand1->string.pointer);
-		break;
-
-	case ACPI_TYPE_BUFFER:
-
-		/* Result of two Buffers is a Buffer */
-
-		return_desc = acpi_ut_create_buffer_object(((acpi_size)
-							    operand0->buffer.
-							    length +
-							    local_operand1->
-							    buffer.length));
-		if (!return_desc) {
-			status = AE_NO_MEMORY;
-			goto cleanup;
-		}
-
-		new_buf = (char *)return_desc->buffer.pointer;
-
-		/* Concatenate the buffers */
-
-		memcpy(new_buf, operand0->buffer.pointer,
-		       operand0->buffer.length);
-		memcpy(new_buf + operand0->buffer.length,
-		       local_operand1->buffer.pointer,
-		       local_operand1->buffer.length);
-		break;
-
-	default:
-
-		/* Invalid object type, should not happen here */
-
-		ACPI_ERROR((AE_INFO, "Invalid object type: 0x%X",
-			    operand0->common.type));
-		status = AE_AML_INTERNAL;
-		goto cleanup;
-	}
-
-	*actual_return_desc = return_desc;
-
-cleanup:
-	if (local_operand1 != operand1) {
-		acpi_ut_remove_reference(local_operand1);
-	}
-	return_ACPI_STATUS(status);
-}
-
-/*******************************************************************************
- *
  * FUNCTION:    acpi_ex_do_math_op
  *
  * PARAMETERS:  opcode              - AML opcode
diff --git a/drivers/acpi/acpica/exnames.c b/drivers/acpi/acpica/exnames.c
index 27c11ab..3d6af93 100644
--- a/drivers/acpi/acpica/exnames.c
+++ b/drivers/acpi/acpica/exnames.c
@@ -178,7 +178,7 @@
 
 	for (index = 0;
 	     (index < ACPI_NAME_SIZE)
-	     && (acpi_ut_valid_acpi_char(*aml_address, 0)); index++) {
+	     && (acpi_ut_valid_name_char(*aml_address, 0)); index++) {
 		char_buf[index] = *aml_address++;
 		ACPI_DEBUG_PRINT((ACPI_DB_LOAD, "%c\n", char_buf[index]));
 	}
diff --git a/drivers/acpi/acpica/exoparg3.c b/drivers/acpi/acpica/exoparg3.c
index 5aa21c4..69e4e26 100644
--- a/drivers/acpi/acpica/exoparg3.c
+++ b/drivers/acpi/acpica/exoparg3.c
@@ -184,7 +184,7 @@
 		/* Get the Integer values from the objects */
 
 		index = operand[1]->integer.value;
-		length = (acpi_size) operand[2]->integer.value;
+		length = (acpi_size)operand[2]->integer.value;
 
 		/*
 		 * If the index is beyond the length of the String/Buffer, or if the
@@ -198,8 +198,8 @@
 
 		else if ((index + length) > operand[0]->string.length) {
 			length =
-			    (acpi_size) operand[0]->string.length -
-			    (acpi_size) index;
+			    (acpi_size)operand[0]->string.length -
+			    (acpi_size)index;
 		}
 
 		/* Strings always have a sub-pointer, not so for buffers */
@@ -209,7 +209,7 @@
 
 			/* Always allocate a new buffer for the String */
 
-			buffer = ACPI_ALLOCATE_ZEROED((acpi_size) length + 1);
+			buffer = ACPI_ALLOCATE_ZEROED((acpi_size)length + 1);
 			if (!buffer) {
 				status = AE_NO_MEMORY;
 				goto cleanup;
diff --git a/drivers/acpi/acpica/exoparg6.c b/drivers/acpi/acpica/exoparg6.c
index e2b6348..786d53b 100644
--- a/drivers/acpi/acpica/exoparg6.c
+++ b/drivers/acpi/acpica/exoparg6.c
@@ -207,7 +207,7 @@
  *
  ******************************************************************************/
 
-acpi_status acpi_ex_opcode_6A_0T_1R(struct acpi_walk_state * walk_state)
+acpi_status acpi_ex_opcode_6A_0T_1R(struct acpi_walk_state *walk_state)
 {
 	union acpi_operand_object **operand = &walk_state->operands[0];
 	union acpi_operand_object *return_desc = NULL;
diff --git a/drivers/acpi/acpica/exregion.c b/drivers/acpi/acpica/exregion.c
index 076074d..31b381c 100644
--- a/drivers/acpi/acpica/exregion.c
+++ b/drivers/acpi/acpica/exregion.c
@@ -325,15 +325,15 @@
 	switch (function) {
 	case ACPI_READ:
 
-		status = acpi_hw_read_port((acpi_io_address) address,
+		status = acpi_hw_read_port((acpi_io_address)address,
 					   &value32, bit_width);
 		*value = value32;
 		break;
 
 	case ACPI_WRITE:
 
-		status = acpi_hw_write_port((acpi_io_address) address,
-					    (u32) * value, bit_width);
+		status = acpi_hw_write_port((acpi_io_address)address,
+					    (u32)*value, bit_width);
 		break;
 
 	default:
diff --git a/drivers/acpi/acpica/exresnte.c b/drivers/acpi/acpica/exresnte.c
index c1e8bfb..a183cb7 100644
--- a/drivers/acpi/acpica/exresnte.c
+++ b/drivers/acpi/acpica/exresnte.c
@@ -93,7 +93,7 @@
 	 */
 	node = *object_ptr;
 	source_desc = acpi_ns_get_attached_object(node);
-	entry_type = acpi_ns_get_type((acpi_handle) node);
+	entry_type = acpi_ns_get_type((acpi_handle)node);
 
 	ACPI_DEBUG_PRINT((ACPI_DB_EXEC, "Entry=%p SourceDesc=%p [%s]\n",
 			  node, source_desc,
@@ -106,7 +106,7 @@
 
 		node = ACPI_CAST_PTR(struct acpi_namespace_node, node->object);
 		source_desc = acpi_ns_get_attached_object(node);
-		entry_type = acpi_ns_get_type((acpi_handle) node);
+		entry_type = acpi_ns_get_type((acpi_handle)node);
 		*object_ptr = node;
 	}
 
diff --git a/drivers/acpi/acpica/exresolv.c b/drivers/acpi/acpica/exresolv.c
index fedacf1..e1d3878 100644
--- a/drivers/acpi/acpica/exresolv.c
+++ b/drivers/acpi/acpica/exresolv.c
@@ -334,7 +334,7 @@
 acpi_status
 acpi_ex_resolve_multiple(struct acpi_walk_state *walk_state,
 			 union acpi_operand_object *operand,
-			 acpi_object_type * return_type,
+			 acpi_object_type *return_type,
 			 union acpi_operand_object **return_desc)
 {
 	union acpi_operand_object *obj_desc = ACPI_CAST_PTR(void, operand);
diff --git a/drivers/acpi/acpica/exresop.c b/drivers/acpi/acpica/exresop.c
index cc2c26c..27b41fd 100644
--- a/drivers/acpi/acpica/exresop.c
+++ b/drivers/acpi/acpica/exresop.c
@@ -131,8 +131,8 @@
 
 acpi_status
 acpi_ex_resolve_operands(u16 opcode,
-			 union acpi_operand_object ** stack_ptr,
-			 struct acpi_walk_state * walk_state)
+			 union acpi_operand_object **stack_ptr,
+			 struct acpi_walk_state *walk_state)
 {
 	union acpi_operand_object *obj_desc;
 	acpi_status status = AE_OK;
diff --git a/drivers/acpi/acpica/exstorob.c b/drivers/acpi/acpica/exstorob.c
index 28b7248..1dab827 100644
--- a/drivers/acpi/acpica/exstorob.c
+++ b/drivers/acpi/acpica/exstorob.c
@@ -188,7 +188,7 @@
 		 * Clear old string and copy in the new one
 		 */
 		memset(target_desc->string.pointer, 0,
-		       (acpi_size) target_desc->string.length + 1);
+		       (acpi_size)target_desc->string.length + 1);
 		memcpy(target_desc->string.pointer, buffer, length);
 	} else {
 		/*
@@ -204,7 +204,7 @@
 		}
 
 		target_desc->string.pointer =
-		    ACPI_ALLOCATE_ZEROED((acpi_size) length + 1);
+		    ACPI_ALLOCATE_ZEROED((acpi_size)length + 1);
 
 		if (!target_desc->string.pointer) {
 			return_ACPI_STATUS(AE_NO_MEMORY);
diff --git a/drivers/acpi/acpica/exutils.c b/drivers/acpi/acpica/exutils.c
index 4d44bc1..425f133 100644
--- a/drivers/acpi/acpica/exutils.c
+++ b/drivers/acpi/acpica/exutils.c
@@ -301,8 +301,8 @@
  *
  * FUNCTION:    acpi_ex_eisa_id_to_string
  *
- * PARAMETERS:  compressed_id   - EISAID to be converted
- *              out_string      - Where to put the converted string (8 bytes)
+ * PARAMETERS:  out_string      - Where to put the converted string (8 bytes)
+ *              compressed_id   - EISAID to be converted
  *
  * RETURN:      None
  *
@@ -354,7 +354,7 @@
  *                                possible 64-bit integer.
  *              value           - Value to be converted
  *
- * RETURN:      None, string
+ * RETURN:      Converted string in out_string
  *
  * DESCRIPTION: Convert a 64-bit integer to decimal string representation.
  *              Assumes string buffer is large enough to hold the string. The
@@ -384,9 +384,9 @@
  * FUNCTION:    acpi_ex_pci_cls_to_string
  *
  * PARAMETERS:  out_string      - Where to put the converted string (7 bytes)
- * PARAMETERS:  class_code      - PCI class code to be converted (3 bytes)
+ *              class_code      - PCI class code to be converted (3 bytes)
  *
- * RETURN:      None
+ * RETURN:      Converted string in out_string
  *
  * DESCRIPTION: Convert 3-bytes PCI class code to string representation.
  *              Return buffer must be large enough to hold the string. The
@@ -417,7 +417,7 @@
  *
  * PARAMETERS:  space_id            - ID to be validated
  *
- * RETURN:      TRUE if valid/supported ID.
+ * RETURN:      TRUE if space_id is a valid/supported ID.
  *
  * DESCRIPTION: Validate an operation region space_ID.
  *
diff --git a/drivers/acpi/acpica/hwgpe.c b/drivers/acpi/acpica/hwgpe.c
index 1c4f451..bdecd5e 100644
--- a/drivers/acpi/acpica/hwgpe.c
+++ b/drivers/acpi/acpica/hwgpe.c
@@ -166,7 +166,7 @@
  *
  ******************************************************************************/
 
-acpi_status acpi_hw_clear_gpe(struct acpi_gpe_event_info * gpe_event_info)
+acpi_status acpi_hw_clear_gpe(struct acpi_gpe_event_info *gpe_event_info)
 {
 	struct acpi_gpe_register_info *gpe_register_info;
 	acpi_status status;
@@ -206,7 +206,7 @@
  ******************************************************************************/
 
 acpi_status
-acpi_hw_get_gpe_status(struct acpi_gpe_event_info * gpe_event_info,
+acpi_hw_get_gpe_status(struct acpi_gpe_event_info *gpe_event_info,
 		       acpi_event_status *event_status)
 {
 	u32 in_byte;
@@ -391,7 +391,7 @@
 
 acpi_status
 acpi_hw_enable_runtime_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info,
-				 struct acpi_gpe_block_info * gpe_block,
+				 struct acpi_gpe_block_info *gpe_block,
 				 void *context)
 {
 	u32 i;
diff --git a/drivers/acpi/acpica/hwregs.c b/drivers/acpi/acpica/hwregs.c
index 5ba0498..0f18dbc 100644
--- a/drivers/acpi/acpica/hwregs.c
+++ b/drivers/acpi/acpica/hwregs.c
@@ -51,6 +51,10 @@
 
 #if (!ACPI_REDUCED_HARDWARE)
 /* Local Prototypes */
+static u8
+acpi_hw_get_access_bit_width(struct acpi_generic_address *reg,
+			     u8 max_bit_width);
+
 static acpi_status
 acpi_hw_read_multiple(u32 *value,
 		      struct acpi_generic_address *register_a,
@@ -65,6 +69,48 @@
 
 /******************************************************************************
  *
+ * FUNCTION:    acpi_hw_get_access_bit_width
+ *
+ * PARAMETERS:  reg                 - GAS register structure
+ *              max_bit_width       - Max bit_width supported (32 or 64)
+ *
+ * RETURN:      Access bit width (8, 16, 32, or 64)
+ *
+ * DESCRIPTION: Obtain optimal access bit width
+ *
+ ******************************************************************************/
+
+static u8
+acpi_hw_get_access_bit_width(struct acpi_generic_address *reg, u8 max_bit_width)
+{
+	u64 address;
+
+	if (!reg->access_width) {
+		/*
+		 * Detect old register descriptors where only the bit_width field
+		 * makes sense. The target address is copied to handle possible
+		 * alignment issues.
+		 */
+		ACPI_MOVE_64_TO_64(&address, &reg->address);
+		if (!reg->bit_offset && reg->bit_width &&
+		    ACPI_IS_POWER_OF_TWO(reg->bit_width) &&
+		    ACPI_IS_ALIGNED(reg->bit_width, 8) &&
+		    ACPI_IS_ALIGNED(address, reg->bit_width)) {
+			return (reg->bit_width);
+		} else {
+			if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO) {
+				return (32);
+			} else {
+				return (max_bit_width);
+			}
+		}
+	} else {
+		return (1 << (reg->access_width + 2));
+	}
+}
+
+/******************************************************************************
+ *
  * FUNCTION:    acpi_hw_validate_register
  *
  * PARAMETERS:  reg                 - GAS register structure
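
A quick worked example for the function above: the GAS access_width field
is a code, not a bit count, and 1 << (access_width + 2) expands codes 1
through 4 to access sizes of 8, 16, 32, and 64 bits:

	access_width code:    1    2    3    4
	access size (bits):   8   16   32   64

For legacy descriptors with access_width == 0, the fallback is bit_width
itself when bit_offset is zero and bit_width is a byte-multiple power of
two at a suitably aligned address, 32 bits for port I/O, and max_bit_width
for everything else.
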
@@ -83,6 +129,8 @@
 acpi_hw_validate_register(struct acpi_generic_address *reg,
 			  u8 max_bit_width, u64 *address)
 {
+	u8 bit_width;
+	u8 access_width;
 
 	/* Must have a valid pointer to a GAS structure */
 
@@ -109,23 +157,25 @@
 		return (AE_SUPPORT);
 	}
 
-	/* Validate the bit_width */
+	/* Validate the access_width */
 
-	if ((reg->bit_width != 8) &&
-	    (reg->bit_width != 16) &&
-	    (reg->bit_width != 32) && (reg->bit_width != max_bit_width)) {
+	if (reg->access_width > 4) {
 		ACPI_ERROR((AE_INFO,
-			    "Unsupported register bit width: 0x%X",
-			    reg->bit_width));
+			    "Unsupported register access width: 0x%X",
+			    reg->access_width));
 		return (AE_SUPPORT);
 	}
 
-	/* Validate the bit_offset. Just a warning for now. */
+	/* Validate the bit_width, convert access_width into number of bits */
 
-	if (reg->bit_offset != 0) {
+	access_width = acpi_hw_get_access_bit_width(reg, max_bit_width);
+	bit_width =
+	    ACPI_ROUND_UP(reg->bit_offset + reg->bit_width, access_width);
+	if (max_bit_width < bit_width) {
 		ACPI_WARNING((AE_INFO,
-			      "Unsupported register bit offset: 0x%X",
-			      reg->bit_offset));
+			      "Requested bit width 0x%X is smaller than register bit width 0x%X",
+			      max_bit_width, bit_width));
+		return (AE_SUPPORT);
 	}
 
 	return (AE_OK);
@@ -145,17 +195,19 @@
  *              64-bit values is not needed.
  *
  * LIMITATIONS: <These limitations also apply to acpi_hw_write>
- *      bit_width must be exactly 8, 16, or 32.
  *      space_ID must be system_memory or system_IO.
- *      bit_offset and access_width are currently ignored, as there has
- *          not been a need to implement these.
  *
  ******************************************************************************/
 
 acpi_status acpi_hw_read(u32 *value, struct acpi_generic_address *reg)
 {
 	u64 address;
+	u8 access_width;
+	u32 bit_width;
+	u8 bit_offset;
 	u64 value64;
+	u32 value32;
+	u8 index;
 	acpi_status status;
 
 	ACPI_FUNCTION_NAME(hw_read);
@@ -167,28 +219,75 @@
 		return (status);
 	}
 
-	/* Initialize entire 32-bit return value to zero */
-
+	/*
+	 * Initialize the entire 32-bit return value to zero, then convert
+	 * access_width into a number of bits
+	 */
 	*value = 0;
+	access_width = acpi_hw_get_access_bit_width(reg, 32);
+	bit_width = reg->bit_offset + reg->bit_width;
+	bit_offset = reg->bit_offset;
 
 	/*
 	 * Two address spaces supported: Memory or IO. PCI_Config is
 	 * not supported here because the GAS structure is insufficient
 	 */
-	if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) {
-		status = acpi_os_read_memory((acpi_physical_address)
-					     address, &value64, reg->bit_width);
+	index = 0;
+	while (bit_width) {
+		if (bit_offset >= access_width) {
+			value32 = 0;
+			bit_offset -= access_width;
+		} else {
+			if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) {
+				status =
+				    acpi_os_read_memory((acpi_physical_address)
+							address +
+							index *
+							ACPI_DIV_8
+							(access_width),
+							&value64, access_width);
+				value32 = (u32)value64;
+			} else {	/* ACPI_ADR_SPACE_SYSTEM_IO, validated earlier */
 
-		*value = (u32)value64;
-	} else {		/* ACPI_ADR_SPACE_SYSTEM_IO, validated earlier */
+				status = acpi_hw_read_port((acpi_io_address)
+							   address +
+							   index *
+							   ACPI_DIV_8
+							   (access_width),
+							   &value32,
+							   access_width);
+			}
 
-		status = acpi_hw_read_port((acpi_io_address)
-					   address, value, reg->bit_width);
+			/*
+			 * Use offset style bit masks because:
+			 * bit_offset < access_width, bit_width < access_width, and
+			 * access_width is ensured to be no greater than 32 bits by
+			 * acpi_hw_validate_register().
+			 */
+			if (bit_offset) {
+				value32 &= ACPI_MASK_BITS_BELOW(bit_offset);
+				bit_offset = 0;
+			}
+			if (bit_width < access_width) {
+				value32 &= ACPI_MASK_BITS_ABOVE(bit_width);
+			}
+		}
+
+		/*
+		 * Use offset style bit writes because "Index * AccessWidth" is
+		 * ensured to be less than 32-bits by acpi_hw_validate_register().
+		 */
+		ACPI_SET_BITS(value, index * access_width,
+			      ACPI_MASK_BITS_ABOVE_32(access_width), value32);
+
+		bit_width -=
+		    bit_width > access_width ? access_width : bit_width;
+		index++;
 	}
 
 	ACPI_DEBUG_PRINT((ACPI_DB_IO,
 			  "Read:  %8.8X width %2d from %8.8X%8.8X (%s)\n",
-			  *value, reg->bit_width, ACPI_FORMAT_UINT64(address),
+			  *value, access_width, ACPI_FORMAT_UINT64(address),
 			  acpi_ut_get_region_name(reg->space_id)));
 
 	return (status);
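
For reference, the chunked read loop above can be modeled in user space. This is a minimal sketch assuming 8-bit accesses; chunked_read and the MASK_BITS_* macros are illustrative stand-ins, not the ACPICA definitions:

    #include <stdint.h>
    #include <stdio.h>

    #define MASK_BITS_BELOW(pos) (~((1u << (pos)) - 1)) /* zero bits below pos */
    #define MASK_BITS_ABOVE(pos) ((1u << (pos)) - 1)    /* zero bits at/above pos */

    /* Read a field spanning [bit_offset, bit_offset + bit_width) from an
     * array of 8-bit chunks, leaving the field in place and zeroing the
     * surrounding bits -- the same shape as the loop above. */
    static uint32_t chunked_read(const uint8_t *chunks,
                                 uint8_t bit_offset, uint8_t bit_width)
    {
        const uint8_t access_width = 8;
        uint32_t value = 0, value32;
        uint8_t remaining = bit_offset + bit_width;
        uint8_t index = 0;

        while (remaining) {
            if (bit_offset >= access_width) {
                value32 = 0;            /* chunk lies entirely below the field */
                bit_offset -= access_width;
            } else {
                value32 = chunks[index];
                if (bit_offset) {
                    value32 &= MASK_BITS_BELOW(bit_offset);
                    bit_offset = 0;
                }
                if (remaining < access_width)
                    value32 &= MASK_BITS_ABOVE(remaining);
            }
            value |= value32 << (index * access_width);
            remaining -= remaining > access_width ? access_width : remaining;
            index++;
        }
        return value;
    }

    int main(void)
    {
        const uint8_t regs[2] = { 0xAA, 0xBB };

        /* 12-bit field at bit offset 4: prints 0xBBA0 (low 4 bits zeroed) */
        printf("0x%X\n", chunked_read(regs, 4, 12));
        return 0;
    }
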
@@ -212,6 +311,12 @@
 acpi_status acpi_hw_write(u32 value, struct acpi_generic_address *reg)
 {
 	u64 address;
+	u8 access_width;
+	u32 bit_width;
+	u8 bit_offset;
+	u64 value64;
+	u32 new_value32, old_value32;
+	u8 index;
 	acpi_status status;
 
 	ACPI_FUNCTION_NAME(hw_write);
@@ -223,23 +328,145 @@
 		return (status);
 	}
 
+	/* Convert access_width into a number of bits */
+
+	access_width = acpi_hw_get_access_bit_width(reg, 32);
+	bit_width = reg->bit_offset + reg->bit_width;
+	bit_offset = reg->bit_offset;
+
 	/*
 	 * Two address spaces supported: Memory or IO. PCI_Config is
 	 * not supported here because the GAS structure is insufficient
 	 */
-	if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) {
-		status = acpi_os_write_memory((acpi_physical_address)
-					      address, (u64)value,
-					      reg->bit_width);
-	} else {		/* ACPI_ADR_SPACE_SYSTEM_IO, validated earlier */
+	index = 0;
+	while (bit_width) {
+		/*
+		 * Use offset style bit reads because "Index * AccessWidth" is
+		 * ensured to be less than 32 bits by acpi_hw_validate_register().
+		 */
+		new_value32 = ACPI_GET_BITS(&value, index * access_width,
+					    ACPI_MASK_BITS_ABOVE_32
+					    (access_width));
 
-		status = acpi_hw_write_port((acpi_io_address)
-					    address, value, reg->bit_width);
+		if (bit_offset >= access_width) {
+			bit_offset -= access_width;
+		} else {
+			/*
+			 * Use offset style bit masks because access_width is ensured
+			 * to be less than 32 bits by acpi_hw_validate_register(), and
+			 * bit_offset and bit_width are less than access_width here.
+			 */
+			if (bit_offset) {
+				new_value32 &= ACPI_MASK_BITS_BELOW(bit_offset);
+			}
+			if (bit_width < access_width) {
+				new_value32 &= ACPI_MASK_BITS_ABOVE(bit_width);
+			}
+
+			if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) {
+				if (bit_offset || bit_width < access_width) {
+					/*
+					 * Read old values in order not to modify the bits that
+					 * are beyond the register bit_width/bit_offset setting.
+					 */
+					status =
+					    acpi_os_read_memory((acpi_physical_address)
+								address +
+								index *
+								ACPI_DIV_8
+								(access_width),
+								&value64,
+								access_width);
+					old_value32 = (u32)value64;
+
+					/*
+					 * Use offset style bit masks because access_width is
+					 * ensured to be less than 32 bits by
+					 * acpi_hw_validate_register(), and bit_offset and
+					 * bit_width are less than access_width here.
+					 */
+					if (bit_offset) {
+						old_value32 &=
+						    ACPI_MASK_BITS_ABOVE
+						    (bit_offset);
+						bit_offset = 0;
+					}
+					if (bit_width < access_width) {
+						old_value32 &=
+						    ACPI_MASK_BITS_BELOW
+						    (bit_width);
+					}
+
+					new_value32 |= old_value32;
+				}
+
+				value64 = (u64)new_value32;
+				status =
+				    acpi_os_write_memory((acpi_physical_address)
+							 address +
+							 index *
+							 ACPI_DIV_8
+							 (access_width),
+							 value64, access_width);
+			} else {	/* ACPI_ADR_SPACE_SYSTEM_IO, validated earlier */
+
+				if (bit_offset || bit_width < access_width) {
+					/*
+					 * Read old values in order not to modify the bits that
+					 * are beyond the register bit_width/bit_offset setting.
+					 */
+					status =
+					    acpi_hw_read_port((acpi_io_address)
+							      address +
+							      index *
+							      ACPI_DIV_8
+							      (access_width),
+							      &old_value32,
+							      access_width);
+
+					/*
+					 * Use offset style bit masks because access_width is
+					 * ensured to be less than 32 bits by
+					 * acpi_hw_validate_register(), and bit_offset and
+					 * bit_width are less than access_width here.
+					 */
+					if (bit_offset) {
+						old_value32 &=
+						    ACPI_MASK_BITS_ABOVE
+						    (bit_offset);
+						bit_offset = 0;
+					}
+					if (bit_width < access_width) {
+						old_value32 &=
+						    ACPI_MASK_BITS_BELOW
+						    (bit_width);
+					}
+
+					new_value32 |= old_value32;
+				}
+
+				status = acpi_hw_write_port((acpi_io_address)
+							    address +
+							    index *
+							    ACPI_DIV_8
+							    (access_width),
+							    new_value32,
+							    access_width);
+			}
+		}
+
+		/*
+		 * Index * access_width is ensured to be less than 32 bits by
+		 * acpi_hw_validate_register().
+		 */
+		bit_width -=
+		    bit_width > access_width ? access_width : bit_width;
+		index++;
 	}
 
 	ACPI_DEBUG_PRINT((ACPI_DB_IO,
 			  "Wrote: %8.8X width %2d   to %8.8X%8.8X (%s)\n",
-			  value, reg->bit_width, ACPI_FORMAT_UINT64(address),
+			  value, access_width, ACPI_FORMAT_UINT64(address),
 			  acpi_ut_get_region_name(reg->space_id)));
 
 	return (status);
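
The write path applies the same chunking but must preserve the bits of each chunk that lie outside the field, hence the read-modify-write whenever bit_offset or a short trailing bit_width is involved. A minimal user-space model of one such chunk update (rmw_chunk and the masks are illustrative, and this sketch folds the preserved regions into a single field mask):

    #include <stdint.h>
    #include <stdio.h>

    #define MASK_BITS_BELOW(pos) (~((1u << (pos)) - 1)) /* keep bits >= pos */
    #define MASK_BITS_ABOVE(pos) ((1u << (pos)) - 1)    /* keep bits <  pos */

    /* Replace only bits [bit_offset, bit_end) of an 8-bit chunk,
     * preserving everything outside the field. */
    static void rmw_chunk(uint8_t *chunk, uint8_t new_bits,
                          uint8_t bit_offset, uint8_t bit_end)
    {
        uint32_t field_mask = MASK_BITS_BELOW(bit_offset) &
                              MASK_BITS_ABOVE(bit_end);

        *chunk = (uint8_t)((*chunk & ~field_mask) | (new_bits & field_mask));
    }

    int main(void)
    {
        uint8_t reg = 0xFF;

        rmw_chunk(&reg, 0x00, 2, 5); /* clear bits 2..4 only */
        printf("0x%02X\n", reg);     /* prints 0xE3 */
        return 0;
    }
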
diff --git a/drivers/acpi/acpica/hwxface.c b/drivers/acpi/acpica/hwxface.c
index a01ddb3..98c26ff 100644
--- a/drivers/acpi/acpica/hwxface.c
+++ b/drivers/acpi/acpica/hwxface.c
@@ -91,10 +91,9 @@
 		 * compatibility with other ACPI implementations that have allowed
 		 * BIOS code with bad register width values to go unnoticed.
 		 */
-		status =
-		    acpi_os_write_port((acpi_io_address) reset_reg->address,
-				       acpi_gbl_FADT.reset_value,
-				       ACPI_RESET_REGISTER_WIDTH);
+		status = acpi_os_write_port((acpi_io_address)reset_reg->address,
+					    acpi_gbl_FADT.reset_value,
+					    ACPI_RESET_REGISTER_WIDTH);
 	} else {
 		/* Write the reset value to the reset register */
 
@@ -504,9 +503,7 @@
 	 * Evaluate the \_Sx namespace object containing the register values
 	 * for this state
 	 */
-	info->relative_pathname = ACPI_CAST_PTR(char,
-						acpi_gbl_sleep_state_names
-						[sleep_state]);
+	info->relative_pathname = acpi_gbl_sleep_state_names[sleep_state];
 
 	status = acpi_ns_evaluate(info);
 	if (ACPI_FAILURE(status)) {
diff --git a/drivers/acpi/acpica/nsaccess.c b/drivers/acpi/acpica/nsaccess.c
index 697af81..426a630 100644
--- a/drivers/acpi/acpica/nsaccess.c
+++ b/drivers/acpi/acpica/nsaccess.c
@@ -107,9 +107,10 @@
 			continue;
 		}
 
-		status = acpi_ns_lookup(NULL, init_val->name, init_val->type,
-					ACPI_IMODE_LOAD_PASS2,
-					ACPI_NS_NO_UPSEARCH, NULL, &new_node);
+		status =
+		    acpi_ns_lookup(NULL, (char *)init_val->name, init_val->type,
+				   ACPI_IMODE_LOAD_PASS2, ACPI_NS_NO_UPSEARCH,
+				   NULL, &new_node);
 		if (ACPI_FAILURE(status)) {
 			ACPI_EXCEPTION((AE_INFO, status,
 					"Could not create predefined name %s",
diff --git a/drivers/acpi/acpica/nsconvert.c b/drivers/acpi/acpica/nsconvert.c
index 878e8fb..c803bda 100644
--- a/drivers/acpi/acpica/nsconvert.c
+++ b/drivers/acpi/acpica/nsconvert.c
@@ -79,7 +79,8 @@
 		/* String-to-Integer conversion */
 
 		status = acpi_ut_strtoul64(original_object->string.pointer,
-					   ACPI_ANY_BASE, &value);
+					   ACPI_ANY_BASE,
+					   acpi_gbl_integer_byte_width, &value);
 		if (ACPI_FAILURE(status)) {
 			return (status);
 		}
@@ -317,7 +318,7 @@
  ******************************************************************************/
 
 acpi_status
-acpi_ns_convert_to_unicode(struct acpi_namespace_node * scope,
+acpi_ns_convert_to_unicode(struct acpi_namespace_node *scope,
 			   union acpi_operand_object *original_object,
 			   union acpi_operand_object **return_object)
 {
@@ -384,7 +385,7 @@
  ******************************************************************************/
 
 acpi_status
-acpi_ns_convert_to_resource(struct acpi_namespace_node * scope,
+acpi_ns_convert_to_resource(struct acpi_namespace_node *scope,
 			    union acpi_operand_object *original_object,
 			    union acpi_operand_object **return_object)
 {
@@ -463,7 +464,7 @@
  ******************************************************************************/
 
 acpi_status
-acpi_ns_convert_to_reference(struct acpi_namespace_node * scope,
+acpi_ns_convert_to_reference(struct acpi_namespace_node *scope,
 			     union acpi_operand_object *original_object,
 			     union acpi_operand_object **return_object)
 {
diff --git a/drivers/acpi/acpica/nsdump.c b/drivers/acpi/acpica/nsdump.c
index af236e3..ce1f860 100644
--- a/drivers/acpi/acpica/nsdump.c
+++ b/drivers/acpi/acpica/nsdump.c
@@ -81,7 +81,7 @@
  *
  ******************************************************************************/
 
-void acpi_ns_print_pathname(u32 num_segments, char *pathname)
+void acpi_ns_print_pathname(u32 num_segments, const char *pathname)
 {
 	u32 i;
 
@@ -114,6 +114,9 @@
 	acpi_os_printf("]\n");
 }
 
+#ifdef ACPI_OBSOLETE_FUNCTIONS
+/* Not used at this time, perhaps later */
+
 /*******************************************************************************
  *
  * FUNCTION:    acpi_ns_dump_pathname
@@ -131,7 +134,8 @@
  ******************************************************************************/
 
 void
-acpi_ns_dump_pathname(acpi_handle handle, char *msg, u32 level, u32 component)
+acpi_ns_dump_pathname(acpi_handle handle,
+		      const char *msg, u32 level, u32 component)
 {
 
 	ACPI_FUNCTION_TRACE(ns_dump_pathname);
@@ -148,6 +152,7 @@
 	acpi_os_printf("\n");
 	return_VOID;
 }
+#endif
 
 /*******************************************************************************
  *
diff --git a/drivers/acpi/acpica/nsinit.c b/drivers/acpi/acpica/nsinit.c
index d4aa8b6..36643a8 100644
--- a/drivers/acpi/acpica/nsinit.c
+++ b/drivers/acpi/acpica/nsinit.c
@@ -140,6 +140,7 @@
 {
 	acpi_status status = AE_OK;
 	struct acpi_device_walk_info info;
+	acpi_handle handle;
 
 	ACPI_FUNCTION_TRACE(ns_initialize_devices);
 
@@ -190,6 +191,27 @@
 		if (ACPI_SUCCESS(status)) {
 			info.num_INI++;
 		}
+
+		/*
+		 * Execute \_SB._INI.
+		 * There appears to be a strict order requirement for \_SB._INI,
+		 * which should be evaluated before any _REG evaluations.
+		 */
+		status = acpi_get_handle(NULL, "\\_SB", &handle);
+		if (ACPI_SUCCESS(status)) {
+			memset(info.evaluate_info, 0,
+			       sizeof(struct acpi_evaluate_info));
+			info.evaluate_info->prefix_node = handle;
+			info.evaluate_info->relative_pathname =
+			    METHOD_NAME__INI;
+			info.evaluate_info->parameters = NULL;
+			info.evaluate_info->flags = ACPI_IGNORE_RETURN_VALUE;
+
+			status = acpi_ns_evaluate(info.evaluate_info);
+			if (ACPI_SUCCESS(status)) {
+				info.num_INI++;
+			}
+		}
 	}
 
 	/*
@@ -198,6 +220,12 @@
 	 * Note: Any objects accessed by the _REG methods will be automatically
 	 * initialized, even if they contain executable AML (see the call to
 	 * acpi_ns_initialize_objects below).
+	 *
+	 * Note: According to the ACPI specification, we need not execute _REG
+	 * for system_memory/system_io operation regions. However, for PCI_Config
+	 * operation regions, _REG must be evaluated for those on a PCI root bus
+	 * that does not contain a _BBN object. So this code is kept here in
+	 * order not to break things.
 	 */
 	if (!(flags & ACPI_NO_ADDRESS_SPACE_INIT)) {
 		ACPI_DEBUG_PRINT((ACPI_DB_EXEC,
@@ -592,33 +620,37 @@
 	 * Note: We know there is an _INI within this subtree, but it may not be
 	 * under this particular device, it may be lower in the branch.
 	 */
-	ACPI_DEBUG_EXEC(acpi_ut_display_init_pathname
-			(ACPI_TYPE_METHOD, device_node, METHOD_NAME__INI));
+	if (!ACPI_COMPARE_NAME(device_node->name.ascii, "_SB_") ||
+	    device_node->parent != acpi_gbl_root_node) {
+		ACPI_DEBUG_EXEC(acpi_ut_display_init_pathname
+				(ACPI_TYPE_METHOD, device_node,
+				 METHOD_NAME__INI));
 
-	memset(info, 0, sizeof(struct acpi_evaluate_info));
-	info->prefix_node = device_node;
-	info->relative_pathname = METHOD_NAME__INI;
-	info->parameters = NULL;
-	info->flags = ACPI_IGNORE_RETURN_VALUE;
+		memset(info, 0, sizeof(struct acpi_evaluate_info));
+		info->prefix_node = device_node;
+		info->relative_pathname = METHOD_NAME__INI;
+		info->parameters = NULL;
+		info->flags = ACPI_IGNORE_RETURN_VALUE;
 
-	status = acpi_ns_evaluate(info);
-
-	if (ACPI_SUCCESS(status)) {
-		walk_info->num_INI++;
-	}
+		status = acpi_ns_evaluate(info);
+		if (ACPI_SUCCESS(status)) {
+			walk_info->num_INI++;
+		}
 #ifdef ACPI_DEBUG_OUTPUT
-	else if (status != AE_NOT_FOUND) {
+		else if (status != AE_NOT_FOUND) {
 
-		/* Ignore error and move on to next device */
+			/* Ignore error and move on to next device */
 
-		char *scope_name =
-		    acpi_ns_get_normalized_pathname(device_node, TRUE);
+			char *scope_name =
+			    acpi_ns_get_normalized_pathname(device_node, TRUE);
 
-		ACPI_EXCEPTION((AE_INFO, status, "during %s._INI execution",
-				scope_name));
-		ACPI_FREE(scope_name);
-	}
+			ACPI_EXCEPTION((AE_INFO, status,
+					"during %s._INI execution",
+					scope_name));
+			ACPI_FREE(scope_name);
+		}
 #endif
+	}
 
 	/* Ignore errors from above */
 
diff --git a/drivers/acpi/acpica/nsload.c b/drivers/acpi/acpica/nsload.c
index 75cdb87..b5e2b0a 100644
--- a/drivers/acpi/acpica/nsload.c
+++ b/drivers/acpi/acpica/nsload.c
@@ -123,8 +123,8 @@
 		(void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE);
 		acpi_ns_delete_namespace_by_owner(acpi_gbl_root_table_list.
 						  tables[table_index].owner_id);
-		acpi_tb_release_owner_id(table_index);
 
+		acpi_tb_release_owner_id(table_index);
 		return_ACPI_STATUS(status);
 	}
 
diff --git a/drivers/acpi/acpica/nsnames.c b/drivers/acpi/acpica/nsnames.c
index eb6e1b8..f03dd41 100644
--- a/drivers/acpi/acpica/nsnames.c
+++ b/drivers/acpi/acpica/nsnames.c
@@ -113,7 +113,7 @@
 
 acpi_status
 acpi_ns_handle_to_pathname(acpi_handle target_handle,
-			   struct acpi_buffer * buffer, u8 no_trailing)
+			   struct acpi_buffer *buffer, u8 no_trailing)
 {
 	acpi_status status;
 	struct acpi_namespace_node *node;
diff --git a/drivers/acpi/acpica/nsobject.c b/drivers/acpi/acpica/nsobject.c
index 051306f..cfa2bb7 100644
--- a/drivers/acpi/acpica/nsobject.c
+++ b/drivers/acpi/acpica/nsobject.c
@@ -399,7 +399,7 @@
  ******************************************************************************/
 
 acpi_status
-acpi_ns_detach_data(struct acpi_namespace_node * node,
+acpi_ns_detach_data(struct acpi_namespace_node *node,
 		    acpi_object_handler handler)
 {
 	union acpi_operand_object *obj_desc;
@@ -444,7 +444,7 @@
  ******************************************************************************/
 
 acpi_status
-acpi_ns_get_attached_data(struct acpi_namespace_node * node,
+acpi_ns_get_attached_data(struct acpi_namespace_node *node,
 			  acpi_object_handler handler, void **data)
 {
 	union acpi_operand_object *obj_desc;
diff --git a/drivers/acpi/acpica/nsprepkg.c b/drivers/acpi/acpica/nsprepkg.c
index 9047f28..fbedc6e 100644
--- a/drivers/acpi/acpica/nsprepkg.c
+++ b/drivers/acpi/acpica/nsprepkg.c
@@ -62,6 +62,10 @@
 			       u32 count1,
 			       u8 type2, u32 count2, u32 start_index);
 
+static acpi_status
+acpi_ns_custom_package(struct acpi_evaluate_info *info,
+		       union acpi_operand_object **elements, u32 count);
+
 /*******************************************************************************
  *
  * FUNCTION:    acpi_ns_check_package
@@ -135,6 +139,11 @@
 	 * PTYPE2 packages contain subpackages
 	 */
 	switch (package->ret_info.type) {
+	case ACPI_PTYPE_CUSTOM:
+
+		status = acpi_ns_custom_package(info, elements, count);
+		break;
+
 	case ACPI_PTYPE1_FIXED:
 		/*
 		 * The package count is fixed and there are no subpackages
@@ -179,6 +188,7 @@
 			if (ACPI_FAILURE(status)) {
 				return (status);
 			}
+
 			elements++;
 		}
 		break;
@@ -225,6 +235,7 @@
 					return (status);
 				}
 			}
+
 			elements++;
 		}
 		break;
@@ -569,11 +580,13 @@
 			if (sub_package->package.count < expected_count) {
 				goto package_too_small;
 			}
+
 			if (sub_package->package.count <
 			    package->ret_info.count1) {
 				expected_count = package->ret_info.count1;
 				goto package_too_small;
 			}
+
 			if (expected_count == 0) {
 				/*
 				 * Either the num_entries element was originally zero or it was
@@ -622,6 +635,83 @@
 
 /*******************************************************************************
  *
+ * FUNCTION:    acpi_ns_custom_package
+ *
+ * PARAMETERS:  info                - Method execution information block
+ *              elements            - Pointer to the package elements array
+ *              count               - Element count for the package
+ *
+ * RETURN:      Status
+ *
+ * DESCRIPTION: Check a returned package object for the correct count and
+ *              correct type of all sub-objects.
+ *
+ * NOTE: Currently used for the _BIX method only. When needed for two or more
+ * methods, a detect/dispatch mechanism will probably be required.
+ *
+ ******************************************************************************/
+
+static acpi_status
+acpi_ns_custom_package(struct acpi_evaluate_info *info,
+		       union acpi_operand_object **elements, u32 count)
+{
+	u32 expected_count;
+	u32 version;
+	acpi_status status = AE_OK;
+
+	ACPI_FUNCTION_NAME(ns_custom_package);
+
+	/* Get version number, must be Integer */
+
+	if ((*elements)->common.type != ACPI_TYPE_INTEGER) {
+		ACPI_WARN_PREDEFINED((AE_INFO, info->full_pathname,
+				      info->node_flags,
+				      "Return Package has invalid object type for version number"));
+		return_ACPI_STATUS(AE_AML_OPERAND_TYPE);
+	}
+
+	version = (u32)(*elements)->integer.value;
+	expected_count = 21;	/* Version 1 */
+
+	if (version == 0) {
+		expected_count = 20;	/* Version 0 */
+	}
+
+	if (count < expected_count) {
+		ACPI_WARN_PREDEFINED((AE_INFO, info->full_pathname,
+				      info->node_flags,
+				      "Return Package is too small - found %u elements, expected %u",
+				      count, expected_count));
+		return_ACPI_STATUS(AE_AML_OPERAND_VALUE);
+	} else if (count > expected_count) {
+		ACPI_DEBUG_PRINT((ACPI_DB_REPAIR,
+				  "%s: Return Package is larger than needed - "
+				  "found %u, expected %u\n",
+				  info->full_pathname, count, expected_count));
+	}
+
+	/* Validate all elements of the returned package */
+
+	status = acpi_ns_check_package_elements(info, elements,
+						ACPI_RTYPE_INTEGER, 16,
+						ACPI_RTYPE_STRING, 4, 0);
+	if (ACPI_FAILURE(status)) {
+		return_ACPI_STATUS(status);
+	}
+
+	/* Version 1 has a single trailing integer */
+
+	if (version > 0) {
+		status = acpi_ns_check_package_elements(info, elements + 20,
+							ACPI_RTYPE_INTEGER, 1,
+							0, 0, 20);
+	}
+
+	return_ACPI_STATUS(status);
+}
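
Assuming the counts used in the code above (a version-0 _BIX payload is 16 integers, including the revision element, followed by 4 strings), the two-run type walk that acpi_ns_check_package_elements performs can be sketched in user space; check_elements and the types here are illustrative, not the ACPICA objects:

    #include <stdio.h>

    enum obj_type { TYPE_INTEGER, TYPE_STRING };

    /* Validate count1 elements of type1 followed by count2 of type2. */
    static int check_elements(const enum obj_type *elems,
                              int count1, enum obj_type type1,
                              int count2, enum obj_type type2)
    {
        int i;

        for (i = 0; i < count1; i++)
            if (elems[i] != type1)
                return -1;
        for (i = 0; i < count2; i++)
            if (elems[count1 + i] != type2)
                return -1;
        return 0;
    }

    int main(void)
    {
        enum obj_type bix[20];
        int i;

        for (i = 0; i < 16; i++)
            bix[i] = TYPE_INTEGER;
        for (i = 16; i < 20; i++)
            bix[i] = TYPE_STRING;

        printf("%s\n", check_elements(bix, 16, TYPE_INTEGER,
                                      4, TYPE_STRING) ? "invalid" : "valid");
        return 0;
    }
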
+
+/*******************************************************************************
+ *
  * FUNCTION:    acpi_ns_check_package_elements
  *
  * PARAMETERS:  info            - Method execution information block
@@ -661,6 +751,7 @@
 		if (ACPI_FAILURE(status)) {
 			return (status);
 		}
+
 		this_element++;
 	}
 
@@ -671,6 +762,7 @@
 		if (ACPI_FAILURE(status)) {
 			return (status);
 		}
+
 		this_element++;
 	}
 
diff --git a/drivers/acpi/acpica/nsrepair.c b/drivers/acpi/acpica/nsrepair.c
index 805e36d..9523d41 100644
--- a/drivers/acpi/acpica/nsrepair.c
+++ b/drivers/acpi/acpica/nsrepair.c
@@ -399,7 +399,7 @@
  ******************************************************************************/
 
 acpi_status
-acpi_ns_repair_null_element(struct acpi_evaluate_info * info,
+acpi_ns_repair_null_element(struct acpi_evaluate_info *info,
 			    u32 expected_btypes,
 			    u32 package_index,
 			    union acpi_operand_object **return_object_ptr)
diff --git a/drivers/acpi/acpica/nsrepair2.c b/drivers/acpi/acpica/nsrepair2.c
index 63edbbb..d533612 100644
--- a/drivers/acpi/acpica/nsrepair2.c
+++ b/drivers/acpi/acpica/nsrepair2.c
@@ -54,9 +54,9 @@
  * be repaired on a per-name basis.
  */
 typedef
-acpi_status(*acpi_repair_function) (struct acpi_evaluate_info * info,
-				    union acpi_operand_object
-				    **return_object_ptr);
+acpi_status (*acpi_repair_function) (struct acpi_evaluate_info * info,
+				     union acpi_operand_object **
+				     return_object_ptr);
 
 typedef struct acpi_repair_info {
 	char name[ACPI_NAME_SIZE];
diff --git a/drivers/acpi/acpica/nsutils.c b/drivers/acpi/acpica/nsutils.c
index c72cc62..784a30b 100644
--- a/drivers/acpi/acpica/nsutils.c
+++ b/drivers/acpi/acpica/nsutils.c
@@ -272,11 +272,11 @@
 			result = &internal_name[i];
 		} else if (num_segments == 2) {
 			internal_name[i] = AML_DUAL_NAME_PREFIX;
-			result = &internal_name[(acpi_size) i + 1];
+			result = &internal_name[(acpi_size)i + 1];
 		} else {
 			internal_name[i] = AML_MULTI_NAME_PREFIX_OP;
-			internal_name[(acpi_size) i + 1] = (char)num_segments;
-			result = &internal_name[(acpi_size) i + 2];
+			internal_name[(acpi_size)i + 1] = (char)num_segments;
+			result = &internal_name[(acpi_size)i + 2];
 		}
 	}
 
@@ -456,7 +456,7 @@
 
 			names_index = prefix_length + 2;
 			num_segments = (u8)
-			    internal_name[(acpi_size) prefix_length + 1];
+			    internal_name[(acpi_size)prefix_length + 1];
 			break;
 
 		case AML_DUAL_NAME_PREFIX:
diff --git a/drivers/acpi/acpica/nsxfeval.c b/drivers/acpi/acpica/nsxfeval.c
index a7deeaa..d2a9b4f 100644
--- a/drivers/acpi/acpica/nsxfeval.c
+++ b/drivers/acpi/acpica/nsxfeval.c
@@ -256,7 +256,7 @@
 		 * Allocate a new parameter block for the internal objects
 		 * Add 1 to count to allow for null terminated internal list
 		 */
-		info->parameters = ACPI_ALLOCATE_ZEROED(((acpi_size) info->
+		info->parameters = ACPI_ALLOCATE_ZEROED(((acpi_size)info->
 							 param_count +
 							 1) * sizeof(void *));
 		if (!info->parameters) {
@@ -280,13 +280,12 @@
 		info->parameters[info->param_count] = NULL;
 	}
 
-#if 0
+#ifdef _FUTURE_FEATURE
 
 	/*
 	 * Begin incoming argument count analysis. Check for too few args
 	 * and too many args.
 	 */
-
 	switch (acpi_ns_get_type(info->node)) {
 	case ACPI_TYPE_METHOD:
 
@@ -370,68 +369,68 @@
 	 * If we are expecting a return value, and all went well above,
 	 * copy the return value to an external object.
 	 */
-	if (return_buffer) {
-		if (!info->return_object) {
-			return_buffer->length = 0;
+	if (!return_buffer) {
+		goto cleanup_return_object;
+	}
+
+	if (!info->return_object) {
+		return_buffer->length = 0;
+		goto cleanup;
+	}
+
+	if (ACPI_GET_DESCRIPTOR_TYPE(info->return_object) ==
+	    ACPI_DESC_TYPE_NAMED) {
+		/*
+		 * If we received a NS Node as a return object, this means that
+		 * the object we are evaluating has nothing interesting to
+		 * return (such as a mutex, etc.)  We return an error because
+		 * these types are essentially unsupported by this interface.
+		 * We don't check up front because this makes it easier to add
+		 * support for various types at a later date if necessary.
+		 */
+		status = AE_TYPE;
+		info->return_object = NULL;	/* No need to delete a NS Node */
+		return_buffer->length = 0;
+	}
+
+	if (ACPI_FAILURE(status)) {
+		goto cleanup_return_object;
+	}
+
+	/* Dereference Index and ref_of references */
+
+	acpi_ns_resolve_references(info);
+
+	/* Get the size of the returned object */
+
+	status = acpi_ut_get_object_size(info->return_object,
+					 &buffer_space_needed);
+	if (ACPI_SUCCESS(status)) {
+
+		/* Validate/Allocate/Clear caller buffer */
+
+		status = acpi_ut_initialize_buffer(return_buffer,
+						   buffer_space_needed);
+		if (ACPI_FAILURE(status)) {
+			/*
+			 * Caller's buffer is too small or a new one can't
+			 * be allocated
+			 */
+			ACPI_DEBUG_PRINT((ACPI_DB_INFO,
+					  "Needed buffer size %X, %s\n",
+					  (u32)buffer_space_needed,
+					  acpi_format_exception(status)));
 		} else {
-			if (ACPI_GET_DESCRIPTOR_TYPE(info->return_object) ==
-			    ACPI_DESC_TYPE_NAMED) {
-				/*
-				 * If we received a NS Node as a return object, this means that
-				 * the object we are evaluating has nothing interesting to
-				 * return (such as a mutex, etc.)  We return an error because
-				 * these types are essentially unsupported by this interface.
-				 * We don't check up front because this makes it easier to add
-				 * support for various types at a later date if necessary.
-				 */
-				status = AE_TYPE;
-				info->return_object = NULL;	/* No need to delete a NS Node */
-				return_buffer->length = 0;
-			}
+			/* We have enough space for the object, build it */
 
-			if (ACPI_SUCCESS(status)) {
-
-				/* Dereference Index and ref_of references */
-
-				acpi_ns_resolve_references(info);
-
-				/* Get the size of the returned object */
-
-				status =
-				    acpi_ut_get_object_size(info->return_object,
-							    &buffer_space_needed);
-				if (ACPI_SUCCESS(status)) {
-
-					/* Validate/Allocate/Clear caller buffer */
-
-					status =
-					    acpi_ut_initialize_buffer
-					    (return_buffer,
-					     buffer_space_needed);
-					if (ACPI_FAILURE(status)) {
-						/*
-						 * Caller's buffer is too small or a new one can't
-						 * be allocated
-						 */
-						ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-								  "Needed buffer size %X, %s\n",
-								  (u32)
-								  buffer_space_needed,
-								  acpi_format_exception
-								  (status)));
-					} else {
-						/* We have enough space for the object, build it */
-
-						status =
-						    acpi_ut_copy_iobject_to_eobject
-						    (info->return_object,
-						     return_buffer);
-					}
-				}
-			}
+			status =
+			    acpi_ut_copy_iobject_to_eobject(info->return_object,
+							    return_buffer);
 		}
 	}
 
+cleanup_return_object:
+
 	if (info->return_object) {
 		/*
 		 * Delete the internal return object. NOTE: Interpreter must be
diff --git a/drivers/acpi/acpica/nsxfname.c b/drivers/acpi/acpica/nsxfname.c
index 285b820..76a1bd4 100644
--- a/drivers/acpi/acpica/nsxfname.c
+++ b/drivers/acpi/acpica/nsxfname.c
@@ -78,7 +78,7 @@
 
 acpi_status
 acpi_get_handle(acpi_handle parent,
-		acpi_string pathname, acpi_handle * ret_handle)
+		acpi_string pathname, acpi_handle *ret_handle)
 {
 	acpi_status status;
 	struct acpi_namespace_node *node = NULL;
@@ -155,7 +155,7 @@
  *
  ******************************************************************************/
 acpi_status
-acpi_get_name(acpi_handle handle, u32 name_type, struct acpi_buffer * buffer)
+acpi_get_name(acpi_handle handle, u32 name_type, struct acpi_buffer *buffer)
 {
 	acpi_status status;
 	struct acpi_namespace_node *node;
@@ -448,7 +448,7 @@
 		/* Point past the CID PNP_DEVICE_ID array */
 
 		next_id_string +=
-		    ((acpi_size) cid_list->count *
+		    ((acpi_size)cid_list->count *
 		     sizeof(struct acpi_pnp_device_id));
 	}
 
diff --git a/drivers/acpi/acpica/nsxfobj.c b/drivers/acpi/acpica/nsxfobj.c
index c312cd4..32d372b 100644
--- a/drivers/acpi/acpica/nsxfobj.c
+++ b/drivers/acpi/acpica/nsxfobj.c
@@ -63,7 +63,7 @@
  * DESCRIPTION: This routine returns the type associated with a particular handle
  *
  ******************************************************************************/
-acpi_status acpi_get_type(acpi_handle handle, acpi_object_type * ret_type)
+acpi_status acpi_get_type(acpi_handle handle, acpi_object_type *ret_type)
 {
 	struct acpi_namespace_node *node;
 	acpi_status status;
@@ -115,7 +115,7 @@
  *              Handle.
  *
  ******************************************************************************/
-acpi_status acpi_get_parent(acpi_handle handle, acpi_handle * ret_handle)
+acpi_status acpi_get_parent(acpi_handle handle, acpi_handle *ret_handle)
 {
 	struct acpi_namespace_node *node;
 	struct acpi_namespace_node *parent_node;
@@ -183,7 +183,7 @@
 acpi_status
 acpi_get_next_object(acpi_object_type type,
 		     acpi_handle parent,
-		     acpi_handle child, acpi_handle * ret_handle)
+		     acpi_handle child, acpi_handle *ret_handle)
 {
 	acpi_status status;
 	struct acpi_namespace_node *node;
diff --git a/drivers/acpi/acpica/psargs.c b/drivers/acpi/acpica/psargs.c
index d48cbed..c29c930 100644
--- a/drivers/acpi/acpica/psargs.c
+++ b/drivers/acpi/acpica/psargs.c
@@ -87,7 +87,7 @@
 	 * used to encode the package length, either 0,1,2, or 3
 	 */
 	byte_count = (aml[0] >> 6);
-	parser_state->aml += ((acpi_size) byte_count + 1);
+	parser_state->aml += ((acpi_size)byte_count + 1);
 
 	/* Get bytes 3, 2, 1 as needed */
 
diff --git a/drivers/acpi/acpica/psopinfo.c b/drivers/acpi/acpica/psopinfo.c
index cfd17a4..177b05b 100644
--- a/drivers/acpi/acpica/psopinfo.c
+++ b/drivers/acpi/acpica/psopinfo.c
@@ -158,7 +158,7 @@
  *
  ******************************************************************************/
 
-char *acpi_ps_get_opcode_name(u16 opcode)
+const char *acpi_ps_get_opcode_name(u16 opcode)
 {
 #if defined(ACPI_DISASSEMBLER) || defined (ACPI_DEBUG_OUTPUT)
 
diff --git a/drivers/acpi/acpica/psparse.c b/drivers/acpi/acpica/psparse.c
index 8038ed2ac..0a23897 100644
--- a/drivers/acpi/acpica/psparse.c
+++ b/drivers/acpi/acpica/psparse.c
@@ -130,8 +130,8 @@
  ******************************************************************************/
 
 acpi_status
-acpi_ps_complete_this_op(struct acpi_walk_state * walk_state,
-			 union acpi_parse_object * op)
+acpi_ps_complete_this_op(struct acpi_walk_state *walk_state,
+			 union acpi_parse_object *op)
 {
 	union acpi_parse_object *prev;
 	union acpi_parse_object *next;
diff --git a/drivers/acpi/acpica/psutils.c b/drivers/acpi/acpica/psutils.c
index b28b0da..89cb4bf 100644
--- a/drivers/acpi/acpica/psutils.c
+++ b/drivers/acpi/acpica/psutils.c
@@ -128,7 +128,7 @@
 	if (op_info->flags & AML_DEFER) {
 		flags = ACPI_PARSEOP_DEFERRED;
 	} else if (op_info->flags & AML_NAMED) {
-		flags = ACPI_PARSEOP_NAMED;
+		flags = ACPI_PARSEOP_NAMED_OBJECT;
 	} else if (opcode == AML_INT_BYTELIST_OP) {
 		flags = ACPI_PARSEOP_BYTELIST;
 	}
diff --git a/drivers/acpi/acpica/psxface.c b/drivers/acpi/acpica/psxface.c
index 04b37fc..cf30cd82 100644
--- a/drivers/acpi/acpica/psxface.c
+++ b/drivers/acpi/acpica/psxface.c
@@ -115,7 +115,7 @@
  *
  ******************************************************************************/
 
-acpi_status acpi_ps_execute_method(struct acpi_evaluate_info * info)
+acpi_status acpi_ps_execute_method(struct acpi_evaluate_info *info)
 {
 	acpi_status status;
 	union acpi_parse_object *op;
diff --git a/drivers/acpi/acpica/rscalc.c b/drivers/acpi/acpica/rscalc.c
index 2b1209d..f1e83ad 100644
--- a/drivers/acpi/acpica/rscalc.c
+++ b/drivers/acpi/acpica/rscalc.c
@@ -112,7 +112,7 @@
 	 * resource_source_index (1).
 	 */
 	if (resource_source->string_ptr) {
-		return ((acpi_rs_length) (resource_source->string_length + 1));
+		return ((acpi_rs_length)(resource_source->string_length + 1));
 	}
 
 	return (0);
@@ -188,7 +188,7 @@
 
 acpi_status
 acpi_rs_get_aml_length(struct acpi_resource *resource,
-		       acpi_size resource_list_size, acpi_size * size_needed)
+		       acpi_size resource_list_size, acpi_size *size_needed)
 {
 	acpi_size aml_size_needed = 0;
 	struct acpi_resource *resource_end;
@@ -278,11 +278,11 @@
 			 * 16-Bit Address Resource:
 			 * Add the size of the optional resource_source info
 			 */
-			total_size = (acpi_rs_length) (total_size +
-						       acpi_rs_struct_option_length
-						       (&resource->data.
-							address16.
-							resource_source));
+			total_size = (acpi_rs_length)(total_size +
+						      acpi_rs_struct_option_length
+						      (&resource->data.
+						       address16.
+						       resource_source));
 			break;
 
 		case ACPI_RESOURCE_TYPE_ADDRESS32:
@@ -290,11 +290,11 @@
 			 * 32-Bit Address Resource:
 			 * Add the size of the optional resource_source info
 			 */
-			total_size = (acpi_rs_length) (total_size +
-						       acpi_rs_struct_option_length
-						       (&resource->data.
-							address32.
-							resource_source));
+			total_size = (acpi_rs_length)(total_size +
+						      acpi_rs_struct_option_length
+						      (&resource->data.
+						       address32.
+						       resource_source));
 			break;
 
 		case ACPI_RESOURCE_TYPE_ADDRESS64:
@@ -302,11 +302,11 @@
 			 * 64-Bit Address Resource:
 			 * Add the size of the optional resource_source info
 			 */
-			total_size = (acpi_rs_length) (total_size +
-						       acpi_rs_struct_option_length
-						       (&resource->data.
-							address64.
-							resource_source));
+			total_size = (acpi_rs_length)(total_size +
+						      acpi_rs_struct_option_length
+						      (&resource->data.
+						       address64.
+						       resource_source));
 			break;
 
 		case ACPI_RESOURCE_TYPE_EXTENDED_IRQ:
@@ -315,28 +315,28 @@
 			 * Add the size of each additional optional interrupt beyond the
 			 * required 1 (4 bytes for each u32 interrupt number)
 			 */
-			total_size = (acpi_rs_length) (total_size +
-						       ((resource->data.
-							 extended_irq.
-							 interrupt_count -
-							 1) * 4) +
-						       /* Add the size of the optional resource_source info */
-						       acpi_rs_struct_option_length
-						       (&resource->data.
+			total_size = (acpi_rs_length)(total_size +
+						      ((resource->data.
 							extended_irq.
-							resource_source));
+							interrupt_count -
+							1) * 4) +
+						      /* Add the size of the optional resource_source info */
+						      acpi_rs_struct_option_length
+						      (&resource->data.
+						       extended_irq.
+						       resource_source));
 			break;
 
 		case ACPI_RESOURCE_TYPE_GPIO:
 
-			total_size = (acpi_rs_length) (total_size +
-						       (resource->data.gpio.
-							pin_table_length * 2) +
-						       resource->data.gpio.
-						       resource_source.
-						       string_length +
-						       resource->data.gpio.
-						       vendor_length);
+			total_size = (acpi_rs_length)(total_size +
+						      (resource->data.gpio.
+						       pin_table_length * 2) +
+						      resource->data.gpio.
+						      resource_source.
+						      string_length +
+						      resource->data.gpio.
+						      vendor_length);
 
 			break;
 
@@ -348,14 +348,14 @@
 								   common_serial_bus.
 								   type];
 
-			total_size = (acpi_rs_length) (total_size +
-						       resource->data.
-						       i2c_serial_bus.
-						       resource_source.
-						       string_length +
-						       resource->data.
-						       i2c_serial_bus.
-						       vendor_length);
+			total_size = (acpi_rs_length)(total_size +
+						      resource->data.
+						      i2c_serial_bus.
+						      resource_source.
+						      string_length +
+						      resource->data.
+						      i2c_serial_bus.
+						      vendor_length);
 
 			break;
 
@@ -397,8 +397,8 @@
  ******************************************************************************/
 
 acpi_status
-acpi_rs_get_list_length(u8 * aml_buffer,
-			u32 aml_buffer_length, acpi_size * size_needed)
+acpi_rs_get_list_length(u8 *aml_buffer,
+			u32 aml_buffer_length, acpi_size *size_needed)
 {
 	acpi_status status;
 	u8 *end_aml;
@@ -610,7 +610,7 @@
 
 acpi_status
 acpi_rs_get_pci_routing_table_length(union acpi_operand_object *package_object,
-				     acpi_size * buffer_size_needed)
+				     acpi_size *buffer_size_needed)
 {
 	u32 number_of_elements;
 	acpi_size temp_size_needed = 0;
diff --git a/drivers/acpi/acpica/rscreate.c b/drivers/acpi/acpica/rscreate.c
index 1297889..809b61c 100644
--- a/drivers/acpi/acpica/rscreate.c
+++ b/drivers/acpi/acpica/rscreate.c
@@ -347,7 +347,7 @@
 					   (u8 *) output_buffer->pointer);
 				path_buffer.pointer = user_prt->source;
 
-				status = acpi_ns_handle_to_pathname((acpi_handle) node, &path_buffer, FALSE);
+				status = acpi_ns_handle_to_pathname((acpi_handle)node, &path_buffer, FALSE);
 
 				/* +1 to include null terminator */
 
diff --git a/drivers/acpi/acpica/rsdump.c b/drivers/acpi/acpica/rsdump.c
index 23a17c8..5ffdb56 100644
--- a/drivers/acpi/acpica/rsdump.c
+++ b/drivers/acpi/acpica/rsdump.c
@@ -52,17 +52,17 @@
  * All functions in this module are used by the AML Debugger only
  */
 /* Local prototypes */
-static void acpi_rs_out_string(char *title, char *value);
+static void acpi_rs_out_string(const char *title, const char *value);
 
-static void acpi_rs_out_integer8(char *title, u8 value);
+static void acpi_rs_out_integer8(const char *title, u8 value);
 
-static void acpi_rs_out_integer16(char *title, u16 value);
+static void acpi_rs_out_integer16(const char *title, u16 value);
 
-static void acpi_rs_out_integer32(char *title, u32 value);
+static void acpi_rs_out_integer32(const char *title, u32 value);
 
-static void acpi_rs_out_integer64(char *title, u64 value);
+static void acpi_rs_out_integer64(const char *title, u64 value);
 
-static void acpi_rs_out_title(char *title);
+static void acpi_rs_out_title(const char *title);
 
 static void acpi_rs_dump_byte_list(u16 length, u8 *data);
 
@@ -208,7 +208,7 @@
 {
 	u8 *target = NULL;
 	u8 *previous_target;
-	char *name;
+	const char *name;
 	u8 count;
 
 	/* First table entry must contain the table length (# of table entries) */
@@ -248,10 +248,8 @@
 		case ACPI_RSD_UINT8:
 
 			if (table->pointer) {
-				acpi_rs_out_string(name, ACPI_CAST_PTR(char,
-								       table->
-								       pointer
-								       [*target]));
+				acpi_rs_out_string(name,
+						   table->pointer[*target]);
 			} else {
 				acpi_rs_out_integer8(name, ACPI_GET8(target));
 			}
@@ -276,26 +274,20 @@
 
 		case ACPI_RSD_1BITFLAG:
 
-			acpi_rs_out_string(name, ACPI_CAST_PTR(char,
-							       table->
-							       pointer[*target &
-								       0x01]));
+			acpi_rs_out_string(name,
+					   table->pointer[*target & 0x01]);
 			break;
 
 		case ACPI_RSD_2BITFLAG:
 
-			acpi_rs_out_string(name, ACPI_CAST_PTR(char,
-							       table->
-							       pointer[*target &
-								       0x03]));
+			acpi_rs_out_string(name,
+					   table->pointer[*target & 0x03]);
 			break;
 
 		case ACPI_RSD_3BITFLAG:
 
-			acpi_rs_out_string(name, ACPI_CAST_PTR(char,
-							       table->
-							       pointer[*target &
-								       0x07]));
+			acpi_rs_out_string(name,
+					   table->pointer[*target & 0x07]);
 			break;
 
 		case ACPI_RSD_SHORTLIST:
@@ -481,7 +473,7 @@
  *
  ******************************************************************************/
 
-static void acpi_rs_out_string(char *title, char *value)
+static void acpi_rs_out_string(const char *title, const char *value)
 {
 
 	acpi_os_printf("%27s : %s", title, value);
@@ -491,30 +483,30 @@
 	acpi_os_printf("\n");
 }
 
-static void acpi_rs_out_integer8(char *title, u8 value)
+static void acpi_rs_out_integer8(const char *title, u8 value)
 {
 	acpi_os_printf("%27s : %2.2X\n", title, value);
 }
 
-static void acpi_rs_out_integer16(char *title, u16 value)
+static void acpi_rs_out_integer16(const char *title, u16 value)
 {
 
 	acpi_os_printf("%27s : %4.4X\n", title, value);
 }
 
-static void acpi_rs_out_integer32(char *title, u32 value)
+static void acpi_rs_out_integer32(const char *title, u32 value)
 {
 
 	acpi_os_printf("%27s : %8.8X\n", title, value);
 }
 
-static void acpi_rs_out_integer64(char *title, u64 value)
+static void acpi_rs_out_integer64(const char *title, u64 value)
 {
 
 	acpi_os_printf("%27s : %8.8X%8.8X\n", title, ACPI_FORMAT_UINT64(value));
 }
 
-static void acpi_rs_out_title(char *title)
+static void acpi_rs_out_title(const char *title)
 {
 
 	acpi_os_printf("%27s : ", title);
diff --git a/drivers/acpi/acpica/rsdumpinfo.c b/drivers/acpi/acpica/rsdumpinfo.c
index 5c34913..61e8f16 100644
--- a/drivers/acpi/acpica/rsdumpinfo.c
+++ b/drivers/acpi/acpica/rsdumpinfo.c
@@ -330,19 +330,20 @@
 	{ACPI_RSD_UINT8,    ACPI_RSD_OFFSET (common_serial_bus.type),           "Type",                     acpi_gbl_sbt_decode}, \
 	{ACPI_RSD_1BITFLAG, ACPI_RSD_OFFSET (common_serial_bus.producer_consumer), "ProducerConsumer",      acpi_gbl_consume_decode}, \
 	{ACPI_RSD_1BITFLAG, ACPI_RSD_OFFSET (common_serial_bus.slave_mode),     "SlaveMode",                acpi_gbl_sm_decode}, \
+	{ACPI_RSD_1BITFLAG, ACPI_RSD_OFFSET (common_serial_bus.connection_sharing),"ConnectionSharing",     acpi_gbl_shr_decode}, \
 	{ACPI_RSD_UINT8,    ACPI_RSD_OFFSET (common_serial_bus.type_revision_id), "TypeRevisionId",         NULL}, \
 	{ACPI_RSD_UINT16,   ACPI_RSD_OFFSET (common_serial_bus.type_data_length), "TypeDataLength",         NULL}, \
 	{ACPI_RSD_SOURCE,   ACPI_RSD_OFFSET (common_serial_bus.resource_source), "ResourceSource",          NULL}, \
 	{ACPI_RSD_UINT16,   ACPI_RSD_OFFSET (common_serial_bus.vendor_length),  "VendorLength",             NULL}, \
 	{ACPI_RSD_SHORTLISTX,ACPI_RSD_OFFSET (common_serial_bus.vendor_data),   "VendorData",               NULL},
 
-struct acpi_rsdump_info acpi_rs_dump_common_serial_bus[10] = {
+struct acpi_rsdump_info acpi_rs_dump_common_serial_bus[11] = {
 	{ACPI_RSD_TITLE, ACPI_RSD_TABLE_SIZE(acpi_rs_dump_common_serial_bus),
 	 "Common Serial Bus", NULL},
 	ACPI_RS_DUMP_COMMON_SERIAL_BUS
 };
 
-struct acpi_rsdump_info acpi_rs_dump_i2c_serial_bus[13] = {
+struct acpi_rsdump_info acpi_rs_dump_i2c_serial_bus[14] = {
 	{ACPI_RSD_TITLE, ACPI_RSD_TABLE_SIZE(acpi_rs_dump_i2c_serial_bus),
 	 "I2C Serial Bus", NULL},
 	ACPI_RS_DUMP_COMMON_SERIAL_BUS {ACPI_RSD_1BITFLAG,
@@ -355,7 +356,7 @@
 	 "SlaveAddress", NULL},
 };
 
-struct acpi_rsdump_info acpi_rs_dump_spi_serial_bus[17] = {
+struct acpi_rsdump_info acpi_rs_dump_spi_serial_bus[18] = {
 	{ACPI_RSD_TITLE, ACPI_RSD_TABLE_SIZE(acpi_rs_dump_spi_serial_bus),
 	 "Spi Serial Bus", NULL},
 	ACPI_RS_DUMP_COMMON_SERIAL_BUS {ACPI_RSD_1BITFLAG,
@@ -376,7 +377,7 @@
 	 "ConnectionSpeed", NULL},
 };
 
-struct acpi_rsdump_info acpi_rs_dump_uart_serial_bus[19] = {
+struct acpi_rsdump_info acpi_rs_dump_uart_serial_bus[20] = {
 	{ACPI_RSD_TITLE, ACPI_RSD_TABLE_SIZE(acpi_rs_dump_uart_serial_bus),
 	 "Uart Serial Bus", NULL},
 	ACPI_RS_DUMP_COMMON_SERIAL_BUS {ACPI_RSD_2BITFLAG,
diff --git a/drivers/acpi/acpica/rsmisc.c b/drivers/acpi/acpica/rsmisc.c
index ce3d0b7..25165ca 100644
--- a/drivers/acpi/acpica/rsmisc.c
+++ b/drivers/acpi/acpica/rsmisc.c
@@ -87,7 +87,7 @@
 		return_ACPI_STATUS(AE_BAD_PARAMETER);
 	}
 
-	if (((acpi_size) resource) & 0x3) {
+	if (((acpi_size)resource) & 0x3) {
 
 		/* Each internal resource struct is expected to be 32-bit aligned */
 
diff --git a/drivers/acpi/acpica/rsserial.c b/drivers/acpi/acpica/rsserial.c
index 8a01296..b82c061 100644
--- a/drivers/acpi/acpica/rsserial.c
+++ b/drivers/acpi/acpica/rsserial.c
@@ -151,7 +151,7 @@
  *
  ******************************************************************************/
 
-struct acpi_rsconvert_info acpi_rs_convert_i2c_serial_bus[16] = {
+struct acpi_rsconvert_info acpi_rs_convert_i2c_serial_bus[17] = {
 	{ACPI_RSC_INITGET, ACPI_RESOURCE_TYPE_SERIAL_BUS,
 	 ACPI_RS_SIZE(struct acpi_resource_i2c_serialbus),
 	 ACPI_RSC_TABLE_SIZE(acpi_rs_convert_i2c_serial_bus)},
@@ -177,6 +177,11 @@
 	 AML_OFFSET(common_serial_bus.flags),
 	 1},
 
+	{ACPI_RSC_1BITFLAG,
+	 ACPI_RS_OFFSET(data.common_serial_bus.connection_sharing),
+	 AML_OFFSET(common_serial_bus.flags),
+	 2},
+
 	{ACPI_RSC_MOVE8,
 	 ACPI_RS_OFFSET(data.common_serial_bus.type_revision_id),
 	 AML_OFFSET(common_serial_bus.type_revision_id),
@@ -237,7 +242,7 @@
  *
  ******************************************************************************/
 
-struct acpi_rsconvert_info acpi_rs_convert_spi_serial_bus[20] = {
+struct acpi_rsconvert_info acpi_rs_convert_spi_serial_bus[21] = {
 	{ACPI_RSC_INITGET, ACPI_RESOURCE_TYPE_SERIAL_BUS,
 	 ACPI_RS_SIZE(struct acpi_resource_spi_serialbus),
 	 ACPI_RSC_TABLE_SIZE(acpi_rs_convert_spi_serial_bus)},
@@ -263,6 +268,11 @@
 	 AML_OFFSET(common_serial_bus.flags),
 	 1},
 
+	{ACPI_RSC_1BITFLAG,
+	 ACPI_RS_OFFSET(data.common_serial_bus.connection_sharing),
+	 AML_OFFSET(common_serial_bus.flags),
+	 2},
+
 	{ACPI_RSC_MOVE8,
 	 ACPI_RS_OFFSET(data.common_serial_bus.type_revision_id),
 	 AML_OFFSET(common_serial_bus.type_revision_id),
@@ -339,7 +349,7 @@
  *
  ******************************************************************************/
 
-struct acpi_rsconvert_info acpi_rs_convert_uart_serial_bus[22] = {
+struct acpi_rsconvert_info acpi_rs_convert_uart_serial_bus[23] = {
 	{ACPI_RSC_INITGET, ACPI_RESOURCE_TYPE_SERIAL_BUS,
 	 ACPI_RS_SIZE(struct acpi_resource_uart_serialbus),
 	 ACPI_RSC_TABLE_SIZE(acpi_rs_convert_uart_serial_bus)},
@@ -365,6 +375,11 @@
 	 AML_OFFSET(common_serial_bus.flags),
 	 1},
 
+	{ACPI_RSC_1BITFLAG,
+	 ACPI_RS_OFFSET(data.common_serial_bus.connection_sharing),
+	 AML_OFFSET(common_serial_bus.flags),
+	 2},
+
 	{ACPI_RSC_MOVE8,
 	 ACPI_RS_OFFSET(data.common_serial_bus.type_revision_id),
 	 AML_OFFSET(common_serial_bus.type_revision_id),
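
Each new table entry above extracts bit 2 of the serial-bus Flags byte as the ConnectionSharing value. In isolation, a 1-bit flag extraction of that kind amounts to the following (a trivial sketch; get_1bit_flag is illustrative, not an ACPICA helper):

    #include <stdint.h>
    #include <stdio.h>

    /* Extract a single flag bit at the given position. */
    static uint8_t get_1bit_flag(uint8_t flags, uint8_t position)
    {
        return (uint8_t)((flags >> position) & 0x01);
    }

    int main(void)
    {
        uint8_t flags = 0x04; /* only bit 2 set */

        printf("ConnectionSharing=%u\n", get_1bit_flag(flags, 2));
        return 0;
    }
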
diff --git a/drivers/acpi/acpica/rsutils.c b/drivers/acpi/acpica/rsutils.c
index cf06e49..fa491c6 100644
--- a/drivers/acpi/acpica/rsutils.c
+++ b/drivers/acpi/acpica/rsutils.c
@@ -338,7 +338,7 @@
 	 * Note: Some resource descriptors will have an additional null, so
 	 * we add 1 to the minimum length.
 	 */
-	if (total_length > (acpi_rsdesc_size) (minimum_length + 1)) {
+	if (total_length > (acpi_rsdesc_size)(minimum_length + 1)) {
 
 		/* Get the resource_source_index */
 
@@ -377,7 +377,7 @@
 				   ACPI_CAST_PTR(char,
 						 &aml_resource_source[1]));
 
-		return ((acpi_rs_length) total_length);
+		return ((acpi_rs_length)total_length);
 	}
 
 	/* resource_source is not present */
@@ -406,9 +406,9 @@
  ******************************************************************************/
 
 acpi_rsdesc_size
-acpi_rs_set_resource_source(union aml_resource * aml,
+acpi_rs_set_resource_source(union aml_resource *aml,
 			    acpi_rs_length minimum_length,
-			    struct acpi_resource_source * resource_source)
+			    struct acpi_resource_source *resource_source)
 {
 	u8 *aml_resource_source;
 	acpi_rsdesc_size descriptor_length;
@@ -466,8 +466,8 @@
  ******************************************************************************/
 
 acpi_status
-acpi_rs_get_prt_method_data(struct acpi_namespace_node * node,
-			    struct acpi_buffer * ret_buffer)
+acpi_rs_get_prt_method_data(struct acpi_namespace_node *node,
+			    struct acpi_buffer *ret_buffer)
 {
 	union acpi_operand_object *obj_desc;
 	acpi_status status;
@@ -671,7 +671,7 @@
 
 acpi_status
 acpi_rs_get_method_data(acpi_handle handle,
-			char *path, struct acpi_buffer *ret_buffer)
+			const char *path, struct acpi_buffer *ret_buffer)
 {
 	union acpi_operand_object *obj_desc;
 	acpi_status status;
diff --git a/drivers/acpi/acpica/rsxface.c b/drivers/acpi/acpica/rsxface.c
index 900933b..465ed81 100644
--- a/drivers/acpi/acpica/rsxface.c
+++ b/drivers/acpi/acpica/rsxface.c
@@ -433,8 +433,8 @@
 acpi_status
 acpi_get_vendor_resource(acpi_handle device_handle,
 			 char *name,
-			 struct acpi_vendor_uuid * uuid,
-			 struct acpi_buffer * ret_buffer)
+			 struct acpi_vendor_uuid *uuid,
+			 struct acpi_buffer *ret_buffer)
 {
 	struct acpi_vendor_walk_info info;
 	acpi_status status;
@@ -539,7 +539,7 @@
  ******************************************************************************/
 
 acpi_status
-acpi_walk_resource_buffer(struct acpi_buffer * buffer,
+acpi_walk_resource_buffer(struct acpi_buffer *buffer,
 			  acpi_walk_resource_callback user_function,
 			  void *context)
 {
diff --git a/drivers/acpi/acpica/tbdata.c b/drivers/acpi/acpica/tbdata.c
index 7da79ce..1388a19 100644
--- a/drivers/acpi/acpica/tbdata.c
+++ b/drivers/acpi/acpica/tbdata.c
@@ -368,7 +368,7 @@
  *****************************************************************************/
 
 acpi_status
-acpi_tb_verify_temp_table(struct acpi_table_desc * table_desc, char *signature)
+acpi_tb_verify_temp_table(struct acpi_table_desc *table_desc, char *signature)
 {
 	acpi_status status = AE_OK;
 
@@ -401,9 +401,9 @@
 			ACPI_EXCEPTION((AE_INFO, AE_NO_MEMORY,
 					"%4.4s 0x%8.8X%8.8X"
 					" Attempted table install failed",
-					acpi_ut_valid_acpi_name(table_desc->
-								signature.
-								ascii) ?
+					acpi_ut_valid_nameseg(table_desc->
+							      signature.
+							      ascii) ?
 					table_desc->signature.ascii : "????",
 					ACPI_FORMAT_UINT64(table_desc->
 							   address)));
@@ -454,7 +454,7 @@
 		table_count = acpi_gbl_root_table_list.current_table_count;
 	}
 
-	tables = ACPI_ALLOCATE_ZEROED(((acpi_size) table_count +
+	tables = ACPI_ALLOCATE_ZEROED(((acpi_size)table_count +
 				       ACPI_ROOT_TABLE_SIZE_INCREMENT) *
 				      sizeof(struct acpi_table_desc));
 	if (!tables) {
@@ -467,8 +467,7 @@
 
 	if (acpi_gbl_root_table_list.tables) {
 		memcpy(tables, acpi_gbl_root_table_list.tables,
-		       (acpi_size) table_count *
-		       sizeof(struct acpi_table_desc));
+		       (acpi_size)table_count * sizeof(struct acpi_table_desc));
 
 		if (acpi_gbl_root_table_list.flags & ACPI_ROOT_ORIGIN_ALLOCATED) {
 			ACPI_FREE(acpi_gbl_root_table_list.tables);
@@ -701,7 +700,7 @@
  *
  ******************************************************************************/
 
-acpi_status acpi_tb_get_owner_id(u32 table_index, acpi_owner_id * owner_id)
+acpi_status acpi_tb_get_owner_id(u32 table_index, acpi_owner_id *owner_id)
 {
 	acpi_status status = AE_BAD_PARAMETER;
 
diff --git a/drivers/acpi/acpica/tbfadt.c b/drivers/acpi/acpica/tbfadt.c
index a79e4f3..6208069 100644
--- a/drivers/acpi/acpica/tbfadt.c
+++ b/drivers/acpi/acpica/tbfadt.c
@@ -53,7 +53,7 @@
 acpi_tb_init_generic_address(struct acpi_generic_address *generic_address,
 			     u8 space_id,
 			     u8 byte_width,
-			     u64 address, char *register_name, u8 flags);
+			     u64 address, const char *register_name, u8 flags);
 
 static void acpi_tb_convert_fadt(void);
 
@@ -65,7 +65,7 @@
 /* Table for conversion of FADT to common internal format and FADT validation */
 
 typedef struct acpi_fadt_info {
-	char *name;
+	const char *name;
 	u16 address64;
 	u16 address32;
 	u16 length;
@@ -192,7 +192,7 @@
 acpi_tb_init_generic_address(struct acpi_generic_address *generic_address,
 			     u8 space_id,
 			     u8 byte_width,
-			     u64 address, char *register_name, u8 flags)
+			     u64 address, const char *register_name, u8 flags)
 {
 	u8 bit_width;
 
@@ -344,7 +344,7 @@
 
 	/* Obtain the DSDT and FACS tables via their addresses within the FADT */
 
-	acpi_tb_install_fixed_table((acpi_physical_address) acpi_gbl_FADT.Xdsdt,
+	acpi_tb_install_fixed_table((acpi_physical_address)acpi_gbl_FADT.Xdsdt,
 				    ACPI_SIG_DSDT, &acpi_gbl_dsdt_index);
 
 	/* If Hardware Reduced flag is set, there is no FACS */
@@ -385,14 +385,15 @@
 {
 	/*
 	 * Check if the FADT is larger than the largest table that we expect
-	 * (the ACPI 5.0 version). If so, truncate the table, and issue
-	 * a warning.
+	 * (typically the current ACPI specification version). If so, truncate
+	 * the table, and issue a warning.
 	 */
 	if (length > sizeof(struct acpi_table_fadt)) {
 		ACPI_BIOS_WARNING((AE_INFO,
-				   "FADT (revision %u) is longer than ACPI 5.0 version, "
+				   "FADT (revision %u) is longer than %s length, "
 				   "truncating length %u to %u",
-				   table->revision, length,
+				   table->revision, ACPI_FADT_CONFORMANCE,
+				   length,
 				   (u32)sizeof(struct acpi_table_fadt)));
 	}
 
@@ -467,7 +468,7 @@
 
 static void acpi_tb_convert_fadt(void)
 {
-	char *name;
+	const char *name;
 	struct acpi_generic_address *address64;
 	u32 address32;
 	u8 length;
@@ -646,9 +647,12 @@
 			if ((address64->address && !length) ||
 			    (!address64->address && length)) {
 				ACPI_BIOS_WARNING((AE_INFO,
-						   "Optional FADT field %s has zero address or length: "
-						   "0x%8.8X%8.8X/0x%X",
-						   name,
+						   "Optional FADT field %s has valid %s but zero %s: "
+						   "0x%8.8X%8.8X/0x%X", name,
+						   (length ? "Length" :
+						    "Address"),
+						   (length ? "Address" :
+						    "Length"),
 						   ACPI_FORMAT_UINT64
 						   (address64->address),
 						   length));
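
The warning above fires when an optional GAS field is half-populated. The consistency rule itself is simple: address and length must be both zero or both nonzero, as in this sketch (gas_field_consistent is illustrative):

    #include <stdint.h>
    #include <stdio.h>

    /* An optional FADT GAS field is consistent only if its address and
     * length are both zero or both nonzero. */
    static int gas_field_consistent(uint64_t address, uint8_t length)
    {
        return (address != 0) == (length != 0);
    }

    int main(void)
    {
        printf("%d %d\n", gas_field_consistent(0x1000, 4),
               gas_field_consistent(0x1000, 0)); /* prints 1 0 */
        return 0;
    }
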
diff --git a/drivers/acpi/acpica/tbfind.c b/drivers/acpi/acpica/tbfind.c
index f2d0803..e348d61 100644
--- a/drivers/acpi/acpica/tbfind.c
+++ b/drivers/acpi/acpica/tbfind.c
@@ -76,7 +76,7 @@
 
 	/* Validate the input table signature */
 
-	if (!acpi_is_valid_signature(signature)) {
+	if (!acpi_ut_valid_nameseg(signature)) {
 		return_ACPI_STATUS(AE_BAD_SIGNATURE);
 	}
 
diff --git a/drivers/acpi/acpica/tbinstal.c b/drivers/acpi/acpica/tbinstal.c
index 4dc6108..8b13052 100644
--- a/drivers/acpi/acpica/tbinstal.c
+++ b/drivers/acpi/acpica/tbinstal.c
@@ -299,9 +299,9 @@
 			ACPI_BIOS_ERROR((AE_INFO,
 					 "Table has invalid signature [%4.4s] (0x%8.8X), "
 					 "must be SSDT or OEMx",
-					 acpi_ut_valid_acpi_name(new_table_desc.
-								 signature.
-								 ascii) ?
+					 acpi_ut_valid_nameseg(new_table_desc.
+							       signature.
+							       ascii) ?
 					 new_table_desc.signature.
 					 ascii : "????",
 					 new_table_desc.signature.integer));
diff --git a/drivers/acpi/acpica/tbutils.c b/drivers/acpi/acpica/tbutils.c
index 9240c76..e285539 100644
--- a/drivers/acpi/acpica/tbutils.c
+++ b/drivers/acpi/acpica/tbutils.c
@@ -231,7 +231,7 @@
 					   ACPI_FORMAT_UINT64(address64)));
 		}
 #endif
-		return ((acpi_physical_address) (address64));
+		return ((acpi_physical_address)(address64));
 	}
 }
 
@@ -287,12 +287,12 @@
 		 * the XSDT if the revision is > 1 and the XSDT pointer is present,
 		 * as per the ACPI specification.
 		 */
-		address = (acpi_physical_address) rsdp->xsdt_physical_address;
+		address = (acpi_physical_address)rsdp->xsdt_physical_address;
 		table_entry_size = ACPI_XSDT_ENTRY_SIZE;
 	} else {
 		/* Root table is an RSDT (32-bit physical addresses) */
 
-		address = (acpi_physical_address) rsdp->rsdt_physical_address;
+		address = (acpi_physical_address)rsdp->rsdt_physical_address;
 		table_entry_size = ACPI_RSDT_ENTRY_SIZE;
 	}
 
@@ -380,30 +380,3 @@
 	acpi_os_unmap_memory(table, length);
 	return_ACPI_STATUS(AE_OK);
 }
-
-/*******************************************************************************
- *
- * FUNCTION:    acpi_is_valid_signature
- *
- * PARAMETERS:  signature           - Sig string to be validated
- *
- * RETURN:      TRUE if signature is has 4 valid ACPI characters
- *
- * DESCRIPTION: Validate an ACPI table signature.
- *
- ******************************************************************************/
-
-u8 acpi_is_valid_signature(char *signature)
-{
-	u32 i;
-
-	/* Validate each character in the signature */
-
-	for (i = 0; i < ACPI_NAME_SIZE; i++) {
-		if (!acpi_ut_valid_acpi_char(signature[i], i)) {
-			return (FALSE);
-		}
-	}
-
-	return (TRUE);
-}
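
The replacement helper, acpi_ut_valid_nameseg (introduced in utascii.c below), performs the same per-character walk over a 4-character name segment. A simplified user-space approximation that accepts uppercase letters, digits, and underscore (the exact character rules live in acpi_ut_valid_acpi_char):

    #include <stdio.h>

    /* Simplified 4-character nameseg check. */
    static int valid_nameseg(const char *name)
    {
        int i;

        for (i = 0; i < 4; i++) {
            char c = name[i];

            if (!(c == '_' || (c >= 'A' && c <= 'Z') ||
                  (c >= '0' && c <= '9')))
                return 0;
        }
        return 1;
    }

    int main(void)
    {
        printf("%d %d\n", valid_nameseg("_SB_"), valid_nameseg("bad!"));
        return 0;
    }
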
diff --git a/drivers/acpi/acpica/tbxface.c b/drivers/acpi/acpica/tbxface.c
index 326df65..3ecec93 100644
--- a/drivers/acpi/acpica/tbxface.c
+++ b/drivers/acpi/acpica/tbxface.c
@@ -99,7 +99,7 @@
  ******************************************************************************/
 
 acpi_status __init
-acpi_initialize_tables(struct acpi_table_desc * initial_table_array,
+acpi_initialize_tables(struct acpi_table_desc *initial_table_array,
 		       u32 initial_table_count, u8 allow_resize)
 {
 	acpi_physical_address rsdp_address;
@@ -120,7 +120,7 @@
 		/* Root Table Array has been statically allocated by the host */
 
 		memset(initial_table_array, 0,
-		       (acpi_size) initial_table_count *
+		       (acpi_size)initial_table_count *
 		       sizeof(struct acpi_table_desc));
 
 		acpi_gbl_root_table_list.tables = initial_table_array;
@@ -352,7 +352,7 @@
  *
  ******************************************************************************/
 acpi_status
-acpi_get_table_by_index(u32 table_index, struct acpi_table_header ** table)
+acpi_get_table_by_index(u32 table_index, struct acpi_table_header **table)
 {
 	acpi_status status;
 
diff --git a/drivers/acpi/acpica/tbxfload.c b/drivers/acpi/acpica/tbxfload.c
index 3151968..ac71abc 100644
--- a/drivers/acpi/acpica/tbxfload.c
+++ b/drivers/acpi/acpica/tbxfload.c
@@ -82,7 +82,7 @@
 	 * their customized default region handlers.
 	 */
 	status = acpi_ev_install_region_handlers();
-	if (ACPI_FAILURE(status) && status != AE_ALREADY_EXISTS) {
+	if (ACPI_FAILURE(status)) {
 		ACPI_EXCEPTION((AE_INFO, status,
 				"During Region initialization"));
 		return_ACPI_STATUS(status);
diff --git a/drivers/acpi/acpica/tbxfroot.c b/drivers/acpi/acpica/tbxfroot.c
index b9a78e4..adb6cfc 100644
--- a/drivers/acpi/acpica/tbxfroot.c
+++ b/drivers/acpi/acpica/tbxfroot.c
@@ -90,7 +90,7 @@
  *
  ******************************************************************************/
 
-acpi_status acpi_tb_validate_rsdp(struct acpi_table_rsdp * rsdp)
+acpi_status acpi_tb_validate_rsdp(struct acpi_table_rsdp *rsdp)
 {
 
 	/*
@@ -142,7 +142,7 @@
  *
  ******************************************************************************/
 
-acpi_status __init acpi_find_root_pointer(acpi_physical_address * table_address)
+acpi_status __init acpi_find_root_pointer(acpi_physical_address *table_address)
 {
 	u8 *table_ptr;
 	u8 *mem_rover;
@@ -201,7 +201,7 @@
 			    (u32) ACPI_PTR_DIFF(mem_rover, table_ptr);
 
 			*table_address =
-			    (acpi_physical_address) physical_address;
+			    (acpi_physical_address)physical_address;
 			return_ACPI_STATUS(AE_OK);
 		}
 	}
@@ -234,7 +234,7 @@
 		    (ACPI_HI_RSDP_WINDOW_BASE +
 		     ACPI_PTR_DIFF(mem_rover, table_ptr));
 
-		*table_address = (acpi_physical_address) physical_address;
+		*table_address = (acpi_physical_address)physical_address;
 		return_ACPI_STATUS(AE_OK);
 	}
 
diff --git a/drivers/acpi/acpica/utalloc.c b/drivers/acpi/acpica/utalloc.c
index 3dbdc3a..13324a2 100644
--- a/drivers/acpi/acpica/utalloc.c
+++ b/drivers/acpi/acpica/utalloc.c
@@ -231,7 +231,7 @@
  *
  ******************************************************************************/
 
-acpi_status acpi_ut_validate_buffer(struct acpi_buffer * buffer)
+acpi_status acpi_ut_validate_buffer(struct acpi_buffer *buffer)
 {
 
 	/* Obviously, the structure pointer must be valid */
@@ -272,8 +272,7 @@
  ******************************************************************************/
 
 acpi_status
-acpi_ut_initialize_buffer(struct acpi_buffer * buffer,
-			  acpi_size required_length)
+acpi_ut_initialize_buffer(struct acpi_buffer *buffer, acpi_size required_length)
 {
 	acpi_size input_buffer_length;
 
diff --git a/drivers/acpi/acpica/utascii.c b/drivers/acpi/acpica/utascii.c
new file mode 100644
index 0000000..706c1f3
--- /dev/null
+++ b/drivers/acpi/acpica/utascii.c
@@ -0,0 +1,140 @@
+/******************************************************************************
+ *
+ * Module Name: utascii - Utility ascii functions
+ *
+ *****************************************************************************/
+
+/*
+ * Copyright (C) 2000 - 2016, Intel Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *    substantially similar to the "NO WARRANTY" disclaimer below
+ *    ("Disclaimer") and any redistribution must be conditioned upon
+ *    including a substantially similar Disclaimer requirement for further
+ *    binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ *    of any contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ */
+
+#include <acpi/acpi.h>
+#include "accommon.h"
+
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_ut_valid_nameseg
+ *
+ * PARAMETERS:  name            - The name or table signature to be examined.
+ *                                Four characters; does not have to be a
+ *                                NULL-terminated string.
+ *
+ * RETURN:      TRUE if the signature has 4 valid ACPI characters
+ *
+ * DESCRIPTION: Validate an ACPI table signature.
+ *
+ ******************************************************************************/
+
+u8 acpi_ut_valid_nameseg(char *name)
+{
+	u32 i;
+
+	/* Validate each character in the signature */
+
+	for (i = 0; i < ACPI_NAME_SIZE; i++) {
+		if (!acpi_ut_valid_name_char(name[i], i)) {
+			return (FALSE);
+		}
+	}
+
+	return (TRUE);
+}
+
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_ut_valid_name_char
+ *
+ * PARAMETERS:  character       - The character to be examined
+ *              position        - Byte position (0-3)
+ *
+ * RETURN:      TRUE if the character is valid, FALSE otherwise
+ *
+ * DESCRIPTION: Check for a valid ACPI character. Must be one of:
+ *              1) Upper case alpha
+ *              2) numeric
+ *              3) underscore
+ *
+ *              We allow a '!' as the last character because of the ASF! table
+ *
+ ******************************************************************************/
+
+u8 acpi_ut_valid_name_char(char character, u32 position)
+{
+
+	if (!((character >= 'A' && character <= 'Z') ||
+	      (character >= '0' && character <= '9') || (character == '_'))) {
+
+		/* Allow a '!' in the last position */
+
+		if (character == '!' && position == 3) {
+			return (TRUE);
+		}
+
+		return (FALSE);
+	}
+
+	return (TRUE);
+}
+
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_ut_check_and_repair_ascii
+ *
+ * PARAMETERS:  name                - Ascii string
+ *              count               - Number of characters to check
+ *
+ * RETURN:      None
+ *
+ * DESCRIPTION: Ensure that the requested number of characters are printable
+ *              Ascii characters. Sets non-printable chars to <space>; a
+ *              null terminator is copied as-is and ends the scan.
+ *
+ ******************************************************************************/
+
+void acpi_ut_check_and_repair_ascii(u8 *name, char *repaired_name, u32 count)
+{
+	u32 i;
+
+	for (i = 0; i < count; i++) {
+		repaired_name[i] = (char)name[i];
+
+		if (!name[i]) {
+			return;
+		}
+		if (!isprint(name[i])) {
+			repaired_name[i] = ' ';
+		}
+	}
+}
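
A side note on the new predicate: the only accepted characters are upper-case alpha, digits, and underscore, with '!' tolerated in the last byte solely for the ASF! table. For readers without the ACPICA tree at hand, a minimal standalone sketch of the same check (illustrative names, not the ACPICA API):

#include <stdio.h>

/* Mirror of the 4-character nameseg validation above */
static int valid_nameseg(const char *name)
{
	for (int i = 0; i < 4; i++) {
		char c = name[i];

		if ((c >= 'A' && c <= 'Z') ||
		    (c >= '0' && c <= '9') ||
		    (c == '_') ||
		    (c == '!' && i == 3))	/* ASF! exception */
			continue;
		return 0;
	}
	return 1;
}

int main(void)
{
	printf("%d %d %d\n",
	       valid_nameseg("DSDT"),	/* 1 */
	       valid_nameseg("ASF!"),	/* 1 */
	       valid_nameseg("dsdt"));	/* 0: lower case rejected */
	return 0;
}
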
diff --git a/drivers/acpi/acpica/utbuffer.c b/drivers/acpi/acpica/utbuffer.c
index 0cfb2b8..bd31faf 100644
--- a/drivers/acpi/acpica/utbuffer.c
+++ b/drivers/acpi/acpica/utbuffer.c
@@ -106,31 +106,31 @@
 			default:	/* Default is BYTE display */
 
 				acpi_os_printf("%02X ",
-					       buffer[(acpi_size) i + j]);
+					       buffer[(acpi_size)i + j]);
 				break;
 
 			case DB_WORD_DISPLAY:
 
 				ACPI_MOVE_16_TO_32(&temp32,
-						   &buffer[(acpi_size) i + j]);
+						   &buffer[(acpi_size)i + j]);
 				acpi_os_printf("%04X ", temp32);
 				break;
 
 			case DB_DWORD_DISPLAY:
 
 				ACPI_MOVE_32_TO_32(&temp32,
-						   &buffer[(acpi_size) i + j]);
+						   &buffer[(acpi_size)i + j]);
 				acpi_os_printf("%08X ", temp32);
 				break;
 
 			case DB_QWORD_DISPLAY:
 
 				ACPI_MOVE_32_TO_32(&temp32,
-						   &buffer[(acpi_size) i + j]);
+						   &buffer[(acpi_size)i + j]);
 				acpi_os_printf("%08X", temp32);
 
 				ACPI_MOVE_32_TO_32(&temp32,
-						   &buffer[(acpi_size) i + j +
+						   &buffer[(acpi_size)i + j +
 							   4]);
 				acpi_os_printf("%08X ", temp32);
 				break;
@@ -158,7 +158,7 @@
 				acpi_os_printf("// ");
 			}
 
-			buf_char = buffer[(acpi_size) i + j];
+			buf_char = buffer[(acpi_size)i + j];
 			if (isprint(buf_char)) {
 				acpi_os_printf("%c", buf_char);
 			} else {
@@ -274,31 +274,31 @@
 			default:	/* Default is BYTE display */
 
 				acpi_ut_file_printf(file, "%02X ",
-						    buffer[(acpi_size) i + j]);
+						    buffer[(acpi_size)i + j]);
 				break;
 
 			case DB_WORD_DISPLAY:
 
 				ACPI_MOVE_16_TO_32(&temp32,
-						   &buffer[(acpi_size) i + j]);
+						   &buffer[(acpi_size)i + j]);
 				acpi_ut_file_printf(file, "%04X ", temp32);
 				break;
 
 			case DB_DWORD_DISPLAY:
 
 				ACPI_MOVE_32_TO_32(&temp32,
-						   &buffer[(acpi_size) i + j]);
+						   &buffer[(acpi_size)i + j]);
 				acpi_ut_file_printf(file, "%08X ", temp32);
 				break;
 
 			case DB_QWORD_DISPLAY:
 
 				ACPI_MOVE_32_TO_32(&temp32,
-						   &buffer[(acpi_size) i + j]);
+						   &buffer[(acpi_size)i + j]);
 				acpi_ut_file_printf(file, "%08X", temp32);
 
 				ACPI_MOVE_32_TO_32(&temp32,
-						   &buffer[(acpi_size) i + j +
+						   &buffer[(acpi_size)i + j +
 							   4]);
 				acpi_ut_file_printf(file, "%08X ", temp32);
 				break;
@@ -318,7 +318,7 @@
 				return;
 			}
 
-			buf_char = buffer[(acpi_size) i + j];
+			buf_char = buffer[(acpi_size)i + j];
 			if (isprint(buf_char)) {
 				acpi_ut_file_printf(file, "%c", buf_char);
 			} else {
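
All of the utbuffer.c hunks make one mechanical change, dropping the space after the `(acpi_size)` cast; the cast itself matters because it widens `i` before the `i + j` addition. For orientation, the routine being patched is a conventional hex-plus-ASCII dump; a compressed standalone model of the byte-display path (illustrative only, no word/dword/qword modes):

#include <stdio.h>
#include <stddef.h>
#include <ctype.h>

static void dump_bytes(const unsigned char *buf, size_t count)
{
	for (size_t i = 0; i < count; i += 16) {
		printf("%04zX: ", i);
		for (size_t j = 0; j < 16 && i + j < count; j++)
			printf("%02X ", buf[i + j]);
		printf(" ");
		for (size_t j = 0; j < 16 && i + j < count; j++) {
			unsigned char c = buf[i + j];

			putchar(isprint(c) ? c : '.');	/* same repair as above */
		}
		putchar('\n');
	}
}

int main(void)
{
	dump_bytes((const unsigned char *)"ACPI dump example\x01\x02", 19);
	return 0;
}
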
diff --git a/drivers/acpi/acpica/utcache.c b/drivers/acpi/acpica/utcache.c
index f8e9978..3b8d23e 100644
--- a/drivers/acpi/acpica/utcache.c
+++ b/drivers/acpi/acpica/utcache.c
@@ -105,7 +105,7 @@
  *
  ******************************************************************************/
 
-acpi_status acpi_os_purge_cache(struct acpi_memory_list * cache)
+acpi_status acpi_os_purge_cache(struct acpi_memory_list *cache)
 {
 	void *next;
 	acpi_status status;
@@ -151,7 +151,7 @@
  *
  ******************************************************************************/
 
-acpi_status acpi_os_delete_cache(struct acpi_memory_list * cache)
+acpi_status acpi_os_delete_cache(struct acpi_memory_list *cache)
 {
 	acpi_status status;
 
@@ -184,8 +184,7 @@
  *
  ******************************************************************************/
 
-acpi_status
-acpi_os_release_object(struct acpi_memory_list * cache, void *object)
+acpi_status acpi_os_release_object(struct acpi_memory_list *cache, void *object)
 {
 	acpi_status status;
 
diff --git a/drivers/acpi/acpica/utcopy.c b/drivers/acpi/acpica/utcopy.c
index 98d53e5..82f9714 100644
--- a/drivers/acpi/acpica/utcopy.c
+++ b/drivers/acpi/acpica/utcopy.c
@@ -53,7 +53,7 @@
 static acpi_status
 acpi_ut_copy_isimple_to_esimple(union acpi_operand_object *internal_object,
 				union acpi_object *external_object,
-				u8 * data_space, acpi_size * buffer_space_used);
+				u8 *data_space, acpi_size *buffer_space_used);
 
 static acpi_status
 acpi_ut_copy_ielement_to_ielement(u8 object_type,
@@ -63,7 +63,7 @@
 
 static acpi_status
 acpi_ut_copy_ipackage_to_epackage(union acpi_operand_object *internal_object,
-				  u8 * buffer, acpi_size * space_used);
+				  u8 *buffer, acpi_size *space_used);
 
 static acpi_status
 acpi_ut_copy_esimple_to_isimple(union acpi_object *user_obj,
@@ -111,7 +111,7 @@
 static acpi_status
 acpi_ut_copy_isimple_to_esimple(union acpi_operand_object *internal_object,
 				union acpi_object *external_object,
-				u8 * data_space, acpi_size * buffer_space_used)
+				u8 *data_space, acpi_size *buffer_space_used)
 {
 	acpi_status status = AE_OK;
 
@@ -151,7 +151,7 @@
 
 		memcpy((void *)data_space,
 		       (void *)internal_object->string.pointer,
-		       (acpi_size) internal_object->string.length + 1);
+		       (acpi_size)internal_object->string.length + 1);
 		break;
 
 	case ACPI_TYPE_BUFFER:
@@ -331,7 +331,7 @@
 
 static acpi_status
 acpi_ut_copy_ipackage_to_epackage(union acpi_operand_object *internal_object,
-				  u8 * buffer, acpi_size * space_used)
+				  u8 *buffer, acpi_size *space_used)
 {
 	union acpi_object *external_object;
 	acpi_status status;
@@ -362,7 +362,7 @@
 	 * Leave room for an array of ACPI_OBJECTS in the buffer
 	 * and move the free space past it
 	 */
-	info.length += (acpi_size) external_object->package.count *
+	info.length += (acpi_size)external_object->package.count *
 	    ACPI_ROUND_UP_TO_NATIVE_WORD(sizeof(union acpi_object));
 	info.free_space += external_object->package.count *
 	    ACPI_ROUND_UP_TO_NATIVE_WORD(sizeof(union acpi_object));
@@ -738,7 +738,7 @@
 		 */
 		if (source_desc->string.pointer) {
 			dest_desc->string.pointer =
-			    ACPI_ALLOCATE((acpi_size) source_desc->string.
+			    ACPI_ALLOCATE((acpi_size)source_desc->string.
 					  length + 1);
 			if (!dest_desc->string.pointer) {
 				return (AE_NO_MEMORY);
@@ -748,7 +748,7 @@
 
 			memcpy(dest_desc->string.pointer,
 			       source_desc->string.pointer,
-			       (acpi_size) source_desc->string.length + 1);
+			       (acpi_size)source_desc->string.length + 1);
 		}
 		break;
 
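
These utcopy.c hunks only tighten cast spacing, but the cast they touch is load-bearing: `string.length` is a 32-bit field, and widening it to `acpi_size` before the `+ 1` avoids a wrap to zero in the pathological `0xFFFFFFFF` case (relevant on builds where `acpi_size` is 64-bit). A two-line demonstration of the difference, as a standalone sketch assuming a 32-bit `unsigned int`:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t length = 0xFFFFFFFFu;	/* pathological string length */

	/* Addition performed in 32 bits wraps to 0 before widening... */
	uint64_t wrapped = length + 1;
	/* ...while widening first preserves the intended value. */
	uint64_t widened = (uint64_t)length + 1;

	printf("wrapped=%llu widened=%llu\n",
	       (unsigned long long)wrapped, (unsigned long long)widened);
	return 0;
}
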
diff --git a/drivers/acpi/acpica/utdebug.c b/drivers/acpi/acpica/utdebug.c
index 1cfc5f6..5744222 100644
--- a/drivers/acpi/acpica/utdebug.c
+++ b/drivers/acpi/acpica/utdebug.c
@@ -51,13 +51,9 @@
 ACPI_MODULE_NAME("utdebug")
 
 #ifdef ACPI_DEBUG_OUTPUT
-static acpi_thread_id acpi_gbl_prev_thread_id = (acpi_thread_id) 0xFFFFFFFF;
-static char *acpi_gbl_fn_entry_str = "----Entry";
-static char *acpi_gbl_fn_exit_str = "----Exit-";
-
-/* Local prototypes */
-
-static const char *acpi_ut_trim_function_name(const char *function_name);
+static acpi_thread_id acpi_gbl_previous_thread_id = (acpi_thread_id)0xFFFFFFFF;
+static const char *acpi_gbl_function_entry_prefix = "----Entry";
+static const char *acpi_gbl_function_exit_prefix = "----Exit-";
 
 /*******************************************************************************
  *
@@ -178,14 +174,14 @@
 	 * Thread tracking and context switch notification
 	 */
 	thread_id = acpi_os_get_thread_id();
-	if (thread_id != acpi_gbl_prev_thread_id) {
+	if (thread_id != acpi_gbl_previous_thread_id) {
 		if (ACPI_LV_THREADS & acpi_dbg_level) {
 			acpi_os_printf
 			    ("\n**** Context Switch from TID %u to TID %u ****\n\n",
-			     (u32)acpi_gbl_prev_thread_id, (u32)thread_id);
+			     (u32)acpi_gbl_previous_thread_id, (u32)thread_id);
 		}
 
-		acpi_gbl_prev_thread_id = thread_id;
+		acpi_gbl_previous_thread_id = thread_id;
 		acpi_gbl_nesting_level = 0;
 	}
 
@@ -287,7 +283,8 @@
 	if (ACPI_IS_DEBUG_ENABLED(ACPI_LV_FUNCTIONS, component_id)) {
 		acpi_debug_print(ACPI_LV_FUNCTIONS,
 				 line_number, function_name, module_name,
-				 component_id, "%s\n", acpi_gbl_fn_entry_str);
+				 component_id, "%s\n",
+				 acpi_gbl_function_entry_prefix);
 	}
 }
 
@@ -312,7 +309,8 @@
 void
 acpi_ut_trace_ptr(u32 line_number,
 		  const char *function_name,
-		  const char *module_name, u32 component_id, void *pointer)
+		  const char *module_name,
+		  u32 component_id, const void *pointer)
 {
 
 	acpi_gbl_nesting_level++;
@@ -323,8 +321,8 @@
 	if (ACPI_IS_DEBUG_ENABLED(ACPI_LV_FUNCTIONS, component_id)) {
 		acpi_debug_print(ACPI_LV_FUNCTIONS,
 				 line_number, function_name, module_name,
-				 component_id, "%s %p\n", acpi_gbl_fn_entry_str,
-				 pointer);
+				 component_id, "%s %p\n",
+				 acpi_gbl_function_entry_prefix, pointer);
 	}
 }
 
@@ -348,7 +346,7 @@
 void
 acpi_ut_trace_str(u32 line_number,
 		  const char *function_name,
-		  const char *module_name, u32 component_id, char *string)
+		  const char *module_name, u32 component_id, const char *string)
 {
 
 	acpi_gbl_nesting_level++;
@@ -359,8 +357,8 @@
 	if (ACPI_IS_DEBUG_ENABLED(ACPI_LV_FUNCTIONS, component_id)) {
 		acpi_debug_print(ACPI_LV_FUNCTIONS,
 				 line_number, function_name, module_name,
-				 component_id, "%s %s\n", acpi_gbl_fn_entry_str,
-				 string);
+				 component_id, "%s %s\n",
+				 acpi_gbl_function_entry_prefix, string);
 	}
 }
 
@@ -396,7 +394,7 @@
 		acpi_debug_print(ACPI_LV_FUNCTIONS,
 				 line_number, function_name, module_name,
 				 component_id, "%s %08X\n",
-				 acpi_gbl_fn_entry_str, integer);
+				 acpi_gbl_function_entry_prefix, integer);
 	}
 }
 
@@ -427,7 +425,8 @@
 	if (ACPI_IS_DEBUG_ENABLED(ACPI_LV_FUNCTIONS, component_id)) {
 		acpi_debug_print(ACPI_LV_FUNCTIONS,
 				 line_number, function_name, module_name,
-				 component_id, "%s\n", acpi_gbl_fn_exit_str);
+				 component_id, "%s\n",
+				 acpi_gbl_function_exit_prefix);
 	}
 
 	if (acpi_gbl_nesting_level) {
@@ -467,14 +466,14 @@
 			acpi_debug_print(ACPI_LV_FUNCTIONS,
 					 line_number, function_name,
 					 module_name, component_id, "%s %s\n",
-					 acpi_gbl_fn_exit_str,
+					 acpi_gbl_function_exit_prefix,
 					 acpi_format_exception(status));
 		} else {
 			acpi_debug_print(ACPI_LV_FUNCTIONS,
 					 line_number, function_name,
 					 module_name, component_id,
 					 "%s ****Exception****: %s\n",
-					 acpi_gbl_fn_exit_str,
+					 acpi_gbl_function_exit_prefix,
 					 acpi_format_exception(status));
 		}
 	}
@@ -514,7 +513,7 @@
 		acpi_debug_print(ACPI_LV_FUNCTIONS,
 				 line_number, function_name, module_name,
 				 component_id, "%s %8.8X%8.8X\n",
-				 acpi_gbl_fn_exit_str,
+				 acpi_gbl_function_exit_prefix,
 				 ACPI_FORMAT_UINT64(value));
 	}
 
@@ -552,8 +551,8 @@
 	if (ACPI_IS_DEBUG_ENABLED(ACPI_LV_FUNCTIONS, component_id)) {
 		acpi_debug_print(ACPI_LV_FUNCTIONS,
 				 line_number, function_name, module_name,
-				 component_id, "%s %p\n", acpi_gbl_fn_exit_str,
-				 ptr);
+				 component_id, "%s %p\n",
+				 acpi_gbl_function_exit_prefix, ptr);
 	}
 
 	if (acpi_gbl_nesting_level) {
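
The renamed prefix strings feed a simple entry/exit tracer keyed on a nesting level. A stripped-down model of the mechanism (standalone C; the real code adds component filtering, thread-switch detection, and line/module decoration):

#include <stdio.h>

static int nesting_level;

static void trace_entry(const char *function_name)
{
	printf("%*s----Entry %s\n", nesting_level * 2, "", function_name);
	nesting_level++;
}

static void trace_exit(const char *function_name)
{
	if (nesting_level)
		nesting_level--;
	printf("%*s----Exit- %s\n", nesting_level * 2, "", function_name);
}

int main(void)
{
	trace_entry("acpi_outer");
	trace_entry("acpi_inner");
	trace_exit("acpi_inner");
	trace_exit("acpi_outer");
	return 0;
}
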
diff --git a/drivers/acpi/acpica/utdecode.c b/drivers/acpi/acpica/utdecode.c
index 6ba65b0..efd7988 100644
--- a/drivers/acpi/acpica/utdecode.c
+++ b/drivers/acpi/acpica/utdecode.c
@@ -446,7 +446,7 @@
 
 /* Names for Notify() values, used for debug output */
 
-static const char *acpi_gbl_generic_notify[ACPI_NOTIFY_MAX + 1] = {
+static const char *acpi_gbl_generic_notify[ACPI_GENERIC_NOTIFY_MAX + 1] = {
 	/* 00 */ "Bus Check",
 	/* 01 */ "Device Check",
 	/* 02 */ "Device Wake",
@@ -459,49 +459,53 @@
 	/* 09 */ "Device PLD Check",
 	/* 0A */ "Reserved",
 	/* 0B */ "System Locality Update",
-	/* 0C */ "Shutdown Request",
+	/* 0C */ "Shutdown Request",	/* Reserved in ACPI 6.0 */
 	/* 0D */ "System Resource Affinity Update"
 };
 
-static const char *acpi_gbl_device_notify[4] = {
+static const char *acpi_gbl_device_notify[5] = {
 	/* 80 */ "Status Change",
 	/* 81 */ "Information Change",
 	/* 82 */ "Device-Specific Change",
-	/* 83 */ "Device-Specific Change"
+	/* 83 */ "Device-Specific Change",
+	/* 84 */ "Reserved"
 };
 
-static const char *acpi_gbl_processor_notify[4] = {
+static const char *acpi_gbl_processor_notify[5] = {
 	/* 80 */ "Performance Capability Change",
 	/* 81 */ "C-State Change",
 	/* 82 */ "Throttling Capability Change",
-	/* 83 */ "Device-Specific Change"
+	/* 83 */ "Guaranteed Change",
+	/* 84 */ "Minimum Excursion"
 };
 
-static const char *acpi_gbl_thermal_notify[4] = {
+static const char *acpi_gbl_thermal_notify[5] = {
 	/* 80 */ "Thermal Status Change",
 	/* 81 */ "Thermal Trip Point Change",
 	/* 82 */ "Thermal Device List Change",
-	/* 83 */ "Thermal Relationship Change"
+	/* 83 */ "Thermal Relationship Change",
+	/* 84 */ "Reserved"
 };
 
 const char *acpi_ut_get_notify_name(u32 notify_value, acpi_object_type type)
 {
 
-	/* 00 - 0D are common to all object types */
+	/* 00 - 0D are "common to all object types" (from ACPI Spec) */
 
-	if (notify_value <= ACPI_NOTIFY_MAX) {
+	if (notify_value <= ACPI_GENERIC_NOTIFY_MAX) {
 		return (acpi_gbl_generic_notify[notify_value]);
 	}
 
-	/* 0D - 7F are reserved */
+	/* 0E - 7F are reserved */
 
 	if (notify_value <= ACPI_MAX_SYS_NOTIFY) {
 		return ("Reserved");
 	}
 
-	/* 80 - 83 are per-object-type */
+	/* 80 - 84 are per-object-type */
 
-	if (notify_value <= 0x83) {
+	if (notify_value <= ACPI_SPECIFIC_NOTIFY_MAX) {
 		switch (type) {
 		case ACPI_TYPE_ANY:
 		case ACPI_TYPE_DEVICE:
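
The lookup above partitions the notify space into three ranges: 00-0D generic, 0E-7F reserved, and 80-84 per-object-type. A compact sketch of the same dispatch, with the bounds taken from the hunk and the table contents abbreviated (standalone C, illustrative names):

#include <stdio.h>

#define GENERIC_NOTIFY_MAX   0x0D
#define MAX_SYS_NOTIFY       0x7F
#define SPECIFIC_NOTIFY_MAX  0x84

static const char *notify_name(unsigned int value)
{
	static const char *const generic[GENERIC_NOTIFY_MAX + 1] = {
		[0x00] = "Bus Check",
		[0x01] = "Device Check",
		[0x0B] = "System Locality Update",
		[0x0D] = "System Resource Affinity Update",
	};

	if (value <= GENERIC_NOTIFY_MAX)
		return generic[value] ? generic[value] : "Reserved";
	if (value <= MAX_SYS_NOTIFY)
		return "Reserved";
	if (value <= SPECIFIC_NOTIFY_MAX)
		return "Per-object-type";	/* the real code switches on type */
	return "Hardware-specific";	/* behavior past 0x84 is outside this hunk */
}

int main(void)
{
	printf("%s\n", notify_name(0x01));	/* Device Check */
	printf("%s\n", notify_name(0x20));	/* Reserved */
	printf("%s\n", notify_name(0x83));	/* Per-object-type */
	return 0;
}
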
diff --git a/drivers/acpi/acpica/uteval.c b/drivers/acpi/acpica/uteval.c
index 17b9f3e..7bad13f 100644
--- a/drivers/acpi/acpica/uteval.c
+++ b/drivers/acpi/acpica/uteval.c
@@ -69,7 +69,7 @@
 
 acpi_status
 acpi_ut_evaluate_object(struct acpi_namespace_node *prefix_node,
-			char *path,
+			const char *path,
 			u32 expected_return_btypes,
 			union acpi_operand_object **return_desc)
 {
@@ -204,7 +204,7 @@
  ******************************************************************************/
 
 acpi_status
-acpi_ut_evaluate_numeric_object(char *object_name,
+acpi_ut_evaluate_numeric_object(const char *object_name,
 				struct acpi_namespace_node *device_node,
 				u64 *value)
 {
diff --git a/drivers/acpi/acpica/utglobal.c b/drivers/acpi/acpica/utglobal.c
index 48fffcf..dd3fd7f 100644
--- a/drivers/acpi/acpica/utglobal.c
+++ b/drivers/acpi/acpica/utglobal.c
@@ -80,6 +80,11 @@
 	"_S4D"
 };
 
+/* Hex-to-ascii */
+
+const char acpi_gbl_lower_hex_digits[] = "0123456789abcdef";
+const char acpi_gbl_upper_hex_digits[] = "0123456789ABCDEF";
+
 /*******************************************************************************
  *
  * Namespace globals
@@ -221,6 +226,49 @@
 };
 #endif				/* !ACPI_REDUCED_HARDWARE */
 
+#if defined (ACPI_DISASSEMBLER) || defined (ACPI_ASL_COMPILER)
+
+/* to_pld macro: compile/disassemble strings */
+
+const char *acpi_gbl_pld_panel_list[] = {
+	"TOP",
+	"BOTTOM",
+	"LEFT",
+	"RIGHT",
+	"FRONT",
+	"BACK",
+	"UNKNOWN",
+	NULL
+};
+
+const char *acpi_gbl_pld_vertical_position_list[] = {
+	"UPPER",
+	"CENTER",
+	"LOWER",
+	NULL
+};
+
+const char *acpi_gbl_pld_horizontal_position_list[] = {
+	"LEFT",
+	"CENTER",
+	"RIGHT",
+	NULL
+};
+
+const char *acpi_gbl_pld_shape_list[] = {
+	"ROUND",
+	"OVAL",
+	"SQUARE",
+	"VERTICALRECTANGLE",
+	"HORIZONTALRECTANGLE",
+	"VERTICALTRAPEZOID",
+	"HORIZONTALTRAPEZOID",
+	"UNKNOWN",
+	"CHAMFERED",
+	NULL
+};
+#endif
+
 /* Public globals */
 
 ACPI_EXPORT_SYMBOL(acpi_gbl_FADT)
diff --git a/drivers/acpi/acpica/utids.c b/drivers/acpi/acpica/utids.c
index 6fb4ec3..f7cd2d5 100644
--- a/drivers/acpi/acpica/utids.c
+++ b/drivers/acpi/acpica/utids.c
@@ -95,7 +95,7 @@
 
 	hid =
 	    ACPI_ALLOCATE_ZEROED(sizeof(struct acpi_pnp_device_id) +
-				 (acpi_size) length);
+				 (acpi_size)length);
 	if (!hid) {
 		status = AE_NO_MEMORY;
 		goto cleanup;
@@ -173,7 +173,7 @@
 
 	uid =
 	    ACPI_ALLOCATE_ZEROED(sizeof(struct acpi_pnp_device_id) +
-				 (acpi_size) length);
+				 (acpi_size)length);
 	if (!uid) {
 		status = AE_NO_MEMORY;
 		goto cleanup;
@@ -309,7 +309,7 @@
 	/* Area for CID strings starts after the CID PNP_DEVICE_ID array */
 
 	next_id_string = ACPI_CAST_PTR(char, cid_list->ids) +
-	    ((acpi_size) count * sizeof(struct acpi_pnp_device_id));
+	    ((acpi_size)count * sizeof(struct acpi_pnp_device_id));
 
 	/* Copy/convert the CIDs to the return buffer */
 
@@ -413,7 +413,7 @@
 
 	cls =
 	    ACPI_ALLOCATE_ZEROED(sizeof(struct acpi_pnp_device_id) +
-				 (acpi_size) length);
+				 (acpi_size)length);
 	if (!cls) {
 		status = AE_NO_MEMORY;
 		goto cleanup;
diff --git a/drivers/acpi/acpica/utmath.c b/drivers/acpi/acpica/utmath.c
index 6673720..2d6530e 100644
--- a/drivers/acpi/acpica/utmath.c
+++ b/drivers/acpi/acpica/utmath.c
@@ -236,8 +236,8 @@
 			}
 
 			remainder.full = remainder.full - dividend.full;
-			remainder.part.hi = (u32) - ((s32) remainder.part.hi);
-			remainder.part.lo = (u32) - ((s32) remainder.part.lo);
+			remainder.part.hi = (u32)-((s32)remainder.part.hi);
+			remainder.part.lo = (u32)-((s32)remainder.part.lo);
 
 			if (remainder.part.lo) {
 				remainder.part.hi--;
diff --git a/drivers/acpi/acpica/utmisc.c b/drivers/acpi/acpica/utmisc.c
index d938c27..389de3b 100644
--- a/drivers/acpi/acpica/utmisc.c
+++ b/drivers/acpi/acpica/utmisc.c
@@ -361,7 +361,7 @@
 void
 acpi_ut_display_init_pathname(u8 type,
 			      struct acpi_namespace_node *obj_handle,
-			      char *path)
+			      const char *path)
 {
 	acpi_status status;
 	struct acpi_buffer buffer;
diff --git a/drivers/acpi/acpica/utnonansi.c b/drivers/acpi/acpica/utnonansi.c
index d5c3adf..3465fe2 100644
--- a/drivers/acpi/acpica/utnonansi.c
+++ b/drivers/acpi/acpica/utnonansi.c
@@ -205,37 +205,41 @@
  *
  * FUNCTION:    acpi_ut_strtoul64
  *
- * PARAMETERS:  string          - Null terminated string
- *              base            - Radix of the string: 16 or ACPI_ANY_BASE;
- *                                ACPI_ANY_BASE means 'in behalf of to_integer'
- *              ret_integer     - Where the converted integer is returned
+ * PARAMETERS:  string                  - Null terminated string
+ *              base                    - Radix of the string: 16 or 10 or
+ *                                        ACPI_ANY_BASE
+ *              max_integer_byte_width  - Maximum allowable integer, in bytes:
+ *                                        4 or 8 (32 or 64 bits)
+ *              ret_integer             - Where the converted integer is
+ *                                        returned
  *
  * RETURN:      Status and Converted value
  *
  * DESCRIPTION: Convert a string into an unsigned value. Performs either a
- *              32-bit or 64-bit conversion, depending on the current mode
- *              of the interpreter.
+ *              32-bit or 64-bit conversion, depending on the input integer
+ *              size (often the current mode of the interpreter).
  *
- * NOTES:       acpi_gbl_integer_byte_width should be set to the proper width.
+ * NOTES:       Negative numbers are not supported, as they are not supported
+ *              by ACPI.
+ *
+ *              acpi_gbl_integer_byte_width should be set to the proper width.
  *              For the core ACPICA code, this width depends on the DSDT
- *              version. For iASL, the default byte width is always 8.
+ *              version. For iASL, the default byte width is always 8 for the
+ *              parser, but error checking is performed later to flag cases
+ *              where a 64-bit constant is defined in a 32-bit DSDT/SSDT.
  *
  *              Does not support Octal strings, not needed at this time.
  *
- *              There is an earlier version of the function after this one,
- *              below. It is slightly different than this one, and the two
- *              may eventually may need to be merged. (01/2016).
- *
  ******************************************************************************/
 
-acpi_status acpi_ut_strtoul64(char *string, u32 base, u64 *ret_integer)
+acpi_status
+acpi_ut_strtoul64(char *string,
+		  u32 base, u32 max_integer_byte_width, u64 *ret_integer)
 {
 	u32 this_digit = 0;
 	u64 return_value = 0;
 	u64 quotient;
 	u64 dividend;
-	u32 to_integer_op = (base == ACPI_ANY_BASE);
-	u32 mode32 = (acpi_gbl_integer_byte_width == 4);
 	u8 valid_digits = 0;
 	u8 sign_of0x = 0;
 	u8 term = 0;
@@ -244,6 +248,7 @@
 
 	switch (base) {
 	case ACPI_ANY_BASE:
+	case 10:
 	case 16:
 
 		break;
@@ -265,9 +270,9 @@
 		string++;
 	}
 
-	if (to_integer_op) {
+	if (base == ACPI_ANY_BASE) {
 		/*
-		 * Base equal to ACPI_ANY_BASE means 'ToInteger operation case'.
+		 * Base equal to ACPI_ANY_BASE means 'Either decimal or hex'.
 		 * We need to determine if it is decimal or hexadecimal.
 		 */
 		if ((*string == '0') && (tolower((int)*(string + 1)) == 'x')) {
@@ -284,7 +289,7 @@
 	/* Any string left? Check that '0x' is not followed by white space. */
 
 	if (!(*string) || isspace((int)*string) || *string == '\t') {
-		if (to_integer_op) {
+		if (base == ACPI_ANY_BASE) {
 			goto error_exit;
 		} else {
 			goto all_done;
@@ -292,10 +297,11 @@
 	}
 
 	/*
-	 * Perform a 32-bit or 64-bit conversion, depending upon the current
-	 * execution mode of the interpreter
+	 * Perform a 32-bit or 64-bit conversion, depending upon the input
+	 * byte width
 	 */
-	dividend = (mode32) ? ACPI_UINT32_MAX : ACPI_UINT64_MAX;
+	dividend = (max_integer_byte_width <= ACPI_MAX32_BYTE_WIDTH) ?
+	    ACPI_UINT32_MAX : ACPI_UINT64_MAX;
 
 	/* Main loop: convert the string to a 32- or 64-bit integer */
 
@@ -323,7 +329,7 @@
 		}
 
 		if (term) {
-			if (to_integer_op) {
+			if (base == ACPI_ANY_BASE) {
 				goto error_exit;
 			} else {
 				break;
@@ -338,12 +344,13 @@
 
 		valid_digits++;
 
-		if (sign_of0x
-		    && ((valid_digits > 16)
-			|| ((valid_digits > 8) && mode32))) {
+		if (sign_of0x && ((valid_digits > 16) ||
+				  ((valid_digits > 8)
+				   && (max_integer_byte_width <=
+				       ACPI_MAX32_BYTE_WIDTH)))) {
 			/*
 			 * This is to_integer operation case.
-			 * No any restrictions for string-to-integer conversion,
+			 * No restrictions for string-to-integer conversion,
 			 * see ACPI spec.
 			 */
 			goto error_exit;
@@ -355,7 +362,7 @@
 					   &quotient, NULL);
 
 		if (return_value > quotient) {
-			if (to_integer_op) {
+			if (base == ACPI_ANY_BASE) {
 				goto error_exit;
 			} else {
 				break;
@@ -378,7 +385,8 @@
 	return_ACPI_STATUS(AE_OK);
 
 error_exit:
-	/* Base was set/validated above */
+
+	/* Base was set/validated above (10 or 16) */
 
 	if (base == 10) {
 		return_ACPI_STATUS(AE_BAD_DECIMAL_CONSTANT);
@@ -388,8 +396,7 @@
 }
 
 #ifdef _OBSOLETE_FUNCTIONS
-/* TBD: use version in ACPICA main code base? */
-/* DONE: 01/2016 */
+/* Removed: 01/2016 */
 
 /*******************************************************************************
  *
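
The reworked interface takes the integer width as an explicit parameter instead of reading the interpreter global, and the overflow check works by division: a digit is admitted only while the running value stays at or below limit/base. A standalone model of that loop (C; the 4-byte threshold for ACPI_MAX32_BYTE_WIDTH is inferred from the hunk, and the 0x-autodetection of ACPI_ANY_BASE is omitted):

#include <stdio.h>
#include <stdint.h>
#include <ctype.h>

static int strtou64_capped(const char *s, unsigned int base,
			   unsigned int max_byte_width, uint64_t *out)
{
	uint64_t limit = (max_byte_width <= 4) ? UINT32_MAX : UINT64_MAX;
	uint64_t quotient = limit / base;	/* the division trick */
	uint64_t value = 0;

	if (!*s)
		return -1;

	for (; *s; s++) {
		int digit;

		if (isdigit((unsigned char)*s))
			digit = *s - '0';
		else if (base == 16 && isxdigit((unsigned char)*s))
			digit = tolower((unsigned char)*s) - 'a' + 10;
		else
			return -1;

		if (value > quotient)
			return -1;	/* the multiply would overflow the cap */
		value *= base;
		if ((uint64_t)digit > limit - value)
			return -1;	/* the add would overflow the cap */
		value += digit;
	}

	*out = value;
	return 0;
}

int main(void)
{
	uint64_t v;

	printf("%d\n", strtou64_capped("4294967296", 10, 8, &v));	/* 0: fits in 64 */
	printf("%d\n", strtou64_capped("4294967296", 10, 4, &v));	/* -1: exceeds 32 */
	return 0;
}
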
diff --git a/drivers/acpi/acpica/utobject.c b/drivers/acpi/acpica/utobject.c
index edad3f0..72b9a06 100644
--- a/drivers/acpi/acpica/utobject.c
+++ b/drivers/acpi/acpica/utobject.c
@@ -51,11 +51,11 @@
 /* Local prototypes */
 static acpi_status
 acpi_ut_get_simple_object_size(union acpi_operand_object *obj,
-			       acpi_size * obj_length);
+			       acpi_size *obj_length);
 
 static acpi_status
 acpi_ut_get_package_object_size(union acpi_operand_object *obj,
-				acpi_size * obj_length);
+				acpi_size *obj_length);
 
 static acpi_status
 acpi_ut_get_element_length(u8 object_type,
@@ -177,7 +177,7 @@
 	 * Create the element array. Count+1 allows the array to be null
 	 * terminated.
 	 */
-	package_elements = ACPI_ALLOCATE_ZEROED(((acpi_size) count +
+	package_elements = ACPI_ALLOCATE_ZEROED(((acpi_size)count +
 						 1) * sizeof(void *));
 	if (!package_elements) {
 		ACPI_FREE(package_desc);
@@ -454,7 +454,7 @@
 
 static acpi_status
 acpi_ut_get_simple_object_size(union acpi_operand_object *internal_object,
-			       acpi_size * obj_length)
+			       acpi_size *obj_length)
 {
 	acpi_size length;
 	acpi_size size;
@@ -495,12 +495,12 @@
 	switch (internal_object->common.type) {
 	case ACPI_TYPE_STRING:
 
-		length += (acpi_size) internal_object->string.length + 1;
+		length += (acpi_size)internal_object->string.length + 1;
 		break;
 
 	case ACPI_TYPE_BUFFER:
 
-		length += (acpi_size) internal_object->buffer.length;
+		length += (acpi_size)internal_object->buffer.length;
 		break;
 
 	case ACPI_TYPE_INTEGER:
@@ -640,7 +640,7 @@
 
 static acpi_status
 acpi_ut_get_package_object_size(union acpi_operand_object *internal_object,
-				acpi_size * obj_length)
+				acpi_size *obj_length)
 {
 	acpi_status status;
 	struct acpi_pkg_info info;
@@ -665,7 +665,7 @@
 	 */
 	info.length +=
 	    ACPI_ROUND_UP_TO_NATIVE_WORD(sizeof(union acpi_object)) *
-	    (acpi_size) info.num_packages;
+	    (acpi_size)info.num_packages;
 
 	/* Return the total package length */
 
@@ -689,7 +689,7 @@
 
 acpi_status
 acpi_ut_get_object_size(union acpi_operand_object *internal_object,
-			acpi_size * obj_length)
+			acpi_size *obj_length)
 {
 	acpi_status status;
 
diff --git a/drivers/acpi/acpica/utosi.c b/drivers/acpi/acpica/utosi.c
index b5cfe57..3f5fed6 100644
--- a/drivers/acpi/acpica/utosi.c
+++ b/drivers/acpi/acpica/utosi.c
@@ -150,7 +150,7 @@
 	     i < (ACPI_ARRAY_LENGTH(acpi_default_supported_interfaces) - 1);
 	     i++) {
 		acpi_default_supported_interfaces[i].next =
-		    &acpi_default_supported_interfaces[(acpi_size) i + 1];
+		    &acpi_default_supported_interfaces[(acpi_size)i + 1];
 	}
 
 	acpi_os_release_mutex(acpi_gbl_osi_mutex);
@@ -397,7 +397,7 @@
  *
  ******************************************************************************/
 
-acpi_status acpi_ut_osi_implementation(struct acpi_walk_state * walk_state)
+acpi_status acpi_ut_osi_implementation(struct acpi_walk_state *walk_state)
 {
 	union acpi_operand_object *string_desc;
 	union acpi_operand_object *return_desc;
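
The loop patched above threads a static array into a singly linked list by pointing each element at its successor, leaving the final element NULL-terminated. The same idiom standalone (C; the entries shown are a few real _OSI strings, the struct is illustrative):

#include <stdio.h>

struct interface_info {
	const char *name;
	struct interface_info *next;
};

static struct interface_info interfaces[] = {
	{ "Windows 2000", NULL },
	{ "Windows 2001", NULL },
	{ "Windows 2006", NULL },
};

#define ARRAY_LENGTH(a) (sizeof(a) / sizeof((a)[0]))

int main(void)
{
	/* Link each entry to the next; the last entry keeps next == NULL */
	for (size_t i = 0; i < ARRAY_LENGTH(interfaces) - 1; i++)
		interfaces[i].next = &interfaces[i + 1];

	for (struct interface_info *p = interfaces; p; p = p->next)
		puts(p->name);
	return 0;
}
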
diff --git a/drivers/acpi/acpica/utownerid.c b/drivers/acpi/acpica/utownerid.c
index 813520a..3cd573c 100644
--- a/drivers/acpi/acpica/utownerid.c
+++ b/drivers/acpi/acpica/utownerid.c
@@ -61,7 +61,7 @@
  *              when the method exits or the table is unloaded.
  *
  ******************************************************************************/
-acpi_status acpi_ut_allocate_owner_id(acpi_owner_id * owner_id)
+acpi_status acpi_ut_allocate_owner_id(acpi_owner_id *owner_id)
 {
 	u32 i;
 	u32 j;
@@ -122,7 +122,7 @@
 				 * permanently allocated (prevents +1 overflow)
 				 */
 				*owner_id =
-				    (acpi_owner_id) ((k + 1) + ACPI_MUL_32(j));
+				    (acpi_owner_id)((k + 1) + ACPI_MUL_32(j));
 
 				ACPI_DEBUG_PRINT((ACPI_DB_VALUES,
 						  "Allocated OwnerId: %2.2X\n",
@@ -167,7 +167,7 @@
  *
  ******************************************************************************/
 
-void acpi_ut_release_owner_id(acpi_owner_id * owner_id_ptr)
+void acpi_ut_release_owner_id(acpi_owner_id *owner_id_ptr)
 {
 	acpi_owner_id owner_id = *owner_id_ptr;
 	acpi_status status;
diff --git a/drivers/acpi/acpica/utprint.c b/drivers/acpi/acpica/utprint.c
index 8c218ad..dd084cf 100644
--- a/drivers/acpi/acpica/utprint.c
+++ b/drivers/acpi/acpica/utprint.c
@@ -67,11 +67,6 @@
 
 static char *acpi_ut_put_number(char *string, u64 number, u8 base, u8 upper);
 
-/* Module globals */
-
-static const char acpi_gbl_lower_hex_digits[] = "0123456789abcdef";
-static const char acpi_gbl_upper_hex_digits[] = "0123456789ABCDEF";
-
 /*******************************************************************************
  *
  * FUNCTION:    acpi_ut_bound_string_length
@@ -269,9 +264,9 @@
 
 	sign = '\0';
 	if (type & ACPI_FORMAT_SIGN) {
-		if ((s64) number < 0) {
+		if ((s64)number < 0) {
 			sign = '-';
-			number = -(s64) number;
+			number = -(s64)number;
 			width--;
 		} else if (type & ACPI_FORMAT_SIGN_PLUS) {
 			sign = '+';
@@ -409,7 +404,7 @@
 		width = -1;
 		if (isdigit((int)*format)) {
 			format = acpi_ut_scan_number(format, &number);
-			width = (s32) number;
+			width = (s32)number;
 		} else if (*format == '*') {
 			++format;
 			width = va_arg(args, int);
@@ -426,7 +421,7 @@
 			++format;
 			if (isdigit((int)*format)) {
 				format = acpi_ut_scan_number(format, &number);
-				precision = (s32) number;
+				precision = (s32)number;
 			} else if (*format == '*') {
 				++format;
 				precision = va_arg(args, int);
@@ -555,17 +550,17 @@
 		if (qualifier == 'L') {
 			number = va_arg(args, u64);
 			if (type & ACPI_FORMAT_SIGN) {
-				number = (s64) number;
+				number = (s64)number;
 			}
 		} else if (qualifier == 'l') {
 			number = va_arg(args, unsigned long);
 			if (type & ACPI_FORMAT_SIGN) {
-				number = (s32) number;
+				number = (s32)number;
 			}
 		} else if (qualifier == 'h') {
 			number = (u16)va_arg(args, int);
 			if (type & ACPI_FORMAT_SIGN) {
-				number = (s16) number;
+				number = (s16)number;
 			}
 		} else {
 			number = va_arg(args, unsigned int);
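
With the hex digit tables promoted to utglobal.c (see the utglobal.c hunk above), the formatter can index either case table directly. A minimal reversed-digit emitter in that style (standalone C; illustrative, not the ACPICA signature):

#include <stdio.h>

static const char lower_hex_digits[] = "0123456789abcdef";
static const char upper_hex_digits[] = "0123456789ABCDEF";

/* Emit 'number' in 'base' (2..16). Digits come out least-significant
 * first, so they are buffered and written back reversed. */
static char *put_number(char *out, unsigned long long number,
			unsigned int base, int upper)
{
	const char *digits = upper ? upper_hex_digits : lower_hex_digits;
	char tmp[64 + 1];	/* base 2 of a 64-bit value worst case */
	int n = 0;

	do {
		tmp[n++] = digits[number % base];
		number /= base;
	} while (number);

	while (n)
		*out++ = tmp[--n];
	*out = '\0';
	return out;
}

int main(void)
{
	char buf[32];

	put_number(buf, 0xdeadbeefULL, 16, 0);
	printf("%s\n", buf);	/* deadbeef */
	return 0;
}
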
diff --git a/drivers/acpi/acpica/utstring.c b/drivers/acpi/acpica/utstring.c
index 0b00572..288913a 100644
--- a/drivers/acpi/acpica/utstring.c
+++ b/drivers/acpi/acpica/utstring.c
@@ -130,7 +130,7 @@
 			} else {
 				/* All others will be Hex escapes */
 
-				acpi_os_printf("\\x%2.2X", (s32) string[i]);
+				acpi_os_printf("\\x%2.2X", (s32)string[i]);
 			}
 			break;
 		}
@@ -145,73 +145,6 @@
 
 /*******************************************************************************
  *
- * FUNCTION:    acpi_ut_valid_acpi_char
- *
- * PARAMETERS:  char            - The character to be examined
- *              position        - Byte position (0-3)
- *
- * RETURN:      TRUE if the character is valid, FALSE otherwise
- *
- * DESCRIPTION: Check for a valid ACPI character. Must be one of:
- *              1) Upper case alpha
- *              2) numeric
- *              3) underscore
- *
- *              We allow a '!' as the last character because of the ASF! table
- *
- ******************************************************************************/
-
-u8 acpi_ut_valid_acpi_char(char character, u32 position)
-{
-
-	if (!((character >= 'A' && character <= 'Z') ||
-	      (character >= '0' && character <= '9') || (character == '_'))) {
-
-		/* Allow a '!' in the last position */
-
-		if (character == '!' && position == 3) {
-			return (TRUE);
-		}
-
-		return (FALSE);
-	}
-
-	return (TRUE);
-}
-
-/*******************************************************************************
- *
- * FUNCTION:    acpi_ut_valid_acpi_name
- *
- * PARAMETERS:  name            - The name to be examined. Does not have to
- *                                be NULL terminated string.
- *
- * RETURN:      TRUE if the name is valid, FALSE otherwise
- *
- * DESCRIPTION: Check for a valid ACPI name. Each character must be one of:
- *              1) Upper case alpha
- *              2) numeric
- *              3) underscore
- *
- ******************************************************************************/
-
-u8 acpi_ut_valid_acpi_name(char *name)
-{
-	u32 i;
-
-	ACPI_FUNCTION_ENTRY();
-
-	for (i = 0; i < ACPI_NAME_SIZE; i++) {
-		if (!acpi_ut_valid_acpi_char(name[i], i)) {
-			return (FALSE);
-		}
-	}
-
-	return (TRUE);
-}
-
-/*******************************************************************************
- *
  * FUNCTION:    acpi_ut_repair_name
  *
  * PARAMETERS:  name            - The ACPI name to be repaired
@@ -253,7 +186,7 @@
 	/* Check each character in the name */
 
 	for (i = 0; i < ACPI_NAME_SIZE; i++) {
-		if (acpi_ut_valid_acpi_char(name[i], i)) {
+		if (acpi_ut_valid_name_char(name[i], i)) {
 			continue;
 		}
 
diff --git a/drivers/acpi/acpica/uttrack.c b/drivers/acpi/acpica/uttrack.c
index 60c406a..0df07df 100644
--- a/drivers/acpi/acpica/uttrack.c
+++ b/drivers/acpi/acpica/uttrack.c
@@ -90,7 +90,7 @@
  ******************************************************************************/
 
 acpi_status
-acpi_ut_create_list(char *list_name,
+acpi_ut_create_list(const char *list_name,
 		    u16 object_size, struct acpi_memory_list **return_cache)
 {
 	struct acpi_memory_list *cache;
diff --git a/drivers/acpi/acpica/utxface.c b/drivers/acpi/acpica/utxface.c
index 68d4673..d9e6aac 100644
--- a/drivers/acpi/acpica/utxface.c
+++ b/drivers/acpi/acpica/utxface.c
@@ -127,7 +127,7 @@
  *              and the value of out_buffer is undefined.
  *
  ******************************************************************************/
-acpi_status acpi_get_system_info(struct acpi_buffer * out_buffer)
+acpi_status acpi_get_system_info(struct acpi_buffer *out_buffer)
 {
 	struct acpi_system_info *info_ptr;
 	acpi_status status;
@@ -483,7 +483,7 @@
  ******************************************************************************/
 acpi_status
 acpi_decode_pld_buffer(u8 *in_buffer,
-		       acpi_size length, struct acpi_pld_info ** return_buffer)
+		       acpi_size length, struct acpi_pld_info **return_buffer)
 {
 	struct acpi_pld_info *pld_info;
 	u32 *buffer = ACPI_CAST_PTR(u32, in_buffer);
diff --git a/drivers/acpi/blacklist.c b/drivers/acpi/blacklist.c
index 96809cd..bdc67ba 100644
--- a/drivers/acpi/blacklist.c
+++ b/drivers/acpi/blacklist.c
@@ -3,7 +3,7 @@
  *
  *  Check to see if the given machine has a known bad ACPI BIOS
  *  or if the BIOS is too old.
- *  Check given machine against acpi_osi_dmi_table[].
+ *  Check given machine against acpi_rev_dmi_table[].
  *
  *  Copyright (C) 2004 Len Brown <len.brown@intel.com>
  *  Copyright (C) 2002 Andy Grover <andrew.grover@intel.com>
@@ -47,7 +47,7 @@
 	u32 is_critical_error;
 };
 
-static struct dmi_system_id acpi_osi_dmi_table[] __initdata;
+static struct dmi_system_id acpi_rev_dmi_table[] __initdata;
 
 /*
  * POLICY: If *anything* doesn't work, put it on the blacklist.
@@ -128,36 +128,12 @@
 		}
 	}
 
-	dmi_check_system(acpi_osi_dmi_table);
+	(void)early_acpi_osi_init();
+	dmi_check_system(acpi_rev_dmi_table);
 
 	return blacklisted;
 }
 #ifdef CONFIG_DMI
-static int __init dmi_enable_osi_linux(const struct dmi_system_id *d)
-{
-	acpi_dmi_osi_linux(1, d);	/* enable */
-	return 0;
-}
-static int __init dmi_disable_osi_vista(const struct dmi_system_id *d)
-{
-	printk(KERN_NOTICE PREFIX "DMI detected: %s\n", d->ident);
-	acpi_osi_setup("!Windows 2006");
-	acpi_osi_setup("!Windows 2006 SP1");
-	acpi_osi_setup("!Windows 2006 SP2");
-	return 0;
-}
-static int __init dmi_disable_osi_win7(const struct dmi_system_id *d)
-{
-	printk(KERN_NOTICE PREFIX "DMI detected: %s\n", d->ident);
-	acpi_osi_setup("!Windows 2009");
-	return 0;
-}
-static int __init dmi_disable_osi_win8(const struct dmi_system_id *d)
-{
-	printk(KERN_NOTICE PREFIX "DMI detected: %s\n", d->ident);
-	acpi_osi_setup("!Windows 2012");
-	return 0;
-}
 #ifdef CONFIG_ACPI_REV_OVERRIDE_POSSIBLE
 static int __init dmi_enable_rev_override(const struct dmi_system_id *d)
 {
@@ -168,169 +144,7 @@
 }
 #endif
 
-static struct dmi_system_id acpi_osi_dmi_table[] __initdata = {
-	{
-	.callback = dmi_disable_osi_vista,
-	.ident = "Fujitsu Siemens",
-	.matches = {
-		     DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
-		     DMI_MATCH(DMI_PRODUCT_NAME, "ESPRIMO Mobile V5505"),
-		},
-	},
-	{
-	/*
-	 * There have a NVIF method in MSI GX723 DSDT need call by Nvidia
-	 * driver (e.g. nouveau) when user press brightness hotkey.
-	 * Currently, nouveau driver didn't do the job and it causes there
-	 * have a infinite while loop in DSDT when user press hotkey.
-	 * We add MSI GX723's dmi information to this table for workaround
-	 * this issue.
-	 * Will remove MSI GX723 from the table after nouveau grows support.
-	 */
-	.callback = dmi_disable_osi_vista,
-	.ident = "MSI GX723",
-	.matches = {
-		     DMI_MATCH(DMI_SYS_VENDOR, "Micro-Star International"),
-		     DMI_MATCH(DMI_PRODUCT_NAME, "GX723"),
-		},
-	},
-	{
-	.callback = dmi_disable_osi_vista,
-	.ident = "Sony VGN-NS10J_S",
-	.matches = {
-		     DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"),
-		     DMI_MATCH(DMI_PRODUCT_NAME, "VGN-NS10J_S"),
-		},
-	},
-	{
-	.callback = dmi_disable_osi_vista,
-	.ident = "Sony VGN-SR290J",
-	.matches = {
-		     DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"),
-		     DMI_MATCH(DMI_PRODUCT_NAME, "VGN-SR290J"),
-		},
-	},
-	{
-	.callback = dmi_disable_osi_vista,
-	.ident = "VGN-NS50B_L",
-	.matches = {
-		     DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"),
-		     DMI_MATCH(DMI_PRODUCT_NAME, "VGN-NS50B_L"),
-		},
-	},
-	{
-	.callback = dmi_disable_osi_vista,
-	.ident = "VGN-SR19XN",
-	.matches = {
-		     DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"),
-		     DMI_MATCH(DMI_PRODUCT_NAME, "VGN-SR19XN"),
-		},
-	},
-	{
-	.callback = dmi_disable_osi_vista,
-	.ident = "Toshiba Satellite L355",
-	.matches = {
-		     DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"),
-		     DMI_MATCH(DMI_PRODUCT_VERSION, "Satellite L355"),
-		},
-	},
-	{
-	.callback = dmi_disable_osi_win7,
-	.ident = "ASUS K50IJ",
-	.matches = {
-		     DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer Inc."),
-		     DMI_MATCH(DMI_PRODUCT_NAME, "K50IJ"),
-		},
-	},
-	{
-	.callback = dmi_disable_osi_vista,
-	.ident = "Toshiba P305D",
-	.matches = {
-		     DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"),
-		     DMI_MATCH(DMI_PRODUCT_NAME, "Satellite P305D"),
-		},
-	},
-	{
-	.callback = dmi_disable_osi_vista,
-	.ident = "Toshiba NB100",
-	.matches = {
-		     DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"),
-		     DMI_MATCH(DMI_PRODUCT_NAME, "NB100"),
-		},
-	},
-
-	/*
-	 * The wireless hotkey does not work on those machines when
-	 * returning true for _OSI("Windows 2012")
-	 */
-	{
-	.callback = dmi_disable_osi_win8,
-	.ident = "Dell Inspiron 7737",
-	.matches = {
-		    DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
-		    DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 7737"),
-		},
-	},
-	{
-	.callback = dmi_disable_osi_win8,
-	.ident = "Dell Inspiron 7537",
-	.matches = {
-		    DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
-		    DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 7537"),
-		},
-	},
-	{
-	.callback = dmi_disable_osi_win8,
-	.ident = "Dell Inspiron 5437",
-	.matches = {
-		    DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
-		    DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 5437"),
-		},
-	},
-	{
-	.callback = dmi_disable_osi_win8,
-	.ident = "Dell Inspiron 3437",
-	.matches = {
-		    DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
-		    DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 3437"),
-		},
-	},
-	{
-	.callback = dmi_disable_osi_win8,
-	.ident = "Dell Vostro 3446",
-	.matches = {
-		    DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
-		    DMI_MATCH(DMI_PRODUCT_NAME, "Vostro 3446"),
-		},
-	},
-	{
-	.callback = dmi_disable_osi_win8,
-	.ident = "Dell Vostro 3546",
-	.matches = {
-		    DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
-		    DMI_MATCH(DMI_PRODUCT_NAME, "Vostro 3546"),
-		},
-	},
-
-	/*
-	 * BIOS invocation of _OSI(Linux) is almost always a BIOS bug.
-	 * Linux ignores it, except for the machines enumerated below.
-	 */
-
-	/*
-	 * Without this this EEEpc exports a non working WMI interface, with
-	 * this it exports a working "good old" eeepc_laptop interface, fixing
-	 * both brightness control, and rfkill not working.
-	 */
-	{
-	.callback = dmi_enable_osi_linux,
-	.ident = "Asus EEE PC 1015PX",
-	.matches = {
-		     DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer INC."),
-		     DMI_MATCH(DMI_PRODUCT_NAME, "1015PX"),
-		},
-	},
-
+static struct dmi_system_id acpi_rev_dmi_table[] __initdata = {
 #ifdef CONFIG_ACPI_REV_OVERRIDE_POSSIBLE
 	/*
 	 * DELL XPS 13 (2015) switches sound between HDA and I2S
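
What remains of the table is the table-driven quirk pattern itself: each dmi_system_id entry pairs match strings with a callback, and the first full match fires. A userspace model of the idea (standalone C; the struct and match strings are illustrative stand-ins, since the surviving _REV entry continues past this hunk):

#include <stdio.h>
#include <string.h>

struct quirk {
	int (*callback)(const struct quirk *q);
	const char *ident;
	const char *vendor;
	const char *product;
};

static int enable_rev_override(const struct quirk *q)
{
	printf("DMI detected: %s -> _REV override enabled\n", q->ident);
	return 0;
}

static const struct quirk quirks[] = {
	{ enable_rev_override, "DELL XPS 13 (2015)",
	  "Dell Inc.", "XPS 13 9343" },
	{ 0 }	/* NULL callback terminates the table */
};

static void check_system(const char *vendor, const char *product)
{
	for (const struct quirk *q = quirks; q->callback; q++)
		if (!strcmp(vendor, q->vendor) && !strcmp(product, q->product))
			q->callback(q);
}

int main(void)
{
	check_system("Dell Inc.", "XPS 13 9343");
	return 0;
}
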
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index c068c82..31e8da6 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -925,11 +925,13 @@
 		goto error0;
 	}
 
-	status = acpi_load_tables();
-	if (ACPI_FAILURE(status)) {
-		printk(KERN_ERR PREFIX
-		       "Unable to load the System Description Tables\n");
-		goto error0;
+	if (acpi_gbl_group_module_level_code) {
+		status = acpi_load_tables();
+		if (ACPI_FAILURE(status)) {
+			printk(KERN_ERR PREFIX
+			       "Unable to load the System Description Tables\n");
+			goto error0;
+		}
 	}
 
 #ifdef CONFIG_X86
@@ -995,17 +997,10 @@
 
 	acpi_os_initialize1();
 
-	status = acpi_enable_subsystem(ACPI_NO_ACPI_ENABLE);
-	if (ACPI_FAILURE(status)) {
-		printk(KERN_ERR PREFIX
-		       "Unable to start the ACPI Interpreter\n");
-		goto error1;
-	}
-
 	/*
 	 * ACPI 2.0 requires the EC driver to be loaded and work before
-	 * the EC device is found in the namespace (i.e. before acpi_initialize_objects()
-	 * is called).
+	 * the EC device is found in the namespace (i.e. before
+	 * acpi_load_tables() is called).
 	 *
 	 * This is accomplished by looking for the ECDT table, and getting
 	 * the EC parameters out of that.
@@ -1013,6 +1008,22 @@
 	status = acpi_ec_ecdt_probe();
 	/* Ignore result. Not having an ECDT is not fatal. */
 
+	if (!acpi_gbl_group_module_level_code) {
+		status = acpi_load_tables();
+		if (ACPI_FAILURE(status)) {
+			printk(KERN_ERR PREFIX
+			       "Unable to load the System Description Tables\n");
+			goto error1;
+		}
+	}
+
+	status = acpi_enable_subsystem(ACPI_NO_ACPI_ENABLE);
+	if (ACPI_FAILURE(status)) {
+		printk(KERN_ERR PREFIX
+		       "Unable to start the ACPI Interpreter\n");
+		goto error1;
+	}
+
 	status = acpi_initialize_objects(ACPI_FULL_INITIALIZATION);
 	if (ACPI_FAILURE(status)) {
 		printk(KERN_ERR PREFIX "Unable to initialize ACPI objects\n");
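
The two bus.c hunks split table loading across init: when acpi_gbl_group_module_level_code is set, tables load early as before; otherwise loading is deferred until after the ECDT probe, so the EC operation region is usable while module-level AML executes. A toy schematic of the two orderings (standalone C; the stubs just print, and error paths are omitted):

#include <stdio.h>

static void load_tables(void)      { puts("acpi_load_tables()"); }
static void ecdt_probe(void)       { puts("acpi_ec_ecdt_probe()"); }
static void enable_subsystem(void) { puts("acpi_enable_subsystem()"); }

static void init_flow(int group_module_level_code)
{
	if (group_module_level_code)
		load_tables();	/* early load, as before this patch */

	ecdt_probe();		/* EC from ECDT; failure is non-fatal */

	if (!group_module_level_code)
		load_tables();	/* deferred: module-level AML can reach the EC */

	enable_subsystem();
}

int main(void)
{
	init_flow(0);
	return 0;
}
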
diff --git a/drivers/acpi/device_sysfs.c b/drivers/acpi/device_sysfs.c
index b9afb47..7b2c48f 100644
--- a/drivers/acpi/device_sysfs.c
+++ b/drivers/acpi/device_sysfs.c
@@ -35,7 +35,7 @@
 	if (result)
 		return result;
 
-	result = sprintf(buf, "%s\n", (char*)path.pointer);
+	result = sprintf(buf, "%s\n", (char *)path.pointer);
 	kfree(path.pointer);
 	return result;
 }
@@ -333,7 +333,8 @@
 EXPORT_SYMBOL_GPL(acpi_device_modalias);
 
 static ssize_t
-acpi_device_modalias_show(struct device *dev, struct device_attribute *attr, char *buf) {
+acpi_device_modalias_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
 	return __acpi_device_modalias(to_acpi_device(dev), buf, 1024);
 }
 static DEVICE_ATTR(modalias, 0444, acpi_device_modalias_show, NULL);
@@ -397,7 +398,8 @@
 static DEVICE_ATTR(eject, 0200, NULL, acpi_eject_store);
 
 static ssize_t
-acpi_device_hid_show(struct device *dev, struct device_attribute *attr, char *buf) {
+acpi_device_hid_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
 	struct acpi_device *acpi_dev = to_acpi_device(dev);
 
 	return sprintf(buf, "%s\n", acpi_device_hid(acpi_dev));
@@ -467,12 +469,27 @@
 
 	status = acpi_evaluate_integer(acpi_dev->handle, "_SUN", NULL, &sun);
 	if (ACPI_FAILURE(status))
-		return -ENODEV;
+		return -EIO;
 
 	return sprintf(buf, "%llu\n", sun);
 }
 static DEVICE_ATTR(sun, 0444, acpi_device_sun_show, NULL);
 
+static ssize_t
+acpi_device_hrv_show(struct device *dev, struct device_attribute *attr,
+		     char *buf)
+{
+	struct acpi_device *acpi_dev = to_acpi_device(dev);
+	acpi_status status;
+	unsigned long long hrv;
+
+	status = acpi_evaluate_integer(acpi_dev->handle, "_HRV", NULL, &hrv);
+	if (ACPI_FAILURE(status))
+		return -EIO;
+
+	return sprintf(buf, "%llu\n", hrv);
+}
+static DEVICE_ATTR(hrv, 0444, acpi_device_hrv_show, NULL);
+
 static ssize_t status_show(struct device *dev, struct device_attribute *attr,
 				char *buf) {
 	struct acpi_device *acpi_dev = to_acpi_device(dev);
@@ -481,7 +498,7 @@
 
 	status = acpi_evaluate_integer(acpi_dev->handle, "_STA", NULL, &sta);
 	if (ACPI_FAILURE(status))
-		return -ENODEV;
+		return -EIO;
 
 	return sprintf(buf, "%llu\n", sta);
 }
@@ -541,16 +558,22 @@
 			goto end;
 	}
 
+	if (acpi_has_method(dev->handle, "_HRV")) {
+		result = device_create_file(&dev->dev, &dev_attr_hrv);
+		if (result)
+			goto end;
+	}
+
 	if (acpi_has_method(dev->handle, "_STA")) {
 		result = device_create_file(&dev->dev, &dev_attr_status);
 		if (result)
 			goto end;
 	}
 
-        /*
-         * If device has _EJ0, 'eject' file is created that is used to trigger
-         * hot-removal function from userland.
-         */
+	/*
+	 * If device has _EJ0, 'eject' file is created that is used to trigger
+	 * hot-removal function from userland.
+	 */
 	if (acpi_has_method(dev->handle, "_EJ0")) {
 		result = device_create_file(&dev->dev, &dev_attr_eject);
 		if (result)
@@ -604,6 +627,9 @@
 	if (acpi_has_method(dev->handle, "_SUN"))
 		device_remove_file(&dev->dev, &dev_attr_sun);
 
+	if (acpi_has_method(dev->handle, "_HRV"))
+		device_remove_file(&dev->dev, &dev_attr_hrv);
+
 	if (dev->pnp.unique_id)
 		device_remove_file(&dev->dev, &dev_attr_uid);
 	if (dev->pnp.type.bus_address)
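
Once registered, the new attribute surfaces as a plain text file under sysfs, alongside the existing hid/sun/status files. A minimal userspace reader (standalone C; the device path is a hypothetical example, since real names depend on the ACPI namespace):

#include <stdio.h>

int main(void)
{
	/* Hypothetical device path; substitute a real one from /sys/bus/acpi */
	FILE *f = fopen("/sys/bus/acpi/devices/ACPI0003:00/hrv", "r");
	unsigned long long hrv;

	if (f && fscanf(f, "%llu", &hrv) == 1)
		printf("_HRV = %llu\n", hrv);
	if (f)
		fclose(f);
	return 0;
}
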
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index b420fb4..0e70181 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -105,8 +105,8 @@
 enum {
 	EC_FLAGS_QUERY_PENDING,		/* Query is pending */
 	EC_FLAGS_QUERY_GUARDING,	/* Guard for SCI_EVT check */
-	EC_FLAGS_HANDLERS_INSTALLED,	/* Handlers for GPE and
-					 * OpReg are installed */
+	EC_FLAGS_GPE_HANDLER_INSTALLED,	/* GPE handler installed */
+	EC_FLAGS_EC_HANDLER_INSTALLED,	/* OpReg handler installed */
 	EC_FLAGS_STARTED,		/* Driver is started */
 	EC_FLAGS_STOPPED,		/* Driver is stopped */
 	EC_FLAGS_COMMAND_STORM,		/* GPE storms occurred to the
@@ -175,10 +175,9 @@
 struct acpi_ec *boot_ec, *first_ec;
 EXPORT_SYMBOL(first_ec);
 
-static int EC_FLAGS_VALIDATE_ECDT; /* ASUStec ECDTs need to be validated */
-static int EC_FLAGS_SKIP_DSDT_SCAN; /* Not all BIOS survive early DSDT scan */
 static int EC_FLAGS_CLEAR_ON_RESUME; /* Needs acpi_ec_clear() on boot/resume */
 static int EC_FLAGS_QUERY_HANDSHAKE; /* Needs QR_EC issued when SCI_EVT set */
+static int EC_FLAGS_CORRECT_ECDT; /* Needs ECDT port address correction */
 
 /* --------------------------------------------------------------------------
  *                           Logging/Debugging
@@ -367,7 +366,8 @@
 static void acpi_ec_submit_request(struct acpi_ec *ec)
 {
 	ec->reference_count++;
-	if (ec->reference_count == 1)
+	if (test_bit(EC_FLAGS_GPE_HANDLER_INSTALLED, &ec->flags) &&
+	    ec->reference_count == 1)
 		acpi_ec_enable_gpe(ec, true);
 }
 
@@ -376,7 +376,8 @@
 	bool flushed = false;
 
 	ec->reference_count--;
-	if (ec->reference_count == 0)
+	if (test_bit(EC_FLAGS_GPE_HANDLER_INSTALLED, &ec->flags) &&
+	    ec->reference_count == 0)
 		acpi_ec_disable_gpe(ec, true);
 	flushed = acpi_ec_flushed(ec);
 	if (flushed)
@@ -1287,52 +1288,64 @@
 {
 	acpi_status status;
 
-	if (test_bit(EC_FLAGS_HANDLERS_INSTALLED, &ec->flags))
-		return 0;
-	status = acpi_install_gpe_raw_handler(NULL, ec->gpe,
-				  ACPI_GPE_EDGE_TRIGGERED,
-				  &acpi_ec_gpe_handler, ec);
-	if (ACPI_FAILURE(status))
-		return -ENODEV;
-
 	acpi_ec_start(ec, false);
-	status = acpi_install_address_space_handler(ec->handle,
-						    ACPI_ADR_SPACE_EC,
-						    &acpi_ec_space_handler,
-						    NULL, ec);
-	if (ACPI_FAILURE(status)) {
-		if (status == AE_NOT_FOUND) {
-			/*
-			 * Maybe OS fails in evaluating the _REG object.
-			 * The AE_NOT_FOUND error will be ignored and OS
-			 * continue to initialize EC.
-			 */
-			pr_err("Fail in evaluating the _REG object"
-				" of EC device. Broken bios is suspected.\n");
-		} else {
-			acpi_ec_stop(ec, false);
-			acpi_remove_gpe_handler(NULL, ec->gpe,
-				&acpi_ec_gpe_handler);
-			return -ENODEV;
+
+	if (!test_bit(EC_FLAGS_EC_HANDLER_INSTALLED, &ec->flags)) {
+		status = acpi_install_address_space_handler(ec->handle,
+							    ACPI_ADR_SPACE_EC,
+							    &acpi_ec_space_handler,
+							    NULL, ec);
+		if (ACPI_FAILURE(status)) {
+			if (status == AE_NOT_FOUND) {
+				/*
+				 * Maybe OS fails in evaluating the _REG
+				 * object. The AE_NOT_FOUND error will be
+				 * ignored and the OS will continue to
+				 * initialize the EC.
+				 */
+				pr_err("Failed to evaluate the _REG object"
+					" of the EC device. Broken BIOS is suspected.\n");
+			} else {
+				acpi_ec_stop(ec, false);
+				return -ENODEV;
+			}
+		}
+		set_bit(EC_FLAGS_EC_HANDLER_INSTALLED, &ec->flags);
+	}
+
+	if (!test_bit(EC_FLAGS_GPE_HANDLER_INSTALLED, &ec->flags)) {
+		status = acpi_install_gpe_raw_handler(NULL, ec->gpe,
+					  ACPI_GPE_EDGE_TRIGGERED,
+					  &acpi_ec_gpe_handler, ec);
+		/* This is not fatal as we can poll EC events */
+		if (ACPI_SUCCESS(status)) {
+			set_bit(EC_FLAGS_GPE_HANDLER_INSTALLED, &ec->flags);
+			if (test_bit(EC_FLAGS_STARTED, &ec->flags) &&
+			    ec->reference_count >= 1)
+				acpi_ec_enable_gpe(ec, true);
 		}
 	}
 
-	set_bit(EC_FLAGS_HANDLERS_INSTALLED, &ec->flags);
 	return 0;
 }
 
 static void ec_remove_handlers(struct acpi_ec *ec)
 {
-	if (!test_bit(EC_FLAGS_HANDLERS_INSTALLED, &ec->flags))
-		return;
 	acpi_ec_stop(ec, false);
-	if (ACPI_FAILURE(acpi_remove_address_space_handler(ec->handle,
-				ACPI_ADR_SPACE_EC, &acpi_ec_space_handler)))
-		pr_err("failed to remove space handler\n");
-	if (ACPI_FAILURE(acpi_remove_gpe_handler(NULL, ec->gpe,
-				&acpi_ec_gpe_handler)))
-		pr_err("failed to remove gpe handler\n");
-	clear_bit(EC_FLAGS_HANDLERS_INSTALLED, &ec->flags);
+
+	if (test_bit(EC_FLAGS_EC_HANDLER_INSTALLED, &ec->flags)) {
+		if (ACPI_FAILURE(acpi_remove_address_space_handler(ec->handle,
+					ACPI_ADR_SPACE_EC, &acpi_ec_space_handler)))
+			pr_err("failed to remove space handler\n");
+		clear_bit(EC_FLAGS_EC_HANDLER_INSTALLED, &ec->flags);
+	}
+
+	if (test_bit(EC_FLAGS_GPE_HANDLER_INSTALLED, &ec->flags)) {
+		if (ACPI_FAILURE(acpi_remove_gpe_handler(NULL, ec->gpe,
+					&acpi_ec_gpe_handler)))
+			pr_err("failed to remove gpe handler\n");
+		clear_bit(EC_FLAGS_GPE_HANDLER_INSTALLED, &ec->flags);
+	}
 }
 
 static int acpi_ec_add(struct acpi_device *device)
@@ -1344,11 +1357,12 @@
 	strcpy(acpi_device_class(device), ACPI_EC_CLASS);
 
 	/* Check for boot EC */
-	if (boot_ec &&
-	    (boot_ec->handle == device->handle ||
-	     boot_ec->handle == ACPI_ROOT_OBJECT)) {
+	if (boot_ec) {
 		ec = boot_ec;
 		boot_ec = NULL;
+		ec_remove_handlers(ec);
+		if (first_ec == ec)
+			first_ec = NULL;
 	} else {
 		ec = make_acpi_ec();
 		if (!ec)
@@ -1434,7 +1448,7 @@
 
 int __init acpi_boot_ec_enable(void)
 {
-	if (!boot_ec || test_bit(EC_FLAGS_HANDLERS_INSTALLED, &boot_ec->flags))
+	if (!boot_ec)
 		return 0;
 	if (!ec_install_handlers(boot_ec)) {
 		first_ec = boot_ec;
@@ -1448,20 +1462,6 @@
 	{"", 0},
 };
 
-/* Some BIOS do not survive early DSDT scan, skip it */
-static int ec_skip_dsdt_scan(const struct dmi_system_id *id)
-{
-	EC_FLAGS_SKIP_DSDT_SCAN = 1;
-	return 0;
-}
-
-/* ASUStek often supplies us with broken ECDT, validate it */
-static int ec_validate_ecdt(const struct dmi_system_id *id)
-{
-	EC_FLAGS_VALIDATE_ECDT = 1;
-	return 0;
-}
-
 #if 0
 /*
  * Some EC firmware variations refuses to respond QR_EC when SCI_EVT is not
@@ -1503,30 +1503,29 @@
 	return 0;
 }
 
+static int ec_correct_ecdt(const struct dmi_system_id *id)
+{
+	pr_debug("Detected system needing ECDT address correction.\n");
+	EC_FLAGS_CORRECT_ECDT = 1;
+	return 0;
+}
+
 static struct dmi_system_id ec_dmi_table[] __initdata = {
 	{
-	ec_skip_dsdt_scan, "Compal JFL92", {
-	DMI_MATCH(DMI_BIOS_VENDOR, "COMPAL"),
-	DMI_MATCH(DMI_BOARD_NAME, "JFL92") }, NULL},
+	ec_correct_ecdt, "Asus L4R", {
+	DMI_MATCH(DMI_BIOS_VERSION, "1008.006"),
+	DMI_MATCH(DMI_PRODUCT_NAME, "L4R"),
+	DMI_MATCH(DMI_BOARD_NAME, "L4R") }, NULL},
 	{
-	ec_validate_ecdt, "MSI MS-171F", {
+	ec_correct_ecdt, "Asus M6R", {
+	DMI_MATCH(DMI_BIOS_VERSION, "0207"),
+	DMI_MATCH(DMI_PRODUCT_NAME, "M6R"),
+	DMI_MATCH(DMI_BOARD_NAME, "M6R") }, NULL},
+	{
+	ec_correct_ecdt, "MSI MS-171F", {
 	DMI_MATCH(DMI_SYS_VENDOR, "Micro-Star"),
 	DMI_MATCH(DMI_PRODUCT_NAME, "MS-171F"),}, NULL},
 	{
-	ec_validate_ecdt, "ASUS hardware", {
-	DMI_MATCH(DMI_BIOS_VENDOR, "ASUS") }, NULL},
-	{
-	ec_validate_ecdt, "ASUS hardware", {
-	DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer Inc.") }, NULL},
-	{
-	ec_skip_dsdt_scan, "HP Folio 13", {
-	DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
-	DMI_MATCH(DMI_PRODUCT_NAME, "HP Folio 13"),}, NULL},
-	{
-	ec_validate_ecdt, "ASUS hardware", {
-	DMI_MATCH(DMI_SYS_VENDOR, "ASUSTek Computer Inc."),
-	DMI_MATCH(DMI_PRODUCT_NAME, "L4R"),}, NULL},
-	{
 	ec_clear_on_resume, "Samsung hardware", {
 	DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD.")}, NULL},
 	{},
@@ -1534,8 +1533,8 @@
 
 int __init acpi_ec_ecdt_probe(void)
 {
+	int ret = 0;
 	acpi_status status;
-	struct acpi_ec *saved_ec = NULL;
 	struct acpi_table_ecdt *ecdt_ptr;
 
 	boot_ec = make_acpi_ec();
@@ -1547,67 +1546,45 @@
 	dmi_check_system(ec_dmi_table);
 	status = acpi_get_table(ACPI_SIG_ECDT, 1,
 				(struct acpi_table_header **)&ecdt_ptr);
-	if (ACPI_SUCCESS(status)) {
-		pr_info("EC description table is found, configuring boot EC\n");
+	if (ACPI_FAILURE(status)) {
+		ret = -ENODEV;
+		goto error;
+	}
+
+	if (!ecdt_ptr->control.address || !ecdt_ptr->data.address) {
+		/*
+		 * Asus X50GL:
+		 * https://bugzilla.kernel.org/show_bug.cgi?id=11880
+		 */
+		ret = -ENODEV;
+		goto error;
+	}
+
+	pr_info("EC description table is found, configuring boot EC\n");
+	if (EC_FLAGS_CORRECT_ECDT) {
+		/*
+		 * Asus L4R, Asus M6R
+		 * https://bugzilla.kernel.org/show_bug.cgi?id=9399
+		 * MSI MS-171F
+		 * https://bugzilla.kernel.org/show_bug.cgi?id=12461
+		 */
+		boot_ec->command_addr = ecdt_ptr->data.address;
+		boot_ec->data_addr = ecdt_ptr->control.address;
+	} else {
 		boot_ec->command_addr = ecdt_ptr->control.address;
 		boot_ec->data_addr = ecdt_ptr->data.address;
-		boot_ec->gpe = ecdt_ptr->gpe;
-		boot_ec->handle = ACPI_ROOT_OBJECT;
-		acpi_get_handle(ACPI_ROOT_OBJECT, ecdt_ptr->id,
-				&boot_ec->handle);
-		/* Don't trust ECDT, which comes from ASUSTek */
-		if (!EC_FLAGS_VALIDATE_ECDT)
-			goto install;
-		saved_ec = kmemdup(boot_ec, sizeof(struct acpi_ec), GFP_KERNEL);
-		if (!saved_ec)
-			return -ENOMEM;
-	/* fall through */
 	}
-
-	if (EC_FLAGS_SKIP_DSDT_SCAN) {
-		kfree(saved_ec);
-		return -ENODEV;
-	}
-
-	/* This workaround is needed only on some broken machines,
-	 * which require early EC, but fail to provide ECDT */
-	pr_debug("Look up EC in DSDT\n");
-	status = acpi_get_devices(ec_device_ids[0].id, ec_parse_device,
-					boot_ec, NULL);
-	/* Check that acpi_get_devices actually find something */
-	if (ACPI_FAILURE(status) || !boot_ec->handle)
-		goto error;
-	if (saved_ec) {
-		/* try to find good ECDT from ASUSTek */
-		if (saved_ec->command_addr != boot_ec->command_addr ||
-		    saved_ec->data_addr != boot_ec->data_addr ||
-		    saved_ec->gpe != boot_ec->gpe ||
-		    saved_ec->handle != boot_ec->handle)
-			pr_info("ASUSTek keeps feeding us with broken "
-			"ECDT tables, which are very hard to workaround. "
-			"Trying to use DSDT EC info instead. Please send "
-			"output of acpidump to linux-acpi@vger.kernel.org\n");
-		kfree(saved_ec);
-		saved_ec = NULL;
-	} else {
-		/* We really need to limit this workaround, the only ASUS,
-		* which needs it, has fake EC._INI method, so use it as flag.
-		* Keep boot_ec struct as it will be needed soon.
-		*/
-		if (!dmi_name_in_vendors("ASUS") ||
-		    !acpi_has_method(boot_ec->handle, "_INI"))
-			return -ENODEV;
-	}
-install:
-	if (!ec_install_handlers(boot_ec)) {
+	boot_ec->gpe = ecdt_ptr->gpe;
+	boot_ec->handle = ACPI_ROOT_OBJECT;
+	ret = ec_install_handlers(boot_ec);
+	if (!ret)
 		first_ec = boot_ec;
-		return 0;
-	}
 error:
-	kfree(boot_ec);
-	kfree(saved_ec);
-	boot_ec = NULL;
-	return -ENODEV;
+	if (ret) {
+		kfree(boot_ec);
+		boot_ec = NULL;
+	}
+	return ret;
 }
 
 static int param_set_event_clearing(const char *val, struct kernel_param *kp)
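The EC_FLAGS_CORRECT_ECDT quirk above boils down to reading the ECDT
command/data addresses crosswise on the DMI-matched boards. A minimal
standalone sketch of that address selection (plain C; the struct and
function names are made up for illustration, this is not kernel code):

#include <stdbool.h>
#include <stdint.h>

struct ecdt_regs {
	uint64_t control;	/* command/status register address */
	uint64_t data;		/* data register address */
};

/* Pick EC register addresses; quirked firmware swaps the two fields. */
static void ec_pick_addresses(const struct ecdt_regs *ecdt,
			      bool correct_ecdt,
			      uint64_t *command_addr, uint64_t *data_addr)
{
	if (correct_ecdt) {
		*command_addr = ecdt->data;	/* swapped by firmware */
		*data_addr = ecdt->control;
	} else {
		*command_addr = ecdt->control;	/* as specified */
		*data_addr = ecdt->data;
	}
}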
diff --git a/drivers/acpi/evged.c b/drivers/acpi/evged.c
new file mode 100644
index 0000000..46f0603
--- /dev/null
+++ b/drivers/acpi/evged.c
@@ -0,0 +1,154 @@
+/*
+ * Generic Event Device for ACPI.
+ *
+ * Copyright (c) 2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * The Generic Event Device allows platforms to handle interrupts in ACPI
+ * ASL statements. It follows an approach very similar to the _EVT method
+ * used for GPIO events. All interrupts are listed in _CRS and the handler
+ * is written in the _EVT method. Here is an example.
+ *
+ * Device (GED0)
+ * {
+ *
+ *     Name (_HID, "ACPI0013")
+ *     Name (_UID, 0)
+ *     Method (_CRS, 0x0, Serialized)
+ *     {
+ *		Name (RBUF, ResourceTemplate ()
+ *		{
+ *		Interrupt(ResourceConsumer, Edge, ActiveHigh, Shared, , , )
+ *		{123}
+ *		}
+ *     })
+ *
+ *     Method (_EVT, 1) {
+ *             if (Lequal(123, Arg0))
+ *             {
+ *             }
+ *     }
+ * }
+ *
+ */
+
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/platform_device.h>
+#include <linux/acpi.h>
+
+#define MODULE_NAME	"acpi-ged"
+
+struct acpi_ged_event {
+	struct list_head node;
+	struct device *dev;
+	unsigned int gsi;
+	unsigned int irq;
+	acpi_handle handle;
+};
+
+static irqreturn_t acpi_ged_irq_handler(int irq, void *data)
+{
+	struct acpi_ged_event *event = data;
+	acpi_status acpi_ret;
+
+	acpi_ret = acpi_execute_simple_method(event->handle, NULL, event->gsi);
+	if (ACPI_FAILURE(acpi_ret))
+		dev_err_once(event->dev, "IRQ method execution failed\n");
+
+	return IRQ_HANDLED;
+}
+
+static acpi_status acpi_ged_request_interrupt(struct acpi_resource *ares,
+					      void *context)
+{
+	struct acpi_ged_event *event;
+	unsigned int irq;
+	unsigned int gsi;
+	unsigned int irqflags = IRQF_ONESHOT;
+	struct device *dev = context;
+	acpi_handle handle = ACPI_HANDLE(dev);
+	acpi_handle evt_handle;
+	struct resource r;
+	struct acpi_resource_irq *p = &ares->data.irq;
+	struct acpi_resource_extended_irq *pext = &ares->data.extended_irq;
+
+	if (ares->type == ACPI_RESOURCE_TYPE_END_TAG)
+		return AE_OK;
+
+	if (!acpi_dev_resource_interrupt(ares, 0, &r)) {
+		dev_err(dev, "unable to parse IRQ resource\n");
+		return AE_ERROR;
+	}
+	if (ares->type == ACPI_RESOURCE_TYPE_IRQ)
+		gsi = p->interrupts[0];
+	else
+		gsi = pext->interrupts[0];
+
+	irq = r.start;
+
+	if (ACPI_FAILURE(acpi_get_handle(handle, "_EVT", &evt_handle))) {
+		dev_err(dev, "cannot locate _EVT method\n");
+		return AE_ERROR;
+	}
+
+	dev_info(dev, "GED listening GSI %u @ IRQ %u\n", gsi, irq);
+
+	event = devm_kzalloc(dev, sizeof(*event), GFP_KERNEL);
+	if (!event)
+		return AE_ERROR;
+
+	event->gsi = gsi;
+	event->dev = dev;
+	event->irq = irq;
+	event->handle = evt_handle;
+
+	if (r.flags & IORESOURCE_IRQ_SHAREABLE)
+		irqflags |= IRQF_SHARED;
+
+	if (devm_request_threaded_irq(dev, irq, NULL, acpi_ged_irq_handler,
+				      irqflags, "ACPI:Ged", event)) {
+		dev_err(dev, "failed to setup event handler for irq %u\n", irq);
+		return AE_ERROR;
+	}
+
+	return AE_OK;
+}
+
+static int ged_probe(struct platform_device *pdev)
+{
+	acpi_status acpi_ret;
+
+	acpi_ret = acpi_walk_resources(ACPI_HANDLE(&pdev->dev), "_CRS",
+				       acpi_ged_request_interrupt, &pdev->dev);
+	if (ACPI_FAILURE(acpi_ret)) {
+		dev_err(&pdev->dev, "unable to parse the _CRS record\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static const struct acpi_device_id ged_acpi_ids[] = {
+	{"ACPI0013"},
+	{},
+};
+
+static struct platform_driver ged_driver = {
+	.probe = ged_probe,
+	.driver = {
+		.name = MODULE_NAME,
+		.acpi_match_table = ACPI_PTR(ged_acpi_ids),
+	},
+};
+builtin_platform_driver(ged_driver);
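At its core, the threaded interrupt handler above just evaluates the
device's _EVT control method with the GSI as its single integer
argument. A sketch of that call pattern using the real
acpi_execute_simple_method() helper (the surrounding function name is
illustrative only):

#include <linux/acpi.h>
#include <linux/errno.h>

static int example_run_evt(acpi_handle evt_handle, unsigned int gsi)
{
	acpi_status status;

	/* Equivalent to evaluating \_SB.GED0._EVT(gsi) from ASL. */
	status = acpi_execute_simple_method(evt_handle, NULL, gsi);
	return ACPI_FAILURE(status) ? -EIO : 0;
}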
diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index 7c18847..9bb0773 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -20,7 +20,8 @@
 
 #define PREFIX "ACPI: "
 
-void acpi_initrd_initialize_tables(void);
+int early_acpi_osi_init(void);
+int acpi_osi_init(void);
 acpi_status acpi_os_initialize1(void);
 void init_acpi_device_notify(void);
 int acpi_scan_init(void);
diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c
index 72b6e9e..d176e0e 100644
--- a/drivers/acpi/numa.c
+++ b/drivers/acpi/numa.c
@@ -327,10 +327,18 @@
 
 	/* SRAT: Static Resource Affinity Table */
 	if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) {
-		acpi_table_parse_srat(ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY,
-				     acpi_parse_x2apic_affinity, 0);
-		acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY,
-				     acpi_parse_processor_affinity, 0);
+		struct acpi_subtable_proc srat_proc[2];
+
+		memset(srat_proc, 0, sizeof(srat_proc));
+		srat_proc[0].id = ACPI_SRAT_TYPE_CPU_AFFINITY;
+		srat_proc[0].handler = acpi_parse_processor_affinity;
+		srat_proc[1].id = ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY;
+		srat_proc[1].handler = acpi_parse_x2apic_affinity;
+
+		acpi_table_parse_entries_array(ACPI_SIG_SRAT,
+					sizeof(struct acpi_table_srat),
+					srat_proc, ARRAY_SIZE(srat_proc), 0);
+
 		cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
 					    acpi_parse_memory_affinity,
 					    NR_NODE_MEMBLKS);
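The change above replaces two full SRAT walks with a single pass that
dispatches each subtable type to its own handler. A sketch of the same
acpi_table_parse_entries_array() idiom applied to a hypothetical MADT
walk (handler bodies are stubs for illustration):

#include <linux/acpi.h>
#include <linux/string.h>

static int __init parse_lapic(struct acpi_subtable_header *header,
			      const unsigned long end)
{
	return 0;	/* would handle one local APIC entry */
}

static int __init parse_x2apic(struct acpi_subtable_header *header,
			       const unsigned long end)
{
	return 0;	/* would handle one x2APIC entry */
}

static int __init example_parse_madt(void)
{
	struct acpi_subtable_proc madt_proc[2];

	memset(madt_proc, 0, sizeof(madt_proc));
	madt_proc[0].id = ACPI_MADT_TYPE_LOCAL_APIC;
	madt_proc[0].handler = parse_lapic;
	madt_proc[1].id = ACPI_MADT_TYPE_LOCAL_X2APIC;
	madt_proc[1].handler = parse_x2apic;

	/* One pass over the MADT; both handlers fire as entries match. */
	return acpi_table_parse_entries_array(ACPI_SIG_MADT,
				sizeof(struct acpi_table_madt),
				madt_proc, ARRAY_SIZE(madt_proc), 0);
}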
diff --git a/drivers/acpi/osi.c b/drivers/acpi/osi.c
new file mode 100644
index 0000000..849f9d2
--- /dev/null
+++ b/drivers/acpi/osi.c
@@ -0,0 +1,522 @@
+/*
+ *  osi.c - _OSI implementation
+ *
+ *  Copyright (C) 2016 Intel Corporation
+ *    Author: Lv Zheng <lv.zheng@intel.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or (at
+ *  your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+/* Uncomment next line to get verbose printout */
+/* #define DEBUG */
+#define pr_fmt(fmt) "ACPI: " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/acpi.h>
+#include <linux/dmi.h>
+
+#include "internal.h"
+
+#define OSI_STRING_LENGTH_MAX	64
+#define OSI_STRING_ENTRIES_MAX	16
+
+struct acpi_osi_entry {
+	char string[OSI_STRING_LENGTH_MAX];
+	bool enable;
+};
+
+static struct acpi_osi_config {
+	u8		default_disabling;
+	unsigned int	linux_enable:1;
+	unsigned int	linux_dmi:1;
+	unsigned int	linux_cmdline:1;
+	unsigned int	darwin_enable:1;
+	unsigned int	darwin_dmi:1;
+	unsigned int	darwin_cmdline:1;
+} osi_config;
+
+static struct acpi_osi_entry
+osi_setup_entries[OSI_STRING_ENTRIES_MAX] __initdata = {
+	{"Module Device", true},
+	{"Processor Device", true},
+	{"3.0 _SCP Extensions", true},
+	{"Processor Aggregator Device", true},
+};
+
+static u32 acpi_osi_handler(acpi_string interface, u32 supported)
+{
+	if (!strcmp("Linux", interface)) {
+		pr_notice_once(FW_BUG
+			"BIOS _OSI(Linux) query %s%s\n",
+			osi_config.linux_enable ? "honored" : "ignored",
+			osi_config.linux_cmdline ? " via cmdline" :
+			osi_config.linux_dmi ? " via DMI" : "");
+	}
+	if (!strcmp("Darwin", interface)) {
+		pr_notice_once(
+			"BIOS _OSI(Darwin) query %s%s\n",
+			osi_config.darwin_enable ? "honored" : "ignored",
+			osi_config.darwin_cmdline ? " via cmdline" :
+			osi_config.darwin_dmi ? " via DMI" : "");
+	}
+
+	return supported;
+}
+
+void __init acpi_osi_setup(char *str)
+{
+	struct acpi_osi_entry *osi;
+	bool enable = true;
+	int i;
+
+	if (!acpi_gbl_create_osi_method)
+		return;
+
+	if (str == NULL || *str == '\0') {
+		pr_info("_OSI method disabled\n");
+		acpi_gbl_create_osi_method = FALSE;
+		return;
+	}
+
+	if (*str == '!') {
+		str++;
+		if (*str == '\0') {
+			/* Do not override acpi_osi=!* */
+			if (!osi_config.default_disabling)
+				osi_config.default_disabling =
+					ACPI_DISABLE_ALL_VENDOR_STRINGS;
+			return;
+		} else if (*str == '*') {
+			osi_config.default_disabling = ACPI_DISABLE_ALL_STRINGS;
+			for (i = 0; i < OSI_STRING_ENTRIES_MAX; i++) {
+				osi = &osi_setup_entries[i];
+				osi->enable = false;
+			}
+			return;
+		} else if (*str == '!') {
+			osi_config.default_disabling = 0;
+			return;
+		}
+		enable = false;
+	}
+
+	for (i = 0; i < OSI_STRING_ENTRIES_MAX; i++) {
+		osi = &osi_setup_entries[i];
+		if (!strcmp(osi->string, str)) {
+			osi->enable = enable;
+			break;
+		} else if (osi->string[0] == '\0') {
+			osi->enable = enable;
+			strncpy(osi->string, str, OSI_STRING_LENGTH_MAX);
+			break;
+		}
+	}
+}
+
+static void __init __acpi_osi_setup_darwin(bool enable)
+{
+	osi_config.darwin_enable = !!enable;
+	if (enable) {
+		acpi_osi_setup("!");
+		acpi_osi_setup("Darwin");
+	} else {
+		acpi_osi_setup("!!");
+		acpi_osi_setup("!Darwin");
+	}
+}
+
+static void __init acpi_osi_setup_darwin(bool enable)
+{
+	/* Override acpi_osi_dmi_blacklisted() */
+	osi_config.darwin_dmi = 0;
+	osi_config.darwin_cmdline = 1;
+	__acpi_osi_setup_darwin(enable);
+}
+
+/*
+ * The story of _OSI(Linux)
+ *
+ * From pre-history through Linux-2.6.22, Linux responded TRUE upon a BIOS
+ * OSI(Linux) query.
+ *
+ * Unfortunately, reference BIOS writers got wind of this and put
+ * OSI(Linux) in their example code, quickly exposing this string as
+ * ill-conceived and opening the door to an unbounded number of BIOS
+ * incompatibilities.
+ *
+ * For example, OSI(Linux) was used on resume to re-POST a video card on
+ * one system, because Linux at that time could not do a speedy restore in
+ * its native driver. But then upon gaining quick native restore
+ * capability, Linux has no way to tell the BIOS to skip the time-consuming
+ * POST -- putting Linux at a permanent performance disadvantage. On
+ * another system, the BIOS writer used OSI(Linux) to infer native OS
+ * support for IPMI!  On other systems, OSI(Linux) simply got in the way of
+ * Linux claiming to be compatible with other operating systems, exposing
+ * BIOS issues such as skipped device initialization.
+ *
+ * So "Linux" turned out to be a really poor chose of OSI string, and from
+ * Linux-2.6.23 onward we respond FALSE.
+ *
+ * BIOS writers should NOT query _OSI(Linux) on future systems. Linux will
+ * complain on the console when it sees it, and return FALSE. Getting Linux
+ * to return TRUE for your system will require a kernel source update to
+ * add a DMI entry, or booting with "acpi_osi=Linux".
+ */
+static void __init __acpi_osi_setup_linux(bool enable)
+{
+	osi_config.linux_enable = !!enable;
+	if (enable)
+		acpi_osi_setup("Linux");
+	else
+		acpi_osi_setup("!Linux");
+}
+
+static void __init acpi_osi_setup_linux(bool enable)
+{
+	/* Override acpi_osi_dmi_blacklisted() */
+	osi_config.linux_dmi = 0;
+	osi_config.linux_cmdline = 1;
+	__acpi_osi_setup_linux(enable);
+}
+
+/*
+ * Modify the list of "OS Interfaces" reported to BIOS via _OSI
+ *
+ * empty string disables _OSI
+ * string starting with '!' disables that string
+ * otherwise string is added to list, augmenting built-in strings
+ */
+static void __init acpi_osi_setup_late(void)
+{
+	struct acpi_osi_entry *osi;
+	char *str;
+	int i;
+	acpi_status status;
+
+	if (osi_config.default_disabling) {
+		status = acpi_update_interfaces(osi_config.default_disabling);
+		if (ACPI_SUCCESS(status))
+			pr_info("Disabled all _OSI OS vendors%s\n",
+				osi_config.default_disabling ==
+				ACPI_DISABLE_ALL_STRINGS ?
+				" and feature groups" : "");
+	}
+
+	for (i = 0; i < OSI_STRING_ENTRIES_MAX; i++) {
+		osi = &osi_setup_entries[i];
+		str = osi->string;
+		if (*str == '\0')
+			break;
+		if (osi->enable) {
+			status = acpi_install_interface(str);
+			if (ACPI_SUCCESS(status))
+				pr_info("Added _OSI(%s)\n", str);
+		} else {
+			status = acpi_remove_interface(str);
+			if (ACPI_SUCCESS(status))
+				pr_info("Deleted _OSI(%s)\n", str);
+		}
+	}
+}
+
+static int __init osi_setup(char *str)
+{
+	if (str && !strcmp("Linux", str))
+		acpi_osi_setup_linux(true);
+	else if (str && !strcmp("!Linux", str))
+		acpi_osi_setup_linux(false);
+	else if (str && !strcmp("Darwin", str))
+		acpi_osi_setup_darwin(true);
+	else if (str && !strcmp("!Darwin", str))
+		acpi_osi_setup_darwin(false);
+	else
+		acpi_osi_setup(str);
+
+	return 1;
+}
+__setup("acpi_osi=", osi_setup);
+
+bool acpi_osi_is_win8(void)
+{
+	return acpi_gbl_osi_data >= ACPI_OSI_WIN_8;
+}
+EXPORT_SYMBOL(acpi_osi_is_win8);
+
+static void __init acpi_osi_dmi_darwin(bool enable,
+				       const struct dmi_system_id *d)
+{
+	pr_notice("DMI detected to setup _OSI(\"Darwin\"): %s\n", d->ident);
+	osi_config.darwin_dmi = 1;
+	__acpi_osi_setup_darwin(enable);
+}
+
+void __init acpi_osi_dmi_linux(bool enable, const struct dmi_system_id *d)
+{
+	pr_notice("DMI detected to setup _OSI(\"Linux\"): %s\n", d->ident);
+	osi_config.linux_dmi = 1;
+	__acpi_osi_setup_linux(enable);
+}
+
+static int __init dmi_enable_osi_darwin(const struct dmi_system_id *d)
+{
+	acpi_osi_dmi_darwin(true, d);
+
+	return 0;
+}
+
+static int __init dmi_enable_osi_linux(const struct dmi_system_id *d)
+{
+	acpi_osi_dmi_linux(true, d);
+
+	return 0;
+}
+
+static int __init dmi_disable_osi_vista(const struct dmi_system_id *d)
+{
+	pr_notice("DMI detected: %s\n", d->ident);
+	acpi_osi_setup("!Windows 2006");
+	acpi_osi_setup("!Windows 2006 SP1");
+	acpi_osi_setup("!Windows 2006 SP2");
+
+	return 0;
+}
+
+static int __init dmi_disable_osi_win7(const struct dmi_system_id *d)
+{
+	pr_notice("DMI detected: %s\n", d->ident);
+	acpi_osi_setup("!Windows 2009");
+
+	return 0;
+}
+
+static int __init dmi_disable_osi_win8(const struct dmi_system_id *d)
+{
+	pr_notice("DMI detected: %s\n", d->ident);
+	acpi_osi_setup("!Windows 2012");
+
+	return 0;
+}
+
+/*
+ * Linux default _OSI response behavior is determined by this DMI table.
+ *
+ * Note that _OSI("Linux")/_OSI("Darwin") determined here can be overridden
+ * by acpi_osi=!Linux/acpi_osi=!Darwin command line options.
+ */
+static struct dmi_system_id acpi_osi_dmi_table[] __initdata = {
+	{
+	.callback = dmi_disable_osi_vista,
+	.ident = "Fujitsu Siemens",
+	.matches = {
+		     DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
+		     DMI_MATCH(DMI_PRODUCT_NAME, "ESPRIMO Mobile V5505"),
+		},
+	},
+	{
+	/*
+	 * The MSI GX723 DSDT contains an NVIF method that needs to be
+	 * called by the Nvidia driver (e.g. nouveau) when the user presses
+	 * the brightness hotkey. Currently nouveau does not do this, which
+	 * causes an infinite while loop in the DSDT whenever the hotkey is
+	 * pressed. Add the MSI GX723's DMI information to this table to
+	 * work around the issue; remove it once nouveau grows the needed
+	 * support.
+	 */
+	.callback = dmi_disable_osi_vista,
+	.ident = "MSI GX723",
+	.matches = {
+		     DMI_MATCH(DMI_SYS_VENDOR, "Micro-Star International"),
+		     DMI_MATCH(DMI_PRODUCT_NAME, "GX723"),
+		},
+	},
+	{
+	.callback = dmi_disable_osi_vista,
+	.ident = "Sony VGN-NS10J_S",
+	.matches = {
+		     DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"),
+		     DMI_MATCH(DMI_PRODUCT_NAME, "VGN-NS10J_S"),
+		},
+	},
+	{
+	.callback = dmi_disable_osi_vista,
+	.ident = "Sony VGN-SR290J",
+	.matches = {
+		     DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"),
+		     DMI_MATCH(DMI_PRODUCT_NAME, "VGN-SR290J"),
+		},
+	},
+	{
+	.callback = dmi_disable_osi_vista,
+	.ident = "VGN-NS50B_L",
+	.matches = {
+		     DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"),
+		     DMI_MATCH(DMI_PRODUCT_NAME, "VGN-NS50B_L"),
+		},
+	},
+	{
+	.callback = dmi_disable_osi_vista,
+	.ident = "VGN-SR19XN",
+	.matches = {
+		     DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"),
+		     DMI_MATCH(DMI_PRODUCT_NAME, "VGN-SR19XN"),
+		},
+	},
+	{
+	.callback = dmi_disable_osi_vista,
+	.ident = "Toshiba Satellite L355",
+	.matches = {
+		     DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"),
+		     DMI_MATCH(DMI_PRODUCT_VERSION, "Satellite L355"),
+		},
+	},
+	{
+	.callback = dmi_disable_osi_win7,
+	.ident = "ASUS K50IJ",
+	.matches = {
+		     DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer Inc."),
+		     DMI_MATCH(DMI_PRODUCT_NAME, "K50IJ"),
+		},
+	},
+	{
+	.callback = dmi_disable_osi_vista,
+	.ident = "Toshiba P305D",
+	.matches = {
+		     DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"),
+		     DMI_MATCH(DMI_PRODUCT_NAME, "Satellite P305D"),
+		},
+	},
+	{
+	.callback = dmi_disable_osi_vista,
+	.ident = "Toshiba NB100",
+	.matches = {
+		     DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"),
+		     DMI_MATCH(DMI_PRODUCT_NAME, "NB100"),
+		},
+	},
+
+	/*
+	 * The wireless hotkey does not work on those machines when
+	 * returning true for _OSI("Windows 2012")
+	 */
+	{
+	.callback = dmi_disable_osi_win8,
+	.ident = "Dell Inspiron 7737",
+	.matches = {
+		    DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+		    DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 7737"),
+		},
+	},
+	{
+	.callback = dmi_disable_osi_win8,
+	.ident = "Dell Inspiron 7537",
+	.matches = {
+		    DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+		    DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 7537"),
+		},
+	},
+	{
+	.callback = dmi_disable_osi_win8,
+	.ident = "Dell Inspiron 5437",
+	.matches = {
+		    DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+		    DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 5437"),
+		},
+	},
+	{
+	.callback = dmi_disable_osi_win8,
+	.ident = "Dell Inspiron 3437",
+	.matches = {
+		    DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+		    DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 3437"),
+		},
+	},
+	{
+	.callback = dmi_disable_osi_win8,
+	.ident = "Dell Vostro 3446",
+	.matches = {
+		    DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+		    DMI_MATCH(DMI_PRODUCT_NAME, "Vostro 3446"),
+		},
+	},
+	{
+	.callback = dmi_disable_osi_win8,
+	.ident = "Dell Vostro 3546",
+	.matches = {
+		    DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+		    DMI_MATCH(DMI_PRODUCT_NAME, "Vostro 3546"),
+		},
+	},
+
+	/*
+	 * BIOS invocation of _OSI(Linux) is almost always a BIOS bug.
+	 * Linux ignores it, except for the machines enumerated below.
+	 */
+
+	/*
+	 * Without this quirk the EEE PC exports a non-working WMI interface;
+	 * with it, the machine exports the working "good old" eeepc_laptop
+	 * interface, fixing both brightness control and rfkill.
+	 */
+	{
+	.callback = dmi_enable_osi_linux,
+	.ident = "Asus EEE PC 1015PX",
+	.matches = {
+		     DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer INC."),
+		     DMI_MATCH(DMI_PRODUCT_NAME, "1015PX"),
+		},
+	},
+
+	/*
+	 * Enable _OSI("Darwin") for all apple platforms.
+	 */
+	{
+	.callback = dmi_enable_osi_darwin,
+	.ident = "Apple hardware",
+	.matches = {
+		     DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
+		},
+	},
+	{
+	.callback = dmi_enable_osi_darwin,
+	.ident = "Apple hardware",
+	.matches = {
+		     DMI_MATCH(DMI_SYS_VENDOR, "Apple Computer, Inc."),
+		},
+	},
+	{}
+};
+
+static __init void acpi_osi_dmi_blacklisted(void)
+{
+	dmi_check_system(acpi_osi_dmi_table);
+}
+
+int __init early_acpi_osi_init(void)
+{
+	acpi_osi_dmi_blacklisted();
+
+	return 0;
+}
+
+int __init acpi_osi_init(void)
+{
+	acpi_install_interface_handler(acpi_osi_handler);
+	acpi_osi_setup_late();
+
+	return 0;
+}
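The '!' prefix handling in acpi_osi_setup() encodes a small grammar for
the acpi_osi= parameter. A standalone sketch of just that classification
step (plain C with illustrative names; not kernel code):

#include <stddef.h>

enum osi_action {
	OSI_DISABLE_METHOD,	/* acpi_osi=     (empty string)      */
	OSI_DISABLE_VENDORS,	/* acpi_osi=!    all vendor strings  */
	OSI_DISABLE_ALL,	/* acpi_osi=!*   built-ins included  */
	OSI_REVERT_DEFAULT,	/* acpi_osi=!!   undo default off    */
	OSI_REMOVE_STRING,	/* acpi_osi=!XYZ remove one string   */
	OSI_ADD_STRING,		/* acpi_osi=XYZ  add one string      */
};

static enum osi_action classify(const char *str)
{
	if (str == NULL || *str == '\0')
		return OSI_DISABLE_METHOD;
	if (*str != '!')
		return OSI_ADD_STRING;
	str++;
	if (*str == '\0')
		return OSI_DISABLE_VENDORS;
	if (*str == '*')
		return OSI_DISABLE_ALL;
	if (*str == '!')
		return OSI_REVERT_DEFAULT;
	return OSI_REMOVE_STRING;
}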
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 814d5f8..b108f13 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -56,10 +56,6 @@
 	struct work_struct work;
 };
 
-#ifdef CONFIG_ACPI_CUSTOM_DSDT
-#include CONFIG_ACPI_CUSTOM_DSDT_FILE
-#endif
-
 #ifdef ENABLE_DEBUGGER
 #include <linux/kdb.h>
 
@@ -96,72 +92,6 @@
 static LIST_HEAD(acpi_ioremaps);
 static DEFINE_MUTEX(acpi_ioremap_lock);
 
-static void __init acpi_osi_setup_late(void);
-
-/*
- * The story of _OSI(Linux)
- *
- * From pre-history through Linux-2.6.22,
- * Linux responded TRUE upon a BIOS OSI(Linux) query.
- *
- * Unfortunately, reference BIOS writers got wind of this
- * and put OSI(Linux) in their example code, quickly exposing
- * this string as ill-conceived and opening the door to
- * an un-bounded number of BIOS incompatibilities.
- *
- * For example, OSI(Linux) was used on resume to re-POST a
- * video card on one system, because Linux at that time
- * could not do a speedy restore in its native driver.
- * But then upon gaining quick native restore capability,
- * Linux has no way to tell the BIOS to skip the time-consuming
- * POST -- putting Linux at a permanent performance disadvantage.
- * On another system, the BIOS writer used OSI(Linux)
- * to infer native OS support for IPMI!  On other systems,
- * OSI(Linux) simply got in the way of Linux claiming to
- * be compatible with other operating systems, exposing
- * BIOS issues such as skipped device initialization.
- *
- * So "Linux" turned out to be a really poor chose of
- * OSI string, and from Linux-2.6.23 onward we respond FALSE.
- *
- * BIOS writers should NOT query _OSI(Linux) on future systems.
- * Linux will complain on the console when it sees it, and return FALSE.
- * To get Linux to return TRUE for your system  will require
- * a kernel source update to add a DMI entry,
- * or boot with "acpi_osi=Linux"
- */
-
-static struct osi_linux {
-	unsigned int	enable:1;
-	unsigned int	dmi:1;
-	unsigned int	cmdline:1;
-	unsigned int	default_disabling:1;
-} osi_linux = {0, 0, 0, 0};
-
-static u32 acpi_osi_handler(acpi_string interface, u32 supported)
-{
-	if (!strcmp("Linux", interface)) {
-
-		printk_once(KERN_NOTICE FW_BUG PREFIX
-			"BIOS _OSI(Linux) query %s%s\n",
-			osi_linux.enable ? "honored" : "ignored",
-			osi_linux.cmdline ? " via cmdline" :
-			osi_linux.dmi ? " via DMI" : "");
-	}
-
-	if (!strcmp("Darwin", interface)) {
-		/*
-		 * Apple firmware will behave poorly if it receives positive
-		 * answers to "Darwin" and any other OS. Respond positively
-		 * to Darwin and then disable all other vendor strings.
-		 */
-		acpi_update_interfaces(ACPI_DISABLE_ALL_VENDOR_STRINGS);
-		supported = ACPI_UINT32_MAX;
-	}
-
-	return supported;
-}
-
 static void __init acpi_request_region (struct acpi_generic_address *gas,
 	unsigned int length, char *desc)
 {
@@ -582,7 +512,7 @@
 
 acpi_status
 acpi_os_predefined_override(const struct acpi_predefined_names *init_val,
-			    char **new_val)
+			    acpi_string *new_val)
 {
 	if (!init_val || !new_val)
 		return AE_BAD_PARAMETER;
@@ -602,280 +532,6 @@
 	return AE_OK;
 }
 
-static void acpi_table_taint(struct acpi_table_header *table)
-{
-	pr_warn(PREFIX
-		"Override [%4.4s-%8.8s], this is unsafe: tainting kernel\n",
-		table->signature, table->oem_table_id);
-	add_taint(TAINT_OVERRIDDEN_ACPI_TABLE, LOCKDEP_NOW_UNRELIABLE);
-}
-
-#ifdef CONFIG_ACPI_INITRD_TABLE_OVERRIDE
-#include <linux/earlycpio.h>
-#include <linux/memblock.h>
-
-static u64 acpi_tables_addr;
-static int all_tables_size;
-
-/* Copied from acpica/tbutils.c:acpi_tb_checksum() */
-static u8 __init acpi_table_checksum(u8 *buffer, u32 length)
-{
-	u8 sum = 0;
-	u8 *end = buffer + length;
-
-	while (buffer < end)
-		sum = (u8) (sum + *(buffer++));
-	return sum;
-}
-
-/* All but ACPI_SIG_RSDP and ACPI_SIG_FACS: */
-static const char * const table_sigs[] = {
-	ACPI_SIG_BERT, ACPI_SIG_CPEP, ACPI_SIG_ECDT, ACPI_SIG_EINJ,
-	ACPI_SIG_ERST, ACPI_SIG_HEST, ACPI_SIG_MADT, ACPI_SIG_MSCT,
-	ACPI_SIG_SBST, ACPI_SIG_SLIT, ACPI_SIG_SRAT, ACPI_SIG_ASF,
-	ACPI_SIG_BOOT, ACPI_SIG_DBGP, ACPI_SIG_DMAR, ACPI_SIG_HPET,
-	ACPI_SIG_IBFT, ACPI_SIG_IVRS, ACPI_SIG_MCFG, ACPI_SIG_MCHI,
-	ACPI_SIG_SLIC, ACPI_SIG_SPCR, ACPI_SIG_SPMI, ACPI_SIG_TCPA,
-	ACPI_SIG_UEFI, ACPI_SIG_WAET, ACPI_SIG_WDAT, ACPI_SIG_WDDT,
-	ACPI_SIG_WDRT, ACPI_SIG_DSDT, ACPI_SIG_FADT, ACPI_SIG_PSDT,
-	ACPI_SIG_RSDT, ACPI_SIG_XSDT, ACPI_SIG_SSDT, NULL };
-
-#define ACPI_HEADER_SIZE sizeof(struct acpi_table_header)
-
-#define ACPI_OVERRIDE_TABLES 64
-static struct cpio_data __initdata acpi_initrd_files[ACPI_OVERRIDE_TABLES];
-static DECLARE_BITMAP(acpi_initrd_installed, ACPI_OVERRIDE_TABLES);
-
-#define MAP_CHUNK_SIZE   (NR_FIX_BTMAPS << PAGE_SHIFT)
-
-void __init acpi_initrd_override(void *data, size_t size)
-{
-	int sig, no, table_nr = 0, total_offset = 0;
-	long offset = 0;
-	struct acpi_table_header *table;
-	char cpio_path[32] = "kernel/firmware/acpi/";
-	struct cpio_data file;
-
-	if (data == NULL || size == 0)
-		return;
-
-	for (no = 0; no < ACPI_OVERRIDE_TABLES; no++) {
-		file = find_cpio_data(cpio_path, data, size, &offset);
-		if (!file.data)
-			break;
-
-		data += offset;
-		size -= offset;
-
-		if (file.size < sizeof(struct acpi_table_header)) {
-			pr_err("ACPI OVERRIDE: Table smaller than ACPI header [%s%s]\n",
-				cpio_path, file.name);
-			continue;
-		}
-
-		table = file.data;
-
-		for (sig = 0; table_sigs[sig]; sig++)
-			if (!memcmp(table->signature, table_sigs[sig], 4))
-				break;
-
-		if (!table_sigs[sig]) {
-			pr_err("ACPI OVERRIDE: Unknown signature [%s%s]\n",
-				cpio_path, file.name);
-			continue;
-		}
-		if (file.size != table->length) {
-			pr_err("ACPI OVERRIDE: File length does not match table length [%s%s]\n",
-				cpio_path, file.name);
-			continue;
-		}
-		if (acpi_table_checksum(file.data, table->length)) {
-			pr_err("ACPI OVERRIDE: Bad table checksum [%s%s]\n",
-				cpio_path, file.name);
-			continue;
-		}
-
-		pr_info("%4.4s ACPI table found in initrd [%s%s][0x%x]\n",
-			table->signature, cpio_path, file.name, table->length);
-
-		all_tables_size += table->length;
-		acpi_initrd_files[table_nr].data = file.data;
-		acpi_initrd_files[table_nr].size = file.size;
-		table_nr++;
-	}
-	if (table_nr == 0)
-		return;
-
-	acpi_tables_addr =
-		memblock_find_in_range(0, max_low_pfn_mapped << PAGE_SHIFT,
-				       all_tables_size, PAGE_SIZE);
-	if (!acpi_tables_addr) {
-		WARN_ON(1);
-		return;
-	}
-	/*
-	 * Only calling e820_add_reserve does not work and the
-	 * tables are invalid (memory got used) later.
-	 * memblock_reserve works as expected and the tables won't get modified.
-	 * But it's not enough on X86 because ioremap will
-	 * complain later (used by acpi_os_map_memory) that the pages
-	 * that should get mapped are not marked "reserved".
-	 * Both memblock_reserve and e820_add_region (via arch_reserve_mem_area)
-	 * works fine.
-	 */
-	memblock_reserve(acpi_tables_addr, all_tables_size);
-	arch_reserve_mem_area(acpi_tables_addr, all_tables_size);
-
-	/*
-	 * early_ioremap only can remap 256k one time. If we map all
-	 * tables one time, we will hit the limit. Need to map chunks
-	 * one by one during copying the same as that in relocate_initrd().
-	 */
-	for (no = 0; no < table_nr; no++) {
-		unsigned char *src_p = acpi_initrd_files[no].data;
-		phys_addr_t size = acpi_initrd_files[no].size;
-		phys_addr_t dest_addr = acpi_tables_addr + total_offset;
-		phys_addr_t slop, clen;
-		char *dest_p;
-
-		total_offset += size;
-
-		while (size) {
-			slop = dest_addr & ~PAGE_MASK;
-			clen = size;
-			if (clen > MAP_CHUNK_SIZE - slop)
-				clen = MAP_CHUNK_SIZE - slop;
-			dest_p = early_ioremap(dest_addr & PAGE_MASK,
-						 clen + slop);
-			memcpy(dest_p + slop, src_p, clen);
-			early_iounmap(dest_p, clen + slop);
-			src_p += clen;
-			dest_addr += clen;
-			size -= clen;
-		}
-	}
-}
-
-acpi_status
-acpi_os_physical_table_override(struct acpi_table_header *existing_table,
-				acpi_physical_address *address, u32 *length)
-{
-	int table_offset = 0;
-	int table_index = 0;
-	struct acpi_table_header *table;
-	u32 table_length;
-
-	*length = 0;
-	*address = 0;
-	if (!acpi_tables_addr)
-		return AE_OK;
-
-	while (table_offset + ACPI_HEADER_SIZE <= all_tables_size) {
-		table = acpi_os_map_memory(acpi_tables_addr + table_offset,
-					   ACPI_HEADER_SIZE);
-		if (table_offset + table->length > all_tables_size) {
-			acpi_os_unmap_memory(table, ACPI_HEADER_SIZE);
-			WARN_ON(1);
-			return AE_OK;
-		}
-
-		table_length = table->length;
-
-		/* Only override tables matched */
-		if (test_bit(table_index, acpi_initrd_installed) ||
-		    memcmp(existing_table->signature, table->signature, 4) ||
-		    memcmp(table->oem_table_id, existing_table->oem_table_id,
-			   ACPI_OEM_TABLE_ID_SIZE)) {
-			acpi_os_unmap_memory(table, ACPI_HEADER_SIZE);
-			goto next_table;
-		}
-
-		*length = table_length;
-		*address = acpi_tables_addr + table_offset;
-		acpi_table_taint(existing_table);
-		acpi_os_unmap_memory(table, ACPI_HEADER_SIZE);
-		set_bit(table_index, acpi_initrd_installed);
-		break;
-
-next_table:
-		table_offset += table_length;
-		table_index++;
-	}
-	return AE_OK;
-}
-
-void __init acpi_initrd_initialize_tables(void)
-{
-	int table_offset = 0;
-	int table_index = 0;
-	u32 table_length;
-	struct acpi_table_header *table;
-
-	if (!acpi_tables_addr)
-		return;
-
-	while (table_offset + ACPI_HEADER_SIZE <= all_tables_size) {
-		table = acpi_os_map_memory(acpi_tables_addr + table_offset,
-					   ACPI_HEADER_SIZE);
-		if (table_offset + table->length > all_tables_size) {
-			acpi_os_unmap_memory(table, ACPI_HEADER_SIZE);
-			WARN_ON(1);
-			return;
-		}
-
-		table_length = table->length;
-
-		/* Skip RSDT/XSDT which should only be used for override */
-		if (test_bit(table_index, acpi_initrd_installed) ||
-		    ACPI_COMPARE_NAME(table->signature, ACPI_SIG_RSDT) ||
-		    ACPI_COMPARE_NAME(table->signature, ACPI_SIG_XSDT)) {
-			acpi_os_unmap_memory(table, ACPI_HEADER_SIZE);
-			goto next_table;
-		}
-
-		acpi_table_taint(table);
-		acpi_os_unmap_memory(table, ACPI_HEADER_SIZE);
-		acpi_install_table(acpi_tables_addr + table_offset, TRUE);
-		set_bit(table_index, acpi_initrd_installed);
-next_table:
-		table_offset += table_length;
-		table_index++;
-	}
-}
-#else
-acpi_status
-acpi_os_physical_table_override(struct acpi_table_header *existing_table,
-				acpi_physical_address *address,
-				u32 *table_length)
-{
-	*table_length = 0;
-	*address = 0;
-	return AE_OK;
-}
-
-void __init acpi_initrd_initialize_tables(void)
-{
-}
-#endif /* CONFIG_ACPI_INITRD_TABLE_OVERRIDE */
-
-acpi_status
-acpi_os_table_override(struct acpi_table_header *existing_table,
-		       struct acpi_table_header **new_table)
-{
-	if (!existing_table || !new_table)
-		return AE_BAD_PARAMETER;
-
-	*new_table = NULL;
-
-#ifdef CONFIG_ACPI_CUSTOM_DSDT
-	if (strncmp(existing_table->signature, "DSDT", 4) == 0)
-		*new_table = (struct acpi_table_header *)AmlCode;
-#endif
-	if (*new_table != NULL)
-		acpi_table_taint(existing_table);
-	return AE_OK;
-}
-
 static irqreturn_t acpi_irq(int irq, void *dev_id)
 {
 	u32 handled;
@@ -1717,156 +1373,6 @@
 
 __setup("acpi_os_name=", acpi_os_name_setup);
 
-#define	OSI_STRING_LENGTH_MAX 64	/* arbitrary */
-#define	OSI_STRING_ENTRIES_MAX 16	/* arbitrary */
-
-struct osi_setup_entry {
-	char string[OSI_STRING_LENGTH_MAX];
-	bool enable;
-};
-
-static struct osi_setup_entry
-		osi_setup_entries[OSI_STRING_ENTRIES_MAX] __initdata = {
-	{"Module Device", true},
-	{"Processor Device", true},
-	{"3.0 _SCP Extensions", true},
-	{"Processor Aggregator Device", true},
-};
-
-void __init acpi_osi_setup(char *str)
-{
-	struct osi_setup_entry *osi;
-	bool enable = true;
-	int i;
-
-	if (!acpi_gbl_create_osi_method)
-		return;
-
-	if (str == NULL || *str == '\0') {
-		printk(KERN_INFO PREFIX "_OSI method disabled\n");
-		acpi_gbl_create_osi_method = FALSE;
-		return;
-	}
-
-	if (*str == '!') {
-		str++;
-		if (*str == '\0') {
-			osi_linux.default_disabling = 1;
-			return;
-		} else if (*str == '*') {
-			acpi_update_interfaces(ACPI_DISABLE_ALL_STRINGS);
-			for (i = 0; i < OSI_STRING_ENTRIES_MAX; i++) {
-				osi = &osi_setup_entries[i];
-				osi->enable = false;
-			}
-			return;
-		}
-		enable = false;
-	}
-
-	for (i = 0; i < OSI_STRING_ENTRIES_MAX; i++) {
-		osi = &osi_setup_entries[i];
-		if (!strcmp(osi->string, str)) {
-			osi->enable = enable;
-			break;
-		} else if (osi->string[0] == '\0') {
-			osi->enable = enable;
-			strncpy(osi->string, str, OSI_STRING_LENGTH_MAX);
-			break;
-		}
-	}
-}
-
-static void __init set_osi_linux(unsigned int enable)
-{
-	if (osi_linux.enable != enable)
-		osi_linux.enable = enable;
-
-	if (osi_linux.enable)
-		acpi_osi_setup("Linux");
-	else
-		acpi_osi_setup("!Linux");
-
-	return;
-}
-
-static void __init acpi_cmdline_osi_linux(unsigned int enable)
-{
-	osi_linux.cmdline = 1;	/* cmdline set the default and override DMI */
-	osi_linux.dmi = 0;
-	set_osi_linux(enable);
-
-	return;
-}
-
-void __init acpi_dmi_osi_linux(int enable, const struct dmi_system_id *d)
-{
-	printk(KERN_NOTICE PREFIX "DMI detected: %s\n", d->ident);
-
-	if (enable == -1)
-		return;
-
-	osi_linux.dmi = 1;	/* DMI knows that this box asks OSI(Linux) */
-	set_osi_linux(enable);
-
-	return;
-}
-
-/*
- * Modify the list of "OS Interfaces" reported to BIOS via _OSI
- *
- * empty string disables _OSI
- * string starting with '!' disables that string
- * otherwise string is added to list, augmenting built-in strings
- */
-static void __init acpi_osi_setup_late(void)
-{
-	struct osi_setup_entry *osi;
-	char *str;
-	int i;
-	acpi_status status;
-
-	if (osi_linux.default_disabling) {
-		status = acpi_update_interfaces(ACPI_DISABLE_ALL_VENDOR_STRINGS);
-
-		if (ACPI_SUCCESS(status))
-			printk(KERN_INFO PREFIX "Disabled all _OSI OS vendors\n");
-	}
-
-	for (i = 0; i < OSI_STRING_ENTRIES_MAX; i++) {
-		osi = &osi_setup_entries[i];
-		str = osi->string;
-
-		if (*str == '\0')
-			break;
-		if (osi->enable) {
-			status = acpi_install_interface(str);
-
-			if (ACPI_SUCCESS(status))
-				printk(KERN_INFO PREFIX "Added _OSI(%s)\n", str);
-		} else {
-			status = acpi_remove_interface(str);
-
-			if (ACPI_SUCCESS(status))
-				printk(KERN_INFO PREFIX "Deleted _OSI(%s)\n", str);
-		}
-	}
-}
-
-static int __init osi_setup(char *str)
-{
-	if (str && !strcmp("Linux", str))
-		acpi_cmdline_osi_linux(1);
-	else if (str && !strcmp("!Linux", str))
-		acpi_cmdline_osi_linux(0);
-	else
-		acpi_osi_setup(str);
-
-	return 1;
-}
-
-__setup("acpi_osi=", osi_setup);
-
 /*
  * Disable the auto-serialization of named objects creation methods.
  *
@@ -1986,12 +1492,6 @@
 }
 EXPORT_SYMBOL(acpi_resources_are_enforced);
 
-bool acpi_osi_is_win8(void)
-{
-	return acpi_gbl_osi_data >= ACPI_OSI_WIN_8;
-}
-EXPORT_SYMBOL(acpi_osi_is_win8);
-
 /*
  * Deallocate the memory for a spinlock.
  */
@@ -2157,8 +1657,7 @@
 	BUG_ON(!kacpid_wq);
 	BUG_ON(!kacpi_notify_wq);
 	BUG_ON(!kacpi_hotplug_wq);
-	acpi_install_interface_handler(acpi_osi_handler);
-	acpi_osi_setup_late();
+	acpi_osi_init();
 	return AE_OK;
 }
 
diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c
index ededa90..8fc7323 100644
--- a/drivers/acpi/pci_link.c
+++ b/drivers/acpi/pci_link.c
@@ -36,6 +36,7 @@
 #include <linux/mutex.h>
 #include <linux/slab.h>
 #include <linux/acpi.h>
+#include <linux/irq.h>
 
 #include "internal.h"
 
@@ -437,17 +438,15 @@
  * enabled system.
  */
 
-#define ACPI_MAX_IRQS		256
-#define ACPI_MAX_ISA_IRQ	16
+#define ACPI_MAX_ISA_IRQS	16
 
-#define PIRQ_PENALTY_PCI_AVAILABLE	(0)
 #define PIRQ_PENALTY_PCI_POSSIBLE	(16*16)
 #define PIRQ_PENALTY_PCI_USING		(16*16*16)
 #define PIRQ_PENALTY_ISA_TYPICAL	(16*16*16*16)
 #define PIRQ_PENALTY_ISA_USED		(16*16*16*16*16)
 #define PIRQ_PENALTY_ISA_ALWAYS		(16*16*16*16*16*16)
 
-static int acpi_irq_penalty[ACPI_MAX_IRQS] = {
+static int acpi_isa_irq_penalty[ACPI_MAX_ISA_IRQS] = {
 	PIRQ_PENALTY_ISA_ALWAYS,	/* IRQ0 timer */
 	PIRQ_PENALTY_ISA_ALWAYS,	/* IRQ1 keyboard */
 	PIRQ_PENALTY_ISA_ALWAYS,	/* IRQ2 cascade */
@@ -457,9 +456,9 @@
 	PIRQ_PENALTY_ISA_TYPICAL,	/* IRQ6 */
 	PIRQ_PENALTY_ISA_TYPICAL,	/* IRQ7 parallel, spurious */
 	PIRQ_PENALTY_ISA_TYPICAL,	/* IRQ8 rtc, sometimes */
-	PIRQ_PENALTY_PCI_AVAILABLE,	/* IRQ9  PCI, often acpi */
-	PIRQ_PENALTY_PCI_AVAILABLE,	/* IRQ10 PCI */
-	PIRQ_PENALTY_PCI_AVAILABLE,	/* IRQ11 PCI */
+	0,				/* IRQ9  PCI, often acpi */
+	0,				/* IRQ10 PCI */
+	0,				/* IRQ11 PCI */
 	PIRQ_PENALTY_ISA_USED,		/* IRQ12 mouse */
 	PIRQ_PENALTY_ISA_USED,		/* IRQ13 fpe, sometimes */
 	PIRQ_PENALTY_ISA_USED,		/* IRQ14 ide0 */
@@ -467,39 +466,58 @@
 	/* >IRQ15 */
 };
 
-int __init acpi_irq_penalty_init(void)
+static int acpi_irq_pci_sharing_penalty(int irq)
 {
 	struct acpi_pci_link *link;
-	int i;
+	int penalty = 0;
 
-	/*
-	 * Update penalties to facilitate IRQ balancing.
-	 */
 	list_for_each_entry(link, &acpi_link_list, list) {
-
 		/*
-		 * reflect the possible and active irqs in the penalty table --
-		 * useful for breaking ties.
+		 * If a link is active, penalize its IRQ heavily
+		 * so we try to choose a different IRQ.
 		 */
-		if (link->irq.possible_count) {
-			int penalty =
-			    PIRQ_PENALTY_PCI_POSSIBLE /
-			    link->irq.possible_count;
+		if (link->irq.active && link->irq.active == irq)
+			penalty += PIRQ_PENALTY_PCI_USING;
+		else {
+			int i;
 
-			for (i = 0; i < link->irq.possible_count; i++) {
-				if (link->irq.possible[i] < ACPI_MAX_ISA_IRQ)
-					acpi_irq_penalty[link->irq.
-							 possible[i]] +=
-					    penalty;
-			}
-
-		} else if (link->irq.active) {
-			acpi_irq_penalty[link->irq.active] +=
-			    PIRQ_PENALTY_PCI_POSSIBLE;
+			/*
+			 * If a link is inactive, penalize the IRQs it
+			 * might use, but not as severely.
+			 */
+			for (i = 0; i < link->irq.possible_count; i++)
+				if (link->irq.possible[i] == irq)
+					penalty += PIRQ_PENALTY_PCI_POSSIBLE /
+						link->irq.possible_count;
 		}
 	}
 
-	return 0;
+	return penalty;
+}
+
+static int acpi_irq_get_penalty(int irq)
+{
+	int penalty = 0;
+
+	if (irq < ACPI_MAX_ISA_IRQS)
+		penalty += acpi_isa_irq_penalty[irq];
+
+	/*
+	 * Penalize the IRQ used by the ACPI SCI. If the ACPI SCI pin
+	 * attributes conflict with the PCI IRQ attributes, mark the ACPI
+	 * SCI as ISA_ALWAYS so it won't be used for PCI IRQs.
+	*/
+	if (irq == acpi_gbl_FADT.sci_interrupt) {
+		u32 type = irq_get_trigger_type(irq) & IRQ_TYPE_SENSE_MASK;
+
+		if (type != IRQ_TYPE_LEVEL_LOW)
+			penalty += PIRQ_PENALTY_ISA_ALWAYS;
+		else
+			penalty += PIRQ_PENALTY_PCI_USING;
+	}
+
+	penalty += acpi_irq_pci_sharing_penalty(irq);
+	return penalty;
 }
 
 static int acpi_irq_balance = -1;	/* 0: static, 1: balance */
@@ -547,12 +565,12 @@
 		 * the use of IRQs 9, 10, 11, and >15.
 		 */
 		for (i = (link->irq.possible_count - 1); i >= 0; i--) {
-			if (acpi_irq_penalty[irq] >
-			    acpi_irq_penalty[link->irq.possible[i]])
+			if (acpi_irq_get_penalty(irq) >
+			    acpi_irq_get_penalty(link->irq.possible[i]))
 				irq = link->irq.possible[i];
 		}
 	}
-	if (acpi_irq_penalty[irq] >= PIRQ_PENALTY_ISA_ALWAYS) {
+	if (acpi_irq_get_penalty(irq) >= PIRQ_PENALTY_ISA_ALWAYS) {
 		printk(KERN_ERR PREFIX "No IRQ available for %s [%s]. "
 			    "Try pci=noacpi or acpi=off\n",
 			    acpi_device_name(link->device),
@@ -568,7 +586,6 @@
 			    acpi_device_bid(link->device));
 		return -ENODEV;
 	} else {
-		acpi_irq_penalty[link->irq.active] += PIRQ_PENALTY_PCI_USING;
 		printk(KERN_WARNING PREFIX "%s [%s] enabled at IRQ %d\n",
 		       acpi_device_name(link->device),
 		       acpi_device_bid(link->device), link->irq.active);
@@ -778,7 +795,7 @@
 }
 
 /*
- * modify acpi_irq_penalty[] from cmdline
+ * modify acpi_isa_irq_penalty[] from cmdline
  */
 static int __init acpi_irq_penalty_update(char *str, int used)
 {
@@ -787,23 +804,24 @@
 	for (i = 0; i < 16; i++) {
 		int retval;
 		int irq;
+		int new_penalty;
 
 		retval = get_option(&str, &irq);
 
 		if (!retval)
 			break;	/* no number found */
 
-		if (irq < 0)
-			continue;
-
-		if (irq >= ARRAY_SIZE(acpi_irq_penalty))
+		/* see if this is an ISA IRQ */
+		if ((irq < 0) || (irq >= ACPI_MAX_ISA_IRQS))
 			continue;
 
 		if (used)
-			acpi_irq_penalty[irq] += PIRQ_PENALTY_ISA_USED;
+			new_penalty = acpi_irq_get_penalty(irq) +
+					PIRQ_PENALTY_ISA_USED;
 		else
-			acpi_irq_penalty[irq] = PIRQ_PENALTY_PCI_AVAILABLE;
+			new_penalty = 0;
 
+		acpi_isa_irq_penalty[irq] = new_penalty;
 		if (retval != 2)	/* no next number */
 			break;
 	}
@@ -819,34 +837,15 @@
  */
 void acpi_penalize_isa_irq(int irq, int active)
 {
-	if (irq >= 0 && irq < ARRAY_SIZE(acpi_irq_penalty)) {
-		if (active)
-			acpi_irq_penalty[irq] += PIRQ_PENALTY_ISA_USED;
-		else
-			acpi_irq_penalty[irq] += PIRQ_PENALTY_PCI_USING;
-	}
+	if ((irq >= 0) && (irq < ARRAY_SIZE(acpi_isa_irq_penalty)))
+		acpi_isa_irq_penalty[irq] = acpi_irq_get_penalty(irq) +
+			(active ? PIRQ_PENALTY_ISA_USED : PIRQ_PENALTY_PCI_USING);
 }
 
 bool acpi_isa_irq_available(int irq)
 {
-	return irq >= 0 && (irq >= ARRAY_SIZE(acpi_irq_penalty) ||
-			    acpi_irq_penalty[irq] < PIRQ_PENALTY_ISA_ALWAYS);
-}
-
-/*
- * Penalize IRQ used by ACPI SCI. If ACPI SCI pin attributes conflict with
- * PCI IRQ attributes, mark ACPI SCI as ISA_ALWAYS so it won't be use for
- * PCI IRQs.
- */
-void acpi_penalize_sci_irq(int irq, int trigger, int polarity)
-{
-	if (irq >= 0 && irq < ARRAY_SIZE(acpi_irq_penalty)) {
-		if (trigger != ACPI_MADT_TRIGGER_LEVEL ||
-		    polarity != ACPI_MADT_POLARITY_ACTIVE_LOW)
-			acpi_irq_penalty[irq] += PIRQ_PENALTY_ISA_ALWAYS;
-		else
-			acpi_irq_penalty[irq] += PIRQ_PENALTY_PCI_USING;
-	}
+	return irq >= 0 && (irq >= ARRAY_SIZE(acpi_isa_irq_penalty) ||
+		    acpi_irq_get_penalty(irq) < PIRQ_PENALTY_ISA_ALWAYS);
 }
 
 /*
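To see how the dynamic penalties steer the balancer, consider two links:
link A is inactive and may use IRQ 10 or 11, link B is active on IRQ 11.
IRQ 10 then scores PIRQ_PENALTY_PCI_POSSIBLE/2 = 128 while IRQ 11 scores
128 + PIRQ_PENALTY_PCI_USING = 4224, so the balancer prefers IRQ 10. A
standalone model of acpi_irq_pci_sharing_penalty() (plain C, not kernel
code):

#include <stdio.h>

#define PIRQ_PENALTY_PCI_POSSIBLE	(16 * 16)
#define PIRQ_PENALTY_PCI_USING		(16 * 16 * 16)

struct link {
	int active;		/* IRQ in use, or 0 if inactive */
	int possible[4];	/* candidate IRQs               */
	int possible_count;
};

static int penalty(const struct link *links, int nlinks, int irq)
{
	int p = 0, i, j;

	for (i = 0; i < nlinks; i++) {
		if (links[i].active == irq)
			p += PIRQ_PENALTY_PCI_USING;
		else
			for (j = 0; j < links[i].possible_count; j++)
				if (links[i].possible[j] == irq)
					p += PIRQ_PENALTY_PCI_POSSIBLE /
					     links[i].possible_count;
	}
	return p;
}

int main(void)
{
	struct link links[] = {
		{ 0,  { 10, 11 }, 2 },	/* inactive, may use 10 or 11 */
		{ 11, { 11 },     1 },	/* active on IRQ 11           */
	};

	/* prints: IRQ10=128 IRQ11=4224 */
	printf("IRQ10=%d IRQ11=%d\n",
	       penalty(links, 2, 10), penalty(links, 2, 11));
	return 0;
}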
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index 2a8b596..7a2e4d4 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -26,6 +26,11 @@
 #include "internal.h"
 #include "sleep.h"
 
+/*
+ * Some HW-full platforms do not have _S5, so they may need
+ * to leverage EFI power off for a shutdown.
+ */
+bool acpi_no_s5;
 static u8 sleep_states[ACPI_S_STATE_COUNT];
 
 static void acpi_sleep_tts_switch(u32 acpi_state)
@@ -882,6 +887,8 @@
 		sleep_states[ACPI_STATE_S5] = 1;
 		pm_power_off_prepare = acpi_power_off_prepare;
 		pm_power_off = acpi_power_off;
+	} else {
+		acpi_no_s5 = true;
 	}
 
 	supported[0] = 0;
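The flag's consumer is outside this diff; a plausible (hypothetical)
use is in the EFI poweroff decision, falling back to the EFI runtime
path when the firmware provides no _S5 package:

#include <linux/acpi.h>

extern bool acpi_no_s5;	/* defined in drivers/acpi/sleep.c above */

/* Hypothetical helper; the real consumer lives in arch/EFI code. */
static bool example_prefer_efi_poweroff(bool efi_runtime_available)
{
	return acpi_no_s5 && efi_runtime_available;
}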
diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c
index 0243d37..4b3a9e2 100644
--- a/drivers/acpi/sysfs.c
+++ b/drivers/acpi/sysfs.c
@@ -555,23 +555,22 @@
 static int get_status(u32 index, acpi_event_status *status,
 		      acpi_handle *handle)
 {
-	int result = 0;
+	int result;
 
 	if (index >= num_gpes + ACPI_NUM_FIXED_EVENTS)
-		goto end;
+		return -EINVAL;
 
 	if (index < num_gpes) {
 		result = acpi_get_gpe_device(index, handle);
 		if (result) {
 			ACPI_EXCEPTION((AE_INFO, AE_NOT_FOUND,
 					"Invalid GPE 0x%x", index));
-			goto end;
+			return result;
 		}
 		result = acpi_get_gpe_status(*handle, index, status);
 	} else if (index < (num_gpes + ACPI_NUM_FIXED_EVENTS))
 		result = acpi_get_event_status(index - num_gpes, status);
 
-end:
 	return result;
 }
 
diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c
index f49c024..a372f9e 100644
--- a/drivers/acpi/tables.c
+++ b/drivers/acpi/tables.c
@@ -32,8 +32,14 @@
 #include <linux/errno.h>
 #include <linux/acpi.h>
 #include <linux/bootmem.h>
+#include <linux/earlycpio.h>
+#include <linux/memblock.h>
 #include "internal.h"
 
+#ifdef CONFIG_ACPI_CUSTOM_DSDT
+#include CONFIG_ACPI_CUSTOM_DSDT_FILE
+#endif
+
 #define ACPI_MAX_TABLES		128
 
 static char *mps_inti_flags_polarity[] = { "dfl", "high", "res", "low" };
@@ -433,6 +439,314 @@
 	return;
 }
 
+static void acpi_table_taint(struct acpi_table_header *table)
+{
+	pr_warn("Override [%4.4s-%8.8s], this is unsafe: tainting kernel\n",
+		table->signature, table->oem_table_id);
+	add_taint(TAINT_OVERRIDDEN_ACPI_TABLE, LOCKDEP_NOW_UNRELIABLE);
+}
+
+#ifdef CONFIG_ACPI_TABLE_UPGRADE
+static u64 acpi_tables_addr;
+static int all_tables_size;
+
+/* Copied from acpica/tbutils.c:acpi_tb_checksum() */
+static u8 __init acpi_table_checksum(u8 *buffer, u32 length)
+{
+	u8 sum = 0;
+	u8 *end = buffer + length;
+
+	while (buffer < end)
+		sum = (u8) (sum + *(buffer++));
+	return sum;
+}
+
+/* All but ACPI_SIG_RSDP and ACPI_SIG_FACS: */
+static const char * const table_sigs[] = {
+	ACPI_SIG_BERT, ACPI_SIG_CPEP, ACPI_SIG_ECDT, ACPI_SIG_EINJ,
+	ACPI_SIG_ERST, ACPI_SIG_HEST, ACPI_SIG_MADT, ACPI_SIG_MSCT,
+	ACPI_SIG_SBST, ACPI_SIG_SLIT, ACPI_SIG_SRAT, ACPI_SIG_ASF,
+	ACPI_SIG_BOOT, ACPI_SIG_DBGP, ACPI_SIG_DMAR, ACPI_SIG_HPET,
+	ACPI_SIG_IBFT, ACPI_SIG_IVRS, ACPI_SIG_MCFG, ACPI_SIG_MCHI,
+	ACPI_SIG_SLIC, ACPI_SIG_SPCR, ACPI_SIG_SPMI, ACPI_SIG_TCPA,
+	ACPI_SIG_UEFI, ACPI_SIG_WAET, ACPI_SIG_WDAT, ACPI_SIG_WDDT,
+	ACPI_SIG_WDRT, ACPI_SIG_DSDT, ACPI_SIG_FADT, ACPI_SIG_PSDT,
+	ACPI_SIG_RSDT, ACPI_SIG_XSDT, ACPI_SIG_SSDT, NULL };
+
+#define ACPI_HEADER_SIZE sizeof(struct acpi_table_header)
+
+#define NR_ACPI_INITRD_TABLES 64
+static struct cpio_data __initdata acpi_initrd_files[NR_ACPI_INITRD_TABLES];
+static DECLARE_BITMAP(acpi_initrd_installed, NR_ACPI_INITRD_TABLES);
+
+#define MAP_CHUNK_SIZE   (NR_FIX_BTMAPS << PAGE_SHIFT)
+
+static void __init acpi_table_initrd_init(void *data, size_t size)
+{
+	int sig, no, table_nr = 0, total_offset = 0;
+	long offset = 0;
+	struct acpi_table_header *table;
+	char cpio_path[32] = "kernel/firmware/acpi/";
+	struct cpio_data file;
+
+	if (data == NULL || size == 0)
+		return;
+
+	for (no = 0; no < NR_ACPI_INITRD_TABLES; no++) {
+		file = find_cpio_data(cpio_path, data, size, &offset);
+		if (!file.data)
+			break;
+
+		data += offset;
+		size -= offset;
+
+		if (file.size < sizeof(struct acpi_table_header)) {
+			pr_err("ACPI OVERRIDE: Table smaller than ACPI header [%s%s]\n",
+				cpio_path, file.name);
+			continue;
+		}
+
+		table = file.data;
+
+		for (sig = 0; table_sigs[sig]; sig++)
+			if (!memcmp(table->signature, table_sigs[sig], 4))
+				break;
+
+		if (!table_sigs[sig]) {
+			pr_err("ACPI OVERRIDE: Unknown signature [%s%s]\n",
+				cpio_path, file.name);
+			continue;
+		}
+		if (file.size != table->length) {
+			pr_err("ACPI OVERRIDE: File length does not match table length [%s%s]\n",
+				cpio_path, file.name);
+			continue;
+		}
+		if (acpi_table_checksum(file.data, table->length)) {
+			pr_err("ACPI OVERRIDE: Bad table checksum [%s%s]\n",
+				cpio_path, file.name);
+			continue;
+		}
+
+		pr_info("%4.4s ACPI table found in initrd [%s%s][0x%x]\n",
+			table->signature, cpio_path, file.name, table->length);
+
+		all_tables_size += table->length;
+		acpi_initrd_files[table_nr].data = file.data;
+		acpi_initrd_files[table_nr].size = file.size;
+		table_nr++;
+	}
+	if (table_nr == 0)
+		return;
+
+	acpi_tables_addr =
+		memblock_find_in_range(0, max_low_pfn_mapped << PAGE_SHIFT,
+				       all_tables_size, PAGE_SIZE);
+	if (!acpi_tables_addr) {
+		WARN_ON(1);
+		return;
+	}
+	/*
+	 * Only calling e820_add_reserve does not work and the
+	 * tables are invalid (memory got used) later.
+	 * memblock_reserve works as expected and the tables won't get modified.
+	 * But it's not enough on X86 because ioremap will
+	 * complain later (used by acpi_os_map_memory) that the pages
+	 * that should get mapped are not marked "reserved".
+	 * Both memblock_reserve and e820_add_region (via arch_reserve_mem_area)
+	 * works fine.
+	 */
+	memblock_reserve(acpi_tables_addr, all_tables_size);
+	arch_reserve_mem_area(acpi_tables_addr, all_tables_size);
+
+	/*
+	 * early_ioremap only can remap 256k one time. If we map all
+	 * tables one time, we will hit the limit. Need to map chunks
+	 * one by one during copying the same as that in relocate_initrd().
+	 */
+	for (no = 0; no < table_nr; no++) {
+		unsigned char *src_p = acpi_initrd_files[no].data;
+		phys_addr_t size = acpi_initrd_files[no].size;
+		phys_addr_t dest_addr = acpi_tables_addr + total_offset;
+		phys_addr_t slop, clen;
+		char *dest_p;
+
+		total_offset += size;
+
+		while (size) {
+			slop = dest_addr & ~PAGE_MASK;
+			clen = size;
+			if (clen > MAP_CHUNK_SIZE - slop)
+				clen = MAP_CHUNK_SIZE - slop;
+			dest_p = early_ioremap(dest_addr & PAGE_MASK,
+						 clen + slop);
+			memcpy(dest_p + slop, src_p, clen);
+			early_iounmap(dest_p, clen + slop);
+			src_p += clen;
+			dest_addr += clen;
+			size -= clen;
+		}
+	}
+}
+
+static acpi_status
+acpi_table_initrd_override(struct acpi_table_header *existing_table,
+			   acpi_physical_address *address, u32 *length)
+{
+	int table_offset = 0;
+	int table_index = 0;
+	struct acpi_table_header *table;
+	u32 table_length;
+
+	*length = 0;
+	*address = 0;
+	if (!acpi_tables_addr)
+		return AE_OK;
+
+	while (table_offset + ACPI_HEADER_SIZE <= all_tables_size) {
+		table = acpi_os_map_memory(acpi_tables_addr + table_offset,
+					   ACPI_HEADER_SIZE);
+		if (table_offset + table->length > all_tables_size) {
+			acpi_os_unmap_memory(table, ACPI_HEADER_SIZE);
+			WARN_ON(1);
+			return AE_OK;
+		}
+
+		table_length = table->length;
+
+		/* Only override tables matched */
+		if (memcmp(existing_table->signature, table->signature, 4) ||
+		    memcmp(table->oem_id, existing_table->oem_id,
+			   ACPI_OEM_ID_SIZE) ||
+		    memcmp(table->oem_table_id, existing_table->oem_table_id,
+			   ACPI_OEM_TABLE_ID_SIZE)) {
+			acpi_os_unmap_memory(table, ACPI_HEADER_SIZE);
+			goto next_table;
+		}
+		/*
+		 * Mark the table to avoid being used in
+		 * acpi_table_initrd_scan() and check the revision.
+		 */
+		if (test_and_set_bit(table_index, acpi_initrd_installed) ||
+		    existing_table->oem_revision >= table->oem_revision) {
+			acpi_os_unmap_memory(table, ACPI_HEADER_SIZE);
+			goto next_table;
+		}
+
+		*length = table_length;
+		*address = acpi_tables_addr + table_offset;
+		pr_info("Table Upgrade: override [%4.4s-%6.6s-%8.8s]\n",
+			table->signature, table->oem_id,
+			table->oem_table_id);
+		acpi_os_unmap_memory(table, ACPI_HEADER_SIZE);
+		break;
+
+next_table:
+		table_offset += table_length;
+		table_index++;
+	}
+	return AE_OK;
+}
+
+static void __init acpi_table_initrd_scan(void)
+{
+	int table_offset = 0;
+	int table_index = 0;
+	u32 table_length;
+	struct acpi_table_header *table;
+
+	if (!acpi_tables_addr)
+		return;
+
+	while (table_offset + ACPI_HEADER_SIZE <= all_tables_size) {
+		table = acpi_os_map_memory(acpi_tables_addr + table_offset,
+					   ACPI_HEADER_SIZE);
+		if (table_offset + table->length > all_tables_size) {
+			acpi_os_unmap_memory(table, ACPI_HEADER_SIZE);
+			WARN_ON(1);
+			return;
+		}
+
+		table_length = table->length;
+
+		/* Skip RSDT/XSDT which should only be used for override */
+		if (ACPI_COMPARE_NAME(table->signature, ACPI_SIG_RSDT) ||
+		    ACPI_COMPARE_NAME(table->signature, ACPI_SIG_XSDT)) {
+			acpi_os_unmap_memory(table, ACPI_HEADER_SIZE);
+			goto next_table;
+		}
+		/*
+		 * Mark the table to avoid being used in
+		 * acpi_table_initrd_override(). Though this is not possible
+		 * because override is disabled in acpi_install_table().
+		 */
+		if (test_and_set_bit(table_index, acpi_initrd_installed)) {
+			acpi_os_unmap_memory(table, ACPI_HEADER_SIZE);
+			goto next_table;
+		}
+
+		pr_info("Table Upgrade: install [%4.4s-%6.6s-%8.8s]\n",
+			table->signature, table->oem_id,
+			table->oem_table_id);
+		acpi_os_unmap_memory(table, ACPI_HEADER_SIZE);
+		acpi_install_table(acpi_tables_addr + table_offset, TRUE);
+next_table:
+		table_offset += table_length;
+		table_index++;
+	}
+}
+#else
+static void __init acpi_table_initrd_init(void *data, size_t size)
+{
+}
+
+static acpi_status
+acpi_table_initrd_override(struct acpi_table_header *existing_table,
+			   acpi_physical_address *address,
+			   u32 *table_length)
+{
+	*table_length = 0;
+	*address = 0;
+	return AE_OK;
+}
+
+static void __init acpi_table_initrd_scan(void)
+{
+}
+#endif /* CONFIG_ACPI_TABLE_UPGRADE */
+
+acpi_status
+acpi_os_physical_table_override(struct acpi_table_header *existing_table,
+				acpi_physical_address *address,
+				u32 *table_length)
+{
+	return acpi_table_initrd_override(existing_table, address,
+					  table_length);
+}
+
+acpi_status
+acpi_os_table_override(struct acpi_table_header *existing_table,
+		       struct acpi_table_header **new_table)
+{
+	if (!existing_table || !new_table)
+		return AE_BAD_PARAMETER;
+
+	*new_table = NULL;
+
+#ifdef CONFIG_ACPI_CUSTOM_DSDT
+	if (strncmp(existing_table->signature, "DSDT", 4) == 0)
+		*new_table = (struct acpi_table_header *)AmlCode;
+#endif
+	if (*new_table != NULL)
+		acpi_table_taint(existing_table);
+	return AE_OK;
+}
+
+void __init early_acpi_table_init(void *data, size_t size)
+{
+	acpi_table_initrd_init(data, size);
+}
+
 /*
  * acpi_table_init()
  *
@@ -457,7 +771,7 @@
 	status = acpi_initialize_tables(initial_tables, ACPI_MAX_TABLES, 0);
 	if (ACPI_FAILURE(status))
 		return -EINVAL;
-	acpi_initrd_initialize_tables();
+	acpi_table_initrd_scan();
 
 	check_multiple_madt();
 	return 0;
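The upgrade gate in acpi_table_initrd_override() only lets an initrd
table replace the firmware one when signature, OEM ID and OEM table ID
all match and the oem_revision is strictly newer. A standalone sketch of
that predicate (plain C with a trimmed-down header struct; not kernel
code):

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

struct hdr {
	char signature[4];
	char oem_id[6];
	char oem_table_id[8];
	uint32_t oem_revision;
};

static bool table_upgrades(const struct hdr *existing,
			   const struct hdr *candidate)
{
	if (memcmp(existing->signature, candidate->signature, 4) ||
	    memcmp(existing->oem_id, candidate->oem_id, 6) ||
	    memcmp(existing->oem_table_id, candidate->oem_table_id, 8))
		return false;	/* different table, leave it alone */
	/* Same table: upgrade only to a strictly newer OEM revision. */
	return candidate->oem_revision > existing->oem_revision;
}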
diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c
index 050673f..ac832bf 100644
--- a/drivers/acpi/utils.c
+++ b/drivers/acpi/utils.c
@@ -707,7 +707,7 @@
 EXPORT_SYMBOL(acpi_check_dsm);
 
 /**
- * acpi_dev_present - Detect presence of a given ACPI device in the system.
+ * acpi_dev_found - Detect presence of a given ACPI device in the namespace.
  * @hid: Hardware ID of the device.
  *
  * Return %true if the device was present at the moment of invocation.
@@ -719,7 +719,7 @@
  * instead). Calling from module_init() is fine (which is synonymous
  * with device_initcall()).
  */
-bool acpi_dev_present(const char *hid)
+bool acpi_dev_found(const char *hid)
 {
 	struct acpi_device_bus_id *acpi_device_bus_id;
 	bool found = false;
@@ -734,7 +734,7 @@
 
 	return found;
 }
-EXPORT_SYMBOL(acpi_dev_present);
+EXPORT_SYMBOL(acpi_dev_found);
 
 /*
  * acpi_backlight= handling; this is done here rather than in video_detect.c
diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c
index 1316ddd..3d13276 100644
--- a/drivers/acpi/video_detect.c
+++ b/drivers/acpi/video_detect.c
@@ -358,7 +358,7 @@
 	if (!(video_caps & ACPI_VIDEO_BACKLIGHT))
 		return acpi_backlight_vendor;
 
-	if (acpi_osi_is_win8() && backlight_device_registered(BACKLIGHT_RAW))
+	if (acpi_osi_is_win8() && backlight_device_get_by_type(BACKLIGHT_RAW))
 		return acpi_backlight_native;
 
 	return acpi_backlight_video;
diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c
index 0e64a1b..3657ac1 100644
--- a/drivers/base/power/clock_ops.c
+++ b/drivers/base/power/clock_ops.c
@@ -159,7 +159,7 @@
 
 	count = of_count_phandle_with_args(dev->of_node, "clocks",
 					   "#clock-cells");
-	if (count == 0)
+	if (count <= 0)
 		return -ENODEV;
 
 	clks = kcalloc(count, sizeof(*clks), GFP_KERNEL);
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 56705b5..de23b64 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -229,17 +229,6 @@
 	return ret;
 }
 
-static int genpd_save_dev(struct generic_pm_domain *genpd, struct device *dev)
-{
-	return GENPD_DEV_CALLBACK(genpd, int, save_state, dev);
-}
-
-static int genpd_restore_dev(struct generic_pm_domain *genpd,
-			struct device *dev)
-{
-	return GENPD_DEV_CALLBACK(genpd, int, restore_state, dev);
-}
-
 static int genpd_dev_pm_qos_notifier(struct notifier_block *nb,
 				     unsigned long val, void *ptr)
 {
@@ -372,17 +361,63 @@
 }
 
 /**
- * pm_genpd_runtime_suspend - Suspend a device belonging to I/O PM domain.
+ * __genpd_runtime_suspend - walk the hierarchy of ->runtime_suspend() callbacks
+ * @dev: Device to handle.
+ */
+static int __genpd_runtime_suspend(struct device *dev)
+{
+	int (*cb)(struct device *__dev);
+
+	if (dev->type && dev->type->pm)
+		cb = dev->type->pm->runtime_suspend;
+	else if (dev->class && dev->class->pm)
+		cb = dev->class->pm->runtime_suspend;
+	else if (dev->bus && dev->bus->pm)
+		cb = dev->bus->pm->runtime_suspend;
+	else
+		cb = NULL;
+
+	if (!cb && dev->driver && dev->driver->pm)
+		cb = dev->driver->pm->runtime_suspend;
+
+	return cb ? cb(dev) : 0;
+}
+
+/**
+ * __genpd_runtime_resume - walk the hierarchy of ->runtime_resume() callbacks
+ * @dev: Device to handle.
+ */
+static int __genpd_runtime_resume(struct device *dev)
+{
+	int (*cb)(struct device *__dev);
+
+	if (dev->type && dev->type->pm)
+		cb = dev->type->pm->runtime_resume;
+	else if (dev->class && dev->class->pm)
+		cb = dev->class->pm->runtime_resume;
+	else if (dev->bus && dev->bus->pm)
+		cb = dev->bus->pm->runtime_resume;
+	else
+		cb = NULL;
+
+	if (!cb && dev->driver && dev->driver->pm)
+		cb = dev->driver->pm->runtime_resume;
+
+	return cb ? cb(dev) : 0;
+}
+
+/**
+ * genpd_runtime_suspend - Suspend a device belonging to I/O PM domain.
  * @dev: Device to suspend.
  *
  * Carry out a runtime suspend of a device under the assumption that its
  * pm_domain field points to the domain member of an object of type
  * struct generic_pm_domain representing a PM domain consisting of I/O devices.
  */
-static int pm_genpd_runtime_suspend(struct device *dev)
+static int genpd_runtime_suspend(struct device *dev)
 {
 	struct generic_pm_domain *genpd;
-	bool (*stop_ok)(struct device *__dev);
+	bool (*suspend_ok)(struct device *__dev);
 	struct gpd_timing_data *td = &dev_gpd_data(dev)->td;
 	bool runtime_pm = pm_runtime_enabled(dev);
 	ktime_t time_start;
@@ -401,21 +436,21 @@
 	 * runtime PM is disabled. Under these circumstances, we shall skip
 	 * validating/measuring the PM QoS latency.
 	 */
-	stop_ok = genpd->gov ? genpd->gov->stop_ok : NULL;
-	if (runtime_pm && stop_ok && !stop_ok(dev))
+	suspend_ok = genpd->gov ? genpd->gov->suspend_ok : NULL;
+	if (runtime_pm && suspend_ok && !suspend_ok(dev))
 		return -EBUSY;
 
 	/* Measure suspend latency. */
 	if (runtime_pm)
 		time_start = ktime_get();
 
-	ret = genpd_save_dev(genpd, dev);
+	ret = __genpd_runtime_suspend(dev);
 	if (ret)
 		return ret;
 
 	ret = genpd_stop_dev(genpd, dev);
 	if (ret) {
-		genpd_restore_dev(genpd, dev);
+		__genpd_runtime_resume(dev);
 		return ret;
 	}
 
@@ -446,14 +481,14 @@
 }
 
 /**
- * pm_genpd_runtime_resume - Resume a device belonging to I/O PM domain.
+ * genpd_runtime_resume - Resume a device belonging to I/O PM domain.
  * @dev: Device to resume.
  *
  * Carry out a runtime resume of a device under the assumption that its
  * pm_domain field points to the domain member of an object of type
  * struct generic_pm_domain representing a PM domain consisting of I/O devices.
  */
-static int pm_genpd_runtime_resume(struct device *dev)
+static int genpd_runtime_resume(struct device *dev)
 {
 	struct generic_pm_domain *genpd;
 	struct gpd_timing_data *td = &dev_gpd_data(dev)->td;
@@ -491,7 +526,7 @@
 	if (ret)
 		goto err_poweroff;
 
-	ret = genpd_restore_dev(genpd, dev);
+	ret = __genpd_runtime_resume(dev);
 	if (ret)
 		goto err_stop;
 
@@ -695,15 +730,6 @@
 	 * at this point and a system wakeup event should be reported if it's
 	 * set up to wake up the system from sleep states.
 	 */
-	pm_runtime_get_noresume(dev);
-	if (pm_runtime_barrier(dev) && device_may_wakeup(dev))
-		pm_wakeup_event(dev, 0);
-
-	if (pm_wakeup_pending()) {
-		pm_runtime_put(dev);
-		return -EBUSY;
-	}
-
 	if (resume_needed(dev, genpd))
 		pm_runtime_resume(dev);
 
@@ -716,10 +742,8 @@
 
 	mutex_unlock(&genpd->lock);
 
-	if (genpd->suspend_power_off) {
-		pm_runtime_put_noidle(dev);
+	if (genpd->suspend_power_off)
 		return 0;
-	}
 
 	/*
 	 * The PM domain must be in the GPD_STATE_ACTIVE state at this point,
@@ -741,7 +765,6 @@
 		pm_runtime_enable(dev);
 	}
 
-	pm_runtime_put(dev);
 	return ret;
 }
 
@@ -1427,54 +1450,6 @@
 }
 EXPORT_SYMBOL_GPL(pm_genpd_remove_subdomain);
 
-/* Default device callbacks for generic PM domains. */
-
-/**
- * pm_genpd_default_save_state - Default "save device state" for PM domains.
- * @dev: Device to handle.
- */
-static int pm_genpd_default_save_state(struct device *dev)
-{
-	int (*cb)(struct device *__dev);
-
-	if (dev->type && dev->type->pm)
-		cb = dev->type->pm->runtime_suspend;
-	else if (dev->class && dev->class->pm)
-		cb = dev->class->pm->runtime_suspend;
-	else if (dev->bus && dev->bus->pm)
-		cb = dev->bus->pm->runtime_suspend;
-	else
-		cb = NULL;
-
-	if (!cb && dev->driver && dev->driver->pm)
-		cb = dev->driver->pm->runtime_suspend;
-
-	return cb ? cb(dev) : 0;
-}
-
-/**
- * pm_genpd_default_restore_state - Default PM domains "restore device state".
- * @dev: Device to handle.
- */
-static int pm_genpd_default_restore_state(struct device *dev)
-{
-	int (*cb)(struct device *__dev);
-
-	if (dev->type && dev->type->pm)
-		cb = dev->type->pm->runtime_resume;
-	else if (dev->class && dev->class->pm)
-		cb = dev->class->pm->runtime_resume;
-	else if (dev->bus && dev->bus->pm)
-		cb = dev->bus->pm->runtime_resume;
-	else
-		cb = NULL;
-
-	if (!cb && dev->driver && dev->driver->pm)
-		cb = dev->driver->pm->runtime_resume;
-
-	return cb ? cb(dev) : 0;
-}
-
 /**
  * pm_genpd_init - Initialize a generic I/O PM domain object.
  * @genpd: PM domain object to initialize.
@@ -1498,8 +1473,8 @@
 	genpd->device_count = 0;
 	genpd->max_off_time_ns = -1;
 	genpd->max_off_time_changed = true;
-	genpd->domain.ops.runtime_suspend = pm_genpd_runtime_suspend;
-	genpd->domain.ops.runtime_resume = pm_genpd_runtime_resume;
+	genpd->domain.ops.runtime_suspend = genpd_runtime_suspend;
+	genpd->domain.ops.runtime_resume = genpd_runtime_resume;
 	genpd->domain.ops.prepare = pm_genpd_prepare;
 	genpd->domain.ops.suspend = pm_genpd_suspend;
 	genpd->domain.ops.suspend_late = pm_genpd_suspend_late;
@@ -1520,8 +1495,6 @@
 	genpd->domain.ops.restore_early = pm_genpd_resume_early;
 	genpd->domain.ops.restore = pm_genpd_resume;
 	genpd->domain.ops.complete = pm_genpd_complete;
-	genpd->dev_ops.save_state = pm_genpd_default_save_state;
-	genpd->dev_ops.restore_state = pm_genpd_default_restore_state;
 
 	if (genpd->flags & GENPD_FLAG_PM_CLK) {
 		genpd->dev_ops.stop = pm_clk_suspend;
diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c
index 00a5436..2e0fce7 100644
--- a/drivers/base/power/domain_governor.c
+++ b/drivers/base/power/domain_governor.c
@@ -37,10 +37,10 @@
 }
 
 /**
- * default_stop_ok - Default PM domain governor routine for stopping devices.
+ * default_suspend_ok - Default PM domain governor routine to suspend devices.
  * @dev: Device to check.
  */
-static bool default_stop_ok(struct device *dev)
+static bool default_suspend_ok(struct device *dev)
 {
 	struct gpd_timing_data *td = &dev_gpd_data(dev)->td;
 	unsigned long flags;
@@ -51,13 +51,13 @@
 	spin_lock_irqsave(&dev->power.lock, flags);
 
 	if (!td->constraint_changed) {
-		bool ret = td->cached_stop_ok;
+		bool ret = td->cached_suspend_ok;
 
 		spin_unlock_irqrestore(&dev->power.lock, flags);
 		return ret;
 	}
 	td->constraint_changed = false;
-	td->cached_stop_ok = false;
+	td->cached_suspend_ok = false;
 	td->effective_constraint_ns = -1;
 	constraint_ns = __dev_pm_qos_read_value(dev);
 
@@ -83,13 +83,13 @@
 			return false;
 	}
 	td->effective_constraint_ns = constraint_ns;
-	td->cached_stop_ok = constraint_ns >= 0;
+	td->cached_suspend_ok = constraint_ns >= 0;
 
 	/*
 	 * The children have been suspended already, so we don't need to take
-	 * their stop latencies into account here.
+	 * their suspend latencies into account here.
 	 */
-	return td->cached_stop_ok;
+	return td->cached_suspend_ok;
 }
 
 /**
@@ -150,7 +150,7 @@
 		 */
 		td = &to_gpd_data(pdd)->td;
 		constraint_ns = td->effective_constraint_ns;
-		/* default_stop_ok() need not be called before us. */
+		/* default_suspend_ok() need not be called before us. */
 		if (constraint_ns < 0) {
 			constraint_ns = dev_pm_qos_read_value(pdd->dev);
 			constraint_ns *= NSEC_PER_USEC;
@@ -227,7 +227,7 @@
 }
 
 struct dev_power_governor simple_qos_governor = {
-	.stop_ok = default_stop_ok,
+	.suspend_ok = default_suspend_ok,
 	.power_down_ok = default_power_down_ok,
 };
 
@@ -236,5 +236,5 @@
  */
 struct dev_power_governor pm_domain_always_on_gov = {
 	.power_down_ok = always_on_power_down_ok,
-	.stop_ok = default_stop_ok,
+	.suspend_ok = default_suspend_ok,
 };
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 6e7c3cc..c81667d 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -1556,7 +1556,6 @@
 static int device_prepare(struct device *dev, pm_message_t state)
 {
 	int (*callback)(struct device *) = NULL;
-	char *info = NULL;
 	int ret = 0;
 
 	if (dev->power.syscore)
@@ -1579,24 +1578,17 @@
 		goto unlock;
 	}
 
-	if (dev->pm_domain) {
-		info = "preparing power domain ";
+	if (dev->pm_domain)
 		callback = dev->pm_domain->ops.prepare;
-	} else if (dev->type && dev->type->pm) {
-		info = "preparing type ";
+	else if (dev->type && dev->type->pm)
 		callback = dev->type->pm->prepare;
-	} else if (dev->class && dev->class->pm) {
-		info = "preparing class ";
+	else if (dev->class && dev->class->pm)
 		callback = dev->class->pm->prepare;
-	} else if (dev->bus && dev->bus->pm) {
-		info = "preparing bus ";
+	else if (dev->bus && dev->bus->pm)
 		callback = dev->bus->pm->prepare;
-	}
 
-	if (!callback && dev->driver && dev->driver->pm) {
-		info = "preparing driver ";
+	if (!callback && dev->driver && dev->driver->pm)
 		callback = dev->driver->pm->prepare;
-	}
 
 	if (callback)
 		ret = callback(dev);
diff --git a/drivers/base/power/opp/Makefile b/drivers/base/power/opp/Makefile
index 19837ef..e70ceb4 100644
--- a/drivers/base/power/opp/Makefile
+++ b/drivers/base/power/opp/Makefile
@@ -1,3 +1,4 @@
 ccflags-$(CONFIG_DEBUG_DRIVER)	:= -DDEBUG
 obj-y				+= core.o cpu.o
+obj-$(CONFIG_OF)		+= of.o
 obj-$(CONFIG_DEBUG_FS)		+= debugfs.o
diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c
index d8f4cc2..7c04c87 100644
--- a/drivers/base/power/opp/core.c
+++ b/drivers/base/power/opp/core.c
@@ -18,7 +18,6 @@
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/device.h>
-#include <linux/of.h>
 #include <linux/export.h>
 #include <linux/regulator/consumer.h>
 
@@ -29,7 +28,7 @@
  * from here, with each opp_table containing the list of opps it supports in
  * various states of availability.
  */
-static LIST_HEAD(opp_tables);
+LIST_HEAD(opp_tables);
 /* Lock to allow exclusive modification to the device and opp lists */
 DEFINE_MUTEX(opp_table_lock);
 
@@ -53,26 +52,6 @@
 	return NULL;
 }
 
-static struct opp_table *_managed_opp(const struct device_node *np)
-{
-	struct opp_table *opp_table;
-
-	list_for_each_entry_rcu(opp_table, &opp_tables, node) {
-		if (opp_table->np == np) {
-			/*
-			 * Multiple devices can point to the same OPP table and
-			 * so will have same node-pointer, np.
-			 *
-			 * But the OPPs will be considered as shared only if the
-			 * OPP table contains a "opp-shared" property.
-			 */
-			return opp_table->shared_opp ? opp_table : NULL;
-		}
-	}
-
-	return NULL;
-}
-
 /**
  * _find_opp_table() - find opp_table struct using device pointer
  * @dev:	device pointer used to lookup OPP table
@@ -757,7 +736,6 @@
 {
 	struct opp_table *opp_table;
 	struct opp_device *opp_dev;
-	struct device_node *np;
 	int ret;
 
 	/* Check for existing table for 'dev' first */
@@ -781,20 +759,7 @@
 		return NULL;
 	}
 
-	/*
-	 * Only required for backward compatibility with v1 bindings, but isn't
-	 * harmful for other cases. And so we do it unconditionally.
-	 */
-	np = of_node_get(dev->of_node);
-	if (np) {
-		u32 val;
-
-		if (!of_property_read_u32(np, "clock-latency", &val))
-			opp_table->clock_latency_ns_max = val;
-		of_property_read_u32(np, "voltage-tolerance",
-				     &opp_table->voltage_tolerance_v1);
-		of_node_put(np);
-	}
+	_of_init_opp_table(opp_table, dev);
 
 	/* Set regulator to a non-NULL error value */
 	opp_table->regulator = ERR_PTR(-ENXIO);
@@ -890,8 +855,8 @@
  * It is assumed that the caller holds required mutex for an RCU updater
  * strategy.
  */
-static void _opp_remove(struct opp_table *opp_table,
-			struct dev_pm_opp *opp, bool notify)
+void _opp_remove(struct opp_table *opp_table, struct dev_pm_opp *opp,
+		 bool notify)
 {
 	/*
 	 * Notify the changes in the availability of the operable
@@ -952,8 +917,8 @@
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_remove);
 
-static struct dev_pm_opp *_allocate_opp(struct device *dev,
-					struct opp_table **opp_table)
+struct dev_pm_opp *_allocate_opp(struct device *dev,
+				 struct opp_table **opp_table)
 {
 	struct dev_pm_opp *opp;
 
@@ -989,8 +954,8 @@
 	return true;
 }
 
-static int _opp_add(struct device *dev, struct dev_pm_opp *new_opp,
-		    struct opp_table *opp_table)
+int _opp_add(struct device *dev, struct dev_pm_opp *new_opp,
+	     struct opp_table *opp_table)
 {
 	struct dev_pm_opp *opp;
 	struct list_head *head = &opp_table->opp_list;
@@ -1066,8 +1031,8 @@
  *		Duplicate OPPs (both freq and volt are same) and !opp->available
  * -ENOMEM	Memory allocation failure
  */
-static int _opp_add_v1(struct device *dev, unsigned long freq, long u_volt,
-		       bool dynamic)
+int _opp_add_v1(struct device *dev, unsigned long freq, long u_volt,
+		bool dynamic)
 {
 	struct opp_table *opp_table;
 	struct dev_pm_opp *new_opp;
@@ -1112,83 +1077,6 @@
 	return ret;
 }
 
-/* TODO: Support multiple regulators */
-static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev,
-			      struct opp_table *opp_table)
-{
-	u32 microvolt[3] = {0};
-	u32 val;
-	int count, ret;
-	struct property *prop = NULL;
-	char name[NAME_MAX];
-
-	/* Search for "opp-microvolt-<name>" */
-	if (opp_table->prop_name) {
-		snprintf(name, sizeof(name), "opp-microvolt-%s",
-			 opp_table->prop_name);
-		prop = of_find_property(opp->np, name, NULL);
-	}
-
-	if (!prop) {
-		/* Search for "opp-microvolt" */
-		sprintf(name, "opp-microvolt");
-		prop = of_find_property(opp->np, name, NULL);
-
-		/* Missing property isn't a problem, but an invalid entry is */
-		if (!prop)
-			return 0;
-	}
-
-	count = of_property_count_u32_elems(opp->np, name);
-	if (count < 0) {
-		dev_err(dev, "%s: Invalid %s property (%d)\n",
-			__func__, name, count);
-		return count;
-	}
-
-	/* There can be one or three elements here */
-	if (count != 1 && count != 3) {
-		dev_err(dev, "%s: Invalid number of elements in %s property (%d)\n",
-			__func__, name, count);
-		return -EINVAL;
-	}
-
-	ret = of_property_read_u32_array(opp->np, name, microvolt, count);
-	if (ret) {
-		dev_err(dev, "%s: error parsing %s: %d\n", __func__, name, ret);
-		return -EINVAL;
-	}
-
-	opp->u_volt = microvolt[0];
-
-	if (count == 1) {
-		opp->u_volt_min = opp->u_volt;
-		opp->u_volt_max = opp->u_volt;
-	} else {
-		opp->u_volt_min = microvolt[1];
-		opp->u_volt_max = microvolt[2];
-	}
-
-	/* Search for "opp-microamp-<name>" */
-	prop = NULL;
-	if (opp_table->prop_name) {
-		snprintf(name, sizeof(name), "opp-microamp-%s",
-			 opp_table->prop_name);
-		prop = of_find_property(opp->np, name, NULL);
-	}
-
-	if (!prop) {
-		/* Search for "opp-microamp" */
-		sprintf(name, "opp-microamp");
-		prop = of_find_property(opp->np, name, NULL);
-	}
-
-	if (prop && !of_property_read_u32(opp->np, name, &val))
-		opp->u_amp = val;
-
-	return 0;
-}
-
 /**
  * dev_pm_opp_set_supported_hw() - Set supported platforms
  * @dev: Device for which supported-hw has to be set.
@@ -1517,144 +1405,6 @@
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_put_regulator);
 
-static bool _opp_is_supported(struct device *dev, struct opp_table *opp_table,
-			      struct device_node *np)
-{
-	unsigned int count = opp_table->supported_hw_count;
-	u32 version;
-	int ret;
-
-	if (!opp_table->supported_hw)
-		return true;
-
-	while (count--) {
-		ret = of_property_read_u32_index(np, "opp-supported-hw", count,
-						 &version);
-		if (ret) {
-			dev_warn(dev, "%s: failed to read opp-supported-hw property at index %d: %d\n",
-				 __func__, count, ret);
-			return false;
-		}
-
-		/* Both of these are bitwise masks of the versions */
-		if (!(version & opp_table->supported_hw[count]))
-			return false;
-	}
-
-	return true;
-}
-
-/**
- * _opp_add_static_v2() - Allocate static OPPs (As per 'v2' DT bindings)
- * @dev:	device for which we do this operation
- * @np:		device node
- *
- * This function adds an opp definition to the opp table and returns status. The
- * opp can be controlled using dev_pm_opp_enable/disable functions and may be
- * removed by dev_pm_opp_remove.
- *
- * Locking: The internal opp_table and opp structures are RCU protected.
- * Hence this function internally uses RCU updater strategy with mutex locks
- * to keep the integrity of the internal data structures. Callers should ensure
- * that this function is *NOT* called under RCU protection or in contexts where
- * mutex cannot be locked.
- *
- * Return:
- * 0		On success OR
- *		Duplicate OPPs (both freq and volt are same) and opp->available
- * -EEXIST	Freq are same and volt are different OR
- *		Duplicate OPPs (both freq and volt are same) and !opp->available
- * -ENOMEM	Memory allocation failure
- * -EINVAL	Failed parsing the OPP node
- */
-static int _opp_add_static_v2(struct device *dev, struct device_node *np)
-{
-	struct opp_table *opp_table;
-	struct dev_pm_opp *new_opp;
-	u64 rate;
-	u32 val;
-	int ret;
-
-	/* Hold our table modification lock here */
-	mutex_lock(&opp_table_lock);
-
-	new_opp = _allocate_opp(dev, &opp_table);
-	if (!new_opp) {
-		ret = -ENOMEM;
-		goto unlock;
-	}
-
-	ret = of_property_read_u64(np, "opp-hz", &rate);
-	if (ret < 0) {
-		dev_err(dev, "%s: opp-hz not found\n", __func__);
-		goto free_opp;
-	}
-
-	/* Check if the OPP supports hardware's hierarchy of versions or not */
-	if (!_opp_is_supported(dev, opp_table, np)) {
-		dev_dbg(dev, "OPP not supported by hardware: %llu\n", rate);
-		goto free_opp;
-	}
-
-	/*
-	 * Rate is defined as an unsigned long in clk API, and so casting
-	 * explicitly to its type. Must be fixed once rate is 64 bit
-	 * guaranteed in clk API.
-	 */
-	new_opp->rate = (unsigned long)rate;
-	new_opp->turbo = of_property_read_bool(np, "turbo-mode");
-
-	new_opp->np = np;
-	new_opp->dynamic = false;
-	new_opp->available = true;
-
-	if (!of_property_read_u32(np, "clock-latency-ns", &val))
-		new_opp->clock_latency_ns = val;
-
-	ret = opp_parse_supplies(new_opp, dev, opp_table);
-	if (ret)
-		goto free_opp;
-
-	ret = _opp_add(dev, new_opp, opp_table);
-	if (ret)
-		goto free_opp;
-
-	/* OPP to select on device suspend */
-	if (of_property_read_bool(np, "opp-suspend")) {
-		if (opp_table->suspend_opp) {
-			dev_warn(dev, "%s: Multiple suspend OPPs found (%lu %lu)\n",
-				 __func__, opp_table->suspend_opp->rate,
-				 new_opp->rate);
-		} else {
-			new_opp->suspend = true;
-			opp_table->suspend_opp = new_opp;
-		}
-	}
-
-	if (new_opp->clock_latency_ns > opp_table->clock_latency_ns_max)
-		opp_table->clock_latency_ns_max = new_opp->clock_latency_ns;
-
-	mutex_unlock(&opp_table_lock);
-
-	pr_debug("%s: turbo:%d rate:%lu uv:%lu uvmin:%lu uvmax:%lu latency:%lu\n",
-		 __func__, new_opp->turbo, new_opp->rate, new_opp->u_volt,
-		 new_opp->u_volt_min, new_opp->u_volt_max,
-		 new_opp->clock_latency_ns);
-
-	/*
-	 * Notify the changes in the availability of the operable
-	 * frequency/voltage list.
-	 */
-	srcu_notifier_call_chain(&opp_table->srcu_head, OPP_EVENT_ADD, new_opp);
-	return 0;
-
-free_opp:
-	_opp_remove(opp_table, new_opp, false);
-unlock:
-	mutex_unlock(&opp_table_lock);
-	return ret;
-}
-
 /**
  * dev_pm_opp_add()  - Add an OPP table from a table definitions
  * @dev:	device for which we do this operation
@@ -1842,21 +1592,11 @@
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_get_notifier);
 
-#ifdef CONFIG_OF
-/**
- * dev_pm_opp_of_remove_table() - Free OPP table entries created from static DT
- *				  entries
- * @dev:	device pointer used to lookup OPP table.
- *
- * Free OPPs created using static entries present in DT.
- *
- * Locking: The internal opp_table and opp structures are RCU protected.
- * Hence this function indirectly uses RCU updater strategy with mutex locks
- * to keep the integrity of the internal data structures. Callers should ensure
- * that this function is *NOT* called under RCU protection or in contexts where
- * mutex cannot be locked.
+/*
+ * Free OPPs either created using static entries present in DT or even the
+ * dynamically added entries based on remove_all param.
  */
-void dev_pm_opp_of_remove_table(struct device *dev)
+void _dev_pm_opp_remove_table(struct device *dev, bool remove_all)
 {
 	struct opp_table *opp_table;
 	struct dev_pm_opp *opp, *tmp;
@@ -1881,7 +1621,7 @@
 	if (list_is_singular(&opp_table->dev_list)) {
 		/* Free static OPPs */
 		list_for_each_entry_safe(opp, tmp, &opp_table->opp_list, node) {
-			if (!opp->dynamic)
+			if (remove_all || !opp->dynamic)
 				_opp_remove(opp_table, opp, true);
 		}
 	} else {
@@ -1891,160 +1631,22 @@
 unlock:
 	mutex_unlock(&opp_table_lock);
 }
-EXPORT_SYMBOL_GPL(dev_pm_opp_of_remove_table);
-
-/* Returns opp descriptor node for a device, caller must do of_node_put() */
-struct device_node *_of_get_opp_desc_node(struct device *dev)
-{
-	/*
-	 * TODO: Support for multiple OPP tables.
-	 *
-	 * There should be only ONE phandle present in "operating-points-v2"
-	 * property.
-	 */
-
-	return of_parse_phandle(dev->of_node, "operating-points-v2", 0);
-}
-
-/* Initializes OPP tables based on new bindings */
-static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np)
-{
-	struct device_node *np;
-	struct opp_table *opp_table;
-	int ret = 0, count = 0;
-
-	mutex_lock(&opp_table_lock);
-
-	opp_table = _managed_opp(opp_np);
-	if (opp_table) {
-		/* OPPs are already managed */
-		if (!_add_opp_dev(dev, opp_table))
-			ret = -ENOMEM;
-		mutex_unlock(&opp_table_lock);
-		return ret;
-	}
-	mutex_unlock(&opp_table_lock);
-
-	/* We have opp-table node now, iterate over it and add OPPs */
-	for_each_available_child_of_node(opp_np, np) {
-		count++;
-
-		ret = _opp_add_static_v2(dev, np);
-		if (ret) {
-			dev_err(dev, "%s: Failed to add OPP, %d\n", __func__,
-				ret);
-			goto free_table;
-		}
-	}
-
-	/* There should be one of more OPP defined */
-	if (WARN_ON(!count))
-		return -ENOENT;
-
-	mutex_lock(&opp_table_lock);
-
-	opp_table = _find_opp_table(dev);
-	if (WARN_ON(IS_ERR(opp_table))) {
-		ret = PTR_ERR(opp_table);
-		mutex_unlock(&opp_table_lock);
-		goto free_table;
-	}
-
-	opp_table->np = opp_np;
-	opp_table->shared_opp = of_property_read_bool(opp_np, "opp-shared");
-
-	mutex_unlock(&opp_table_lock);
-
-	return 0;
-
-free_table:
-	dev_pm_opp_of_remove_table(dev);
-
-	return ret;
-}
-
-/* Initializes OPP tables based on old-deprecated bindings */
-static int _of_add_opp_table_v1(struct device *dev)
-{
-	const struct property *prop;
-	const __be32 *val;
-	int nr;
-
-	prop = of_find_property(dev->of_node, "operating-points", NULL);
-	if (!prop)
-		return -ENODEV;
-	if (!prop->value)
-		return -ENODATA;
-
-	/*
-	 * Each OPP is a set of tuples consisting of frequency and
-	 * voltage like <freq-kHz vol-uV>.
-	 */
-	nr = prop->length / sizeof(u32);
-	if (nr % 2) {
-		dev_err(dev, "%s: Invalid OPP table\n", __func__);
-		return -EINVAL;
-	}
-
-	val = prop->value;
-	while (nr) {
-		unsigned long freq = be32_to_cpup(val++) * 1000;
-		unsigned long volt = be32_to_cpup(val++);
-
-		if (_opp_add_v1(dev, freq, volt, false))
-			dev_warn(dev, "%s: Failed to add OPP %ld\n",
-				 __func__, freq);
-		nr -= 2;
-	}
-
-	return 0;
-}
 
 /**
- * dev_pm_opp_of_add_table() - Initialize opp table from device tree
+ * dev_pm_opp_remove_table() - Free all OPPs associated with the device
  * @dev:	device pointer used to lookup OPP table.
  *
- * Register the initial OPP table with the OPP library for given device.
+ * Free both OPPs created using static entries present in DT and the
+ * dynamically added entries.
  *
  * Locking: The internal opp_table and opp structures are RCU protected.
  * Hence this function indirectly uses RCU updater strategy with mutex locks
  * to keep the integrity of the internal data structures. Callers should ensure
  * that this function is *NOT* called under RCU protection or in contexts where
  * mutex cannot be locked.
- *
- * Return:
- * 0		On success OR
- *		Duplicate OPPs (both freq and volt are same) and opp->available
- * -EEXIST	Freq are same and volt are different OR
- *		Duplicate OPPs (both freq and volt are same) and !opp->available
- * -ENOMEM	Memory allocation failure
- * -ENODEV	when 'operating-points' property is not found or is invalid data
- *		in device node.
- * -ENODATA	when empty 'operating-points' property is found
- * -EINVAL	when invalid entries are found in opp-v2 table
  */
-int dev_pm_opp_of_add_table(struct device *dev)
+void dev_pm_opp_remove_table(struct device *dev)
 {
-	struct device_node *opp_np;
-	int ret;
-
-	/*
-	 * OPPs have two version of bindings now. The older one is deprecated,
-	 * try for the new binding first.
-	 */
-	opp_np = _of_get_opp_desc_node(dev);
-	if (!opp_np) {
-		/*
-		 * Try old-deprecated bindings for backward compatibility with
-		 * older dtbs.
-		 */
-		return _of_add_opp_table_v1(dev);
-	}
-
-	ret = _of_add_opp_table_v2(dev, opp_np);
-	of_node_put(opp_np);
-
-	return ret;
+	_dev_pm_opp_remove_table(dev, true);
 }
-EXPORT_SYMBOL_GPL(dev_pm_opp_of_add_table);
-#endif
+EXPORT_SYMBOL_GPL(dev_pm_opp_remove_table);
diff --git a/drivers/base/power/opp/cpu.c b/drivers/base/power/opp/cpu.c
index ba2bdbd..83d6e7b 100644
--- a/drivers/base/power/opp/cpu.c
+++ b/drivers/base/power/opp/cpu.c
@@ -18,7 +18,6 @@
 #include <linux/err.h>
 #include <linux/errno.h>
 #include <linux/export.h>
-#include <linux/of.h>
 #include <linux/slab.h>
 
 #include "opp.h"
@@ -119,8 +118,66 @@
 EXPORT_SYMBOL_GPL(dev_pm_opp_free_cpufreq_table);
 #endif	/* CONFIG_CPU_FREQ */
 
-/* Required only for V1 bindings, as v2 can manage it from DT itself */
-int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask)
+void _dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask, bool of)
+{
+	struct device *cpu_dev;
+	int cpu;
+
+	WARN_ON(cpumask_empty(cpumask));
+
+	for_each_cpu(cpu, cpumask) {
+		cpu_dev = get_cpu_device(cpu);
+		if (!cpu_dev) {
+			pr_err("%s: failed to get cpu%d device\n", __func__,
+			       cpu);
+			continue;
+		}
+
+		if (of)
+			dev_pm_opp_of_remove_table(cpu_dev);
+		else
+			dev_pm_opp_remove_table(cpu_dev);
+	}
+}
+
+/**
+ * dev_pm_opp_cpumask_remove_table() - Removes OPP table for @cpumask
+ * @cpumask:	cpumask for which OPP table needs to be removed
+ *
+ * This removes the OPP tables for CPUs present in the @cpumask.
+ * This should be used to remove all the OPP entries associated with
+ * the CPUs in @cpumask.
+ *
+ * Locking: The internal opp_table and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+void dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask)
+{
+	_dev_pm_opp_cpumask_remove_table(cpumask, false);
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_cpumask_remove_table);
+
+/**
+ * dev_pm_opp_set_sharing_cpus() - Mark OPP table as shared by a set of CPUs
+ * @cpu_dev:	CPU device for which we do this operation
+ * @cpumask:	cpumask of the CPUs which share the OPP table with @cpu_dev
+ *
+ * This marks the OPP table of @cpu_dev as shared by the CPUs present in
+ * @cpumask.
+ *
+ * Returns -ENODEV if the OPP table isn't present.
+ *
+ * Locking: The internal opp_table and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev,
+				const struct cpumask *cpumask)
 {
 	struct opp_device *opp_dev;
 	struct opp_table *opp_table;
@@ -131,7 +188,7 @@
 
 	opp_table = _find_opp_table(cpu_dev);
 	if (IS_ERR(opp_table)) {
-		ret = -EINVAL;
+		ret = PTR_ERR(opp_table);
 		goto unlock;
 	}
 
@@ -152,6 +209,9 @@
 				__func__, cpu);
 			continue;
 		}
+
+		/* Mark the OPP table as shared, now that multiple CPUs use it */
+		opp_table->shared_opp = true;
 	}
 unlock:
 	mutex_unlock(&opp_table_lock);
@@ -160,112 +220,47 @@
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_set_sharing_cpus);
 
-#ifdef CONFIG_OF
-void dev_pm_opp_of_cpumask_remove_table(cpumask_var_t cpumask)
-{
-	struct device *cpu_dev;
-	int cpu;
-
-	WARN_ON(cpumask_empty(cpumask));
-
-	for_each_cpu(cpu, cpumask) {
-		cpu_dev = get_cpu_device(cpu);
-		if (!cpu_dev) {
-			pr_err("%s: failed to get cpu%d device\n", __func__,
-			       cpu);
-			continue;
-		}
-
-		dev_pm_opp_of_remove_table(cpu_dev);
-	}
-}
-EXPORT_SYMBOL_GPL(dev_pm_opp_of_cpumask_remove_table);
-
-int dev_pm_opp_of_cpumask_add_table(cpumask_var_t cpumask)
-{
-	struct device *cpu_dev;
-	int cpu, ret = 0;
-
-	WARN_ON(cpumask_empty(cpumask));
-
-	for_each_cpu(cpu, cpumask) {
-		cpu_dev = get_cpu_device(cpu);
-		if (!cpu_dev) {
-			pr_err("%s: failed to get cpu%d device\n", __func__,
-			       cpu);
-			continue;
-		}
-
-		ret = dev_pm_opp_of_add_table(cpu_dev);
-		if (ret) {
-			pr_err("%s: couldn't find opp table for cpu:%d, %d\n",
-			       __func__, cpu, ret);
-
-			/* Free all other OPPs */
-			dev_pm_opp_of_cpumask_remove_table(cpumask);
-			break;
-		}
-	}
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(dev_pm_opp_of_cpumask_add_table);
-
-/*
- * Works only for OPP v2 bindings.
+/**
+ * dev_pm_opp_get_sharing_cpus() - Get cpumask of CPUs sharing OPPs with @cpu_dev
+ * @cpu_dev:	CPU device for which we do this operation
+ * @cpumask:	cpumask to update with information of sharing CPUs
  *
- * Returns -ENOENT if operating-points-v2 bindings aren't supported.
+ * This updates the @cpumask with CPUs that are sharing OPPs with @cpu_dev.
+ *
+ * Returns -ENODEV if the OPP table isn't present.
+ *
+ * Locking: The internal opp_table and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
  */
-int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask)
+int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask)
 {
-	struct device_node *np, *tmp_np;
-	struct device *tcpu_dev;
-	int cpu, ret = 0;
+	struct opp_device *opp_dev;
+	struct opp_table *opp_table;
+	int ret = 0;
 
-	/* Get OPP descriptor node */
-	np = _of_get_opp_desc_node(cpu_dev);
-	if (!np) {
-		dev_dbg(cpu_dev, "%s: Couldn't find cpu_dev node.\n", __func__);
-		return -ENOENT;
+	mutex_lock(&opp_table_lock);
+
+	opp_table = _find_opp_table(cpu_dev);
+	if (IS_ERR(opp_table)) {
+		ret = PTR_ERR(opp_table);
+		goto unlock;
 	}
 
-	cpumask_set_cpu(cpu_dev->id, cpumask);
+	cpumask_clear(cpumask);
 
-	/* OPPs are shared ? */
-	if (!of_property_read_bool(np, "opp-shared"))
-		goto put_cpu_node;
-
-	for_each_possible_cpu(cpu) {
-		if (cpu == cpu_dev->id)
-			continue;
-
-		tcpu_dev = get_cpu_device(cpu);
-		if (!tcpu_dev) {
-			dev_err(cpu_dev, "%s: failed to get cpu%d device\n",
-				__func__, cpu);
-			ret = -ENODEV;
-			goto put_cpu_node;
-		}
-
-		/* Get OPP descriptor node */
-		tmp_np = _of_get_opp_desc_node(tcpu_dev);
-		if (!tmp_np) {
-			dev_err(tcpu_dev, "%s: Couldn't find tcpu_dev node.\n",
-				__func__);
-			ret = -ENOENT;
-			goto put_cpu_node;
-		}
-
-		/* CPUs are sharing opp node */
-		if (np == tmp_np)
-			cpumask_set_cpu(cpu, cpumask);
-
-		of_node_put(tmp_np);
+	if (opp_table->shared_opp) {
+		list_for_each_entry(opp_dev, &opp_table->dev_list, node)
+			cpumask_set_cpu(opp_dev->dev->id, cpumask);
+	} else {
+		cpumask_set_cpu(cpu_dev->id, cpumask);
 	}
 
-put_cpu_node:
-	of_node_put(np);
+unlock:
+	mutex_unlock(&opp_table_lock);
+
 	return ret;
 }
-EXPORT_SYMBOL_GPL(dev_pm_opp_of_get_sharing_cpus);
-#endif
+EXPORT_SYMBOL_GPL(dev_pm_opp_get_sharing_cpus);
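
A minimal usage sketch of the two helpers above, for a hypothetical driver in
which "cpus" already holds the CPUs known (e.g. from platform data) to share
clock/voltage rails with "cpu_dev" (the function name is invented):

	static int example_mark_opp_sharing(struct device *cpu_dev,
					    struct cpumask *cpus)
	{
		/* Record the sharing information in cpu_dev's OPP table. */
		int ret = dev_pm_opp_set_sharing_cpus(cpu_dev, cpus);

		if (ret)
			return ret;

		/* The recorded information can then be read back. */
		return dev_pm_opp_get_sharing_cpus(cpu_dev, cpus);
	}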
diff --git a/drivers/base/power/opp/of.c b/drivers/base/power/opp/of.c
new file mode 100644
index 0000000..94d2010
--- /dev/null
+++ b/drivers/base/power/opp/of.c
@@ -0,0 +1,591 @@
+/*
+ * Generic OPP OF helpers
+ *
+ * Copyright (C) 2009-2010 Texas Instruments Incorporated.
+ *	Nishanth Menon
+ *	Romit Dasgupta
+ *	Kevin Hilman
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/cpu.h>
+#include <linux/errno.h>
+#include <linux/device.h>
+#include <linux/of.h>
+#include <linux/export.h>
+
+#include "opp.h"
+
+static struct opp_table *_managed_opp(const struct device_node *np)
+{
+	struct opp_table *opp_table;
+
+	list_for_each_entry_rcu(opp_table, &opp_tables, node) {
+		if (opp_table->np == np) {
+			/*
+			 * Multiple devices can point to the same OPP table and
+			 * so will have the same node pointer, np.
+			 *
+			 * But the OPPs will be considered shared only if the
+			 * OPP table contains an "opp-shared" property.
+			 */
+			return opp_table->shared_opp ? opp_table : NULL;
+		}
+	}
+
+	return NULL;
+}
+
+void _of_init_opp_table(struct opp_table *opp_table, struct device *dev)
+{
+	struct device_node *np;
+
+	/*
+	 * Only required for backward compatibility with v1 bindings, but isn't
+	 * harmful for other cases. And so we do it unconditionally.
+	 */
+	np = of_node_get(dev->of_node);
+	if (np) {
+		u32 val;
+
+		if (!of_property_read_u32(np, "clock-latency", &val))
+			opp_table->clock_latency_ns_max = val;
+		of_property_read_u32(np, "voltage-tolerance",
+				     &opp_table->voltage_tolerance_v1);
+		of_node_put(np);
+	}
+}
+
+static bool _opp_is_supported(struct device *dev, struct opp_table *opp_table,
+			      struct device_node *np)
+{
+	unsigned int count = opp_table->supported_hw_count;
+	u32 version;
+	int ret;
+
+	if (!opp_table->supported_hw)
+		return true;
+
+	while (count--) {
+		ret = of_property_read_u32_index(np, "opp-supported-hw", count,
+						 &version);
+		if (ret) {
+			dev_warn(dev, "%s: failed to read opp-supported-hw property at index %d: %d\n",
+				 __func__, count, ret);
+			return false;
+		}
+
+		/* Both of these are bitwise masks of the versions */
+		if (!(version & opp_table->supported_hw[count]))
+			return false;
+	}
+
+	return true;
+}
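
An illustration of the bitwise matching above, with invented values:

	/*
	 * Example: supported_hw = { 0x8, 0x4 }, set earlier through
	 * dev_pm_opp_set_supported_hw(), matched against an OPP node with
	 *
	 *	opp-supported-hw = <0xF 0xFF>;
	 *
	 * Both levels AND to a non-zero value, so the OPP is supported; a
	 * node carrying <0x3 0xFF> would be rejected at the first level.
	 */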
+
+/* TODO: Support multiple regulators */
+static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev,
+			      struct opp_table *opp_table)
+{
+	u32 microvolt[3] = {0};
+	u32 val;
+	int count, ret;
+	struct property *prop = NULL;
+	char name[NAME_MAX];
+
+	/* Search for "opp-microvolt-<name>" */
+	if (opp_table->prop_name) {
+		snprintf(name, sizeof(name), "opp-microvolt-%s",
+			 opp_table->prop_name);
+		prop = of_find_property(opp->np, name, NULL);
+	}
+
+	if (!prop) {
+		/* Search for "opp-microvolt" */
+		sprintf(name, "opp-microvolt");
+		prop = of_find_property(opp->np, name, NULL);
+
+		/* Missing property isn't a problem, but an invalid entry is */
+		if (!prop)
+			return 0;
+	}
+
+	count = of_property_count_u32_elems(opp->np, name);
+	if (count < 0) {
+		dev_err(dev, "%s: Invalid %s property (%d)\n",
+			__func__, name, count);
+		return count;
+	}
+
+	/* There can be one or three elements here */
+	if (count != 1 && count != 3) {
+		dev_err(dev, "%s: Invalid number of elements in %s property (%d)\n",
+			__func__, name, count);
+		return -EINVAL;
+	}
+
+	ret = of_property_read_u32_array(opp->np, name, microvolt, count);
+	if (ret) {
+		dev_err(dev, "%s: error parsing %s: %d\n", __func__, name, ret);
+		return -EINVAL;
+	}
+
+	opp->u_volt = microvolt[0];
+
+	if (count == 1) {
+		opp->u_volt_min = opp->u_volt;
+		opp->u_volt_max = opp->u_volt;
+	} else {
+		opp->u_volt_min = microvolt[1];
+		opp->u_volt_max = microvolt[2];
+	}
+
+	/* Search for "opp-microamp-<name>" */
+	prop = NULL;
+	if (opp_table->prop_name) {
+		snprintf(name, sizeof(name), "opp-microamp-%s",
+			 opp_table->prop_name);
+		prop = of_find_property(opp->np, name, NULL);
+	}
+
+	if (!prop) {
+		/* Search for "opp-microamp" */
+		sprintf(name, "opp-microamp");
+		prop = of_find_property(opp->np, name, NULL);
+	}
+
+	if (prop && !of_property_read_u32(opp->np, name, &val))
+		opp->u_amp = val;
+
+	return 0;
+}
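
To make the one-or-three-element rule concrete, an OPP node matching this
parser might look like the following (illustrative DT fragment, not taken
from this patch):

	/*
	 *	opp@1000000000 {
	 *		opp-hz = /bits/ 64 <1000000000>;
	 *		opp-microvolt = <975000 970000 985000>; <target min max>
	 *		opp-microamp = <70000>;
	 *	};
	 *
	 * A single opp-microvolt cell sets target, min and max to the same
	 * value; when opp_table->prop_name is set, the "opp-microvolt-<name>"
	 * variant is searched first.
	 */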
+
+/**
+ * dev_pm_opp_of_remove_table() - Free OPP table entries created from static DT
+ *				  entries
+ * @dev:	device pointer used to lookup OPP table.
+ *
+ * Free OPPs created using static entries present in DT.
+ *
+ * Locking: The internal opp_table and opp structures are RCU protected.
+ * Hence this function indirectly uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+void dev_pm_opp_of_remove_table(struct device *dev)
+{
+	_dev_pm_opp_remove_table(dev, false);
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_of_remove_table);
+
+/* Returns opp descriptor node for a device; caller must do of_node_put() */
+struct device_node *_of_get_opp_desc_node(struct device *dev)
+{
+	/*
+	 * TODO: Support for multiple OPP tables.
+	 *
+	 * There should be only ONE phandle present in "operating-points-v2"
+	 * property.
+	 */
+
+	return of_parse_phandle(dev->of_node, "operating-points-v2", 0);
+}
+
+/**
+ * _opp_add_static_v2() - Allocate static OPPs (As per 'v2' DT bindings)
+ * @dev:	device for which we do this operation
+ * @np:		device node
+ *
+ * This function adds an opp definition to the opp table and returns status. The
+ * opp can be controlled using dev_pm_opp_enable/disable functions and may be
+ * removed by dev_pm_opp_remove.
+ *
+ * Locking: The internal opp_table and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ *
+ * Return:
+ * 0		On success OR
+ *		Duplicate OPPs (both freq and volt are same) and opp->available
+ * -EEXIST	Freq are same and volt are different OR
+ *		Duplicate OPPs (both freq and volt are same) and !opp->available
+ * -ENOMEM	Memory allocation failure
+ * -EINVAL	Failed parsing the OPP node
+ */
+static int _opp_add_static_v2(struct device *dev, struct device_node *np)
+{
+	struct opp_table *opp_table;
+	struct dev_pm_opp *new_opp;
+	u64 rate;
+	u32 val;
+	int ret;
+
+	/* Hold our table modification lock here */
+	mutex_lock(&opp_table_lock);
+
+	new_opp = _allocate_opp(dev, &opp_table);
+	if (!new_opp) {
+		ret = -ENOMEM;
+		goto unlock;
+	}
+
+	ret = of_property_read_u64(np, "opp-hz", &rate);
+	if (ret < 0) {
+		dev_err(dev, "%s: opp-hz not found\n", __func__);
+		goto free_opp;
+	}
+
+	/* Check whether the OPP supports the hardware's hierarchy of versions */
+	if (!_opp_is_supported(dev, opp_table, np)) {
+		dev_dbg(dev, "OPP not supported by hardware: %llu\n", rate);
+		goto free_opp;
+	}
+
+	/*
+	 * Rate is defined as an unsigned long in the clk API, so cast
+	 * explicitly to its type. This must be fixed once rate is guaranteed
+	 * to be 64 bit in the clk API.
+	 */
+	new_opp->rate = (unsigned long)rate;
+	new_opp->turbo = of_property_read_bool(np, "turbo-mode");
+
+	new_opp->np = np;
+	new_opp->dynamic = false;
+	new_opp->available = true;
+
+	if (!of_property_read_u32(np, "clock-latency-ns", &val))
+		new_opp->clock_latency_ns = val;
+
+	ret = opp_parse_supplies(new_opp, dev, opp_table);
+	if (ret)
+		goto free_opp;
+
+	ret = _opp_add(dev, new_opp, opp_table);
+	if (ret)
+		goto free_opp;
+
+	/* OPP to select on device suspend */
+	if (of_property_read_bool(np, "opp-suspend")) {
+		if (opp_table->suspend_opp) {
+			dev_warn(dev, "%s: Multiple suspend OPPs found (%lu %lu)\n",
+				 __func__, opp_table->suspend_opp->rate,
+				 new_opp->rate);
+		} else {
+			new_opp->suspend = true;
+			opp_table->suspend_opp = new_opp;
+		}
+	}
+
+	if (new_opp->clock_latency_ns > opp_table->clock_latency_ns_max)
+		opp_table->clock_latency_ns_max = new_opp->clock_latency_ns;
+
+	mutex_unlock(&opp_table_lock);
+
+	pr_debug("%s: turbo:%d rate:%lu uv:%lu uvmin:%lu uvmax:%lu latency:%lu\n",
+		 __func__, new_opp->turbo, new_opp->rate, new_opp->u_volt,
+		 new_opp->u_volt_min, new_opp->u_volt_max,
+		 new_opp->clock_latency_ns);
+
+	/*
+	 * Notify the changes in the availability of the operable
+	 * frequency/voltage list.
+	 */
+	srcu_notifier_call_chain(&opp_table->srcu_head, OPP_EVENT_ADD, new_opp);
+	return 0;
+
+free_opp:
+	_opp_remove(opp_table, new_opp, false);
+unlock:
+	mutex_unlock(&opp_table_lock);
+	return ret;
+}
+
+/* Initializes OPP tables based on new bindings */
+static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np)
+{
+	struct device_node *np;
+	struct opp_table *opp_table;
+	int ret = 0, count = 0;
+
+	mutex_lock(&opp_table_lock);
+
+	opp_table = _managed_opp(opp_np);
+	if (opp_table) {
+		/* OPPs are already managed */
+		if (!_add_opp_dev(dev, opp_table))
+			ret = -ENOMEM;
+		mutex_unlock(&opp_table_lock);
+		return ret;
+	}
+	mutex_unlock(&opp_table_lock);
+
+	/* We have the opp-table node now; iterate over it and add OPPs */
+	for_each_available_child_of_node(opp_np, np) {
+		count++;
+
+		ret = _opp_add_static_v2(dev, np);
+		if (ret) {
+			dev_err(dev, "%s: Failed to add OPP, %d\n", __func__,
+				ret);
+			goto free_table;
+		}
+	}
+
+	/* There should be one or more OPPs defined */
+	if (WARN_ON(!count))
+		return -ENOENT;
+
+	mutex_lock(&opp_table_lock);
+
+	opp_table = _find_opp_table(dev);
+	if (WARN_ON(IS_ERR(opp_table))) {
+		ret = PTR_ERR(opp_table);
+		mutex_unlock(&opp_table_lock);
+		goto free_table;
+	}
+
+	opp_table->np = opp_np;
+	opp_table->shared_opp = of_property_read_bool(opp_np, "opp-shared");
+
+	mutex_unlock(&opp_table_lock);
+
+	return 0;
+
+free_table:
+	dev_pm_opp_of_remove_table(dev);
+
+	return ret;
+}
+
+/* Initializes OPP tables based on the old, deprecated bindings */
+static int _of_add_opp_table_v1(struct device *dev)
+{
+	const struct property *prop;
+	const __be32 *val;
+	int nr;
+
+	prop = of_find_property(dev->of_node, "operating-points", NULL);
+	if (!prop)
+		return -ENODEV;
+	if (!prop->value)
+		return -ENODATA;
+
+	/*
+	 * Each OPP is a tuple consisting of a frequency and a voltage,
+	 * like <freq-kHz volt-uV>.
+	 */
+	nr = prop->length / sizeof(u32);
+	if (nr % 2) {
+		dev_err(dev, "%s: Invalid OPP table\n", __func__);
+		return -EINVAL;
+	}
+
+	val = prop->value;
+	while (nr) {
+		unsigned long freq = be32_to_cpup(val++) * 1000;
+		unsigned long volt = be32_to_cpup(val++);
+
+		if (_opp_add_v1(dev, freq, volt, false))
+			dev_warn(dev, "%s: Failed to add OPP %ld\n",
+				 __func__, freq);
+		nr -= 2;
+	}
+
+	return 0;
+}
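
For reference, the deprecated v1 property parsed above is a flat list of
<frequency-kHz voltage-uV> pairs; an illustrative fragment (values invented):

	/*
	 *	cpu@0 {
	 *		operating-points = <
	 *			998400  1075000
	 *			1420000 1300000
	 *		>;
	 *	};
	 *
	 * Frequencies are given in kHz and converted to Hz (* 1000) before
	 * being registered through _opp_add_v1().
	 */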
+
+/**
+ * dev_pm_opp_of_add_table() - Initialize opp table from device tree
+ * @dev:	device pointer used to lookup OPP table.
+ *
+ * Register the initial OPP table with the OPP library for given device.
+ *
+ * Locking: The internal opp_table and opp structures are RCU protected.
+ * Hence this function indirectly uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ *
+ * Return:
+ * 0		On success OR
+ *		Duplicate OPPs (both freq and volt are same) and opp->available
+ * -EEXIST	Freq are same and volt are different OR
+ *		Duplicate OPPs (both freq and volt are same) and !opp->available
+ * -ENOMEM	Memory allocation failure
+ * -ENODEV	when the 'operating-points' property is not found or contains
+ *		invalid data in the device node.
+ * -ENODATA	when empty 'operating-points' property is found
+ * -EINVAL	when invalid entries are found in opp-v2 table
+ */
+int dev_pm_opp_of_add_table(struct device *dev)
+{
+	struct device_node *opp_np;
+	int ret;
+
+	/*
+	 * OPPs now have two versions of bindings. The older one is deprecated,
+	 * so try the new binding first.
+	 */
+	opp_np = _of_get_opp_desc_node(dev);
+	if (!opp_np) {
+		/*
+		 * Try the old, deprecated bindings for backward compatibility
+		 * with older dtbs.
+		 */
+		return _of_add_opp_table_v1(dev);
+	}
+
+	ret = _of_add_opp_table_v2(dev, opp_np);
+	of_node_put(opp_np);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_of_add_table);
+
+/* CPU device specific helpers */
+
+/**
+ * dev_pm_opp_of_cpumask_remove_table() - Removes OPP table for @cpumask
+ * @cpumask:	cpumask for which OPP table needs to be removed
+ *
+ * This removes the OPP tables for CPUs present in the @cpumask.
+ * This should be used only to remove static entries created from DT.
+ *
+ * Locking: The internal opp_table and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+void dev_pm_opp_of_cpumask_remove_table(const struct cpumask *cpumask)
+{
+	_dev_pm_opp_cpumask_remove_table(cpumask, true);
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_of_cpumask_remove_table);
+
+/**
+ * dev_pm_opp_of_cpumask_add_table() - Adds OPP table for @cpumask
+ * @cpumask:	cpumask for which OPP table needs to be added.
+ *
+ * This adds the OPP tables for CPUs present in the @cpumask.
+ *
+ * Locking: The internal opp_table and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+int dev_pm_opp_of_cpumask_add_table(const struct cpumask *cpumask)
+{
+	struct device *cpu_dev;
+	int cpu, ret = 0;
+
+	WARN_ON(cpumask_empty(cpumask));
+
+	for_each_cpu(cpu, cpumask) {
+		cpu_dev = get_cpu_device(cpu);
+		if (!cpu_dev) {
+			pr_err("%s: failed to get cpu%d device\n", __func__,
+			       cpu);
+			continue;
+		}
+
+		ret = dev_pm_opp_of_add_table(cpu_dev);
+		if (ret) {
+			pr_err("%s: couldn't find opp table for cpu:%d, %d\n",
+			       __func__, cpu, ret);
+
+			/* Free all other OPPs */
+			dev_pm_opp_of_cpumask_remove_table(cpumask);
+			break;
+		}
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_of_cpumask_add_table);
+
+/**
+ * dev_pm_opp_of_get_sharing_cpus() - Get cpumask of CPUs sharing OPPs with
+ *				      @cpu_dev using operating-points-v2
+ *				      bindings.
+ *
+ * @cpu_dev:	CPU device for which we do this operation
+ * @cpumask:	cpumask to update with information of sharing CPUs
+ *
+ * This updates the @cpumask with CPUs that are sharing OPPs with @cpu_dev.
+ *
+ * Returns -ENOENT if operating-points-v2 isn't present for @cpu_dev.
+ *
+ * Locking: The internal opp_table and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev,
+				   struct cpumask *cpumask)
+{
+	struct device_node *np, *tmp_np;
+	struct device *tcpu_dev;
+	int cpu, ret = 0;
+
+	/* Get OPP descriptor node */
+	np = _of_get_opp_desc_node(cpu_dev);
+	if (!np) {
+		dev_dbg(cpu_dev, "%s: Couldn't find cpu_dev node.\n", __func__);
+		return -ENOENT;
+	}
+
+	cpumask_set_cpu(cpu_dev->id, cpumask);
+
+	/* Are the OPPs shared? */
+	if (!of_property_read_bool(np, "opp-shared"))
+		goto put_cpu_node;
+
+	for_each_possible_cpu(cpu) {
+		if (cpu == cpu_dev->id)
+			continue;
+
+		tcpu_dev = get_cpu_device(cpu);
+		if (!tcpu_dev) {
+			dev_err(cpu_dev, "%s: failed to get cpu%d device\n",
+				__func__, cpu);
+			ret = -ENODEV;
+			goto put_cpu_node;
+		}
+
+		/* Get OPP descriptor node */
+		tmp_np = _of_get_opp_desc_node(tcpu_dev);
+		if (!tmp_np) {
+			dev_err(tcpu_dev, "%s: Couldn't find tcpu_dev node.\n",
+				__func__);
+			ret = -ENOENT;
+			goto put_cpu_node;
+		}
+
+		/* CPUs are sharing opp node */
+		if (np == tmp_np)
+			cpumask_set_cpu(cpu, cpumask);
+
+		of_node_put(tmp_np);
+	}
+
+put_cpu_node:
+	of_node_put(np);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_of_get_sharing_cpus);
diff --git a/drivers/base/power/opp/opp.h b/drivers/base/power/opp/opp.h
index f67f806..20f3be2 100644
--- a/drivers/base/power/opp/opp.h
+++ b/drivers/base/power/opp/opp.h
@@ -28,6 +28,8 @@
 /* Lock to allow exclusive modification to the device and opp lists */
 extern struct mutex opp_table_lock;
 
+extern struct list_head opp_tables;
+
 /*
  * Internal data structure organization with the OPP layer library is as
  * follows:
@@ -183,6 +185,18 @@
 struct opp_table *_find_opp_table(struct device *dev);
 struct opp_device *_add_opp_dev(const struct device *dev, struct opp_table *opp_table);
 struct device_node *_of_get_opp_desc_node(struct device *dev);
+void _dev_pm_opp_remove_table(struct device *dev, bool remove_all);
+struct dev_pm_opp *_allocate_opp(struct device *dev, struct opp_table **opp_table);
+int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, struct opp_table *opp_table);
+void _opp_remove(struct opp_table *opp_table, struct dev_pm_opp *opp, bool notify);
+int _opp_add_v1(struct device *dev, unsigned long freq, long u_volt, bool dynamic);
+void _dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask, bool of);
+
+#ifdef CONFIG_OF
+void _of_init_opp_table(struct opp_table *opp_table, struct device *dev);
+#else
+static inline void _of_init_opp_table(struct opp_table *opp_table, struct device *dev) {}
+#endif
 
 #ifdef CONFIG_DEBUG_FS
 void opp_debug_remove_one(struct dev_pm_opp *opp);
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 4c70550..b746904 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -1506,11 +1506,16 @@
 		goto out;
 	}
 
-	ret = callback(dev);
+	ret = pm_runtime_set_active(dev);
 	if (ret)
 		goto out;
 
-	pm_runtime_set_active(dev);
+	ret = callback(dev);
+	if (ret) {
+		pm_runtime_set_suspended(dev);
+		goto out;
+	}
+
 	pm_runtime_mark_last_busy(dev);
 out:
 	pm_runtime_enable(dev);
diff --git a/drivers/base/regmap/regcache-flat.c b/drivers/base/regmap/regcache-flat.c
index 3ee7255..4d2e50b 100644
--- a/drivers/base/regmap/regcache-flat.c
+++ b/drivers/base/regmap/regcache-flat.c
@@ -27,7 +27,7 @@
 	int i;
 	unsigned int *cache;
 
-	if (!map || map->reg_stride_order < 0)
+	if (!map || map->reg_stride_order < 0 || !map->max_register)
 		return -EINVAL;
 
 	map->cache = kcalloc(regcache_flat_get_index(map, map->max_register)
diff --git a/drivers/base/regmap/regcache.c b/drivers/base/regmap/regcache.c
index 4170b7d..df7ff729 100644
--- a/drivers/base/regmap/regcache.c
+++ b/drivers/base/regmap/regcache.c
@@ -529,7 +529,7 @@
  * regcache_cache_bypass: Put a register map into cache bypass mode
  *
  * @map: map to configure
- * @cache_bypass: flag if changes should not be written to the hardware
+ * @cache_bypass: flag if changes should not be written to the cache
  *
  * When a register map is marked with the cache bypass option, writes
  * to the register map API will only update the hardware and not the
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 1e25b52..7b1c412 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -104,7 +104,7 @@
 #define IPMI_BT_INTMASK_ENABLE_IRQ_BIT	1
 
 enum si_type {
-    SI_KCS, SI_SMIC, SI_BT
+	SI_KCS, SI_SMIC, SI_BT
 };
 
 static const char * const si_to_str[] = { "kcs", "smic", "bt" };
@@ -410,7 +410,7 @@
 
 		rv = SI_SM_CALL_WITHOUT_DELAY;
 	}
- out:
+out:
 	return rv;
 }
 
@@ -539,7 +539,7 @@
 
 static void handle_flags(struct smi_info *smi_info)
 {
- retry:
+retry:
 	if (smi_info->msg_flags & WDT_PRE_TIMEOUT_INT) {
 		/* Watchdog pre-timeout */
 		smi_inc_stat(smi_info, watchdog_pretimeouts);
@@ -831,7 +831,7 @@
 {
 	enum si_sm_result si_sm_result;
 
- restart:
+restart:
 	/*
 	 * There used to be a loop here that waited a little while
 	 * (around 25us) before giving up.  That turned out to be
@@ -944,7 +944,7 @@
 			smi_info->timer_running = false;
 	}
 
- out:
+out:
 	return si_sm_result;
 }
 
@@ -1190,7 +1190,7 @@
 		timeout = jiffies + SI_TIMEOUT_JIFFIES;
 	}
 
- do_mod_timer:
+do_mod_timer:
 	if (smi_result != SI_SM_IDLE)
 		smi_mod_timer(smi_info, timeout);
 	else
@@ -1576,10 +1576,9 @@
 		if (request_region(addr + idx * info->io.regspacing,
 				   info->io.regsize, DEVICE_NAME) == NULL) {
 			/* Undo allocations */
-			while (idx--) {
+			while (idx--)
 				release_region(addr + idx * info->io.regspacing,
 					       info->io.regsize);
-			}
 			return -EIO;
 		}
 	}
@@ -1638,25 +1637,28 @@
 }
 #endif
 
-static void mem_cleanup(struct smi_info *info)
+static void mem_region_cleanup(struct smi_info *info, int num)
 {
 	unsigned long addr = info->io.addr_data;
-	int           mapsize;
+	int idx;
 
+	for (idx = 0; idx < num; idx++)
+		release_mem_region(addr + idx * info->io.regspacing,
+				   info->io.regsize);
+}
+
+static void mem_cleanup(struct smi_info *info)
+{
 	if (info->io.addr) {
 		iounmap(info->io.addr);
-
-		mapsize = ((info->io_size * info->io.regspacing)
-			   - (info->io.regspacing - info->io.regsize));
-
-		release_mem_region(addr, mapsize);
+		mem_region_cleanup(info, info->io_size);
 	}
 }
 
 static int mem_setup(struct smi_info *info)
 {
 	unsigned long addr = info->io.addr_data;
-	int           mapsize;
+	int           mapsize, idx;
 
 	if (!addr)
 		return -ENODEV;
@@ -1693,6 +1695,21 @@
 	}
 
 	/*
+	 * Some BIOSes reserve disjoint memory regions in their ACPI
+	 * tables.  This causes problems when trying to request the
+	 * entire region.  Therefore we must request each register
+	 * separately.
+	 */
+	for (idx = 0; idx < info->io_size; idx++) {
+		if (request_mem_region(addr + idx * info->io.regspacing,
+				       info->io.regsize, DEVICE_NAME) == NULL) {
+			/* Undo allocations */
+			mem_region_cleanup(info, idx);
+			return -EIO;
+		}
+	}
+
+	/*
 	 * Calculate the total amount of memory to claim.  This is an
 	 * unusual looking calculation, but it avoids claiming any
 	 * more memory than it has to.  It will claim everything
@@ -1701,13 +1718,9 @@
 	 */
 	mapsize = ((info->io_size * info->io.regspacing)
 		   - (info->io.regspacing - info->io.regsize));
-
-	if (request_mem_region(addr, mapsize, DEVICE_NAME) == NULL)
-		return -EIO;
-
 	info->io.addr = ioremap(addr, mapsize);
 	if (info->io.addr == NULL) {
-		release_mem_region(addr, mapsize);
+		mem_region_cleanup(info, info->io_size);
 		return -EIO;
 	}
 	return 0;
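
For the mapsize formula retained above, a worked example makes the "no more memory than it has to" claim concrete (register counts and sizes are illustrative):

/*
 * io_size = 3 registers, regspacing = 4 bytes, regsize = 1 byte:
 *
 *	mapsize = 3 * 4 - (4 - 1) = 9
 *
 * Registers sit at offsets 0, 4 and 8, so the 9-byte mapping covers
 * offsets 0..8 -- exactly through the last byte of the last register,
 * without claiming the padding after it.
 */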
@@ -1975,7 +1988,7 @@
 		}
 	}
 	rv = len;
- out:
+out:
 	kfree(str);
 	return rv;
 }
@@ -2945,7 +2958,7 @@
 	/* Check and record info from the get device id, in case we need it. */
 	rv = ipmi_demangle_device_id(resp, resp_len, &smi_info->device_id);
 
- out:
+out:
 	kfree(resp);
 	return rv;
 }
@@ -3192,7 +3205,7 @@
 	else
 		smi_info->supports_event_msg_buff = true;
 
- out:
+out:
 	kfree(resp);
 	return rv;
 }
@@ -3718,10 +3731,10 @@
 
 	return 0;
 
- out_err_stop_timer:
+out_err_stop_timer:
 	wait_for_timer_and_thread(new_smi);
 
- out_err:
+out_err:
 	new_smi->interrupt_disabled = true;
 
 	if (new_smi->intf) {
diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c
index 8b3be8b..097c868 100644
--- a/drivers/char/ipmi/ipmi_ssif.c
+++ b/drivers/char/ipmi/ipmi_ssif.c
@@ -1870,7 +1870,7 @@
 		return -EIO;
 	}
 
-	myaddr = spmi->addr.address >> 1;
+	myaddr = spmi->addr.address & 0x7f;
 
 	return new_ssif_client(myaddr, NULL, 0, 0, SI_SPMI);
 }
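
For reference, a worked comparison of the two extractions above, with an arbitrary example value of 0x20 in the SPMI address field:

/*
 *	old: 0x20 >> 1   = 0x10  (assumes an 8-bit, pre-shifted I2C address)
 *	new: 0x20 & 0x7f = 0x20  (takes the field as the 7-bit address itself,
 *	                          masking off the unused top bit)
 */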
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index a7f4585..b7445b6 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -18,7 +18,11 @@
 
 if CPU_FREQ
 
+config CPU_FREQ_GOV_ATTR_SET
+	bool
+
 config CPU_FREQ_GOV_COMMON
+	select CPU_FREQ_GOV_ATTR_SET
 	select IRQ_WORK
 	bool
 
@@ -103,6 +107,17 @@
 	  Be aware that not all cpufreq drivers support the conservative
 	  governor. If unsure have a look at the help section of the
 	  driver. Fallback governor will be the performance governor.
+
+config CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
+	bool "schedutil"
+	depends on SMP
+	select CPU_FREQ_GOV_SCHEDUTIL
+	select CPU_FREQ_GOV_PERFORMANCE
+	help
+	  Use the 'schedutil' CPUFreq governor by default. If unsure,
+	  have a look at the help section of that governor. The fallback
+	  governor will be 'performance'.
+
 endchoice
 
 config CPU_FREQ_GOV_PERFORMANCE
@@ -184,6 +199,26 @@
 
 	  If in doubt, say N.
 
+config CPU_FREQ_GOV_SCHEDUTIL
+	tristate "'schedutil' cpufreq policy governor"
+	depends on CPU_FREQ && SMP
+	select CPU_FREQ_GOV_ATTR_SET
+	select IRQ_WORK
+	help
+	  This governor makes decisions based on the utilization data provided
+	  by the scheduler.  It sets the CPU frequency to be proportional to
+	  the utilization/capacity ratio coming from the scheduler.  If the
+	  utilization is frequency-invariant, the new frequency is also
+	  proportional to the maximum available frequency.  If that is not the
+	  case, it is proportional to the current frequency of the CPU.  The
+	  frequency tipping point is at utilization/capacity equal to 80% in
+	  both cases.
+
+	  To compile this driver as a module, choose M here: the module will
+	  be called cpufreq_schedutil.
+
+	  If in doubt, say N.
+
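
A worked example of the 80% tipping point mentioned in the help text above, using the next_freq = 1.25 * max_freq * util / max form of the governor's formula for frequency-invariant utilization (the 1.25 factor is what yields the stated 80% threshold, since 1.25 * 0.8 = 1):

/*
 *	util / max = 0.8  ->  next_freq = 1.25 * 0.8 * max_freq = max_freq
 *	util / max = 0.4  ->  next_freq = 0.5 * max_freq
 *
 * Utilization at or above 80% of capacity therefore requests the maximum
 * frequency; below that, the request scales down proportionally.
 */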
 comment "CPU frequency scaling drivers"
 
 config CPUFREQ_DT
@@ -191,6 +226,7 @@
 	depends on HAVE_CLK && OF
 	# if CPU_THERMAL is on and THERMAL=m, CPUFREQ_DT cannot be =y:
 	depends on !CPU_THERMAL || THERMAL
+	select CPUFREQ_DT_PLATDEV
 	select PM_OPP
 	help
 	  This adds a generic DT based cpufreq driver for frequency management.
@@ -199,6 +235,15 @@
 
 	  If in doubt, say N.
 
+config CPUFREQ_DT_PLATDEV
+	bool
+	help
+	  This adds a generic DT based cpufreq platdev driver for frequency
+	  management.  This creates a 'cpufreq-dt' platform device on the
+	  supported platforms.
+
+	  If in doubt, say N.
+
 if X86
 source "drivers/cpufreq/Kconfig.x86"
 endif
diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index 14b1f93..d89b8af 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -50,15 +50,6 @@
 
 	  If in doubt, say N.
 
-config ARM_HISI_ACPU_CPUFREQ
-	tristate "Hisilicon ACPU CPUfreq driver"
-	depends on ARCH_HISI && CPUFREQ_DT
-	select PM_OPP
-	help
-	  This enables the hisilicon ACPU CPUfreq driver.
-
-	  If in doubt, say N.
-
 config ARM_IMX6Q_CPUFREQ
 	tristate "Freescale i.MX6 cpufreq support"
 	depends on ARCH_MXC
diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86
index c59bdcb..adbd1de 100644
--- a/drivers/cpufreq/Kconfig.x86
+++ b/drivers/cpufreq/Kconfig.x86
@@ -5,6 +5,7 @@
 config X86_INTEL_PSTATE
        bool "Intel P state control"
        depends on X86
+       select ACPI_PROCESSOR if ACPI
        help
           This driver provides a P state for Intel core processors.
 	  The driver implements an internal governor and will become
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index 9e63fb1..e1eb11e 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -11,8 +11,10 @@
 obj-$(CONFIG_CPU_FREQ_GOV_ONDEMAND)	+= cpufreq_ondemand.o
 obj-$(CONFIG_CPU_FREQ_GOV_CONSERVATIVE)	+= cpufreq_conservative.o
 obj-$(CONFIG_CPU_FREQ_GOV_COMMON)		+= cpufreq_governor.o
+obj-$(CONFIG_CPU_FREQ_GOV_ATTR_SET)	+= cpufreq_governor_attr_set.o
 
 obj-$(CONFIG_CPUFREQ_DT)		+= cpufreq-dt.o
+obj-$(CONFIG_CPUFREQ_DT_PLATDEV)	+= cpufreq-dt-platdev.o
 
 ##################################################################################
 # x86 drivers.
@@ -53,7 +55,6 @@
 obj-$(CONFIG_UX500_SOC_DB8500)		+= dbx500-cpufreq.o
 obj-$(CONFIG_ARM_EXYNOS5440_CPUFREQ)	+= exynos5440-cpufreq.o
 obj-$(CONFIG_ARM_HIGHBANK_CPUFREQ)	+= highbank-cpufreq.o
-obj-$(CONFIG_ARM_HISI_ACPU_CPUFREQ)	+= hisi-acpu-cpufreq.o
 obj-$(CONFIG_ARM_IMX6Q_CPUFREQ)		+= imx6q-cpufreq.o
 obj-$(CONFIG_ARM_INTEGRATOR)		+= integrator-cpufreq.o
 obj-$(CONFIG_ARM_KIRKWOOD_CPUFREQ)	+= kirkwood-cpufreq.o
@@ -78,6 +79,7 @@
 obj-$(CONFIG_ARM_TEGRA124_CPUFREQ)	+= tegra124-cpufreq.o
 obj-$(CONFIG_ARM_VEXPRESS_SPC_CPUFREQ)	+= vexpress-spc-cpufreq.o
 obj-$(CONFIG_ACPI_CPPC_CPUFREQ) += cppc_cpufreq.o
+obj-$(CONFIG_MACH_MVEBU_V7)		+= mvebu-cpufreq.o
 
 
 ##################################################################################
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index fb57121..32a1505 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -25,6 +25,8 @@
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
@@ -50,8 +52,6 @@
 MODULE_DESCRIPTION("ACPI Processor P-States Driver");
 MODULE_LICENSE("GPL");
 
-#define PFX "acpi-cpufreq: "
-
 enum {
 	UNDEFINED_CAPABLE = 0,
 	SYSTEM_INTEL_MSR_CAPABLE,
@@ -65,7 +65,6 @@
 #define MSR_K7_HWCR_CPB_DIS	(1ULL << 25)
 
 struct acpi_cpufreq_data {
-	struct cpufreq_frequency_table *freq_table;
 	unsigned int resume;
 	unsigned int cpu_feature;
 	unsigned int acpi_perf_cpu;
@@ -200,8 +199,9 @@
 	return cpu_has(cpu, X86_FEATURE_HW_PSTATE);
 }
 
-static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data)
+static unsigned extract_io(struct cpufreq_policy *policy, u32 value)
 {
+	struct acpi_cpufreq_data *data = policy->driver_data;
 	struct acpi_processor_performance *perf;
 	int i;
 
@@ -209,13 +209,14 @@
 
 	for (i = 0; i < perf->state_count; i++) {
 		if (value == perf->states[i].status)
-			return data->freq_table[i].frequency;
+			return policy->freq_table[i].frequency;
 	}
 	return 0;
 }
 
-static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data)
+static unsigned extract_msr(struct cpufreq_policy *policy, u32 msr)
 {
+	struct acpi_cpufreq_data *data = policy->driver_data;
 	struct cpufreq_frequency_table *pos;
 	struct acpi_processor_performance *perf;
 
@@ -226,20 +227,22 @@
 
 	perf = to_perf_data(data);
 
-	cpufreq_for_each_entry(pos, data->freq_table)
+	cpufreq_for_each_entry(pos, policy->freq_table)
 		if (msr == perf->states[pos->driver_data].status)
 			return pos->frequency;
-	return data->freq_table[0].frequency;
+	return policy->freq_table[0].frequency;
 }
 
-static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data)
+static unsigned extract_freq(struct cpufreq_policy *policy, u32 val)
 {
+	struct acpi_cpufreq_data *data = policy->driver_data;
+
 	switch (data->cpu_feature) {
 	case SYSTEM_INTEL_MSR_CAPABLE:
 	case SYSTEM_AMD_MSR_CAPABLE:
-		return extract_msr(val, data);
+		return extract_msr(policy, val);
 	case SYSTEM_IO_CAPABLE:
-		return extract_io(val, data);
+		return extract_io(policy, val);
 	default:
 		return 0;
 	}
@@ -374,11 +377,11 @@
 		return 0;
 
 	data = policy->driver_data;
-	if (unlikely(!data || !data->freq_table))
+	if (unlikely(!data || !policy->freq_table))
 		return 0;
 
-	cached_freq = data->freq_table[to_perf_data(data)->state].frequency;
-	freq = extract_freq(get_cur_val(cpumask_of(cpu), data), data);
+	cached_freq = policy->freq_table[to_perf_data(data)->state].frequency;
+	freq = extract_freq(policy, get_cur_val(cpumask_of(cpu), data));
 	if (freq != cached_freq) {
 		/*
 		 * The dreaded BIOS frequency change behind our back.
@@ -392,14 +395,15 @@
 	return freq;
 }
 
-static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq,
-				struct acpi_cpufreq_data *data)
+static unsigned int check_freqs(struct cpufreq_policy *policy,
+				const struct cpumask *mask, unsigned int freq)
 {
+	struct acpi_cpufreq_data *data = policy->driver_data;
 	unsigned int cur_freq;
 	unsigned int i;
 
 	for (i = 0; i < 100; i++) {
-		cur_freq = extract_freq(get_cur_val(mask, data), data);
+		cur_freq = extract_freq(policy, get_cur_val(mask, data));
 		if (cur_freq == freq)
 			return 1;
 		udelay(10);
@@ -416,12 +420,12 @@
 	unsigned int next_perf_state = 0; /* Index into perf table */
 	int result = 0;
 
-	if (unlikely(data == NULL || data->freq_table == NULL)) {
+	if (unlikely(!data)) {
 		return -ENODEV;
 	}
 
 	perf = to_perf_data(data);
-	next_perf_state = data->freq_table[index].driver_data;
+	next_perf_state = policy->freq_table[index].driver_data;
 	if (perf->state == next_perf_state) {
 		if (unlikely(data->resume)) {
 			pr_debug("Called after resume, resetting to P%d\n",
@@ -444,8 +448,8 @@
 	drv_write(data, mask, perf->states[next_perf_state].control);
 
 	if (acpi_pstate_strict) {
-		if (!check_freqs(mask, data->freq_table[index].frequency,
-					data)) {
+		if (!check_freqs(policy, mask,
+				 policy->freq_table[index].frequency)) {
 			pr_debug("acpi_cpufreq_target failed (%d)\n",
 				policy->cpu);
 			result = -EAGAIN;
@@ -458,6 +462,43 @@
 	return result;
 }
 
+unsigned int acpi_cpufreq_fast_switch(struct cpufreq_policy *policy,
+				      unsigned int target_freq)
+{
+	struct acpi_cpufreq_data *data = policy->driver_data;
+	struct acpi_processor_performance *perf;
+	struct cpufreq_frequency_table *entry;
+	unsigned int next_perf_state, next_freq, freq;
+
+	/*
+	 * Find the closest frequency above target_freq.
+	 *
+	 * The table is sorted in the reverse order with respect to the
+	 * frequency and all of the entries are valid (see the initialization).
+	 */
+	entry = policy->freq_table;
+	do {
+		entry++;
+		freq = entry->frequency;
+	} while (freq >= target_freq && freq != CPUFREQ_TABLE_END);
+	entry--;
+	next_freq = entry->frequency;
+	next_perf_state = entry->driver_data;
+
+	perf = to_perf_data(data);
+	if (perf->state == next_perf_state) {
+		if (unlikely(data->resume))
+			data->resume = 0;
+		else
+			return next_freq;
+	}
+
+	data->cpu_freq_write(&perf->control_register,
+			     perf->states[next_perf_state].control);
+	perf->state = next_perf_state;
+	return next_freq;
+}
+
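The table walk in acpi_cpufreq_fast_switch() above relies on the init code producing a descending, fully valid table; a standalone sketch of the same search (simplified types, illustrative frequencies):

#include <stdio.h>

#define TABLE_END (~0u)

int main(void)
{
	/* Sorted high to low and terminated, like a cpufreq frequency table. */
	unsigned int freqs[] = { 2000000, 1500000, 1000000, 800000, TABLE_END };
	unsigned int target = 1200000, *entry = freqs, freq;

	do {
		entry++;
		freq = *entry;
	} while (freq >= target && freq != TABLE_END);
	entry--;	/* lowest frequency that is still >= target */

	printf("picked %u for target %u\n", *entry, target);	/* 1500000 */
	return 0;
}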
 static unsigned long
 acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
 {
@@ -611,10 +652,7 @@
 		if ((c->x86 == 15) &&
 		    (c->x86_model == 6) &&
 		    (c->x86_mask == 8)) {
-			printk(KERN_INFO "acpi-cpufreq: Intel(R) "
-			    "Xeon(R) 7100 Errata AL30, processors may "
-			    "lock up on frequency changes: disabling "
-			    "acpi-cpufreq.\n");
+			pr_info("Intel(R) Xeon(R) 7100 Errata AL30, processors may lock up on frequency changes: disabling acpi-cpufreq\n");
 			return -ENODEV;
 		    }
 		}
@@ -631,6 +669,7 @@
 	unsigned int result = 0;
 	struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
 	struct acpi_processor_performance *perf;
+	struct cpufreq_frequency_table *freq_table;
 #ifdef CONFIG_SMP
 	static int blacklisted;
 #endif
@@ -690,7 +729,7 @@
 		cpumask_copy(data->freqdomain_cpus,
 			     topology_sibling_cpumask(cpu));
 		policy->shared_type = CPUFREQ_SHARED_TYPE_HW;
-		pr_info_once(PFX "overriding BIOS provided _PSD data\n");
+		pr_info_once("overriding BIOS provided _PSD data\n");
 	}
 #endif
 
@@ -742,9 +781,9 @@
 		goto err_unreg;
 	}
 
-	data->freq_table = kzalloc(sizeof(*data->freq_table) *
+	freq_table = kzalloc(sizeof(*freq_table) *
 		    (perf->state_count+1), GFP_KERNEL);
-	if (!data->freq_table) {
+	if (!freq_table) {
 		result = -ENOMEM;
 		goto err_unreg;
 	}
@@ -762,30 +801,29 @@
 	if (perf->control_register.space_id == ACPI_ADR_SPACE_FIXED_HARDWARE &&
 	    policy->cpuinfo.transition_latency > 20 * 1000) {
 		policy->cpuinfo.transition_latency = 20 * 1000;
-		printk_once(KERN_INFO
-			    "P-state transition latency capped at 20 uS\n");
+		pr_info_once("P-state transition latency capped at 20 uS\n");
 	}
 
 	/* table init */
 	for (i = 0; i < perf->state_count; i++) {
 		if (i > 0 && perf->states[i].core_frequency >=
-		    data->freq_table[valid_states-1].frequency / 1000)
+		    freq_table[valid_states-1].frequency / 1000)
 			continue;
 
-		data->freq_table[valid_states].driver_data = i;
-		data->freq_table[valid_states].frequency =
+		freq_table[valid_states].driver_data = i;
+		freq_table[valid_states].frequency =
 		    perf->states[i].core_frequency * 1000;
 		valid_states++;
 	}
-	data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
+	freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
 	perf->state = 0;
 
-	result = cpufreq_table_validate_and_show(policy, data->freq_table);
+	result = cpufreq_table_validate_and_show(policy, freq_table);
 	if (result)
 		goto err_freqfree;
 
 	if (perf->states[0].core_frequency * 1000 != policy->cpuinfo.max_freq)
-		printk(KERN_WARNING FW_WARN "P-state 0 is not max freq\n");
+		pr_warn(FW_WARN "P-state 0 is not max freq\n");
 
 	switch (perf->control_register.space_id) {
 	case ACPI_ADR_SPACE_SYSTEM_IO:
@@ -821,10 +859,13 @@
 	 */
 	data->resume = 1;
 
+	policy->fast_switch_possible = !acpi_pstate_strict &&
+		!(policy_is_shared(policy) && policy->shared_type != CPUFREQ_SHARED_TYPE_ANY);
+
 	return result;
 
 err_freqfree:
-	kfree(data->freq_table);
+	kfree(freq_table);
 err_unreg:
 	acpi_processor_unregister_performance(cpu);
 err_free_mask:
@@ -842,13 +883,12 @@
 
 	pr_debug("acpi_cpufreq_cpu_exit\n");
 
-	if (data) {
-		policy->driver_data = NULL;
-		acpi_processor_unregister_performance(data->acpi_perf_cpu);
-		free_cpumask_var(data->freqdomain_cpus);
-		kfree(data->freq_table);
-		kfree(data);
-	}
+	policy->fast_switch_possible = false;
+	policy->driver_data = NULL;
+	acpi_processor_unregister_performance(data->acpi_perf_cpu);
+	free_cpumask_var(data->freqdomain_cpus);
+	kfree(policy->freq_table);
+	kfree(data);
 
 	return 0;
 }
@@ -876,6 +916,7 @@
 static struct cpufreq_driver acpi_cpufreq_driver = {
 	.verify		= cpufreq_generic_frequency_table_verify,
 	.target_index	= acpi_cpufreq_target,
+	.fast_switch	= acpi_cpufreq_fast_switch,
 	.bios_limit	= acpi_processor_get_bios_limit,
 	.init		= acpi_cpufreq_cpu_init,
 	.exit		= acpi_cpufreq_cpu_exit,
diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c
index c251247..4180422 100644
--- a/drivers/cpufreq/arm_big_little.c
+++ b/drivers/cpufreq/arm_big_little.c
@@ -298,7 +298,8 @@
 	return 0;
 }
 
-static void _put_cluster_clk_and_freq_table(struct device *cpu_dev)
+static void _put_cluster_clk_and_freq_table(struct device *cpu_dev,
+					    const struct cpumask *cpumask)
 {
 	u32 cluster = raw_cpu_to_cluster(cpu_dev->id);
 
@@ -308,11 +309,12 @@
 	clk_put(clk[cluster]);
 	dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table[cluster]);
 	if (arm_bL_ops->free_opp_table)
-		arm_bL_ops->free_opp_table(cpu_dev);
+		arm_bL_ops->free_opp_table(cpumask);
 	dev_dbg(cpu_dev, "%s: cluster: %d\n", __func__, cluster);
 }
 
-static void put_cluster_clk_and_freq_table(struct device *cpu_dev)
+static void put_cluster_clk_and_freq_table(struct device *cpu_dev,
+					   const struct cpumask *cpumask)
 {
 	u32 cluster = cpu_to_cluster(cpu_dev->id);
 	int i;
@@ -321,7 +323,7 @@
 		return;
 
 	if (cluster < MAX_CLUSTERS)
-		return _put_cluster_clk_and_freq_table(cpu_dev);
+		return _put_cluster_clk_and_freq_table(cpu_dev, cpumask);
 
 	for_each_present_cpu(i) {
 		struct device *cdev = get_cpu_device(i);
@@ -330,14 +332,15 @@
 			return;
 		}
 
-		_put_cluster_clk_and_freq_table(cdev);
+		_put_cluster_clk_and_freq_table(cdev, cpumask);
 	}
 
 	/* free virtual table */
 	kfree(freq_table[cluster]);
 }
 
-static int _get_cluster_clk_and_freq_table(struct device *cpu_dev)
+static int _get_cluster_clk_and_freq_table(struct device *cpu_dev,
+					   const struct cpumask *cpumask)
 {
 	u32 cluster = raw_cpu_to_cluster(cpu_dev->id);
 	int ret;
@@ -345,7 +348,7 @@
 	if (freq_table[cluster])
 		return 0;
 
-	ret = arm_bL_ops->init_opp_table(cpu_dev);
+	ret = arm_bL_ops->init_opp_table(cpumask);
 	if (ret) {
 		dev_err(cpu_dev, "%s: init_opp_table failed, cpu: %d, err: %d\n",
 				__func__, cpu_dev->id, ret);
@@ -374,14 +377,15 @@
 
 free_opp_table:
 	if (arm_bL_ops->free_opp_table)
-		arm_bL_ops->free_opp_table(cpu_dev);
+		arm_bL_ops->free_opp_table(cpumask);
 out:
 	dev_err(cpu_dev, "%s: Failed to get data for cluster: %d\n", __func__,
 			cluster);
 	return ret;
 }
 
-static int get_cluster_clk_and_freq_table(struct device *cpu_dev)
+static int get_cluster_clk_and_freq_table(struct device *cpu_dev,
+					  const struct cpumask *cpumask)
 {
 	u32 cluster = cpu_to_cluster(cpu_dev->id);
 	int i, ret;
@@ -390,7 +394,7 @@
 		return 0;
 
 	if (cluster < MAX_CLUSTERS) {
-		ret = _get_cluster_clk_and_freq_table(cpu_dev);
+		ret = _get_cluster_clk_and_freq_table(cpu_dev, cpumask);
 		if (ret)
 			atomic_dec(&cluster_usage[cluster]);
 		return ret;
@@ -407,7 +411,7 @@
 			return -ENODEV;
 		}
 
-		ret = _get_cluster_clk_and_freq_table(cdev);
+		ret = _get_cluster_clk_and_freq_table(cdev, cpumask);
 		if (ret)
 			goto put_clusters;
 	}
@@ -433,7 +437,7 @@
 			return -ENODEV;
 		}
 
-		_put_cluster_clk_and_freq_table(cdev);
+		_put_cluster_clk_and_freq_table(cdev, cpumask);
 	}
 
 	atomic_dec(&cluster_usage[cluster]);
@@ -455,18 +459,6 @@
 		return -ENODEV;
 	}
 
-	ret = get_cluster_clk_and_freq_table(cpu_dev);
-	if (ret)
-		return ret;
-
-	ret = cpufreq_table_validate_and_show(policy, freq_table[cur_cluster]);
-	if (ret) {
-		dev_err(cpu_dev, "CPU %d, cluster: %d invalid freq table\n",
-				policy->cpu, cur_cluster);
-		put_cluster_clk_and_freq_table(cpu_dev);
-		return ret;
-	}
-
 	if (cur_cluster < MAX_CLUSTERS) {
 		int cpu;
 
@@ -479,6 +471,18 @@
 		per_cpu(physical_cluster, policy->cpu) = A15_CLUSTER;
 	}
 
+	ret = get_cluster_clk_and_freq_table(cpu_dev, policy->cpus);
+	if (ret)
+		return ret;
+
+	ret = cpufreq_table_validate_and_show(policy, freq_table[cur_cluster]);
+	if (ret) {
+		dev_err(cpu_dev, "CPU %d, cluster: %d invalid freq table\n",
+			policy->cpu, cur_cluster);
+		put_cluster_clk_and_freq_table(cpu_dev, policy->cpus);
+		return ret;
+	}
+
 	if (arm_bL_ops->get_transition_latency)
 		policy->cpuinfo.transition_latency =
 			arm_bL_ops->get_transition_latency(cpu_dev);
@@ -509,7 +513,7 @@
 		return -ENODEV;
 	}
 
-	put_cluster_clk_and_freq_table(cpu_dev);
+	put_cluster_clk_and_freq_table(cpu_dev, policy->related_cpus);
 	dev_dbg(cpu_dev, "%s: Exited, cpu: %d\n", __func__, policy->cpu);
 
 	return 0;
diff --git a/drivers/cpufreq/arm_big_little.h b/drivers/cpufreq/arm_big_little.h
index b88889d..184d7c3 100644
--- a/drivers/cpufreq/arm_big_little.h
+++ b/drivers/cpufreq/arm_big_little.h
@@ -30,11 +30,11 @@
 	 * This must set opp table for cpu_dev in a similar way as done by
 	 * dev_pm_opp_of_add_table().
 	 */
-	int (*init_opp_table)(struct device *cpu_dev);
+	int (*init_opp_table)(const struct cpumask *cpumask);
 
 	/* Optional */
 	int (*get_transition_latency)(struct device *cpu_dev);
-	void (*free_opp_table)(struct device *cpu_dev);
+	void (*free_opp_table)(const struct cpumask *cpumask);
 };
 
 int bL_cpufreq_register(struct cpufreq_arm_bL_ops *ops);
diff --git a/drivers/cpufreq/arm_big_little_dt.c b/drivers/cpufreq/arm_big_little_dt.c
index 16ddeef..39b3f51 100644
--- a/drivers/cpufreq/arm_big_little_dt.c
+++ b/drivers/cpufreq/arm_big_little_dt.c
@@ -43,23 +43,6 @@
 	return np;
 }
 
-static int dt_init_opp_table(struct device *cpu_dev)
-{
-	struct device_node *np;
-	int ret;
-
-	np = of_node_get(cpu_dev->of_node);
-	if (!np) {
-		pr_err("failed to find cpu%d node\n", cpu_dev->id);
-		return -ENOENT;
-	}
-
-	ret = dev_pm_opp_of_add_table(cpu_dev);
-	of_node_put(np);
-
-	return ret;
-}
-
 static int dt_get_transition_latency(struct device *cpu_dev)
 {
 	struct device_node *np;
@@ -81,8 +64,8 @@
 static struct cpufreq_arm_bL_ops dt_bL_ops = {
 	.name	= "dt-bl",
 	.get_transition_latency = dt_get_transition_latency,
-	.init_opp_table = dt_init_opp_table,
-	.free_opp_table = dev_pm_opp_of_remove_table,
+	.init_opp_table = dev_pm_opp_of_cpumask_add_table,
+	.free_opp_table = dev_pm_opp_of_cpumask_remove_table,
 };
 
 static int generic_bL_probe(struct platform_device *pdev)
diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index 7c0bdfb..8882b8e 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -173,4 +173,25 @@
 	return -ENODEV;
 }
 
+static void __exit cppc_cpufreq_exit(void)
+{
+	struct cpudata *cpu;
+	int i;
+
+	cpufreq_unregister_driver(&cppc_cpufreq_driver);
+
+	for_each_possible_cpu(i) {
+		cpu = all_cpu_data[i];
+		free_cpumask_var(cpu->shared_cpu_map);
+		kfree(cpu);
+	}
+
+	kfree(all_cpu_data);
+}
+
+module_exit(cppc_cpufreq_exit);
+MODULE_AUTHOR("Ashwin Chaugule");
+MODULE_DESCRIPTION("CPUFreq driver based on the ACPI CPPC v5.0+ spec");
+MODULE_LICENSE("GPL");
+
 late_initcall(cppc_cpufreq_init);
diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
new file mode 100644
index 0000000..3646b14
--- /dev/null
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 2016 Linaro.
+ * Viresh Kumar <viresh.kumar@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/err.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+
+static const struct of_device_id machines[] __initconst = {
+	{ .compatible = "allwinner,sun4i-a10", },
+	{ .compatible = "allwinner,sun5i-a10s", },
+	{ .compatible = "allwinner,sun5i-a13", },
+	{ .compatible = "allwinner,sun5i-r8", },
+	{ .compatible = "allwinner,sun6i-a31", },
+	{ .compatible = "allwinner,sun6i-a31s", },
+	{ .compatible = "allwinner,sun7i-a20", },
+	{ .compatible = "allwinner,sun8i-a23", },
+	{ .compatible = "allwinner,sun8i-a33", },
+	{ .compatible = "allwinner,sun8i-a83t", },
+	{ .compatible = "allwinner,sun8i-h3", },
+
+	{ .compatible = "hisilicon,hi6220", },
+
+	{ .compatible = "fsl,imx27", },
+	{ .compatible = "fsl,imx51", },
+	{ .compatible = "fsl,imx53", },
+	{ .compatible = "fsl,imx7d", },
+
+	{ .compatible = "marvell,berlin", },
+
+	{ .compatible = "samsung,exynos3250", },
+	{ .compatible = "samsung,exynos4210", },
+	{ .compatible = "samsung,exynos4212", },
+	{ .compatible = "samsung,exynos4412", },
+	{ .compatible = "samsung,exynos5250", },
+#ifndef CONFIG_BL_SWITCHER
+	{ .compatible = "samsung,exynos5420", },
+	{ .compatible = "samsung,exynos5800", },
+#endif
+
+	{ .compatible = "renesas,emev2", },
+	{ .compatible = "renesas,r7s72100", },
+	{ .compatible = "renesas,r8a73a4", },
+	{ .compatible = "renesas,r8a7740", },
+	{ .compatible = "renesas,r8a7778", },
+	{ .compatible = "renesas,r8a7779", },
+	{ .compatible = "renesas,r8a7790", },
+	{ .compatible = "renesas,r8a7791", },
+	{ .compatible = "renesas,r8a7793", },
+	{ .compatible = "renesas,r8a7794", },
+	{ .compatible = "renesas,sh73a0", },
+
+	{ .compatible = "rockchip,rk2928", },
+	{ .compatible = "rockchip,rk3036", },
+	{ .compatible = "rockchip,rk3066a", },
+	{ .compatible = "rockchip,rk3066b", },
+	{ .compatible = "rockchip,rk3188", },
+	{ .compatible = "rockchip,rk3228", },
+	{ .compatible = "rockchip,rk3288", },
+	{ .compatible = "rockchip,rk3366", },
+	{ .compatible = "rockchip,rk3368", },
+	{ .compatible = "rockchip,rk3399", },
+
+	{ .compatible = "sigma,tango4", },
+
+	{ .compatible = "ti,omap2", },
+	{ .compatible = "ti,omap3", },
+	{ .compatible = "ti,omap4", },
+	{ .compatible = "ti,omap5", },
+
+	{ .compatible = "xlnx,zynq-7000", },
+
+	{ /* sentinel */ }
+};
+
+static int __init cpufreq_dt_platdev_init(void)
+{
+	struct device_node *np = of_find_node_by_path("/");
+
+	if (!np)
+		return -ENODEV;
+
+	if (!of_match_node(machines, np)) {
+		of_node_put(np);
+		return -ENODEV;
+	}
+
+	of_node_put(np);
+
+	return PTR_ERR_OR_ZERO(platform_device_register_simple("cpufreq-dt", -1,
+							       NULL, 0));
+}
+device_initcall(cpufreq_dt_platdev_init);
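
A minimal sketch of how a whitelist like machines[] is consumed (illustrative code, not the kernel's of_match_node() implementation): the walk stops at the first all-empty entry, which is why the table needs the { } sentinel shown above.

#include <stdio.h>
#include <string.h>

struct id { const char *compatible; };

static const struct id table[] = {
	{ "vendor,board-a" },
	{ "vendor,board-b" },
	{ }	/* sentinel terminates the walk */
};

static const struct id *match(const char *compat)
{
	const struct id *id;

	for (id = table; id->compatible; id++)
		if (!strcmp(id->compatible, compat))
			return id;
	return NULL;	/* no entry matched */
}

int main(void)
{
	printf("%s\n", match("vendor,board-b") ? "match" : "no match");
	return 0;
}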
diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index 5f8dbe6..3957de8 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -15,7 +15,6 @@
 #include <linux/cpu.h>
 #include <linux/cpu_cooling.h>
 #include <linux/cpufreq.h>
-#include <linux/cpufreq-dt.h>
 #include <linux/cpumask.h>
 #include <linux/err.h>
 #include <linux/module.h>
@@ -147,7 +146,7 @@
 	struct clk *cpu_clk;
 	struct dev_pm_opp *suspend_opp;
 	unsigned int transition_latency;
-	bool opp_v1 = false;
+	bool fallback = false;
 	const char *name;
 	int ret;
 
@@ -167,14 +166,16 @@
 	/* Get OPP-sharing information from "operating-points-v2" bindings */
 	ret = dev_pm_opp_of_get_sharing_cpus(cpu_dev, policy->cpus);
 	if (ret) {
+		if (ret != -ENOENT)
+			goto out_put_clk;
+
 		/*
 		 * operating-points-v2 not supported, fallback to old method of
-		 * finding shared-OPPs for backward compatibility.
+		 * finding shared-OPPs for backward compatibility if the
+		 * platform hasn't set sharing CPUs.
 		 */
-		if (ret == -ENOENT)
-			opp_v1 = true;
-		else
-			goto out_put_clk;
+		if (dev_pm_opp_get_sharing_cpus(cpu_dev, policy->cpus))
+			fallback = true;
 	}
 
 	/*
@@ -214,11 +215,8 @@
 		goto out_free_opp;
 	}
 
-	if (opp_v1) {
-		struct cpufreq_dt_platform_data *pd = cpufreq_get_driver_data();
-
-		if (!pd || !pd->independent_clocks)
-			cpumask_setall(policy->cpus);
+	if (fallback) {
+		cpumask_setall(policy->cpus);
 
 		/*
 		 * OPP tables are initialized only for policy->cpu, do it for
diff --git a/drivers/cpufreq/cpufreq-nforce2.c b/drivers/cpufreq/cpufreq-nforce2.c
index db69eeb..5503d49 100644
--- a/drivers/cpufreq/cpufreq-nforce2.c
+++ b/drivers/cpufreq/cpufreq-nforce2.c
@@ -7,6 +7,8 @@
  *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
@@ -56,8 +58,6 @@
 MODULE_PARM_DESC(min_fsb,
 		"Minimum FSB to use, if not defined: current FSB - 50");
 
-#define PFX "cpufreq-nforce2: "
-
 /**
  * nforce2_calc_fsb - calculate FSB
  * @pll: PLL value
@@ -174,13 +174,13 @@
 	int pll = 0;
 
 	if ((fsb > max_fsb) || (fsb < NFORCE2_MIN_FSB)) {
-		printk(KERN_ERR PFX "FSB %d is out of range!\n", fsb);
+		pr_err("FSB %d is out of range!\n", fsb);
 		return -EINVAL;
 	}
 
 	tfsb = nforce2_fsb_read(0);
 	if (!tfsb) {
-		printk(KERN_ERR PFX "Error while reading the FSB\n");
+		pr_err("Error while reading the FSB\n");
 		return -EINVAL;
 	}
 
@@ -276,8 +276,7 @@
 	/* local_irq_save(flags); */
 
 	if (nforce2_set_fsb(target_fsb) < 0)
-		printk(KERN_ERR PFX "Changing FSB to %d failed\n",
-			target_fsb);
+		pr_err("Changing FSB to %d failed\n", target_fsb);
 	else
 		pr_debug("Changed FSB successfully to %d\n",
 			target_fsb);
@@ -325,8 +324,7 @@
 	/* FIX: Get FID from CPU */
 	if (!fid) {
 		if (!cpu_khz) {
-			printk(KERN_WARNING PFX
-			"cpu_khz not set, can't calculate multiplier!\n");
+			pr_warn("cpu_khz not set, can't calculate multiplier!\n");
 			return -ENODEV;
 		}
 
@@ -341,8 +339,8 @@
 		}
 	}
 
-	printk(KERN_INFO PFX "FSB currently at %i MHz, FID %d.%d\n", fsb,
-	       fid / 10, fid % 10);
+	pr_info("FSB currently at %i MHz, FID %d.%d\n",
+		fsb, fid / 10, fid % 10);
 
 	/* Set maximum FSB to FSB at boot time */
 	max_fsb = nforce2_fsb_read(1);
@@ -401,11 +399,9 @@
 	if (nforce2_dev == NULL)
 		return -ENODEV;
 
-	printk(KERN_INFO PFX "Detected nForce2 chipset revision %X\n",
-	       nforce2_dev->revision);
-	printk(KERN_INFO PFX
-	       "FSB changing is maybe unstable and can lead to "
-	       "crashes and data loss.\n");
+	pr_info("Detected nForce2 chipset revision %X\n",
+		nforce2_dev->revision);
+	pr_info("FSB changing may be unstable and can lead to crashes and data loss\n");
 
 	return 0;
 }
@@ -423,7 +419,7 @@
 
 	/* detect chipset */
 	if (nforce2_detect_chipset()) {
-		printk(KERN_INFO PFX "No nForce2 chipset.\n");
+		pr_info("No nForce2 chipset\n");
 		return -ENODEV;
 	}
 
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index c4acfc5..035513b 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -78,6 +78,11 @@
 static unsigned int __cpufreq_get(struct cpufreq_policy *policy);
 static int cpufreq_start_governor(struct cpufreq_policy *policy);
 
+static inline int cpufreq_exit_governor(struct cpufreq_policy *policy)
+{
+	return cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT);
+}
+
 /**
  * Two notifier lists: the "policy" list is involved in the
  * validation process for a new CPU frequency policy; the
@@ -429,6 +434,73 @@
 }
 EXPORT_SYMBOL_GPL(cpufreq_freq_transition_end);
 
+/*
+ * Fast frequency switching status count.  Positive means "enabled", negative
+ * means "disabled" and 0 means "not decided yet".
+ */
+static int cpufreq_fast_switch_count;
+static DEFINE_MUTEX(cpufreq_fast_switch_lock);
+
+static void cpufreq_list_transition_notifiers(void)
+{
+	struct notifier_block *nb;
+
+	pr_info("Registered transition notifiers:\n");
+
+	mutex_lock(&cpufreq_transition_notifier_list.mutex);
+
+	for (nb = cpufreq_transition_notifier_list.head; nb; nb = nb->next)
+		pr_info("%pF\n", nb->notifier_call);
+
+	mutex_unlock(&cpufreq_transition_notifier_list.mutex);
+}
+
+/**
+ * cpufreq_enable_fast_switch - Enable fast frequency switching for policy.
+ * @policy: cpufreq policy to enable fast frequency switching for.
+ *
+ * Try to enable fast frequency switching for @policy.
+ *
+ * The attempt will fail if there is at least one transition notifier registered
+ * at this point, as fast frequency switching is quite fundamentally at odds
+ * with transition notifiers.  Thus if successful, it will make registration of
+ * transition notifiers fail going forward.
+ */
+void cpufreq_enable_fast_switch(struct cpufreq_policy *policy)
+{
+	lockdep_assert_held(&policy->rwsem);
+
+	if (!policy->fast_switch_possible)
+		return;
+
+	mutex_lock(&cpufreq_fast_switch_lock);
+	if (cpufreq_fast_switch_count >= 0) {
+		cpufreq_fast_switch_count++;
+		policy->fast_switch_enabled = true;
+	} else {
+		pr_warn("CPU%u: Fast frequency switching not enabled\n",
+			policy->cpu);
+		cpufreq_list_transition_notifiers();
+	}
+	mutex_unlock(&cpufreq_fast_switch_lock);
+}
+EXPORT_SYMBOL_GPL(cpufreq_enable_fast_switch);
+
+/**
+ * cpufreq_disable_fast_switch - Disable fast frequency switching for policy.
+ * @policy: cpufreq policy to disable fast frequency switching for.
+ */
+void cpufreq_disable_fast_switch(struct cpufreq_policy *policy)
+{
+	mutex_lock(&cpufreq_fast_switch_lock);
+	if (policy->fast_switch_enabled) {
+		policy->fast_switch_enabled = false;
+		if (!WARN_ON(cpufreq_fast_switch_count <= 0))
+			cpufreq_fast_switch_count--;
+	}
+	mutex_unlock(&cpufreq_fast_switch_lock);
+}
+EXPORT_SYMBOL_GPL(cpufreq_disable_fast_switch);
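
The counter semantics described above amount to a small mutual-exclusion protocol; a userspace model of it (illustrative, with -1 standing in for -EBUSY):

#include <stdio.h>

static int count;	/* > 0: fast-switch users, < 0: notifiers, 0: undecided */

static int register_notifier(void)
{
	if (count > 0)
		return -1;	/* fast switching already in use */
	count--;
	return 0;
}

static int enable_fast_switch(void)
{
	if (count < 0)
		return -1;	/* transition notifiers already registered */
	count++;
	return 0;
}

int main(void)
{
	printf("enable: %d\n", enable_fast_switch());	/* 0: claims the counter */
	printf("register: %d\n", register_notifier());	/* -1: now excluded */
	return 0;
}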
 
 /*********************************************************************
  *                          SYSFS INTERFACE                          *
@@ -1248,26 +1320,24 @@
  */
 static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 {
+	struct cpufreq_policy *policy;
 	unsigned cpu = dev->id;
-	int ret;
 
 	dev_dbg(dev, "%s: adding CPU%u\n", __func__, cpu);
 
-	if (cpu_online(cpu)) {
-		ret = cpufreq_online(cpu);
-	} else {
-		/*
-		 * A hotplug notifier will follow and we will handle it as CPU
-		 * online then.  For now, just create the sysfs link, unless
-		 * there is no policy or the link is already present.
-		 */
-		struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);
+	if (cpu_online(cpu))
+		return cpufreq_online(cpu);
 
-		ret = policy && !cpumask_test_and_set_cpu(cpu, policy->real_cpus)
-			? add_cpu_dev_symlink(policy, cpu) : 0;
-	}
+	/*
+	 * A hotplug notifier will follow and we will handle it as CPU online
+	 * then.  For now, just create the sysfs link, unless there is no policy
+	 * or the link is already present.
+	 */
+	policy = per_cpu(cpufreq_cpu_data, cpu);
+	if (!policy || cpumask_test_and_set_cpu(cpu, policy->real_cpus))
+		return 0;
 
-	return ret;
+	return add_cpu_dev_symlink(policy, cpu);
 }
 
 static void cpufreq_offline(unsigned int cpu)
@@ -1319,7 +1389,7 @@
 
 	/* If cpu is last user of policy, free policy */
 	if (has_target()) {
-		ret = cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT);
+		ret = cpufreq_exit_governor(policy);
 		if (ret)
 			pr_err("%s: Failed to exit governor\n", __func__);
 	}
@@ -1447,8 +1517,12 @@
 
 	ret_freq = cpufreq_driver->get(policy->cpu);
 
-	/* Updating inactive policies is invalid, so avoid doing that. */
-	if (unlikely(policy_is_inactive(policy)))
+	/*
+	 * Updating inactive policies is invalid, so avoid doing that.  Also
+	 * if fast frequency switching is used with the given policy, the check
+	 * against policy->cur is pointless, so skip it in that case too.
+	 */
+	if (unlikely(policy_is_inactive(policy)) || policy->fast_switch_enabled)
 		return ret_freq;
 
 	if (ret_freq && policy->cur &&
@@ -1679,8 +1753,18 @@
 
 	switch (list) {
 	case CPUFREQ_TRANSITION_NOTIFIER:
+		mutex_lock(&cpufreq_fast_switch_lock);
+
+		if (cpufreq_fast_switch_count > 0) {
+			mutex_unlock(&cpufreq_fast_switch_lock);
+			return -EBUSY;
+		}
 		ret = srcu_notifier_chain_register(
 				&cpufreq_transition_notifier_list, nb);
+		if (!ret)
+			cpufreq_fast_switch_count--;
+
+		mutex_unlock(&cpufreq_fast_switch_lock);
 		break;
 	case CPUFREQ_POLICY_NOTIFIER:
 		ret = blocking_notifier_chain_register(
@@ -1713,8 +1797,14 @@
 
 	switch (list) {
 	case CPUFREQ_TRANSITION_NOTIFIER:
+		mutex_lock(&cpufreq_fast_switch_lock);
+
 		ret = srcu_notifier_chain_unregister(
 				&cpufreq_transition_notifier_list, nb);
+		if (!ret && !WARN_ON(cpufreq_fast_switch_count >= 0))
+			cpufreq_fast_switch_count++;
+
+		mutex_unlock(&cpufreq_fast_switch_lock);
 		break;
 	case CPUFREQ_POLICY_NOTIFIER:
 		ret = blocking_notifier_chain_unregister(
@@ -1733,6 +1823,37 @@
  *                              GOVERNORS                            *
  *********************************************************************/
 
+/**
+ * cpufreq_driver_fast_switch - Carry out a fast CPU frequency switch.
+ * @policy: cpufreq policy to switch the frequency for.
+ * @target_freq: New frequency to set (may be approximate).
+ *
+ * Carry out a fast frequency switch without sleeping.
+ *
+ * The driver's ->fast_switch() callback invoked by this function must be
+ * suitable for being called from within RCU-sched read-side critical sections
+ * and it is expected to select the minimum available frequency greater than or
+ * equal to @target_freq (CPUFREQ_RELATION_L).
+ *
+ * This function must not be called if policy->fast_switch_enabled is unset.
+ *
+ * Governors calling this function must guarantee that it will never be invoked
+ * twice in parallel for the same policy and that it will never be called in
+ * parallel with either ->target() or ->target_index() for the same policy.
+ *
+ * If CPUFREQ_ENTRY_INVALID is returned by the driver's ->fast_switch()
+ * callback to indicate an error condition, the hardware configuration must be
+ * preserved.
+ */
+unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy,
+					unsigned int target_freq)
+{
+	target_freq = clamp_val(target_freq, policy->min, policy->max);
+
+	return cpufreq_driver->fast_switch(policy, target_freq);
+}
+EXPORT_SYMBOL_GPL(cpufreq_driver_fast_switch);
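
A hedged sketch of the governor side of this contract (the function name and the policy->cur update are hypothetical; only cpufreq_driver_fast_switch(), CPUFREQ_ENTRY_INVALID and the fast_switch_enabled flag come from the patch):

/* Kernel-context fragment, not a standalone program. */
static void gov_fast_adjust(struct cpufreq_policy *policy,
			    unsigned int next_freq)
{
	unsigned int freq;

	if (!policy->fast_switch_enabled)
		return;	/* caller must use the regular ->target path instead */

	freq = cpufreq_driver_fast_switch(policy, next_freq);
	if (freq == CPUFREQ_ENTRY_INVALID)
		return;	/* driver error: hardware state is unchanged */

	policy->cur = freq;	/* record what the driver actually set */
}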
+
 /* Must set freqs->new to intermediate frequency */
 static int __target_intermediate(struct cpufreq_policy *policy,
 				 struct cpufreq_freqs *freqs, int index)
@@ -2108,7 +2229,7 @@
 			return ret;
 		}
 
-		ret = cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT);
+		ret = cpufreq_exit_governor(policy);
 		if (ret) {
 			pr_err("%s: Failed to Exit Governor: %s (%d)\n",
 			       __func__, old_gov->name, ret);
@@ -2125,7 +2246,7 @@
 			pr_debug("cpufreq: governor change\n");
 			return 0;
 		}
-		cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT);
+		cpufreq_exit_governor(policy);
 	}
 
 	/* new governor failed, so re-start old one */
@@ -2193,16 +2314,13 @@
 
 	switch (action & ~CPU_TASKS_FROZEN) {
 	case CPU_ONLINE:
+	case CPU_DOWN_FAILED:
 		cpufreq_online(cpu);
 		break;
 
 	case CPU_DOWN_PREPARE:
 		cpufreq_offline(cpu);
 		break;
-
-	case CPU_DOWN_FAILED:
-		cpufreq_online(cpu);
-		break;
 	}
 	return NOTIFY_OK;
 }
diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index bf4913f..316df24 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -129,9 +129,10 @@
 /************************** sysfs interface ************************/
 static struct dbs_governor cs_dbs_gov;
 
-static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data,
-		const char *buf, size_t count)
+static ssize_t store_sampling_down_factor(struct gov_attr_set *attr_set,
+					  const char *buf, size_t count)
 {
+	struct dbs_data *dbs_data = to_dbs_data(attr_set);
 	unsigned int input;
 	int ret;
 	ret = sscanf(buf, "%u", &input);
@@ -143,9 +144,10 @@
 	return count;
 }
 
-static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf,
-		size_t count)
+static ssize_t store_up_threshold(struct gov_attr_set *attr_set,
+				  const char *buf, size_t count)
 {
+	struct dbs_data *dbs_data = to_dbs_data(attr_set);
 	struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
 	unsigned int input;
 	int ret;
@@ -158,9 +160,10 @@
 	return count;
 }
 
-static ssize_t store_down_threshold(struct dbs_data *dbs_data, const char *buf,
-		size_t count)
+static ssize_t store_down_threshold(struct gov_attr_set *attr_set,
+				    const char *buf, size_t count)
 {
+	struct dbs_data *dbs_data = to_dbs_data(attr_set);
 	struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
 	unsigned int input;
 	int ret;
@@ -175,9 +178,10 @@
 	return count;
 }
 
-static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data,
-		const char *buf, size_t count)
+static ssize_t store_ignore_nice_load(struct gov_attr_set *attr_set,
+				      const char *buf, size_t count)
 {
+	struct dbs_data *dbs_data = to_dbs_data(attr_set);
 	unsigned int input;
 	int ret;
 
@@ -199,9 +203,10 @@
 	return count;
 }
 
-static ssize_t store_freq_step(struct dbs_data *dbs_data, const char *buf,
-		size_t count)
+static ssize_t store_freq_step(struct gov_attr_set *attr_set, const char *buf,
+			       size_t count)
 {
+	struct dbs_data *dbs_data = to_dbs_data(attr_set);
 	struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
 	unsigned int input;
 	int ret;
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 5f1147f..be498d5 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -43,9 +43,10 @@
  * This must be called with dbs_data->mutex held, otherwise traversing
  * policy_dbs_list isn't safe.
  */
-ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf,
+ssize_t store_sampling_rate(struct gov_attr_set *attr_set, const char *buf,
 			    size_t count)
 {
+	struct dbs_data *dbs_data = to_dbs_data(attr_set);
 	struct policy_dbs_info *policy_dbs;
 	unsigned int rate;
 	int ret;
@@ -59,7 +60,7 @@
 	 * We are operating under dbs_data->mutex and so the list and its
 	 * entries can't be freed concurrently.
 	 */
-	list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) {
+	list_for_each_entry(policy_dbs, &attr_set->policy_list, list) {
 		mutex_lock(&policy_dbs->timer_mutex);
 		/*
 		 * On 32-bit architectures this may race with the
@@ -96,13 +97,13 @@
 {
 	struct policy_dbs_info *policy_dbs;
 
-	list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) {
+	list_for_each_entry(policy_dbs, &dbs_data->attr_set.policy_list, list) {
 		unsigned int j;
 
 		for_each_cpu(j, policy_dbs->policy->cpus) {
 			struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j);
 
-			j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall,
+			j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_update_time,
 								  dbs_data->io_is_busy);
 			if (dbs_data->ignore_nice_load)
 				j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
@@ -111,54 +112,6 @@
 }
 EXPORT_SYMBOL_GPL(gov_update_cpu_data);
 
-static inline struct dbs_data *to_dbs_data(struct kobject *kobj)
-{
-	return container_of(kobj, struct dbs_data, kobj);
-}
-
-static inline struct governor_attr *to_gov_attr(struct attribute *attr)
-{
-	return container_of(attr, struct governor_attr, attr);
-}
-
-static ssize_t governor_show(struct kobject *kobj, struct attribute *attr,
-			     char *buf)
-{
-	struct dbs_data *dbs_data = to_dbs_data(kobj);
-	struct governor_attr *gattr = to_gov_attr(attr);
-
-	return gattr->show(dbs_data, buf);
-}
-
-static ssize_t governor_store(struct kobject *kobj, struct attribute *attr,
-			      const char *buf, size_t count)
-{
-	struct dbs_data *dbs_data = to_dbs_data(kobj);
-	struct governor_attr *gattr = to_gov_attr(attr);
-	int ret = -EBUSY;
-
-	mutex_lock(&dbs_data->mutex);
-
-	if (dbs_data->usage_count)
-		ret = gattr->store(dbs_data, buf, count);
-
-	mutex_unlock(&dbs_data->mutex);
-
-	return ret;
-}
-
-/*
- * Sysfs Ops for accessing governor attributes.
- *
- * All show/store invocations for governor specific sysfs attributes, will first
- * call the below show/store callbacks and the attribute specific callback will
- * be called from within it.
- */
-static const struct sysfs_ops governor_sysfs_ops = {
-	.show	= governor_show,
-	.store	= governor_store,
-};
-
 unsigned int dbs_update(struct cpufreq_policy *policy)
 {
 	struct policy_dbs_info *policy_dbs = policy->governor_data;
@@ -184,14 +137,14 @@
 	/* Get Absolute Load */
 	for_each_cpu(j, policy->cpus) {
 		struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j);
-		u64 cur_wall_time, cur_idle_time;
-		unsigned int idle_time, wall_time;
+		u64 update_time, cur_idle_time;
+		unsigned int idle_time, time_elapsed;
 		unsigned int load;
 
-		cur_idle_time = get_cpu_idle_time(j, &cur_wall_time, io_busy);
+		cur_idle_time = get_cpu_idle_time(j, &update_time, io_busy);
 
-		wall_time = cur_wall_time - j_cdbs->prev_cpu_wall;
-		j_cdbs->prev_cpu_wall = cur_wall_time;
+		time_elapsed = update_time - j_cdbs->prev_update_time;
+		j_cdbs->prev_update_time = update_time;
 
 		idle_time = cur_idle_time - j_cdbs->prev_cpu_idle;
 		j_cdbs->prev_cpu_idle = cur_idle_time;
@@ -203,47 +156,62 @@
 			j_cdbs->prev_cpu_nice = cur_nice;
 		}
 
-		if (unlikely(!wall_time || wall_time < idle_time))
-			continue;
-
-		/*
-		 * If the CPU had gone completely idle, and a task just woke up
-		 * on this CPU now, it would be unfair to calculate 'load' the
-		 * usual way for this elapsed time-window, because it will show
-		 * near-zero load, irrespective of how CPU intensive that task
-		 * actually is. This is undesirable for latency-sensitive bursty
-		 * workloads.
-		 *
-		 * To avoid this, we reuse the 'load' from the previous
-		 * time-window and give this task a chance to start with a
-		 * reasonably high CPU frequency. (However, we shouldn't over-do
-		 * this copy, lest we get stuck at a high load (high frequency)
-		 * for too long, even when the current system load has actually
-		 * dropped down. So we perform the copy only once, upon the
-		 * first wake-up from idle.)
-		 *
-		 * Detecting this situation is easy: the governor's utilization
-		 * update handler would not have run during CPU-idle periods.
-		 * Hence, an unusually large 'wall_time' (as compared to the
-		 * sampling rate) indicates this scenario.
-		 *
-		 * prev_load can be zero in two cases and we must recalculate it
-		 * for both cases:
-		 * - during long idle intervals
-		 * - explicitly set to zero
-		 */
-		if (unlikely(wall_time > (2 * sampling_rate) &&
-			     j_cdbs->prev_load)) {
-			load = j_cdbs->prev_load;
-
+		if (unlikely(!time_elapsed)) {
 			/*
-			 * Perform a destructive copy, to ensure that we copy
-			 * the previous load only once, upon the first wake-up
-			 * from idle.
+			 * That can only happen when this function is called
+			 * twice in a row with a very short interval between the
+			 * calls, so the previous load value can be used then.
 			 */
+			load = j_cdbs->prev_load;
+		} else if (unlikely(time_elapsed > 2 * sampling_rate &&
+				    j_cdbs->prev_load)) {
+			/*
+			 * If the CPU had gone completely idle and a task has
+			 * just woken up on this CPU now, it would be unfair to
+			 * calculate 'load' the usual way for this elapsed
+			 * time-window, because it would show near-zero load,
+			 * irrespective of how CPU intensive that task actually
+			 * was. This is undesirable for latency-sensitive bursty
+			 * workloads.
+			 *
+			 * To avoid this, reuse the 'load' from the previous
+			 * time-window and give this task a chance to start with
+			 * a reasonably high CPU frequency. However, that
+			 * shouldn't be over-done, lest we get stuck at a high
+			 * load (high frequency) for too long, even when the
+			 * current system load has actually dropped down, so
+			 * clear prev_load to guarantee that the load will be
+			 * computed again next time.
+			 *
+			 * Detecting this situation is easy: the governor's
+			 * utilization update handler would not have run during
+			 * CPU-idle periods.  Hence, an unusually large
+			 * 'time_elapsed' (as compared to the sampling rate)
+			 * indicates this scenario.
+			 */
+			load = j_cdbs->prev_load;
 			j_cdbs->prev_load = 0;
 		} else {
-			load = 100 * (wall_time - idle_time) / wall_time;
+			if (time_elapsed >= idle_time) {
+				load = 100 * (time_elapsed - idle_time) / time_elapsed;
+			} else {
+				/*
+				 * That can happen if idle_time is returned by
+				 * get_cpu_idle_time_jiffy().  In that case
+				 * idle_time is roughly equal to the difference
+				 * between time_elapsed and "busy time" obtained
+				 * from CPU statistics.  Then, the "busy time"
+				 * can end up being greater than time_elapsed
+				 * (for example, if jiffies_64 and the CPU
+				 * statistics are updated by different CPUs),
+				 * so idle_time may in fact be negative.  That
+				 * means, though, that the CPU was busy all
+				 * the time (on the rough average) during the
+				 * last sampling interval and 100 can be
+				 * returned as the load.
+				 */
+				load = (int)idle_time < 0 ? 100 : 0;
+			}
 			j_cdbs->prev_load = load;
 		}
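
A worked pass through the rewritten load computation (illustrative numbers):

/*
 *	time_elapsed = 10000 us, idle_time = 2500 us:
 *		load = 100 * (10000 - 2500) / 10000 = 75
 *
 *	If jiffy-based accounting makes the sampled busy time overshoot
 *	time_elapsed, idle_time underflows and looks negative as an int,
 *	and the CPU is treated as fully busy: load = 100.
 */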
 
@@ -254,43 +222,6 @@
 }
 EXPORT_SYMBOL_GPL(dbs_update);
 
-static void gov_set_update_util(struct policy_dbs_info *policy_dbs,
-				unsigned int delay_us)
-{
-	struct cpufreq_policy *policy = policy_dbs->policy;
-	int cpu;
-
-	gov_update_sample_delay(policy_dbs, delay_us);
-	policy_dbs->last_sample_time = 0;
-
-	for_each_cpu(cpu, policy->cpus) {
-		struct cpu_dbs_info *cdbs = &per_cpu(cpu_dbs, cpu);
-
-		cpufreq_set_update_util_data(cpu, &cdbs->update_util);
-	}
-}
-
-static inline void gov_clear_update_util(struct cpufreq_policy *policy)
-{
-	int i;
-
-	for_each_cpu(i, policy->cpus)
-		cpufreq_set_update_util_data(i, NULL);
-
-	synchronize_sched();
-}
-
-static void gov_cancel_work(struct cpufreq_policy *policy)
-{
-	struct policy_dbs_info *policy_dbs = policy->governor_data;
-
-	gov_clear_update_util(policy_dbs->policy);
-	irq_work_sync(&policy_dbs->irq_work);
-	cancel_work_sync(&policy_dbs->work);
-	atomic_set(&policy_dbs->work_count, 0);
-	policy_dbs->work_in_progress = false;
-}
-
 static void dbs_work_handler(struct work_struct *work)
 {
 	struct policy_dbs_info *policy_dbs;
@@ -378,6 +309,44 @@
 	irq_work_queue(&policy_dbs->irq_work);
 }
 
+static void gov_set_update_util(struct policy_dbs_info *policy_dbs,
+				unsigned int delay_us)
+{
+	struct cpufreq_policy *policy = policy_dbs->policy;
+	int cpu;
+
+	gov_update_sample_delay(policy_dbs, delay_us);
+	policy_dbs->last_sample_time = 0;
+
+	for_each_cpu(cpu, policy->cpus) {
+		struct cpu_dbs_info *cdbs = &per_cpu(cpu_dbs, cpu);
+
+		cpufreq_add_update_util_hook(cpu, &cdbs->update_util,
+					     dbs_update_util_handler);
+	}
+}
+
+static inline void gov_clear_update_util(struct cpufreq_policy *policy)
+{
+	int i;
+
+	for_each_cpu(i, policy->cpus)
+		cpufreq_remove_update_util_hook(i);
+
+	synchronize_sched();
+}
+
+static void gov_cancel_work(struct cpufreq_policy *policy)
+{
+	struct policy_dbs_info *policy_dbs = policy->governor_data;
+
+	gov_clear_update_util(policy_dbs->policy);
+	irq_work_sync(&policy_dbs->irq_work);
+	cancel_work_sync(&policy_dbs->work);
+	atomic_set(&policy_dbs->work_count, 0);
+	policy_dbs->work_in_progress = false;
+}
+
 static struct policy_dbs_info *alloc_policy_dbs_info(struct cpufreq_policy *policy,
 						     struct dbs_governor *gov)
 {
@@ -400,7 +369,6 @@
 		struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j);
 
 		j_cdbs->policy_dbs = policy_dbs;
-		j_cdbs->update_util.func = dbs_update_util_handler;
 	}
 	return policy_dbs;
 }
@@ -449,10 +417,7 @@
 		policy_dbs->dbs_data = dbs_data;
 		policy->governor_data = policy_dbs;
 
-		mutex_lock(&dbs_data->mutex);
-		dbs_data->usage_count++;
-		list_add(&policy_dbs->list, &dbs_data->policy_dbs_list);
-		mutex_unlock(&dbs_data->mutex);
+		gov_attr_set_get(&dbs_data->attr_set, &policy_dbs->list);
 		goto out;
 	}
 
@@ -462,8 +427,7 @@
 		goto free_policy_dbs_info;
 	}
 
-	INIT_LIST_HEAD(&dbs_data->policy_dbs_list);
-	mutex_init(&dbs_data->mutex);
+	gov_attr_set_init(&dbs_data->attr_set, &policy_dbs->list);
 
 	ret = gov->init(dbs_data, !policy->governor->initialized);
 	if (ret)
@@ -483,14 +447,11 @@
 	if (!have_governor_per_policy())
 		gov->gdbs_data = dbs_data;
 
+	policy_dbs->dbs_data = dbs_data;
 	policy->governor_data = policy_dbs;
 
-	policy_dbs->dbs_data = dbs_data;
-	dbs_data->usage_count = 1;
-	list_add(&policy_dbs->list, &dbs_data->policy_dbs_list);
-
 	gov->kobj_type.sysfs_ops = &governor_sysfs_ops;
-	ret = kobject_init_and_add(&dbs_data->kobj, &gov->kobj_type,
+	ret = kobject_init_and_add(&dbs_data->attr_set.kobj, &gov->kobj_type,
 				   get_governor_parent_kobj(policy),
 				   "%s", gov->gov.name);
 	if (!ret)
@@ -519,29 +480,21 @@
 	struct dbs_governor *gov = dbs_governor_of(policy);
 	struct policy_dbs_info *policy_dbs = policy->governor_data;
 	struct dbs_data *dbs_data = policy_dbs->dbs_data;
-	int count;
+	unsigned int count;
 
 	/* Protect gov->gdbs_data against concurrent updates. */
 	mutex_lock(&gov_dbs_data_mutex);
 
-	mutex_lock(&dbs_data->mutex);
-	list_del(&policy_dbs->list);
-	count = --dbs_data->usage_count;
-	mutex_unlock(&dbs_data->mutex);
+	count = gov_attr_set_put(&dbs_data->attr_set, &policy_dbs->list);
+
+	policy->governor_data = NULL;
 
 	if (!count) {
-		kobject_put(&dbs_data->kobj);
-
-		policy->governor_data = NULL;
-
 		if (!have_governor_per_policy())
 			gov->gdbs_data = NULL;
 
 		gov->exit(dbs_data, policy->governor->initialized == 1);
-		mutex_destroy(&dbs_data->mutex);
 		kfree(dbs_data);
-	} else {
-		policy->governor_data = NULL;
 	}
 
 	free_policy_dbs_info(policy_dbs, gov);
@@ -570,12 +523,12 @@
 
 	for_each_cpu(j, policy->cpus) {
 		struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j);
-		unsigned int prev_load;
 
-		j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, io_busy);
-
-		prev_load = j_cdbs->prev_cpu_wall - j_cdbs->prev_cpu_idle;
-		j_cdbs->prev_load = 100 * prev_load / (unsigned int)j_cdbs->prev_cpu_wall;
+		j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_update_time, io_busy);
+		/*
+		 * Make the first invocation of dbs_update() compute the load.
+		 */
+		j_cdbs->prev_load = 0;
 
 		if (ignore_nice)
 			j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
index 61ff82f..34eb214 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -24,20 +24,6 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 
-/*
- * The polling frequency depends on the capability of the processor. Default
- * polling frequency is 1000 times the transition latency of the processor. The
- * governor will work on any processor with transition latency <= 10ms, using
- * appropriate sampling rate.
- *
- * For CPUs with transition latency > 10ms (mostly drivers with CPUFREQ_ETERNAL)
- * this governor will not work. All times here are in us (micro seconds).
- */
-#define MIN_SAMPLING_RATE_RATIO			(2)
-#define LATENCY_MULTIPLIER			(1000)
-#define MIN_LATENCY_MULTIPLIER			(20)
-#define TRANSITION_LATENCY_LIMIT		(10 * 1000 * 1000)
-
 /* Ondemand Sampling types */
 enum {OD_NORMAL_SAMPLE, OD_SUB_SAMPLE};
 
@@ -52,7 +38,7 @@
 
 /* Governor demand based switching data (per-policy or global). */
 struct dbs_data {
-	int usage_count;
+	struct gov_attr_set attr_set;
 	void *tuners;
 	unsigned int min_sampling_rate;
 	unsigned int ignore_nice_load;
@@ -60,37 +46,27 @@
 	unsigned int sampling_down_factor;
 	unsigned int up_threshold;
 	unsigned int io_is_busy;
-
-	struct kobject kobj;
-	struct list_head policy_dbs_list;
-	/*
-	 * Protect concurrent updates to governor tunables from sysfs,
-	 * policy_dbs_list and usage_count.
-	 */
-	struct mutex mutex;
 };
 
-/* Governor's specific attributes */
-struct dbs_data;
-struct governor_attr {
-	struct attribute attr;
-	ssize_t (*show)(struct dbs_data *dbs_data, char *buf);
-	ssize_t (*store)(struct dbs_data *dbs_data, const char *buf,
-			 size_t count);
-};
+static inline struct dbs_data *to_dbs_data(struct gov_attr_set *attr_set)
+{
+	return container_of(attr_set, struct dbs_data, attr_set);
+}
 
 #define gov_show_one(_gov, file_name)					\
 static ssize_t show_##file_name						\
-(struct dbs_data *dbs_data, char *buf)					\
+(struct gov_attr_set *attr_set, char *buf)				\
 {									\
+	struct dbs_data *dbs_data = to_dbs_data(attr_set);		\
 	struct _gov##_dbs_tuners *tuners = dbs_data->tuners;		\
 	return sprintf(buf, "%u\n", tuners->file_name);			\
 }
 
 #define gov_show_one_common(file_name)					\
 static ssize_t show_##file_name						\
-(struct dbs_data *dbs_data, char *buf)					\
+(struct gov_attr_set *attr_set, char *buf)				\
 {									\
+	struct dbs_data *dbs_data = to_dbs_data(attr_set);		\
 	return sprintf(buf, "%u\n", dbs_data->file_name);		\
 }
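
For reference, gov_show_one_common(up_threshold) now expands to (expansion written out by hand):

static ssize_t show_up_threshold(struct gov_attr_set *attr_set, char *buf)
{
	struct dbs_data *dbs_data = to_dbs_data(attr_set);
	return sprintf(buf, "%u\n", dbs_data->up_threshold);
}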
 
@@ -135,7 +111,7 @@
 /* Per cpu structures */
 struct cpu_dbs_info {
 	u64 prev_cpu_idle;
-	u64 prev_cpu_wall;
+	u64 prev_update_time;
 	u64 prev_cpu_nice;
 	/*
 	 * Used to keep track of load in the previous interval. However, when
@@ -184,7 +160,7 @@
 		(struct cpufreq_policy *, unsigned int, unsigned int),
 		unsigned int powersave_bias);
 void od_unregister_powersave_bias_handler(void);
-ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf,
+ssize_t store_sampling_rate(struct gov_attr_set *attr_set, const char *buf,
 			    size_t count);
 void gov_update_cpu_data(struct dbs_data *dbs_data);
 #endif /* _CPUFREQ_GOVERNOR_H */
diff --git a/drivers/cpufreq/cpufreq_governor_attr_set.c b/drivers/cpufreq/cpufreq_governor_attr_set.c
new file mode 100644
index 0000000..52841f8
--- /dev/null
+++ b/drivers/cpufreq/cpufreq_governor_attr_set.c
@@ -0,0 +1,84 @@
+/*
+ * Abstract code for CPUFreq governor tunable sysfs attributes.
+ *
+ * Copyright (C) 2016, Intel Corporation
+ * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "cpufreq_governor.h"
+
+static inline struct gov_attr_set *to_gov_attr_set(struct kobject *kobj)
+{
+	return container_of(kobj, struct gov_attr_set, kobj);
+}
+
+static inline struct governor_attr *to_gov_attr(struct attribute *attr)
+{
+	return container_of(attr, struct governor_attr, attr);
+}
+
+static ssize_t governor_show(struct kobject *kobj, struct attribute *attr,
+			     char *buf)
+{
+	struct governor_attr *gattr = to_gov_attr(attr);
+
+	return gattr->show(to_gov_attr_set(kobj), buf);
+}
+
+static ssize_t governor_store(struct kobject *kobj, struct attribute *attr,
+			      const char *buf, size_t count)
+{
+	struct gov_attr_set *attr_set = to_gov_attr_set(kobj);
+	struct governor_attr *gattr = to_gov_attr(attr);
+	int ret;
+
+	mutex_lock(&attr_set->update_lock);
+	ret = attr_set->usage_count ? gattr->store(attr_set, buf, count) : -EBUSY;
+	mutex_unlock(&attr_set->update_lock);
+	return ret;
+}
+
+const struct sysfs_ops governor_sysfs_ops = {
+	.show	= governor_show,
+	.store	= governor_store,
+};
+EXPORT_SYMBOL_GPL(governor_sysfs_ops);
+
+void gov_attr_set_init(struct gov_attr_set *attr_set, struct list_head *list_node)
+{
+	INIT_LIST_HEAD(&attr_set->policy_list);
+	mutex_init(&attr_set->update_lock);
+	attr_set->usage_count = 1;
+	list_add(list_node, &attr_set->policy_list);
+}
+EXPORT_SYMBOL_GPL(gov_attr_set_init);
+
+void gov_attr_set_get(struct gov_attr_set *attr_set, struct list_head *list_node)
+{
+	mutex_lock(&attr_set->update_lock);
+	attr_set->usage_count++;
+	list_add(list_node, &attr_set->policy_list);
+	mutex_unlock(&attr_set->update_lock);
+}
+EXPORT_SYMBOL_GPL(gov_attr_set_get);
+
+unsigned int gov_attr_set_put(struct gov_attr_set *attr_set, struct list_head *list_node)
+{
+	unsigned int count;
+
+	mutex_lock(&attr_set->update_lock);
+	list_del(list_node);
+	count = --attr_set->usage_count;
+	mutex_unlock(&attr_set->update_lock);
+	if (count)
+		return count;
+
+	kobject_put(&attr_set->kobj);
+	mutex_destroy(&attr_set->update_lock);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(gov_attr_set_put);
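A sketch of how a governor is expected to drive these helpers over a policy's lifetime (first_policy is an invented flag; the real call sites live in cpufreq_governor.c):

    /* Init/start: the first user initialises, later users take a reference. */
    if (first_policy)
            gov_attr_set_init(&dbs_data->attr_set, &policy_dbs->list);
    else
            gov_attr_set_get(&dbs_data->attr_set, &policy_dbs->list);

    /* Exit: drop the reference; zero means the kobject was released. */
    if (!gov_attr_set_put(&dbs_data->attr_set, &policy_dbs->list))
            kfree(dbs_data);    /* last user is gone */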
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index acd8027..3001634 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -207,9 +207,10 @@
 /************************** sysfs interface ************************/
 static struct dbs_governor od_dbs_gov;
 
-static ssize_t store_io_is_busy(struct dbs_data *dbs_data, const char *buf,
-		size_t count)
+static ssize_t store_io_is_busy(struct gov_attr_set *attr_set, const char *buf,
+				size_t count)
 {
+	struct dbs_data *dbs_data = to_dbs_data(attr_set);
 	unsigned int input;
 	int ret;
 
@@ -224,9 +225,10 @@
 	return count;
 }
 
-static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf,
-		size_t count)
+static ssize_t store_up_threshold(struct gov_attr_set *attr_set,
+				  const char *buf, size_t count)
 {
+	struct dbs_data *dbs_data = to_dbs_data(attr_set);
 	unsigned int input;
 	int ret;
 	ret = sscanf(buf, "%u", &input);
@@ -240,9 +242,10 @@
 	return count;
 }
 
-static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data,
-		const char *buf, size_t count)
+static ssize_t store_sampling_down_factor(struct gov_attr_set *attr_set,
+					  const char *buf, size_t count)
 {
+	struct dbs_data *dbs_data = to_dbs_data(attr_set);
 	struct policy_dbs_info *policy_dbs;
 	unsigned int input;
 	int ret;
@@ -254,7 +257,7 @@
 	dbs_data->sampling_down_factor = input;
 
 	/* Reset down sampling multiplier in case it was active */
-	list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) {
+	list_for_each_entry(policy_dbs, &attr_set->policy_list, list) {
 		/*
 		 * Doing this without locking might lead to using different
 		 * rate_mult values in od_update() and od_dbs_timer().
@@ -267,9 +270,10 @@
 	return count;
 }
 
-static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data,
-		const char *buf, size_t count)
+static ssize_t store_ignore_nice_load(struct gov_attr_set *attr_set,
+				      const char *buf, size_t count)
 {
+	struct dbs_data *dbs_data = to_dbs_data(attr_set);
 	unsigned int input;
 	int ret;
 
@@ -291,9 +295,10 @@
 	return count;
 }
 
-static ssize_t store_powersave_bias(struct dbs_data *dbs_data, const char *buf,
-		size_t count)
+static ssize_t store_powersave_bias(struct gov_attr_set *attr_set,
+				    const char *buf, size_t count)
 {
+	struct dbs_data *dbs_data = to_dbs_data(attr_set);
 	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
 	struct policy_dbs_info *policy_dbs;
 	unsigned int input;
@@ -308,7 +313,7 @@
 
 	od_tuners->powersave_bias = input;
 
-	list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list)
+	list_for_each_entry(policy_dbs, &attr_set->policy_list, list)
 		ondemand_powersave_bias_init(policy_dbs->policy);
 
 	return count;
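Every converted store callback above follows the same shape: recover dbs_data from the attribute set, parse the input, then walk attr_set->policy_list instead of the old dbs_data->policy_dbs_list. Schematically (some_tunable is a placeholder, not a real field):

    static ssize_t store_some_tunable(struct gov_attr_set *attr_set,
                                      const char *buf, size_t count)
    {
            struct dbs_data *dbs_data = to_dbs_data(attr_set);
            struct policy_dbs_info *policy_dbs;
            unsigned int input;

            if (sscanf(buf, "%u", &input) != 1)
                    return -EINVAL;

            dbs_data->some_tunable = input;     /* placeholder field */

            /* Apply any per-policy side effects. */
            list_for_each_entry(policy_dbs, &attr_set->policy_list, list)
                    ;   /* e.g. reset rate_mult, re-init powersave bias */

            return count;
    }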
diff --git a/drivers/cpufreq/cpufreq_userspace.c b/drivers/cpufreq/cpufreq_userspace.c
index 4d16f45..9f3dec9 100644
--- a/drivers/cpufreq/cpufreq_userspace.c
+++ b/drivers/cpufreq/cpufreq_userspace.c
@@ -17,6 +17,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
+#include <linux/slab.h>
 
 static DEFINE_PER_CPU(unsigned int, cpu_is_managed);
 static DEFINE_MUTEX(userspace_mutex);
@@ -31,6 +32,7 @@
 static int cpufreq_set(struct cpufreq_policy *policy, unsigned int freq)
 {
 	int ret = -EINVAL;
+	unsigned int *setspeed = policy->governor_data;
 
 	pr_debug("cpufreq_set for cpu %u, freq %u kHz\n", policy->cpu, freq);
 
@@ -38,6 +40,8 @@
 	if (!per_cpu(cpu_is_managed, policy->cpu))
 		goto err;
 
+	*setspeed = freq;
+
 	ret = __cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L);
  err:
 	mutex_unlock(&userspace_mutex);
@@ -49,19 +53,45 @@
 	return sprintf(buf, "%u\n", policy->cur);
 }
 
+static int cpufreq_userspace_policy_init(struct cpufreq_policy *policy)
+{
+	unsigned int *setspeed;
+
+	setspeed = kzalloc(sizeof(*setspeed), GFP_KERNEL);
+	if (!setspeed)
+		return -ENOMEM;
+
+	policy->governor_data = setspeed;
+	return 0;
+}
+
 static int cpufreq_governor_userspace(struct cpufreq_policy *policy,
 				   unsigned int event)
 {
+	unsigned int *setspeed = policy->governor_data;
 	unsigned int cpu = policy->cpu;
 	int rc = 0;
 
+	if (event == CPUFREQ_GOV_POLICY_INIT)
+		return cpufreq_userspace_policy_init(policy);
+
+	if (!setspeed)
+		return -EINVAL;
+
 	switch (event) {
+	case CPUFREQ_GOV_POLICY_EXIT:
+		mutex_lock(&userspace_mutex);
+		policy->governor_data = NULL;
+		kfree(setspeed);
+		mutex_unlock(&userspace_mutex);
+		break;
 	case CPUFREQ_GOV_START:
 		BUG_ON(!policy->cur);
 		pr_debug("started managing cpu %u\n", cpu);
 
 		mutex_lock(&userspace_mutex);
 		per_cpu(cpu_is_managed, cpu) = 1;
+		*setspeed = policy->cur;
 		mutex_unlock(&userspace_mutex);
 		break;
 	case CPUFREQ_GOV_STOP:
@@ -69,20 +99,23 @@
 
 		mutex_lock(&userspace_mutex);
 		per_cpu(cpu_is_managed, cpu) = 0;
+		*setspeed = 0;
 		mutex_unlock(&userspace_mutex);
 		break;
 	case CPUFREQ_GOV_LIMITS:
 		mutex_lock(&userspace_mutex);
-		pr_debug("limit event for cpu %u: %u - %u kHz, currently %u kHz\n",
-			cpu, policy->min, policy->max,
-			policy->cur);
+		pr_debug("limit event for cpu %u: %u - %u kHz, currently %u kHz, last set to %u kHz\n",
+			cpu, policy->min, policy->max, policy->cur, *setspeed);
 
-		if (policy->max < policy->cur)
+		if (policy->max < *setspeed)
 			__cpufreq_driver_target(policy, policy->max,
 						CPUFREQ_RELATION_H);
-		else if (policy->min > policy->cur)
+		else if (policy->min > *setspeed)
 			__cpufreq_driver_target(policy, policy->min,
 						CPUFREQ_RELATION_L);
+		else
+			__cpufreq_driver_target(policy, *setspeed,
+						CPUFREQ_RELATION_L);
 		mutex_unlock(&userspace_mutex);
 		break;
 	}
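Distilled into a standalone helper (a restatement, not the driver code itself), the new CPUFREQ_GOV_LIMITS logic keeps the user's request sticky across limit changes: clamp while the policy bounds exclude it, and restore it as soon as they allow it again:

    static unsigned int pick_target(unsigned int setspeed,
                                    unsigned int min, unsigned int max)
    {
            if (setspeed > max)
                    return max;     /* bounds shrank: clamp from above */
            if (setspeed < min)
                    return min;     /* bounds shrank: clamp from below */
            return setspeed;        /* bounds widened: restore the request */
    }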
diff --git a/drivers/cpufreq/e_powersaver.c b/drivers/cpufreq/e_powersaver.c
index 4085244c..cdf097b 100644
--- a/drivers/cpufreq/e_powersaver.c
+++ b/drivers/cpufreq/e_powersaver.c
@@ -6,6 +6,8 @@
  *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
@@ -20,7 +22,7 @@
 #include <asm/msr.h>
 #include <asm/tsc.h>
 
-#if defined CONFIG_ACPI_PROCESSOR || defined CONFIG_ACPI_PROCESSOR_MODULE
+#if IS_ENABLED(CONFIG_ACPI_PROCESSOR)
 #include <linux/acpi.h>
 #include <acpi/processor.h>
 #endif
@@ -33,7 +35,7 @@
 
 struct eps_cpu_data {
 	u32 fsb;
-#if defined CONFIG_ACPI_PROCESSOR || defined CONFIG_ACPI_PROCESSOR_MODULE
+#if IS_ENABLED(CONFIG_ACPI_PROCESSOR)
 	u32 bios_limit;
 #endif
 	struct cpufreq_frequency_table freq_table[];
@@ -46,7 +48,7 @@
 static int voltage_failsafe_off;
 static int set_max_voltage;
 
-#if defined CONFIG_ACPI_PROCESSOR || defined CONFIG_ACPI_PROCESSOR_MODULE
+#if IS_ENABLED(CONFIG_ACPI_PROCESSOR)
 static int ignore_acpi_limit;
 
 static struct acpi_processor_performance *eps_acpi_cpu_perf;
@@ -141,11 +143,9 @@
 	/* Print voltage and multiplier */
 	rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
 	current_voltage = lo & 0xff;
-	printk(KERN_INFO "eps: Current voltage = %dmV\n",
-		current_voltage * 16 + 700);
+	pr_info("Current voltage = %dmV\n", current_voltage * 16 + 700);
 	current_multiplier = (lo >> 8) & 0xff;
-	printk(KERN_INFO "eps: Current multiplier = %d\n",
-		current_multiplier);
+	pr_info("Current multiplier = %d\n", current_multiplier);
 	}
 #endif
 	return 0;
@@ -166,7 +166,7 @@
 	dest_state = centaur->freq_table[index].driver_data & 0xffff;
 	ret = eps_set_state(centaur, policy, dest_state);
 	if (ret)
-		printk(KERN_ERR "eps: Timeout!\n");
+		pr_err("Timeout!\n");
 	return ret;
 }
 
@@ -186,7 +186,7 @@
 	int k, step, voltage;
 	int ret;
 	int states;
-#if defined CONFIG_ACPI_PROCESSOR || defined CONFIG_ACPI_PROCESSOR_MODULE
+#if IS_ENABLED(CONFIG_ACPI_PROCESSOR)
 	unsigned int limit;
 #endif
 
@@ -194,36 +194,36 @@
 		return -ENODEV;
 
 	/* Check brand */
-	printk(KERN_INFO "eps: Detected VIA ");
+	pr_info("Detected VIA ");
 
 	switch (c->x86_model) {
 	case 10:
 		rdmsr(0x1153, lo, hi);
 		brand = (((lo >> 2) ^ lo) >> 18) & 3;
-		printk(KERN_CONT "Model A ");
+		pr_cont("Model A ");
 		break;
 	case 13:
 		rdmsr(0x1154, lo, hi);
 		brand = (((lo >> 4) ^ (lo >> 2))) & 0x000000ff;
-		printk(KERN_CONT "Model D ");
+		pr_cont("Model D ");
 		break;
 	}
 
 	switch (brand) {
 	case EPS_BRAND_C7M:
-		printk(KERN_CONT "C7-M\n");
+		pr_cont("C7-M\n");
 		break;
 	case EPS_BRAND_C7:
-		printk(KERN_CONT "C7\n");
+		pr_cont("C7\n");
 		break;
 	case EPS_BRAND_EDEN:
-		printk(KERN_CONT "Eden\n");
+		pr_cont("Eden\n");
 		break;
 	case EPS_BRAND_C7D:
-		printk(KERN_CONT "C7-D\n");
+		pr_cont("C7-D\n");
 		break;
 	case EPS_BRAND_C3:
-		printk(KERN_CONT "C3\n");
+		pr_cont("C3\n");
 		return -ENODEV;
 		break;
 	}
@@ -235,7 +235,7 @@
 		/* Can be locked at 0 */
 		rdmsrl(MSR_IA32_MISC_ENABLE, val);
 		if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
-			printk(KERN_INFO "eps: Can't enable Enhanced PowerSaver\n");
+			pr_info("Can't enable Enhanced PowerSaver\n");
 			return -ENODEV;
 		}
 	}
@@ -243,22 +243,19 @@
 	/* Print voltage and multiplier */
 	rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
 	current_voltage = lo & 0xff;
-	printk(KERN_INFO "eps: Current voltage = %dmV\n",
-			current_voltage * 16 + 700);
+	pr_info("Current voltage = %dmV\n", current_voltage * 16 + 700);
 	current_multiplier = (lo >> 8) & 0xff;
-	printk(KERN_INFO "eps: Current multiplier = %d\n", current_multiplier);
+	pr_info("Current multiplier = %d\n", current_multiplier);
 
 	/* Print limits */
 	max_voltage = hi & 0xff;
-	printk(KERN_INFO "eps: Highest voltage = %dmV\n",
-			max_voltage * 16 + 700);
+	pr_info("Highest voltage = %dmV\n", max_voltage * 16 + 700);
 	max_multiplier = (hi >> 8) & 0xff;
-	printk(KERN_INFO "eps: Highest multiplier = %d\n", max_multiplier);
+	pr_info("Highest multiplier = %d\n", max_multiplier);
 	min_voltage = (hi >> 16) & 0xff;
-	printk(KERN_INFO "eps: Lowest voltage = %dmV\n",
-			min_voltage * 16 + 700);
+	pr_info("Lowest voltage = %dmV\n", min_voltage * 16 + 700);
 	min_multiplier = (hi >> 24) & 0xff;
-	printk(KERN_INFO "eps: Lowest multiplier = %d\n", min_multiplier);
+	pr_info("Lowest multiplier = %d\n", min_multiplier);
 
 	/* Sanity checks */
 	if (current_multiplier == 0 || max_multiplier == 0
@@ -276,34 +273,30 @@
 
 	/* Check for systems using underclocked CPU */
 	if (!freq_failsafe_off && max_multiplier != current_multiplier) {
-		printk(KERN_INFO "eps: Your processor is running at different "
-			"frequency then its maximum. Aborting.\n");
-		printk(KERN_INFO "eps: You can use freq_failsafe_off option "
-			"to disable this check.\n");
+		pr_info("Your processor is running at a different frequency than its maximum. Aborting.\n");
+		pr_info("You can use the freq_failsafe_off option to disable this check.\n");
 		return -EINVAL;
 	}
 	if (!voltage_failsafe_off && max_voltage != current_voltage) {
-		printk(KERN_INFO "eps: Your processor is running at different "
-			"voltage then its maximum. Aborting.\n");
-		printk(KERN_INFO "eps: You can use voltage_failsafe_off "
-			"option to disable this check.\n");
+		pr_info("Your processor is running at a different voltage than its maximum. Aborting.\n");
+		pr_info("You can use the voltage_failsafe_off option to disable this check.\n");
 		return -EINVAL;
 	}
 
 	/* Calc FSB speed */
 	fsb = cpu_khz / current_multiplier;
 
-#if defined CONFIG_ACPI_PROCESSOR || defined CONFIG_ACPI_PROCESSOR_MODULE
+#if IS_ENABLED(CONFIG_ACPI_PROCESSOR)
 	/* Check for ACPI processor speed limit */
 	if (!ignore_acpi_limit && !eps_acpi_init()) {
 		if (!acpi_processor_get_bios_limit(policy->cpu, &limit)) {
-			printk(KERN_INFO "eps: ACPI limit %u.%uGHz\n",
+			pr_info("ACPI limit %u.%uGHz\n",
 				limit/1000000,
 				(limit%1000000)/10000);
 			eps_acpi_exit(policy);
 			/* Check if max_multiplier is in BIOS limits */
 			if (limit && max_multiplier * fsb > limit) {
-				printk(KERN_INFO "eps: Aborting.\n");
+				pr_info("Aborting\n");
 				return -EINVAL;
 			}
 		}
@@ -319,8 +312,7 @@
 		v = (set_max_voltage - 700) / 16;
 		/* Check if voltage is within limits */
 		if (v >= min_voltage && v <= max_voltage) {
-			printk(KERN_INFO "eps: Setting %dmV as maximum.\n",
-				v * 16 + 700);
+			pr_info("Setting %dmV as maximum\n", v * 16 + 700);
 			max_voltage = v;
 		}
 	}
@@ -341,7 +333,7 @@
 
 	/* Copy basic values */
 	centaur->fsb = fsb;
-#if defined CONFIG_ACPI_PROCESSOR || defined CONFIG_ACPI_PROCESSOR_MODULE
+#if IS_ENABLED(CONFIG_ACPI_PROCESSOR)
 	centaur->bios_limit = limit;
 #endif
 
@@ -426,7 +418,7 @@
 MODULE_PARM_DESC(freq_failsafe_off, "Disable current vs max frequency check");
 module_param(voltage_failsafe_off, int, 0644);
 MODULE_PARM_DESC(voltage_failsafe_off, "Disable current vs max voltage check");
-#if defined CONFIG_ACPI_PROCESSOR || defined CONFIG_ACPI_PROCESSOR_MODULE
+#if IS_ENABLED(CONFIG_ACPI_PROCESSOR)
 module_param(ignore_acpi_limit, int, 0644);
 MODULE_PARM_DESC(ignore_acpi_limit, "Don't check ACPI's processor speed limit");
 #endif
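IS_ENABLED(CONFIG_ACPI_PROCESSOR) evaluates to 1 whether the option is built in (=y) or built as a module (=m), so it subsumes the pair of defined() tests it replaces. Roughly (the real macro in <linux/kconfig.h> uses token pasting; this only shows the effect):

    #if defined(CONFIG_ACPI_PROCESSOR) || defined(CONFIG_ACPI_PROCESSOR_MODULE)
    #define ACPI_PROCESSOR_ON 1     /* what IS_ENABLED() yields here */
    #else
    #define ACPI_PROCESSOR_ON 0
    #endif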
diff --git a/drivers/cpufreq/elanfreq.c b/drivers/cpufreq/elanfreq.c
index 1c06e78..bfce11c 100644
--- a/drivers/cpufreq/elanfreq.c
+++ b/drivers/cpufreq/elanfreq.c
@@ -16,6 +16,8 @@
  *
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
@@ -185,7 +187,7 @@
 static int __init elanfreq_setup(char *str)
 {
 	max_freq = simple_strtoul(str, &str, 0);
-	printk(KERN_WARNING "You're using the deprecated elanfreq command line option. Use elanfreq.max_freq instead, please!\n");
+	pr_warn("You're using the deprecated elanfreq command line option. Use elanfreq.max_freq instead, please!\n");
 	return 1;
 }
 __setup("elanfreq=", elanfreq_setup);
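Defining pr_fmt before any includes is what lets the hand-written prefixes be dropped from each message: the pr_*() macros splice pr_fmt() around the format string at compile time. Approximately:

    /* <linux/printk.h>, simplified: */
    #define pr_warn(fmt, ...) printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)

    /* With pr_fmt defined as KBUILD_MODNAME ": " fmt, this call ... */
    pr_warn("deprecated option\n");
    /* ... compiles as if written: */
    printk(KERN_WARNING "elanfreq: deprecated option\n");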
diff --git a/drivers/cpufreq/hisi-acpu-cpufreq.c b/drivers/cpufreq/hisi-acpu-cpufreq.c
deleted file mode 100644
index 026d5b2..0000000
--- a/drivers/cpufreq/hisi-acpu-cpufreq.c
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Hisilicon Platforms Using ACPU CPUFreq Support
- *
- * Copyright (c) 2015 Hisilicon Limited.
- * Copyright (c) 2015 Linaro Limited.
- *
- * Leo Yan <leo.yan@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed "as is" WITHOUT ANY WARRANTY of any
- * kind, whether express or implied; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/err.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/of.h>
-#include <linux/platform_device.h>
-
-static int __init hisi_acpu_cpufreq_driver_init(void)
-{
-	struct platform_device *pdev;
-
-	if (!of_machine_is_compatible("hisilicon,hi6220"))
-		return -ENODEV;
-
-	pdev = platform_device_register_simple("cpufreq-dt", -1, NULL, 0);
-	return PTR_ERR_OR_ZERO(pdev);
-}
-module_init(hisi_acpu_cpufreq_driver_init);
-
-MODULE_AUTHOR("Leo Yan <leo.yan@linaro.org>");
-MODULE_DESCRIPTION("Hisilicon acpu cpufreq driver");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/cpufreq/ia64-acpi-cpufreq.c b/drivers/cpufreq/ia64-acpi-cpufreq.c
index 0202429..759612d 100644
--- a/drivers/cpufreq/ia64-acpi-cpufreq.c
+++ b/drivers/cpufreq/ia64-acpi-cpufreq.c
@@ -8,6 +8,8 @@
  *      Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/module.h>
@@ -118,8 +120,7 @@
 
 	if (ret) {
 		set_cpus_allowed_ptr(current, &saved_mask);
-		printk(KERN_WARNING "get performance failed with error %d\n",
-		       ret);
+		pr_warn("get performance failed with error %d\n", ret);
 		ret = 0;
 		goto migrate_end;
 	}
@@ -177,7 +178,7 @@
 
 	ret = processor_set_pstate(value);
 	if (ret) {
-		printk(KERN_WARNING "Transition failed with error %d\n", ret);
+		pr_warn("Transition failed with error %d\n", ret);
 		retval = -ENODEV;
 		goto migrate_end;
 	}
@@ -291,8 +292,7 @@
 	/* notify BIOS that we exist */
 	acpi_processor_notify_smm(THIS_MODULE);
 
-	printk(KERN_INFO "acpi-cpufreq: CPU%u - ACPI performance management "
-	       "activated.\n", cpu);
+	pr_info("CPU%u - ACPI performance management activated\n", cpu);
 
 	for (i = 0; i < data->acpi_data.state_count; i++)
 		pr_debug("     %cP%d: %d MHz, %d mW, %d uS, %d uS, 0x%x 0x%x\n",
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index b230eba..b76a98d 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -10,6 +10,8 @@
  * of the License.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/kernel_stat.h>
 #include <linux/module.h>
@@ -39,10 +41,17 @@
 #define ATOM_TURBO_RATIOS	0x66c
 #define ATOM_TURBO_VIDS		0x66d
 
+#ifdef CONFIG_ACPI
+#include <acpi/processor.h>
+#endif
+
 #define FRAC_BITS 8
 #define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
 #define fp_toint(X) ((X) >> FRAC_BITS)
 
+#define EXT_BITS 6
+#define EXT_FRAC_BITS (EXT_BITS + FRAC_BITS)
+
 static inline int32_t mul_fp(int32_t x, int32_t y)
 {
 	return ((int64_t)x * (int64_t)y) >> FRAC_BITS;
@@ -64,12 +73,22 @@
 	return ret;
 }
 
+static inline u64 mul_ext_fp(u64 x, u64 y)
+{
+	return (x * y) >> EXT_FRAC_BITS;
+}
+
+static inline u64 div_ext_fp(u64 x, u64 y)
+{
+	return div64_u64(x << EXT_FRAC_BITS, y);
+}
+
 /**
  * struct sample -	Store performance sample
- * @core_pct_busy:	Ratio of APERF/MPERF in percent, which is actual
+ * @core_avg_perf:	Ratio of APERF/MPERF which is the actual average
  *			performance during last sample period
  * @busy_scaled:	Scaled busy value which is used to calculate next
- *			P state. This can be different than core_pct_busy
+ *			P state. This can be different from core_avg_perf
  *			to account for cpu idle period
  * @aperf:		Difference of actual performance frequency clock count
  *			read from APERF MSR between last and current sample
@@ -84,7 +103,7 @@
  * data for choosing next P State.
  */
 struct sample {
-	int32_t core_pct_busy;
+	int32_t core_avg_perf;
 	int32_t busy_scaled;
 	u64 aperf;
 	u64 mperf;
@@ -162,6 +181,7 @@
  * struct cpudata -	Per CPU instance data storage
  * @cpu:		CPU number for this instance data
  * @update_util:	CPUFreq utility callback information
+ * @update_util_set:	CPUFreq utility callback is set
  * @pstate:		Stores P state limits for this CPU
  * @vid:		Stores VID limits for this CPU
  * @pid:		Stores PID parameters for this CPU
@@ -172,6 +192,8 @@
  * @prev_cummulative_iowait: IO Wait time difference from last and
  *			current sample
  * @sample:		Storage for storing last Sample data
+ * @acpi_perf_data:	Stores ACPI perf information read from _PSS
+ * @valid_pss_table:	Set to true when valid ACPI _PSS entries are found
  *
  * This structure stores per CPU instance data for all CPUs.
  */
@@ -179,6 +201,7 @@
 	int cpu;
 
 	struct update_util_data update_util;
+	bool   update_util_set;
 
 	struct pstate_data pstate;
 	struct vid_data vid;
@@ -190,6 +213,10 @@
 	u64	prev_tsc;
 	u64	prev_cummulative_iowait;
 	struct sample sample;
+#ifdef CONFIG_ACPI
+	struct acpi_processor_performance acpi_perf_data;
+	bool valid_pss_table;
+#endif
 };
 
 static struct cpudata **all_cpu_data;
@@ -258,6 +285,9 @@
 static struct pstate_funcs pstate_funcs;
 static int hwp_active;
 
+#ifdef CONFIG_ACPI
+static bool acpi_ppc;
+#endif
 
 /**
  * struct perf_limits - Store user and policy limits
@@ -331,6 +361,124 @@
 static struct perf_limits *limits = &powersave_limits;
 #endif
 
+#ifdef CONFIG_ACPI
+
+static bool intel_pstate_get_ppc_enable_status(void)
+{
+	if (acpi_gbl_FADT.preferred_profile == PM_ENTERPRISE_SERVER ||
+	    acpi_gbl_FADT.preferred_profile == PM_PERFORMANCE_SERVER)
+		return true;
+
+	return acpi_ppc;
+}
+
+/*
+ * The max target pstate ratio is an 8 bit value in both the PLATFORM_INFO
+ * MSR and the TURBO_RATIO_LIMIT MSR, which the pstate driver stores in the
+ * max_pstate and max_turbo_pstate fields. The PERF_CTL MSR contains a 16 bit
+ * value for the P-state ratio, of which only the high 8 bits are used; for
+ * example, 0x1700 sets target ratio 0x17. The _PSS control value is stored
+ * in a format that can be written directly to the PERF_CTL MSR, but in the
+ * intel_pstate driver this shift occurs during the write to PERF_CTL (e.g.
+ * core_set_pstate() for cores). This function converts the _PSS control
+ * value to the intel_pstate driver format for comparison and assignment.
+ */
+static int convert_to_native_pstate_format(struct cpudata *cpu, int index)
+{
+	return cpu->acpi_perf_data.states[index].control >> 8;
+}
+
+static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
+{
+	struct cpudata *cpu;
+	int turbo_pss_ctl;
+	int ret;
+	int i;
+
+	if (hwp_active)
+		return;
+
+	if (!intel_pstate_get_ppc_enable_status())
+		return;
+
+	cpu = all_cpu_data[policy->cpu];
+
+	ret = acpi_processor_register_performance(&cpu->acpi_perf_data,
+						  policy->cpu);
+	if (ret)
+		return;
+
+	/*
+	 * Check if the control value in _PSS is for PERF_CTL MSR, which should
+	 * guarantee that the states returned by it map to the states in our
+	 * list directly.
+	 */
+	if (cpu->acpi_perf_data.control_register.space_id !=
+						ACPI_ADR_SPACE_FIXED_HARDWARE)
+		goto err;
+
+	/*
+	 * If there is only one entry in _PSS, simply ignore it and continue
+	 * as usual, without taking _PSS into account.
+	 */
+	if (cpu->acpi_perf_data.state_count < 2)
+		goto err;
+
+	pr_debug("CPU%u - ACPI _PSS perf data\n", policy->cpu);
+	for (i = 0; i < cpu->acpi_perf_data.state_count; i++) {
+		pr_debug("     %cP%d: %u MHz, %u mW, 0x%x\n",
+			 (i == cpu->acpi_perf_data.state ? '*' : ' '), i,
+			 (u32) cpu->acpi_perf_data.states[i].core_frequency,
+			 (u32) cpu->acpi_perf_data.states[i].power,
+			 (u32) cpu->acpi_perf_data.states[i].control);
+	}
+
+	/*
+	 * The _PSS table doesn't contain the whole turbo frequency range.
+	 * It just contains +1 MHz above the max non-turbo frequency,
+	 * with the control value corresponding to the max turbo ratio. But
+	 * when cpufreq set_policy is called, it will be called with this
+	 * max frequency, which will cause reduced performance, as this
+	 * driver uses the real max turbo frequency as the max frequency.
+	 * So correct this frequency in the _PSS table to the real max
+	 * turbo frequency based on the turbo ratio. Also convert it from
+	 * kHz to MHz, as _PSS frequencies are in MHz.
+	 */
+	turbo_pss_ctl = convert_to_native_pstate_format(cpu, 0);
+	if (turbo_pss_ctl > cpu->pstate.max_pstate)
+		cpu->acpi_perf_data.states[0].core_frequency =
+					policy->cpuinfo.max_freq / 1000;
+	cpu->valid_pss_table = true;
+	pr_info("_PPC limits will be enforced\n");
+
+	return;
+
+ err:
+	cpu->valid_pss_table = false;
+	acpi_processor_unregister_performance(policy->cpu);
+}
+
+static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
+{
+	struct cpudata *cpu;
+
+	cpu = all_cpu_data[policy->cpu];
+	if (!cpu->valid_pss_table)
+		return;
+
+	acpi_processor_unregister_performance(policy->cpu);
+}
+
+#else
+static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
+{
+}
+
+static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
+{
+}
+#endif
+
 static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
 			     int deadband, int integral) {
 	pid->setpoint = int_tofp(setpoint);
@@ -341,17 +489,17 @@
 
 static inline void pid_p_gain_set(struct _pid *pid, int percent)
 {
-	pid->p_gain = div_fp(int_tofp(percent), int_tofp(100));
+	pid->p_gain = div_fp(percent, 100);
 }
 
 static inline void pid_i_gain_set(struct _pid *pid, int percent)
 {
-	pid->i_gain = div_fp(int_tofp(percent), int_tofp(100));
+	pid->i_gain = div_fp(percent, 100);
 }
 
 static inline void pid_d_gain_set(struct _pid *pid, int percent)
 {
-	pid->d_gain = div_fp(int_tofp(percent), int_tofp(100));
+	pid->d_gain = div_fp(percent, 100);
 }
 
 static signed int pid_calc(struct _pid *pid, int32_t busy)
@@ -537,7 +685,7 @@
 
 	total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1;
 	no_turbo = cpu->pstate.max_pstate - cpu->pstate.min_pstate + 1;
-	turbo_fp = div_fp(int_tofp(no_turbo), int_tofp(total));
+	turbo_fp = div_fp(no_turbo, total);
 	turbo_pct = 100 - fp_toint(mul_fp(turbo_fp, int_tofp(100)));
 	return sprintf(buf, "%u\n", turbo_pct);
 }
@@ -579,7 +727,7 @@
 
 	update_turbo_state();
 	if (limits->turbo_disabled) {
-		pr_warn("intel_pstate: Turbo disabled by BIOS or unavailable on processor\n");
+		pr_warn("Turbo disabled by BIOS or unavailable on processor\n");
 		return -EPERM;
 	}
 
@@ -608,8 +756,7 @@
 				   limits->max_perf_pct);
 	limits->max_perf_pct = max(limits->min_perf_pct,
 				   limits->max_perf_pct);
-	limits->max_perf = div_fp(int_tofp(limits->max_perf_pct),
-				  int_tofp(100));
+	limits->max_perf = div_fp(limits->max_perf_pct, 100);
 
 	if (hwp_active)
 		intel_pstate_hwp_set_online_cpus();
@@ -633,8 +780,7 @@
 				   limits->min_perf_pct);
 	limits->min_perf_pct = min(limits->max_perf_pct,
 				   limits->min_perf_pct);
-	limits->min_perf = div_fp(int_tofp(limits->min_perf_pct),
-				  int_tofp(100));
+	limits->min_perf = div_fp(limits->min_perf_pct, 100);
 
 	if (hwp_active)
 		intel_pstate_hwp_set_online_cpus();
@@ -1019,15 +1165,11 @@
 	intel_pstate_set_min_pstate(cpu);
 }
 
-static inline void intel_pstate_calc_busy(struct cpudata *cpu)
+static inline void intel_pstate_calc_avg_perf(struct cpudata *cpu)
 {
 	struct sample *sample = &cpu->sample;
-	int64_t core_pct;
 
-	core_pct = int_tofp(sample->aperf) * int_tofp(100);
-	core_pct = div64_u64(core_pct, int_tofp(sample->mperf));
-
-	sample->core_pct_busy = (int32_t)core_pct;
+	sample->core_avg_perf = div_ext_fp(sample->aperf, sample->mperf);
 }
 
 static inline bool intel_pstate_sample(struct cpudata *cpu, u64 time)
@@ -1070,9 +1212,14 @@
 
 static inline int32_t get_avg_frequency(struct cpudata *cpu)
 {
-	return fp_toint(mul_fp(cpu->sample.core_pct_busy,
-			       int_tofp(cpu->pstate.max_pstate_physical *
-						cpu->pstate.scaling / 100)));
+	return mul_ext_fp(cpu->sample.core_avg_perf,
+			  cpu->pstate.max_pstate_physical * cpu->pstate.scaling);
+}
+
+static inline int32_t get_avg_pstate(struct cpudata *cpu)
+{
+	return mul_ext_fp(cpu->pstate.max_pstate_physical,
+			  cpu->sample.core_avg_perf);
 }
 
 static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
@@ -1107,49 +1254,43 @@
 	cpu_load = div64_u64(int_tofp(100) * mperf, sample->tsc);
 	cpu->sample.busy_scaled = cpu_load;
 
-	return cpu->pstate.current_pstate - pid_calc(&cpu->pid, cpu_load);
+	return get_avg_pstate(cpu) - pid_calc(&cpu->pid, cpu_load);
 }
 
 static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
 {
-	int32_t core_busy, max_pstate, current_pstate, sample_ratio;
+	int32_t perf_scaled, max_pstate, current_pstate, sample_ratio;
 	u64 duration_ns;
 
 	/*
-	 * core_busy is the ratio of actual performance to max
-	 * max_pstate is the max non turbo pstate available
-	 * current_pstate was the pstate that was requested during
-	 * 	the last sample period.
-	 *
-	 * We normalize core_busy, which was our actual percent
-	 * performance to what we requested during the last sample
-	 * period. The result will be a percentage of busy at a
-	 * specified pstate.
+	 * perf_scaled is the average performance during the last sampling
+	 * period scaled by the ratio of the maximum P-state to the P-state
+	 * requested last time (in percent).  That measures the system's
+	 * response to the previous P-state selection.
 	 */
-	core_busy = cpu->sample.core_pct_busy;
-	max_pstate = int_tofp(cpu->pstate.max_pstate_physical);
-	current_pstate = int_tofp(cpu->pstate.current_pstate);
-	core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));
+	max_pstate = cpu->pstate.max_pstate_physical;
+	current_pstate = cpu->pstate.current_pstate;
+	perf_scaled = mul_ext_fp(cpu->sample.core_avg_perf,
+			       div_fp(100 * max_pstate, current_pstate));
 
 	/*
 	 * Since our utilization update callback will not run unless we are
 	 * in C0, check if the actual elapsed time is significantly greater (3x)
 	 * than our sample interval.  If it is, then we were idle for a long
-	 * enough period of time to adjust our busyness.
+	 * enough period of time to adjust our performance metric.
 	 */
 	duration_ns = cpu->sample.time - cpu->last_sample_time;
 	if ((s64)duration_ns > pid_params.sample_rate_ns * 3) {
-		sample_ratio = div_fp(int_tofp(pid_params.sample_rate_ns),
-				      int_tofp(duration_ns));
-		core_busy = mul_fp(core_busy, sample_ratio);
+		sample_ratio = div_fp(pid_params.sample_rate_ns, duration_ns);
+		perf_scaled = mul_fp(perf_scaled, sample_ratio);
 	} else {
 		sample_ratio = div_fp(100 * cpu->sample.mperf, cpu->sample.tsc);
 		if (sample_ratio < int_tofp(1))
-			core_busy = 0;
+			perf_scaled = 0;
 	}
 
-	cpu->sample.busy_scaled = core_busy;
-	return cpu->pstate.current_pstate - pid_calc(&cpu->pid, core_busy);
+	cpu->sample.busy_scaled = perf_scaled;
+	return cpu->pstate.current_pstate - pid_calc(&cpu->pid, perf_scaled);
 }
 
 static inline void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)
@@ -1179,7 +1320,7 @@
 	intel_pstate_update_pstate(cpu, target_pstate);
 
 	sample = &cpu->sample;
-	trace_pstate_sample(fp_toint(sample->core_pct_busy),
+	trace_pstate_sample(mul_ext_fp(100, sample->core_avg_perf),
 		fp_toint(sample->busy_scaled),
 		from,
 		cpu->pstate.current_pstate,
@@ -1199,7 +1340,7 @@
 		bool sample_taken = intel_pstate_sample(cpu, time);
 
 		if (sample_taken) {
-			intel_pstate_calc_busy(cpu);
+			intel_pstate_calc_avg_perf(cpu);
 			if (!hwp_active)
 				intel_pstate_adjust_busy_pstate(cpu);
 		}
@@ -1261,23 +1402,16 @@
 
 	intel_pstate_busy_pid_reset(cpu);
 
-	cpu->update_util.func = intel_pstate_update_util;
-
-	pr_debug("intel_pstate: controlling: cpu %d\n", cpunum);
+	pr_debug("controlling: cpu %d\n", cpunum);
 
 	return 0;
 }
 
 static unsigned int intel_pstate_get(unsigned int cpu_num)
 {
-	struct sample *sample;
-	struct cpudata *cpu;
+	struct cpudata *cpu = all_cpu_data[cpu_num];
 
-	cpu = all_cpu_data[cpu_num];
-	if (!cpu)
-		return 0;
-	sample = &cpu->sample;
-	return get_avg_frequency(cpu);
+	return cpu ? get_avg_frequency(cpu) : 0;
 }
 
 static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
@@ -1286,12 +1420,20 @@
 
 	/* Prevent intel_pstate_update_util() from using stale data. */
 	cpu->sample.time = 0;
-	cpufreq_set_update_util_data(cpu_num, &cpu->update_util);
+	cpufreq_add_update_util_hook(cpu_num, &cpu->update_util,
+				     intel_pstate_update_util);
+	cpu->update_util_set = true;
 }
 
 static void intel_pstate_clear_update_util_hook(unsigned int cpu)
 {
-	cpufreq_set_update_util_data(cpu, NULL);
+	struct cpudata *cpu_data = all_cpu_data[cpu];
+
+	if (!cpu_data->update_util_set)
+		return;
+
+	cpufreq_remove_update_util_hook(cpu);
+	cpu_data->update_util_set = false;
 	synchronize_sched();
 }
 
@@ -1311,20 +1453,31 @@
 
 static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 {
+	struct cpudata *cpu;
+
 	if (!policy->cpuinfo.max_freq)
 		return -ENODEV;
 
 	intel_pstate_clear_update_util_hook(policy->cpu);
 
+	cpu = all_cpu_data[0];
+	if (cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate) {
+		if (policy->max < policy->cpuinfo.max_freq &&
+		    policy->max > cpu->pstate.max_pstate * cpu->pstate.scaling) {
+			pr_debug("policy->max > max non turbo frequency\n");
+			policy->max = policy->cpuinfo.max_freq;
+		}
+	}
+
 	if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
 		limits = &performance_limits;
 		if (policy->max >= policy->cpuinfo.max_freq) {
-			pr_debug("intel_pstate: set performance\n");
+			pr_debug("set performance\n");
 			intel_pstate_set_performance_limits(limits);
 			goto out;
 		}
 	} else {
-		pr_debug("intel_pstate: set powersave\n");
+		pr_debug("set powersave\n");
 		limits = &powersave_limits;
 	}
 
@@ -1348,10 +1501,8 @@
 	/* Make sure min_perf_pct <= max_perf_pct */
 	limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct);
 
-	limits->min_perf = div_fp(int_tofp(limits->min_perf_pct),
-				  int_tofp(100));
-	limits->max_perf = div_fp(int_tofp(limits->max_perf_pct),
-				  int_tofp(100));
+	limits->min_perf = div_fp(limits->min_perf_pct, 100);
+	limits->max_perf = div_fp(limits->max_perf_pct, 100);
 
  out:
 	intel_pstate_set_update_util_hook(policy->cpu);
@@ -1377,7 +1528,7 @@
 	int cpu_num = policy->cpu;
 	struct cpudata *cpu = all_cpu_data[cpu_num];
 
-	pr_debug("intel_pstate: CPU %d exiting\n", cpu_num);
+	pr_debug("CPU %d exiting\n", cpu_num);
 
 	intel_pstate_clear_update_util_hook(cpu_num);
 
@@ -1410,12 +1561,20 @@
 	policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
 	policy->cpuinfo.max_freq =
 		cpu->pstate.turbo_pstate * cpu->pstate.scaling;
+	intel_pstate_init_acpi_perf_limits(policy);
 	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
 	cpumask_set_cpu(policy->cpu, policy->cpus);
 
 	return 0;
 }
 
+static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
+{
+	intel_pstate_exit_perf_limits(policy);
+
+	return 0;
+}
+
 static struct cpufreq_driver intel_pstate_driver = {
 	.flags		= CPUFREQ_CONST_LOOPS,
 	.verify		= intel_pstate_verify_policy,
@@ -1423,6 +1582,7 @@
 	.resume		= intel_pstate_hwp_set_policy,
 	.get		= intel_pstate_get,
 	.init		= intel_pstate_cpu_init,
+	.exit		= intel_pstate_cpu_exit,
 	.stop_cpu	= intel_pstate_stop_cpu,
 	.name		= "intel_pstate",
 };
@@ -1466,8 +1626,7 @@
 
 }
 
-#if IS_ENABLED(CONFIG_ACPI)
-#include <acpi/processor.h>
+#ifdef CONFIG_ACPI
 
 static bool intel_pstate_no_acpi_pss(void)
 {
@@ -1623,7 +1782,7 @@
 	if (intel_pstate_platform_pwr_mgmt_exists())
 		return -ENODEV;
 
-	pr_info("Intel P-state driver initializing.\n");
+	pr_info("Intel P-state driver initializing\n");
 
 	all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus());
 	if (!all_cpu_data)
@@ -1640,7 +1799,7 @@
 	intel_pstate_sysfs_expose_params();
 
 	if (hwp_active)
-		pr_info("intel_pstate: HWP enabled\n");
+		pr_info("HWP enabled\n");
 
 	return rc;
 out:
@@ -1666,13 +1825,19 @@
 	if (!strcmp(str, "disable"))
 		no_load = 1;
 	if (!strcmp(str, "no_hwp")) {
-		pr_info("intel_pstate: HWP disabled\n");
+		pr_info("HWP disabled\n");
 		no_hwp = 1;
 	}
 	if (!strcmp(str, "force"))
 		force_load = 1;
 	if (!strcmp(str, "hwp_only"))
 		hwp_only = 1;
+
+#ifdef CONFIG_ACPI
+	if (!strcmp(str, "support_acpi_ppc"))
+		acpi_ppc = true;
+#endif
+
 	return 0;
 }
 early_param("intel_pstate", intel_pstate_setup);
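A worked example of the new extended fixed-point helpers, using the definitions above (FRAC_BITS = 8, EXT_BITS = 6, so EXT_FRAC_BITS = 14); the sample counter values are invented:

    u64 aperf = 180000, mperf = 200000;     /* deltas read from the MSRs */

    /* core_avg_perf = div_ext_fp(aperf, mperf) = (aperf << 14) / mperf */
    u64 avg = div64_u64(aperf << 14, mperf);        /* 14745 ~= 0.9 * 2^14 */

    /* mul_ext_fp(avg, 100) scales back to an integer percentage. */
    u64 pct = (avg * 100) >> 14;                    /* == 90 */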
diff --git a/drivers/cpufreq/longhaul.c b/drivers/cpufreq/longhaul.c
index 0f6b229..c46a12d 100644
--- a/drivers/cpufreq/longhaul.c
+++ b/drivers/cpufreq/longhaul.c
@@ -21,6 +21,8 @@
  *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
@@ -40,8 +42,6 @@
 
 #include "longhaul.h"
 
-#define PFX "longhaul: "
-
 #define TYPE_LONGHAUL_V1	1
 #define TYPE_LONGHAUL_V2	2
 #define TYPE_POWERSAVER		3
@@ -347,14 +347,13 @@
 	freqs.new = calc_speed(longhaul_get_cpu_mult());
 	/* Check if requested frequency is set. */
 	if (unlikely(freqs.new != speed)) {
-		printk(KERN_INFO PFX "Failed to set requested frequency!\n");
+		pr_info("Failed to set requested frequency!\n");
 		/* Revision ID = 1 but processor is expecting revision key
 		 * equal to 0. Jumpers at the bottom of processor will change
 		 * multiplier and FSB, but will not change bits in Longhaul
 		 * MSR nor enable voltage scaling. */
 		if (!revid_errata) {
-			printk(KERN_INFO PFX "Enabling \"Ignore Revision ID\" "
-						"option.\n");
+			pr_info("Enabling \"Ignore Revision ID\" option\n");
 			revid_errata = 1;
 			msleep(200);
 			goto retry_loop;
@@ -364,11 +363,10 @@
 		 * but it doesn't change frequency. I tried poking various
 		 * bits in northbridge registers, but without success. */
 		if (longhaul_flags & USE_ACPI_C3) {
-			printk(KERN_INFO PFX "Disabling ACPI C3 support.\n");
+			pr_info("Disabling ACPI C3 support\n");
 			longhaul_flags &= ~USE_ACPI_C3;
 			if (revid_errata) {
-				printk(KERN_INFO PFX "Disabling \"Ignore "
-						"Revision ID\" option.\n");
+				pr_info("Disabling \"Ignore Revision ID\" option\n");
 				revid_errata = 0;
 			}
 			msleep(200);
@@ -379,7 +377,7 @@
 		 * RevID = 1. RevID errata will make things right. Just
 		 * to be 100% sure. */
 		if (longhaul_version == TYPE_LONGHAUL_V2) {
-			printk(KERN_INFO PFX "Switching to Longhaul ver. 1\n");
+			pr_info("Switching to Longhaul ver. 1\n");
 			longhaul_version = TYPE_LONGHAUL_V1;
 			msleep(200);
 			goto retry_loop;
@@ -387,8 +385,7 @@
 	}
 
 	if (!bm_timeout) {
-		printk(KERN_INFO PFX "Warning: Timeout while waiting for "
-				"idle PCI bus.\n");
+		pr_info("Warning: Timeout while waiting for idle PCI bus\n");
 		return -EBUSY;
 	}
 
@@ -433,12 +430,12 @@
 	/* Get current frequency */
 	mult = longhaul_get_cpu_mult();
 	if (mult == -1) {
-		printk(KERN_INFO PFX "Invalid (reserved) multiplier!\n");
+		pr_info("Invalid (reserved) multiplier!\n");
 		return -EINVAL;
 	}
 	fsb = guess_fsb(mult);
 	if (fsb == 0) {
-		printk(KERN_INFO PFX "Invalid (reserved) FSB!\n");
+		pr_info("Invalid (reserved) FSB!\n");
 		return -EINVAL;
 	}
 	/* Get max multiplier - as we always did.
@@ -468,11 +465,11 @@
 		 print_speed(highest_speed/1000));
 
 	if (lowest_speed == highest_speed) {
-		printk(KERN_INFO PFX "highestspeed == lowest, aborting.\n");
+		pr_info("highest speed == lowest, aborting\n");
 		return -EINVAL;
 	}
 	if (lowest_speed > highest_speed) {
-		printk(KERN_INFO PFX "nonsense! lowest (%d > %d) !\n",
+		pr_info("nonsense! lowest (%d > %d)!\n",
 			lowest_speed, highest_speed);
 		return -EINVAL;
 	}
@@ -538,16 +535,16 @@
 
 	rdmsrl(MSR_VIA_LONGHAUL, longhaul.val);
 	if (!(longhaul.bits.RevisionID & 1)) {
-		printk(KERN_INFO PFX "Voltage scaling not supported by CPU.\n");
+		pr_info("Voltage scaling not supported by CPU\n");
 		return;
 	}
 
 	if (!longhaul.bits.VRMRev) {
-		printk(KERN_INFO PFX "VRM 8.5\n");
+		pr_info("VRM 8.5\n");
 		vrm_mV_table = &vrm85_mV[0];
 		mV_vrm_table = &mV_vrm85[0];
 	} else {
-		printk(KERN_INFO PFX "Mobile VRM\n");
+		pr_info("Mobile VRM\n");
 		if (cpu_model < CPU_NEHEMIAH)
 			return;
 		vrm_mV_table = &mobilevrm_mV[0];
@@ -558,27 +555,21 @@
 	maxvid = vrm_mV_table[longhaul.bits.MaximumVID];
 
 	if (minvid.mV == 0 || maxvid.mV == 0 || minvid.mV > maxvid.mV) {
-		printk(KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. "
-					"Voltage scaling disabled.\n",
-					minvid.mV/1000, minvid.mV%1000,
-					maxvid.mV/1000, maxvid.mV%1000);
+		pr_info("Bogus values Min:%d.%03d Max:%d.%03d - Voltage scaling disabled\n",
+			minvid.mV/1000, minvid.mV%1000,
+			maxvid.mV/1000, maxvid.mV%1000);
 		return;
 	}
 
 	if (minvid.mV == maxvid.mV) {
-		printk(KERN_INFO PFX "Claims to support voltage scaling but "
-				"min & max are both %d.%03d. "
-				"Voltage scaling disabled\n",
-				maxvid.mV/1000, maxvid.mV%1000);
+		pr_info("Claims to support voltage scaling but min & max are both %d.%03d - Voltage scaling disabled\n",
+			maxvid.mV/1000, maxvid.mV%1000);
 		return;
 	}
 
 	/* How many voltage steps*/
 	numvscales = maxvid.pos - minvid.pos + 1;
-	printk(KERN_INFO PFX
-		"Max VID=%d.%03d  "
-		"Min VID=%d.%03d, "
-		"%d possible voltage scales\n",
+	pr_info("Max VID=%d.%03d  Min VID=%d.%03d, %d possible voltage scales\n",
 		maxvid.mV/1000, maxvid.mV%1000,
 		minvid.mV/1000, minvid.mV%1000,
 		numvscales);
@@ -617,12 +608,12 @@
 			pos = minvid.pos;
 		freq_pos->driver_data |= mV_vrm_table[pos] << 8;
 		vid = vrm_mV_table[mV_vrm_table[pos]];
-		printk(KERN_INFO PFX "f: %d kHz, index: %d, vid: %d mV\n",
+		pr_info("f: %d kHz, index: %d, vid: %d mV\n",
 			speed, (int)(freq_pos - longhaul_table), vid.mV);
 	}
 
 	can_scale_voltage = 1;
-	printk(KERN_INFO PFX "Voltage scaling enabled.\n");
+	pr_info("Voltage scaling enabled\n");
 }
 
 
@@ -720,8 +711,7 @@
 			pci_write_config_byte(dev, reg, pci_cmd);
 			pci_read_config_byte(dev, reg, &pci_cmd);
 			if (!(pci_cmd & 1<<7)) {
-				printk(KERN_ERR PFX
-					"Can't enable access to port 0x22.\n");
+				pr_err("Can't enable access to port 0x22\n");
 				status = 0;
 			}
 		}
@@ -758,8 +748,7 @@
 		if (pci_cmd & 1 << 7) {
 			pci_read_config_dword(dev, 0x88, &acpi_regs_addr);
 			acpi_regs_addr &= 0xff00;
-			printk(KERN_INFO PFX "ACPI I/O at 0x%x\n",
-					acpi_regs_addr);
+			pr_info("ACPI I/O at 0x%x\n", acpi_regs_addr);
 		}
 
 		pci_dev_put(dev);
@@ -853,14 +842,14 @@
 			longhaul_version = TYPE_LONGHAUL_V1;
 	}
 
-	printk(KERN_INFO PFX "VIA %s CPU detected.  ", cpuname);
+	pr_info("VIA %s CPU detected.  ", cpuname);
 	switch (longhaul_version) {
 	case TYPE_LONGHAUL_V1:
 	case TYPE_LONGHAUL_V2:
-		printk(KERN_CONT "Longhaul v%d supported.\n", longhaul_version);
+		pr_cont("Longhaul v%d supported\n", longhaul_version);
 		break;
 	case TYPE_POWERSAVER:
-		printk(KERN_CONT "Powersaver supported.\n");
+		pr_cont("Powersaver supported\n");
 		break;
 	};
 
@@ -889,15 +878,14 @@
 	if (!(longhaul_flags & USE_ACPI_C3
 	     || longhaul_flags & USE_NORTHBRIDGE)
 	    && ((pr == NULL) || !(pr->flags.bm_control))) {
-		printk(KERN_ERR PFX
-			"No ACPI support. Unsupported northbridge.\n");
+		pr_err("No ACPI support: Unsupported northbridge\n");
 		return -ENODEV;
 	}
 
 	if (longhaul_flags & USE_NORTHBRIDGE)
-		printk(KERN_INFO PFX "Using northbridge support.\n");
+		pr_info("Using northbridge support\n");
 	if (longhaul_flags & USE_ACPI_C3)
-		printk(KERN_INFO PFX "Using ACPI support.\n");
+		pr_info("Using ACPI support\n");
 
 	ret = longhaul_get_ranges();
 	if (ret != 0)
@@ -934,20 +922,18 @@
 		return -ENODEV;
 
 	if (!enable) {
-		printk(KERN_ERR PFX "Option \"enable\" not set. Aborting.\n");
+		pr_err("Option \"enable\" not set - Aborting\n");
 		return -ENODEV;
 	}
 #ifdef CONFIG_SMP
 	if (num_online_cpus() > 1) {
-		printk(KERN_ERR PFX "More than 1 CPU detected, "
-				"longhaul disabled.\n");
+		pr_err("More than 1 CPU detected, longhaul disabled\n");
 		return -ENODEV;
 	}
 #endif
 #ifdef CONFIG_X86_IO_APIC
-	if (cpu_has_apic) {
-		printk(KERN_ERR PFX "APIC detected. Longhaul is currently "
-				"broken in this configuration.\n");
+	if (boot_cpu_has(X86_FEATURE_APIC)) {
+		pr_err("APIC detected. Longhaul is currently broken in this configuration.\n");
 		return -ENODEV;
 	}
 #endif
@@ -955,7 +941,7 @@
 	case 6 ... 9:
 		return cpufreq_register_driver(&longhaul_driver);
 	case 10:
-		printk(KERN_ERR PFX "Use acpi-cpufreq driver for VIA C7\n");
+		pr_err("Use acpi-cpufreq driver for VIA C7\n");
 	default:
 		;
 	}
diff --git a/drivers/cpufreq/loongson2_cpufreq.c b/drivers/cpufreq/loongson2_cpufreq.c
index cd593c1..6bbdac1 100644
--- a/drivers/cpufreq/loongson2_cpufreq.c
+++ b/drivers/cpufreq/loongson2_cpufreq.c
@@ -10,6 +10,9 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/cpufreq.h>
 #include <linux/module.h>
 #include <linux/err.h>
@@ -76,7 +79,7 @@
 
 	cpuclk = clk_get(NULL, "cpu_clk");
 	if (IS_ERR(cpuclk)) {
-		printk(KERN_ERR "cpufreq: couldn't get CPU clk\n");
+		pr_err("couldn't get CPU clk\n");
 		return PTR_ERR(cpuclk);
 	}
 
@@ -163,7 +166,7 @@
 	if (ret)
 		return ret;
 
-	pr_info("cpufreq: Loongson-2F CPU frequency driver.\n");
+	pr_info("Loongson-2F CPU frequency driver\n");
 
 	cpufreq_register_notifier(&loongson2_cpufreq_notifier_block,
 				  CPUFREQ_TRANSITION_NOTIFIER);
diff --git a/drivers/cpufreq/maple-cpufreq.c b/drivers/cpufreq/maple-cpufreq.c
index cc3408f..d9df893 100644
--- a/drivers/cpufreq/maple-cpufreq.c
+++ b/drivers/cpufreq/maple-cpufreq.c
@@ -13,6 +13,8 @@
 
 #undef DEBUG
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/errno.h>
@@ -174,7 +176,7 @@
 	/* Get first CPU node */
 	cpunode = of_cpu_device_node_get(0);
 	if (cpunode == NULL) {
-		printk(KERN_ERR "cpufreq: Can't find any CPU 0 node\n");
+		pr_err("Can't find any CPU 0 node\n");
 		goto bail_noprops;
 	}
 
@@ -182,8 +184,7 @@
 	/* we actually don't care on which CPU to access PVR */
 	pvr_hi = PVR_VER(mfspr(SPRN_PVR));
 	if (pvr_hi != 0x3c && pvr_hi != 0x44) {
-		printk(KERN_ERR "cpufreq: Unsupported CPU version (%x)\n",
-				pvr_hi);
+		pr_err("Unsupported CPU version (%x)\n", pvr_hi);
 		goto bail_noprops;
 	}
 
@@ -222,8 +223,8 @@
 	maple_pmode_cur = -1;
 	maple_scom_switch_freq(maple_scom_query_freq());
 
-	printk(KERN_INFO "Registering Maple CPU frequency driver\n");
-	printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Cur: %d MHz\n",
+	pr_info("Registering Maple CPU frequency driver\n");
+	pr_info("Low: %d MHz, High: %d MHz, Cur: %d MHz\n",
 		maple_cpu_freqs[1].frequency/1000,
 		maple_cpu_freqs[0].frequency/1000,
 		maple_cpu_freqs[maple_pmode_cur].frequency/1000);
diff --git a/drivers/cpufreq/mt8173-cpufreq.c b/drivers/cpufreq/mt8173-cpufreq.c
index 2058e6d..6f602c7 100644
--- a/drivers/cpufreq/mt8173-cpufreq.c
+++ b/drivers/cpufreq/mt8173-cpufreq.c
@@ -59,11 +59,8 @@
 static struct mtk_cpu_dvfs_info *mtk_cpu_dvfs_info_lookup(int cpu)
 {
 	struct mtk_cpu_dvfs_info *info;
-	struct list_head *list;
 
-	list_for_each(list, &dvfs_info_list) {
-		info = list_entry(list, struct mtk_cpu_dvfs_info, list_head);
-
+	list_for_each_entry(info, &dvfs_info_list, list_head) {
 		if (cpumask_test_cpu(cpu, &info->cpus))
 			return info;
 	}
@@ -524,8 +521,7 @@
 
 static int mt8173_cpufreq_probe(struct platform_device *pdev)
 {
-	struct mtk_cpu_dvfs_info *info;
-	struct list_head *list, *tmp;
+	struct mtk_cpu_dvfs_info *info, *tmp;
 	int cpu, ret;
 
 	for_each_possible_cpu(cpu) {
@@ -559,11 +555,9 @@
 	return 0;
 
 release_dvfs_info_list:
-	list_for_each_safe(list, tmp, &dvfs_info_list) {
-		info = list_entry(list, struct mtk_cpu_dvfs_info, list_head);
-
+	list_for_each_entry_safe(info, tmp, &dvfs_info_list, list_head) {
 		mtk_cpu_dvfs_info_release(info);
-		list_del(list);
+		list_del(&info->list_head);
 	}
 
 	return ret;
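The list iteration rewrite above relies on two standard helpers: list_for_each_entry() folds the list_entry() conversion into the loop itself, and the _safe variant pre-fetches the next node so the current entry can be unlinked and freed mid-walk. In general form:

    struct item { int val; struct list_head node; };
    LIST_HEAD(items);
    struct item *it, *tmp;

    list_for_each_entry_safe(it, tmp, &items, node) {
            list_del(&it->node);    /* safe: tmp already points past it */
            kfree(it);
    }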
diff --git a/drivers/cpufreq/mvebu-cpufreq.c b/drivers/cpufreq/mvebu-cpufreq.c
new file mode 100644
index 0000000..e920889
--- /dev/null
+++ b/drivers/cpufreq/mvebu-cpufreq.c
@@ -0,0 +1,107 @@
+/*
+ * CPUFreq support for Armada 370/XP platforms.
+ *
+ * Copyright (C) 2012-2016 Marvell
+ *
+ * Yehuda Yitschak <yehuday@marvell.com>
+ * Gregory Clement <gregory.clement@free-electrons.com>
+ * Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#define pr_fmt(fmt) "mvebu-pmsu: " fmt
+
+#include <linux/clk.h>
+#include <linux/cpu.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/pm_opp.h>
+#include <linux/resource.h>
+
+static int __init armada_xp_pmsu_cpufreq_init(void)
+{
+	struct device_node *np;
+	struct resource res;
+	int ret, cpu;
+
+	if (!of_machine_is_compatible("marvell,armadaxp"))
+		return 0;
+
+	/*
+	 * In order to have proper cpufreq handling, we need to ensure
+	 * that the Device Tree description of the CPU clock includes
+	 * the definition of the PMU DFS registers. If not, we do not
+	 * register the clock notifier and the cpufreq driver. This
+	 * piece of code is only for compatibility with old Device
+	 * Trees.
+	 */
+	np = of_find_compatible_node(NULL, NULL, "marvell,armada-xp-cpu-clock");
+	if (!np)
+		return 0;
+
+	ret = of_address_to_resource(np, 1, &res);
+	if (ret) {
+		pr_warn(FW_WARN "not enabling cpufreq, deprecated armada-xp-cpu-clock binding\n");
+		of_node_put(np);
+		return 0;
+	}
+
+	of_node_put(np);
+
+	/*
+	 * For each CPU, this loop registers the operating points
+	 * supported (which are the nominal CPU frequency and half of
+	 * it), and registers the clock notifier that will take care
+	 * of doing the PMSU part of a frequency transition.
+	 */
+	for_each_possible_cpu(cpu) {
+		struct device *cpu_dev;
+		struct clk *clk;
+		int ret;
+
+		cpu_dev = get_cpu_device(cpu);
+		if (!cpu_dev) {
+			pr_err("Cannot get CPU %d\n", cpu);
+			continue;
+		}
+
+		clk = clk_get(cpu_dev, 0);
+		if (IS_ERR(clk)) {
+			pr_err("Cannot get clock for CPU %d\n", cpu);
+			return PTR_ERR(clk);
+		}
+
+		/*
+		 * In case of a failure of dev_pm_opp_add(), we don't
+		 * bother with cleaning up the registered OPP (there's
+		 * no function to do so), and simply cancel the
+		 * registration of the cpufreq device.
+		 */
+		ret = dev_pm_opp_add(cpu_dev, clk_get_rate(clk), 0);
+		if (ret) {
+			clk_put(clk);
+			return ret;
+		}
+
+		ret = dev_pm_opp_add(cpu_dev, clk_get_rate(clk) / 2, 0);
+		if (ret) {
+			clk_put(clk);
+			return ret;
+		}
+
+		ret = dev_pm_opp_set_sharing_cpus(cpu_dev,
+						  cpumask_of(cpu_dev->id));
+		if (ret)
+			dev_err(cpu_dev, "%s: failed to mark OPPs as shared: %d\n",
+				__func__, ret);
+	}
+
+	platform_device_register_simple("cpufreq-dt", -1, NULL, 0);
+	return 0;
+}
+device_initcall(armada_xp_pmsu_cpufreq_init);
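For a concrete sense of the OPP registration above: on a board whose CPU clock reads 1.6 GHz (an illustrative value), the loop registers exactly two operating points per CPU; dev_pm_opp_add() takes the frequency in Hz and a voltage of 0 when none is specified:

    dev_pm_opp_add(cpu_dev, 1600000000UL, 0);   /* nominal: 1.6 GHz */
    dev_pm_opp_add(cpu_dev,  800000000UL, 0);   /* half:    800 MHz */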
diff --git a/drivers/cpufreq/omap-cpufreq.c b/drivers/cpufreq/omap-cpufreq.c
index e3866e0..cead9be 100644
--- a/drivers/cpufreq/omap-cpufreq.c
+++ b/drivers/cpufreq/omap-cpufreq.c
@@ -13,6 +13,9 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
@@ -163,13 +166,13 @@
 {
 	mpu_dev = get_cpu_device(0);
 	if (!mpu_dev) {
-		pr_warning("%s: unable to get the mpu device\n", __func__);
+		pr_warn("%s: unable to get the MPU device\n", __func__);
 		return -EINVAL;
 	}
 
 	mpu_reg = regulator_get(mpu_dev, "vcc");
 	if (IS_ERR(mpu_reg)) {
-		pr_warning("%s: unable to get MPU regulator\n", __func__);
+		pr_warn("%s: unable to get MPU regulator\n", __func__);
 		mpu_reg = NULL;
 	} else {
 		/* 
diff --git a/drivers/cpufreq/p4-clockmod.c b/drivers/cpufreq/p4-clockmod.c
index 5dd95da..fd77812 100644
--- a/drivers/cpufreq/p4-clockmod.c
+++ b/drivers/cpufreq/p4-clockmod.c
@@ -20,6 +20,8 @@
  *
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
@@ -35,8 +37,6 @@
 
 #include "speedstep-lib.h"
 
-#define PFX	"p4-clockmod: "
-
 /*
  * Duty Cycle (3bits), note DC_DISABLE is not specified in
  * intel docs i just use it to mean disable
@@ -124,11 +124,7 @@
 {
 	if (c->x86 == 0x06) {
 		if (cpu_has(c, X86_FEATURE_EST))
-			printk_once(KERN_WARNING PFX "Warning: EST-capable "
-			       "CPU detected. The acpi-cpufreq module offers "
-			       "voltage scaling in addition to frequency "
-			       "scaling. You should use that instead of "
-			       "p4-clockmod, if possible.\n");
+			pr_warn_once("Warning: EST-capable CPU detected. The acpi-cpufreq module offers voltage scaling in addition to frequency scaling. You should use that instead of p4-clockmod, if possible.\n");
 		switch (c->x86_model) {
 		case 0x0E: /* Core */
 		case 0x0F: /* Core Duo */
@@ -152,11 +148,7 @@
 	p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
 
 	if (speedstep_detect_processor() == SPEEDSTEP_CPU_P4M) {
-		printk(KERN_WARNING PFX "Warning: Pentium 4-M detected. "
-		       "The speedstep-ich or acpi cpufreq modules offer "
-		       "voltage scaling in addition of frequency scaling. "
-		       "You should use either one instead of p4-clockmod, "
-		       "if possible.\n");
+		pr_warn("Warning: Pentium 4-M detected. The speedstep-ich or acpi cpufreq modules offer voltage scaling in addition of frequency scaling. You should use either one instead of p4-clockmod, if possible.\n");
 		return speedstep_get_frequency(SPEEDSTEP_CPU_P4M);
 	}
 
@@ -265,8 +257,7 @@
 
 	ret = cpufreq_register_driver(&p4clockmod_driver);
 	if (!ret)
-		printk(KERN_INFO PFX "P4/Xeon(TM) CPU On-Demand Clock "
-				"Modulation available\n");
+		pr_info("P4/Xeon(TM) CPU On-Demand Clock Modulation available\n");
 
 	return ret;
 }
diff --git a/drivers/cpufreq/pmac32-cpufreq.c b/drivers/cpufreq/pmac32-cpufreq.c
index 1f49d97..b7b576e 100644
--- a/drivers/cpufreq/pmac32-cpufreq.c
+++ b/drivers/cpufreq/pmac32-cpufreq.c
@@ -13,6 +13,8 @@
  *
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/errno.h>
@@ -481,13 +483,13 @@
 		freqs = of_get_property(cpunode, "bus-frequencies", &lenp);
 		lenp /= sizeof(u32);
 		if (freqs == NULL || lenp != 2) {
-			printk(KERN_ERR "cpufreq: bus-frequencies incorrect or missing\n");
+			pr_err("bus-frequencies incorrect or missing\n");
 			return 1;
 		}
 		ratio = of_get_property(cpunode, "processor-to-bus-ratio*2",
 						NULL);
 		if (ratio == NULL) {
-			printk(KERN_ERR "cpufreq: processor-to-bus-ratio*2 missing\n");
+			pr_err("processor-to-bus-ratio*2 missing\n");
 			return 1;
 		}
 
@@ -550,7 +552,7 @@
 	if (volt_gpio_np)
 		voltage_gpio = read_gpio(volt_gpio_np);
 	if (!voltage_gpio){
-		printk(KERN_ERR "cpufreq: missing cpu-vcore-select gpio\n");
+		pr_err("missing cpu-vcore-select gpio\n");
 		return 1;
 	}
 
@@ -675,9 +677,9 @@
 	pmac_cpu_freqs[CPUFREQ_HIGH].frequency = hi_freq;
 	ppc_proc_freq = cur_freq * 1000ul;
 
-	printk(KERN_INFO "Registering PowerMac CPU frequency driver\n");
-	printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Boot: %d Mhz\n",
-	       low_freq/1000, hi_freq/1000, cur_freq/1000);
+	pr_info("Registering PowerMac CPU frequency driver\n");
+	pr_info("Low: %d MHz, High: %d MHz, Boot: %d MHz\n",
+		low_freq/1000, hi_freq/1000, cur_freq/1000);
 
 	return cpufreq_register_driver(&pmac_cpufreq_driver);
 }
diff --git a/drivers/cpufreq/pmac64-cpufreq.c b/drivers/cpufreq/pmac64-cpufreq.c
index 4ff8687..267e089 100644
--- a/drivers/cpufreq/pmac64-cpufreq.c
+++ b/drivers/cpufreq/pmac64-cpufreq.c
@@ -12,6 +12,8 @@
 
 #undef DEBUG
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/errno.h>
@@ -138,7 +140,7 @@
 		usleep_range(1000, 1000);
 	}
 	if (done == 0)
-		printk(KERN_WARNING "cpufreq: Timeout in clock slewing !\n");
+		pr_warn("Timeout in clock slewing!\n");
 }
 
 
@@ -266,7 +268,7 @@
 		rc = pmf_call_one(pfunc_cpu_setfreq_low, NULL);
 
 	if (rc)
-		printk(KERN_WARNING "cpufreq: pfunc switch error %d\n", rc);
+		pr_warn("pfunc switch error %d\n", rc);
 
 	/* It's an irq GPIO so we should be able to just block here,
 	 * I'll do that later after I've properly tested the IRQ code for
@@ -282,7 +284,7 @@
 		usleep_range(500, 500);
 	}
 	if (done == 0)
-		printk(KERN_WARNING "cpufreq: Timeout in clock slewing !\n");
+		pr_warn("Timeout in clock slewing!\n");
 
 	/* If frequency is going down, last ramp the voltage */
 	if (speed_mode > g5_pmode_cur)
@@ -368,7 +370,7 @@
 	}
 	pvr_hi = (*valp) >> 16;
 	if (pvr_hi != 0x3c && pvr_hi != 0x44) {
-		printk(KERN_ERR "cpufreq: Unsupported CPU version\n");
+		pr_err("Unsupported CPU version\n");
 		goto bail_noprops;
 	}
 
@@ -403,8 +405,7 @@
 
 		root = of_find_node_by_path("/");
 		if (root == NULL) {
-			printk(KERN_ERR "cpufreq: Can't find root of "
-			       "device tree\n");
+			pr_err("Can't find root of device tree\n");
 			goto bail_noprops;
 		}
 		pfunc_set_vdnap0 = pmf_find_function(root, "set-vdnap0");
@@ -412,8 +413,7 @@
 			pmf_find_function(root, "slewing-done");
 		if (pfunc_set_vdnap0 == NULL ||
 		    pfunc_vdnap0_complete == NULL) {
-			printk(KERN_ERR "cpufreq: Can't find required "
-			       "platform function\n");
+			pr_err("Can't find required platform function\n");
 			goto bail_noprops;
 		}
 
@@ -453,10 +453,10 @@
 	g5_pmode_cur = -1;
 	g5_switch_freq(g5_query_freq());
 
-	printk(KERN_INFO "Registering G5 CPU frequency driver\n");
-	printk(KERN_INFO "Frequency method: %s, Voltage method: %s\n",
-	       freq_method, volt_method);
-	printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Cur: %d MHz\n",
+	pr_info("Registering G5 CPU frequency driver\n");
+	pr_info("Frequency method: %s, Voltage method: %s\n",
+		freq_method, volt_method);
+	pr_info("Low: %d Mhz, High: %d Mhz, Cur: %d MHz\n",
 		g5_cpu_freqs[1].frequency/1000,
 		g5_cpu_freqs[0].frequency/1000,
 		g5_cpu_freqs[g5_pmode_cur].frequency/1000);
@@ -493,7 +493,7 @@
 	if (cpuid != NULL)
 		eeprom = of_get_property(cpuid, "cpuid", NULL);
 	if (eeprom == NULL) {
-		printk(KERN_ERR "cpufreq: Can't find cpuid EEPROM !\n");
+		pr_err("Can't find cpuid EEPROM !\n");
 		rc = -ENODEV;
 		goto bail;
 	}
@@ -511,7 +511,7 @@
 		break;
 	}
 	if (hwclock == NULL) {
-		printk(KERN_ERR "cpufreq: Can't find i2c clock chip !\n");
+		pr_err("Can't find i2c clock chip !\n");
 		rc = -ENODEV;
 		goto bail;
 	}
@@ -539,7 +539,7 @@
 	/* Check we have minimum requirements */
 	if (pfunc_cpu_getfreq == NULL || pfunc_cpu_setfreq_high == NULL ||
 	    pfunc_cpu_setfreq_low == NULL || pfunc_slewing_done == NULL) {
-		printk(KERN_ERR "cpufreq: Can't find platform functions !\n");
+		pr_err("Can't find platform functions !\n");
 		rc = -ENODEV;
 		goto bail;
 	}
@@ -567,7 +567,7 @@
 	/* Get max frequency from device-tree */
 	valp = of_get_property(cpunode, "clock-frequency", NULL);
 	if (!valp) {
-		printk(KERN_ERR "cpufreq: Can't find CPU frequency !\n");
+		pr_err("Can't find CPU frequency !\n");
 		rc = -ENODEV;
 		goto bail;
 	}
@@ -583,8 +583,7 @@
 
 	/* Check for machines with no useful settings */
 	if (il == ih) {
-		printk(KERN_WARNING "cpufreq: No low frequency mode available"
-		       " on this model !\n");
+		pr_warn("No low frequency mode available on this model !\n");
 		rc = -ENODEV;
 		goto bail;
 	}
@@ -595,7 +594,7 @@
 
 	/* Sanity check */
 	if (min_freq >= max_freq || min_freq < 1000) {
-		printk(KERN_ERR "cpufreq: Can't calculate low frequency !\n");
+		pr_err("Can't calculate low frequency !\n");
 		rc = -ENXIO;
 		goto bail;
 	}
@@ -619,10 +618,10 @@
 	g5_pmode_cur = -1;
 	g5_switch_freq(g5_query_freq());
 
-	printk(KERN_INFO "Registering G5 CPU frequency driver\n");
-	printk(KERN_INFO "Frequency method: i2c/pfunc, "
-	       "Voltage method: %s\n", has_volt ? "i2c/pfunc" : "none");
-	printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Cur: %d MHz\n",
+	pr_info("Registering G5 CPU frequency driver\n");
+	pr_info("Frequency method: i2c/pfunc, Voltage method: %s\n",
+		has_volt ? "i2c/pfunc" : "none");
+	pr_info("Low: %d Mhz, High: %d Mhz, Cur: %d MHz\n",
 		g5_cpu_freqs[1].frequency/1000,
 		g5_cpu_freqs[0].frequency/1000,
 		g5_cpu_freqs[g5_pmode_cur].frequency/1000);
@@ -654,7 +653,7 @@
 	/* Get first CPU node */
 	cpunode = of_cpu_device_node_get(0);
 	if (cpunode == NULL) {
-		pr_err("cpufreq: Can't find any CPU node\n");
+		pr_err("Can't find any CPU node\n");
 		return -ENODEV;
 	}
 
diff --git a/drivers/cpufreq/powernow-k6.c b/drivers/cpufreq/powernow-k6.c
index e6f24b2..dedd256 100644
--- a/drivers/cpufreq/powernow-k6.c
+++ b/drivers/cpufreq/powernow-k6.c
@@ -8,6 +8,8 @@
  *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
@@ -22,7 +24,6 @@
 #define POWERNOW_IOPORT 0xfff0          /* it doesn't matter where, as long
 					   as it is unused */
 
-#define PFX "powernow-k6: "
 static unsigned int                     busfreq;   /* FSB, in 10 kHz */
 static unsigned int                     max_multiplier;
 
@@ -141,7 +142,7 @@
 {
 
 	if (clock_ratio[best_i].driver_data > max_multiplier) {
-		printk(KERN_ERR PFX "invalid target frequency\n");
+		pr_err("invalid target frequency\n");
 		return -EINVAL;
 	}
 
@@ -175,13 +176,14 @@
 				max_multiplier = param_max_multiplier;
 				goto have_max_multiplier;
 			}
-		printk(KERN_ERR "powernow-k6: invalid max_multiplier parameter, valid parameters 20, 30, 35, 40, 45, 50, 55, 60\n");
+		pr_err("invalid max_multiplier parameter, valid parameters 20, 30, 35, 40, 45, 50, 55, 60\n");
 		return -EINVAL;
 	}
 
 	if (!max_multiplier) {
-		printk(KERN_WARNING "powernow-k6: unknown frequency %u, cannot determine current multiplier\n", khz);
-		printk(KERN_WARNING "powernow-k6: use module parameters max_multiplier and bus_frequency\n");
+		pr_warn("unknown frequency %u, cannot determine current multiplier\n",
+			khz);
+		pr_warn("use module parameters max_multiplier and bus_frequency\n");
 		return -EOPNOTSUPP;
 	}
 
@@ -193,7 +195,7 @@
 			busfreq = param_busfreq / 10;
 			goto have_busfreq;
 		}
-		printk(KERN_ERR "powernow-k6: invalid bus_frequency parameter, allowed range 50000 - 150000 kHz\n");
+		pr_err("invalid bus_frequency parameter, allowed range 50000 - 150000 kHz\n");
 		return -EINVAL;
 	}
 
@@ -275,7 +277,7 @@
 		return -ENODEV;
 
 	if (!request_region(POWERNOW_IOPORT, 16, "PowerNow!")) {
-		printk(KERN_INFO PFX "PowerNow IOPORT region already used.\n");
+		pr_info("PowerNow IOPORT region already used\n");
 		return -EIO;
 	}
 
diff --git a/drivers/cpufreq/powernow-k7.c b/drivers/cpufreq/powernow-k7.c
index c1ae199..9f013ed 100644
--- a/drivers/cpufreq/powernow-k7.c
+++ b/drivers/cpufreq/powernow-k7.c
@@ -13,6 +13,8 @@
  *  - We disable half multipliers if ACPI is used on A0 stepping CPUs.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
@@ -35,9 +37,6 @@
 
 #include "powernow-k7.h"
 
-#define PFX "powernow: "
-
-
 struct psb_s {
 	u8 signature[10];
 	u8 tableversion;
@@ -127,14 +126,13 @@
 	maxei = cpuid_eax(0x80000000);
 	if (maxei < 0x80000007) {	/* Any powernow info ? */
 #ifdef MODULE
-		printk(KERN_INFO PFX "No powernow capabilities detected\n");
+		pr_info("No powernow capabilities detected\n");
 #endif
 		return 0;
 	}
 
 	if ((c->x86_model == 6) && (c->x86_mask == 0)) {
-		printk(KERN_INFO PFX "K7 660[A0] core detected, "
-				"enabling errata workarounds\n");
+		pr_info("K7 660[A0] core detected, enabling errata workarounds\n");
 		have_a0 = 1;
 	}
 
@@ -144,22 +142,22 @@
 	if (!(edx & (1 << 1 | 1 << 2)))
 		return 0;
 
-	printk(KERN_INFO PFX "PowerNOW! Technology present. Can scale: ");
+	pr_info("PowerNOW! Technology present. Can scale: ");
 
 	if (edx & 1 << 1) {
-		printk("frequency");
+		pr_cont("frequency");
 		can_scale_bus = 1;
 	}
 
 	if ((edx & (1 << 1 | 1 << 2)) == 0x6)
-		printk(" and ");
+		pr_cont(" and ");
 
 	if (edx & 1 << 2) {
-		printk("voltage");
+		pr_cont("voltage");
 		can_scale_vid = 1;
 	}
 
-	printk(".\n");
+	pr_cont("\n");
 	return 1;
 }
 
@@ -427,16 +425,14 @@
 err05:
 	kfree(acpi_processor_perf);
 err0:
-	printk(KERN_WARNING PFX "ACPI perflib can not be used on "
-			"this platform\n");
+	pr_warn("ACPI perflib can not be used on this platform\n");
 	acpi_processor_perf = NULL;
 	return retval;
 }
 #else
 static int powernow_acpi_init(void)
 {
-	printk(KERN_INFO PFX "no support for ACPI processor found."
-	       "  Please recompile your kernel with ACPI processor\n");
+	pr_info("no support for ACPI processor found - please recompile your kernel with ACPI processor\n");
 	return -EINVAL;
 }
 #endif
@@ -468,8 +464,7 @@
 			psb = (struct psb_s *) p;
 			pr_debug("Table version: 0x%x\n", psb->tableversion);
 			if (psb->tableversion != 0x12) {
-				printk(KERN_INFO PFX "Sorry, only v1.2 tables"
-						" supported right now\n");
+				pr_info("Sorry, only v1.2 tables supported right now\n");
 				return -ENODEV;
 			}
 
@@ -481,10 +476,8 @@
 
 			latency = psb->settlingtime;
 			if (latency < 100) {
-				printk(KERN_INFO PFX "BIOS set settling time "
-						"to %d microseconds. "
-						"Should be at least 100. "
-						"Correcting.\n", latency);
+				pr_info("BIOS set settling time to %d microseconds. Should be at least 100. Correcting.\n",
+					latency);
 				latency = 100;
 			}
 			pr_debug("Settling Time: %d microseconds.\n",
@@ -516,10 +509,9 @@
 						p += 2;
 				}
 			}
-			printk(KERN_INFO PFX "No PST tables match this cpuid "
-					"(0x%x)\n", etuple);
-			printk(KERN_INFO PFX "This is indicative of a broken "
-					"BIOS.\n");
+			pr_info("No PST tables match this cpuid (0x%x)\n",
+				etuple);
+			pr_info("This is indicative of a broken BIOS\n");
 
 			return -EINVAL;
 		}
@@ -552,7 +544,7 @@
 	sgtc = 100 * m * latency;
 	sgtc = sgtc / 3;
 	if (sgtc > 0xfffff) {
-		printk(KERN_WARNING PFX "SGTC too large %d\n", sgtc);
+		pr_warn("SGTC too large %d\n", sgtc);
 		sgtc = 0xfffff;
 	}
 	return sgtc;
@@ -574,14 +566,10 @@
 
 static int acer_cpufreq_pst(const struct dmi_system_id *d)
 {
-	printk(KERN_WARNING PFX
-		"%s laptop with broken PST tables in BIOS detected.\n",
+	pr_warn("%s laptop with broken PST tables in BIOS detected\n",
 		d->ident);
-	printk(KERN_WARNING PFX
-		"You need to downgrade to 3A21 (09/09/2002), or try a newer "
-		"BIOS than 3A71 (01/20/2003)\n");
-	printk(KERN_WARNING PFX
-		"cpufreq scaling has been disabled as a result of this.\n");
+	pr_warn("You need to downgrade to 3A21 (09/09/2002), or try a newer BIOS than 3A71 (01/20/2003)\n");
+	pr_warn("cpufreq scaling has been disabled as a result of this\n");
 	return 0;
 }
 
@@ -616,40 +604,38 @@
 
 	fsb = (10 * cpu_khz) / fid_codes[fidvidstatus.bits.CFID];
 	if (!fsb) {
-		printk(KERN_WARNING PFX "can not determine bus frequency\n");
+		pr_warn("can not determine bus frequency\n");
 		return -EINVAL;
 	}
 	pr_debug("FSB: %3dMHz\n", fsb/1000);
 
 	if (dmi_check_system(powernow_dmi_table) || acpi_force) {
-		printk(KERN_INFO PFX "PSB/PST known to be broken.  "
-				"Trying ACPI instead\n");
+		pr_info("PSB/PST known to be broken - trying ACPI instead\n");
 		result = powernow_acpi_init();
 	} else {
 		result = powernow_decode_bios(fidvidstatus.bits.MFID,
 				fidvidstatus.bits.SVID);
 		if (result) {
-			printk(KERN_INFO PFX "Trying ACPI perflib\n");
+			pr_info("Trying ACPI perflib\n");
 			maximum_speed = 0;
 			minimum_speed = -1;
 			latency = 0;
 			result = powernow_acpi_init();
 			if (result) {
-				printk(KERN_INFO PFX
-					"ACPI and legacy methods failed\n");
+				pr_info("ACPI and legacy methods failed\n");
 			}
 		} else {
 			/* SGTC use the bus clock as timer */
 			latency = fixup_sgtc();
-			printk(KERN_INFO PFX "SGTC: %d\n", latency);
+			pr_info("SGTC: %d\n", latency);
 		}
 	}
 
 	if (result)
 		return result;
 
-	printk(KERN_INFO PFX "Minimum speed %d MHz. Maximum speed %d MHz.\n",
-				minimum_speed/1000, maximum_speed/1000);
+	pr_info("Minimum speed %d MHz - Maximum speed %d MHz\n",
+		minimum_speed/1000, maximum_speed/1000);
 
 	policy->cpuinfo.transition_latency =
 		cpufreq_scale(2000000UL, fsb, latency);
diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
index 39ac78c..54c4536 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -36,12 +36,56 @@
 #include <asm/reg.h>
 #include <asm/smp.h> /* Required for cpu_sibling_mask() in UP configs */
 #include <asm/opal.h>
+#include <linux/timer.h>
 
 #define POWERNV_MAX_PSTATES	256
 #define PMSR_PSAFE_ENABLE	(1UL << 30)
 #define PMSR_SPR_EM_DISABLE	(1UL << 31)
 #define PMSR_MAX(x)		((x >> 32) & 0xFF)
 
+#define MAX_RAMP_DOWN_TIME				5120
+/*
+ * On an idle system we want the global pstate to ramp down from its max
+ * value to the min over a span of ~5 seconds. We also want it to ramp down
+ * slowly at first and then more rapidly later on.
+ *
+ * This gives a percentage rampdown for time elapsed in milliseconds.
+ * ramp_down_percentage = ((ms * ms) >> 18)
+ *			~= 3.8 * (sec * sec)
+ *
+ * At 0 ms	ramp_down_percent = 0
+ * At 5120 ms	ramp_down_percent = 100
+ */
+#define ramp_down_percent(time)		((time * time) >> 18)
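+
+/*
+ * For example, ramp_down_percent(2048) = (2048 * 2048) >> 18 = 16, i.e.
+ * after 2048 ms of ramp-down, 16% of the total pstate drop has been applied.
+ */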
+
+/* Interval after which the timer is queued to bring down global pstate */
+#define GPSTATE_TIMER_INTERVAL				2000
+
+/**
+ * struct global_pstate_info -	Per policy data structure to maintain history of
+ *				global pstates
+ * @highest_lpstate:		The local pstate from which we are ramping down
+ * @elapsed_time:		Time in ms spent in ramping down from
+ *				highest_lpstate
+ * @last_sampled_time:		Time from boot in ms when global pstates were
+ *				last set
+ * @last_lpstate,last_gpstate:	Last set values for local and global pstates
+ * @timer:			Used for ramping down if the cpu goes idle for
+ *				a long time with the global pstate held high
+ * @gpstate_lock:		A spinlock to maintain synchronization between
+ *				routines called by the timer handler and the
+ *				governor's target_index calls
+ */
+struct global_pstate_info {
+	int highest_lpstate;
+	unsigned int elapsed_time;
+	unsigned int last_sampled_time;
+	int last_lpstate;
+	int last_gpstate;
+	spinlock_t gpstate_lock;
+	struct timer_list timer;
+};
+
 static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES+1];
 static bool rebooting, throttled, occ_reset;
 
@@ -94,6 +138,17 @@
 	int nr_pstates;
 } powernv_pstate_info;
 
+static inline void reset_gpstates(struct cpufreq_policy *policy)
+{
+	struct global_pstate_info *gpstates = policy->driver_data;
+
+	gpstates->highest_lpstate = 0;
+	gpstates->elapsed_time = 0;
+	gpstates->last_sampled_time = 0;
+	gpstates->last_lpstate = 0;
+	gpstates->last_gpstate = 0;
+}
+
 /*
  * Initialize the freq table based on data obtained
  * from the firmware passed via device-tree
@@ -285,6 +340,7 @@
 struct powernv_smp_call_data {
 	unsigned int freq;
 	int pstate_id;
+	int gpstate_id;
 };
 
 /*
@@ -343,19 +399,21 @@
  * (struct powernv_smp_call_data *) and the pstate_id which needs to be set
  * on this CPU should be present in freq_data->pstate_id.
  */
-static void set_pstate(void *freq_data)
+static void set_pstate(void *data)
 {
 	unsigned long val;
-	unsigned long pstate_ul =
-		((struct powernv_smp_call_data *) freq_data)->pstate_id;
+	struct powernv_smp_call_data *freq_data = data;
+	unsigned long pstate_ul = freq_data->pstate_id;
+	unsigned long gpstate_ul = freq_data->gpstate_id;
 
 	val = get_pmspr(SPRN_PMCR);
 	val = val & 0x0000FFFFFFFFFFFFULL;
 
 	pstate_ul = pstate_ul & 0xFF;
+	gpstate_ul = gpstate_ul & 0xFF;
 
 	/* Set both global(bits 56..63) and local(bits 48..55) PStates */
-	val = val | (pstate_ul << 56) | (pstate_ul << 48);
+	val = val | (gpstate_ul << 56) | (pstate_ul << 48);
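+	/* e.g. gpstate 0x04, lpstate 0x02 -> val |= 0x0402000000000000ULL */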
 
 	pr_debug("Setting cpu %d pmcr to %016lX\n",
 			raw_smp_processor_id(), val);
@@ -424,6 +482,111 @@
 	}
 }
 
+/**
+ * calc_global_pstate - Calculate global pstate
+ * @elapsed_time:	Elapsed time in milliseconds
+ * @local_pstate:	New local pstate
+ * @highest_lpstate:	pstate from which it is ramping down
+ *
+ * Finds the appropriate global pstate based on the pstate from which it is
+ * ramping down and the time elapsed while ramping down. It follows a quadratic
+ * equation which ensures that the ramp-down reaches pmin within 5 seconds.
+ */
+static inline int calc_global_pstate(unsigned int elapsed_time,
+				     int highest_lpstate, int local_pstate)
+{
+	int pstate_diff;
+
+	/*
+	 * Using ramp_down_percent we get the percentage of the rampdown
+	 * that we are expected to have covered so far. The difference between
+	 * highest_lpstate and powernv_pstate_info.min gives the absolute
+	 * number of pstates we will eventually have dropped by the end of
+	 * 5 seconds; scaling it by that percentage gives the number of
+	 * pstates to drop now.
+	 */
+	pstate_diff =  ((int)ramp_down_percent(elapsed_time) *
+			(highest_lpstate - powernv_pstate_info.min)) / 100;
+
+	/* Ensure that global pstate is >= to local pstate */
+	if (highest_lpstate - pstate_diff < local_pstate)
+		return local_pstate;
+	else
+		return highest_lpstate - pstate_diff;
+}
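+
+/*
+ * For instance, with elapsed_time = 2048 ms ramp_down_percent() yields 16;
+ * if highest_lpstate is 50 pstates above powernv_pstate_info.min, the
+ * global pstate returned is highest_lpstate - (16 * 50) / 100, i.e. 8
+ * pstates below highest_lpstate (unless that would undershoot the new
+ * local pstate).
+ */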
+
+static inline void  queue_gpstate_timer(struct global_pstate_info *gpstates)
+{
+	unsigned int timer_interval;
+
+	/*
+	 * Set up the timer to fire after GPSTATE_TIMER_INTERVAL ms, but
+	 * if that would exceed MAX_RAMP_DOWN_TIME ms of total ramp-down
+	 * time, set the timer so that it fires exactly when
+	 * MAX_RAMP_DOWN_TIME ms of ramp-down time have elapsed.
+	 */
+	if ((gpstates->elapsed_time + GPSTATE_TIMER_INTERVAL)
+	     > MAX_RAMP_DOWN_TIME)
+		timer_interval = MAX_RAMP_DOWN_TIME - gpstates->elapsed_time;
+	else
+		timer_interval = GPSTATE_TIMER_INTERVAL;
+
+	mod_timer_pinned(&gpstates->timer, jiffies +
+			msecs_to_jiffies(timer_interval));
+}
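+
+/*
+ * For example, with 4000 ms of ramp-down already elapsed, the next full
+ * 2000 ms interval would overshoot MAX_RAMP_DOWN_TIME (5120 ms), so
+ * queue_gpstate_timer() arms the timer for the remaining 1120 ms instead.
+ */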
+
+/**
+ * gpstate_timer_handler - bring the global pstate closer to the local one
+ *
+ * @data: pointer to the cpufreq_policy on which the timer was queued
+ *
+ * This handler brings the global pstate down closer to the local pstate
+ * according to the quadratic equation above. It queues a new timer if the
+ * global pstate is still not equal to the local pstate.
+ */
+void gpstate_timer_handler(unsigned long data)
+{
+	struct cpufreq_policy *policy = (struct cpufreq_policy *)data;
+	struct global_pstate_info *gpstates = policy->driver_data;
+	int gpstate_id;
+	unsigned int time_diff = jiffies_to_msecs(jiffies)
+					- gpstates->last_sampled_time;
+	struct powernv_smp_call_data freq_data;
+
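+	/*
+	 * If the lock is already held, target_index() is updating the
+	 * pstates on this policy and will queue or delete the timer as
+	 * needed, so it is safe to simply bail out here rather than spin.
+	 */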
+	if (!spin_trylock(&gpstates->gpstate_lock))
+		return;
+
+	gpstates->last_sampled_time += time_diff;
+	gpstates->elapsed_time += time_diff;
+	freq_data.pstate_id = gpstates->last_lpstate;
+
+	if ((gpstates->last_gpstate == freq_data.pstate_id) ||
+	    (gpstates->elapsed_time > MAX_RAMP_DOWN_TIME)) {
+		gpstate_id = freq_data.pstate_id;
+		reset_gpstates(policy);
+		gpstates->highest_lpstate = freq_data.pstate_id;
+	} else {
+		gpstate_id = calc_global_pstate(gpstates->elapsed_time,
+						gpstates->highest_lpstate,
+						freq_data.pstate_id);
+	}
+
+	/*
+	 * If the local pstate is equal to the global pstate, ramp-down is
+	 * over, so the timer does not need to be queued again.
+	 */
+	if (gpstate_id != freq_data.pstate_id)
+		queue_gpstate_timer(gpstates);
+
+	freq_data.gpstate_id = gpstate_id;
+	gpstates->last_gpstate = freq_data.gpstate_id;
+	gpstates->last_lpstate = freq_data.pstate_id;
+
+	spin_unlock(&gpstates->gpstate_lock);
+
+	/* The timer may have been migrated to a different cpu on cpu hot unplug */
+	smp_call_function_any(policy->cpus, set_pstate, &freq_data, 1);
+}
+
 /*
  * powernv_cpufreq_target_index: Sets the frequency corresponding to
  * the cpufreq table entry indexed by new_index on the cpus in the
@@ -433,6 +596,8 @@
 					unsigned int new_index)
 {
 	struct powernv_smp_call_data freq_data;
+	unsigned int cur_msec, gpstate_id;
+	struct global_pstate_info *gpstates = policy->driver_data;
 
 	if (unlikely(rebooting) && new_index != get_nominal_index())
 		return 0;
@@ -440,28 +605,81 @@
 	if (!throttled)
 		powernv_cpufreq_throttle_check(NULL);
 
+	cur_msec = jiffies_to_msecs(get_jiffies_64());
+
+	spin_lock(&gpstates->gpstate_lock);
 	freq_data.pstate_id = powernv_freqs[new_index].driver_data;
 
+	if (!gpstates->last_sampled_time) {
+		gpstate_id = freq_data.pstate_id;
+		gpstates->highest_lpstate = freq_data.pstate_id;
+		goto gpstates_done;
+	}
+
+	if (gpstates->last_gpstate > freq_data.pstate_id) {
+		gpstates->elapsed_time += cur_msec -
+						 gpstates->last_sampled_time;
+
+		/*
+		 * If it has been ramping down for more than MAX_RAMP_DOWN_TIME,
+		 * reset all global pstate related data and set it equal to the
+		 * local pstate to start fresh.
+		 */
+		if (gpstates->elapsed_time > MAX_RAMP_DOWN_TIME) {
+			reset_gpstates(policy);
+			gpstates->highest_lpstate = freq_data.pstate_id;
+			gpstate_id = freq_data.pstate_id;
+		} else {
+			/* Elapsed time is less than 5 seconds, continue the ramp-down */
+			gpstate_id = calc_global_pstate(gpstates->elapsed_time,
+							gpstates->highest_lpstate,
+							freq_data.pstate_id);
+		}
+	} else {
+		reset_gpstates(policy);
+		gpstates->highest_lpstate = freq_data.pstate_id;
+		gpstate_id = freq_data.pstate_id;
+	}
+
+	/*
+	 * If the local pstate is equal to the global pstate, ramp-down is
+	 * over, so the timer does not need to be queued again.
+	 */
+	if (gpstate_id != freq_data.pstate_id)
+		queue_gpstate_timer(gpstates);
+	else
+		del_timer_sync(&gpstates->timer);
+
+gpstates_done:
+	freq_data.gpstate_id = gpstate_id;
+	gpstates->last_sampled_time = cur_msec;
+	gpstates->last_gpstate = freq_data.gpstate_id;
+	gpstates->last_lpstate = freq_data.pstate_id;
+
+	spin_unlock(&gpstates->gpstate_lock);
+
 	/*
 	 * Use smp_call_function to send IPI and execute the
 	 * mtspr on target CPU.  We could do that without IPI
 	 * if current CPU is within policy->cpus (core)
 	 */
 	smp_call_function_any(policy->cpus, set_pstate, &freq_data, 1);
-
 	return 0;
 }
 
 static int powernv_cpufreq_cpu_init(struct cpufreq_policy *policy)
 {
-	int base, i;
+	int base, i, ret;
+	struct kernfs_node *kn;
+	struct global_pstate_info *gpstates;
 
 	base = cpu_first_thread_sibling(policy->cpu);
 
 	for (i = 0; i < threads_per_core; i++)
 		cpumask_set_cpu(base + i, policy->cpus);
 
-	if (!policy->driver_data) {
+	kn = kernfs_find_and_get(policy->kobj.sd, throttle_attr_grp.name);
+	if (!kn) {
 		int ret;
 
 		ret = sysfs_create_group(&policy->kobj, &throttle_attr_grp);
@@ -470,13 +688,37 @@
 				policy->cpu);
 			return ret;
 		}
-		/*
-		 * policy->driver_data is used as a flag for one-time
-		 * creation of throttle sysfs files.
-		 */
-		policy->driver_data = policy;
+	} else {
+		kernfs_put(kn);
 	}
-	return cpufreq_table_validate_and_show(policy, powernv_freqs);
+
+	gpstates =  kzalloc(sizeof(*gpstates), GFP_KERNEL);
+	if (!gpstates)
+		return -ENOMEM;
+
+	policy->driver_data = gpstates;
+
+	/* initialize timer */
+	init_timer_deferrable(&gpstates->timer);
+	gpstates->timer.data = (unsigned long)policy;
+	gpstates->timer.function = gpstate_timer_handler;
+	gpstates->timer.expires = jiffies +
+				msecs_to_jiffies(GPSTATE_TIMER_INTERVAL);
+	spin_lock_init(&gpstates->gpstate_lock);
+	ret = cpufreq_table_validate_and_show(policy, powernv_freqs);
+
+	if (ret < 0)
+		kfree(policy->driver_data);
+
+	return ret;
+}
+
+static int powernv_cpufreq_cpu_exit(struct cpufreq_policy *policy)
+{
+	/* timer is deleted in cpufreq_cpu_stop() */
+	kfree(policy->driver_data);
+
+	return 0;
 }
 
 static int powernv_cpufreq_reboot_notifier(struct notifier_block *nb,
@@ -604,15 +846,19 @@
 static void powernv_cpufreq_stop_cpu(struct cpufreq_policy *policy)
 {
 	struct powernv_smp_call_data freq_data;
+	struct global_pstate_info *gpstates = policy->driver_data;
 
 	freq_data.pstate_id = powernv_pstate_info.min;
+	freq_data.gpstate_id = powernv_pstate_info.min;
 	smp_call_function_single(policy->cpu, set_pstate, &freq_data, 1);
+	del_timer_sync(&gpstates->timer);
 }
 
 static struct cpufreq_driver powernv_cpufreq_driver = {
 	.name		= "powernv-cpufreq",
 	.flags		= CPUFREQ_CONST_LOOPS,
 	.init		= powernv_cpufreq_cpu_init,
+	.exit		= powernv_cpufreq_cpu_exit,
 	.verify		= cpufreq_generic_frequency_table_verify,
 	.target_index	= powernv_cpufreq_target_index,
 	.get		= powernv_cpufreq_get,
diff --git a/drivers/cpufreq/ppc_cbe_cpufreq.h b/drivers/cpufreq/ppc_cbe_cpufreq.h
index b4c00a5..3eace72 100644
--- a/drivers/cpufreq/ppc_cbe_cpufreq.h
+++ b/drivers/cpufreq/ppc_cbe_cpufreq.h
@@ -17,7 +17,7 @@
 
 int cbe_cpufreq_set_pmode_pmi(int cpu, unsigned int pmode);
 
-#if defined(CONFIG_CPU_FREQ_CBE_PMI) || defined(CONFIG_CPU_FREQ_CBE_PMI_MODULE)
+#if IS_ENABLED(CONFIG_CPU_FREQ_CBE_PMI)
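+/* IS_ENABLED() covers both built-in (=y) and modular (=m) configurations. */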
 extern bool cbe_cpufreq_has_pmi;
 #else
 #define cbe_cpufreq_has_pmi (0)
diff --git a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c
index 7969f76..7c4cd5c 100644
--- a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c
+++ b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c
@@ -23,7 +23,7 @@
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/timer.h>
-#include <linux/module.h>
+#include <linux/init.h>
 #include <linux/of_platform.h>
 
 #include <asm/processor.h>
@@ -142,15 +142,4 @@
 
 	return 0;
 }
-
-static void __exit cbe_cpufreq_pmi_exit(void)
-{
-	cpufreq_unregister_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER);
-	pmi_unregister_handler(&cbe_pmi_handler);
-}
-
-module_init(cbe_cpufreq_pmi_init);
-module_exit(cbe_cpufreq_pmi_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>");
+device_initcall(cbe_cpufreq_pmi_init);
diff --git a/drivers/cpufreq/pxa2xx-cpufreq.c b/drivers/cpufreq/pxa2xx-cpufreq.c
index 46fee15..ce345bf 100644
--- a/drivers/cpufreq/pxa2xx-cpufreq.c
+++ b/drivers/cpufreq/pxa2xx-cpufreq.c
@@ -29,6 +29,8 @@
  *
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/sched.h>
@@ -186,8 +188,7 @@
 
 	ret = regulator_set_voltage(vcc_core, vmin, vmax);
 	if (ret)
-		pr_err("cpufreq: Failed to set vcc_core in [%dmV..%dmV]\n",
-		       vmin, vmax);
+		pr_err("Failed to set vcc_core in [%dmV..%dmV]\n", vmin, vmax);
 	return ret;
 }
 
@@ -195,10 +196,10 @@
 {
 	vcc_core = regulator_get(NULL, "vcc_core");
 	if (IS_ERR(vcc_core)) {
-		pr_info("cpufreq: Didn't find vcc_core regulator\n");
+		pr_info("Didn't find vcc_core regulator\n");
 		vcc_core = NULL;
 	} else {
-		pr_info("cpufreq: Found vcc_core regulator\n");
+		pr_info("Found vcc_core regulator\n");
 	}
 }
 #else
@@ -233,9 +234,8 @@
 {
 	if (!pxa27x_maxfreq) {
 		pxa27x_maxfreq = 416000;
-		printk(KERN_INFO "PXA CPU 27x max frequency not defined "
-		       "(pxa27x_maxfreq), assuming pxa271 with %dkHz maxfreq\n",
-		       pxa27x_maxfreq);
+		pr_info("PXA CPU 27x max frequency not defined (pxa27x_maxfreq), assuming pxa271 with %dkHz maxfreq\n",
+			pxa27x_maxfreq);
 	} else {
 		pxa27x_maxfreq *= 1000;
 	}
@@ -408,7 +408,7 @@
 	 */
 	if (cpu_is_pxa25x()) {
 		find_freq_tables(&pxa255_freq_table, &pxa255_freqs);
-		pr_info("PXA255 cpufreq using %s frequency table\n",
+		pr_info("using %s frequency table\n",
 			pxa255_turbo_table ? "turbo" : "run");
 
 		cpufreq_table_validate_and_show(policy, pxa255_freq_table);
@@ -417,7 +417,7 @@
 		cpufreq_table_validate_and_show(policy, pxa27x_freq_table);
 	}
 
-	printk(KERN_INFO "PXA CPU frequency change support initialized\n");
+	pr_info("frequency change support initialized\n");
 
 	return 0;
 }
diff --git a/drivers/cpufreq/qoriq-cpufreq.c b/drivers/cpufreq/qoriq-cpufreq.c
index b23e525..53d8c3f 100644
--- a/drivers/cpufreq/qoriq-cpufreq.c
+++ b/drivers/cpufreq/qoriq-cpufreq.c
@@ -301,10 +301,11 @@
 	return -ENODEV;
 }
 
-static int __exit qoriq_cpufreq_cpu_exit(struct cpufreq_policy *policy)
+static int qoriq_cpufreq_cpu_exit(struct cpufreq_policy *policy)
 {
 	struct cpu_data *data = policy->driver_data;
 
+	cpufreq_cooling_unregister(data->cdev);
 	kfree(data->pclk);
 	kfree(data->table);
 	kfree(data);
@@ -333,8 +334,8 @@
 		cpud->cdev = of_cpufreq_cooling_register(np,
 							 policy->related_cpus);
 
-		if (IS_ERR(cpud->cdev)) {
-			pr_err("Failed to register cooling device cpu%d: %ld\n",
+		if (IS_ERR(cpud->cdev) && PTR_ERR(cpud->cdev) != -ENOSYS) {
+			pr_err("cpu%d is not running as cooling device: %ld\n",
 					policy->cpu, PTR_ERR(cpud->cdev));
 
 			cpud->cdev = NULL;
@@ -348,7 +349,7 @@
 	.name		= "qoriq_cpufreq",
 	.flags		= CPUFREQ_CONST_LOOPS,
 	.init		= qoriq_cpufreq_cpu_init,
-	.exit		= __exit_p(qoriq_cpufreq_cpu_exit),
+	.exit		= qoriq_cpufreq_cpu_exit,
 	.verify		= cpufreq_generic_frequency_table_verify,
 	.target_index	= qoriq_cpufreq_target,
 	.get		= cpufreq_generic_get,
diff --git a/drivers/cpufreq/s3c2412-cpufreq.c b/drivers/cpufreq/s3c2412-cpufreq.c
index eb26213..b04b6f0 100644
--- a/drivers/cpufreq/s3c2412-cpufreq.c
+++ b/drivers/cpufreq/s3c2412-cpufreq.c
@@ -10,6 +10,8 @@
  * published by the Free Software Foundation.
 */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
@@ -197,21 +199,20 @@
 
 	hclk = clk_get(NULL, "hclk");
 	if (IS_ERR(hclk)) {
-		printk(KERN_ERR "%s: cannot find hclk clock\n", __func__);
+		pr_err("cannot find hclk clock\n");
 		return -ENOENT;
 	}
 
 	fclk = clk_get(NULL, "fclk");
 	if (IS_ERR(fclk)) {
-		printk(KERN_ERR "%s: cannot find fclk clock\n", __func__);
+		pr_err("cannot find fclk clock\n");
 		goto err_fclk;
 	}
 
 	fclk_rate = clk_get_rate(fclk);
 	if (fclk_rate > 200000000) {
-		printk(KERN_INFO
-		       "%s: fclk %ld MHz, assuming 266MHz capable part\n",
-		       __func__, fclk_rate / 1000000);
+		pr_info("fclk %ld MHz, assuming 266MHz capable part\n",
+			fclk_rate / 1000000);
 		s3c2412_cpufreq_info.max.fclk = 266000000;
 		s3c2412_cpufreq_info.max.hclk = 133000000;
 		s3c2412_cpufreq_info.max.pclk =  66000000;
@@ -219,13 +220,13 @@
 
 	armclk = clk_get(NULL, "armclk");
 	if (IS_ERR(armclk)) {
-		printk(KERN_ERR "%s: cannot find arm clock\n", __func__);
+		pr_err("cannot find arm clock\n");
 		goto err_armclk;
 	}
 
 	xtal = clk_get(NULL, "xtal");
 	if (IS_ERR(xtal)) {
-		printk(KERN_ERR "%s: cannot find xtal clock\n", __func__);
+		pr_err("cannot find xtal clock\n");
 		goto err_xtal;
 	}
 
diff --git a/drivers/cpufreq/s3c2440-cpufreq.c b/drivers/cpufreq/s3c2440-cpufreq.c
index 0129f5c..d0d75b6 100644
--- a/drivers/cpufreq/s3c2440-cpufreq.c
+++ b/drivers/cpufreq/s3c2440-cpufreq.c
@@ -11,6 +11,8 @@
  * published by the Free Software Foundation.
 */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
@@ -66,7 +68,7 @@
 		     __func__, fclk, armclk, hclk_max);
 
 	if (armclk > fclk) {
-		printk(KERN_WARNING "%s: armclk > fclk\n", __func__);
+		pr_warn("%s: armclk > fclk\n", __func__);
 		armclk = fclk;
 	}
 
@@ -273,7 +275,7 @@
 	armclk = s3c_cpufreq_clk_get(NULL, "armclk");
 
 	if (IS_ERR(xtal) || IS_ERR(hclk) || IS_ERR(fclk) || IS_ERR(armclk)) {
-		printk(KERN_ERR "%s: failed to get clocks\n", __func__);
+		pr_err("%s: failed to get clocks\n", __func__);
 		return -ENOENT;
 	}
 
diff --git a/drivers/cpufreq/s3c24xx-cpufreq-debugfs.c b/drivers/cpufreq/s3c24xx-cpufreq-debugfs.c
index 9b7b428..4d976e8 100644
--- a/drivers/cpufreq/s3c24xx-cpufreq-debugfs.c
+++ b/drivers/cpufreq/s3c24xx-cpufreq-debugfs.c
@@ -10,6 +10,8 @@
  * published by the Free Software Foundation.
 */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/init.h>
 #include <linux/export.h>
 #include <linux/interrupt.h>
@@ -178,7 +180,7 @@
 {
 	dbgfs_root = debugfs_create_dir("s3c-cpufreq", NULL);
 	if (IS_ERR(dbgfs_root)) {
-		printk(KERN_ERR "%s: error creating debugfs root\n", __func__);
+		pr_err("%s: error creating debugfs root\n", __func__);
 		return PTR_ERR(dbgfs_root);
 	}
 
diff --git a/drivers/cpufreq/s3c24xx-cpufreq.c b/drivers/cpufreq/s3c24xx-cpufreq.c
index 68ef8fd..ae8eaed 100644
--- a/drivers/cpufreq/s3c24xx-cpufreq.c
+++ b/drivers/cpufreq/s3c24xx-cpufreq.c
@@ -10,6 +10,8 @@
  * published by the Free Software Foundation.
 */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
@@ -175,7 +177,7 @@
 	cpu_new.freq.fclk = cpu_new.pll.frequency;
 
 	if (s3c_cpufreq_calcdivs(&cpu_new) < 0) {
-		printk(KERN_ERR "no divisors for %d\n", target_freq);
+		pr_err("no divisors for %d\n", target_freq);
 		goto err_notpossible;
 	}
 
@@ -187,7 +189,7 @@
 
 	if (cpu_new.freq.hclk != cpu_cur.freq.hclk) {
 		if (s3c_cpufreq_calcio(&cpu_new) < 0) {
-			printk(KERN_ERR "%s: no IO timings\n", __func__);
+			pr_err("%s: no IO timings\n", __func__);
 			goto err_notpossible;
 		}
 	}
@@ -262,7 +264,7 @@
 	return 0;
 
  err_notpossible:
-	printk(KERN_ERR "no compatible settings for %d\n", target_freq);
+	pr_err("no compatible settings for %d\n", target_freq);
 	return -EINVAL;
 }
 
@@ -331,7 +333,7 @@
 						     &index);
 
 		if (ret < 0) {
-			printk(KERN_ERR "%s: no PLL available\n", __func__);
+			pr_err("%s: no PLL available\n", __func__);
 			goto err_notpossible;
 		}
 
@@ -346,7 +348,7 @@
 	return s3c_cpufreq_settarget(policy, target_freq, pll);
 
  err_notpossible:
-	printk(KERN_ERR "no compatible settings for %d\n", target_freq);
+	pr_err("no compatible settings for %d\n", target_freq);
 	return -EINVAL;
 }
 
@@ -356,7 +358,7 @@
 
 	clk = clk_get(dev, name);
 	if (IS_ERR(clk))
-		printk(KERN_ERR "cpufreq: failed to get clock '%s'\n", name);
+		pr_err("failed to get clock '%s'\n", name);
 
 	return clk;
 }
@@ -378,15 +380,16 @@
 
 	if (IS_ERR(clk_fclk) || IS_ERR(clk_hclk) || IS_ERR(clk_pclk) ||
 	    IS_ERR(_clk_mpll) || IS_ERR(clk_arm) || IS_ERR(_clk_xtal)) {
-		printk(KERN_ERR "%s: could not get clock(s)\n", __func__);
+		pr_err("%s: could not get clock(s)\n", __func__);
 		return -ENOENT;
 	}
 
-	printk(KERN_INFO "%s: clocks f=%lu,h=%lu,p=%lu,a=%lu\n", __func__,
-	       clk_get_rate(clk_fclk) / 1000,
-	       clk_get_rate(clk_hclk) / 1000,
-	       clk_get_rate(clk_pclk) / 1000,
-	       clk_get_rate(clk_arm) / 1000);
+	pr_info("%s: clocks f=%lu,h=%lu,p=%lu,a=%lu\n",
+		__func__,
+		clk_get_rate(clk_fclk) / 1000,
+		clk_get_rate(clk_hclk) / 1000,
+		clk_get_rate(clk_pclk) / 1000,
+		clk_get_rate(clk_arm) / 1000);
 
 	return 0;
 }
@@ -424,7 +427,7 @@
 
 	ret = s3c_cpufreq_settarget(NULL, suspend_freq, &suspend_pll);
 	if (ret) {
-		printk(KERN_ERR "%s: failed to reset pll/freq\n", __func__);
+		pr_err("%s: failed to reset pll/freq\n", __func__);
 		return ret;
 	}
 
@@ -449,13 +452,12 @@
 int s3c_cpufreq_register(struct s3c_cpufreq_info *info)
 {
 	if (!info || !info->name) {
-		printk(KERN_ERR "%s: failed to pass valid information\n",
-		       __func__);
+		pr_err("%s: failed to pass valid information\n", __func__);
 		return -EINVAL;
 	}
 
-	printk(KERN_INFO "S3C24XX CPU Frequency driver, %s cpu support\n",
-	       info->name);
+	pr_info("S3C24XX CPU Frequency driver, %s cpu support\n",
+		info->name);
 
 	/* check our driver info has valid data */
 
@@ -478,7 +480,7 @@
 	struct s3c_cpufreq_board *ours;
 
 	if (!board) {
-		printk(KERN_INFO "%s: no board data\n", __func__);
+		pr_info("%s: no board data\n", __func__);
 		return -EINVAL;
 	}
 
@@ -487,7 +489,7 @@
 
 	ours = kzalloc(sizeof(*ours), GFP_KERNEL);
 	if (ours == NULL) {
-		printk(KERN_ERR "%s: no memory\n", __func__);
+		pr_err("%s: no memory\n", __func__);
 		return -ENOMEM;
 	}
 
@@ -502,15 +504,15 @@
 	int ret;
 
 	if (!cpu_cur.info->get_iotiming) {
-		printk(KERN_ERR "%s: get_iotiming undefined\n", __func__);
+		pr_err("%s: get_iotiming undefined\n", __func__);
 		return -ENOENT;
 	}
 
-	printk(KERN_INFO "%s: working out IO settings\n", __func__);
+	pr_info("%s: working out IO settings\n", __func__);
 
 	ret = (cpu_cur.info->get_iotiming)(&cpu_cur, &s3c24xx_iotiming);
 	if (ret)
-		printk(KERN_ERR "%s: failed to get timings\n", __func__);
+		pr_err("%s: failed to get timings\n", __func__);
 
 	return ret;
 }
@@ -561,7 +563,7 @@
 	val = calc_locktime(rate, cpu_cur.info->locktime_u) << bits;
 	val |= calc_locktime(rate, cpu_cur.info->locktime_m);
 
-	printk(KERN_INFO "%s: new locktime is 0x%08x\n", __func__, val);
+	pr_info("%s: new locktime is 0x%08x\n", __func__, val);
 	__raw_writel(val, S3C2410_LOCKTIME);
 }
 
@@ -580,7 +582,7 @@
 
 	ftab = kzalloc(sizeof(*ftab) * size, GFP_KERNEL);
 	if (!ftab) {
-		printk(KERN_ERR "%s: no memory for tables\n", __func__);
+		pr_err("%s: no memory for tables\n", __func__);
 		return -ENOMEM;
 	}
 
@@ -608,15 +610,14 @@
 		if (cpu_cur.board->auto_io) {
 			ret = s3c_cpufreq_auto_io();
 			if (ret) {
-				printk(KERN_ERR "%s: failed to get io timing\n",
+				pr_err("%s: failed to get io timing\n",
 				       __func__);
 				goto out;
 			}
 		}
 
 		if (cpu_cur.board->need_io && !cpu_cur.info->set_iotiming) {
-			printk(KERN_ERR "%s: no IO support registered\n",
-			       __func__);
+			pr_err("%s: no IO support registered\n", __func__);
 			ret = -EINVAL;
 			goto out;
 		}
@@ -666,9 +667,9 @@
 		vals += plls_no;
 		vals->frequency = CPUFREQ_TABLE_END;
 
-		printk(KERN_INFO "cpufreq: %d PLL entries\n", plls_no);
+		pr_info("%d PLL entries\n", plls_no);
 	} else
-		printk(KERN_ERR "cpufreq: no memory for PLL tables\n");
+		pr_err("no memory for PLL tables\n");
 
 	return vals ? 0 : -ENOMEM;
 }
diff --git a/drivers/cpufreq/s5pv210-cpufreq.c b/drivers/cpufreq/s5pv210-cpufreq.c
index a145b31..06d8591 100644
--- a/drivers/cpufreq/s5pv210-cpufreq.c
+++ b/drivers/cpufreq/s5pv210-cpufreq.c
@@ -9,6 +9,8 @@
  * published by the Free Software Foundation.
 */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
@@ -205,7 +207,7 @@
 	} else if (ch == DMC1) {
 		reg = (dmc_base[1] + 0x30);
 	} else {
-		printk(KERN_ERR "Cannot find DMC port\n");
+		pr_err("Cannot find DMC port\n");
 		return;
 	}
 
@@ -534,7 +536,7 @@
 	mem_type = check_mem_type(dmc_base[0]);
 
 	if ((mem_type != LPDDR) && (mem_type != LPDDR2)) {
-		printk(KERN_ERR "CPUFreq doesn't support this memory type\n");
+		pr_err("CPUFreq doesn't support this memory type\n");
 		ret = -EINVAL;
 		goto out_dmc1;
 	}
@@ -635,13 +637,13 @@
 
 	arm_regulator = regulator_get(NULL, "vddarm");
 	if (IS_ERR(arm_regulator)) {
-		pr_err("failed to get regulator vddarm");
+		pr_err("failed to get regulator vddarm\n");
 		return PTR_ERR(arm_regulator);
 	}
 
 	int_regulator = regulator_get(NULL, "vddint");
 	if (IS_ERR(int_regulator)) {
-		pr_err("failed to get regulator vddint");
+		pr_err("failed to get regulator vddint\n");
 		regulator_put(arm_regulator);
 		return PTR_ERR(int_regulator);
 	}
diff --git a/drivers/cpufreq/sc520_freq.c b/drivers/cpufreq/sc520_freq.c
index ac84e48..4225501 100644
--- a/drivers/cpufreq/sc520_freq.c
+++ b/drivers/cpufreq/sc520_freq.c
@@ -13,6 +13,8 @@
  *	2005-03-30: - initial revision
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
@@ -30,8 +32,6 @@
 
 static __u8 __iomem *cpuctl;
 
-#define PFX "sc520_freq: "
-
 static struct cpufreq_frequency_table sc520_freq_table[] = {
 	{0, 0x01,	100000},
 	{0, 0x02,	133000},
@@ -44,8 +44,8 @@
 
 	switch (clockspeed_reg & 0x03) {
 	default:
-		printk(KERN_ERR PFX "error: cpuctl register has unexpected "
-				"value %02x\n", clockspeed_reg);
+		pr_err("error: cpuctl register has unexpected value %02x\n",
+		       clockspeed_reg);
 	case 0x01:
 		return 100000;
 	case 0x02:
@@ -112,7 +112,7 @@
 
 	cpuctl = ioremap((unsigned long)(MMCR_BASE + OFFS_CPUCTL), 1);
 	if (!cpuctl) {
-		printk(KERN_ERR "sc520_freq: error: failed to remap memory\n");
+		pr_err("sc520_freq: error: failed to remap memory\n");
 		return -ENOMEM;
 	}
 
diff --git a/drivers/cpufreq/scpi-cpufreq.c b/drivers/cpufreq/scpi-cpufreq.c
index de5e89b..e8a7bf5 100644
--- a/drivers/cpufreq/scpi-cpufreq.c
+++ b/drivers/cpufreq/scpi-cpufreq.c
@@ -18,6 +18,7 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/cpu.h>
 #include <linux/cpufreq.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
@@ -38,35 +39,6 @@
 	return scpi_ops->dvfs_get_info(domain);
 }
 
-static int scpi_opp_table_ops(struct device *cpu_dev, bool remove)
-{
-	int idx, ret = 0;
-	struct scpi_opp *opp;
-	struct scpi_dvfs_info *info = scpi_get_dvfs_info(cpu_dev);
-
-	if (IS_ERR(info))
-		return PTR_ERR(info);
-
-	if (!info->opps)
-		return -EIO;
-
-	for (opp = info->opps, idx = 0; idx < info->count; idx++, opp++) {
-		if (remove)
-			dev_pm_opp_remove(cpu_dev, opp->freq);
-		else
-			ret = dev_pm_opp_add(cpu_dev, opp->freq,
-					     opp->m_volt * 1000);
-		if (ret) {
-			dev_warn(cpu_dev, "failed to add opp %uHz %umV\n",
-				 opp->freq, opp->m_volt);
-			while (idx-- > 0)
-				dev_pm_opp_remove(cpu_dev, (--opp)->freq);
-			return ret;
-		}
-	}
-	return ret;
-}
-
 static int scpi_get_transition_latency(struct device *cpu_dev)
 {
 	struct scpi_dvfs_info *info = scpi_get_dvfs_info(cpu_dev);
@@ -76,21 +48,42 @@
 	return info->latency;
 }
 
-static int scpi_init_opp_table(struct device *cpu_dev)
+static int scpi_init_opp_table(const struct cpumask *cpumask)
 {
-	return scpi_opp_table_ops(cpu_dev, false);
-}
+	int idx, ret;
+	struct scpi_opp *opp;
+	struct device *cpu_dev = get_cpu_device(cpumask_first(cpumask));
+	struct scpi_dvfs_info *info = scpi_get_dvfs_info(cpu_dev);
 
-static void scpi_free_opp_table(struct device *cpu_dev)
-{
-	scpi_opp_table_ops(cpu_dev, true);
+	if (IS_ERR(info))
+		return PTR_ERR(info);
+
+	if (!info->opps)
+		return -EIO;
+
+	for (opp = info->opps, idx = 0; idx < info->count; idx++, opp++) {
+		ret = dev_pm_opp_add(cpu_dev, opp->freq, opp->m_volt * 1000);
+		if (ret) {
+			dev_warn(cpu_dev, "failed to add opp %uHz %umV\n",
+				 opp->freq, opp->m_volt);
+			while (idx-- > 0)
+				dev_pm_opp_remove(cpu_dev, (--opp)->freq);
+			return ret;
+		}
+	}
+
+	ret = dev_pm_opp_set_sharing_cpus(cpu_dev, cpumask);
+	if (ret)
+		dev_err(cpu_dev, "%s: failed to mark OPPs as shared: %d\n",
+			__func__, ret);
+	return ret;
 }
 
 static struct cpufreq_arm_bL_ops scpi_cpufreq_ops = {
 	.name	= "scpi",
 	.get_transition_latency = scpi_get_transition_latency,
 	.init_opp_table = scpi_init_opp_table,
-	.free_opp_table = scpi_free_opp_table,
+	.free_opp_table = dev_pm_opp_cpumask_remove_table,
 };
 
 static int scpi_cpufreq_probe(struct platform_device *pdev)
diff --git a/drivers/cpufreq/speedstep-centrino.c b/drivers/cpufreq/speedstep-centrino.c
index 7d4a315..41bc539 100644
--- a/drivers/cpufreq/speedstep-centrino.c
+++ b/drivers/cpufreq/speedstep-centrino.c
@@ -13,6 +13,8 @@
  * Copyright (C) 2003 Jeremy Fitzhardinge <jeremy@goop.org>
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
@@ -27,7 +29,6 @@
 #include <asm/cpufeature.h>
 #include <asm/cpu_device_id.h>
 
-#define PFX		"speedstep-centrino: "
 #define MAINTAINER	"linux-pm@vger.kernel.org"
 
 #define INTEL_MSR_RANGE	(0xffff)
@@ -386,8 +387,7 @@
 		/* check to see if it stuck */
 		rdmsr(MSR_IA32_MISC_ENABLE, l, h);
 		if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
-			printk(KERN_INFO PFX
-				"couldn't enable Enhanced SpeedStep\n");
+			pr_info("couldn't enable Enhanced SpeedStep\n");
 			return -ENODEV;
 		}
 	}
diff --git a/drivers/cpufreq/speedstep-ich.c b/drivers/cpufreq/speedstep-ich.c
index 37555c6..b86953a 100644
--- a/drivers/cpufreq/speedstep-ich.c
+++ b/drivers/cpufreq/speedstep-ich.c
@@ -18,6 +18,8 @@
  *                        SPEEDSTEP - DEFINITIONS                    *
  *********************************************************************/
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
@@ -68,13 +70,13 @@
 	/* get PMBASE */
 	pci_read_config_dword(speedstep_chipset_dev, 0x40, &pmbase);
 	if (!(pmbase & 0x01)) {
-		printk(KERN_ERR "speedstep-ich: could not find speedstep register\n");
+		pr_err("could not find speedstep register\n");
 		return -ENODEV;
 	}
 
 	pmbase &= 0xFFFFFFFE;
 	if (!pmbase) {
-		printk(KERN_ERR "speedstep-ich: could not find speedstep register\n");
+		pr_err("could not find speedstep register\n");
 		return -ENODEV;
 	}
 
@@ -136,7 +138,7 @@
 		pr_debug("change to %u MHz succeeded\n",
 			speedstep_get_frequency(speedstep_processor) / 1000);
 	else
-		printk(KERN_ERR "cpufreq: change failed - I/O error\n");
+		pr_err("change failed - I/O error\n");
 
 	return;
 }
diff --git a/drivers/cpufreq/speedstep-lib.c b/drivers/cpufreq/speedstep-lib.c
index 15d3214..1b80621 100644
--- a/drivers/cpufreq/speedstep-lib.c
+++ b/drivers/cpufreq/speedstep-lib.c
@@ -8,6 +8,8 @@
  *  BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
@@ -153,7 +155,7 @@
 		fsb = 333333;
 		break;
 	default:
-		printk(KERN_ERR "PCORE - MSR_FSB_FREQ undefined value");
+		pr_err("PCORE - MSR_FSB_FREQ undefined value\n");
 	}
 
 	rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp);
@@ -453,11 +455,8 @@
 		 */
 		if (*transition_latency > 10000000 ||
 		    *transition_latency < 50000) {
-			printk(KERN_WARNING PFX "frequency transition "
-					"measured seems out of range (%u "
-					"nSec), falling back to a safe one of"
-					"%u nSec.\n",
-					*transition_latency, 500000);
+			pr_warn("frequency transition measured seems out of range (%u nSec), falling back to a safe one of %u nSec\n",
+				*transition_latency, 500000);
 			*transition_latency = 500000;
 		}
 	}
diff --git a/drivers/cpufreq/speedstep-smi.c b/drivers/cpufreq/speedstep-smi.c
index 819229e..770a9ae 100644
--- a/drivers/cpufreq/speedstep-smi.c
+++ b/drivers/cpufreq/speedstep-smi.c
@@ -12,6 +12,8 @@
  *                        SPEEDSTEP - DEFINITIONS                    *
  *********************************************************************/
 
+#define pr_fmt(fmt) "cpufreq: " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
@@ -204,9 +206,8 @@
 			(speedstep_freqs[new_state].frequency / 1000),
 			retry, result);
 	else
-		printk(KERN_ERR "cpufreq: change to state %u "
-			"failed with new_state %u and result %u\n",
-			state, new_state, result);
+		pr_err("change to state %u failed with new_state %u and result %u\n",
+		       state, new_state, result);
 
 	return;
 }
diff --git a/drivers/cpufreq/tegra124-cpufreq.c b/drivers/cpufreq/tegra124-cpufreq.c
index 20bcceb..4353025 100644
--- a/drivers/cpufreq/tegra124-cpufreq.c
+++ b/drivers/cpufreq/tegra124-cpufreq.c
@@ -14,7 +14,6 @@
 #define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
 
 #include <linux/clk.h>
-#include <linux/cpufreq-dt.h>
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -69,10 +68,6 @@
 	clk_set_parent(priv->cpu_clk, priv->pllx_clk);
 }
 
-static struct cpufreq_dt_platform_data cpufreq_dt_pd = {
-	.independent_clocks = false,
-};
-
 static int tegra124_cpufreq_probe(struct platform_device *pdev)
 {
 	struct tegra124_cpufreq_priv *priv;
@@ -129,8 +124,6 @@
 
 	cpufreq_dt_devinfo.name = "cpufreq-dt";
 	cpufreq_dt_devinfo.parent = &pdev->dev;
-	cpufreq_dt_devinfo.data = &cpufreq_dt_pd;
-	cpufreq_dt_devinfo.size_data = sizeof(cpufreq_dt_pd);
 
 	priv->cpufreq_dt_pdev =
 		platform_device_register_full(&cpufreq_dt_devinfo);
diff --git a/drivers/cpufreq/vexpress-spc-cpufreq.c b/drivers/cpufreq/vexpress-spc-cpufreq.c
index 433e93f..87e5bdc 100644
--- a/drivers/cpufreq/vexpress-spc-cpufreq.c
+++ b/drivers/cpufreq/vexpress-spc-cpufreq.c
@@ -18,6 +18,7 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/cpu.h>
 #include <linux/cpufreq.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
@@ -26,8 +27,9 @@
 
 #include "arm_big_little.h"
 
-static int ve_spc_init_opp_table(struct device *cpu_dev)
+static int ve_spc_init_opp_table(const struct cpumask *cpumask)
 {
+	struct device *cpu_dev = get_cpu_device(cpumask_first(cpumask));
 	/*
 	 * platform specific SPC code must initialise the opp table
 	 * so just check if the OPP count is non-zero
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index f996efc..2b8e6ce 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -173,7 +173,7 @@
 
 	struct cpuidle_state *target_state = &drv->states[index];
 	bool broadcast = !!(target_state->flags & CPUIDLE_FLAG_TIMER_STOP);
-	ktime_t time_start, time_end;
+	u64 time_start, time_end;
 	s64 diff;
 
 	/*
@@ -195,13 +195,13 @@
 	sched_idle_set_state(target_state);
 
 	trace_cpu_idle_rcuidle(index, dev->cpu);
-	time_start = ktime_get();
+	time_start = local_clock();
 
 	stop_critical_timings();
 	entered_state = target_state->enter(dev, drv, index);
 	start_critical_timings();
 
-	time_end = ktime_get();
+	time_end = local_clock();
 	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu);
 
 	/* The cpu is no longer idle or about to enter idle. */
@@ -217,7 +217,11 @@
 	if (!cpuidle_state_is_coupled(drv, entered_state))
 		local_irq_enable();
 
-	diff = ktime_to_us(ktime_sub(time_end, time_start));
+	/*
+	 * local_clock() returns the time in nanoseconds; shift it
+	 * by 10 (i.e. divide by 1024) to get a microsecond-based time.
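+	 * E.g. a residency of 2,097,152 ns reads as 2048 us here (exact:
+	 * 2097 us), trading ~2.3% precision for a shift instead of a division.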
+	 */
+	diff = (time_end - time_start) >> 10;
 	if (diff > INT_MAX)
 		diff = INT_MAX;
 
@@ -433,6 +437,8 @@
 	list_del(&dev->device_list);
 	per_cpu(cpuidle_devices, dev->cpu) = NULL;
 	module_put(drv->owner);
+
+	dev->registered = 0;
 }
 
 static void __cpuidle_device_init(struct cpuidle_device *dev)
diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig
index 4de78c5..78dac0e 100644
--- a/drivers/devfreq/Kconfig
+++ b/drivers/devfreq/Kconfig
@@ -64,30 +64,32 @@
 	  Otherwise, the governor does not change the frequency
 	  given at the initialization.
 
+config DEVFREQ_GOV_PASSIVE
+	tristate "Passive"
+	help
+	  Sets the frequency based on the frequency of its parent devfreq
+	  device. This governor does not change the frequency by itself
+	  through sysfs entries. The passive governor recommends that the
+	  devfreq device use the OPP table to get the frequency/voltage.
+
 comment "DEVFREQ Drivers"
 
-config ARM_EXYNOS4_BUS_DEVFREQ
-	bool "ARM Exynos4210/4212/4412 Memory Bus DEVFREQ Driver"
-	depends on (CPU_EXYNOS4210 || SOC_EXYNOS4212 || SOC_EXYNOS4412) && !ARCH_MULTIPLATFORM
+config ARM_EXYNOS_BUS_DEVFREQ
+	bool "ARM EXYNOS Generic Memory Bus DEVFREQ Driver"
+	depends on ARCH_EXYNOS
 	select DEVFREQ_GOV_SIMPLE_ONDEMAND
+	select DEVFREQ_GOV_PASSIVE
+	select DEVFREQ_EVENT_EXYNOS_PPMU
+	select PM_DEVFREQ_EVENT
 	select PM_OPP
 	help
-	  This adds the DEVFREQ driver for Exynos4210 memory bus (vdd_int)
-	  and Exynos4212/4412 memory interface and bus (vdd_mif + vdd_int).
-	  It reads PPMU counters of memory controllers and adjusts
-	  the operating frequencies and voltages with OPP support.
+	  This adds the common DEVFREQ driver for the Exynos memory bus. The
+	  Exynos memory bus has one or more groups of memory buses (e.g. the
+	  MIF and INT blocks), and each group can contain many memory bus
+	  blocks. The driver reads the PPMU counters of the memory controllers
+	  by using a DEVFREQ-event device and adjusts the operating
+	  frequencies and voltages with OPP support.
 	  This does not yet operate with optimal voltages.
 
-config ARM_EXYNOS5_BUS_DEVFREQ
-	tristate "ARM Exynos5250 Bus DEVFREQ Driver"
-	depends on SOC_EXYNOS5250
-	select DEVFREQ_GOV_SIMPLE_ONDEMAND
-	select PM_OPP
-	help
-	  This adds the DEVFREQ driver for Exynos5250 bus interface (vdd_int).
-	  It reads PPMU counters of memory controllers and adjusts the
-	  operating frequencies and voltages with OPP support.
-
 config ARM_TEGRA_DEVFREQ
        tristate "Tegra DEVFREQ Driver"
        depends on ARCH_TEGRA_124_SOC
diff --git a/drivers/devfreq/Makefile b/drivers/devfreq/Makefile
index 5134f9e..09f11d9 100644
--- a/drivers/devfreq/Makefile
+++ b/drivers/devfreq/Makefile
@@ -4,10 +4,10 @@
 obj-$(CONFIG_DEVFREQ_GOV_PERFORMANCE)	+= governor_performance.o
 obj-$(CONFIG_DEVFREQ_GOV_POWERSAVE)	+= governor_powersave.o
 obj-$(CONFIG_DEVFREQ_GOV_USERSPACE)	+= governor_userspace.o
+obj-$(CONFIG_DEVFREQ_GOV_PASSIVE)	+= governor_passive.o
 
 # DEVFREQ Drivers
-obj-$(CONFIG_ARM_EXYNOS4_BUS_DEVFREQ)	+= exynos/
-obj-$(CONFIG_ARM_EXYNOS5_BUS_DEVFREQ)	+= exynos/
+obj-$(CONFIG_ARM_EXYNOS_BUS_DEVFREQ)	+= exynos-bus.o
 obj-$(CONFIG_ARM_TEGRA_DEVFREQ)		+= tegra-devfreq.o
 
 # DEVFREQ Event Drivers
diff --git a/drivers/devfreq/devfreq-event.c b/drivers/devfreq/devfreq-event.c
index 38bf144..39b048e 100644
--- a/drivers/devfreq/devfreq-event.c
+++ b/drivers/devfreq/devfreq-event.c
@@ -235,6 +235,11 @@
 
 	mutex_lock(&devfreq_event_list_lock);
 	list_for_each_entry(edev, &devfreq_event_list, node) {
+		if (edev->dev.parent && edev->dev.parent->of_node == node)
+			goto out;
+	}
+
+	list_for_each_entry(edev, &devfreq_event_list, node) {
 		if (!strcmp(edev->desc->name, node->name))
 			goto out;
 	}
diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 984c5e9..1d6c803 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -25,6 +25,7 @@
 #include <linux/list.h>
 #include <linux/printk.h>
 #include <linux/hrtimer.h>
+#include <linux/of.h>
 #include "governor.h"
 
 static struct class *devfreq_class;
@@ -188,6 +189,29 @@
 	return ERR_PTR(-ENODEV);
 }
 
+static int devfreq_notify_transition(struct devfreq *devfreq,
+		struct devfreq_freqs *freqs, unsigned int state)
+{
+	if (!devfreq)
+		return -EINVAL;
+
+	switch (state) {
+	case DEVFREQ_PRECHANGE:
+		srcu_notifier_call_chain(&devfreq->transition_notifier_list,
+				DEVFREQ_PRECHANGE, freqs);
+		break;
+
+	case DEVFREQ_POSTCHANGE:
+		srcu_notifier_call_chain(&devfreq->transition_notifier_list,
+				DEVFREQ_POSTCHANGE, freqs);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 /* Load monitoring helper functions for governors use */
 
 /**
@@ -199,7 +223,8 @@
  */
 int update_devfreq(struct devfreq *devfreq)
 {
-	unsigned long freq;
+	struct devfreq_freqs freqs;
+	unsigned long freq, cur_freq;
 	int err = 0;
 	u32 flags = 0;
 
@@ -233,10 +258,22 @@
 		flags |= DEVFREQ_FLAG_LEAST_UPPER_BOUND; /* Use LUB */
 	}
 
+	if (devfreq->profile->get_cur_freq)
+		devfreq->profile->get_cur_freq(devfreq->dev.parent, &cur_freq);
+	else
+		cur_freq = devfreq->previous_freq;
+
+	freqs.old = cur_freq;
+	freqs.new = freq;
+	devfreq_notify_transition(devfreq, &freqs, DEVFREQ_PRECHANGE);
+
 	err = devfreq->profile->target(devfreq->dev.parent, &freq, flags);
 	if (err)
 		return err;
 
+	freqs.new = freq;
+	devfreq_notify_transition(devfreq, &freqs, DEVFREQ_POSTCHANGE);
+
 	if (devfreq->profile->freq_table)
 		if (devfreq_update_status(devfreq, freq))
 			dev_err(&devfreq->dev,
@@ -541,6 +578,8 @@
 		goto err_out;
 	}
 
+	srcu_init_notifier_head(&devfreq->transition_notifier_list);
+
 	mutex_unlock(&devfreq->lock);
 
 	mutex_lock(&devfreq_list_lock);
@@ -639,6 +678,49 @@
 }
 EXPORT_SYMBOL(devm_devfreq_add_device);
 
+#ifdef CONFIG_OF
+/*
+ * devfreq_get_devfreq_by_phandle - Get the devfreq device from devicetree
+ * @dev - instance to the given device
+ * @index - index into list of devfreq
+ *
+ * return the instance of devfreq device
+ */
+struct devfreq *devfreq_get_devfreq_by_phandle(struct device *dev, int index)
+{
+	struct device_node *node;
+	struct devfreq *devfreq;
+
+	if (!dev)
+		return ERR_PTR(-EINVAL);
+
+	if (!dev->of_node)
+		return ERR_PTR(-EINVAL);
+
+	node = of_parse_phandle(dev->of_node, "devfreq", index);
+	if (!node)
+		return ERR_PTR(-ENODEV);
+
+	mutex_lock(&devfreq_list_lock);
+	list_for_each_entry(devfreq, &devfreq_list, node) {
+		if (devfreq->dev.parent
+			&& devfreq->dev.parent->of_node == node) {
+			mutex_unlock(&devfreq_list_lock);
+			return devfreq;
+		}
+	}
+	mutex_unlock(&devfreq_list_lock);
+
+	return ERR_PTR(-EPROBE_DEFER);
+}
+#else
+struct devfreq *devfreq_get_devfreq_by_phandle(struct device *dev, int index)
+{
+	return ERR_PTR(-ENODEV);
+}
+#endif /* CONFIG_OF */
+EXPORT_SYMBOL_GPL(devfreq_get_devfreq_by_phandle);
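+
+/*
+ * Illustrative (hypothetical) consumer usage, assuming a device-tree node
+ * carrying a "devfreq = <&bus_parent>;" phandle property:
+ *
+ *	struct devfreq *parent = devfreq_get_devfreq_by_phandle(dev, 0);
+ *
+ *	if (IS_ERR(parent))
+ *		return PTR_ERR(parent);	(may be -EPROBE_DEFER, retried later)
+ */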
+
 /**
  * devm_devfreq_remove_device() - Resource-managed devfreq_remove_device()
  * @dev:	the device to add devfreq feature.
@@ -1266,6 +1348,129 @@
 }
 EXPORT_SYMBOL(devm_devfreq_unregister_opp_notifier);
 
+/**
+ * devfreq_register_notifier() - Register a notifier block with devfreq
+ * @devfreq:	The devfreq object.
+ * @nb:		The notifier block to register.
+ * @list:	DEVFREQ_TRANSITION_NOTIFIER.
+ */
+int devfreq_register_notifier(struct devfreq *devfreq,
+				struct notifier_block *nb,
+				unsigned int list)
+{
+	int ret = 0;
+
+	if (!devfreq)
+		return -EINVAL;
+
+	switch (list) {
+	case DEVFREQ_TRANSITION_NOTIFIER:
+		ret = srcu_notifier_chain_register(
+				&devfreq->transition_notifier_list, nb);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(devfreq_register_notifier);
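+
+/*
+ * A minimal (hypothetical) notifier callback sketch: @data points at the
+ * struct devfreq_freqs handed to srcu_notifier_call_chain() above, so a
+ * subscriber might look like:
+ *
+ *	static int foo_transition_notifier(struct notifier_block *nb,
+ *					   unsigned long event, void *data)
+ *	{
+ *		struct devfreq_freqs *freqs = data;
+ *
+ *		if (event == DEVFREQ_POSTCHANGE)
+ *			pr_debug("devfreq: %lu -> %lu\n",
+ *				 freqs->old, freqs->new);
+ *		return NOTIFY_OK;
+ *	}
+ */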
+
+/**
+ * devfreq_unregister_notifier() - Unregister a notifier block from devfreq
+ * @devfreq:	The devfreq object.
+ * @nb:		The notifier block to be unregistered.
+ * @list:	DEVFREQ_TRANSITION_NOTIFIER.
+ */
+int devfreq_unregister_notifier(struct devfreq *devfreq,
+				struct notifier_block *nb,
+				unsigned int list)
+{
+	int ret = 0;
+
+	if (!devfreq)
+		return -EINVAL;
+
+	switch (list) {
+	case DEVFREQ_TRANSITION_NOTIFIER:
+		ret = srcu_notifier_chain_unregister(
+				&devfreq->transition_notifier_list, nb);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(devfreq_unregister_notifier);
+
+struct devfreq_notifier_devres {
+	struct devfreq *devfreq;
+	struct notifier_block *nb;
+	unsigned int list;
+};
+
+static void devm_devfreq_notifier_release(struct device *dev, void *res)
+{
+	struct devfreq_notifier_devres *this = res;
+
+	devfreq_unregister_notifier(this->devfreq, this->nb, this->list);
+}
+
+/**
+ * devm_devfreq_register_notifier()
+ *	- Resource-managed devfreq_register_notifier()
+ * @dev:	The devfreq user device. (parent of devfreq)
+ * @devfreq:	The devfreq object.
+ * @nb:		The notifier block to be registered.
+ * @list:	DEVFREQ_TRANSITION_NOTIFIER.
+ */
+int devm_devfreq_register_notifier(struct device *dev,
+				struct devfreq *devfreq,
+				struct notifier_block *nb,
+				unsigned int list)
+{
+	struct devfreq_notifier_devres *ptr;
+	int ret;
+
+	ptr = devres_alloc(devm_devfreq_notifier_release, sizeof(*ptr),
+				GFP_KERNEL);
+	if (!ptr)
+		return -ENOMEM;
+
+	ret = devfreq_register_notifier(devfreq, nb, list);
+	if (ret) {
+		devres_free(ptr);
+		return ret;
+	}
+
+	ptr->devfreq = devfreq;
+	ptr->nb = nb;
+	ptr->list = list;
+	devres_add(dev, ptr);
+
+	return 0;
+}
+EXPORT_SYMBOL(devm_devfreq_register_notifier);
+
+/**
+ * devm_devfreq_unregister_notifier()
+ *	- Resource-managed devfreq_unregister_notifier()
+ * @dev:	The devfreq user device. (parent of devfreq)
+ * @devfreq:	The devfreq object.
+ * @nb:		The notifier block to be unregistered.
+ * @list:	DEVFREQ_TRANSITION_NOTIFIER.
+ */
+void devm_devfreq_unregister_notifier(struct device *dev,
+				struct devfreq *devfreq,
+				struct notifier_block *nb,
+				unsigned int list)
+{
+	WARN_ON(devres_release(dev, devm_devfreq_notifier_release,
+			       devm_devfreq_dev_match, devfreq));
+}
+EXPORT_SYMBOL(devm_devfreq_unregister_notifier);
+
 MODULE_AUTHOR("MyungJoo Ham <myungjoo.ham@samsung.com>");
 MODULE_DESCRIPTION("devfreq class support");
 MODULE_LICENSE("GPL");
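+
+/*
+ * Usage sketch (illustrative only, not part of this patch): a driver can
+ * register on the new DEVFREQ_TRANSITION_NOTIFIER chain to observe
+ * frequency transitions. The callback receives a struct devfreq_freqs
+ * carrying the old and new frequencies at DEVFREQ_PRECHANGE and
+ * DEVFREQ_POSTCHANGE. The names foo_notify/foo_nb and the helpers they
+ * call are hypothetical.
+ *
+ *	static int foo_notify(struct notifier_block *nb,
+ *			      unsigned long event, void *data)
+ *	{
+ *		struct devfreq_freqs *freqs = data;
+ *
+ *		switch (event) {
+ *		case DEVFREQ_PRECHANGE:
+ *			foo_prepare(freqs->old, freqs->new);
+ *			break;
+ *		case DEVFREQ_POSTCHANGE:
+ *			foo_complete(freqs->new);
+ *			break;
+ *		}
+ *		return NOTIFY_OK;
+ *	}
+ *
+ *	static struct notifier_block foo_nb = { .notifier_call = foo_notify };
+ *
+ *	err = devm_devfreq_register_notifier(dev, devfreq, &foo_nb,
+ *					     DEVFREQ_TRANSITION_NOTIFIER);
+ */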
diff --git a/drivers/devfreq/event/Kconfig b/drivers/devfreq/event/Kconfig
index a11720a..1e8b4f4 100644
--- a/drivers/devfreq/event/Kconfig
+++ b/drivers/devfreq/event/Kconfig
@@ -13,6 +13,14 @@
 
 if PM_DEVFREQ_EVENT
 
+config DEVFREQ_EVENT_EXYNOS_NOCP
+	bool "EXYNOS NoC (Network On Chip) Probe DEVFREQ event Driver"
+	depends on ARCH_EXYNOS
+	select PM_OPP
+	help
+	  This adds the devfreq-event driver for Exynos SoCs. It provides NoC
+	  (Network on Chip) probe counters to measure the bandwidth of the AXI
+	  bus.
+
 config DEVFREQ_EVENT_EXYNOS_PPMU
 	bool "EXYNOS PPMU (Platform Performance Monitoring Unit) DEVFREQ event Driver"
 	depends on ARCH_EXYNOS
diff --git a/drivers/devfreq/event/Makefile b/drivers/devfreq/event/Makefile
index be146ea..3d6afd3 100644
--- a/drivers/devfreq/event/Makefile
+++ b/drivers/devfreq/event/Makefile
@@ -1,2 +1,4 @@
 # Exynos DEVFREQ Event Drivers
+
+obj-$(CONFIG_DEVFREQ_EVENT_EXYNOS_NOCP) += exynos-nocp.o
 obj-$(CONFIG_DEVFREQ_EVENT_EXYNOS_PPMU) += exynos-ppmu.o
diff --git a/drivers/devfreq/event/exynos-nocp.c b/drivers/devfreq/event/exynos-nocp.c
new file mode 100644
index 0000000..6b6a5f3
--- /dev/null
+++ b/drivers/devfreq/event/exynos-nocp.c
@@ -0,0 +1,304 @@
+/*
+ * exynos-nocp.c - EXYNOS NoC (Network On Chip) Probe support
+ *
+ * Copyright (c) 2016 Samsung Electronics Co., Ltd.
+ * Author : Chanwoo Choi <cw00.choi@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/clk.h>
+#include <linux/module.h>
+#include <linux/devfreq-event.h>
+#include <linux/kernel.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+
+#include "exynos-nocp.h"
+
+struct exynos_nocp {
+	struct devfreq_event_dev *edev;
+	struct devfreq_event_desc desc;
+
+	struct device *dev;
+
+	struct regmap *regmap;
+	struct clk *clk;
+};
+
+/*
+ * The devfreq-event ops implementation for the NoC probe device.
+ */
+static int exynos_nocp_set_event(struct devfreq_event_dev *edev)
+{
+	struct exynos_nocp *nocp = devfreq_event_get_drvdata(edev);
+	int ret;
+
+	/* Disable NoC probe */
+	ret = regmap_update_bits(nocp->regmap, NOCP_MAIN_CTL,
+				NOCP_MAIN_CTL_STATEN_MASK, 0);
+	if (ret < 0) {
+		dev_err(nocp->dev, "failed to disable the NoC probe device\n");
+		return ret;
+	}
+
+	/* Set a statistics dump period to 0 */
+	ret = regmap_write(nocp->regmap, NOCP_STAT_PERIOD, 0x0);
+	if (ret < 0)
+		goto out;
+
+	/* Set the IntEvent fields of *_SRC */
+	ret = regmap_update_bits(nocp->regmap, NOCP_COUNTERS_0_SRC,
+				NOCP_CNT_SRC_INTEVENT_MASK,
+				NOCP_CNT_SRC_INTEVENT_BYTE_MASK);
+	if (ret < 0)
+		goto out;
+
+	ret = regmap_update_bits(nocp->regmap, NOCP_COUNTERS_1_SRC,
+				NOCP_CNT_SRC_INTEVENT_MASK,
+				NOCP_CNT_SRC_INTEVENT_CHAIN_MASK);
+	if (ret < 0)
+		goto out;
+
+	ret = regmap_update_bits(nocp->regmap, NOCP_COUNTERS_2_SRC,
+				NOCP_CNT_SRC_INTEVENT_MASK,
+				NOCP_CNT_SRC_INTEVENT_CYCLE_MASK);
+	if (ret < 0)
+		goto out;
+
+	ret = regmap_update_bits(nocp->regmap, NOCP_COUNTERS_3_SRC,
+				NOCP_CNT_SRC_INTEVENT_MASK,
+				NOCP_CNT_SRC_INTEVENT_CHAIN_MASK);
+	if (ret < 0)
+		goto out;
+
+	/* Set an alarm with a max/min value of 0 to generate StatALARM */
+	ret = regmap_write(nocp->regmap, NOCP_STAT_ALARM_MIN, 0x0);
+	if (ret < 0)
+		goto out;
+
+	ret = regmap_write(nocp->regmap, NOCP_STAT_ALARM_MAX, 0x0);
+	if (ret < 0)
+		goto out;
+
+	/* Set AlarmMode */
+	ret = regmap_update_bits(nocp->regmap, NOCP_COUNTERS_0_ALARM_MODE,
+				NOCP_CNT_ALARM_MODE_MASK,
+				NOCP_CNT_ALARM_MODE_MIN_MAX_MASK);
+	if (ret < 0)
+		goto out;
+
+	ret = regmap_update_bits(nocp->regmap, NOCP_COUNTERS_1_ALARM_MODE,
+				NOCP_CNT_ALARM_MODE_MASK,
+				NOCP_CNT_ALARM_MODE_MIN_MAX_MASK);
+	if (ret < 0)
+		goto out;
+
+	ret = regmap_update_bits(nocp->regmap, NOCP_COUNTERS_2_ALARM_MODE,
+				NOCP_CNT_ALARM_MODE_MASK,
+				NOCP_CNT_ALARM_MODE_MIN_MAX_MASK);
+	if (ret < 0)
+		goto out;
+
+	ret = regmap_update_bits(nocp->regmap, NOCP_COUNTERS_3_ALARM_MODE,
+				NOCP_CNT_ALARM_MODE_MASK,
+				NOCP_CNT_ALARM_MODE_MIN_MAX_MASK);
+	if (ret < 0)
+		goto out;
+
+	/* Enable the measurements by setting AlarmEn and StatEn */
+	ret = regmap_update_bits(nocp->regmap, NOCP_MAIN_CTL,
+			NOCP_MAIN_CTL_STATEN_MASK | NOCP_MAIN_CTL_ALARMEN_MASK,
+			NOCP_MAIN_CTL_STATEN_MASK | NOCP_MAIN_CTL_ALARMEN_MASK);
+	if (ret < 0)
+		goto out;
+
+	/* Set GlobalEN */
+	ret = regmap_update_bits(nocp->regmap, NOCP_CFG_CTL,
+				NOCP_CFG_CTL_GLOBALEN_MASK,
+				NOCP_CFG_CTL_GLOBALEN_MASK);
+	if (ret < 0)
+		goto out;
+
+	/* Enable NoC probe */
+	ret = regmap_update_bits(nocp->regmap, NOCP_MAIN_CTL,
+				NOCP_MAIN_CTL_STATEN_MASK,
+				NOCP_MAIN_CTL_STATEN_MASK);
+	if (ret < 0)
+		goto out;
+
+	return 0;
+
+out:
+	/* Reset NoC probe */
+	if (regmap_update_bits(nocp->regmap, NOCP_MAIN_CTL,
+				NOCP_MAIN_CTL_STATEN_MASK, 0)) {
+		dev_err(nocp->dev, "Failed to reset NoC probe device\n");
+	}
+
+	return ret;
+}
+
+static int exynos_nocp_get_event(struct devfreq_event_dev *edev,
+				struct devfreq_event_data *edata)
+{
+	struct exynos_nocp *nocp = devfreq_event_get_drvdata(edev);
+	unsigned int counter[4];
+	int ret;
+
+	/* Read cycle count */
+	ret = regmap_read(nocp->regmap, NOCP_COUNTERS_0_VAL, &counter[0]);
+	if (ret < 0)
+		goto out;
+
+	ret = regmap_read(nocp->regmap, NOCP_COUNTERS_1_VAL, &counter[1]);
+	if (ret < 0)
+		goto out;
+
+	ret = regmap_read(nocp->regmap, NOCP_COUNTERS_2_VAL, &counter[2]);
+	if (ret < 0)
+		goto out;
+
+	ret = regmap_read(nocp->regmap, NOCP_COUNTERS_3_VAL, &counter[3]);
+	if (ret < 0)
+		goto out;
+
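+	/*
+	 * Counters 1 and 3 were programmed as CHAIN sources in
+	 * exynos_nocp_set_event(), so each holds the upper 16 bits of the
+	 * preceding counter; combine each pair into a 32-bit byte count
+	 * and cycle count.
+	 */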
+	edata->load_count = ((counter[1] << 16) | counter[0]);
+	edata->total_count = ((counter[3] << 16) | counter[2]);
+
+	dev_dbg(&edev->dev, "%s (event: %lu/%lu)\n", edev->desc->name,
+					edata->load_count, edata->total_count);
+
+	return 0;
+
+out:
+	edata->load_count = 0;
+	edata->total_count = 0;
+
+	dev_err(nocp->dev, "Failed to read the counter of NoC probe device\n");
+
+	return ret;
+}
+
+static const struct devfreq_event_ops exynos_nocp_ops = {
+	.set_event = exynos_nocp_set_event,
+	.get_event = exynos_nocp_get_event,
+};
+
+static const struct of_device_id exynos_nocp_id_match[] = {
+	{ .compatible = "samsung,exynos5420-nocp", },
+	{ /* sentinel */ },
+};
+
+static struct regmap_config exynos_nocp_regmap_config = {
+	.reg_bits = 32,
+	.val_bits = 32,
+	.reg_stride = 4,
+	.max_register = NOCP_COUNTERS_3_VAL,
+};
+
+static int exynos_nocp_parse_dt(struct platform_device *pdev,
+				struct exynos_nocp *nocp)
+{
+	struct device *dev = nocp->dev;
+	struct device_node *np = dev->of_node;
+	struct resource *res;
+	void __iomem *base;
+
+	if (!np) {
+		dev_err(dev, "failed to find devicetree node\n");
+		return -EINVAL;
+	}
+
+	nocp->clk = devm_clk_get(dev, "nocp");
+	if (IS_ERR(nocp->clk))
+		nocp->clk = NULL;
+
+	/* Map the memory-mapped IO used to control the NoC probe registers */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -ENODEV;
+
+	base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	exynos_nocp_regmap_config.max_register = resource_size(res) - 4;
+
+	nocp->regmap = devm_regmap_init_mmio(dev, base,
+					&exynos_nocp_regmap_config);
+	if (IS_ERR(nocp->regmap)) {
+		dev_err(dev, "failed to initialize regmap\n");
+		return PTR_ERR(nocp->regmap);
+	}
+
+	return 0;
+}
+
+static int exynos_nocp_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
+	struct exynos_nocp *nocp;
+	int ret;
+
+	nocp = devm_kzalloc(&pdev->dev, sizeof(*nocp), GFP_KERNEL);
+	if (!nocp)
+		return -ENOMEM;
+
+	nocp->dev = &pdev->dev;
+
+	/* Parse dt data to get resource */
+	ret = exynos_nocp_parse_dt(pdev, nocp);
+	if (ret < 0) {
+		dev_err(&pdev->dev,
+			"failed to parse devicetree for resource\n");
+		return ret;
+	}
+
+	/* Add devfreq-event device to measure the bandwidth of NoC */
+	nocp->desc.ops = &exynos_nocp_ops;
+	nocp->desc.driver_data = nocp;
+	nocp->desc.name = np->full_name;
+	nocp->edev = devm_devfreq_event_add_edev(&pdev->dev, &nocp->desc);
+	if (IS_ERR(nocp->edev)) {
+		dev_err(&pdev->dev,
+			"failed to add devfreq-event device\n");
+		return PTR_ERR(nocp->edev);
+	}
+	platform_set_drvdata(pdev, nocp);
+
+	clk_prepare_enable(nocp->clk);
+
+	pr_info("exynos-nocp: new NoC Probe device registered: %s\n",
+			dev_name(dev));
+
+	return 0;
+}
+
+static int exynos_nocp_remove(struct platform_device *pdev)
+{
+	struct exynos_nocp *nocp = platform_get_drvdata(pdev);
+
+	clk_disable_unprepare(nocp->clk);
+
+	return 0;
+}
+
+static struct platform_driver exynos_nocp_driver = {
+	.probe	= exynos_nocp_probe,
+	.remove	= exynos_nocp_remove,
+	.driver = {
+		.name	= "exynos-nocp",
+		.of_match_table = exynos_nocp_id_match,
+	},
+};
+module_platform_driver(exynos_nocp_driver);
+
+MODULE_DESCRIPTION("Exynos NoC (Network on Chip) Probe driver");
+MODULE_AUTHOR("Chanwoo Choi <cw00.choi@samsung.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/devfreq/event/exynos-nocp.h b/drivers/devfreq/event/exynos-nocp.h
new file mode 100644
index 0000000..28564db
--- /dev/null
+++ b/drivers/devfreq/event/exynos-nocp.h
@@ -0,0 +1,78 @@
+/*
+ * exynos-nocp.h - EXYNOS NoC (Network on Chip) Probe header file
+ *
+ * Copyright (c) 2016 Samsung Electronics Co., Ltd.
+ * Author : Chanwoo Choi <cw00.choi@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __EXYNOS_NOCP_H__
+#define __EXYNOS_NOCP_H__
+
+enum nocp_reg {
+	NOCP_ID_REVISION_ID		= 0x04,
+	NOCP_MAIN_CTL			= 0x08,
+	NOCP_CFG_CTL			= 0x0C,
+
+	NOCP_STAT_PERIOD		= 0x24,
+	NOCP_STAT_GO			= 0x28,
+	NOCP_STAT_ALARM_MIN		= 0x2C,
+	NOCP_STAT_ALARM_MAX		= 0x30,
+	NOCP_STAT_ALARM_STATUS		= 0x34,
+	NOCP_STAT_ALARM_CLR		= 0x38,
+
+	NOCP_COUNTERS_0_SRC		= 0x138,
+	NOCP_COUNTERS_0_ALARM_MODE	= 0x13C,
+	NOCP_COUNTERS_0_VAL		= 0x140,
+
+	NOCP_COUNTERS_1_SRC		= 0x14C,
+	NOCP_COUNTERS_1_ALARM_MODE	= 0x150,
+	NOCP_COUNTERS_1_VAL		= 0x154,
+
+	NOCP_COUNTERS_2_SRC		= 0x160,
+	NOCP_COUNTERS_2_ALARM_MODE	= 0x164,
+	NOCP_COUNTERS_2_VAL		= 0x168,
+
+	NOCP_COUNTERS_3_SRC		= 0x174,
+	NOCP_COUNTERS_3_ALARM_MODE	= 0x178,
+	NOCP_COUNTERS_3_VAL		= 0x17C,
+};
+
+/* NOCP_MAIN_CTL register */
+#define NOCP_MAIN_CTL_ERREN_MASK		BIT(0)
+#define NOCP_MAIN_CTL_TRACEEN_MASK		BIT(1)
+#define NOCP_MAIN_CTL_PAYLOADEN_MASK		BIT(2)
+#define NOCP_MAIN_CTL_STATEN_MASK		BIT(3)
+#define NOCP_MAIN_CTL_ALARMEN_MASK		BIT(4)
+#define NOCP_MAIN_CTL_STATCONDDUMP_MASK	BIT(5)
+#define NOCP_MAIN_CTL_INTRUSIVEMODE_MASK	BIT(6)
+
+/* NOCP_CFG_CTL register */
+#define NOCP_CFG_CTL_GLOBALEN_MASK		BIT(0)
+#define NOCP_CFG_CTL_ACTIVE_MASK		BIT(1)
+
+/* NOCP_COUNTERS_x_SRC register */
+#define NOCP_CNT_SRC_INTEVENT_SHIFT		0
+#define NOCP_CNT_SRC_INTEVENT_MASK		(0x1F << NOCP_CNT_SRC_INTEVENT_SHIFT)
+#define NOCP_CNT_SRC_INTEVENT_OFF_MASK		(0x0 << NOCP_CNT_SRC_INTEVENT_SHIFT)
+#define NOCP_CNT_SRC_INTEVENT_CYCLE_MASK	(0x1 << NOCP_CNT_SRC_INTEVENT_SHIFT)
+#define NOCP_CNT_SRC_INTEVENT_IDLE_MASK		(0x2 << NOCP_CNT_SRC_INTEVENT_SHIFT)
+#define NOCP_CNT_SRC_INTEVENT_XFER_MASK		(0x3 << NOCP_CNT_SRC_INTEVENT_SHIFT)
+#define NOCP_CNT_SRC_INTEVENT_BUSY_MASK		(0x4 << NOCP_CNT_SRC_INTEVENT_SHIFT)
+#define NOCP_CNT_SRC_INTEVENT_WAIT_MASK		(0x5 << NOCP_CNT_SRC_INTEVENT_SHIFT)
+#define NOCP_CNT_SRC_INTEVENT_PKT_MASK		(0x6 << NOCP_CNT_SRC_INTEVENT_SHIFT)
+#define NOCP_CNT_SRC_INTEVENT_BYTE_MASK		(0x8 << NOCP_CNT_SRC_INTEVENT_SHIFT)
+#define NOCP_CNT_SRC_INTEVENT_CHAIN_MASK	(0x10 << NOCP_CNT_SRC_INTEVENT_SHIFT)
+
+/* NOCP_COUNTERS_x_ALARM_MODE register */
+#define NOCP_CNT_ALARM_MODE_SHIFT		0
+#define NOCP_CNT_ALARM_MODE_MASK		(0x3 << NOCP_CNT_ALARM_MODE_SHIFT)
+#define NOCP_CNT_ALARM_MODE_OFF_MASK		(0x0 << NOCP_CNT_ALARM_MODE_SHIFT)
+#define NOCP_CNT_ALARM_MODE_MIN_MASK		(0x1 << NOCP_CNT_ALARM_MODE_SHIFT)
+#define NOCP_CNT_ALARM_MODE_MAX_MASK		(0x2 << NOCP_CNT_ALARM_MODE_SHIFT)
+#define NOCP_CNT_ALARM_MODE_MIN_MAX_MASK	(0x3 << NOCP_CNT_ALARM_MODE_SHIFT)
+
+#endif /* __EXYNOS_NOCP_H__ */
diff --git a/drivers/devfreq/exynos-bus.c b/drivers/devfreq/exynos-bus.c
new file mode 100644
index 0000000..2363d0a
--- /dev/null
+++ b/drivers/devfreq/exynos-bus.c
@@ -0,0 +1,570 @@
+/*
+ * Generic Exynos Bus frequency driver with DEVFREQ Framework
+ *
+ * Copyright (c) 2016 Samsung Electronics Co., Ltd.
+ * Author : Chanwoo Choi <cw00.choi@samsung.com>
+ *
+ * This driver supports the Exynos bus frequency feature using the
+ * DEVFREQ framework and is based on drivers/devfreq/exynos/exynos4_bus.c.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/clk.h>
+#include <linux/devfreq.h>
+#include <linux/devfreq-event.h>
+#include <linux/device.h>
+#include <linux/export.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/pm_opp.h>
+#include <linux/platform_device.h>
+#include <linux/regulator/consumer.h>
+#include <linux/slab.h>
+
+#define DEFAULT_SATURATION_RATIO	40
+#define DEFAULT_VOLTAGE_TOLERANCE	2
+
+struct exynos_bus {
+	struct device *dev;
+
+	struct devfreq *devfreq;
+	struct devfreq_event_dev **edev;
+	unsigned int edev_count;
+	struct mutex lock;
+
+	struct dev_pm_opp *curr_opp;
+
+	struct regulator *regulator;
+	struct clk *clk;
+	unsigned int voltage_tolerance;
+	unsigned int ratio;
+};
+
+/*
+ * Control the devfreq-event devices to get the current state of the bus.
+ */
+#define exynos_bus_ops_edev(ops)				\
+static int exynos_bus_##ops(struct exynos_bus *bus)		\
+{								\
+	int i, ret;						\
+								\
+	for (i = 0; i < bus->edev_count; i++) {			\
+		if (!bus->edev[i])				\
+			continue;				\
+		ret = devfreq_event_##ops(bus->edev[i]);	\
+		if (ret < 0)					\
+			return ret;				\
+	}							\
+								\
+	return 0;						\
+}
+exynos_bus_ops_edev(enable_edev);
+exynos_bus_ops_edev(disable_edev);
+exynos_bus_ops_edev(set_event);
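+
+/*
+ * For reference, exynos_bus_ops_edev(enable_edev) above expands to:
+ *
+ *	static int exynos_bus_enable_edev(struct exynos_bus *bus)
+ *	{
+ *		int i, ret;
+ *
+ *		for (i = 0; i < bus->edev_count; i++) {
+ *			if (!bus->edev[i])
+ *				continue;
+ *			ret = devfreq_event_enable_edev(bus->edev[i]);
+ *			if (ret < 0)
+ *				return ret;
+ *		}
+ *
+ *		return 0;
+ *	}
+ */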
+
+static int exynos_bus_get_event(struct exynos_bus *bus,
+				struct devfreq_event_data *edata)
+{
+	struct devfreq_event_data event_data;
+	unsigned long load_count = 0, total_count = 0;
+	int i, ret = 0;
+
+	for (i = 0; i < bus->edev_count; i++) {
+		if (!bus->edev[i])
+			continue;
+
+		ret = devfreq_event_get_event(bus->edev[i], &event_data);
+		if (ret < 0)
+			return ret;
+
+		if (i == 0 || event_data.load_count > load_count) {
+			load_count = event_data.load_count;
+			total_count = event_data.total_count;
+		}
+	}
+
+	edata->load_count = load_count;
+	edata->total_count = total_count;
+
+	return ret;
+}
+
+/*
+ * Callback functions required by the devfreq simple-ondemand governor.
+ */
+static int exynos_bus_target(struct device *dev, unsigned long *freq, u32 flags)
+{
+	struct exynos_bus *bus = dev_get_drvdata(dev);
+	struct dev_pm_opp *new_opp;
+	unsigned long old_freq, new_freq, old_volt, new_volt, tol;
+	int ret = 0;
+
+	/* Get the OPP instance matching the new bus clock rate */
+	rcu_read_lock();
+	new_opp = devfreq_recommended_opp(dev, freq, flags);
+	if (IS_ERR(new_opp)) {
+		dev_err(dev, "failed to get recommended opp instance\n");
+		rcu_read_unlock();
+		return PTR_ERR(new_opp);
+	}
+
+	new_freq = dev_pm_opp_get_freq(new_opp);
+	new_volt = dev_pm_opp_get_voltage(new_opp);
+	old_freq = dev_pm_opp_get_freq(bus->curr_opp);
+	old_volt = dev_pm_opp_get_voltage(bus->curr_opp);
+	rcu_read_unlock();
+
+	if (old_freq == new_freq)
+		return 0;
+	tol = new_volt * bus->voltage_tolerance / 100;
+
+	/* Change voltage and frequency according to new OPP level */
+	mutex_lock(&bus->lock);
+
+	if (old_freq < new_freq) {
+		ret = regulator_set_voltage_tol(bus->regulator, new_volt, tol);
+		if (ret < 0) {
+			dev_err(bus->dev, "failed to set voltage\n");
+			goto out;
+		}
+	}
+
+	ret = clk_set_rate(bus->clk, new_freq);
+	if (ret < 0) {
+		dev_err(dev, "failed to change clock of bus\n");
+		clk_set_rate(bus->clk, old_freq);
+		goto out;
+	}
+
+	if (old_freq > new_freq) {
+		ret = regulator_set_voltage_tol(bus->regulator, new_volt, tol);
+		if (ret < 0) {
+			dev_err(bus->dev, "failed to set voltage\n");
+			goto out;
+		}
+	}
+	bus->curr_opp = new_opp;
+
+	dev_dbg(dev, "Set the frequency of bus (%lukHz -> %lukHz)\n",
+			old_freq/1000, new_freq/1000);
+out:
+	mutex_unlock(&bus->lock);
+
+	return ret;
+}
+
+static int exynos_bus_get_dev_status(struct device *dev,
+				     struct devfreq_dev_status *stat)
+{
+	struct exynos_bus *bus = dev_get_drvdata(dev);
+	struct devfreq_event_data edata;
+	int ret;
+
+	rcu_read_lock();
+	stat->current_frequency = dev_pm_opp_get_freq(bus->curr_opp);
+	rcu_read_unlock();
+
+	ret = exynos_bus_get_event(bus, &edata);
+	if (ret < 0) {
+		stat->total_time = stat->busy_time = 0;
+		goto err;
+	}
+
+	stat->busy_time = (edata.load_count * 100) / bus->ratio;
+	stat->total_time = edata.total_count;
+
+	dev_dbg(dev, "Usage of devfreq-event : %lu/%lu\n", stat->busy_time,
+							stat->total_time);
+
+err:
+	ret = exynos_bus_set_event(bus);
+	if (ret < 0) {
+		dev_err(dev, "failed to set event to devfreq-event devices\n");
+		return ret;
+	}
+
+	return ret;
+}
+
+static void exynos_bus_exit(struct device *dev)
+{
+	struct exynos_bus *bus = dev_get_drvdata(dev);
+	int ret;
+
+	ret = exynos_bus_disable_edev(bus);
+	if (ret < 0)
+		dev_warn(dev, "failed to disable the devfreq-event devices\n");
+
+	if (bus->regulator)
+		regulator_disable(bus->regulator);
+
+	dev_pm_opp_of_remove_table(dev);
+	clk_disable_unprepare(bus->clk);
+}
+
+/*
+ * Callback function required by the devfreq passive governor.
+ */
+static int exynos_bus_passive_target(struct device *dev, unsigned long *freq,
+					u32 flags)
+{
+	struct exynos_bus *bus = dev_get_drvdata(dev);
+	struct dev_pm_opp *new_opp;
+	unsigned long old_freq, new_freq;
+	int ret = 0;
+
+	/* Get the OPP instance matching the new bus clock rate */
+	rcu_read_lock();
+	new_opp = devfreq_recommended_opp(dev, freq, flags);
+	if (IS_ERR(new_opp)) {
+		dev_err(dev, "failed to get recommended opp instance\n");
+		rcu_read_unlock();
+		return PTR_ERR(new_opp);
+	}
+
+	new_freq = dev_pm_opp_get_freq(new_opp);
+	old_freq = dev_pm_opp_get_freq(bus->curr_opp);
+	rcu_read_unlock();
+
+	if (old_freq == new_freq)
+		return 0;
+
+	/* Change the frequency according to new OPP level */
+	mutex_lock(&bus->lock);
+
+	ret = clk_set_rate(bus->clk, new_freq);
+	if (ret < 0) {
+		dev_err(dev, "failed to set the clock of bus\n");
+		goto out;
+	}
+
+	*freq = new_freq;
+	bus->curr_opp = new_opp;
+
+	dev_dbg(dev, "Set the frequency of bus (%lukHz -> %lukHz)\n",
+			old_freq/1000, new_freq/1000);
+out:
+	mutex_unlock(&bus->lock);
+
+	return ret;
+}
+
+static void exynos_bus_passive_exit(struct device *dev)
+{
+	struct exynos_bus *bus = dev_get_drvdata(dev);
+
+	dev_pm_opp_of_remove_table(dev);
+	clk_disable_unprepare(bus->clk);
+}
+
+static int exynos_bus_parent_parse_of(struct device_node *np,
+					struct exynos_bus *bus)
+{
+	struct device *dev = bus->dev;
+	int i, ret, count, size;
+
+	/* Get the regulator to provide each bus with the power */
+	bus->regulator = devm_regulator_get(dev, "vdd");
+	if (IS_ERR(bus->regulator)) {
+		dev_err(dev, "failed to get VDD regulator\n");
+		return PTR_ERR(bus->regulator);
+	}
+
+	ret = regulator_enable(bus->regulator);
+	if (ret < 0) {
+		dev_err(dev, "failed to enable VDD regulator\n");
+		return ret;
+	}
+
+	/*
+	 * Get the devfreq-event devices that report the current utilization
+	 * of the buses. The raw data is consumed by the devfreq
+	 * simple-ondemand governor.
+	 */
+	count = devfreq_event_get_edev_count(dev);
+	if (count < 0) {
+		dev_err(dev, "failed to get the count of devfreq-event dev\n");
+		ret = count;
+		goto err_regulator;
+	}
+	bus->edev_count = count;
+
+	size = sizeof(*bus->edev) * count;
+	bus->edev = devm_kzalloc(dev, size, GFP_KERNEL);
+	if (!bus->edev) {
+		ret = -ENOMEM;
+		goto err_regulator;
+	}
+
+	for (i = 0; i < count; i++) {
+		bus->edev[i] = devfreq_event_get_edev_by_phandle(dev, i);
+		if (IS_ERR(bus->edev[i])) {
+			ret = -EPROBE_DEFER;
+			goto err_regulator;
+		}
+	}
+
+	/*
+	 * Optionally, get the saturation ratio for the Exynos SoC.
+	 * When measuring the utilization of each AXI bus with the
+	 * devfreq-event devices, the measured busy cycles might be much
+	 * lower than the total cycles of the bus during the sampling
+	 * period. As a result, the devfreq simple-ondemand governor might
+	 * not raise the frequency because the utilization (= busy
+	 * cycles / total cycles) looks too low. This property is used to
+	 * scale up the utilization when calculating busy_time in
+	 * exynos_bus_get_dev_status().
+	 */
+	if (of_property_read_u32(np, "exynos,saturation-ratio", &bus->ratio))
+		bus->ratio = DEFAULT_SATURATION_RATIO;
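+
+	/*
+	 * Example: with the default ratio of 40, exynos_bus_get_dev_status()
+	 * reports busy_time = load_count * 100 / 40, i.e. a raw utilization
+	 * of 40% is presented to the governor as a fully busy bus.
+	 */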
+
+	if (of_property_read_u32(np, "exynos,voltage-tolerance",
+					&bus->voltage_tolerance))
+		bus->voltage_tolerance = DEFAULT_VOLTAGE_TOLERANCE;
+
+	return 0;
+
+err_regulator:
+	regulator_disable(bus->regulator);
+
+	return ret;
+}
+
+static int exynos_bus_parse_of(struct device_node *np,
+			      struct exynos_bus *bus)
+{
+	struct device *dev = bus->dev;
+	unsigned long rate;
+	int ret;
+
+	/* Get the clock to provide each bus with source clock */
+	bus->clk = devm_clk_get(dev, "bus");
+	if (IS_ERR(bus->clk)) {
+		dev_err(dev, "failed to get bus clock\n");
+		return PTR_ERR(bus->clk);
+	}
+
+	ret = clk_prepare_enable(bus->clk);
+	if (ret < 0) {
+		dev_err(dev, "failed to enable bus clock\n");
+		return ret;
+	}
+
+	/* Get the freq and voltage from OPP table to scale the bus freq */
+	rcu_read_lock();
+	ret = dev_pm_opp_of_add_table(dev);
+	if (ret < 0) {
+		dev_err(dev, "failed to get OPP table\n");
+		rcu_read_unlock();
+		goto err_clk;
+	}
+
+	rate = clk_get_rate(bus->clk);
+	bus->curr_opp = devfreq_recommended_opp(dev, &rate, 0);
+	if (IS_ERR(bus->curr_opp)) {
+		dev_err(dev, "failed to find dev_pm_opp\n");
+		rcu_read_unlock();
+		ret = PTR_ERR(bus->curr_opp);
+		goto err_opp;
+	}
+	rcu_read_unlock();
+
+	return 0;
+
+err_opp:
+	dev_pm_opp_of_remove_table(dev);
+err_clk:
+	clk_disable_unprepare(bus->clk);
+
+	return ret;
+}
+
+static int exynos_bus_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
+	struct devfreq_dev_profile *profile;
+	struct devfreq_simple_ondemand_data *ondemand_data;
+	struct devfreq_passive_data *passive_data;
+	struct devfreq *parent_devfreq;
+	struct exynos_bus *bus;
+	int ret, max_state;
+	unsigned long min_freq, max_freq;
+
+	if (!np) {
+		dev_err(dev, "failed to find devicetree node\n");
+		return -EINVAL;
+	}
+
+	bus = devm_kzalloc(&pdev->dev, sizeof(*bus), GFP_KERNEL);
+	if (!bus)
+		return -ENOMEM;
+	mutex_init(&bus->lock);
+	bus->dev = &pdev->dev;
+	platform_set_drvdata(pdev, bus);
+
+	/* Parse the device-tree to get the resource information */
+	ret = exynos_bus_parse_of(np, bus);
+	if (ret < 0)
+		goto err;
+
+	profile = devm_kzalloc(dev, sizeof(*profile), GFP_KERNEL);
+	if (!profile) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	if (of_parse_phandle(dev->of_node, "devfreq", 0))
+		goto passive;
+	else
+		ret = exynos_bus_parent_parse_of(np, bus);
+
+	if (ret < 0)
+		goto err;
+
+	/* Initialize the struct profile and governor data for parent device */
+	profile->polling_ms = 50;
+	profile->target = exynos_bus_target;
+	profile->get_dev_status = exynos_bus_get_dev_status;
+	profile->exit = exynos_bus_exit;
+
+	ondemand_data = devm_kzalloc(dev, sizeof(*ondemand_data), GFP_KERNEL);
+	if (!ondemand_data) {
+		ret = -ENOMEM;
+		goto err;
+	}
+	ondemand_data->upthreshold = 40;
+	ondemand_data->downdifferential = 5;
+
+	/* Add devfreq device to monitor and handle the exynos bus */
+	bus->devfreq = devm_devfreq_add_device(dev, profile, "simple_ondemand",
+						ondemand_data);
+	if (IS_ERR(bus->devfreq)) {
+		dev_err(dev, "failed to add devfreq device\n");
+		ret = PTR_ERR(bus->devfreq);
+		goto err;
+	}
+
+	/* Register opp_notifier to catch OPP changes */
+	ret = devm_devfreq_register_opp_notifier(dev, bus->devfreq);
+	if (ret < 0) {
+		dev_err(dev, "failed to register opp notifier\n");
+		goto err;
+	}
+
+	/*
+	 * Enable devfreq-event to get raw data which is used to determine
+	 * current bus load.
+	 */
+	ret = exynos_bus_enable_edev(bus);
+	if (ret < 0) {
+		dev_err(dev, "failed to enable devfreq-event devices\n");
+		goto err;
+	}
+
+	ret = exynos_bus_set_event(bus);
+	if (ret < 0) {
+		dev_err(dev, "failed to set event to devfreq-event devices\n");
+		goto err;
+	}
+
+	goto out;
+passive:
+	/* Initialize the struct profile and governor data for passive device */
+	profile->target = exynos_bus_passive_target;
+	profile->exit = exynos_bus_passive_exit;
+
+	/* Get the instance of parent devfreq device */
+	parent_devfreq = devfreq_get_devfreq_by_phandle(dev, 0);
+	if (IS_ERR(parent_devfreq)) {
+		ret = -EPROBE_DEFER;
+		goto err;
+	}
+
+	passive_data = devm_kzalloc(dev, sizeof(*passive_data), GFP_KERNEL);
+	if (!passive_data) {
+		ret = -ENOMEM;
+		goto err;
+	}
+	passive_data->parent = parent_devfreq;
+
+	/* Add devfreq device for exynos bus with passive governor */
+	bus->devfreq = devm_devfreq_add_device(dev, profile, "passive",
+						passive_data);
+	if (IS_ERR(bus->devfreq)) {
+		dev_err(dev,
+			"failed to add devfreq dev with passive governor\n");
+		ret = -EPROBE_DEFER;
+		goto err;
+	}
+
+out:
+	max_state = bus->devfreq->profile->max_state;
+	min_freq = (bus->devfreq->profile->freq_table[0] / 1000);
+	max_freq = (bus->devfreq->profile->freq_table[max_state - 1] / 1000);
+	pr_info("exynos-bus: new bus device registered: %s (%6lu kHz ~ %6lu kHz)\n",
+			dev_name(dev), min_freq, max_freq);
+
+	return 0;
+
+err:
+	dev_pm_opp_of_remove_table(dev);
+	clk_disable_unprepare(bus->clk);
+
+	return ret;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int exynos_bus_resume(struct device *dev)
+{
+	struct exynos_bus *bus = dev_get_drvdata(dev);
+	int ret;
+
+	ret = exynos_bus_enable_edev(bus);
+	if (ret < 0) {
+		dev_err(dev, "failed to enable the devfreq-event devices\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static int exynos_bus_suspend(struct device *dev)
+{
+	struct exynos_bus *bus = dev_get_drvdata(dev);
+	int ret;
+
+	ret = exynos_bus_disable_edev(bus);
+	if (ret < 0) {
+		dev_err(dev, "failed to disable the devfreq-event devices\n");
+		return ret;
+	}
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops exynos_bus_pm = {
+	SET_SYSTEM_SLEEP_PM_OPS(exynos_bus_suspend, exynos_bus_resume)
+};
+
+static const struct of_device_id exynos_bus_of_match[] = {
+	{ .compatible = "samsung,exynos-bus", },
+	{ /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, exynos_bus_of_match);
+
+static struct platform_driver exynos_bus_platdrv = {
+	.probe		= exynos_bus_probe,
+	.driver = {
+		.name	= "exynos-bus",
+		.pm	= &exynos_bus_pm,
+		.of_match_table = of_match_ptr(exynos_bus_of_match),
+	},
+};
+module_platform_driver(exynos_bus_platdrv);
+
+MODULE_DESCRIPTION("Generic Exynos Bus frequency driver");
+MODULE_AUTHOR("Chanwoo Choi <cw00.choi@samsung.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/devfreq/exynos/Makefile b/drivers/devfreq/exynos/Makefile
deleted file mode 100644
index 49bc917..0000000
--- a/drivers/devfreq/exynos/Makefile
+++ /dev/null
@@ -1,3 +0,0 @@
-# Exynos DEVFREQ Drivers
-obj-$(CONFIG_ARM_EXYNOS4_BUS_DEVFREQ)	+= exynos_ppmu.o exynos4_bus.o
-obj-$(CONFIG_ARM_EXYNOS5_BUS_DEVFREQ)	+= exynos_ppmu.o exynos5_bus.o
diff --git a/drivers/devfreq/exynos/exynos4_bus.c b/drivers/devfreq/exynos/exynos4_bus.c
deleted file mode 100644
index da95092..0000000
--- a/drivers/devfreq/exynos/exynos4_bus.c
+++ /dev/null
@@ -1,1055 +0,0 @@
-/* drivers/devfreq/exynos4210_memorybus.c
- *
- * Copyright (c) 2011 Samsung Electronics Co., Ltd.
- *		http://www.samsung.com/
- *	MyungJoo Ham <myungjoo.ham@samsung.com>
- *
- * EXYNOS4 - Memory/Bus clock frequency scaling support in DEVFREQ framework
- *	This version supports EXYNOS4210 only. This changes bus frequencies
- *	and vddint voltages. Exynos4412/4212 should be able to be supported
- *	with minor modifications.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-
-#include <linux/io.h>
-#include <linux/slab.h>
-#include <linux/mutex.h>
-#include <linux/suspend.h>
-#include <linux/pm_opp.h>
-#include <linux/devfreq.h>
-#include <linux/platform_device.h>
-#include <linux/regulator/consumer.h>
-#include <linux/module.h>
-
-#include <mach/map.h>
-
-#include "exynos_ppmu.h"
-#include "exynos4_bus.h"
-
-#define MAX_SAFEVOLT	1200000 /* 1.2V */
-
-enum exynos4_busf_type {
-	TYPE_BUSF_EXYNOS4210,
-	TYPE_BUSF_EXYNOS4x12,
-};
-
-/* Assume that the bus is saturated if the utilization is 40% */
-#define BUS_SATURATION_RATIO	40
-
-enum busclk_level_idx {
-	LV_0 = 0,
-	LV_1,
-	LV_2,
-	LV_3,
-	LV_4,
-	_LV_END
-};
-
-enum exynos_ppmu_idx {
-	PPMU_DMC0,
-	PPMU_DMC1,
-	PPMU_END,
-};
-
-#define EX4210_LV_MAX	LV_2
-#define EX4x12_LV_MAX	LV_4
-#define EX4210_LV_NUM	(LV_2 + 1)
-#define EX4x12_LV_NUM	(LV_4 + 1)
-
-/**
- * struct busfreq_opp_info - opp information for bus
- * @rate:	Frequency in hertz
- * @volt:	Voltage in microvolts corresponding to this OPP
- */
-struct busfreq_opp_info {
-	unsigned long rate;
-	unsigned long volt;
-};
-
-struct busfreq_data {
-	enum exynos4_busf_type type;
-	struct device *dev;
-	struct devfreq *devfreq;
-	bool disabled;
-	struct regulator *vdd_int;
-	struct regulator *vdd_mif; /* Exynos4412/4212 only */
-	struct busfreq_opp_info curr_oppinfo;
-	struct busfreq_ppmu_data ppmu_data;
-
-	struct notifier_block pm_notifier;
-	struct mutex lock;
-
-	/* Dividers calculated at boot/probe-time */
-	unsigned int dmc_divtable[_LV_END]; /* DMC0 */
-	unsigned int top_divtable[_LV_END];
-};
-
-/* 4210 controls clock of mif and voltage of int */
-static struct bus_opp_table exynos4210_busclk_table[] = {
-	{LV_0, 400000, 1150000},
-	{LV_1, 267000, 1050000},
-	{LV_2, 133000, 1025000},
-	{0, 0, 0},
-};
-
-/*
- * MIF is the main control knob clock for Exynos4x12 MIF/INT
- * clock and voltage of both mif/int are controlled.
- */
-static struct bus_opp_table exynos4x12_mifclk_table[] = {
-	{LV_0, 400000, 1100000},
-	{LV_1, 267000, 1000000},
-	{LV_2, 160000, 950000},
-	{LV_3, 133000, 950000},
-	{LV_4, 100000, 950000},
-	{0, 0, 0},
-};
-
-/*
- * INT is not the control knob of 4x12. LV_x is not meant to represent
- * the current performance. (MIF does)
- */
-static struct bus_opp_table exynos4x12_intclk_table[] = {
-	{LV_0, 200000, 1000000},
-	{LV_1, 160000, 950000},
-	{LV_2, 133000, 925000},
-	{LV_3, 100000, 900000},
-	{0, 0, 0},
-};
-
-/* TODO: asv volt definitions are "__initdata"? */
-/* Some chips have different operating voltages */
-static unsigned int exynos4210_asv_volt[][EX4210_LV_NUM] = {
-	{1150000, 1050000, 1050000},
-	{1125000, 1025000, 1025000},
-	{1100000, 1000000, 1000000},
-	{1075000, 975000, 975000},
-	{1050000, 950000, 950000},
-};
-
-static unsigned int exynos4x12_mif_step_50[][EX4x12_LV_NUM] = {
-	/* 400      267     160     133     100 */
-	{1050000, 950000, 900000, 900000, 900000}, /* ASV0 */
-	{1050000, 950000, 900000, 900000, 900000}, /* ASV1 */
-	{1050000, 950000, 900000, 900000, 900000}, /* ASV2 */
-	{1050000, 900000, 900000, 900000, 900000}, /* ASV3 */
-	{1050000, 900000, 900000, 900000, 850000}, /* ASV4 */
-	{1050000, 900000, 900000, 850000, 850000}, /* ASV5 */
-	{1050000, 900000, 850000, 850000, 850000}, /* ASV6 */
-	{1050000, 900000, 850000, 850000, 850000}, /* ASV7 */
-	{1050000, 900000, 850000, 850000, 850000}, /* ASV8 */
-};
-
-static unsigned int exynos4x12_int_volt[][EX4x12_LV_NUM] = {
-	/* 200    160      133     100 */
-	{1000000, 950000, 925000, 900000}, /* ASV0 */
-	{975000,  925000, 925000, 900000}, /* ASV1 */
-	{950000,  925000, 900000, 875000}, /* ASV2 */
-	{950000,  900000, 900000, 875000}, /* ASV3 */
-	{925000,  875000, 875000, 875000}, /* ASV4 */
-	{900000,  850000, 850000, 850000}, /* ASV5 */
-	{900000,  850000, 850000, 850000}, /* ASV6 */
-	{900000,  850000, 850000, 850000}, /* ASV7 */
-	{900000,  850000, 850000, 850000}, /* ASV8 */
-};
-
-/*** Clock Divider Data for Exynos4210 ***/
-static unsigned int exynos4210_clkdiv_dmc0[][8] = {
-	/*
-	 * Clock divider value for following
-	 * { DIVACP, DIVACP_PCLK, DIVDPHY, DIVDMC, DIVDMCD
-	 *		DIVDMCP, DIVCOPY2, DIVCORE_TIMERS }
-	 */
-
-	/* DMC L0: 400MHz */
-	{ 3, 1, 1, 1, 1, 1, 3, 1 },
-	/* DMC L1: 266.7MHz */
-	{ 4, 1, 1, 2, 1, 1, 3, 1 },
-	/* DMC L2: 133MHz */
-	{ 5, 1, 1, 5, 1, 1, 3, 1 },
-};
-static unsigned int exynos4210_clkdiv_top[][5] = {
-	/*
-	 * Clock divider value for following
-	 * { DIVACLK200, DIVACLK100, DIVACLK160, DIVACLK133, DIVONENAND }
-	 */
-	/* ACLK200 L0: 200MHz */
-	{ 3, 7, 4, 5, 1 },
-	/* ACLK200 L1: 160MHz */
-	{ 4, 7, 5, 6, 1 },
-	/* ACLK200 L2: 133MHz */
-	{ 5, 7, 7, 7, 1 },
-};
-static unsigned int exynos4210_clkdiv_lr_bus[][2] = {
-	/*
-	 * Clock divider value for following
-	 * { DIVGDL/R, DIVGPL/R }
-	 */
-	/* ACLK_GDL/R L1: 200MHz */
-	{ 3, 1 },
-	/* ACLK_GDL/R L2: 160MHz */
-	{ 4, 1 },
-	/* ACLK_GDL/R L3: 133MHz */
-	{ 5, 1 },
-};
-
-/*** Clock Divider Data for Exynos4212/4412 ***/
-static unsigned int exynos4x12_clkdiv_dmc0[][6] = {
-	/*
-	 * Clock divider value for following
-	 * { DIVACP, DIVACP_PCLK, DIVDPHY, DIVDMC, DIVDMCD
-	 *              DIVDMCP}
-	 */
-
-	/* DMC L0: 400MHz */
-	{3, 1, 1, 1, 1, 1},
-	/* DMC L1: 266.7MHz */
-	{4, 1, 1, 2, 1, 1},
-	/* DMC L2: 160MHz */
-	{5, 1, 1, 4, 1, 1},
-	/* DMC L3: 133MHz */
-	{5, 1, 1, 5, 1, 1},
-	/* DMC L4: 100MHz */
-	{7, 1, 1, 7, 1, 1},
-};
-static unsigned int exynos4x12_clkdiv_dmc1[][6] = {
-	/*
-	 * Clock divider value for following
-	 * { G2DACP, DIVC2C, DIVC2C_ACLK }
-	 */
-
-	/* DMC L0: 400MHz */
-	{3, 1, 1},
-	/* DMC L1: 266.7MHz */
-	{4, 2, 1},
-	/* DMC L2: 160MHz */
-	{5, 4, 1},
-	/* DMC L3: 133MHz */
-	{5, 5, 1},
-	/* DMC L4: 100MHz */
-	{7, 7, 1},
-};
-static unsigned int exynos4x12_clkdiv_top[][5] = {
-	/*
-	 * Clock divider value for following
-	 * { DIVACLK266_GPS, DIVACLK100, DIVACLK160,
-		DIVACLK133, DIVONENAND }
-	 */
-
-	/* ACLK_GDL/R L0: 200MHz */
-	{2, 7, 4, 5, 1},
-	/* ACLK_GDL/R L1: 200MHz */
-	{2, 7, 4, 5, 1},
-	/* ACLK_GDL/R L2: 160MHz */
-	{4, 7, 5, 7, 1},
-	/* ACLK_GDL/R L3: 133MHz */
-	{4, 7, 5, 7, 1},
-	/* ACLK_GDL/R L4: 100MHz */
-	{7, 7, 7, 7, 1},
-};
-static unsigned int exynos4x12_clkdiv_lr_bus[][2] = {
-	/*
-	 * Clock divider value for following
-	 * { DIVGDL/R, DIVGPL/R }
-	 */
-
-	/* ACLK_GDL/R L0: 200MHz */
-	{3, 1},
-	/* ACLK_GDL/R L1: 200MHz */
-	{3, 1},
-	/* ACLK_GDL/R L2: 160MHz */
-	{4, 1},
-	/* ACLK_GDL/R L3: 133MHz */
-	{5, 1},
-	/* ACLK_GDL/R L4: 100MHz */
-	{7, 1},
-};
-static unsigned int exynos4x12_clkdiv_sclkip[][3] = {
-	/*
-	 * Clock divider value for following
-	 * { DIVMFC, DIVJPEG, DIVFIMC0~3}
-	 */
-
-	/* SCLK_MFC: 200MHz */
-	{3, 3, 4},
-	/* SCLK_MFC: 200MHz */
-	{3, 3, 4},
-	/* SCLK_MFC: 160MHz */
-	{4, 4, 5},
-	/* SCLK_MFC: 133MHz */
-	{5, 5, 5},
-	/* SCLK_MFC: 100MHz */
-	{7, 7, 7},
-};
-
-
-static int exynos4210_set_busclk(struct busfreq_data *data,
-				 struct busfreq_opp_info *oppi)
-{
-	unsigned int index;
-	unsigned int tmp;
-
-	for (index = LV_0; index < EX4210_LV_NUM; index++)
-		if (oppi->rate == exynos4210_busclk_table[index].clk)
-			break;
-
-	if (index == EX4210_LV_NUM)
-		return -EINVAL;
-
-	/* Change Divider - DMC0 */
-	tmp = data->dmc_divtable[index];
-
-	__raw_writel(tmp, EXYNOS4_CLKDIV_DMC0);
-
-	do {
-		tmp = __raw_readl(EXYNOS4_CLKDIV_STAT_DMC0);
-	} while (tmp & 0x11111111);
-
-	/* Change Divider - TOP */
-	tmp = data->top_divtable[index];
-
-	__raw_writel(tmp, EXYNOS4_CLKDIV_TOP);
-
-	do {
-		tmp = __raw_readl(EXYNOS4_CLKDIV_STAT_TOP);
-	} while (tmp & 0x11111);
-
-	/* Change Divider - LEFTBUS */
-	tmp = __raw_readl(EXYNOS4_CLKDIV_LEFTBUS);
-
-	tmp &= ~(EXYNOS4_CLKDIV_BUS_GDLR_MASK | EXYNOS4_CLKDIV_BUS_GPLR_MASK);
-
-	tmp |= ((exynos4210_clkdiv_lr_bus[index][0] <<
-				EXYNOS4_CLKDIV_BUS_GDLR_SHIFT) |
-		(exynos4210_clkdiv_lr_bus[index][1] <<
-				EXYNOS4_CLKDIV_BUS_GPLR_SHIFT));
-
-	__raw_writel(tmp, EXYNOS4_CLKDIV_LEFTBUS);
-
-	do {
-		tmp = __raw_readl(EXYNOS4_CLKDIV_STAT_LEFTBUS);
-	} while (tmp & 0x11);
-
-	/* Change Divider - RIGHTBUS */
-	tmp = __raw_readl(EXYNOS4_CLKDIV_RIGHTBUS);
-
-	tmp &= ~(EXYNOS4_CLKDIV_BUS_GDLR_MASK | EXYNOS4_CLKDIV_BUS_GPLR_MASK);
-
-	tmp |= ((exynos4210_clkdiv_lr_bus[index][0] <<
-				EXYNOS4_CLKDIV_BUS_GDLR_SHIFT) |
-		(exynos4210_clkdiv_lr_bus[index][1] <<
-				EXYNOS4_CLKDIV_BUS_GPLR_SHIFT));
-
-	__raw_writel(tmp, EXYNOS4_CLKDIV_RIGHTBUS);
-
-	do {
-		tmp = __raw_readl(EXYNOS4_CLKDIV_STAT_RIGHTBUS);
-	} while (tmp & 0x11);
-
-	return 0;
-}
-
-static int exynos4x12_set_busclk(struct busfreq_data *data,
-				 struct busfreq_opp_info *oppi)
-{
-	unsigned int index;
-	unsigned int tmp;
-
-	for (index = LV_0; index < EX4x12_LV_NUM; index++)
-		if (oppi->rate == exynos4x12_mifclk_table[index].clk)
-			break;
-
-	if (index == EX4x12_LV_NUM)
-		return -EINVAL;
-
-	/* Change Divider - DMC0 */
-	tmp = data->dmc_divtable[index];
-
-	__raw_writel(tmp, EXYNOS4_CLKDIV_DMC0);
-
-	do {
-		tmp = __raw_readl(EXYNOS4_CLKDIV_STAT_DMC0);
-	} while (tmp & 0x11111111);
-
-	/* Change Divider - DMC1 */
-	tmp = __raw_readl(EXYNOS4_CLKDIV_DMC1);
-
-	tmp &= ~(EXYNOS4_CLKDIV_DMC1_G2D_ACP_MASK |
-		EXYNOS4_CLKDIV_DMC1_C2C_MASK |
-		EXYNOS4_CLKDIV_DMC1_C2CACLK_MASK);
-
-	tmp |= ((exynos4x12_clkdiv_dmc1[index][0] <<
-				EXYNOS4_CLKDIV_DMC1_G2D_ACP_SHIFT) |
-		(exynos4x12_clkdiv_dmc1[index][1] <<
-				EXYNOS4_CLKDIV_DMC1_C2C_SHIFT) |
-		(exynos4x12_clkdiv_dmc1[index][2] <<
-				EXYNOS4_CLKDIV_DMC1_C2CACLK_SHIFT));
-
-	__raw_writel(tmp, EXYNOS4_CLKDIV_DMC1);
-
-	do {
-		tmp = __raw_readl(EXYNOS4_CLKDIV_STAT_DMC1);
-	} while (tmp & 0x111111);
-
-	/* Change Divider - TOP */
-	tmp = __raw_readl(EXYNOS4_CLKDIV_TOP);
-
-	tmp &= ~(EXYNOS4_CLKDIV_TOP_ACLK266_GPS_MASK |
-		EXYNOS4_CLKDIV_TOP_ACLK100_MASK |
-		EXYNOS4_CLKDIV_TOP_ACLK160_MASK |
-		EXYNOS4_CLKDIV_TOP_ACLK133_MASK |
-		EXYNOS4_CLKDIV_TOP_ONENAND_MASK);
-
-	tmp |= ((exynos4x12_clkdiv_top[index][0] <<
-				EXYNOS4_CLKDIV_TOP_ACLK266_GPS_SHIFT) |
-		(exynos4x12_clkdiv_top[index][1] <<
-				EXYNOS4_CLKDIV_TOP_ACLK100_SHIFT) |
-		(exynos4x12_clkdiv_top[index][2] <<
-				EXYNOS4_CLKDIV_TOP_ACLK160_SHIFT) |
-		(exynos4x12_clkdiv_top[index][3] <<
-				EXYNOS4_CLKDIV_TOP_ACLK133_SHIFT) |
-		(exynos4x12_clkdiv_top[index][4] <<
-				EXYNOS4_CLKDIV_TOP_ONENAND_SHIFT));
-
-	__raw_writel(tmp, EXYNOS4_CLKDIV_TOP);
-
-	do {
-		tmp = __raw_readl(EXYNOS4_CLKDIV_STAT_TOP);
-	} while (tmp & 0x11111);
-
-	/* Change Divider - LEFTBUS */
-	tmp = __raw_readl(EXYNOS4_CLKDIV_LEFTBUS);
-
-	tmp &= ~(EXYNOS4_CLKDIV_BUS_GDLR_MASK | EXYNOS4_CLKDIV_BUS_GPLR_MASK);
-
-	tmp |= ((exynos4x12_clkdiv_lr_bus[index][0] <<
-				EXYNOS4_CLKDIV_BUS_GDLR_SHIFT) |
-		(exynos4x12_clkdiv_lr_bus[index][1] <<
-				EXYNOS4_CLKDIV_BUS_GPLR_SHIFT));
-
-	__raw_writel(tmp, EXYNOS4_CLKDIV_LEFTBUS);
-
-	do {
-		tmp = __raw_readl(EXYNOS4_CLKDIV_STAT_LEFTBUS);
-	} while (tmp & 0x11);
-
-	/* Change Divider - RIGHTBUS */
-	tmp = __raw_readl(EXYNOS4_CLKDIV_RIGHTBUS);
-
-	tmp &= ~(EXYNOS4_CLKDIV_BUS_GDLR_MASK | EXYNOS4_CLKDIV_BUS_GPLR_MASK);
-
-	tmp |= ((exynos4x12_clkdiv_lr_bus[index][0] <<
-				EXYNOS4_CLKDIV_BUS_GDLR_SHIFT) |
-		(exynos4x12_clkdiv_lr_bus[index][1] <<
-				EXYNOS4_CLKDIV_BUS_GPLR_SHIFT));
-
-	__raw_writel(tmp, EXYNOS4_CLKDIV_RIGHTBUS);
-
-	do {
-		tmp = __raw_readl(EXYNOS4_CLKDIV_STAT_RIGHTBUS);
-	} while (tmp & 0x11);
-
-	/* Change Divider - MFC */
-	tmp = __raw_readl(EXYNOS4_CLKDIV_MFC);
-
-	tmp &= ~(EXYNOS4_CLKDIV_MFC_MASK);
-
-	tmp |= ((exynos4x12_clkdiv_sclkip[index][0] <<
-				EXYNOS4_CLKDIV_MFC_SHIFT));
-
-	__raw_writel(tmp, EXYNOS4_CLKDIV_MFC);
-
-	do {
-		tmp = __raw_readl(EXYNOS4_CLKDIV_STAT_MFC);
-	} while (tmp & 0x1);
-
-	/* Change Divider - JPEG */
-	tmp = __raw_readl(EXYNOS4_CLKDIV_CAM1);
-
-	tmp &= ~(EXYNOS4_CLKDIV_CAM1_JPEG_MASK);
-
-	tmp |= ((exynos4x12_clkdiv_sclkip[index][1] <<
-				EXYNOS4_CLKDIV_CAM1_JPEG_SHIFT));
-
-	__raw_writel(tmp, EXYNOS4_CLKDIV_CAM1);
-
-	do {
-		tmp = __raw_readl(EXYNOS4_CLKDIV_STAT_CAM1);
-	} while (tmp & 0x1);
-
-	/* Change Divider - FIMC0~3 */
-	tmp = __raw_readl(EXYNOS4_CLKDIV_CAM);
-
-	tmp &= ~(EXYNOS4_CLKDIV_CAM_FIMC0_MASK | EXYNOS4_CLKDIV_CAM_FIMC1_MASK |
-		EXYNOS4_CLKDIV_CAM_FIMC2_MASK | EXYNOS4_CLKDIV_CAM_FIMC3_MASK);
-
-	tmp |= ((exynos4x12_clkdiv_sclkip[index][2] <<
-				EXYNOS4_CLKDIV_CAM_FIMC0_SHIFT) |
-		(exynos4x12_clkdiv_sclkip[index][2] <<
-				EXYNOS4_CLKDIV_CAM_FIMC1_SHIFT) |
-		(exynos4x12_clkdiv_sclkip[index][2] <<
-				EXYNOS4_CLKDIV_CAM_FIMC2_SHIFT) |
-		(exynos4x12_clkdiv_sclkip[index][2] <<
-				EXYNOS4_CLKDIV_CAM_FIMC3_SHIFT));
-
-	__raw_writel(tmp, EXYNOS4_CLKDIV_CAM);
-
-	do {
-		tmp = __raw_readl(EXYNOS4_CLKDIV_STAT_CAM1);
-	} while (tmp & 0x1111);
-
-	return 0;
-}
-
-static int exynos4x12_get_intspec(unsigned long mifclk)
-{
-	int i = 0;
-
-	while (exynos4x12_intclk_table[i].clk) {
-		if (exynos4x12_intclk_table[i].clk <= mifclk)
-			return i;
-		i++;
-	}
-
-	return -EINVAL;
-}
-
-static int exynos4_bus_setvolt(struct busfreq_data *data,
-			       struct busfreq_opp_info *oppi,
-			       struct busfreq_opp_info *oldoppi)
-{
-	int err = 0, tmp;
-	unsigned long volt = oppi->volt;
-
-	switch (data->type) {
-	case TYPE_BUSF_EXYNOS4210:
-		/* OPP represents DMC clock + INT voltage */
-		err = regulator_set_voltage(data->vdd_int, volt,
-					    MAX_SAFEVOLT);
-		break;
-	case TYPE_BUSF_EXYNOS4x12:
-		/* OPP represents MIF clock + MIF voltage */
-		err = regulator_set_voltage(data->vdd_mif, volt,
-					    MAX_SAFEVOLT);
-		if (err)
-			break;
-
-		tmp = exynos4x12_get_intspec(oppi->rate);
-		if (tmp < 0) {
-			err = tmp;
-			regulator_set_voltage(data->vdd_mif,
-					      oldoppi->volt,
-					      MAX_SAFEVOLT);
-			break;
-		}
-		err = regulator_set_voltage(data->vdd_int,
-					    exynos4x12_intclk_table[tmp].volt,
-					    MAX_SAFEVOLT);
-		/*  Try to recover */
-		if (err)
-			regulator_set_voltage(data->vdd_mif,
-					      oldoppi->volt,
-					      MAX_SAFEVOLT);
-		break;
-	default:
-		err = -EINVAL;
-	}
-
-	return err;
-}
-
-static int exynos4_bus_target(struct device *dev, unsigned long *_freq,
-			      u32 flags)
-{
-	int err = 0;
-	struct platform_device *pdev = container_of(dev, struct platform_device,
-						    dev);
-	struct busfreq_data *data = platform_get_drvdata(pdev);
-	struct dev_pm_opp *opp;
-	unsigned long freq;
-	unsigned long old_freq = data->curr_oppinfo.rate;
-	struct busfreq_opp_info	new_oppinfo;
-
-	rcu_read_lock();
-	opp = devfreq_recommended_opp(dev, _freq, flags);
-	if (IS_ERR(opp)) {
-		rcu_read_unlock();
-		return PTR_ERR(opp);
-	}
-	new_oppinfo.rate = dev_pm_opp_get_freq(opp);
-	new_oppinfo.volt = dev_pm_opp_get_voltage(opp);
-	rcu_read_unlock();
-	freq = new_oppinfo.rate;
-
-	if (old_freq == freq)
-		return 0;
-
-	dev_dbg(dev, "targeting %lukHz %luuV\n", freq, new_oppinfo.volt);
-
-	mutex_lock(&data->lock);
-
-	if (data->disabled)
-		goto out;
-
-	if (old_freq < freq)
-		err = exynos4_bus_setvolt(data, &new_oppinfo,
-					  &data->curr_oppinfo);
-	if (err)
-		goto out;
-
-	if (old_freq != freq) {
-		switch (data->type) {
-		case TYPE_BUSF_EXYNOS4210:
-			err = exynos4210_set_busclk(data, &new_oppinfo);
-			break;
-		case TYPE_BUSF_EXYNOS4x12:
-			err = exynos4x12_set_busclk(data, &new_oppinfo);
-			break;
-		default:
-			err = -EINVAL;
-		}
-	}
-	if (err)
-		goto out;
-
-	if (old_freq > freq)
-		err = exynos4_bus_setvolt(data, &new_oppinfo,
-					  &data->curr_oppinfo);
-	if (err)
-		goto out;
-
-	data->curr_oppinfo = new_oppinfo;
-out:
-	mutex_unlock(&data->lock);
-	return err;
-}
-
-static int exynos4_bus_get_dev_status(struct device *dev,
-				      struct devfreq_dev_status *stat)
-{
-	struct busfreq_data *data = dev_get_drvdata(dev);
-	struct busfreq_ppmu_data *ppmu_data = &data->ppmu_data;
-	int busier;
-
-	exynos_read_ppmu(ppmu_data);
-	busier = exynos_get_busier_ppmu(ppmu_data);
-	stat->current_frequency = data->curr_oppinfo.rate;
-
-	/* Number of cycles spent on memory access */
-	stat->busy_time = ppmu_data->ppmu[busier].count[PPMU_PMNCNT3];
-	stat->busy_time *= 100 / BUS_SATURATION_RATIO;
-	stat->total_time = ppmu_data->ppmu[busier].ccnt;
-
-	/* If the counters have overflown, retry */
-	if (ppmu_data->ppmu[busier].ccnt_overflow ||
-	    ppmu_data->ppmu[busier].count_overflow[0])
-		return -EAGAIN;
-
-	return 0;
-}
-
-static struct devfreq_dev_profile exynos4_devfreq_profile = {
-	.initial_freq	= 400000,
-	.polling_ms	= 50,
-	.target		= exynos4_bus_target,
-	.get_dev_status	= exynos4_bus_get_dev_status,
-};
-
-static int exynos4210_init_tables(struct busfreq_data *data)
-{
-	u32 tmp;
-	int mgrp;
-	int i, err = 0;
-
-	tmp = __raw_readl(EXYNOS4_CLKDIV_DMC0);
-	for (i = LV_0; i < EX4210_LV_NUM; i++) {
-		tmp &= ~(EXYNOS4_CLKDIV_DMC0_ACP_MASK |
-			EXYNOS4_CLKDIV_DMC0_ACPPCLK_MASK |
-			EXYNOS4_CLKDIV_DMC0_DPHY_MASK |
-			EXYNOS4_CLKDIV_DMC0_DMC_MASK |
-			EXYNOS4_CLKDIV_DMC0_DMCD_MASK |
-			EXYNOS4_CLKDIV_DMC0_DMCP_MASK |
-			EXYNOS4_CLKDIV_DMC0_COPY2_MASK |
-			EXYNOS4_CLKDIV_DMC0_CORETI_MASK);
-
-		tmp |= ((exynos4210_clkdiv_dmc0[i][0] <<
-					EXYNOS4_CLKDIV_DMC0_ACP_SHIFT) |
-			(exynos4210_clkdiv_dmc0[i][1] <<
-					EXYNOS4_CLKDIV_DMC0_ACPPCLK_SHIFT) |
-			(exynos4210_clkdiv_dmc0[i][2] <<
-					EXYNOS4_CLKDIV_DMC0_DPHY_SHIFT) |
-			(exynos4210_clkdiv_dmc0[i][3] <<
-					EXYNOS4_CLKDIV_DMC0_DMC_SHIFT) |
-			(exynos4210_clkdiv_dmc0[i][4] <<
-					EXYNOS4_CLKDIV_DMC0_DMCD_SHIFT) |
-			(exynos4210_clkdiv_dmc0[i][5] <<
-					EXYNOS4_CLKDIV_DMC0_DMCP_SHIFT) |
-			(exynos4210_clkdiv_dmc0[i][6] <<
-					EXYNOS4_CLKDIV_DMC0_COPY2_SHIFT) |
-			(exynos4210_clkdiv_dmc0[i][7] <<
-					EXYNOS4_CLKDIV_DMC0_CORETI_SHIFT));
-
-		data->dmc_divtable[i] = tmp;
-	}
-
-	tmp = __raw_readl(EXYNOS4_CLKDIV_TOP);
-	for (i = LV_0; i <  EX4210_LV_NUM; i++) {
-		tmp &= ~(EXYNOS4_CLKDIV_TOP_ACLK200_MASK |
-			EXYNOS4_CLKDIV_TOP_ACLK100_MASK |
-			EXYNOS4_CLKDIV_TOP_ACLK160_MASK |
-			EXYNOS4_CLKDIV_TOP_ACLK133_MASK |
-			EXYNOS4_CLKDIV_TOP_ONENAND_MASK);
-
-		tmp |= ((exynos4210_clkdiv_top[i][0] <<
-					EXYNOS4_CLKDIV_TOP_ACLK200_SHIFT) |
-			(exynos4210_clkdiv_top[i][1] <<
-					EXYNOS4_CLKDIV_TOP_ACLK100_SHIFT) |
-			(exynos4210_clkdiv_top[i][2] <<
-					EXYNOS4_CLKDIV_TOP_ACLK160_SHIFT) |
-			(exynos4210_clkdiv_top[i][3] <<
-					EXYNOS4_CLKDIV_TOP_ACLK133_SHIFT) |
-			(exynos4210_clkdiv_top[i][4] <<
-					EXYNOS4_CLKDIV_TOP_ONENAND_SHIFT));
-
-		data->top_divtable[i] = tmp;
-	}
-
-	/*
-	 * TODO: init tmp based on busfreq_data
-	 * (device-tree or platform-data)
-	 */
-	tmp = 0; /* Max voltages for the reliability of the unknown */
-
-	pr_debug("ASV Group of Exynos4 is %d\n", tmp);
-	/* Use merged grouping for voltage */
-	switch (tmp) {
-	case 0:
-		mgrp = 0;
-		break;
-	case 1:
-	case 2:
-		mgrp = 1;
-		break;
-	case 3:
-	case 4:
-		mgrp = 2;
-		break;
-	case 5:
-	case 6:
-		mgrp = 3;
-		break;
-	case 7:
-		mgrp = 4;
-		break;
-	default:
-		pr_warn("Unknown ASV Group. Use max voltage.\n");
-		mgrp = 0;
-	}
-
-	for (i = LV_0; i < EX4210_LV_NUM; i++)
-		exynos4210_busclk_table[i].volt = exynos4210_asv_volt[mgrp][i];
-
-	for (i = LV_0; i < EX4210_LV_NUM; i++) {
-		err = dev_pm_opp_add(data->dev, exynos4210_busclk_table[i].clk,
-			      exynos4210_busclk_table[i].volt);
-		if (err) {
-			dev_err(data->dev, "Cannot add opp entries.\n");
-			return err;
-		}
-	}
-
-
-	return 0;
-}
-
-static int exynos4x12_init_tables(struct busfreq_data *data)
-{
-	unsigned int i;
-	unsigned int tmp;
-	int ret;
-
-	/* Enable pause function for DREX2 DVFS */
-	tmp = __raw_readl(EXYNOS4_DMC_PAUSE_CTRL);
-	tmp |= EXYNOS4_DMC_PAUSE_ENABLE;
-	__raw_writel(tmp, EXYNOS4_DMC_PAUSE_CTRL);
-
-	tmp = __raw_readl(EXYNOS4_CLKDIV_DMC0);
-
-	for (i = 0; i <  EX4x12_LV_NUM; i++) {
-		tmp &= ~(EXYNOS4_CLKDIV_DMC0_ACP_MASK |
-			EXYNOS4_CLKDIV_DMC0_ACPPCLK_MASK |
-			EXYNOS4_CLKDIV_DMC0_DPHY_MASK |
-			EXYNOS4_CLKDIV_DMC0_DMC_MASK |
-			EXYNOS4_CLKDIV_DMC0_DMCD_MASK |
-			EXYNOS4_CLKDIV_DMC0_DMCP_MASK);
-
-		tmp |= ((exynos4x12_clkdiv_dmc0[i][0] <<
-					EXYNOS4_CLKDIV_DMC0_ACP_SHIFT) |
-			(exynos4x12_clkdiv_dmc0[i][1] <<
-					EXYNOS4_CLKDIV_DMC0_ACPPCLK_SHIFT) |
-			(exynos4x12_clkdiv_dmc0[i][2] <<
-					EXYNOS4_CLKDIV_DMC0_DPHY_SHIFT) |
-			(exynos4x12_clkdiv_dmc0[i][3] <<
-					EXYNOS4_CLKDIV_DMC0_DMC_SHIFT) |
-			(exynos4x12_clkdiv_dmc0[i][4] <<
-					EXYNOS4_CLKDIV_DMC0_DMCD_SHIFT) |
-			(exynos4x12_clkdiv_dmc0[i][5] <<
-					EXYNOS4_CLKDIV_DMC0_DMCP_SHIFT));
-
-		data->dmc_divtable[i] = tmp;
-	}
-
-	tmp = 0; /* Max voltages for the reliability of the unknown */
-
-	if (tmp > 8)
-		tmp = 0;
-	pr_debug("ASV Group of Exynos4x12 is %d\n", tmp);
-
-	for (i = 0; i < EX4x12_LV_NUM; i++) {
-		exynos4x12_mifclk_table[i].volt =
-			exynos4x12_mif_step_50[tmp][i];
-		exynos4x12_intclk_table[i].volt =
-			exynos4x12_int_volt[tmp][i];
-	}
-
-	for (i = 0; i < EX4x12_LV_NUM; i++) {
-		ret = dev_pm_opp_add(data->dev, exynos4x12_mifclk_table[i].clk,
-			      exynos4x12_mifclk_table[i].volt);
-		if (ret) {
-			dev_err(data->dev, "Fail to add opp entries.\n");
-			return ret;
-		}
-	}
-
-	return 0;
-}
-
-static int exynos4_busfreq_pm_notifier_event(struct notifier_block *this,
-		unsigned long event, void *ptr)
-{
-	struct busfreq_data *data = container_of(this, struct busfreq_data,
-						 pm_notifier);
-	struct dev_pm_opp *opp;
-	struct busfreq_opp_info	new_oppinfo;
-	unsigned long maxfreq = ULONG_MAX;
-	int err = 0;
-
-	switch (event) {
-	case PM_SUSPEND_PREPARE:
-		/* Set Fastest and Deactivate DVFS */
-		mutex_lock(&data->lock);
-
-		data->disabled = true;
-
-		rcu_read_lock();
-		opp = dev_pm_opp_find_freq_floor(data->dev, &maxfreq);
-		if (IS_ERR(opp)) {
-			rcu_read_unlock();
-			dev_err(data->dev, "%s: unable to find a min freq\n",
-				__func__);
-			mutex_unlock(&data->lock);
-			return PTR_ERR(opp);
-		}
-		new_oppinfo.rate = dev_pm_opp_get_freq(opp);
-		new_oppinfo.volt = dev_pm_opp_get_voltage(opp);
-		rcu_read_unlock();
-
-		err = exynos4_bus_setvolt(data, &new_oppinfo,
-					  &data->curr_oppinfo);
-		if (err)
-			goto unlock;
-
-		switch (data->type) {
-		case TYPE_BUSF_EXYNOS4210:
-			err = exynos4210_set_busclk(data, &new_oppinfo);
-			break;
-		case TYPE_BUSF_EXYNOS4x12:
-			err = exynos4x12_set_busclk(data, &new_oppinfo);
-			break;
-		default:
-			err = -EINVAL;
-		}
-		if (err)
-			goto unlock;
-
-		data->curr_oppinfo = new_oppinfo;
-unlock:
-		mutex_unlock(&data->lock);
-		if (err)
-			return err;
-		return NOTIFY_OK;
-	case PM_POST_RESTORE:
-	case PM_POST_SUSPEND:
-		/* Reactivate */
-		mutex_lock(&data->lock);
-		data->disabled = false;
-		mutex_unlock(&data->lock);
-		return NOTIFY_OK;
-	}
-
-	return NOTIFY_DONE;
-}
-
-static int exynos4_busfreq_probe(struct platform_device *pdev)
-{
-	struct busfreq_data *data;
-	struct busfreq_ppmu_data *ppmu_data;
-	struct dev_pm_opp *opp;
-	struct device *dev = &pdev->dev;
-	int err = 0;
-
-	data = devm_kzalloc(&pdev->dev, sizeof(struct busfreq_data), GFP_KERNEL);
-	if (data == NULL) {
-		dev_err(dev, "Cannot allocate memory.\n");
-		return -ENOMEM;
-	}
-
-	ppmu_data = &data->ppmu_data;
-	ppmu_data->ppmu_end = PPMU_END;
-	ppmu_data->ppmu = devm_kzalloc(dev,
-				       sizeof(struct exynos_ppmu) * PPMU_END,
-				       GFP_KERNEL);
-	if (!ppmu_data->ppmu) {
-		dev_err(dev, "Failed to allocate memory for exynos_ppmu\n");
-		return -ENOMEM;
-	}
-
-	data->type = pdev->id_entry->driver_data;
-	ppmu_data->ppmu[PPMU_DMC0].hw_base = S5P_VA_DMC0;
-	ppmu_data->ppmu[PPMU_DMC1].hw_base = S5P_VA_DMC1;
-	data->pm_notifier.notifier_call = exynos4_busfreq_pm_notifier_event;
-	data->dev = dev;
-	mutex_init(&data->lock);
-
-	switch (data->type) {
-	case TYPE_BUSF_EXYNOS4210:
-		err = exynos4210_init_tables(data);
-		break;
-	case TYPE_BUSF_EXYNOS4x12:
-		err = exynos4x12_init_tables(data);
-		break;
-	default:
-		dev_err(dev, "Cannot determine the device id %d\n", data->type);
-		err = -EINVAL;
-	}
-	if (err) {
-		dev_err(dev, "Cannot initialize busfreq table %d\n",
-			     data->type);
-		return err;
-	}
-
-	data->vdd_int = devm_regulator_get(dev, "vdd_int");
-	if (IS_ERR(data->vdd_int)) {
-		dev_err(dev, "Cannot get the regulator \"vdd_int\"\n");
-		return PTR_ERR(data->vdd_int);
-	}
-	if (data->type == TYPE_BUSF_EXYNOS4x12) {
-		data->vdd_mif = devm_regulator_get(dev, "vdd_mif");
-		if (IS_ERR(data->vdd_mif)) {
-			dev_err(dev, "Cannot get the regulator \"vdd_mif\"\n");
-			return PTR_ERR(data->vdd_mif);
-		}
-	}
-
-	rcu_read_lock();
-	opp = dev_pm_opp_find_freq_floor(dev,
-					 &exynos4_devfreq_profile.initial_freq);
-	if (IS_ERR(opp)) {
-		rcu_read_unlock();
-		dev_err(dev, "Invalid initial frequency %lu kHz.\n",
-			exynos4_devfreq_profile.initial_freq);
-		return PTR_ERR(opp);
-	}
-	data->curr_oppinfo.rate = dev_pm_opp_get_freq(opp);
-	data->curr_oppinfo.volt = dev_pm_opp_get_voltage(opp);
-	rcu_read_unlock();
-
-	platform_set_drvdata(pdev, data);
-
-	data->devfreq = devm_devfreq_add_device(dev, &exynos4_devfreq_profile,
-					   "simple_ondemand", NULL);
-	if (IS_ERR(data->devfreq))
-		return PTR_ERR(data->devfreq);
-
-	/*
-	 * Start PPMU (Performance Profiling Monitoring Unit) to check
-	 * utilization of each IP in the Exynos4 SoC.
-	 */
-	busfreq_mon_reset(ppmu_data);
-
-	/* Register opp_notifier for Exynos4 busfreq */
-	err = devm_devfreq_register_opp_notifier(dev, data->devfreq);
-	if (err < 0) {
-		dev_err(dev, "Failed to register opp notifier\n");
-		return err;
-	}
-
-	/* Register pm_notifier for Exynos4 busfreq */
-	err = register_pm_notifier(&data->pm_notifier);
-	if (err) {
-		dev_err(dev, "Failed to setup pm notifier\n");
-		return err;
-	}
-
-	return 0;
-}
-
-static int exynos4_busfreq_remove(struct platform_device *pdev)
-{
-	struct busfreq_data *data = platform_get_drvdata(pdev);
-
-	/* Unregister all of notifier chain */
-	unregister_pm_notifier(&data->pm_notifier);
-
-	return 0;
-}
-
-#ifdef CONFIG_PM_SLEEP
-static int exynos4_busfreq_resume(struct device *dev)
-{
-	struct busfreq_data *data = dev_get_drvdata(dev);
-	struct busfreq_ppmu_data *ppmu_data = &data->ppmu_data;
-
-	busfreq_mon_reset(ppmu_data);
-	return 0;
-}
-#endif
-
-static SIMPLE_DEV_PM_OPS(exynos4_busfreq_pm_ops, NULL, exynos4_busfreq_resume);
-
-static const struct platform_device_id exynos4_busfreq_id[] = {
-	{ "exynos4210-busfreq", TYPE_BUSF_EXYNOS4210 },
-	{ "exynos4412-busfreq", TYPE_BUSF_EXYNOS4x12 },
-	{ "exynos4212-busfreq", TYPE_BUSF_EXYNOS4x12 },
-	{ },
-};
-
-static struct platform_driver exynos4_busfreq_driver = {
-	.probe	= exynos4_busfreq_probe,
-	.remove	= exynos4_busfreq_remove,
-	.id_table = exynos4_busfreq_id,
-	.driver = {
-		.name	= "exynos4-busfreq",
-		.pm	= &exynos4_busfreq_pm_ops,
-	},
-};
-
-static int __init exynos4_busfreq_init(void)
-{
-	return platform_driver_register(&exynos4_busfreq_driver);
-}
-late_initcall(exynos4_busfreq_init);
-
-static void __exit exynos4_busfreq_exit(void)
-{
-	platform_driver_unregister(&exynos4_busfreq_driver);
-}
-module_exit(exynos4_busfreq_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("EXYNOS4 busfreq driver with devfreq framework");
-MODULE_AUTHOR("MyungJoo Ham <myungjoo.ham@samsung.com>");
diff --git a/drivers/devfreq/exynos/exynos4_bus.h b/drivers/devfreq/exynos/exynos4_bus.h
deleted file mode 100644
index 94c73c1..0000000
--- a/drivers/devfreq/exynos/exynos4_bus.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Copyright (c) 2013 Samsung Electronics Co., Ltd.
- *		http://www.samsung.com/
- *
- * EXYNOS4 BUS header
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
-*/
-
-#ifndef __DEVFREQ_EXYNOS4_BUS_H
-#define __DEVFREQ_EXYNOS4_BUS_H __FILE__
-
-#include <mach/map.h>
-
-#define EXYNOS4_CLKDIV_LEFTBUS			(S5P_VA_CMU + 0x04500)
-#define EXYNOS4_CLKDIV_STAT_LEFTBUS		(S5P_VA_CMU + 0x04600)
-
-#define EXYNOS4_CLKDIV_RIGHTBUS			(S5P_VA_CMU + 0x08500)
-#define EXYNOS4_CLKDIV_STAT_RIGHTBUS		(S5P_VA_CMU + 0x08600)
-
-#define EXYNOS4_CLKDIV_TOP			(S5P_VA_CMU + 0x0C510)
-#define EXYNOS4_CLKDIV_CAM			(S5P_VA_CMU + 0x0C520)
-#define EXYNOS4_CLKDIV_MFC			(S5P_VA_CMU + 0x0C528)
-
-#define EXYNOS4_CLKDIV_STAT_TOP			(S5P_VA_CMU + 0x0C610)
-#define EXYNOS4_CLKDIV_STAT_MFC			(S5P_VA_CMU + 0x0C628)
-
-#define EXYNOS4210_CLKGATE_IP_IMAGE		(S5P_VA_CMU + 0x0C930)
-#define EXYNOS4212_CLKGATE_IP_IMAGE		(S5P_VA_CMU + 0x04930)
-
-#define EXYNOS4_CLKDIV_DMC0			(S5P_VA_CMU + 0x10500)
-#define EXYNOS4_CLKDIV_DMC1			(S5P_VA_CMU + 0x10504)
-#define EXYNOS4_CLKDIV_STAT_DMC0		(S5P_VA_CMU + 0x10600)
-#define EXYNOS4_CLKDIV_STAT_DMC1		(S5P_VA_CMU + 0x10604)
-
-#define EXYNOS4_DMC_PAUSE_CTRL			(S5P_VA_CMU + 0x11094)
-#define EXYNOS4_DMC_PAUSE_ENABLE		(1 << 0)
-
-#define EXYNOS4_CLKDIV_DMC0_ACP_SHIFT		(0)
-#define EXYNOS4_CLKDIV_DMC0_ACP_MASK		(0x7 << EXYNOS4_CLKDIV_DMC0_ACP_SHIFT)
-#define EXYNOS4_CLKDIV_DMC0_ACPPCLK_SHIFT	(4)
-#define EXYNOS4_CLKDIV_DMC0_ACPPCLK_MASK	(0x7 << EXYNOS4_CLKDIV_DMC0_ACPPCLK_SHIFT)
-#define EXYNOS4_CLKDIV_DMC0_DPHY_SHIFT		(8)
-#define EXYNOS4_CLKDIV_DMC0_DPHY_MASK		(0x7 << EXYNOS4_CLKDIV_DMC0_DPHY_SHIFT)
-#define EXYNOS4_CLKDIV_DMC0_DMC_SHIFT		(12)
-#define EXYNOS4_CLKDIV_DMC0_DMC_MASK		(0x7 << EXYNOS4_CLKDIV_DMC0_DMC_SHIFT)
-#define EXYNOS4_CLKDIV_DMC0_DMCD_SHIFT		(16)
-#define EXYNOS4_CLKDIV_DMC0_DMCD_MASK		(0x7 << EXYNOS4_CLKDIV_DMC0_DMCD_SHIFT)
-#define EXYNOS4_CLKDIV_DMC0_DMCP_SHIFT		(20)
-#define EXYNOS4_CLKDIV_DMC0_DMCP_MASK		(0x7 << EXYNOS4_CLKDIV_DMC0_DMCP_SHIFT)
-#define EXYNOS4_CLKDIV_DMC0_COPY2_SHIFT		(24)
-#define EXYNOS4_CLKDIV_DMC0_COPY2_MASK		(0x7 << EXYNOS4_CLKDIV_DMC0_COPY2_SHIFT)
-#define EXYNOS4_CLKDIV_DMC0_CORETI_SHIFT	(28)
-#define EXYNOS4_CLKDIV_DMC0_CORETI_MASK		(0x7 << EXYNOS4_CLKDIV_DMC0_CORETI_SHIFT)
-
-#define EXYNOS4_CLKDIV_DMC1_G2D_ACP_SHIFT	(0)
-#define EXYNOS4_CLKDIV_DMC1_G2D_ACP_MASK	(0xf << EXYNOS4_CLKDIV_DMC1_G2D_ACP_SHIFT)
-#define EXYNOS4_CLKDIV_DMC1_C2C_SHIFT		(4)
-#define EXYNOS4_CLKDIV_DMC1_C2C_MASK		(0x7 << EXYNOS4_CLKDIV_DMC1_C2C_SHIFT)
-#define EXYNOS4_CLKDIV_DMC1_PWI_SHIFT		(8)
-#define EXYNOS4_CLKDIV_DMC1_PWI_MASK		(0xf << EXYNOS4_CLKDIV_DMC1_PWI_SHIFT)
-#define EXYNOS4_CLKDIV_DMC1_C2CACLK_SHIFT	(12)
-#define EXYNOS4_CLKDIV_DMC1_C2CACLK_MASK	(0x7 << EXYNOS4_CLKDIV_DMC1_C2CACLK_SHIFT)
-#define EXYNOS4_CLKDIV_DMC1_DVSEM_SHIFT		(16)
-#define EXYNOS4_CLKDIV_DMC1_DVSEM_MASK		(0x7f << EXYNOS4_CLKDIV_DMC1_DVSEM_SHIFT)
-#define EXYNOS4_CLKDIV_DMC1_DPM_SHIFT		(24)
-#define EXYNOS4_CLKDIV_DMC1_DPM_MASK		(0x7f << EXYNOS4_CLKDIV_DMC1_DPM_SHIFT)
-
-#define EXYNOS4_CLKDIV_MFC_SHIFT		(0)
-#define EXYNOS4_CLKDIV_MFC_MASK			(0x7 << EXYNOS4_CLKDIV_MFC_SHIFT)
-
-#define EXYNOS4_CLKDIV_TOP_ACLK200_SHIFT	(0)
-#define EXYNOS4_CLKDIV_TOP_ACLK200_MASK		(0x7 << EXYNOS4_CLKDIV_TOP_ACLK200_SHIFT)
-#define EXYNOS4_CLKDIV_TOP_ACLK100_SHIFT	(4)
-#define EXYNOS4_CLKDIV_TOP_ACLK100_MASK		(0xF << EXYNOS4_CLKDIV_TOP_ACLK100_SHIFT)
-#define EXYNOS4_CLKDIV_TOP_ACLK160_SHIFT	(8)
-#define EXYNOS4_CLKDIV_TOP_ACLK160_MASK		(0x7 << EXYNOS4_CLKDIV_TOP_ACLK160_SHIFT)
-#define EXYNOS4_CLKDIV_TOP_ACLK133_SHIFT	(12)
-#define EXYNOS4_CLKDIV_TOP_ACLK133_MASK		(0x7 << EXYNOS4_CLKDIV_TOP_ACLK133_SHIFT)
-#define EXYNOS4_CLKDIV_TOP_ONENAND_SHIFT	(16)
-#define EXYNOS4_CLKDIV_TOP_ONENAND_MASK		(0x7 << EXYNOS4_CLKDIV_TOP_ONENAND_SHIFT)
-#define EXYNOS4_CLKDIV_TOP_ACLK266_GPS_SHIFT	(20)
-#define EXYNOS4_CLKDIV_TOP_ACLK266_GPS_MASK	(0x7 << EXYNOS4_CLKDIV_TOP_ACLK266_GPS_SHIFT)
-#define EXYNOS4_CLKDIV_TOP_ACLK400_MCUISP_SHIFT	(24)
-#define EXYNOS4_CLKDIV_TOP_ACLK400_MCUISP_MASK	(0x7 << EXYNOS4_CLKDIV_TOP_ACLK400_MCUISP_SHIFT)
-
-#define EXYNOS4_CLKDIV_BUS_GDLR_SHIFT		(0)
-#define EXYNOS4_CLKDIV_BUS_GDLR_MASK		(0x7 << EXYNOS4_CLKDIV_BUS_GDLR_SHIFT)
-#define EXYNOS4_CLKDIV_BUS_GPLR_SHIFT		(4)
-#define EXYNOS4_CLKDIV_BUS_GPLR_MASK		(0x7 << EXYNOS4_CLKDIV_BUS_GPLR_SHIFT)
-
-#define EXYNOS4_CLKDIV_CAM_FIMC0_SHIFT		(0)
-#define EXYNOS4_CLKDIV_CAM_FIMC0_MASK		(0xf << EXYNOS4_CLKDIV_CAM_FIMC0_SHIFT)
-#define EXYNOS4_CLKDIV_CAM_FIMC1_SHIFT		(4)
-#define EXYNOS4_CLKDIV_CAM_FIMC1_MASK		(0xf << EXYNOS4_CLKDIV_CAM_FIMC1_SHIFT)
-#define EXYNOS4_CLKDIV_CAM_FIMC2_SHIFT		(8)
-#define EXYNOS4_CLKDIV_CAM_FIMC2_MASK		(0xf << EXYNOS4_CLKDIV_CAM_FIMC2_SHIFT)
-#define EXYNOS4_CLKDIV_CAM_FIMC3_SHIFT		(12)
-#define EXYNOS4_CLKDIV_CAM_FIMC3_MASK		(0xf << EXYNOS4_CLKDIV_CAM_FIMC3_SHIFT)
-
-#define EXYNOS4_CLKDIV_CAM1			(S5P_VA_CMU + 0x0C568)
-
-#define EXYNOS4_CLKDIV_STAT_CAM1		(S5P_VA_CMU + 0x0C668)
-
-#define EXYNOS4_CLKDIV_CAM1_JPEG_SHIFT		(0)
-#define EXYNOS4_CLKDIV_CAM1_JPEG_MASK		(0xf << EXYNOS4_CLKDIV_CAM1_JPEG_SHIFT)
-
-#endif /* __DEVFREQ_EXYNOS4_BUS_H */
diff --git a/drivers/devfreq/exynos/exynos5_bus.c b/drivers/devfreq/exynos/exynos5_bus.c
deleted file mode 100644
index 297ea30..0000000
--- a/drivers/devfreq/exynos/exynos5_bus.c
+++ /dev/null
@@ -1,431 +0,0 @@
-/*
- * Copyright (c) 2012 Samsung Electronics Co., Ltd.
- *		http://www.samsung.com/
- *
- * EXYNOS5 INT clock frequency scaling support using DEVFREQ framework
- * Based on work done by Jonghwan Choi <jhbird.choi@samsung.com>
- * Support for only EXYNOS5250 is present.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-
-#include <linux/module.h>
-#include <linux/devfreq.h>
-#include <linux/io.h>
-#include <linux/pm_opp.h>
-#include <linux/slab.h>
-#include <linux/suspend.h>
-#include <linux/clk.h>
-#include <linux/delay.h>
-#include <linux/platform_device.h>
-#include <linux/pm_qos.h>
-#include <linux/regulator/consumer.h>
-#include <linux/of_address.h>
-#include <linux/of_platform.h>
-
-#include "exynos_ppmu.h"
-
-#define MAX_SAFEVOLT			1100000 /* 1.10V */
-/* Assume that the bus is saturated if the utilization is 25% */
-#define INT_BUS_SATURATION_RATIO	25
-
-enum int_level_idx {
-	LV_0,
-	LV_1,
-	LV_2,
-	LV_3,
-	LV_4,
-	_LV_END
-};
-
-enum exynos_ppmu_list {
-	PPMU_RIGHT,
-	PPMU_END,
-};
-
-struct busfreq_data_int {
-	struct device *dev;
-	struct devfreq *devfreq;
-	struct regulator *vdd_int;
-	struct busfreq_ppmu_data ppmu_data;
-	unsigned long curr_freq;
-	bool disabled;
-
-	struct notifier_block pm_notifier;
-	struct mutex lock;
-	struct pm_qos_request int_req;
-	struct clk *int_clk;
-};
-
-struct int_bus_opp_table {
-	unsigned int idx;
-	unsigned long clk;
-	unsigned long volt;
-};
-
-static struct int_bus_opp_table exynos5_int_opp_table[] = {
-	{LV_0, 266000, 1025000},
-	{LV_1, 200000, 1025000},
-	{LV_2, 160000, 1025000},
-	{LV_3, 133000, 1025000},
-	{LV_4, 100000, 1025000},
-	{0, 0, 0},
-};
-
-static int exynos5_int_setvolt(struct busfreq_data_int *data,
-				unsigned long volt)
-{
-	return regulator_set_voltage(data->vdd_int, volt, MAX_SAFEVOLT);
-}
-
-static int exynos5_busfreq_int_target(struct device *dev, unsigned long *_freq,
-			      u32 flags)
-{
-	int err = 0;
-	struct platform_device *pdev = container_of(dev, struct platform_device,
-						    dev);
-	struct busfreq_data_int *data = platform_get_drvdata(pdev);
-	struct dev_pm_opp *opp;
-	unsigned long old_freq, freq;
-	unsigned long volt;
-
-	rcu_read_lock();
-	opp = devfreq_recommended_opp(dev, _freq, flags);
-	if (IS_ERR(opp)) {
-		rcu_read_unlock();
-		dev_err(dev, "%s: Invalid OPP.\n", __func__);
-		return PTR_ERR(opp);
-	}
-
-	freq = dev_pm_opp_get_freq(opp);
-	volt = dev_pm_opp_get_voltage(opp);
-	rcu_read_unlock();
-
-	old_freq = data->curr_freq;
-
-	if (old_freq == freq)
-		return 0;
-
-	dev_dbg(dev, "targeting %lukHz %luuV\n", freq, volt);
-
-	mutex_lock(&data->lock);
-
-	if (data->disabled)
-		goto out;
-
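-	/*
-	 * The network-throughput QoS class is borrowed here (see the TODO in
-	 * probe); the freq * 16 / 1000 scaling appears to assume 16 bytes
-	 * transferred per bus clock when deriving a throughput figure.
-	 */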
-	if (freq > exynos5_int_opp_table[0].clk)
-		pm_qos_update_request(&data->int_req, freq * 16 / 1000);
-	else
-		pm_qos_update_request(&data->int_req, -1);
-
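-	/*
-	 * Usual DVFS ordering: raise the voltage before increasing the
-	 * clock, and lower it only after the clock has been decreased.
-	 */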
-	if (old_freq < freq)
-		err = exynos5_int_setvolt(data, volt);
-	if (err)
-		goto out;
-
-	err = clk_set_rate(data->int_clk, freq * 1000);
-
-	if (err)
-		goto out;
-
-	if (old_freq > freq)
-		err = exynos5_int_setvolt(data, volt);
-	if (err)
-		goto out;
-
-	data->curr_freq = freq;
-out:
-	mutex_unlock(&data->lock);
-	return err;
-}
-
-static int exynos5_int_get_dev_status(struct device *dev,
-				      struct devfreq_dev_status *stat)
-{
-	struct platform_device *pdev = container_of(dev, struct platform_device,
-						    dev);
-	struct busfreq_data_int *data = platform_get_drvdata(pdev);
-	struct busfreq_ppmu_data *ppmu_data = &data->ppmu_data;
-	int busier_dmc;
-
-	exynos_read_ppmu(ppmu_data);
-	busier_dmc = exynos_get_busier_ppmu(ppmu_data);
-
-	stat->current_frequency = data->curr_freq;
-
-	/* Number of cycles spent on memory access */
-	stat->busy_time = ppmu_data->ppmu[busier_dmc].count[PPMU_PMNCNT3];
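-	/* Scaling by 100/25 makes a 25%-utilized bus read as fully busy. */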
-	stat->busy_time *= 100 / INT_BUS_SATURATION_RATIO;
-	stat->total_time = ppmu_data->ppmu[busier_dmc].ccnt;
-
-	return 0;
-}
-
-static struct devfreq_dev_profile exynos5_devfreq_int_profile = {
-	.initial_freq		= 160000,
-	.polling_ms		= 100,
-	.target			= exynos5_busfreq_int_target,
-	.get_dev_status		= exynos5_int_get_dev_status,
-};
-
-static int exynos5250_init_int_tables(struct busfreq_data_int *data)
-{
-	int i, err = 0;
-
-	for (i = LV_0; i < _LV_END; i++) {
-		err = dev_pm_opp_add(data->dev, exynos5_int_opp_table[i].clk,
-				exynos5_int_opp_table[i].volt);
-		if (err) {
-			dev_err(data->dev, "Cannot add opp entries.\n");
-			return err;
-		}
-	}
-
-	return 0;
-}
-
-static int exynos5_busfreq_int_pm_notifier_event(struct notifier_block *this,
-		unsigned long event, void *ptr)
-{
-	struct busfreq_data_int *data = container_of(this,
-					struct busfreq_data_int, pm_notifier);
-	struct dev_pm_opp *opp;
-	unsigned long maxfreq = ULONG_MAX;
-	unsigned long freq;
-	unsigned long volt;
-	int err = 0;
-
-	switch (event) {
-	case PM_SUSPEND_PREPARE:
-		/* Set Fastest and Deactivate DVFS */
-		mutex_lock(&data->lock);
-
-		data->disabled = true;
-
-		rcu_read_lock();
-		opp = dev_pm_opp_find_freq_floor(data->dev, &maxfreq);
-		if (IS_ERR(opp)) {
-			rcu_read_unlock();
-			err = PTR_ERR(opp);
-			goto unlock;
-		}
-		freq = dev_pm_opp_get_freq(opp);
-		volt = dev_pm_opp_get_voltage(opp);
-		rcu_read_unlock();
-
-		err = exynos5_int_setvolt(data, volt);
-		if (err)
-			goto unlock;
-
-		err = clk_set_rate(data->int_clk, freq * 1000);
-
-		if (err)
-			goto unlock;
-
-		data->curr_freq = freq;
-unlock:
-		mutex_unlock(&data->lock);
-		if (err)
-			return NOTIFY_BAD;
-		return NOTIFY_OK;
-	case PM_POST_RESTORE:
-	case PM_POST_SUSPEND:
-		/* Reactivate */
-		mutex_lock(&data->lock);
-		data->disabled = false;
-		mutex_unlock(&data->lock);
-		return NOTIFY_OK;
-	}
-
-	return NOTIFY_DONE;
-}
-
-static int exynos5_busfreq_int_probe(struct platform_device *pdev)
-{
-	struct busfreq_data_int *data;
-	struct busfreq_ppmu_data *ppmu_data;
-	struct dev_pm_opp *opp;
-	struct device *dev = &pdev->dev;
-	struct device_node *np;
-	unsigned long initial_freq;
-	unsigned long initial_volt;
-	int err = 0;
-	int i;
-
-	data = devm_kzalloc(&pdev->dev, sizeof(struct busfreq_data_int),
-				GFP_KERNEL);
-	if (data == NULL) {
-		dev_err(dev, "Cannot allocate memory.\n");
-		return -ENOMEM;
-	}
-
-	ppmu_data = &data->ppmu_data;
-	ppmu_data->ppmu_end = PPMU_END;
-	ppmu_data->ppmu = devm_kzalloc(dev,
-				       sizeof(struct exynos_ppmu) * PPMU_END,
-				       GFP_KERNEL);
-	if (!ppmu_data->ppmu) {
-		dev_err(dev, "Failed to allocate memory for exynos_ppmu\n");
-		return -ENOMEM;
-	}
-
-	np = of_find_compatible_node(NULL, NULL, "samsung,exynos5250-ppmu");
-	if (np == NULL) {
-		pr_err("Unable to find PPMU node\n");
-		return -ENOENT;
-	}
-
-	for (i = 0; i < ppmu_data->ppmu_end; i++) {
-		/* map PPMU memory region */
-		ppmu_data->ppmu[i].hw_base = of_iomap(np, i);
-		if (ppmu_data->ppmu[i].hw_base == NULL) {
-			dev_err(&pdev->dev, "failed to map memory region\n");
-			return -ENOMEM;
-		}
-	}
-	data->pm_notifier.notifier_call = exynos5_busfreq_int_pm_notifier_event;
-	data->dev = dev;
-	mutex_init(&data->lock);
-
-	err = exynos5250_init_int_tables(data);
-	if (err)
-		return err;
-
-	data->vdd_int = devm_regulator_get(dev, "vdd_int");
-	if (IS_ERR(data->vdd_int)) {
-		dev_err(dev, "Cannot get the regulator \"vdd_int\"\n");
-		return PTR_ERR(data->vdd_int);
-	}
-
-	data->int_clk = devm_clk_get(dev, "int_clk");
-	if (IS_ERR(data->int_clk)) {
-		dev_err(dev, "Cannot get clock \"int_clk\"\n");
-		return PTR_ERR(data->int_clk);
-	}
-
-	rcu_read_lock();
-	opp = dev_pm_opp_find_freq_floor(dev,
-			&exynos5_devfreq_int_profile.initial_freq);
-	if (IS_ERR(opp)) {
-		rcu_read_unlock();
-		dev_err(dev, "Invalid initial frequency %lu kHz.\n",
-		       exynos5_devfreq_int_profile.initial_freq);
-		return PTR_ERR(opp);
-	}
-	initial_freq = dev_pm_opp_get_freq(opp);
-	initial_volt = dev_pm_opp_get_voltage(opp);
-	rcu_read_unlock();
-	data->curr_freq = initial_freq;
-
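-	/* OPP frequencies are in kHz; the clk API expects Hz. */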
-	err = clk_set_rate(data->int_clk, initial_freq * 1000);
-	if (err) {
-		dev_err(dev, "Failed to set initial frequency\n");
-		return err;
-	}
-
-	err = exynos5_int_setvolt(data, initial_volt);
-	if (err)
-		return err;
-
-	platform_set_drvdata(pdev, data);
-
-	busfreq_mon_reset(ppmu_data);
-
-	data->devfreq = devm_devfreq_add_device(dev, &exynos5_devfreq_int_profile,
-					   "simple_ondemand", NULL);
-	if (IS_ERR(data->devfreq))
-		return PTR_ERR(data->devfreq);
-
-	err = devm_devfreq_register_opp_notifier(dev, data->devfreq);
-	if (err < 0) {
-		dev_err(dev, "Failed to register opp notifier\n");
-		return err;
-	}
-
-	err = register_pm_notifier(&data->pm_notifier);
-	if (err) {
-		dev_err(dev, "Failed to setup pm notifier\n");
-		return err;
-	}
-
-	/* TODO: Add a new QOS class for int/mif bus */
-	pm_qos_add_request(&data->int_req, PM_QOS_NETWORK_THROUGHPUT, -1);
-
-	return 0;
-}
-
-static int exynos5_busfreq_int_remove(struct platform_device *pdev)
-{
-	struct busfreq_data_int *data = platform_get_drvdata(pdev);
-
-	pm_qos_remove_request(&data->int_req);
-	unregister_pm_notifier(&data->pm_notifier);
-
-	return 0;
-}
-
-#ifdef CONFIG_PM_SLEEP
-static int exynos5_busfreq_int_resume(struct device *dev)
-{
-	struct platform_device *pdev = container_of(dev, struct platform_device,
-						    dev);
-	struct busfreq_data_int *data = platform_get_drvdata(pdev);
-	struct busfreq_ppmu_data *ppmu_data = &data->ppmu_data;
-
-	busfreq_mon_reset(ppmu_data);
-	return 0;
-}
-static const struct dev_pm_ops exynos5_busfreq_int_pm = {
-	.resume	= exynos5_busfreq_int_resume,
-};
-#endif
-static SIMPLE_DEV_PM_OPS(exynos5_busfreq_int_pm_ops, NULL,
-			 exynos5_busfreq_int_resume);
-
-/* platform device pointer for exynos5 devfreq device. */
-static struct platform_device *exynos5_devfreq_pdev;
-
-static struct platform_driver exynos5_busfreq_int_driver = {
-	.probe		= exynos5_busfreq_int_probe,
-	.remove		= exynos5_busfreq_int_remove,
-	.driver		= {
-		.name		= "exynos5-bus-int",
-		.pm		= &exynos5_busfreq_int_pm_ops,
-	},
-};
-
-static int __init exynos5_busfreq_int_init(void)
-{
-	int ret;
-
-	ret = platform_driver_register(&exynos5_busfreq_int_driver);
-	if (ret < 0)
-		goto out;
-
-	exynos5_devfreq_pdev =
-		platform_device_register_simple("exynos5-bus-int", -1, NULL, 0);
-	if (IS_ERR(exynos5_devfreq_pdev)) {
-		ret = PTR_ERR(exynos5_devfreq_pdev);
-		goto out1;
-	}
-
-	return 0;
-out1:
-	platform_driver_unregister(&exynos5_busfreq_int_driver);
-out:
-	return ret;
-}
-late_initcall(exynos5_busfreq_int_init);
-
-static void __exit exynos5_busfreq_int_exit(void)
-{
-	platform_device_unregister(exynos5_devfreq_pdev);
-	platform_driver_unregister(&exynos5_busfreq_int_driver);
-}
-module_exit(exynos5_busfreq_int_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("EXYNOS5 busfreq driver with devfreq framework");
diff --git a/drivers/devfreq/exynos/exynos_ppmu.c b/drivers/devfreq/exynos/exynos_ppmu.c
deleted file mode 100644
index 97b75e5..0000000
--- a/drivers/devfreq/exynos/exynos_ppmu.c
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * Copyright (c) 2012 Samsung Electronics Co., Ltd.
- *		http://www.samsung.com/
- *
- * EXYNOS - PPMU support
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/io.h>
-
-#include "exynos_ppmu.h"
-
-void exynos_ppmu_reset(void __iomem *ppmu_base)
-{
-	__raw_writel(PPMU_CYCLE_RESET | PPMU_COUNTER_RESET, ppmu_base);
-	__raw_writel(PPMU_ENABLE_CYCLE  |
-		     PPMU_ENABLE_COUNT0 |
-		     PPMU_ENABLE_COUNT1 |
-		     PPMU_ENABLE_COUNT2 |
-		     PPMU_ENABLE_COUNT3,
-		     ppmu_base + PPMU_CNTENS);
-}
-
-void exynos_ppmu_setevent(void __iomem *ppmu_base, unsigned int ch,
-			unsigned int evt)
-{
-	__raw_writel(evt, ppmu_base + PPMU_BEVTSEL(ch));
-}
-
-void exynos_ppmu_start(void __iomem *ppmu_base)
-{
-	__raw_writel(PPMU_ENABLE, ppmu_base);
-}
-
-void exynos_ppmu_stop(void __iomem *ppmu_base)
-{
-	__raw_writel(PPMU_DISABLE, ppmu_base);
-}
-
-unsigned int exynos_ppmu_read(void __iomem *ppmu_base, unsigned int ch)
-{
-	unsigned int total;
-
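-	/*
-	 * Counter 3 is wider than the others; its value is split across
-	 * two adjacent register slots, hence the shift-and-OR.
-	 */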
-	if (ch == PPMU_PMNCNT3)
-		total = ((__raw_readl(ppmu_base + PMCNT_OFFSET(ch)) << 8) |
-			  __raw_readl(ppmu_base + PMCNT_OFFSET(ch + 1)));
-	else
-		total = __raw_readl(ppmu_base + PMCNT_OFFSET(ch));
-
-	return total;
-}
-
-void busfreq_mon_reset(struct busfreq_ppmu_data *ppmu_data)
-{
-	unsigned int i;
-
-	for (i = 0; i < ppmu_data->ppmu_end; i++) {
-		void __iomem *ppmu_base = ppmu_data->ppmu[i].hw_base;
-
-		/* Reset the performance and cycle counters */
-		exynos_ppmu_reset(ppmu_base);
-
-		/* Setup count registers to monitor read/write transactions */
-		ppmu_data->ppmu[i].event[PPMU_PMNCNT3] = RDWR_DATA_COUNT;
-		exynos_ppmu_setevent(ppmu_base, PPMU_PMNCNT3,
-					ppmu_data->ppmu[i].event[PPMU_PMNCNT3]);
-
-		exynos_ppmu_start(ppmu_base);
-	}
-}
-EXPORT_SYMBOL(busfreq_mon_reset);
-
-void exynos_read_ppmu(struct busfreq_ppmu_data *ppmu_data)
-{
-	int i, j;
-
-	for (i = 0; i < ppmu_data->ppmu_end; i++) {
-		void __iomem *ppmu_base = ppmu_data->ppmu[i].hw_base;
-
-		exynos_ppmu_stop(ppmu_base);
-
-		/* Update local data from PPMU */
-		ppmu_data->ppmu[i].ccnt = __raw_readl(ppmu_base + PPMU_CCNT);
-
-		for (j = PPMU_PMNCNT0; j < PPMU_PMNCNT_MAX; j++) {
-			if (ppmu_data->ppmu[i].event[j] == 0)
-				ppmu_data->ppmu[i].count[j] = 0;
-			else
-				ppmu_data->ppmu[i].count[j] =
-					exynos_ppmu_read(ppmu_base, j);
-		}
-	}
-
-	busfreq_mon_reset(ppmu_data);
-}
-EXPORT_SYMBOL(exynos_read_ppmu);
-
-int exynos_get_busier_ppmu(struct busfreq_ppmu_data *ppmu_data)
-{
-	unsigned int count = 0;
-	int i, j, busy = 0;
-
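-	/* Return the index of the PPMU holding the single largest count. */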
-	for (i = 0; i < ppmu_data->ppmu_end; i++) {
-		for (j = PPMU_PMNCNT0; j < PPMU_PMNCNT_MAX; j++) {
-			if (ppmu_data->ppmu[i].count[j] > count) {
-				count = ppmu_data->ppmu[i].count[j];
-				busy = i;
-			}
-		}
-	}
-
-	return busy;
-}
-EXPORT_SYMBOL(exynos_get_busier_ppmu);
diff --git a/drivers/devfreq/exynos/exynos_ppmu.h b/drivers/devfreq/exynos/exynos_ppmu.h
deleted file mode 100644
index 71f17ba..0000000
--- a/drivers/devfreq/exynos/exynos_ppmu.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (c) 2012 Samsung Electronics Co., Ltd.
- *		http://www.samsung.com/
- *
- * EXYNOS PPMU header
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
-*/
-
-#ifndef __DEVFREQ_EXYNOS_PPMU_H
-#define __DEVFREQ_EXYNOS_PPMU_H __FILE__
-
-#include <linux/ktime.h>
-
-/* For PPMU Control */
-#define PPMU_ENABLE             BIT(0)
-#define PPMU_DISABLE            0x0
-#define PPMU_CYCLE_RESET        BIT(1)
-#define PPMU_COUNTER_RESET      BIT(2)
-
-#define PPMU_ENABLE_COUNT0      BIT(0)
-#define PPMU_ENABLE_COUNT1      BIT(1)
-#define PPMU_ENABLE_COUNT2      BIT(2)
-#define PPMU_ENABLE_COUNT3      BIT(3)
-#define PPMU_ENABLE_CYCLE       BIT(31)
-
-#define PPMU_CNTENS		0x10
-#define PPMU_FLAG		0x50
-#define PPMU_CCNT_OVERFLOW	BIT(31)
-#define PPMU_CCNT		0x100
-
-#define PPMU_PMCNT0		0x110
-#define PPMU_PMCNT_OFFSET	0x10
-#define PMCNT_OFFSET(x)		(PPMU_PMCNT0 + (PPMU_PMCNT_OFFSET * x))
-
-#define PPMU_BEVT0SEL		0x1000
-#define PPMU_BEVTSEL_OFFSET	0x100
-#define PPMU_BEVTSEL(x)		(PPMU_BEVT0SEL + (x * PPMU_BEVTSEL_OFFSET))
-
-/* For Event Selection */
-#define RD_DATA_COUNT		0x5
-#define WR_DATA_COUNT		0x6
-#define RDWR_DATA_COUNT		0x7
-
-enum ppmu_counter {
-	PPMU_PMNCNT0,
-	PPMU_PMCCNT1,
-	PPMU_PMNCNT2,
-	PPMU_PMNCNT3,
-	PPMU_PMNCNT_MAX,
-};
-
-struct bus_opp_table {
-	unsigned int idx;
-	unsigned long clk;
-	unsigned long volt;
-};
-
-struct exynos_ppmu {
-	void __iomem *hw_base;
-	unsigned int ccnt;
-	unsigned int event[PPMU_PMNCNT_MAX];
-	unsigned int count[PPMU_PMNCNT_MAX];
-	unsigned long long ns;
-	ktime_t reset_time;
-	bool ccnt_overflow;
-	bool count_overflow[PPMU_PMNCNT_MAX];
-};
-
-struct busfreq_ppmu_data {
-	struct exynos_ppmu *ppmu;
-	int ppmu_end;
-};
-
-void exynos_ppmu_reset(void __iomem *ppmu_base);
-void exynos_ppmu_setevent(void __iomem *ppmu_base, unsigned int ch,
-			unsigned int evt);
-void exynos_ppmu_start(void __iomem *ppmu_base);
-void exynos_ppmu_stop(void __iomem *ppmu_base);
-unsigned int exynos_ppmu_read(void __iomem *ppmu_base, unsigned int ch);
-void busfreq_mon_reset(struct busfreq_ppmu_data *ppmu_data);
-void exynos_read_ppmu(struct busfreq_ppmu_data *ppmu_data);
-int exynos_get_busier_ppmu(struct busfreq_ppmu_data *ppmu_data);
-#endif /* __DEVFREQ_EXYNOS_PPMU_H */
diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c
new file mode 100644
index 0000000..9ef46e2
--- /dev/null
+++ b/drivers/devfreq/governor_passive.c
@@ -0,0 +1,205 @@
+/*
+ * linux/drivers/devfreq/governor_passive.c
+ *
+ * Copyright (C) 2016 Samsung Electronics
+ * Author: Chanwoo Choi <cw00.choi@samsung.com>
+ * Author: MyungJoo Ham <myungjoo.ham@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/devfreq.h>
+#include "governor.h"
+
+static int devfreq_passive_get_target_freq(struct devfreq *devfreq,
+					unsigned long *freq)
+{
+	struct devfreq_passive_data *p_data
+			= (struct devfreq_passive_data *)devfreq->data;
+	struct devfreq *parent_devfreq = (struct devfreq *)p_data->parent;
+	unsigned long child_freq = ULONG_MAX;
+	struct dev_pm_opp *opp;
+	int i, count, ret = 0;
+
+	/*
+	 * If the passive-governed devfreq device provides its own method for
+	 * determining the next frequency, use the get_target_freq() callback
+	 * of struct devfreq_passive_data.
+	 */
+	if (p_data->get_target_freq) {
+		ret = p_data->get_target_freq(devfreq, freq);
+		goto out;
+	}
+
+	/*
+	 * If both the parent and the passive devfreq device use OPP tables,
+	 * derive the next frequency from those tables:
+	 *
+	 * - the parent devfreq device uses any governor except passive.
+	 * - the passive devfreq device uses the passive governor.
+	 *
+	 * Each devfreq device has its own OPP table. Once the parent's
+	 * governor has decided a new frequency, the passive governor looks
+	 * up that frequency's index in the parent's OPP table and uses the
+	 * same index to pick a suitable frequency for the passive device.
+	 */
+	if (!devfreq->profile || !devfreq->profile->freq_table
+		|| devfreq->profile->max_state <= 0)
+		return -EINVAL;
+
+	/*
+	 * The passive governor has to get the exact frequency from the
+	 * parent device's OPP list, because at this point *freq is only a
+	 * tentative value proposed by the parent's governor.
+	 */
+	rcu_read_lock();
+	opp = devfreq_recommended_opp(parent_devfreq->dev.parent, freq, 0);
+	rcu_read_unlock();
+	if (IS_ERR(opp)) {
+		ret = PTR_ERR(opp);
+		goto out;
+	}
+
+	/*
+	 * Find the index, in the parent device's OPP table, of the
+	 * frequency its governor decided on.
+	 */
+	for (i = 0; i < parent_devfreq->profile->max_state; i++)
+		if (parent_devfreq->profile->freq_table[i] == *freq)
+			break;
+
+	if (i == parent_devfreq->profile->max_state) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* Pick the child frequency at the same index as the parent's. */
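+	/* If the child's table is shorter, clamp to its highest OPP. */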
+	if (i < devfreq->profile->max_state) {
+		child_freq = devfreq->profile->freq_table[i];
+	} else {
+		count = devfreq->profile->max_state;
+		child_freq = devfreq->profile->freq_table[count - 1];
+	}
+
+	/* Return the suitable frequency for passive device. */
+	*freq = child_freq;
+
+out:
+	return ret;
+}
+
+static int update_devfreq_passive(struct devfreq *devfreq, unsigned long freq)
+{
+	int ret;
+
+	if (!devfreq->governor)
+		return -EINVAL;
+
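+	/*
+	 * The parent devfreq's lock is already held on this path, so take
+	 * the child's lock with a distinct lockdep subclass to avoid a
+	 * false-positive deadlock report.
+	 */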
+	mutex_lock_nested(&devfreq->lock, SINGLE_DEPTH_NESTING);
+
+	ret = devfreq->governor->get_target_freq(devfreq, &freq);
+	if (ret < 0)
+		goto out;
+
+	ret = devfreq->profile->target(devfreq->dev.parent, &freq, 0);
+	if (ret < 0)
+		goto out;
+
+	devfreq->previous_freq = freq;
+
+out:
+	mutex_unlock(&devfreq->lock);
+
+	return 0;
+}
+
+static int devfreq_passive_notifier_call(struct notifier_block *nb,
+				unsigned long event, void *ptr)
+{
+	struct devfreq_passive_data *data
+			= container_of(nb, struct devfreq_passive_data, nb);
+	struct devfreq *devfreq = (struct devfreq *)data->this;
+	struct devfreq *parent = (struct devfreq *)data->parent;
+	struct devfreq_freqs *freqs = (struct devfreq_freqs *)ptr;
+	unsigned long freq = freqs->new;
+
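+	/*
+	 * Track the parent: scale down before the parent's clock drops and
+	 * scale up only after it has risen, presumably so the passive
+	 * device never runs ahead of its parent mid-transition.
+	 */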
+	switch (event) {
+	case DEVFREQ_PRECHANGE:
+		if (parent->previous_freq > freq)
+			update_devfreq_passive(devfreq, freq);
+		break;
+	case DEVFREQ_POSTCHANGE:
+		if (parent->previous_freq < freq)
+			update_devfreq_passive(devfreq, freq);
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static int devfreq_passive_event_handler(struct devfreq *devfreq,
+				unsigned int event, void *data)
+{
+	struct device *dev = devfreq->dev.parent;
+	struct devfreq_passive_data *p_data
+			= (struct devfreq_passive_data *)devfreq->data;
+	struct devfreq *parent = (struct devfreq *)p_data->parent;
+	struct notifier_block *nb = &p_data->nb;
+	int ret = 0;
+
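+	/* The parent devfreq may not have registered yet; ask to be retried. */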
+	if (!parent)
+		return -EPROBE_DEFER;
+
+	switch (event) {
+	case DEVFREQ_GOV_START:
+		if (!p_data->this)
+			p_data->this = devfreq;
+
+		nb->notifier_call = devfreq_passive_notifier_call;
+		ret = devm_devfreq_register_notifier(dev, parent, nb,
+					DEVFREQ_TRANSITION_NOTIFIER);
+		break;
+	case DEVFREQ_GOV_STOP:
+		devm_devfreq_unregister_notifier(dev, parent, nb,
+					DEVFREQ_TRANSITION_NOTIFIER);
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+static struct devfreq_governor devfreq_passive = {
+	.name = "passive",
+	.get_target_freq = devfreq_passive_get_target_freq,
+	.event_handler = devfreq_passive_event_handler,
+};
+
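+/*
+ * A minimal usage sketch (parent_devfreq is an assumed, already-registered
+ * devfreq device): a consumer driver selects this governor by name and
+ * hands over a devfreq_passive_data with ->parent set, e.g.:
+ *
+ *	static struct devfreq_passive_data passive_data = {
+ *		.parent = parent_devfreq,
+ *	};
+ *
+ *	devfreq = devm_devfreq_add_device(dev, &profile, "passive",
+ *					  &passive_data);
+ */
+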
+static int __init devfreq_passive_init(void)
+{
+	return devfreq_add_governor(&devfreq_passive);
+}
+subsys_initcall(devfreq_passive_init);
+
+static void __exit devfreq_passive_exit(void)
+{
+	int ret;
+
+	ret = devfreq_remove_governor(&devfreq_passive);
+	if (ret)
+		pr_err("%s: failed to remove governor: %d\n", __func__, ret);
+}
+module_exit(devfreq_passive_exit);
+
+MODULE_AUTHOR("Chanwoo Choi <cw00.choi@samsung.com>");
+MODULE_AUTHOR("MyungJoo Ham <myungjoo.ham@samsung.com>");
+MODULE_DESCRIPTION("DEVFREQ Passive governor");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 37755e6..6ca7474 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -378,12 +378,11 @@
 
 config EDAC_ALTERA_L2C
 	bool "Altera L2 Cache ECC"
-	depends on EDAC_ALTERA=y
-	select CACHE_L2X0
+	depends on EDAC_ALTERA=y && CACHE_L2X0
 	help
 	  Support for error detection and correction on the
 	  Altera L2 cache Memory for Altera SoCs. This option
-	  requires L2 cache so it will force that selection.
+	  requires L2 cache.
 
 config EDAC_ALTERA_OCRAM
 	bool "Altera On-Chip RAM ECC"
diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c
index 63e4209..5b4d223 100644
--- a/drivers/edac/altera_edac.c
+++ b/drivers/edac/altera_edac.c
@@ -24,6 +24,7 @@
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/mfd/syscon.h>
+#include <linux/of_address.h>
 #include <linux/of_platform.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
@@ -78,27 +79,6 @@
 	.ue_set_mask        = A10_DIAGINT_TDERRA_MASK,
 };
 
-/************************** EDAC Device Defines **************************/
-
-/* OCRAM ECC Management Group Defines */
-#define ALTR_MAN_GRP_OCRAM_ECC_OFFSET   0x04
-#define ALTR_OCR_ECC_EN                 BIT(0)
-#define ALTR_OCR_ECC_INJS               BIT(1)
-#define ALTR_OCR_ECC_INJD               BIT(2)
-#define ALTR_OCR_ECC_SERR               BIT(3)
-#define ALTR_OCR_ECC_DERR               BIT(4)
-
-/* L2 ECC Management Group Defines */
-#define ALTR_MAN_GRP_L2_ECC_OFFSET      0x00
-#define ALTR_L2_ECC_EN                  BIT(0)
-#define ALTR_L2_ECC_INJS                BIT(1)
-#define ALTR_L2_ECC_INJD                BIT(2)
-
-#define ALTR_UE_TRIGGER_CHAR            'U'   /* Trigger for UE */
-#define ALTR_TRIGGER_READ_WRD_CNT       32    /* Line size x 4 */
-#define ALTR_TRIG_OCRAM_BYTE_SIZE       128   /* Line size x 4 */
-#define ALTR_TRIG_L2C_BYTE_SIZE         4096  /* Full Page */
-
 /*********************** EDAC Memory Controller Functions ****************/
 
 /* The SDRAM controller uses the EDAC Memory Controller framework.       */
@@ -252,8 +232,8 @@
 }
 
 static const struct of_device_id altr_sdram_ctrl_of_match[] = {
-	{ .compatible = "altr,sdram-edac", .data = (void *)&c5_data},
-	{ .compatible = "altr,sdram-edac-a10", .data = (void *)&a10_data},
+	{ .compatible = "altr,sdram-edac", .data = &c5_data},
+	{ .compatible = "altr,sdram-edac-a10", .data = &a10_data},
 	{},
 };
 MODULE_DEVICE_TABLE(of, altr_sdram_ctrl_of_match);
@@ -570,28 +550,8 @@
 
 const struct edac_device_prv_data ocramecc_data;
 const struct edac_device_prv_data l2ecc_data;
-
-struct edac_device_prv_data {
-	int (*setup)(struct platform_device *pdev, void __iomem *base);
-	int ce_clear_mask;
-	int ue_clear_mask;
-	char dbgfs_name[20];
-	void * (*alloc_mem)(size_t size, void **other);
-	void (*free_mem)(void *p, size_t size, void *other);
-	int ecc_enable_mask;
-	int ce_set_mask;
-	int ue_set_mask;
-	int trig_alloc_sz;
-};
-
-struct altr_edac_device_dev {
-	void __iomem *base;
-	int sb_irq;
-	int db_irq;
-	const struct edac_device_prv_data *data;
-	struct dentry *debugfs_dir;
-	char *edac_dev_name;
-};
+const struct edac_device_prv_data a10_ocramecc_data;
+const struct edac_device_prv_data a10_l2ecc_data;
 
 static irqreturn_t altr_edac_device_handler(int irq, void *dev_id)
 {
@@ -665,8 +625,9 @@
 		if (ACCESS_ONCE(ptemp[i]))
 			result = -1;
 		/* Toggle Error bit (it is latched), leave ECC enabled */
-		writel(error_mask, drvdata->base);
-		writel(priv->ecc_enable_mask, drvdata->base);
+		writel(error_mask, (drvdata->base + priv->set_err_ofst));
+		writel(priv->ecc_enable_mask, (drvdata->base +
+					       priv->set_err_ofst));
 		ptemp[i] = i;
 	}
 	/* Ensure it has been written out */
@@ -694,6 +655,16 @@
 	.llseek = generic_file_llseek,
 };
 
+static ssize_t altr_edac_a10_device_trig(struct file *file,
+					 const char __user *user_buf,
+					 size_t count, loff_t *ppos);
+
+static const struct file_operations altr_edac_a10_device_inject_fops = {
+	.open = simple_open,
+	.write = altr_edac_a10_device_trig,
+	.llseek = generic_file_llseek,
+};
+
 static void altr_create_edacdev_dbgfs(struct edac_device_ctl_info *edac_dci,
 				      const struct edac_device_prv_data *priv)
 {
@@ -708,17 +679,18 @@
 
 	if (!edac_debugfs_create_file(priv->dbgfs_name, S_IWUSR,
 				      drvdata->debugfs_dir, edac_dci,
-				      &altr_edac_device_inject_fops))
+				      priv->inject_fops))
 		debugfs_remove_recursive(drvdata->debugfs_dir);
 }
 
 static const struct of_device_id altr_edac_device_of_match[] = {
 #ifdef CONFIG_EDAC_ALTERA_L2C
-	{ .compatible = "altr,socfpga-l2-ecc", .data = (void *)&l2ecc_data },
+	{ .compatible = "altr,socfpga-l2-ecc", .data = &l2ecc_data },
+	{ .compatible = "altr,socfpga-a10-l2-ecc", .data = &a10_l2ecc_data },
 #endif
 #ifdef CONFIG_EDAC_ALTERA_OCRAM
-	{ .compatible = "altr,socfpga-ocram-ecc",
-	  .data = (void *)&ocramecc_data },
+	{ .compatible = "altr,socfpga-ocram-ecc", .data = &ocramecc_data },
+	{ .compatible = "altr,socfpga-a10-ocram-ecc", .data = &a10_ocramecc_data },
 #endif
 	{},
 };
@@ -789,7 +761,7 @@
 
 	/* Check specific dependencies for the module */
 	if (drvdata->data->setup) {
-		res = drvdata->data->setup(pdev, drvdata->base);
+		res = drvdata->data->setup(drvdata);
 		if (res)
 			goto fail1;
 	}
@@ -856,6 +828,25 @@
 /*********************** OCRAM EDAC Device Functions *********************/
 
 #ifdef CONFIG_EDAC_ALTERA_OCRAM
+/*
+ *  Test for the memory's ECC dependencies upon entry: platform-specific
+ *  startup should already have initialized the memory and enabled ECC.
+ *  ECC can't be turned on here because accessing uninitialized memory
+ *  would raise CE/UE errors, possibly causing an abort.
+ */
+static int altr_check_ecc_deps(struct altr_edac_device_dev *device)
+{
+	void __iomem  *base = device->base;
+	const struct edac_device_prv_data *prv = device->data;
+
+	if (readl(base + prv->ecc_en_ofst) & prv->ecc_enable_mask)
+		return 0;
+
+	edac_printk(KERN_ERR, EDAC_DEVICE,
+		    "%s: No ECC present or ECC disabled.\n",
+		    device->edac_dev_name);
+	return -ENODEV;
+}
 
 static void *ocram_alloc_mem(size_t size, void **other)
 {
@@ -891,36 +882,53 @@
 	gen_pool_free((struct gen_pool *)other, (u32)p, size);
 }
 
-/*
- * altr_ocram_check_deps()
- *	Test for OCRAM cache ECC dependencies upon entry because
- *	platform specific startup should have initialized the
- *	On-Chip RAM memory and enabled the ECC.
- *	Can't turn on ECC here because accessing un-initialized
- *	memory will cause CE/UE errors possibly causing an ABORT.
- */
-static int altr_ocram_check_deps(struct platform_device *pdev,
-				 void __iomem *base)
+static irqreturn_t altr_edac_a10_ecc_irq(struct altr_edac_device_dev *dci,
+					 bool sberr)
 {
-	if (readl(base) & ALTR_OCR_ECC_EN)
-		return 0;
+	void __iomem  *base = dci->base;
 
-	edac_printk(KERN_ERR, EDAC_DEVICE,
-		    "OCRAM: No ECC present or ECC disabled.\n");
-	return -ENODEV;
+	if (sberr) {
+		writel(ALTR_A10_ECC_SERRPENA,
+		       base + ALTR_A10_ECC_INTSTAT_OFST);
+		edac_device_handle_ce(dci->edac_dev, 0, 0, dci->edac_dev_name);
+	} else {
+		writel(ALTR_A10_ECC_DERRPENA,
+		       base + ALTR_A10_ECC_INTSTAT_OFST);
+		edac_device_handle_ue(dci->edac_dev, 0, 0, dci->edac_dev_name);
+		panic("\nEDAC:ECC_DEVICE[Uncorrectable errors]\n");
+	}
+	return IRQ_HANDLED;
 }
 
 const struct edac_device_prv_data ocramecc_data = {
-	.setup = altr_ocram_check_deps,
+	.setup = altr_check_ecc_deps,
 	.ce_clear_mask = (ALTR_OCR_ECC_EN | ALTR_OCR_ECC_SERR),
 	.ue_clear_mask = (ALTR_OCR_ECC_EN | ALTR_OCR_ECC_DERR),
 	.dbgfs_name = "altr_ocram_trigger",
 	.alloc_mem = ocram_alloc_mem,
 	.free_mem = ocram_free_mem,
 	.ecc_enable_mask = ALTR_OCR_ECC_EN,
+	.ecc_en_ofst = ALTR_OCR_ECC_REG_OFFSET,
 	.ce_set_mask = (ALTR_OCR_ECC_EN | ALTR_OCR_ECC_INJS),
 	.ue_set_mask = (ALTR_OCR_ECC_EN | ALTR_OCR_ECC_INJD),
+	.set_err_ofst = ALTR_OCR_ECC_REG_OFFSET,
 	.trig_alloc_sz = ALTR_TRIG_OCRAM_BYTE_SIZE,
+	.inject_fops = &altr_edac_device_inject_fops,
+};
+
+const struct edac_device_prv_data a10_ocramecc_data = {
+	.setup = altr_check_ecc_deps,
+	.ce_clear_mask = ALTR_A10_ECC_SERRPENA,
+	.ue_clear_mask = ALTR_A10_ECC_DERRPENA,
+	.irq_status_mask = A10_SYSMGR_ECC_INTSTAT_OCRAM,
+	.dbgfs_name = "altr_ocram_trigger",
+	.ecc_enable_mask = ALTR_A10_OCRAM_ECC_EN_CTL,
+	.ecc_en_ofst = ALTR_A10_ECC_CTRL_OFST,
+	.ce_set_mask = ALTR_A10_ECC_TSERRA,
+	.ue_set_mask = ALTR_A10_ECC_TDERRA,
+	.set_err_ofst = ALTR_A10_ECC_INTTEST_OFST,
+	.ecc_irq_handler = altr_edac_a10_ecc_irq,
+	.inject_fops = &altr_edac_a10_device_inject_fops,
 };
 
 #endif	/* CONFIG_EDAC_ALTERA_OCRAM */
@@ -966,10 +974,13 @@
  *	Bail if ECC is not enabled.
  *	Note that L2 Cache Enable is forced at build time.
  */
-static int altr_l2_check_deps(struct platform_device *pdev,
-			      void __iomem *base)
+static int altr_l2_check_deps(struct altr_edac_device_dev *device)
 {
-	if (readl(base) & ALTR_L2_ECC_EN)
+	void __iomem *base = device->base;
+	const struct edac_device_prv_data *prv = device->data;
+
+	if ((readl(base) & prv->ecc_enable_mask) ==
+	     prv->ecc_enable_mask)
 		return 0;
 
 	edac_printk(KERN_ERR, EDAC_DEVICE,
@@ -977,6 +988,24 @@
 	return -ENODEV;
 }
 
+static irqreturn_t altr_edac_a10_l2_irq(struct altr_edac_device_dev *dci,
+					bool sberr)
+{
+	if (sberr) {
+		regmap_write(dci->edac->ecc_mgr_map,
+			     A10_SYSGMR_MPU_CLEAR_L2_ECC_OFST,
+			     A10_SYSGMR_MPU_CLEAR_L2_ECC_SB);
+		edac_device_handle_ce(dci->edac_dev, 0, 0, dci->edac_dev_name);
+	} else {
+		regmap_write(dci->edac->ecc_mgr_map,
+			     A10_SYSGMR_MPU_CLEAR_L2_ECC_OFST,
+			     A10_SYSGMR_MPU_CLEAR_L2_ECC_MB);
+		edac_device_handle_ue(dci->edac_dev, 0, 0, dci->edac_dev_name);
+		panic("\nEDAC:ECC_DEVICE[Uncorrectable errors]\n");
+	}
+	return IRQ_HANDLED;
+}
+
 const struct edac_device_prv_data l2ecc_data = {
 	.setup = altr_l2_check_deps,
 	.ce_clear_mask = 0,
@@ -987,11 +1016,252 @@
 	.ecc_enable_mask = ALTR_L2_ECC_EN,
 	.ce_set_mask = (ALTR_L2_ECC_EN | ALTR_L2_ECC_INJS),
 	.ue_set_mask = (ALTR_L2_ECC_EN | ALTR_L2_ECC_INJD),
+	.set_err_ofst = ALTR_L2_ECC_REG_OFFSET,
 	.trig_alloc_sz = ALTR_TRIG_L2C_BYTE_SIZE,
+	.inject_fops = &altr_edac_device_inject_fops,
+};
+
+const struct edac_device_prv_data a10_l2ecc_data = {
+	.setup = altr_l2_check_deps,
+	.ce_clear_mask = ALTR_A10_L2_ECC_SERR_CLR,
+	.ue_clear_mask = ALTR_A10_L2_ECC_MERR_CLR,
+	.irq_status_mask = A10_SYSMGR_ECC_INTSTAT_L2,
+	.dbgfs_name = "altr_l2_trigger",
+	.alloc_mem = l2_alloc_mem,
+	.free_mem = l2_free_mem,
+	.ecc_enable_mask = ALTR_A10_L2_ECC_EN_CTL,
+	.ce_set_mask = ALTR_A10_L2_ECC_CE_INJ_MASK,
+	.ue_set_mask = ALTR_A10_L2_ECC_UE_INJ_MASK,
+	.set_err_ofst = ALTR_A10_L2_ECC_INJ_OFST,
+	.ecc_irq_handler = altr_edac_a10_l2_irq,
+	.trig_alloc_sz = ALTR_TRIG_L2C_BYTE_SIZE,
+	.inject_fops = &altr_edac_device_inject_fops,
 };
 
 #endif	/* CONFIG_EDAC_ALTERA_L2C */
 
+/********************* Arria10 EDAC Device Functions *************************/
+
+/*
+ * The Arria10 EDAC device functions differ from those for Cyclone5/Arria5
+ * because two IRQs are shared among all of the ECC peripherals. The ECC
+ * manager owns those IRQs and dispatches them to its children.
+ * Based on the xgene_edac.c peripheral code.
+ */
+
+static ssize_t altr_edac_a10_device_trig(struct file *file,
+					 const char __user *user_buf,
+					 size_t count, loff_t *ppos)
+{
+	struct edac_device_ctl_info *edac_dci = file->private_data;
+	struct altr_edac_device_dev *drvdata = edac_dci->pvt_info;
+	const struct edac_device_prv_data *priv = drvdata->data;
+	void __iomem *set_addr = (drvdata->base + priv->set_err_ofst);
+	unsigned long flags;
+	u8 trig_type;
+
+	if (!user_buf || get_user(trig_type, user_buf))
+		return -EFAULT;
+
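+	/* Mask interrupts so the injected error cannot fire mid-sequence. */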
+	local_irq_save(flags);
+	if (trig_type == ALTR_UE_TRIGGER_CHAR)
+		writel(priv->ue_set_mask, set_addr);
+	else
+		writel(priv->ce_set_mask, set_addr);
+	/* Ensure the interrupt test bits are set */
+	wmb();
+	local_irq_restore(flags);
+
+	return count;
+}
+
+static irqreturn_t altr_edac_a10_irq_handler(int irq, void *dev_id)
+{
+	irqreturn_t rc = IRQ_NONE;
+	struct altr_arria10_edac *edac = dev_id;
+	struct altr_edac_device_dev *dci;
+	int irq_status;
+	bool sberr = (irq == edac->sb_irq) ? 1 : 0;
+	int sm_offset = sberr ? A10_SYSMGR_ECC_INTSTAT_SERR_OFST :
+				A10_SYSMGR_ECC_INTSTAT_DERR_OFST;
+
+	regmap_read(edac->ecc_mgr_map, sm_offset, &irq_status);
+
+	if ((irq != edac->sb_irq) && (irq != edac->db_irq)) {
+		WARN_ON(1);
+	} else {
+		list_for_each_entry(dci, &edac->a10_ecc_devices, next) {
+			if (irq_status & dci->data->irq_status_mask)
+				rc = dci->data->ecc_irq_handler(dci, sberr);
+		}
+	}
+
+	return rc;
+}
+
+static int altr_edac_a10_device_add(struct altr_arria10_edac *edac,
+				    struct device_node *np)
+{
+	struct edac_device_ctl_info *dci;
+	struct altr_edac_device_dev *altdev;
+	char *ecc_name = (char *)np->name;
+	struct resource res;
+	int edac_idx;
+	int rc = 0;
+	const struct edac_device_prv_data *prv;
+	/* Get matching node and check for valid result */
+	const struct of_device_id *pdev_id =
+		of_match_node(altr_edac_device_of_match, np);
+	if (IS_ERR_OR_NULL(pdev_id))
+		return -ENODEV;
+
+	/* Get driver specific data for this EDAC device */
+	prv = pdev_id->data;
+	if (IS_ERR_OR_NULL(prv))
+		return -ENODEV;
+
+	if (!devres_open_group(edac->dev, altr_edac_a10_device_add, GFP_KERNEL))
+		return -ENOMEM;
+
+	rc = of_address_to_resource(np, 0, &res);
+	if (rc < 0) {
+		edac_printk(KERN_ERR, EDAC_DEVICE,
+			    "%s: no resource address\n", ecc_name);
+		goto err_release_group;
+	}
+
+	edac_idx = edac_device_alloc_index();
+	dci = edac_device_alloc_ctl_info(sizeof(*altdev), ecc_name,
+					 1, ecc_name, 1, 0, NULL, 0,
+					 edac_idx);
+
+	if (!dci) {
+		edac_printk(KERN_ERR, EDAC_DEVICE,
+			    "%s: Unable to allocate EDAC device\n", ecc_name);
+		rc = -ENOMEM;
+		goto err_release_group;
+	}
+
+	altdev = dci->pvt_info;
+	dci->dev = edac->dev;
+	altdev->edac_dev_name = ecc_name;
+	altdev->edac_idx = edac_idx;
+	altdev->edac = edac;
+	altdev->edac_dev = dci;
+	altdev->data = prv;
+	altdev->ddev = *edac->dev;
+	dci->dev = &altdev->ddev;
+	dci->ctl_name = "Altera ECC Manager";
+	dci->mod_name = ecc_name;
+	dci->dev_name = ecc_name;
+
+	altdev->base = devm_ioremap_resource(edac->dev, &res);
+	if (IS_ERR(altdev->base)) {
+		rc = PTR_ERR(altdev->base);
+		goto err_release_group1;
+	}
+
+	/* Check specific dependencies for the module */
+	if (altdev->data->setup) {
+		rc = altdev->data->setup(altdev);
+		if (rc)
+			goto err_release_group1;
+	}
+
+	rc = edac_device_add_device(dci);
+	if (rc) {
+		dev_err(edac->dev, "edac_device_add_device failed\n");
+		rc = -ENOMEM;
+		goto err_release_group1;
+	}
+
+	altr_create_edacdev_dbgfs(dci, prv);
+
+	list_add(&altdev->next, &edac->a10_ecc_devices);
+
+	devres_remove_group(edac->dev, altr_edac_a10_device_add);
+
+	return 0;
+
+err_release_group1:
+	edac_device_free_ctl_info(dci);
+err_release_group:
+	devres_release_group(edac->dev, NULL);
+	edac_printk(KERN_ERR, EDAC_DEVICE,
+		    "%s:Error setting up EDAC device: %d\n", ecc_name, rc);
+
+	return rc;
+}
+
+static int altr_edac_a10_probe(struct platform_device *pdev)
+{
+	struct altr_arria10_edac *edac;
+	struct device_node *child;
+	int rc;
+
+	edac = devm_kzalloc(&pdev->dev, sizeof(*edac), GFP_KERNEL);
+	if (!edac)
+		return -ENOMEM;
+
+	edac->dev = &pdev->dev;
+	platform_set_drvdata(pdev, edac);
+	INIT_LIST_HEAD(&edac->a10_ecc_devices);
+
+	edac->ecc_mgr_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
+							"altr,sysmgr-syscon");
+	if (IS_ERR(edac->ecc_mgr_map)) {
+		edac_printk(KERN_ERR, EDAC_DEVICE,
+			    "Unable to get syscon altr,sysmgr-syscon\n");
+		return PTR_ERR(edac->ecc_mgr_map);
+	}
+
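+	/*
+	 * IRQ index 0 is taken as the single-bit (SBERR) interrupt and
+	 * index 1 as the double-bit (DBERR) one.
+	 */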
+	edac->sb_irq = platform_get_irq(pdev, 0);
+	rc = devm_request_irq(&pdev->dev, edac->sb_irq,
+			      altr_edac_a10_irq_handler,
+			      IRQF_SHARED, dev_name(&pdev->dev), edac);
+	if (rc) {
+		edac_printk(KERN_ERR, EDAC_DEVICE, "No SBERR IRQ resource\n");
+		return rc;
+	}
+
+	edac->db_irq = platform_get_irq(pdev, 1);
+	rc = devm_request_irq(&pdev->dev, edac->db_irq,
+			      altr_edac_a10_irq_handler,
+			      IRQF_SHARED, dev_name(&pdev->dev), edac);
+	if (rc) {
+		edac_printk(KERN_ERR, EDAC_DEVICE, "No DBERR IRQ resource\n");
+		return rc;
+	}
+
+	for_each_child_of_node(pdev->dev.of_node, child) {
+		if (!of_device_is_available(child))
+			continue;
+		if (of_device_is_compatible(child, "altr,socfpga-a10-l2-ecc"))
+			altr_edac_a10_device_add(edac, child);
+		else if (of_device_is_compatible(child,
+						 "altr,socfpga-a10-ocram-ecc"))
+			altr_edac_a10_device_add(edac, child);
+	}
+
+	return 0;
+}
+
+static const struct of_device_id altr_edac_a10_of_match[] = {
+	{ .compatible = "altr,socfpga-a10-ecc-manager" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, altr_edac_a10_of_match);
+
+static struct platform_driver altr_edac_a10_driver = {
+	.probe =  altr_edac_a10_probe,
+	.driver = {
+		.name = "socfpga_a10_ecc_manager",
+		.of_match_table = altr_edac_a10_of_match,
+	},
+};
+module_platform_driver(altr_edac_a10_driver);
+
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Thor Thayer");
 MODULE_DESCRIPTION("EDAC Driver for Altera Memories");
diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h
index 953077d..42090f3 100644
--- a/drivers/edac/altera_edac.h
+++ b/drivers/edac/altera_edac.h
@@ -195,4 +195,132 @@
 	const struct altr_sdram_prv_data *data;
 };
 
+/************************** EDAC Device Defines **************************/
+/***** General Device Trigger Defines *****/
+#define ALTR_UE_TRIGGER_CHAR            'U'   /* Trigger for UE */
+#define ALTR_TRIGGER_READ_WRD_CNT       32    /* Line size x 4 */
+#define ALTR_TRIG_OCRAM_BYTE_SIZE       128   /* Line size x 4 */
+#define ALTR_TRIG_L2C_BYTE_SIZE         4096  /* Full Page */
+
+/******* Cyclone5 and Arria5 Defines *******/
+/* OCRAM ECC Management Group Defines */
+#define ALTR_MAN_GRP_OCRAM_ECC_OFFSET   0x04
+#define ALTR_OCR_ECC_REG_OFFSET         0x00
+#define ALTR_OCR_ECC_EN                 BIT(0)
+#define ALTR_OCR_ECC_INJS               BIT(1)
+#define ALTR_OCR_ECC_INJD               BIT(2)
+#define ALTR_OCR_ECC_SERR               BIT(3)
+#define ALTR_OCR_ECC_DERR               BIT(4)
+
+/* L2 ECC Management Group Defines */
+#define ALTR_MAN_GRP_L2_ECC_OFFSET      0x00
+#define ALTR_L2_ECC_REG_OFFSET          0x00
+#define ALTR_L2_ECC_EN                  BIT(0)
+#define ALTR_L2_ECC_INJS                BIT(1)
+#define ALTR_L2_ECC_INJD                BIT(2)
+
+/* Arria10 General ECC Block Module Defines */
+#define ALTR_A10_ECC_CTRL_OFST          0x08
+#define ALTR_A10_ECC_EN                 BIT(0)
+#define ALTR_A10_ECC_INITA              BIT(16)
+#define ALTR_A10_ECC_INITB              BIT(24)
+
+#define ALTR_A10_ECC_INITSTAT_OFST      0x0C
+#define ALTR_A10_ECC_INITCOMPLETEA      BIT(0)
+#define ALTR_A10_ECC_INITCOMPLETEB      BIT(8)
+
+#define ALTR_A10_ECC_ERRINTEN_OFST      0x10
+#define ALTR_A10_ECC_SERRINTEN          BIT(0)
+
+#define ALTR_A10_ECC_INTSTAT_OFST       0x20
+#define ALTR_A10_ECC_SERRPENA           BIT(0)
+#define ALTR_A10_ECC_DERRPENA           BIT(8)
+#define ALTR_A10_ECC_ERRPENA_MASK       (ALTR_A10_ECC_SERRPENA | \
+					 ALTR_A10_ECC_DERRPENA)
+#define ALTR_A10_ECC_SERRPENB           BIT(16)
+#define ALTR_A10_ECC_DERRPENB           BIT(24)
+#define ALTR_A10_ECC_ERRPENB_MASK       (ALTR_A10_ECC_SERRPENB | \
+					 ALTR_A10_ECC_DERRPENB)
+
+#define ALTR_A10_ECC_INTTEST_OFST       0x24
+#define ALTR_A10_ECC_TSERRA             BIT(0)
+#define ALTR_A10_ECC_TDERRA             BIT(8)
+
+/* ECC Manager Defines */
+#define A10_SYSMGR_ECC_INTMASK_SET_OFST   0x94
+#define A10_SYSMGR_ECC_INTMASK_CLR_OFST   0x98
+#define A10_SYSMGR_ECC_INTMASK_OCRAM      BIT(1)
+
+#define A10_SYSMGR_ECC_INTSTAT_SERR_OFST  0x9C
+#define A10_SYSMGR_ECC_INTSTAT_DERR_OFST  0xA0
+#define A10_SYSMGR_ECC_INTSTAT_L2         BIT(0)
+#define A10_SYSMGR_ECC_INTSTAT_OCRAM      BIT(1)
+
+#define A10_SYSGMR_MPU_CLEAR_L2_ECC_OFST  0xA8
+#define A10_SYSGMR_MPU_CLEAR_L2_ECC_SB    BIT(15)
+#define A10_SYSGMR_MPU_CLEAR_L2_ECC_MB    BIT(31)
+
+/* Arria 10 L2 ECC Management Group Defines */
+#define ALTR_A10_L2_ECC_CTL_OFST        0x0
+#define ALTR_A10_L2_ECC_EN_CTL          BIT(0)
+
+#define ALTR_A10_L2_ECC_STATUS          0xFFD060A4
+#define ALTR_A10_L2_ECC_STAT_OFST       0xA4
+#define ALTR_A10_L2_ECC_SERR_PEND       BIT(0)
+#define ALTR_A10_L2_ECC_MERR_PEND       BIT(0)
+
+#define ALTR_A10_L2_ECC_CLR_OFST        0x4
+#define ALTR_A10_L2_ECC_SERR_CLR        BIT(15)
+#define ALTR_A10_L2_ECC_MERR_CLR        BIT(31)
+
+#define ALTR_A10_L2_ECC_INJ_OFST        ALTR_A10_L2_ECC_CTL_OFST
+#define ALTR_A10_L2_ECC_CE_INJ_MASK     0x00000101
+#define ALTR_A10_L2_ECC_UE_INJ_MASK     0x00010101
+
+/* Arria 10 OCRAM ECC Management Group Defines */
+#define ALTR_A10_OCRAM_ECC_EN_CTL       (BIT(1) | BIT(0))
+
+struct altr_edac_device_dev;
+
+struct edac_device_prv_data {
+	int (*setup)(struct altr_edac_device_dev *device);
+	int ce_clear_mask;
+	int ue_clear_mask;
+	int irq_status_mask;
+	char dbgfs_name[20];
+	void * (*alloc_mem)(size_t size, void **other);
+	void (*free_mem)(void *p, size_t size, void *other);
+	int ecc_enable_mask;
+	int ecc_en_ofst;
+	int ce_set_mask;
+	int ue_set_mask;
+	int set_err_ofst;
+	irqreturn_t (*ecc_irq_handler)(struct altr_edac_device_dev *dci,
+				       bool sb);
+	int trig_alloc_sz;
+	const struct file_operations *inject_fops;
+};
+
+struct altr_edac_device_dev {
+	struct list_head next;
+	void __iomem *base;
+	int sb_irq;
+	int db_irq;
+	const struct edac_device_prv_data *data;
+	struct dentry *debugfs_dir;
+	char *edac_dev_name;
+	struct altr_arria10_edac *edac;
+	struct edac_device_ctl_info *edac_dev;
+	struct device ddev;
+	int edac_idx;
+};
+
+struct altr_arria10_edac {
+	struct device		*dev;
+	struct regmap		*ecc_mgr_map;
+	int sb_irq;
+	int db_irq;
+	struct list_head	a10_ecc_devices;
+};
+
 #endif	/* #ifndef _ALTERA_EDAC_H */
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index d87a475..624e2f7 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -15,11 +15,6 @@
 
 static struct msr __percpu *msrs;
 
-/*
- * count successfully initialized driver instances for setup_pci_device()
- */
-static atomic_t drv_instances = ATOMIC_INIT(0);
-
 /* Per-node stuff */
 static struct ecc_settings **ecc_stngs;
 
@@ -1918,7 +1913,7 @@
 	[K8_CPUS] = {
 		.ctl_name = "K8",
 		.f1_id = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP,
-		.f3_id = PCI_DEVICE_ID_AMD_K8_NB_MISC,
+		.f2_id = PCI_DEVICE_ID_AMD_K8_NB_MEMCTL,
 		.ops = {
 			.early_channel_count	= k8_early_channel_count,
 			.map_sysaddr_to_csrow	= k8_map_sysaddr_to_csrow,
@@ -1928,7 +1923,7 @@
 	[F10_CPUS] = {
 		.ctl_name = "F10h",
 		.f1_id = PCI_DEVICE_ID_AMD_10H_NB_MAP,
-		.f3_id = PCI_DEVICE_ID_AMD_10H_NB_MISC,
+		.f2_id = PCI_DEVICE_ID_AMD_10H_NB_DRAM,
 		.ops = {
 			.early_channel_count	= f1x_early_channel_count,
 			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
@@ -1938,7 +1933,7 @@
 	[F15_CPUS] = {
 		.ctl_name = "F15h",
 		.f1_id = PCI_DEVICE_ID_AMD_15H_NB_F1,
-		.f3_id = PCI_DEVICE_ID_AMD_15H_NB_F3,
+		.f2_id = PCI_DEVICE_ID_AMD_15H_NB_F2,
 		.ops = {
 			.early_channel_count	= f1x_early_channel_count,
 			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
@@ -1948,7 +1943,7 @@
 	[F15_M30H_CPUS] = {
 		.ctl_name = "F15h_M30h",
 		.f1_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F1,
-		.f3_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F3,
+		.f2_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F2,
 		.ops = {
 			.early_channel_count	= f1x_early_channel_count,
 			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
@@ -1958,7 +1953,7 @@
 	[F15_M60H_CPUS] = {
 		.ctl_name = "F15h_M60h",
 		.f1_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F1,
-		.f3_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F3,
+		.f2_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F2,
 		.ops = {
 			.early_channel_count	= f1x_early_channel_count,
 			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
@@ -1968,7 +1963,7 @@
 	[F16_CPUS] = {
 		.ctl_name = "F16h",
 		.f1_id = PCI_DEVICE_ID_AMD_16H_NB_F1,
-		.f3_id = PCI_DEVICE_ID_AMD_16H_NB_F3,
+		.f2_id = PCI_DEVICE_ID_AMD_16H_NB_F2,
 		.ops = {
 			.early_channel_count	= f1x_early_channel_count,
 			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
@@ -1978,7 +1973,7 @@
 	[F16_M30H_CPUS] = {
 		.ctl_name = "F16h_M30h",
 		.f1_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F1,
-		.f3_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F3,
+		.f2_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F2,
 		.ops = {
 			.early_channel_count	= f1x_early_channel_count,
 			.map_sysaddr_to_csrow	= f1x_map_sysaddr_to_csrow,
@@ -2227,13 +2222,13 @@
 }
 
 /*
- * Use pvt->F2 which contains the F2 CPU PCI device to get the related
- * F1 (AddrMap) and F3 (Misc) devices. Return negative value on error.
+ * Use pvt->F3 which contains the F3 CPU PCI device to get the related
+ * F1 (AddrMap) and F2 (Dct) devices. Return negative value on error.
  */
-static int reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 f1_id, u16 f3_id)
+static int reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 f1_id, u16 f2_id)
 {
 	/* Reserve the ADDRESS MAP Device */
-	pvt->F1 = pci_get_related_function(pvt->F2->vendor, f1_id, pvt->F2);
+	pvt->F1 = pci_get_related_function(pvt->F3->vendor, f1_id, pvt->F3);
 	if (!pvt->F1) {
 		amd64_err("error address map device not found: "
 			  "vendor %x device 0x%x (broken BIOS?)\n",
@@ -2241,15 +2236,15 @@
 		return -ENODEV;
 	}
 
-	/* Reserve the MISC Device */
-	pvt->F3 = pci_get_related_function(pvt->F2->vendor, f3_id, pvt->F2);
-	if (!pvt->F3) {
+	/* Reserve the DCT Device */
+	pvt->F2 = pci_get_related_function(pvt->F3->vendor, f2_id, pvt->F3);
+	if (!pvt->F2) {
 		pci_dev_put(pvt->F1);
 		pvt->F1 = NULL;
 
-		amd64_err("error F3 device not found: "
+		amd64_err("error F2 device not found: "
 			  "vendor %x device 0x%x (broken BIOS?)\n",
-			  PCI_VENDOR_ID_AMD, f3_id);
+			  PCI_VENDOR_ID_AMD, f2_id);
 
 		return -ENODEV;
 	}
@@ -2263,7 +2258,7 @@
 static void free_mc_sibling_devs(struct amd64_pvt *pvt)
 {
 	pci_dev_put(pvt->F1);
-	pci_dev_put(pvt->F3);
+	pci_dev_put(pvt->F2);
 }
 
 /*
@@ -2778,14 +2773,14 @@
 	NULL
 };
 
-static int init_one_instance(struct pci_dev *F2)
+static int init_one_instance(unsigned int nid)
 {
-	struct amd64_pvt *pvt = NULL;
+	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
 	struct amd64_family_type *fam_type = NULL;
 	struct mem_ctl_info *mci = NULL;
 	struct edac_mc_layer layers[2];
+	struct amd64_pvt *pvt = NULL;
 	int err = 0, ret;
-	u16 nid = amd_pci_dev_to_node_id(F2);
 
 	ret = -ENOMEM;
 	pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
@@ -2793,7 +2788,7 @@
 		goto err_ret;
 
 	pvt->mc_node_id	= nid;
-	pvt->F2 = F2;
+	pvt->F3 = F3;
 
 	ret = -EINVAL;
 	fam_type = per_family_init(pvt);
@@ -2801,7 +2796,7 @@
 		goto err_free;
 
 	ret = -ENODEV;
-	err = reserve_mc_sibling_devs(pvt, fam_type->f1_id, fam_type->f3_id);
+	err = reserve_mc_sibling_devs(pvt, fam_type->f1_id, fam_type->f2_id);
 	if (err)
 		goto err_free;
 
@@ -2836,7 +2831,7 @@
 		goto err_siblings;
 
 	mci->pvt_info = pvt;
-	mci->pdev = &pvt->F2->dev;
+	mci->pdev = &pvt->F3->dev;
 
 	setup_mci_misc_attrs(mci, fam_type);
 
@@ -2855,8 +2850,6 @@
 
 	amd_register_ecc_decoder(decode_bus_error);
 
-	atomic_inc(&drv_instances);
-
 	return 0;
 
 err_add_mc:
@@ -2872,19 +2865,11 @@
 	return ret;
 }
 
-static int probe_one_instance(struct pci_dev *pdev,
-			      const struct pci_device_id *mc_type)
+static int probe_one_instance(unsigned int nid)
 {
-	u16 nid = amd_pci_dev_to_node_id(pdev);
 	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
 	struct ecc_settings *s;
-	int ret = 0;
-
-	ret = pci_enable_device(pdev);
-	if (ret < 0) {
-		edac_dbg(0, "ret=%d\n", ret);
-		return -EIO;
-	}
+	int ret;
 
 	ret = -ENOMEM;
 	s = kzalloc(sizeof(struct ecc_settings), GFP_KERNEL);
@@ -2905,7 +2890,7 @@
 			goto err_enable;
 	}
 
-	ret = init_one_instance(pdev);
+	ret = init_one_instance(nid);
 	if (ret < 0) {
 		amd64_err("Error probing instance: %d\n", nid);
 		restore_ecc_error_reporting(s, nid, F3);
@@ -2921,19 +2906,18 @@
 	return ret;
 }
 
-static void remove_one_instance(struct pci_dev *pdev)
+static void remove_one_instance(unsigned int nid)
 {
-	struct mem_ctl_info *mci;
-	struct amd64_pvt *pvt;
-	u16 nid = amd_pci_dev_to_node_id(pdev);
 	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
 	struct ecc_settings *s = ecc_stngs[nid];
+	struct mem_ctl_info *mci;
+	struct amd64_pvt *pvt;
 
-	mci = find_mci_by_dev(&pdev->dev);
+	mci = find_mci_by_dev(&F3->dev);
 	WARN_ON(!mci);
 
 	/* Remove from EDAC CORE tracking list */
-	mci = edac_mc_del_mc(&pdev->dev);
+	mci = edac_mc_del_mc(&F3->dev);
 	if (!mci)
 		return;
 
@@ -2957,31 +2941,6 @@
 	edac_mc_free(mci);
 }
 
-/*
- * This table is part of the interface for loading drivers for PCI devices. The
- * PCI core identifies what devices are on a system during boot, and then
- * inquiry this table to see if this driver is for a given device found.
- */
-static const struct pci_device_id amd64_pci_table[] = {
-	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_K8_NB_MEMCTL) },
-	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_10H_NB_DRAM) },
-	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_NB_F2) },
-	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F2) },
-	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_M60H_NB_F2) },
-	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_16H_NB_F2) },
-	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F2) },
-	{0, }
-};
-MODULE_DEVICE_TABLE(pci, amd64_pci_table);
-
-static struct pci_driver amd64_pci_driver = {
-	.name		= EDAC_MOD_STR,
-	.probe		= probe_one_instance,
-	.remove		= remove_one_instance,
-	.id_table	= amd64_pci_table,
-	.driver.probe_type = PROBE_FORCE_SYNCHRONOUS,
-};
-
 static void setup_pci_device(void)
 {
 	struct mem_ctl_info *mci;
@@ -3005,8 +2964,7 @@
 static int __init amd64_edac_init(void)
 {
 	int err = -ENODEV;
-
-	printk(KERN_INFO "AMD64 EDAC driver v%s\n", EDAC_AMD64_VERSION);
+	int i;
 
 	opstate_init();
 
@@ -3022,13 +2980,14 @@
 	if (!msrs)
 		goto err_free;
 
-	err = pci_register_driver(&amd64_pci_driver);
-	if (err)
-		goto err_pci;
+	for (i = 0; i < amd_nb_num(); i++)
+		if (probe_one_instance(i)) {
+			/* unwind properly */
+			while (--i >= 0)
+				remove_one_instance(i);
 
-	err = -ENODEV;
-	if (!atomic_read(&drv_instances))
-		goto err_no_instances;
+			goto err_pci;
+		}
 
 	setup_pci_device();
 
@@ -3036,10 +2995,9 @@
 	amd64_err("%s on 32-bit is unsupported. USE AT YOUR OWN RISK!\n", EDAC_MOD_STR);
 #endif
 
-	return 0;
+	printk(KERN_INFO "AMD64 EDAC driver v%s\n", EDAC_AMD64_VERSION);
 
-err_no_instances:
-	pci_unregister_driver(&amd64_pci_driver);
+	return 0;
 
 err_pci:
 	msrs_free(msrs);
@@ -3055,10 +3013,13 @@
 
 static void __exit amd64_edac_exit(void)
 {
+	int i;
+
 	if (pci_ctl)
 		edac_pci_release_generic_ctl(pci_ctl);
 
-	pci_unregister_driver(&amd64_pci_driver);
+	for (i = 0; i < amd_nb_num(); i++)
+		remove_one_instance(i);
 
 	kfree(ecc_stngs);
 	ecc_stngs = NULL;
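
The init path above replaces PCI-core driven probing with a direct walk over
the AMD northbridge nodes, unwinding already-probed instances in reverse order
on failure. A minimal sketch of that probe-all-or-unwind pattern, with
probe_one()/remove_one() standing in for the driver's probe_one_instance()
and remove_one_instance():

    /* Sketch only: probe every node, or roll back cleanly. */
    static int __init probe_all(int nr_nodes)
    {
        int i;

        for (i = 0; i < nr_nodes; i++) {
            if (probe_one(i)) {
                /* Undo instances 0..i-1 in reverse order. */
                while (--i >= 0)
                    remove_one(i);
                return -ENODEV;
            }
        }
        return 0;
    }
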
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index c0f248f..c088704 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -422,7 +422,7 @@
 
 struct amd64_family_type {
 	const char *ctl_name;
-	u16 f1_id, f3_id;
+	u16 f1_id, f2_id;
 	struct low_ops ops;
 };
 
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 1472f48c..6aa256b0 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -923,7 +923,7 @@
 	mci->ue_mc += count;
 
 	if (!enable_per_layer_report) {
-		mci->ce_noinfo_count += count;
+		mci->ue_noinfo_count += count;
 		return;
 	}
 
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index 26e65ab..10c305b 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -998,11 +998,12 @@
 
 void edac_unregister_sysfs(struct mem_ctl_info *mci)
 {
+	struct bus_type *bus = mci->bus;
 	const char *name = mci->bus->name;
 
 	edac_dbg(1, "Unregistering device %s\n", dev_name(&mci->dev));
 	device_unregister(&mci->dev);
-	bus_unregister(mci->bus);
+	bus_unregister(bus);
 	kfree(name);
 }
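
The edac_mc_sysfs change above fixes a use-after-free: device_unregister() may
free the mci object itself, so mci->bus must not be dereferenced afterwards.
Reduced to its essence (assuming the same object lifetime), the hazard and the
fix look like this:

    /* Broken: mci may be freed once its device is unregistered. */
    device_unregister(&mci->dev);
    bus_unregister(mci->bus);           /* reads freed memory */

    /* Fixed: snapshot the pointers before tearing the device down. */
    struct bus_type *bus = mci->bus;
    const char *name = mci->bus->name;

    device_unregister(&mci->dev);
    bus_unregister(bus);
    kfree(name);
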
 
diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c
index 792bdae..8a68a5e 100644
--- a/drivers/edac/i7core_edac.c
+++ b/drivers/edac/i7core_edac.c
@@ -271,16 +271,6 @@
 
 	bool		is_registered, enable_scrub;
 
-	/* Fifo double buffers */
-	struct mce		mce_entry[MCE_LOG_LEN];
-	struct mce		mce_outentry[MCE_LOG_LEN];
-
-	/* Fifo in/out counters */
-	unsigned		mce_in, mce_out;
-
-	/* Count indicator to show errors not got */
-	unsigned		mce_overrun;
-
 	/* DCLK Frequency used for computing scrub rate */
 	int			dclk_freq;
 
@@ -1792,56 +1782,15 @@
  *	i7core_check_error	Retrieve and process errors reported by the
  *				hardware. Called by the Core module.
  */
-static void i7core_check_error(struct mem_ctl_info *mci)
+static void i7core_check_error(struct mem_ctl_info *mci, struct mce *m)
 {
 	struct i7core_pvt *pvt = mci->pvt_info;
-	int i;
-	unsigned count = 0;
-	struct mce *m;
 
-	/*
-	 * MCE first step: Copy all mce errors into a temporary buffer
-	 * We use a double buffering here, to reduce the risk of
-	 * losing an error.
-	 */
-	smp_rmb();
-	count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
-		% MCE_LOG_LEN;
-	if (!count)
-		goto check_ce_error;
-
-	m = pvt->mce_outentry;
-	if (pvt->mce_in + count > MCE_LOG_LEN) {
-		unsigned l = MCE_LOG_LEN - pvt->mce_in;
-
-		memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
-		smp_wmb();
-		pvt->mce_in = 0;
-		count -= l;
-		m += l;
-	}
-	memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
-	smp_wmb();
-	pvt->mce_in += count;
-
-	smp_rmb();
-	if (pvt->mce_overrun) {
-		i7core_printk(KERN_ERR, "Lost %d memory errors\n",
-			      pvt->mce_overrun);
-		smp_wmb();
-		pvt->mce_overrun = 0;
-	}
-
-	/*
-	 * MCE second step: parse errors and display
-	 */
-	for (i = 0; i < count; i++)
-		i7core_mce_output_error(mci, &pvt->mce_outentry[i]);
+	i7core_mce_output_error(mci, m);
 
 	/*
 	 * Now, let's increment CE error counts
 	 */
-check_ce_error:
 	if (!pvt->is_registered)
 		i7core_udimm_check_mc_ecc_err(mci);
 	else
@@ -1849,12 +1798,8 @@
 }
 
 /*
- * i7core_mce_check_error	Replicates mcelog routine to get errors
- *				This routine simply queues mcelog errors, and
- *				return. The error itself should be handled later
- *				by i7core_check_error.
- * WARNING: As this routine should be called at NMI time, extra care should
- * be taken to avoid deadlocks, and to be as fast as possible.
+ * Check that logging is enabled and that this is the right type
+ * of error for us to handle.
  */
 static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
 				  void *data)
@@ -1882,21 +1827,7 @@
 	if (mce->bank != 8)
 		return NOTIFY_DONE;
 
-	smp_rmb();
-	if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
-		smp_wmb();
-		pvt->mce_overrun++;
-		return NOTIFY_DONE;
-	}
-
-	/* Copy memory error at the ringbuffer */
-	memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
-	smp_wmb();
-	pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;
-
-	/* Handle fatal errors immediately */
-	if (mce->mcgstatus & 1)
-		i7core_check_error(mci);
+	i7core_check_error(mci, mce);
 
 	/* Advise mcelog that the errors were handled */
 	return NOTIFY_STOP;
@@ -2243,8 +2174,6 @@
 	get_dimm_config(mci);
 	/* record ptr to the generic device */
 	mci->pdev = &i7core_dev->pdev[0]->dev;
-	/* Set the function pointer to an actual operation function */
-	mci->edac_check = i7core_check_error;
 
 	/* Enable scrubrate setting */
 	if (pvt->enable_scrub)
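
With the private ring buffer gone, i7core decodes each machine-check record
synchronously from the MCE decode chain instead of queueing it for a later
edac_check pass. A sketch of the notifier shape this relies on (handler body
and names are illustrative, not the driver's exact code):

    static int my_mce_check_error(struct notifier_block *nb,
                                  unsigned long val, void *data)
    {
        struct mce *m = data;

        if (m->bank != 8)           /* not the memory controller bank */
            return NOTIFY_DONE;

        my_output_error(m);         /* stand-in for *_output_error() */
        return NOTIFY_STOP;         /* tell mcelog the event is handled */
    }

    static struct notifier_block my_mce_dec = {
        .notifier_call = my_mce_check_error,
    };

The block is hooked up with mce_register_decode_chain(&my_mce_dec) and removed
with mce_unregister_decode_chain(), as these drivers do.
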
diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c
index 18d77ac..1c88d97 100644
--- a/drivers/edac/ie31200_edac.c
+++ b/drivers/edac/ie31200_edac.c
@@ -17,6 +17,7 @@
  * 015c: Xeon E3-1200 v2/3rd Gen Core processor DRAM Controller
  * 0c04: Xeon E3-1200 v3/4th Gen Core Processor DRAM Controller
  * 0c08: Xeon E3-1200 v3 Processor DRAM Controller
+ * 1918: Xeon E3-1200 v5 Skylake Host Bridge/DRAM Registers
  *
  * Based on Intel specification:
  * http://www.intel.com/content/dam/www/public/us/en/documents/datasheets/xeon-e3-1200v3-vol-2-datasheet.pdf
@@ -55,6 +56,7 @@
 #define PCI_DEVICE_ID_INTEL_IE31200_HB_5 0x015c
 #define PCI_DEVICE_ID_INTEL_IE31200_HB_6 0x0c04
 #define PCI_DEVICE_ID_INTEL_IE31200_HB_7 0x0c08
+#define PCI_DEVICE_ID_INTEL_IE31200_HB_8 0x1918
 
 #define IE31200_DIMMS			4
 #define IE31200_RANKS			8
@@ -105,8 +107,11 @@
  *    1  Multiple Bit Error Status (MERRSTS)
  *    0  Correctable Error Status (CERRSTS)
  */
+
 #define IE31200_C0ECCERRLOG			0x40c8
 #define IE31200_C1ECCERRLOG			0x44c8
+#define IE31200_C0ECCERRLOG_SKL			0x4048
+#define IE31200_C1ECCERRLOG_SKL			0x4448
 #define IE31200_ECCERRLOG_CE			BIT(0)
 #define IE31200_ECCERRLOG_UE			BIT(1)
 #define IE31200_ECCERRLOG_RANK_BITS		GENMASK_ULL(28, 27)
@@ -123,17 +128,28 @@
 #define IE31200_CAPID0_DDPCD		BIT(6)
 #define IE31200_CAPID0_ECC		BIT(1)
 
-#define IE31200_MAD_DIMM_0_OFFSET	0x5004
-#define IE31200_MAD_DIMM_SIZE		GENMASK_ULL(7, 0)
-#define IE31200_MAD_DIMM_A_RANK		BIT(17)
-#define IE31200_MAD_DIMM_A_WIDTH	BIT(19)
+#define IE31200_MAD_DIMM_0_OFFSET		0x5004
+#define IE31200_MAD_DIMM_0_OFFSET_SKL		0x500C
+#define IE31200_MAD_DIMM_SIZE			GENMASK_ULL(7, 0)
+#define IE31200_MAD_DIMM_A_RANK			BIT(17)
+#define IE31200_MAD_DIMM_A_RANK_SHIFT		17
+#define IE31200_MAD_DIMM_A_RANK_SKL		BIT(10)
+#define IE31200_MAD_DIMM_A_RANK_SKL_SHIFT	10
+#define IE31200_MAD_DIMM_A_WIDTH		BIT(19)
+#define IE31200_MAD_DIMM_A_WIDTH_SHIFT		19
+#define IE31200_MAD_DIMM_A_WIDTH_SKL		GENMASK_ULL(9, 8)
+#define IE31200_MAD_DIMM_A_WIDTH_SKL_SHIFT	8
 
-#define IE31200_PAGES(n)		(n << (28 - PAGE_SHIFT))
+/* Skylake reports 1GB increments, everything else is 256MB */
+#define IE31200_PAGES(n, skl)	\
+	(n << (28 + (2 * skl) - PAGE_SHIFT))
 
 static int nr_channels;
 
 struct ie31200_priv {
 	void __iomem *window;
+	void __iomem *c0errlog;
+	void __iomem *c1errlog;
 };
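
The IE31200_PAGES() macro above converts the raw size field into pages:
pre-Skylake parts count DIMM size in 256MB (2^28 byte) units, Skylake in 1GB
(2^30), hence the extra "2 * skl" in the shift. A worked example, assuming
4KiB pages (PAGE_SHIFT == 12):

    /* n = 4, skl = 0: 4 x 256MB = 1GB -> 4 << (28 - 12) =  262144 pages */
    /* n = 4, skl = 1: 4 x 1GB   = 4GB -> 4 << (30 - 12) = 1048576 pages */
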
 
 enum ie31200_chips {
@@ -157,9 +173,9 @@
 };
 
 struct dimm_data {
-	u8 size; /* in 256MB multiples */
+	u8 size; /* in multiples of 256MB, except Skylake is 1GB */
 	u8 dual_rank : 1,
-	   x16_width : 1; /* 0 means x8 width */
+	   x16_width : 2; /* 0 means x8 width */
 };
 
 static int how_many_channels(struct pci_dev *pdev)
@@ -197,11 +213,10 @@
 	return true;
 }
 
-static int eccerrlog_row(int channel, u64 log)
+static int eccerrlog_row(u64 log)
 {
-	int rank = ((log & IE31200_ECCERRLOG_RANK_BITS) >>
-		IE31200_ECCERRLOG_RANK_SHIFT);
-	return rank | (channel * IE31200_RANKS_PER_CHANNEL);
+	return ((log & IE31200_ECCERRLOG_RANK_BITS) >>
+				IE31200_ECCERRLOG_RANK_SHIFT);
 }
 
 static void ie31200_clear_error_info(struct mem_ctl_info *mci)
@@ -219,7 +234,6 @@
 {
 	struct pci_dev *pdev;
 	struct ie31200_priv *priv = mci->pvt_info;
-	void __iomem *window = priv->window;
 
 	pdev = to_pci_dev(mci->pdev);
 
@@ -232,9 +246,9 @@
 	if (!(info->errsts & IE31200_ERRSTS_BITS))
 		return;
 
-	info->eccerrlog[0] = lo_hi_readq(window + IE31200_C0ECCERRLOG);
+	info->eccerrlog[0] = lo_hi_readq(priv->c0errlog);
 	if (nr_channels == 2)
-		info->eccerrlog[1] = lo_hi_readq(window + IE31200_C1ECCERRLOG);
+		info->eccerrlog[1] = lo_hi_readq(priv->c1errlog);
 
 	pci_read_config_word(pdev, IE31200_ERRSTS, &info->errsts2);
 
@@ -245,10 +259,10 @@
 	 * should be UE info.
 	 */
 	if ((info->errsts ^ info->errsts2) & IE31200_ERRSTS_BITS) {
-		info->eccerrlog[0] = lo_hi_readq(window + IE31200_C0ECCERRLOG);
+		info->eccerrlog[0] = lo_hi_readq(priv->c0errlog);
 		if (nr_channels == 2)
 			info->eccerrlog[1] =
-				lo_hi_readq(window + IE31200_C1ECCERRLOG);
+				lo_hi_readq(priv->c1errlog);
 	}
 
 	ie31200_clear_error_info(mci);
@@ -274,14 +288,14 @@
 		if (log & IE31200_ECCERRLOG_UE) {
 			edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
 					     0, 0, 0,
-					     eccerrlog_row(channel, log),
+					     eccerrlog_row(log),
 					     channel, -1,
 					     "ie31200 UE", "");
 		} else if (log & IE31200_ECCERRLOG_CE) {
 			edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
 					     0, 0,
 					     IE31200_ECCERRLOG_SYNDROME(log),
-					     eccerrlog_row(channel, log),
+					     eccerrlog_row(log),
 					     channel, -1,
 					     "ie31200 CE", "");
 		}
@@ -326,6 +340,33 @@
 	return window;
 }
 
+static void __skl_populate_dimm_info(struct dimm_data *dd, u32 addr_decode,
+				     int chan)
+{
+	dd->size = (addr_decode >> (chan << 4)) & IE31200_MAD_DIMM_SIZE;
+	dd->dual_rank = (addr_decode & (IE31200_MAD_DIMM_A_RANK_SKL << (chan << 4))) ? 1 : 0;
+	dd->x16_width = ((addr_decode & (IE31200_MAD_DIMM_A_WIDTH_SKL << (chan << 4))) >>
+				(IE31200_MAD_DIMM_A_WIDTH_SKL_SHIFT + (chan << 4)));
+}
+
+static void __populate_dimm_info(struct dimm_data *dd, u32 addr_decode,
+				 int chan)
+{
+	dd->size = (addr_decode >> (chan << 3)) & IE31200_MAD_DIMM_SIZE;
+	dd->dual_rank = (addr_decode & (IE31200_MAD_DIMM_A_RANK << chan)) ? 1 : 0;
+	dd->x16_width = (addr_decode & (IE31200_MAD_DIMM_A_WIDTH << chan)) ? 1 : 0;
+}
+
+static void populate_dimm_info(struct dimm_data *dd, u32 addr_decode, int chan,
+			       bool skl)
+{
+	if (skl)
+		__skl_populate_dimm_info(dd, addr_decode, chan);
+	else
+		__populate_dimm_info(dd, addr_decode, chan);
+}
+
 static int ie31200_probe1(struct pci_dev *pdev, int dev_idx)
 {
 	int i, j, ret;
@@ -334,7 +375,8 @@
 	struct dimm_data dimm_info[IE31200_CHANNELS][IE31200_DIMMS_PER_CHANNEL];
 	void __iomem *window;
 	struct ie31200_priv *priv;
-	u32 addr_decode;
+	u32 addr_decode, mad_offset;
+	bool skl = (pdev->device == PCI_DEVICE_ID_INTEL_IE31200_HB_8);
 
 	edac_dbg(0, "MC:\n");
 
@@ -363,7 +405,10 @@
 
 	edac_dbg(3, "MC: init mci\n");
 	mci->pdev = &pdev->dev;
-	mci->mtype_cap = MEM_FLAG_DDR3;
+	if (skl)
+		mci->mtype_cap = MEM_FLAG_DDR4;
+	else
+		mci->mtype_cap = MEM_FLAG_DDR3;
 	mci->edac_ctl_cap = EDAC_FLAG_SECDED;
 	mci->edac_cap = EDAC_FLAG_SECDED;
 	mci->mod_name = EDAC_MOD_STR;
@@ -374,19 +419,24 @@
 	mci->ctl_page_to_phys = NULL;
 	priv = mci->pvt_info;
 	priv->window = window;
+	if (skl) {
+		priv->c0errlog = window + IE31200_C0ECCERRLOG_SKL;
+		priv->c1errlog = window + IE31200_C1ECCERRLOG_SKL;
+		mad_offset = IE31200_MAD_DIMM_0_OFFSET_SKL;
+	} else {
+		priv->c0errlog = window + IE31200_C0ECCERRLOG;
+		priv->c1errlog = window + IE31200_C1ECCERRLOG;
+		mad_offset = IE31200_MAD_DIMM_0_OFFSET;
+	}
 
 	/* populate DIMM info */
 	for (i = 0; i < IE31200_CHANNELS; i++) {
-		addr_decode = readl(window + IE31200_MAD_DIMM_0_OFFSET +
+		addr_decode = readl(window + mad_offset +
 					(i * 4));
 		edac_dbg(0, "addr_decode: 0x%x\n", addr_decode);
 		for (j = 0; j < IE31200_DIMMS_PER_CHANNEL; j++) {
-			dimm_info[i][j].size = (addr_decode >> (j * 8)) &
-						IE31200_MAD_DIMM_SIZE;
-			dimm_info[i][j].dual_rank = (addr_decode &
-				(IE31200_MAD_DIMM_A_RANK << j)) ? 1 : 0;
-			dimm_info[i][j].x16_width = (addr_decode &
-				(IE31200_MAD_DIMM_A_WIDTH << j)) ? 1 : 0;
+			populate_dimm_info(&dimm_info[i][j], addr_decode, j,
+					   skl);
 			edac_dbg(0, "size: 0x%x, rank: %d, width: %d\n",
 				 dimm_info[i][j].size,
 				 dimm_info[i][j].dual_rank,
@@ -405,7 +455,7 @@
 			struct dimm_info *dimm;
 			unsigned long nr_pages;
 
-			nr_pages = IE31200_PAGES(dimm_info[j][i].size);
+			nr_pages = IE31200_PAGES(dimm_info[j][i].size, skl);
 			if (nr_pages == 0)
 				continue;
 
@@ -417,7 +467,10 @@
 				dimm->nr_pages = nr_pages;
 				edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages);
 				dimm->grain = 8; /* just a guess */
-				dimm->mtype = MEM_DDR3;
+				if (skl)
+					dimm->mtype = MEM_DDR4;
+				else
+					dimm->mtype = MEM_DDR3;
 				dimm->dtype = DEV_UNKNOWN;
 				dimm->edac_mode = EDAC_UNKNOWN;
 			}
@@ -426,7 +479,10 @@
 			dimm->nr_pages = nr_pages;
 			edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages);
 			dimm->grain = 8; /* same guess */
-			dimm->mtype = MEM_DDR3;
+			if (skl)
+				dimm->mtype = MEM_DDR4;
+			else
+				dimm->mtype = MEM_DDR3;
 			dimm->dtype = DEV_UNKNOWN;
 			dimm->edac_mode = EDAC_UNKNOWN;
 		}
@@ -501,6 +557,9 @@
 		PCI_VEND_DEV(INTEL, IE31200_HB_7), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
 		IE31200},
 	{
+		PCI_VEND_DEV(INTEL, IE31200_HB_8), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		IE31200},
+	{
 		0,
 	}            /* 0 terminated list. */
 };
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index 49768c0..9b6800a 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -1052,7 +1052,6 @@
 	struct mce *m = (struct mce *)data;
 	struct cpuinfo_x86 *c = &cpu_data(m->extcpu);
 	int ecc;
-	u32 ebx = cpuid_ebx(0x80000007);
 
 	if (amd_filter_mce(m))
 		return NOTIFY_STOP;
@@ -1075,7 +1074,7 @@
 			((m->status & MCI_STATUS_DEFERRED) ? "Deferred" : "-"),
 			((m->status & MCI_STATUS_POISON)   ? "Poison"   : "-"));
 
-	if (!!(ebx & BIT(3))) {
+	if (boot_cpu_has(X86_FEATURE_SMCA)) {
 		u32 low, high;
 		u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
 
@@ -1094,7 +1093,7 @@
 	if (m->status & MCI_STATUS_ADDRV)
 		pr_emerg(HW_ERR "MC%d Error Address: 0x%016llx\n", m->bank, m->addr);
 
-	if (!!(ebx & BIT(3))) {
+	if (boot_cpu_has(X86_FEATURE_SMCA)) {
 		decode_smca_errors(m);
 		goto err_code;
 	}
@@ -1149,7 +1148,6 @@
 static int __init mce_amd_init(void)
 {
 	struct cpuinfo_x86 *c = &boot_cpu_data;
-	u32 ebx;
 
 	if (c->x86_vendor != X86_VENDOR_AMD)
 		return -ENODEV;
@@ -1205,9 +1203,8 @@
 		break;
 
 	case 0x17:
-		ebx = cpuid_ebx(0x80000007);
 		xec_mask = 0x3f;
-		if (!(ebx & BIT(3))) {
+		if (!boot_cpu_has(X86_FEATURE_SMCA)) {
 			printk(KERN_WARNING "Decoding supported only on Scalable MCA processors.\n");
 			goto err_out;
 		}
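
The mce_amd change swaps open-coded CPUID probing for the cached feature flag:
bit 3 of CPUID leaf 0x80000007 EBX indicates Scalable MCA, and the kernel
records it as X86_FEATURE_SMCA at boot. The two equivalent checks side by
side:

    /* Old: query the raw CPUID leaf at every call site. */
    bool smca_old = !!(cpuid_ebx(0x80000007) & BIT(3));

    /* New: test the feature flag cached at boot. */
    bool smca_new = boot_cpu_has(X86_FEATURE_SMCA);
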
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index 8bf745d..b4d0bf6 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -21,6 +21,8 @@
 #include <linux/smp.h>
 #include <linux/bitmap.h>
 #include <linux/math64.h>
+#include <linux/mod_devicetable.h>
+#include <asm/cpu_device_id.h>
 #include <asm/processor.h>
 #include <asm/mce.h>
 
@@ -28,8 +30,6 @@
 
 /* Static vars */
 static LIST_HEAD(sbridge_edac_list);
-static DEFINE_MUTEX(sbridge_edac_lock);
-static int probed;
 
 /*
  * Alter this version for the module when modifications are made
@@ -364,16 +364,6 @@
 	bool			is_mirrored, is_lockstep, is_close_pg;
 	bool			is_chan_hash;
 
-	/* Fifo double buffers */
-	struct mce		mce_entry[MCE_LOG_LEN];
-	struct mce		mce_outentry[MCE_LOG_LEN];
-
-	/* Fifo in/out counters */
-	unsigned		mce_in, mce_out;
-
-	/* Count indicator to show errors not got */
-	unsigned		mce_overrun;
-
 	/* Memory description */
 	u64			tolm, tohm;
 	struct knl_pvt knl;
@@ -662,18 +652,6 @@
 	{0,}			/* 0 terminated list. */
 };
 
-/*
- *	pci_device_id	table for which devices we are looking for
- */
-static const struct pci_device_id sbridge_pci_tbl[] = {
-	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0)},
-	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA)},
-	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0)},
-	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0)},
-	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KNL_IMC_SAD0)},
-	{0,}			/* 0 terminated list. */
-};
-
 
 /****************************************************************************
 			Ancillary status routines
@@ -3097,63 +3075,8 @@
 }
 
 /*
- *	sbridge_check_error	Retrieve and process errors reported by the
- *				hardware. Called by the Core module.
- */
-static void sbridge_check_error(struct mem_ctl_info *mci)
-{
-	struct sbridge_pvt *pvt = mci->pvt_info;
-	int i;
-	unsigned count = 0;
-	struct mce *m;
-
-	/*
-	 * MCE first step: Copy all mce errors into a temporary buffer
-	 * We use a double buffering here, to reduce the risk of
-	 * loosing an error.
-	 */
-	smp_rmb();
-	count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
-		% MCE_LOG_LEN;
-	if (!count)
-		return;
-
-	m = pvt->mce_outentry;
-	if (pvt->mce_in + count > MCE_LOG_LEN) {
-		unsigned l = MCE_LOG_LEN - pvt->mce_in;
-
-		memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
-		smp_wmb();
-		pvt->mce_in = 0;
-		count -= l;
-		m += l;
-	}
-	memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
-	smp_wmb();
-	pvt->mce_in += count;
-
-	smp_rmb();
-	if (pvt->mce_overrun) {
-		sbridge_printk(KERN_ERR, "Lost %d memory errors\n",
-			      pvt->mce_overrun);
-		smp_wmb();
-		pvt->mce_overrun = 0;
-	}
-
-	/*
-	 * MCE second step: parse errors and display
-	 */
-	for (i = 0; i < count; i++)
-		sbridge_mce_output_error(mci, &pvt->mce_outentry[i]);
-}
-
-/*
- * sbridge_mce_check_error	Replicates mcelog routine to get errors
- *				This routine simply queues mcelog errors, and
- *				return. The error itself should be handled later
- *				by sbridge_check_error.
- * WARNING: As this routine should be called at NMI time, extra care should
- * be taken to avoid deadlocks, and to be as fast as possible.
+ * Check that logging is enabled and that this is the right type
+ * of error for us to handle.
  */
 static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
 				   void *data)
@@ -3198,21 +3121,7 @@
 			  "%u APIC %x\n", mce->cpuvendor, mce->cpuid,
 			  mce->time, mce->socketid, mce->apicid);
 
-	smp_rmb();
-	if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
-		smp_wmb();
-		pvt->mce_overrun++;
-		return NOTIFY_DONE;
-	}
-
-	/* Copy memory error at the ringbuffer */
-	memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
-	smp_wmb();
-	pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;
-
-	/* Handle fatal errors immediately */
-	if (mce->mcgstatus & 1)
-		sbridge_check_error(mci);
+	sbridge_mce_output_error(mci, mce);
 
 	/* Advise mcelog that the errors were handled */
 	return NOTIFY_STOP;
@@ -3298,9 +3207,6 @@
 	mci->dev_name = pci_name(pdev);
 	mci->ctl_page_to_phys = NULL;
 
-	/* Set the function pointer to an actual operation function */
-	mci->edac_check = sbridge_check_error;
-
 	pvt->info.type = type;
 	switch (type) {
 	case IVY_BRIDGE:
@@ -3448,62 +3354,40 @@
 	return rc;
 }
 
+#define ICPU(model, table) \
+	{ X86_VENDOR_INTEL, 6, model, 0, (unsigned long)&table }
+
+/* Order here must match "enum type" */
+static const struct x86_cpu_id sbridge_cpuids[] = {
+	ICPU(0x2d, pci_dev_descr_sbridge_table),	/* SANDY_BRIDGE */
+	ICPU(0x3e, pci_dev_descr_ibridge_table),	/* IVY_BRIDGE */
+	ICPU(0x3f, pci_dev_descr_haswell_table),	/* HASWELL */
+	ICPU(0x4f, pci_dev_descr_broadwell_table),	/* BROADWELL */
+	ICPU(0x57, pci_dev_descr_knl_table),		/* KNIGHTS_LANDING */
+	{ }
+};
+MODULE_DEVICE_TABLE(x86cpu, sbridge_cpuids);
+
 /*
- *	sbridge_probe	Probe for ONE instance of device to see if it is
+ *	sbridge_probe	Get all devices and register memory controllers
  *			present.
  *	return:
  *		0 for FOUND a device
  *		< 0 for error code
  */
 
-static int sbridge_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+static int sbridge_probe(const struct x86_cpu_id *id)
 {
 	int rc = -ENODEV;
 	u8 mc, num_mc = 0;
 	struct sbridge_dev *sbridge_dev;
-	enum type type = SANDY_BRIDGE;
+	struct pci_id_table *ptable = (struct pci_id_table *)id->driver_data;
 
 	/* get the pci devices we want to reserve for our use */
-	mutex_lock(&sbridge_edac_lock);
+	rc = sbridge_get_all_devices(&num_mc, ptable);
 
-	/*
-	 * All memory controllers are allocated at the first pass.
-	 */
-	if (unlikely(probed >= 1)) {
-		mutex_unlock(&sbridge_edac_lock);
-		return -ENODEV;
-	}
-	probed++;
-
-	switch (pdev->device) {
-	case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA:
-		rc = sbridge_get_all_devices(&num_mc,
-					pci_dev_descr_ibridge_table);
-		type = IVY_BRIDGE;
-		break;
-	case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0:
-		rc = sbridge_get_all_devices(&num_mc,
-					pci_dev_descr_sbridge_table);
-		type = SANDY_BRIDGE;
-		break;
-	case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0:
-		rc = sbridge_get_all_devices(&num_mc,
-					pci_dev_descr_haswell_table);
-		type = HASWELL;
-		break;
-	case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0:
-		rc = sbridge_get_all_devices(&num_mc,
-					pci_dev_descr_broadwell_table);
-		type = BROADWELL;
-	    break;
-	case PCI_DEVICE_ID_INTEL_KNL_IMC_SAD0:
-		rc = sbridge_get_all_devices_knl(&num_mc,
-					pci_dev_descr_knl_table);
-		type = KNIGHTS_LANDING;
-		break;
-	}
 	if (unlikely(rc < 0)) {
-		edac_dbg(0, "couldn't get all devices for 0x%x\n", pdev->device);
+		edac_dbg(0, "couldn't get all devices\n");
 		goto fail0;
 	}
 
@@ -3514,14 +3398,13 @@
 			 mc, mc + 1, num_mc);
 
 		sbridge_dev->mc = mc++;
-		rc = sbridge_register_mci(sbridge_dev, type);
+		rc = sbridge_register_mci(sbridge_dev, id - sbridge_cpuids);
 		if (unlikely(rc < 0))
 			goto fail1;
 	}
 
 	sbridge_printk(KERN_INFO, "%s\n", SBRIDGE_REVISION);
 
-	mutex_unlock(&sbridge_edac_lock);
 	return 0;
 
 fail1:
@@ -3530,74 +3413,47 @@
 
 	sbridge_put_all_devices();
 fail0:
-	mutex_unlock(&sbridge_edac_lock);
 	return rc;
 }
 
 /*
- *	sbridge_remove	destructor for one instance of device
+ *	sbridge_remove	cleanup
  *
  */
-static void sbridge_remove(struct pci_dev *pdev)
+static void sbridge_remove(void)
 {
 	struct sbridge_dev *sbridge_dev;
 
 	edac_dbg(0, "\n");
 
-	/*
-	 * we have a trouble here: pdev value for removal will be wrong, since
-	 * it will point to the X58 register used to detect that the machine
-	 * is a Nehalem or upper design. However, due to the way several PCI
-	 * devices are grouped together to provide MC functionality, we need
-	 * to use a different method for releasing the devices
-	 */
-
-	mutex_lock(&sbridge_edac_lock);
-
-	if (unlikely(!probed)) {
-		mutex_unlock(&sbridge_edac_lock);
-		return;
-	}
-
 	list_for_each_entry(sbridge_dev, &sbridge_edac_list, list)
 		sbridge_unregister_mci(sbridge_dev);
 
 	/* Release PCI resources */
 	sbridge_put_all_devices();
-
-	probed--;
-
-	mutex_unlock(&sbridge_edac_lock);
 }
 
-MODULE_DEVICE_TABLE(pci, sbridge_pci_tbl);
-
-/*
- *	sbridge_driver	pci_driver structure for this module
- *
- */
-static struct pci_driver sbridge_driver = {
-	.name     = "sbridge_edac",
-	.probe    = sbridge_probe,
-	.remove   = sbridge_remove,
-	.id_table = sbridge_pci_tbl,
-};
-
 /*
  *	sbridge_init		Module entry function
  *			Try to initialize this module for its devices
  */
 static int __init sbridge_init(void)
 {
-	int pci_rc;
+	const struct x86_cpu_id *id;
+	int rc;
 
 	edac_dbg(2, "\n");
 
+	id = x86_match_cpu(sbridge_cpuids);
+	if (!id)
+		return -ENODEV;
+
 	/* Ensure that the OPSTATE is set correctly for POLL or NMI */
 	opstate_init();
 
-	pci_rc = pci_register_driver(&sbridge_driver);
-	if (pci_rc >= 0) {
+	rc = sbridge_probe(id);
+
+	if (rc >= 0) {
 		mce_register_decode_chain(&sbridge_mce_dec);
 		if (get_edac_report_status() == EDAC_REPORTING_DISABLED)
 			sbridge_printk(KERN_WARNING, "Loading driver, error reporting disabled.\n");
@@ -3605,9 +3461,9 @@
 	}
 
 	sbridge_printk(KERN_ERR, "Failed to register device with error %d.\n",
-		      pci_rc);
+		      rc);
 
-	return pci_rc;
+	return rc;
 }
 
 /*
@@ -3617,7 +3473,7 @@
 static void __exit sbridge_exit(void)
 {
 	edac_dbg(2, "\n");
-	pci_unregister_driver(&sbridge_driver);
+	sbridge_remove();
 	mce_unregister_decode_chain(&sbridge_mce_dec);
 }
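
sb_edac now keys its probe on the CPU model rather than on a PCI ID:
x86_match_cpu() returns the first matching entry of an x86_cpu_id table, and
driver_data carries the per-model PCI device list. A minimal sketch of the
pattern (table contents and helpers hypothetical):

    static const struct x86_cpu_id my_cpuids[] = {
        { X86_VENDOR_INTEL, 6, 0x3f, 0, (unsigned long)&haswell_table },
        { }                             /* terminator */
    };

    static int __init my_init(void)
    {
        const struct x86_cpu_id *id = x86_match_cpu(my_cpuids);

        if (!id)
            return -ENODEV;             /* unsupported CPU model */

        /* Per-model data rides along in id->driver_data. */
        return my_probe((void *)id->driver_data);
    }
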
 
diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig
index e1670d5..6394152 100644
--- a/drivers/firmware/efi/Kconfig
+++ b/drivers/firmware/efi/Kconfig
@@ -87,6 +87,31 @@
 config EFI_ARMSTUB
 	bool
 
+config EFI_BOOTLOADER_CONTROL
+	tristate "EFI Bootloader Control"
+	depends on EFI_VARS
+	default n
+	---help---
+	  This module installs a reboot hook, such that if reboot() is
+	  invoked with a string argument NNN, "NNN" is copied to the
+	  "LoaderEntryOneShot" EFI variable, to be read by the
+	  bootloader. If the string matches one of the boot labels
+	  defined in its configuration, the bootloader will boot once
+	  to that label. The "LoaderEntryRebootReason" EFI variable is
+	  set with the reboot reason: "reboot" or "shutdown". The
+	  bootloader reads this reboot reason and takes particular
+	  action according to its policy.
+
+config EFI_CAPSULE_LOADER
+	tristate "EFI capsule loader"
+	depends on EFI
+	help
+	  This option exposes a loader interface "/dev/efi_capsule_loader" for
+	  users to load EFI capsules. This driver requires working runtime
+	  capsule support in the firmware, which many OEMs do not provide.
+
+	  Most users should say N.
+
 endmenu
 
 config UEFI_CPER
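
The reboot hook described in the EFI_BOOTLOADER_CONTROL help text is reached
through the reboot(2) restart-with-argument command. A minimal, untested
user-space sketch; the label "recovery" is only an example and must match an
entry in the bootloader's configuration:

    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/reboot.h>

    int main(void)
    {
        sync();
        /* RESTART2 hands the string to reboot notifiers; efibc copies
         * it into the LoaderEntryOneShot EFI variable.
         */
        return syscall(SYS_reboot, LINUX_REBOOT_MAGIC1,
                       LINUX_REBOOT_MAGIC2, LINUX_REBOOT_CMD_RESTART2,
                       "recovery");
    }
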
diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile
index 62e654f..a219640 100644
--- a/drivers/firmware/efi/Makefile
+++ b/drivers/firmware/efi/Makefile
@@ -9,7 +9,8 @@
 #
 KASAN_SANITIZE_runtime-wrappers.o	:= n
 
-obj-$(CONFIG_EFI)			+= efi.o vars.o reboot.o
+obj-$(CONFIG_EFI)			+= efi.o vars.o reboot.o memattr.o
+obj-$(CONFIG_EFI)			+= capsule.o
 obj-$(CONFIG_EFI_VARS)			+= efivars.o
 obj-$(CONFIG_EFI_ESRT)			+= esrt.o
 obj-$(CONFIG_EFI_VARS_PSTORE)		+= efi-pstore.o
@@ -18,7 +19,9 @@
 obj-$(CONFIG_EFI_RUNTIME_WRAPPERS)	+= runtime-wrappers.o
 obj-$(CONFIG_EFI_STUB)			+= libstub/
 obj-$(CONFIG_EFI_FAKE_MEMMAP)		+= fake_mem.o
+obj-$(CONFIG_EFI_BOOTLOADER_CONTROL)	+= efibc.o
 
 arm-obj-$(CONFIG_EFI)			:= arm-init.o arm-runtime.o
 obj-$(CONFIG_ARM)			+= $(arm-obj-y)
 obj-$(CONFIG_ARM64)			+= $(arm-obj-y)
+obj-$(CONFIG_EFI_CAPSULE_LOADER)	+= capsule-loader.o
diff --git a/drivers/firmware/efi/arm-init.c b/drivers/firmware/efi/arm-init.c
index 8714f8c..a850cbc 100644
--- a/drivers/firmware/efi/arm-init.c
+++ b/drivers/firmware/efi/arm-init.c
@@ -11,17 +11,19 @@
  *
  */
 
+#define pr_fmt(fmt)	"efi: " fmt
+
 #include <linux/efi.h>
 #include <linux/init.h>
 #include <linux/memblock.h>
 #include <linux/mm_types.h>
 #include <linux/of.h>
 #include <linux/of_fdt.h>
+#include <linux/platform_device.h>
+#include <linux/screen_info.h>
 
 #include <asm/efi.h>
 
-struct efi_memory_map memmap;
-
 u64 efi_system_table;
 
 static int __init is_normal_ram(efi_memory_desc_t *md)
@@ -40,7 +42,7 @@
 {
 	efi_memory_desc_t *md;
 
-	for_each_efi_memory_desc(&memmap, md) {
+	for_each_efi_memory_desc(md) {
 		if (!(md->attribute & EFI_MEMORY_RUNTIME))
 			continue;
 		if (md->virt_addr == 0)
@@ -53,6 +55,36 @@
 	return addr;
 }
 
+static __initdata unsigned long screen_info_table = EFI_INVALID_TABLE_ADDR;
+
+static __initdata efi_config_table_type_t arch_tables[] = {
+	{LINUX_EFI_ARM_SCREEN_INFO_TABLE_GUID, NULL, &screen_info_table},
+	{NULL_GUID, NULL, NULL}
+};
+
+static void __init init_screen_info(void)
+{
+	struct screen_info *si;
+
+	if (screen_info_table != EFI_INVALID_TABLE_ADDR) {
+		si = early_memremap_ro(screen_info_table, sizeof(*si));
+		if (!si) {
+			pr_err("Could not map screen_info config table\n");
+			return;
+		}
+		screen_info = *si;
+		early_memunmap(si, sizeof(*si));
+
+		/* dummycon on ARM needs non-zero values for columns/lines */
+		screen_info.orig_video_cols = 80;
+		screen_info.orig_video_lines = 25;
+	}
+
+	if (screen_info.orig_video_isVGA == VIDEO_TYPE_EFI &&
+	    memblock_is_map_memory(screen_info.lfb_base))
+		memblock_mark_nomap(screen_info.lfb_base, screen_info.lfb_size);
+}
+
 static int __init uefi_init(void)
 {
 	efi_char16_t *c16;
@@ -85,6 +117,8 @@
 			efi.systab->hdr.revision >> 16,
 			efi.systab->hdr.revision & 0xffff);
 
+	efi.runtime_version = efi.systab->hdr.revision;
+
 	/* Show what we know for posterity */
 	c16 = early_memremap_ro(efi_to_phys(efi.systab->fw_vendor),
 				sizeof(vendor) * sizeof(efi_char16_t));
@@ -108,7 +142,8 @@
 		goto out;
 	}
 	retval = efi_config_parse_tables(config_tables, efi.systab->nr_tables,
-					 sizeof(efi_config_table_t), NULL);
+					 sizeof(efi_config_table_t),
+					 arch_tables);
 
 	early_memunmap(config_tables, table_size);
 out:
@@ -143,7 +178,15 @@
 	if (efi_enabled(EFI_DBG))
 		pr_info("Processing EFI memory map:\n");
 
-	for_each_efi_memory_desc(&memmap, md) {
+	/*
+	 * Discard memblocks discovered so far: if there are any at this
+	 * point, they originate from memory nodes in the DT, and UEFI
+	 * uses its own memory map instead.
+	 */
+	memblock_dump_all();
+	memblock_remove(0, (phys_addr_t)ULLONG_MAX);
+
+	for_each_efi_memory_desc(md) {
 		paddr = md->phys_addr;
 		npages = md->num_pages;
 
@@ -184,9 +227,9 @@
 
 	efi_system_table = params.system_table;
 
-	memmap.phys_map = params.mmap;
-	memmap.map = early_memremap_ro(params.mmap, params.mmap_size);
-	if (memmap.map == NULL) {
+	efi.memmap.phys_map = params.mmap;
+	efi.memmap.map = early_memremap_ro(params.mmap, params.mmap_size);
+	if (efi.memmap.map == NULL) {
 		/*
 		* If we are booting via UEFI, the UEFI memory map is the only
 		* description of memory we have, so there is little point in
@@ -194,28 +237,37 @@
 		*/
 		panic("Unable to map EFI memory map.\n");
 	}
-	memmap.map_end = memmap.map + params.mmap_size;
-	memmap.desc_size = params.desc_size;
-	memmap.desc_version = params.desc_ver;
+	efi.memmap.map_end = efi.memmap.map + params.mmap_size;
+	efi.memmap.desc_size = params.desc_size;
+	efi.memmap.desc_version = params.desc_ver;
+
+	WARN(efi.memmap.desc_version != 1,
+	     "Unexpected EFI_MEMORY_DESCRIPTOR version %ld",
+	      efi.memmap.desc_version);
 
 	if (uefi_init() < 0)
 		return;
 
 	reserve_regions();
-	early_memunmap(memmap.map, params.mmap_size);
+	efi_memattr_init();
+	early_memunmap(efi.memmap.map, params.mmap_size);
 
-	if (IS_ENABLED(CONFIG_ARM)) {
-		/*
-		 * ARM currently does not allow ioremap_cache() to be called on
-		 * memory regions that are covered by struct page. So remove the
-		 * UEFI memory map from the linear mapping.
-		 */
-		memblock_mark_nomap(params.mmap & PAGE_MASK,
-				    PAGE_ALIGN(params.mmap_size +
-					       (params.mmap & ~PAGE_MASK)));
-	} else {
-		memblock_reserve(params.mmap & PAGE_MASK,
-				 PAGE_ALIGN(params.mmap_size +
-					    (params.mmap & ~PAGE_MASK)));
-	}
+	memblock_reserve(params.mmap & PAGE_MASK,
+			 PAGE_ALIGN(params.mmap_size +
+				    (params.mmap & ~PAGE_MASK)));
+
+	init_screen_info();
 }
+
+static int __init register_gop_device(void)
+{
+	void *pd;
+
+	if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI)
+		return 0;
+
+	pd = platform_device_register_data(NULL, "efi-framebuffer", 0,
+					   &screen_info, sizeof(screen_info));
+	return PTR_ERR_OR_ZERO(pd);
+}
+subsys_initcall(register_gop_device);
diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c
index 6ae21e4..17ccf0a 100644
--- a/drivers/firmware/efi/arm-runtime.c
+++ b/drivers/firmware/efi/arm-runtime.c
@@ -42,11 +42,13 @@
 static bool __init efi_virtmap_init(void)
 {
 	efi_memory_desc_t *md;
+	bool systab_found;
 
 	efi_mm.pgd = pgd_alloc(&efi_mm);
 	init_new_context(NULL, &efi_mm);
 
-	for_each_efi_memory_desc(&memmap, md) {
+	systab_found = false;
+	for_each_efi_memory_desc(md) {
 		phys_addr_t phys = md->phys_addr;
 		int ret;
 
@@ -64,7 +66,25 @@
 				&phys, ret);
 			return false;
 		}
+		/*
+		 * If this entry covers the address of the UEFI system table,
+		 * calculate and record its virtual address.
+		 */
+		if (efi_system_table >= phys &&
+		    efi_system_table < phys + (md->num_pages * EFI_PAGE_SIZE)) {
+			efi.systab = (void *)(unsigned long)(efi_system_table -
+							     phys + md->virt_addr);
+			systab_found = true;
+		}
 	}
+	if (!systab_found) {
+		pr_err("No virtual mapping found for the UEFI System Table\n");
+		return false;
+	}
+
+	if (efi_memattr_apply_permissions(&efi_mm, efi_set_mapping_permissions))
+		return false;
+
 	return true;
 }
 
@@ -89,26 +109,17 @@
 
 	pr_info("Remapping and enabling EFI services.\n");
 
-	mapsize = memmap.map_end - memmap.map;
-	memmap.map = (__force void *)ioremap_cache(memmap.phys_map,
-						   mapsize);
-	if (!memmap.map) {
+	mapsize = efi.memmap.map_end - efi.memmap.map;
+
+	efi.memmap.map = memremap(efi.memmap.phys_map, mapsize, MEMREMAP_WB);
+	if (!efi.memmap.map) {
 		pr_err("Failed to remap EFI memory map\n");
 		return -ENOMEM;
 	}
-	memmap.map_end = memmap.map + mapsize;
-	efi.memmap = &memmap;
-
-	efi.systab = (__force void *)ioremap_cache(efi_system_table,
-						   sizeof(efi_system_table_t));
-	if (!efi.systab) {
-		pr_err("Failed to remap EFI System Table\n");
-		return -ENOMEM;
-	}
-	set_bit(EFI_SYSTEM_TABLES, &efi.flags);
+	efi.memmap.map_end = efi.memmap.map + mapsize;
 
 	if (!efi_virtmap_init()) {
-		pr_err("No UEFI virtual mapping was installed -- runtime services will not be available\n");
+		pr_err("UEFI virtual mapping missing or invalid -- runtime services will not be available\n");
 		return -ENOMEM;
 	}
 
@@ -116,8 +127,6 @@
 	efi_native_runtime_setup();
 	set_bit(EFI_RUNTIME_SERVICES, &efi.flags);
 
-	efi.runtime_version = efi.systab->hdr.revision;
-
 	return 0;
 }
 early_initcall(arm_enable_runtime_services);
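
efi_virtmap_init() above derives the system table's virtual address by
offsetting into the memory descriptor that covers it. With made-up addresses:

    /* Hypothetical descriptor covering the system table:
     *   phys 0x9f000000, virt 0xffff000008000000, systab at phys 0x9f000010
     *
     *   systab virt = 0x9f000010 - 0x9f000000 + 0xffff000008000000
     *               = 0xffff000008000010
     */
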
diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c
new file mode 100644
index 0000000..c99c24b
--- /dev/null
+++ b/drivers/firmware/efi/capsule-loader.c
@@ -0,0 +1,343 @@
+/*
+ * EFI capsule loader driver.
+ *
+ * Copyright 2015 Intel Corporation
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2.
+ */
+
+#define pr_fmt(fmt) "efi: " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/miscdevice.h>
+#include <linux/highmem.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/efi.h>
+
+#define NO_FURTHER_WRITE_ACTION -1
+
+struct capsule_info {
+	bool		header_obtained;
+	int		reset_type;
+	long		index;
+	size_t		count;
+	size_t		total_size;
+	struct page	**pages;
+	size_t		page_bytes_remain;
+};
+
+/**
+ * efi_free_all_buff_pages - free all previously allocated buffer pages
+ * @cap_info: pointer to current instance of capsule_info structure
+ *
+ *	In addition to freeing buffer pages, it flags NO_FURTHER_WRITE_ACTION
+ *	to cease processing data in subsequent write(2) calls until close(2)
+ *	is called.
+ **/
+static void efi_free_all_buff_pages(struct capsule_info *cap_info)
+{
+	while (cap_info->index > 0)
+		__free_page(cap_info->pages[--cap_info->index]);
+
+	cap_info->index = NO_FURTHER_WRITE_ACTION;
+}
+
+/**
+ * efi_capsule_setup_info - obtain the efi capsule header in the binary and
+ *			    setup capsule_info structure
+ * @cap_info: pointer to current instance of capsule_info structure
+ * @kbuff: a mapped first page buffer pointer
+ * @hdr_bytes: the total number of bytes received for the EFI header
+ **/
+static ssize_t efi_capsule_setup_info(struct capsule_info *cap_info,
+				      void *kbuff, size_t hdr_bytes)
+{
+	efi_capsule_header_t *cap_hdr;
+	size_t pages_needed;
+	int ret;
+	void *temp_page;
+
+	/* Only process a data block that is larger than the EFI header size */
+	if (hdr_bytes < sizeof(efi_capsule_header_t))
+		return 0;
+
+	/* Rewind to the start of the capsule header */
+	cap_hdr = kbuff - cap_info->count;
+	pages_needed = ALIGN(cap_hdr->imagesize, PAGE_SIZE) >> PAGE_SHIFT;
+
+	if (pages_needed == 0) {
+		pr_err("%s: page count invalid\n", __func__);
+		return -EINVAL;
+	}
+
+	/* Check if the capsule binary is supported */
+	ret = efi_capsule_supported(cap_hdr->guid, cap_hdr->flags,
+				    cap_hdr->imagesize,
+				    &cap_info->reset_type);
+	if (ret) {
+		pr_err("%s: efi_capsule_supported() failed\n",
+		       __func__);
+		return ret;
+	}
+
+	cap_info->total_size = cap_hdr->imagesize;
+	temp_page = krealloc(cap_info->pages,
+			     pages_needed * sizeof(void *),
+			     GFP_KERNEL | __GFP_ZERO);
+	if (!temp_page) {
+		pr_debug("%s: krealloc() failed\n", __func__);
+		return -ENOMEM;
+	}
+
+	cap_info->pages = temp_page;
+	cap_info->header_obtained = true;
+
+	return 0;
+}
+
+/**
+ * efi_capsule_submit_update - invoke the efi_capsule_update API once binary
+ *			       upload done
+ * @cap_info: pointer to current instance of capsule_info structure
+ **/
+static ssize_t efi_capsule_submit_update(struct capsule_info *cap_info)
+{
+	int ret;
+	void *cap_hdr_temp;
+
+	cap_hdr_temp = kmap(cap_info->pages[0]);
+	if (!cap_hdr_temp) {
+		pr_debug("%s: kmap() failed\n", __func__);
+		return -EFAULT;
+	}
+
+	ret = efi_capsule_update(cap_hdr_temp, cap_info->pages);
+	kunmap(cap_info->pages[0]);
+	if (ret) {
+		pr_err("%s: efi_capsule_update() failed\n", __func__);
+		return ret;
+	}
+
+	/* Indicate capsule binary uploading is done */
+	cap_info->index = NO_FURTHER_WRITE_ACTION;
+	pr_info("%s: Successfully uploaded capsule file with reboot type '%s'\n",
+		__func__, !cap_info->reset_type ? "RESET_COLD" :
+		cap_info->reset_type == 1 ? "RESET_WARM" :
+		"RESET_SHUTDOWN");
+	return 0;
+}
+
+/**
+ * efi_capsule_write - store the capsule binary and pass it to
+ *		       efi_capsule_update() API
+ * @file: file pointer
+ * @buff: buffer pointer
+ * @count: number of bytes in @buff
+ * @offp: not used
+ *
+ *	Expectation:
+ *	- A user space tool should start at the beginning of the capsule
+ *	  binary and pass data in sequentially.
+ *	- Users should close and re-open this file node in order to upload
+ *	  more capsules.
+ *	- After an error is returned, the user should close the file and
+ *	  restart the operation; otherwise -EIO will be returned until the
+ *	  file is closed.
+ *	- An EFI capsule header must be located at the beginning of the
+ *	  capsule binary file and passed in as the first block of the write.
+ **/
+static ssize_t efi_capsule_write(struct file *file, const char __user *buff,
+				 size_t count, loff_t *offp)
+{
+	int ret = 0;
+	struct capsule_info *cap_info = file->private_data;
+	struct page *page;
+	void *kbuff = NULL;
+	size_t write_byte;
+
+	if (count == 0)
+		return 0;
+
+	/* Return error while NO_FURTHER_WRITE_ACTION is flagged */
+	if (cap_info->index < 0)
+		return -EIO;
+
+	/* Only alloc a new page when previous page is full */
+	if (!cap_info->page_bytes_remain) {
+		page = alloc_page(GFP_KERNEL);
+		if (!page) {
+			pr_debug("%s: alloc_page() failed\n", __func__);
+			ret = -ENOMEM;
+			goto failed;
+		}
+
+		cap_info->pages[cap_info->index++] = page;
+		cap_info->page_bytes_remain = PAGE_SIZE;
+	}
+
+	page = cap_info->pages[cap_info->index - 1];
+
+	kbuff = kmap(page);
+	if (!kbuff) {
+		pr_debug("%s: kmap() failed\n", __func__);
+		ret = -EFAULT;
+		goto failed;
+	}
+	kbuff += PAGE_SIZE - cap_info->page_bytes_remain;
+
+	/* Copy capsule binary data from user space to kernel space buffer */
+	write_byte = min_t(size_t, count, cap_info->page_bytes_remain);
+	if (copy_from_user(kbuff, buff, write_byte)) {
+		pr_debug("%s: copy_from_user() failed\n", __func__);
+		ret = -EFAULT;
+		goto fail_unmap;
+	}
+	cap_info->page_bytes_remain -= write_byte;
+
+	/* Setup capsule binary info structure */
+	if (!cap_info->header_obtained) {
+		ret = efi_capsule_setup_info(cap_info, kbuff,
+					     cap_info->count + write_byte);
+		if (ret)
+			goto fail_unmap;
+	}
+
+	cap_info->count += write_byte;
+	kunmap(page);
+
+	/* Submit the full binary to efi_capsule_update() API */
+	if (cap_info->header_obtained &&
+	    cap_info->count >= cap_info->total_size) {
+		if (cap_info->count > cap_info->total_size) {
+			pr_err("%s: upload size exceeded header-defined size\n",
+			       __func__);
+			ret = -EINVAL;
+			goto failed;
+		}
+
+		ret = efi_capsule_submit_update(cap_info);
+		if (ret)
+			goto failed;
+	}
+
+	return write_byte;
+
+fail_unmap:
+	kunmap(page);
+failed:
+	efi_free_all_buff_pages(cap_info);
+	return ret;
+}
+
+/**
+ * efi_capsule_flush - called by file close or file flush
+ * @file: file pointer
+ * @id: not used
+ *
+ *	If a capsule has only been partially uploaded, calling this
+ *	function is treated as upload termination: the buffer pages
+ *	allocated so far are freed and -ECANCELED is returned.
+ **/
+static int efi_capsule_flush(struct file *file, fl_owner_t id)
+{
+	int ret = 0;
+	struct capsule_info *cap_info = file->private_data;
+
+	if (cap_info->index > 0) {
+		pr_err("%s: capsule upload not complete\n", __func__);
+		efi_free_all_buff_pages(cap_info);
+		ret = -ECANCELED;
+	}
+
+	return ret;
+}
+
+/**
+ * efi_capsule_release - called by file close
+ * @inode: not used
+ * @file: file pointer
+ *
+ *	We will not free successfully submitted pages since the EFI update
+ *	requires the data to be maintained across system reboot.
+ **/
+static int efi_capsule_release(struct inode *inode, struct file *file)
+{
+	struct capsule_info *cap_info = file->private_data;
+
+	kfree(cap_info->pages);
+	kfree(file->private_data);
+	file->private_data = NULL;
+	return 0;
+}
+
+/**
+ * efi_capsule_open - called by file open
+ * @inode: not used
+ * @file: file pointer
+ *
+ *	Allocate a fresh capsule_info structure for each file open call.
+ *	This supports multiple concurrent opens, so one user does not
+ *	need to wait for another to finish before uploading their own
+ *	capsule binary.
+ **/
+static int efi_capsule_open(struct inode *inode, struct file *file)
+{
+	struct capsule_info *cap_info;
+
+	cap_info = kzalloc(sizeof(*cap_info), GFP_KERNEL);
+	if (!cap_info)
+		return -ENOMEM;
+
+	cap_info->pages = kzalloc(sizeof(void *), GFP_KERNEL);
+	if (!cap_info->pages) {
+		kfree(cap_info);
+		return -ENOMEM;
+	}
+
+	file->private_data = cap_info;
+
+	return 0;
+}
+
+static const struct file_operations efi_capsule_fops = {
+	.owner = THIS_MODULE,
+	.open = efi_capsule_open,
+	.write = efi_capsule_write,
+	.flush = efi_capsule_flush,
+	.release = efi_capsule_release,
+	.llseek = no_llseek,
+};
+
+static struct miscdevice efi_capsule_misc = {
+	.minor = MISC_DYNAMIC_MINOR,
+	.name = "efi_capsule_loader",
+	.fops = &efi_capsule_fops,
+};
+
+static int __init efi_capsule_loader_init(void)
+{
+	int ret;
+
+	if (!efi_enabled(EFI_RUNTIME_SERVICES))
+		return -ENODEV;
+
+	ret = misc_register(&efi_capsule_misc);
+	if (ret)
+		pr_err("%s: Failed to register misc char device node\n",
+		       __func__);
+
+	return ret;
+}
+module_init(efi_capsule_loader_init);
+
+static void __exit efi_capsule_loader_exit(void)
+{
+	misc_deregister(&efi_capsule_misc);
+}
+module_exit(efi_capsule_loader_exit);
+
+MODULE_DESCRIPTION("EFI capsule firmware binary loader");
+MODULE_LICENSE("GPL v2");
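
Per the kernel-doc above, user space streams the capsule sequentially and the
driver submits the update once the header-declared size has arrived. A
minimal, untested uploader sketch:

    #include <fcntl.h>
    #include <unistd.h>

    int main(int argc, char **argv)
    {
        char buf[4096];
        ssize_t n;
        int in, out;

        if (argc < 2)
            return 1;

        in = open(argv[1], O_RDONLY);
        out = open("/dev/efi_capsule_loader", O_WRONLY);
        if (in < 0 || out < 0)
            return 1;

        /* Write from the start of the capsule; the driver parses the
         * header from the first block and fires the update once all
         * declared bytes have been received.
         */
        while ((n = read(in, buf, sizeof(buf))) > 0)
            if (write(out, buf, n) != n)
                return 1;

        return close(out) ? 1 : 0;  /* flush reports partial uploads */
    }
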
diff --git a/drivers/firmware/efi/capsule.c b/drivers/firmware/efi/capsule.c
new file mode 100644
index 0000000..53b9fd2
--- /dev/null
+++ b/drivers/firmware/efi/capsule.c
@@ -0,0 +1,308 @@
+/*
+ * EFI capsule support.
+ *
+ * Copyright 2013 Intel Corporation; author Matt Fleming
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2.
+ */
+
+#define pr_fmt(fmt) "efi: " fmt
+
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/highmem.h>
+#include <linux/efi.h>
+#include <linux/vmalloc.h>
+#include <asm/io.h>
+
+typedef struct {
+	u64 length;
+	u64 data;
+} efi_capsule_block_desc_t;
+
+static bool capsule_pending;
+static bool stop_capsules;
+static int efi_reset_type = -1;
+
+/*
+ * capsule_mutex serialises access to capsule_pending, efi_reset_type
+ * and stop_capsules.
+ */
+static DEFINE_MUTEX(capsule_mutex);
+
+/**
+ * efi_capsule_pending - has a capsule been passed to the firmware?
+ * @reset_type: store the type of EFI reset if capsule is pending
+ *
+ * To ensure that the registered capsule is processed correctly by the
+ * firmware we need to perform a specific type of reset. If a capsule is
+ * pending, return the reset type in @reset_type.
+ *
+ * This function will race with callers of efi_capsule_update(): for
+ * example, calling this function while somebody else is in
+ * efi_capsule_update() but hasn't reached efi_capsule_update_locked()
+ * will miss the updates to capsule_pending and efi_reset_type after
+ * efi_capsule_update_locked() completes.
+ *
+ * A non-racy use is from platform reboot code because we use
+ * system_state to ensure no capsules can be sent to the firmware once
+ * we're at SYSTEM_RESTART. See efi_capsule_update_locked().
+ */
+bool efi_capsule_pending(int *reset_type)
+{
+	if (!capsule_pending)
+		return false;
+
+	if (reset_type)
+		*reset_type = efi_reset_type;
+
+	return true;
+}
+
+/*
+ * Whitelist of EFI capsule flags that we support.
+ *
+ * We do not handle EFI_CAPSULE_INITIATE_RESET because that would
+ * require us to prepare the kernel for reboot. Refuse to load any
+ * capsules with that flag and any other flags that we do not know how
+ * to handle.
+ */
+#define EFI_CAPSULE_SUPPORTED_FLAG_MASK			\
+	(EFI_CAPSULE_PERSIST_ACROSS_RESET | EFI_CAPSULE_POPULATE_SYSTEM_TABLE)
+
+/**
+ * efi_capsule_supported - does the firmware support the capsule?
+ * @guid: vendor guid of capsule
+ * @flags: capsule flags
+ * @size: size of capsule data
+ * @reset: the reset type required for this capsule
+ *
+ * Check whether a capsule with @flags is supported by the firmware
+ * and that @size doesn't exceed the maximum size for a capsule.
+ *
+ * No attempt is made to check @reset against the reset type required
+ * by any pending capsules because of the races involved.
+ */
+int efi_capsule_supported(efi_guid_t guid, u32 flags, size_t size, int *reset)
+{
+	efi_capsule_header_t capsule;
+	efi_capsule_header_t *cap_list[] = { &capsule };
+	efi_status_t status;
+	u64 max_size;
+
+	if (flags & ~EFI_CAPSULE_SUPPORTED_FLAG_MASK)
+		return -EINVAL;
+
+	capsule.headersize = capsule.imagesize = sizeof(capsule);
+	memcpy(&capsule.guid, &guid, sizeof(efi_guid_t));
+	capsule.flags = flags;
+
+	status = efi.query_capsule_caps(cap_list, 1, &max_size, reset);
+	if (status != EFI_SUCCESS)
+		return efi_status_to_err(status);
+
+	if (size > max_size)
+		return -ENOSPC;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(efi_capsule_supported);
+
+/*
+ * Every scatter gather list (block descriptor) page must end with a
+ * continuation pointer. The last continuation pointer of the last
+ * page must be zero to mark the end of the chain.
+ */
+#define SGLIST_PER_PAGE	((PAGE_SIZE / sizeof(efi_capsule_block_desc_t)) - 1)
+
+/*
+ * How many scatter gather list (block descriptor) pages do we need
+ * to map @count pages?
+ */
+static inline unsigned int sg_pages_num(unsigned int count)
+{
+	return DIV_ROUND_UP(count, SGLIST_PER_PAGE);
+}
+
+/**
+ * efi_capsule_update_locked - pass a single capsule to the firmware
+ * @capsule: capsule to send to the firmware
+ * @sg_pages: array of scatter gather (block descriptor) pages
+ * @reset: the reset type required for @capsule
+ *
+ * Since this function must be called under capsule_mutex check
+ * whether efi_reset_type will conflict with @reset, and atomically
+ * set it and capsule_pending if a capsule was successfully sent to
+ * the firmware.
+ *
+ * We also check to see if the system is about to restart, and if so,
+ * abort. This avoids races between efi_capsule_update() and
+ * efi_capsule_pending().
+ */
+static int
+efi_capsule_update_locked(efi_capsule_header_t *capsule,
+			  struct page **sg_pages, int reset)
+{
+	efi_physical_addr_t sglist_phys;
+	efi_status_t status;
+
+	lockdep_assert_held(&capsule_mutex);
+
+	/*
+	 * If someone has already registered a capsule that requires a
+	 * different reset type, we're out of luck and must abort.
+	 */
+	if (efi_reset_type >= 0 && efi_reset_type != reset) {
+		pr_err("Conflicting capsule reset type %d (%d).\n",
+		       reset, efi_reset_type);
+		return -EINVAL;
+	}
+
+	/*
+	 * If the system is getting ready to restart it may have
+	 * called efi_capsule_pending() to make decisions (such as
+	 * whether to force an EFI reboot), and we're racing against
+	 * that call. Abort in that case.
+	 */
+	if (unlikely(stop_capsules)) {
+		pr_warn("Capsule update raced with reboot, aborting.\n");
+		return -EINVAL;
+	}
+
+	sglist_phys = page_to_phys(sg_pages[0]);
+
+	status = efi.update_capsule(&capsule, 1, sglist_phys);
+	if (status == EFI_SUCCESS) {
+		capsule_pending = true;
+		efi_reset_type = reset;
+	}
+
+	return efi_status_to_err(status);
+}
+
+/**
+ * efi_capsule_update - send a capsule to the firmware
+ * @capsule: capsule to send to firmware
+ * @pages: an array of capsule data pages
+ *
+ * Build a scatter gather list with EFI capsule block descriptors to
+ * map the capsule described by @capsule with its data in @pages and
+ * send it to the firmware via the UpdateCapsule() runtime service.
+ *
+ * @capsule must be a virtual mapping of the first page in @pages
+ * (@pages[0]) in the kernel address space. That is, an
+ * efi_capsule_header_t that describes the entire contents of the capsule
+ * must be at the start of the first data page.
+ *
+ * Even though this function will validate that the firmware supports
+ * the capsule guid, users will likely want to check that
+ * efi_capsule_supported() succeeds before calling this function
+ * because it makes it easier to print helpful error messages.
+ *
+ * If the capsule is successfully submitted to the firmware, any
+ * subsequent calls to efi_capsule_pending() will return true. @pages
+ * must not be released or modified if this function returns
+ * successfully.
+ *
+ * Callers must be prepared for this function to fail, which can
+ * happen if we raced with system reboot or if there is already a
+ * pending capsule that has a reset type that conflicts with the one
+ * required by @capsule. Do NOT use efi_capsule_pending() to detect
+ * this conflict since that would be racy. Instead, submit the capsule
+ * to efi_capsule_update() and check the return value.
+ *
+ * Return 0 on success, a converted EFI status code on failure.
+ */
+int efi_capsule_update(efi_capsule_header_t *capsule, struct page **pages)
+{
+	u32 imagesize = capsule->imagesize;
+	efi_guid_t guid = capsule->guid;
+	unsigned int count, sg_count;
+	u32 flags = capsule->flags;
+	struct page **sg_pages;
+	int rv, reset_type;
+	int i, j;
+
+	rv = efi_capsule_supported(guid, flags, imagesize, &reset_type);
+	if (rv)
+		return rv;
+
+	count = DIV_ROUND_UP(imagesize, PAGE_SIZE);
+	sg_count = sg_pages_num(count);
+
+	sg_pages = kzalloc(sg_count * sizeof(*sg_pages), GFP_KERNEL);
+	if (!sg_pages)
+		return -ENOMEM;
+
+	for (i = 0; i < sg_count; i++) {
+		sg_pages[i] = alloc_page(GFP_KERNEL);
+		if (!sg_pages[i]) {
+			rv = -ENOMEM;
+			goto out;
+		}
+	}
+
+	for (i = 0; i < sg_count; i++) {
+		efi_capsule_block_desc_t *sglist;
+
+		sglist = kmap(sg_pages[i]);
+		if (!sglist) {
+			rv = -ENOMEM;
+			goto out;
+		}
+
+		for (j = 0; j < SGLIST_PER_PAGE && count > 0; j++) {
+			u64 sz = min_t(u64, imagesize, PAGE_SIZE);
+
+			sglist[j].length = sz;
+			sglist[j].data = page_to_phys(*pages++);
+
+			imagesize -= sz;
+			count--;
+		}
+
+		/* Continuation pointer */
+		sglist[j].length = 0;
+
+		if (i + 1 == sg_count)
+			sglist[j].data = 0;
+		else
+			sglist[j].data = page_to_phys(sg_pages[i + 1]);
+
+		kunmap(sg_pages[i]);
+	}
+
+	mutex_lock(&capsule_mutex);
+	rv = efi_capsule_update_locked(capsule, sg_pages, reset_type);
+	mutex_unlock(&capsule_mutex);
+
+out:
+	for (i = 0; rv && i < sg_count; i++) {
+		if (sg_pages[i])
+			__free_page(sg_pages[i]);
+	}
+
+	kfree(sg_pages);
+	return rv;
+}
+EXPORT_SYMBOL_GPL(efi_capsule_update);
+
+static int capsule_reboot_notify(struct notifier_block *nb, unsigned long event, void *cmd)
+{
+	mutex_lock(&capsule_mutex);
+	stop_capsules = true;
+	mutex_unlock(&capsule_mutex);
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block capsule_reboot_nb = {
+	.notifier_call = capsule_reboot_notify,
+};
+
+static int __init capsule_reboot_register(void)
+{
+	return register_reboot_notifier(&capsule_reboot_nb);
+}
+core_initcall(capsule_reboot_register);
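
To make the scatter-gather sizing concrete, assume 4KiB pages and the 16-byte
block descriptor defined at the top of the file:

    /* SGLIST_PER_PAGE = 4096 / 16 - 1 = 255 data descriptors per page.
     *
     * A 1MiB capsule spans 256 data pages, so:
     *   sg_pages_num(256) = DIV_ROUND_UP(256, 255) = 2
     *
     * Page 0 describes data pages 0..254 and its continuation entry
     * points at page 1; page 1 describes data page 255 and ends the
     * chain with a zero continuation entry.
     */
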
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 3a69ed5..05509f3 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -43,6 +43,7 @@
 	.config_table		= EFI_INVALID_TABLE_ADDR,
 	.esrt			= EFI_INVALID_TABLE_ADDR,
 	.properties_table	= EFI_INVALID_TABLE_ADDR,
+	.mem_attr_table		= EFI_INVALID_TABLE_ADDR,
 };
 EXPORT_SYMBOL(efi);
 
@@ -256,7 +257,7 @@
  */
 int __init efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md)
 {
-	struct efi_memory_map *map = efi.memmap;
+	struct efi_memory_map *map = &efi.memmap;
 	phys_addr_t p, e;
 
 	if (!efi_enabled(EFI_MEMMAP)) {
@@ -338,6 +339,7 @@
 	{UGA_IO_PROTOCOL_GUID, "UGA", &efi.uga},
 	{EFI_SYSTEM_RESOURCE_TABLE_GUID, "ESRT", &efi.esrt},
 	{EFI_PROPERTIES_TABLE_GUID, "PROP", &efi.properties_table},
+	{EFI_MEMORY_ATTRIBUTES_TABLE_GUID, "MEMATTR", &efi.mem_attr_table},
 	{NULL_GUID, NULL, NULL},
 };
 
@@ -351,8 +353,9 @@
 		for (i = 0; efi_guidcmp(table_types[i].guid, NULL_GUID); i++) {
 			if (!efi_guidcmp(*guid, table_types[i].guid)) {
 				*(table_types[i].ptr) = table;
-				pr_cont(" %s=0x%lx ",
-					table_types[i].name, table);
+				if (table_types[i].name)
+					pr_cont(" %s=0x%lx ",
+						table_types[i].name, table);
 				return 1;
 			}
 		}
@@ -620,16 +623,12 @@
  */
 u64 __weak efi_mem_attributes(unsigned long phys_addr)
 {
-	struct efi_memory_map *map;
 	efi_memory_desc_t *md;
-	void *p;
 
 	if (!efi_enabled(EFI_MEMMAP))
 		return 0;
 
-	map = efi.memmap;
-	for (p = map->map; p < map->map_end; p += map->desc_size) {
-		md = p;
+	for_each_efi_memory_desc(md) {
 		if ((md->phys_addr <= phys_addr) &&
 		    (phys_addr < (md->phys_addr +
 		    (md->num_pages << EFI_PAGE_SHIFT))))
@@ -637,3 +636,36 @@
 	}
 	return 0;
 }
+
+int efi_status_to_err(efi_status_t status)
+{
+	int err;
+
+	switch (status) {
+	case EFI_SUCCESS:
+		err = 0;
+		break;
+	case EFI_INVALID_PARAMETER:
+		err = -EINVAL;
+		break;
+	case EFI_OUT_OF_RESOURCES:
+		err = -ENOSPC;
+		break;
+	case EFI_DEVICE_ERROR:
+		err = -EIO;
+		break;
+	case EFI_WRITE_PROTECTED:
+		err = -EROFS;
+		break;
+	case EFI_SECURITY_VIOLATION:
+		err = -EACCES;
+		break;
+	case EFI_NOT_FOUND:
+		err = -ENOENT;
+		break;
+	default:
+		err = -EINVAL;
+	}
+
+	return err;
+}
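efi_status_to_err(), now shared here rather than private to vars.c, gives every caller a single place to translate firmware status codes into negative errno values. A toy, self-contained version of the same mapping (the status constants below are illustrative stand-ins only; real EFI error codes set the high bit):

#include <errno.h>
#include <stdio.h>

typedef unsigned long efi_status_t;

/* Illustrative values, not the kernel's actual encoding. */
enum { EFI_SUCCESS = 0, EFI_DEVICE_ERROR = 7, EFI_NOT_FOUND = 14 };

static int status_to_err(efi_status_t status)
{
	switch (status) {
	case EFI_SUCCESS:	return 0;
	case EFI_NOT_FOUND:	return -ENOENT;
	case EFI_DEVICE_ERROR:	return -EIO;
	default:		return -EINVAL;
	}
}

int main(void)
{
	printf("EFI_NOT_FOUND -> %d\n", status_to_err(EFI_NOT_FOUND));
	return 0;
}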
diff --git a/drivers/firmware/efi/efibc.c b/drivers/firmware/efi/efibc.c
new file mode 100644
index 0000000..8dd0c70
--- /dev/null
+++ b/drivers/firmware/efi/efibc.c
@@ -0,0 +1,113 @@
+/*
+ * efibc: control EFI bootloaders which obey LoaderEntryOneShot var
+ * Copyright (c) 2013-2016, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#define pr_fmt(fmt) "efibc: " fmt
+
+#include <linux/efi.h>
+#include <linux/module.h>
+#include <linux/reboot.h>
+#include <linux/slab.h>
+
+static void efibc_str_to_str16(const char *str, efi_char16_t *str16)
+{
+	size_t i;
+
+	for (i = 0; i < strlen(str); i++)
+		str16[i] = str[i];
+
+	str16[i] = '\0';
+}
+
+static int efibc_set_variable(const char *name, const char *value)
+{
+	int ret;
+	efi_guid_t guid = LINUX_EFI_LOADER_ENTRY_GUID;
+	struct efivar_entry *entry;
+	size_t size = (strlen(value) + 1) * sizeof(efi_char16_t);
+
+	if (size > sizeof(entry->var.Data)) {
+		pr_err("value is too large");
+		return -EINVAL;
+	}
+
+	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry) {
+		pr_err("failed to allocate efivar entry");
+		return -ENOMEM;
+	}
+
+	efibc_str_to_str16(name, entry->var.VariableName);
+	efibc_str_to_str16(value, (efi_char16_t *)entry->var.Data);
+	memcpy(&entry->var.VendorGuid, &guid, sizeof(guid));
+
+	ret = efivar_entry_set(entry,
+			       EFI_VARIABLE_NON_VOLATILE
+			       | EFI_VARIABLE_BOOTSERVICE_ACCESS
+			       | EFI_VARIABLE_RUNTIME_ACCESS,
+			       size, entry->var.Data, NULL);
+	if (ret)
+		pr_err("failed to set %s EFI variable: 0x%x\n",
+		       name, ret);
+
+	kfree(entry);
+	return ret;
+}
+
+static int efibc_reboot_notifier_call(struct notifier_block *notifier,
+				      unsigned long event, void *data)
+{
+	const char *reason = "shutdown";
+	int ret;
+
+	if (event == SYS_RESTART)
+		reason = "reboot";
+
+	ret = efibc_set_variable("LoaderEntryRebootReason", reason);
+	if (ret || !data)
+		return NOTIFY_DONE;
+
+	efibc_set_variable("LoaderEntryOneShot", (char *)data);
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block efibc_reboot_notifier = {
+	.notifier_call = efibc_reboot_notifier_call,
+};
+
+static int __init efibc_init(void)
+{
+	int ret;
+
+	if (!efi_enabled(EFI_RUNTIME_SERVICES))
+		return -ENODEV;
+
+	ret = register_reboot_notifier(&efibc_reboot_notifier);
+	if (ret)
+		pr_err("unable to register reboot notifier\n");
+
+	return ret;
+}
+module_init(efibc_init);
+
+static void __exit efibc_exit(void)
+{
+	unregister_reboot_notifier(&efibc_reboot_notifier);
+}
+module_exit(efibc_exit);
+
+MODULE_AUTHOR("Jeremy Compostella <jeremy.compostella@intel.com>");
+MODULE_AUTHOR("Matt Gumbel <matthew.k.gumbel@intel.com");
+MODULE_DESCRIPTION("EFI Bootloader Control");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c
index 096adcb..116b244 100644
--- a/drivers/firmware/efi/efivars.c
+++ b/drivers/firmware/efi/efivars.c
@@ -661,7 +661,7 @@
 			return;
 
 		err = efivar_init(efivar_update_sysfs_entry, entry,
-				  true, false, &efivar_sysfs_list);
+				  false, &efivar_sysfs_list);
 		if (!err)
 			break;
 
@@ -730,8 +730,7 @@
 		return -ENOMEM;
 	}
 
-	efivar_init(efivars_sysfs_callback, NULL, false,
-		    true, &efivar_sysfs_list);
+	efivar_init(efivars_sysfs_callback, NULL, true, &efivar_sysfs_list);
 
 	error = create_efivars_bin_attributes();
 	if (error) {
diff --git a/drivers/firmware/efi/fake_mem.c b/drivers/firmware/efi/fake_mem.c
index ed3a854..48430ab 100644
--- a/drivers/firmware/efi/fake_mem.c
+++ b/drivers/firmware/efi/fake_mem.c
@@ -57,7 +57,7 @@
 void __init efi_fake_memmap(void)
 {
 	u64 start, end, m_start, m_end, m_attr;
-	int new_nr_map = memmap.nr_map;
+	int new_nr_map = efi.memmap.nr_map;
 	efi_memory_desc_t *md;
 	phys_addr_t new_memmap_phy;
 	void *new_memmap;
@@ -68,8 +68,7 @@
 		return;
 
 	/* count up the number of EFI memory descriptors */
-	for (old = memmap.map; old < memmap.map_end; old += memmap.desc_size) {
-		md = old;
+	for_each_efi_memory_desc(md) {
 		start = md->phys_addr;
 		end = start + (md->num_pages << EFI_PAGE_SHIFT) - 1;
 
@@ -95,25 +94,25 @@
 	}
 
 	/* allocate memory for new EFI memmap */
-	new_memmap_phy = memblock_alloc(memmap.desc_size * new_nr_map,
+	new_memmap_phy = memblock_alloc(efi.memmap.desc_size * new_nr_map,
 					PAGE_SIZE);
 	if (!new_memmap_phy)
 		return;
 
 	/* create new EFI memmap */
 	new_memmap = early_memremap(new_memmap_phy,
-				    memmap.desc_size * new_nr_map);
+				    efi.memmap.desc_size * new_nr_map);
 	if (!new_memmap) {
-		memblock_free(new_memmap_phy, memmap.desc_size * new_nr_map);
+		memblock_free(new_memmap_phy, efi.memmap.desc_size * new_nr_map);
 		return;
 	}
 
-	for (old = memmap.map, new = new_memmap;
-	     old < memmap.map_end;
-	     old += memmap.desc_size, new += memmap.desc_size) {
+	for (old = efi.memmap.map, new = new_memmap;
+	     old < efi.memmap.map_end;
+	     old += efi.memmap.desc_size, new += efi.memmap.desc_size) {
 
 		/* copy original EFI memory descriptor */
-		memcpy(new, old, memmap.desc_size);
+		memcpy(new, old, efi.memmap.desc_size);
 		md = new;
 		start = md->phys_addr;
 		end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1;
@@ -134,8 +133,8 @@
 				md->num_pages = (m_end - md->phys_addr + 1) >>
 					EFI_PAGE_SHIFT;
 				/* latter part */
-				new += memmap.desc_size;
-				memcpy(new, old, memmap.desc_size);
+				new += efi.memmap.desc_size;
+				memcpy(new, old, efi.memmap.desc_size);
 				md = new;
 				md->phys_addr = m_end + 1;
 				md->num_pages = (end - md->phys_addr + 1) >>
@@ -147,16 +146,16 @@
 				md->num_pages = (m_start - md->phys_addr) >>
 					EFI_PAGE_SHIFT;
 				/* middle part */
-				new += memmap.desc_size;
-				memcpy(new, old, memmap.desc_size);
+				new += efi.memmap.desc_size;
+				memcpy(new, old, efi.memmap.desc_size);
 				md = new;
 				md->attribute |= m_attr;
 				md->phys_addr = m_start;
 				md->num_pages = (m_end - m_start + 1) >>
 					EFI_PAGE_SHIFT;
 				/* last part */
-				new += memmap.desc_size;
-				memcpy(new, old, memmap.desc_size);
+				new += efi.memmap.desc_size;
+				memcpy(new, old, efi.memmap.desc_size);
 				md = new;
 				md->phys_addr = m_end + 1;
 				md->num_pages = (end - m_end) >>
@@ -169,8 +168,8 @@
 				md->num_pages = (m_start - md->phys_addr) >>
 					EFI_PAGE_SHIFT;
 				/* latter part */
-				new += memmap.desc_size;
-				memcpy(new, old, memmap.desc_size);
+				new += efi.memmap.desc_size;
+				memcpy(new, old, efi.memmap.desc_size);
 				md = new;
 				md->phys_addr = m_start;
 				md->num_pages = (end - md->phys_addr + 1) >>
@@ -182,10 +181,10 @@
 
 	/* swap into new EFI memmap */
 	efi_unmap_memmap();
-	memmap.map = new_memmap;
-	memmap.phys_map = new_memmap_phy;
-	memmap.nr_map = new_nr_map;
-	memmap.map_end = memmap.map + memmap.nr_map * memmap.desc_size;
+	efi.memmap.map = new_memmap;
+	efi.memmap.phys_map = new_memmap_phy;
+	efi.memmap.nr_map = new_nr_map;
+	efi.memmap.map_end = efi.memmap.map + efi.memmap.nr_map * efi.memmap.desc_size;
 	set_bit(EFI_MEMMAP, &efi.flags);
 
 	/* print new EFI memmap */
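efi_fake_memmap() splits any descriptor that partially overlaps a fake region, so a single original entry can become up to three (head, middle, tail); that is why new_nr_map is counted up front. The case analysis reduces to interval arithmetic; a minimal sketch, with hypothetical names, of how many descriptors one [start, end] entry yields against a fake range [m_start, m_end]:

#include <stdio.h>

typedef unsigned long long u64;

/*
 * How many descriptors does splitting [start, end] against the fake
 * range [m_start, m_end] produce? Mirrors the four overlap cases
 * handled in efi_fake_memmap().
 */
static int split_count(u64 start, u64 end, u64 m_start, u64 m_end)
{
	if (m_start > end || m_end < start)
		return 1;		/* no overlap */
	if (m_start <= start && end <= m_end)
		return 1;		/* fully covered */
	if (m_start <= start)
		return 2;		/* fake covers the head */
	if (m_end >= end)
		return 2;		/* fake covers the tail */
	return 3;			/* fake sits in the middle */
}

int main(void)
{
	/* fake range strictly inside the entry: 3-way split */
	printf("%d\n", split_count(0x1000, 0x9000, 0x3000, 0x5000));
	return 0;
}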
diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index da99bbb..c069451 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -28,7 +28,7 @@
 # Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.
 KCOV_INSTRUMENT			:= n
 
-lib-y				:= efi-stub-helper.o
+lib-y				:= efi-stub-helper.o gop.o
 
 # include the stub's generic dependencies from lib/ when building for ARM/arm64
 arm-deps := fdt_rw.c fdt_ro.c fdt_wip.c fdt.c fdt_empty_tree.c fdt_sw.c sort.c
diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c
index 414deb8..993aa56 100644
--- a/drivers/firmware/efi/libstub/arm-stub.c
+++ b/drivers/firmware/efi/libstub/arm-stub.c
@@ -20,27 +20,49 @@
 
 bool __nokaslr;
 
-static int efi_secureboot_enabled(efi_system_table_t *sys_table_arg)
+static int efi_get_secureboot(efi_system_table_t *sys_table_arg)
 {
-	static efi_guid_t const var_guid = EFI_GLOBAL_VARIABLE_GUID;
-	static efi_char16_t const var_name[] = {
+	static efi_char16_t const sb_var_name[] = {
 		'S', 'e', 'c', 'u', 'r', 'e', 'B', 'o', 'o', 't', 0 };
+	static efi_char16_t const sm_var_name[] = {
+		'S', 'e', 't', 'u', 'p', 'M', 'o', 'd', 'e', 0 };
 
+	efi_guid_t var_guid = EFI_GLOBAL_VARIABLE_GUID;
 	efi_get_variable_t *f_getvar = sys_table_arg->runtime->get_variable;
-	unsigned long size = sizeof(u8);
-	efi_status_t status;
 	u8 val;
+	unsigned long size = sizeof(val);
+	efi_status_t status;
 
-	status = f_getvar((efi_char16_t *)var_name, (efi_guid_t *)&var_guid,
+	status = f_getvar((efi_char16_t *)sb_var_name, (efi_guid_t *)&var_guid,
 			  NULL, &size, &val);
 
+	if (status != EFI_SUCCESS)
+		goto out_efi_err;
+
+	if (val == 0)
+		return 0;
+
+	status = f_getvar((efi_char16_t *)sm_var_name, (efi_guid_t *)&var_guid,
+			  NULL, &size, &val);
+
+	if (status != EFI_SUCCESS)
+		goto out_efi_err;
+
+	if (val == 1)
+		return 0;
+
+	return 1;
+
+out_efi_err:
 	switch (status) {
-	case EFI_SUCCESS:
-		return val;
 	case EFI_NOT_FOUND:
 		return 0;
+	case EFI_DEVICE_ERROR:
+		return -EIO;
+	case EFI_SECURITY_VIOLATION:
+		return -EACCES;
 	default:
-		return 1;
+		return -EINVAL;
 	}
 }
 
@@ -147,6 +169,25 @@
 	out->output_string(out, str);
 }
 
+static struct screen_info *setup_graphics(efi_system_table_t *sys_table_arg)
+{
+	efi_guid_t gop_proto = EFI_GRAPHICS_OUTPUT_PROTOCOL_GUID;
+	efi_status_t status;
+	unsigned long size;
+	void **gop_handle = NULL;
+	struct screen_info *si = NULL;
+
+	size = 0;
+	status = efi_call_early(locate_handle, EFI_LOCATE_BY_PROTOCOL,
+				&gop_proto, NULL, &size, gop_handle);
+	if (status == EFI_BUFFER_TOO_SMALL) {
+		si = alloc_screen_info(sys_table_arg);
+		if (!si)
+			return NULL;
+		efi_setup_gop(sys_table_arg, si, &gop_proto, size);
+	}
+	return si;
+}
 
 /*
  * This function handles the architecture-specific differences between arm and
@@ -185,6 +226,8 @@
 	efi_guid_t loaded_image_proto = LOADED_IMAGE_PROTOCOL_GUID;
 	unsigned long reserve_addr = 0;
 	unsigned long reserve_size = 0;
+	int secure_boot = 0;
+	struct screen_info *si;
 
 	/* Check if we were booted by the EFI firmware */
 	if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
@@ -237,6 +280,8 @@
 			__nokaslr = true;
 	}
 
+	si = setup_graphics(sys_table);
+
 	status = handle_kernel_image(sys_table, image_addr, &image_size,
 				     &reserve_addr,
 				     &reserve_size,
@@ -250,12 +295,21 @@
 	if (status != EFI_SUCCESS)
 		pr_efi_err(sys_table, "Failed to parse EFI cmdline options\n");
 
+	secure_boot = efi_get_secureboot(sys_table);
+	if (secure_boot > 0)
+		pr_efi(sys_table, "UEFI Secure Boot is enabled.\n");
+
+	if (secure_boot < 0) {
+		pr_efi_err(sys_table,
+			"could not determine UEFI Secure Boot status.\n");
+	}
+
 	/*
 	 * Unauthenticated device tree data is a security hazard, so
 	 * ignore 'dtb=' unless UEFI Secure Boot is disabled.
 	 */
-	if (efi_secureboot_enabled(sys_table)) {
-		pr_efi(sys_table, "UEFI Secure Boot is enabled.\n");
+	if (secure_boot != 0 && strstr(cmdline_ptr, "dtb=")) {
+		pr_efi(sys_table, "Ignoring DTB from command line.\n");
 	} else {
 		status = handle_cmdline_files(sys_table, image, cmdline_ptr,
 					      "dtb=",
@@ -309,6 +363,7 @@
 	efi_free(sys_table, image_size, *image_addr);
 	efi_free(sys_table, reserve_size, reserve_addr);
 fail_free_cmdline:
+	free_screen_info(sys_table, si);
 	efi_free(sys_table, cmdline_size, (unsigned long)cmdline_ptr);
 fail:
 	return EFI_ERROR;
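Per the UEFI spec, Secure Boot is only in force when the SecureBoot variable reads 1 and SetupMode reads 0; any other readable combination means it is effectively disabled, which is what efi_get_secureboot() implements before falling back to its error mapping. A standalone sketch of just that decision, minus the firmware error handling:

#include <stdio.h>

/* 1 = enforced, 0 = not enforced; mirrors efi_get_secureboot(). */
static int secureboot_enabled(int secure_boot_var, int setup_mode_var)
{
	if (secure_boot_var == 0)
		return 0;	/* SecureBoot disabled */
	if (setup_mode_var == 1)
		return 0;	/* platform still in setup mode */
	return 1;
}

int main(void)
{
	printf("SB=1 SM=0 -> %d\n", secureboot_enabled(1, 0)); /* 1 */
	printf("SB=1 SM=1 -> %d\n", secureboot_enabled(1, 1)); /* 0 */
	return 0;
}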
diff --git a/drivers/firmware/efi/libstub/arm32-stub.c b/drivers/firmware/efi/libstub/arm32-stub.c
index 6f42be4..e1f0b28 100644
--- a/drivers/firmware/efi/libstub/arm32-stub.c
+++ b/drivers/firmware/efi/libstub/arm32-stub.c
@@ -26,6 +26,43 @@
 	return EFI_SUCCESS;
 }
 
+static efi_guid_t screen_info_guid = LINUX_EFI_ARM_SCREEN_INFO_TABLE_GUID;
+
+struct screen_info *alloc_screen_info(efi_system_table_t *sys_table_arg)
+{
+	struct screen_info *si;
+	efi_status_t status;
+
+	/*
+	 * Unlike on arm64, where we can directly fill out the screen_info
+	 * structure from the stub, we need to allocate a buffer to hold
+	 * its contents while we hand over to the kernel proper from the
+	 * decompressor.
+	 */
+	status = efi_call_early(allocate_pool, EFI_RUNTIME_SERVICES_DATA,
+				sizeof(*si), (void **)&si);
+
+	if (status != EFI_SUCCESS)
+		return NULL;
+
+	status = efi_call_early(install_configuration_table,
+				&screen_info_guid, si);
+	if (status == EFI_SUCCESS)
+		return si;
+
+	efi_call_early(free_pool, si);
+	return NULL;
+}
+
+void free_screen_info(efi_system_table_t *sys_table_arg, struct screen_info *si)
+{
+	if (!si)
+		return;
+
+	efi_call_early(install_configuration_table, &screen_info_guid, NULL);
+	efi_call_early(free_pool, si);
+}
+
 efi_status_t handle_kernel_image(efi_system_table_t *sys_table,
 				 unsigned long *image_addr,
 				 unsigned long *image_size,
diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c
index a90f645..eae693e 100644
--- a/drivers/firmware/efi/libstub/arm64-stub.c
+++ b/drivers/firmware/efi/libstub/arm64-stub.c
@@ -81,15 +81,24 @@
 
 	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && phys_seed != 0) {
 		/*
+		 * If CONFIG_DEBUG_ALIGN_RODATA is not set, produce a
+		 * displacement in the interval [0, MIN_KIMG_ALIGN) that
+		 * is a multiple of the minimal segment alignment (SZ_64K)
+		 */
+		u32 mask = (MIN_KIMG_ALIGN - 1) & ~(SZ_64K - 1);
+		u32 offset = !IS_ENABLED(CONFIG_DEBUG_ALIGN_RODATA) ?
+			     (phys_seed >> 32) & mask : TEXT_OFFSET;
+
+		/*
 		 * If KASLR is enabled, and we have some randomness available,
 		 * locate the kernel at a randomized offset in physical memory.
 		 */
-		*reserve_size = kernel_memsize + TEXT_OFFSET;
+		*reserve_size = kernel_memsize + offset;
 		status = efi_random_alloc(sys_table_arg, *reserve_size,
 					  MIN_KIMG_ALIGN, reserve_addr,
-					  phys_seed);
+					  (u32)phys_seed);
 
-		*image_addr = *reserve_addr + TEXT_OFFSET;
+		*image_addr = *reserve_addr + offset;
 	} else {
 		/*
 		 * Else, try a straight allocation at the preferred offset.
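The mask arithmetic above, mask = (MIN_KIMG_ALIGN - 1) & ~(SZ_64K - 1), yields a displacement that stays below MIN_KIMG_ALIGN while remaining a multiple of the 64 KB segment alignment, with the upper 32 bits of the seed selecting it. A sketch of the computation, assuming (hypothetically, for illustration) MIN_KIMG_ALIGN = 2 MB:

#include <stdint.h>
#include <stdio.h>

#define SZ_64K		0x10000UL
#define MIN_KIMG_ALIGN	0x200000UL	/* assumed 2 MB for this sketch */

/*
 * The upper 32 bits of the seed pick a 64K-aligned offset in
 * [0, MIN_KIMG_ALIGN), as in the arm64 stub.
 */
static unsigned long kaslr_offset(uint64_t phys_seed)
{
	uint32_t mask = (MIN_KIMG_ALIGN - 1) & ~(SZ_64K - 1);

	return (phys_seed >> 32) & mask;
}

int main(void)
{
	printf("offset = %#lx\n", kaslr_offset(0x0123456789abcdefULL));
	return 0;
}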
diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c
index 29ed2f9..3bd127f9 100644
--- a/drivers/firmware/efi/libstub/efi-stub-helper.c
+++ b/drivers/firmware/efi/libstub/efi-stub-helper.c
@@ -125,10 +125,12 @@
 
 	map.map_end = map.map + map_size;
 
-	for_each_efi_memory_desc(&map, md)
-		if (md->attribute & EFI_MEMORY_WB)
+	for_each_efi_memory_desc_in_map(&map, md) {
+		if (md->attribute & EFI_MEMORY_WB) {
 			if (membase > md->phys_addr)
 				membase = md->phys_addr;
+		}
+	}
 
 	efi_call_early(free_pool, map.map);
 
diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c
index 6dba78a..e58abfa 100644
--- a/drivers/firmware/efi/libstub/fdt.c
+++ b/drivers/firmware/efi/libstub/fdt.c
@@ -24,7 +24,7 @@
 			unsigned long map_size, unsigned long desc_size,
 			u32 desc_ver)
 {
-	int node, prev, num_rsv;
+	int node, num_rsv;
 	int status;
 	u32 fdt_val32;
 	u64 fdt_val64;
@@ -54,28 +54,6 @@
 		goto fdt_set_fail;
 
 	/*
-	 * Delete any memory nodes present. We must delete nodes which
-	 * early_init_dt_scan_memory may try to use.
-	 */
-	prev = 0;
-	for (;;) {
-		const char *type;
-		int len;
-
-		node = fdt_next_node(fdt, prev, NULL);
-		if (node < 0)
-			break;
-
-		type = fdt_getprop(fdt, node, "device_type", &len);
-		if (type && strncmp(type, "memory", len) == 0) {
-			fdt_del_node(fdt, node);
-			continue;
-		}
-
-		prev = node;
-	}
-
-	/*
 	 * Delete all memory reserve map entries. When booting via UEFI,
 	 * kernel will use the UEFI memory map to find reserved regions.
 	 */
diff --git a/drivers/firmware/efi/libstub/gop.c b/drivers/firmware/efi/libstub/gop.c
new file mode 100644
index 0000000..932742e
--- /dev/null
+++ b/drivers/firmware/efi/libstub/gop.c
@@ -0,0 +1,354 @@
+/* -----------------------------------------------------------------------
+ *
+ *   Copyright 2011 Intel Corporation; author Matt Fleming
+ *
+ *   This file is part of the Linux kernel, and is made available under
+ *   the terms of the GNU General Public License version 2.
+ *
+ * ----------------------------------------------------------------------- */
+
+#include <linux/efi.h>
+#include <linux/screen_info.h>
+#include <asm/efi.h>
+#include <asm/setup.h>
+
+static void find_bits(unsigned long mask, u8 *pos, u8 *size)
+{
+	u8 first, len;
+
+	first = 0;
+	len = 0;
+
+	if (mask) {
+		while (!(mask & 0x1)) {
+			mask = mask >> 1;
+			first++;
+		}
+
+		while (mask & 0x1) {
+			mask = mask >> 1;
+			len++;
+		}
+	}
+
+	*pos = first;
+	*size = len;
+}
+
+static void
+setup_pixel_info(struct screen_info *si, u32 pixels_per_scan_line,
+		 struct efi_pixel_bitmask pixel_info, int pixel_format)
+{
+	if (pixel_format == PIXEL_RGB_RESERVED_8BIT_PER_COLOR) {
+		si->lfb_depth = 32;
+		si->lfb_linelength = pixels_per_scan_line * 4;
+		si->red_size = 8;
+		si->red_pos = 0;
+		si->green_size = 8;
+		si->green_pos = 8;
+		si->blue_size = 8;
+		si->blue_pos = 16;
+		si->rsvd_size = 8;
+		si->rsvd_pos = 24;
+	} else if (pixel_format == PIXEL_BGR_RESERVED_8BIT_PER_COLOR) {
+		si->lfb_depth = 32;
+		si->lfb_linelength = pixels_per_scan_line * 4;
+		si->red_size = 8;
+		si->red_pos = 16;
+		si->green_size = 8;
+		si->green_pos = 8;
+		si->blue_size = 8;
+		si->blue_pos = 0;
+		si->rsvd_size = 8;
+		si->rsvd_pos = 24;
+	} else if (pixel_format == PIXEL_BIT_MASK) {
+		find_bits(pixel_info.red_mask, &si->red_pos, &si->red_size);
+		find_bits(pixel_info.green_mask, &si->green_pos,
+			  &si->green_size);
+		find_bits(pixel_info.blue_mask, &si->blue_pos, &si->blue_size);
+		find_bits(pixel_info.reserved_mask, &si->rsvd_pos,
+			  &si->rsvd_size);
+		si->lfb_depth = si->red_size + si->green_size +
+			si->blue_size + si->rsvd_size;
+		si->lfb_linelength = (pixels_per_scan_line * si->lfb_depth) / 8;
+	} else {
+		si->lfb_depth = 4;
+		si->lfb_linelength = si->lfb_width / 2;
+		si->red_size = 0;
+		si->red_pos = 0;
+		si->green_size = 0;
+		si->green_pos = 0;
+		si->blue_size = 0;
+		si->blue_pos = 0;
+		si->rsvd_size = 0;
+		si->rsvd_pos = 0;
+	}
+}
+
+static efi_status_t
+__gop_query32(efi_system_table_t *sys_table_arg,
+	      struct efi_graphics_output_protocol_32 *gop32,
+	      struct efi_graphics_output_mode_info **info,
+	      unsigned long *size, u64 *fb_base)
+{
+	struct efi_graphics_output_protocol_mode_32 *mode;
+	efi_graphics_output_protocol_query_mode query_mode;
+	efi_status_t status;
+	unsigned long m;
+
+	m = gop32->mode;
+	mode = (struct efi_graphics_output_protocol_mode_32 *)m;
+	query_mode = (void *)(unsigned long)gop32->query_mode;
+
+	status = __efi_call_early(query_mode, (void *)gop32, mode->mode, size,
+				  info);
+	if (status != EFI_SUCCESS)
+		return status;
+
+	*fb_base = mode->frame_buffer_base;
+	return status;
+}
+
+static efi_status_t
+setup_gop32(efi_system_table_t *sys_table_arg, struct screen_info *si,
+            efi_guid_t *proto, unsigned long size, void **gop_handle)
+{
+	struct efi_graphics_output_protocol_32 *gop32, *first_gop;
+	unsigned long nr_gops;
+	u16 width, height;
+	u32 pixels_per_scan_line;
+	u32 ext_lfb_base;
+	u64 fb_base;
+	struct efi_pixel_bitmask pixel_info;
+	int pixel_format;
+	efi_status_t status = EFI_NOT_FOUND;
+	u32 *handles = (u32 *)(unsigned long)gop_handle;
+	int i;
+
+	first_gop = NULL;
+	gop32 = NULL;
+
+	nr_gops = size / sizeof(u32);
+	for (i = 0; i < nr_gops; i++) {
+		struct efi_graphics_output_mode_info *info = NULL;
+		efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID;
+		bool conout_found = false;
+		void *dummy = NULL;
+		efi_handle_t h = (efi_handle_t)(unsigned long)handles[i];
+		u64 current_fb_base;
+
+		status = efi_call_early(handle_protocol, h,
+					proto, (void **)&gop32);
+		if (status != EFI_SUCCESS)
+			continue;
+
+		status = efi_call_early(handle_protocol, h,
+					&conout_proto, &dummy);
+		if (status == EFI_SUCCESS)
+			conout_found = true;
+
+		status = __gop_query32(sys_table_arg, gop32, &info, &size,
+				       &current_fb_base);
+		if (status == EFI_SUCCESS && (!first_gop || conout_found)) {
+			/*
+			 * Systems that use the UEFI Console Splitter may
+			 * provide multiple GOP devices, not all of which are
+			 * backed by real hardware. The workaround is to search
+			 * for a GOP implementing the ConOut protocol, and if
+			 * one isn't found, to just fall back to the first GOP.
+			 */
+			width = info->horizontal_resolution;
+			height = info->vertical_resolution;
+			pixel_format = info->pixel_format;
+			pixel_info = info->pixel_information;
+			pixels_per_scan_line = info->pixels_per_scan_line;
+			fb_base = current_fb_base;
+
+			/*
+			 * Once we've found a GOP supporting ConOut,
+			 * don't bother looking any further.
+			 */
+			first_gop = gop32;
+			if (conout_found)
+				break;
+		}
+	}
+
+	/* Did we find any GOPs? */
+	if (!first_gop)
+		goto out;
+
+	/* EFI framebuffer */
+	si->orig_video_isVGA = VIDEO_TYPE_EFI;
+
+	si->lfb_width = width;
+	si->lfb_height = height;
+	si->lfb_base = fb_base;
+
+	ext_lfb_base = (u64)fb_base >> 32;
+	if (ext_lfb_base) {
+		si->capabilities |= VIDEO_CAPABILITY_64BIT_BASE;
+		si->ext_lfb_base = ext_lfb_base;
+	}
+
+	si->pages = 1;
+
+	setup_pixel_info(si, pixels_per_scan_line, pixel_info, pixel_format);
+
+	si->lfb_size = si->lfb_linelength * si->lfb_height;
+
+	si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS;
+out:
+	return status;
+}
+
+static efi_status_t
+__gop_query64(efi_system_table_t *sys_table_arg,
+	      struct efi_graphics_output_protocol_64 *gop64,
+	      struct efi_graphics_output_mode_info **info,
+	      unsigned long *size, u64 *fb_base)
+{
+	struct efi_graphics_output_protocol_mode_64 *mode;
+	efi_graphics_output_protocol_query_mode query_mode;
+	efi_status_t status;
+	unsigned long m;
+
+	m = gop64->mode;
+	mode = (struct efi_graphics_output_protocol_mode_64 *)m;
+	query_mode = (void *)(unsigned long)gop64->query_mode;
+
+	status = __efi_call_early(query_mode, (void *)gop64, mode->mode, size,
+				  info);
+	if (status != EFI_SUCCESS)
+		return status;
+
+	*fb_base = mode->frame_buffer_base;
+	return status;
+}
+
+static efi_status_t
+setup_gop64(efi_system_table_t *sys_table_arg, struct screen_info *si,
+	    efi_guid_t *proto, unsigned long size, void **gop_handle)
+{
+	struct efi_graphics_output_protocol_64 *gop64, *first_gop;
+	unsigned long nr_gops;
+	u16 width, height;
+	u32 pixels_per_scan_line;
+	u32 ext_lfb_base;
+	u64 fb_base;
+	struct efi_pixel_bitmask pixel_info;
+	int pixel_format;
+	efi_status_t status = EFI_NOT_FOUND;
+	u64 *handles = (u64 *)(unsigned long)gop_handle;
+	int i;
+
+	first_gop = NULL;
+	gop64 = NULL;
+
+	nr_gops = size / sizeof(u64);
+	for (i = 0; i < nr_gops; i++) {
+		struct efi_graphics_output_mode_info *info = NULL;
+		efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID;
+		bool conout_found = false;
+		void *dummy = NULL;
+		efi_handle_t h = (efi_handle_t)(unsigned long)handles[i];
+		u64 current_fb_base;
+
+		status = efi_call_early(handle_protocol, h,
+					proto, (void **)&gop64);
+		if (status != EFI_SUCCESS)
+			continue;
+
+		status = efi_call_early(handle_protocol, h,
+					&conout_proto, &dummy);
+		if (status == EFI_SUCCESS)
+			conout_found = true;
+
+		status = __gop_query64(sys_table_arg, gop64, &info, &size,
+				       &current_fb_base);
+		if (status == EFI_SUCCESS && (!first_gop || conout_found)) {
+			/*
+			 * Systems that use the UEFI Console Splitter may
+			 * provide multiple GOP devices, not all of which are
+			 * backed by real hardware. The workaround is to search
+			 * for a GOP implementing the ConOut protocol, and if
+			 * one isn't found, to just fall back to the first GOP.
+			 */
+			width = info->horizontal_resolution;
+			height = info->vertical_resolution;
+			pixel_format = info->pixel_format;
+			pixel_info = info->pixel_information;
+			pixels_per_scan_line = info->pixels_per_scan_line;
+			fb_base = current_fb_base;
+
+			/*
+			 * Once we've found a GOP supporting ConOut,
+			 * don't bother looking any further.
+			 */
+			first_gop = gop64;
+			if (conout_found)
+				break;
+		}
+	}
+
+	/* Did we find any GOPs? */
+	if (!first_gop)
+		goto out;
+
+	/* EFI framebuffer */
+	si->orig_video_isVGA = VIDEO_TYPE_EFI;
+
+	si->lfb_width = width;
+	si->lfb_height = height;
+	si->lfb_base = fb_base;
+
+	ext_lfb_base = (u64)fb_base >> 32;
+	if (ext_lfb_base) {
+		si->capabilities |= VIDEO_CAPABILITY_64BIT_BASE;
+		si->ext_lfb_base = ext_lfb_base;
+	}
+
+	si->pages = 1;
+
+	setup_pixel_info(si, pixels_per_scan_line, pixel_info, pixel_format);
+
+	si->lfb_size = si->lfb_linelength * si->lfb_height;
+
+	si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS;
+out:
+	return status;
+}
+
+/*
+ * See if we have Graphics Output Protocol
+ */
+efi_status_t efi_setup_gop(efi_system_table_t *sys_table_arg,
+			   struct screen_info *si, efi_guid_t *proto,
+			   unsigned long size)
+{
+	efi_status_t status;
+	void **gop_handle = NULL;
+
+	status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
+				size, (void **)&gop_handle);
+	if (status != EFI_SUCCESS)
+		return status;
+
+	status = efi_call_early(locate_handle,
+				EFI_LOCATE_BY_PROTOCOL,
+				proto, NULL, &size, gop_handle);
+	if (status != EFI_SUCCESS)
+		goto free_handle;
+
+	if (efi_is_64bit()) {
+		status = setup_gop64(sys_table_arg, si, proto, size,
+				     gop_handle);
+	} else {
+		status = setup_gop32(sys_table_arg, si, proto, size,
+				     gop_handle);
+	}
+
+free_handle:
+	efi_call_early(free_pool, gop_handle);
+	return status;
+}
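find_bits() decomposes a channel mask into a bit position and width, which setup_pixel_info() then sums into lfb_depth for the PIXEL_BIT_MASK case. A self-contained sketch of the same decomposition for a typical XRGB8888 red mask:

#include <stdio.h>

/*
 * Locate the offset and length of the contiguous run of set bits in
 * mask, as the stub's find_bits() does for each color channel.
 */
static void find_bits(unsigned long mask, unsigned char *pos,
		      unsigned char *size)
{
	unsigned char first = 0, len = 0;

	if (mask) {
		while (!(mask & 1)) {
			mask >>= 1;
			first++;
		}
		while (mask & 1) {
			mask >>= 1;
			len++;
		}
	}
	*pos = first;
	*size = len;
}

int main(void)
{
	unsigned char pos, size;

	find_bits(0x00ff0000UL, &pos, &size);
	printf("red: pos=%d size=%d\n", pos, size);	/* pos=16 size=8 */
	return 0;
}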
diff --git a/drivers/firmware/efi/memattr.c b/drivers/firmware/efi/memattr.c
new file mode 100644
index 0000000..236004b
--- /dev/null
+++ b/drivers/firmware/efi/memattr.c
@@ -0,0 +1,182 @@
+/*
+ * Copyright (C) 2016 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#define pr_fmt(fmt)	"efi: memattr: " fmt
+
+#include <linux/efi.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/memblock.h>
+
+#include <asm/early_ioremap.h>
+
+static int __initdata tbl_size;
+
+/*
+ * Reserve the memory associated with the Memory Attributes configuration
+ * table, if it exists.
+ */
+int __init efi_memattr_init(void)
+{
+	efi_memory_attributes_table_t *tbl;
+
+	if (efi.mem_attr_table == EFI_INVALID_TABLE_ADDR)
+		return 0;
+
+	tbl = early_memremap(efi.mem_attr_table, sizeof(*tbl));
+	if (!tbl) {
+		pr_err("Failed to map EFI Memory Attributes table @ 0x%lx\n",
+		       efi.mem_attr_table);
+		return -ENOMEM;
+	}
+
+	if (tbl->version > 1) {
+		pr_warn("Unexpected EFI Memory Attributes table version %d\n",
+			tbl->version);
+		goto unmap;
+	}
+
+	tbl_size = sizeof(*tbl) + tbl->num_entries * tbl->desc_size;
+	memblock_reserve(efi.mem_attr_table, tbl_size);
+
+unmap:
+	early_memunmap(tbl, sizeof(*tbl));
+	return 0;
+}
+
+/*
+ * Returns a copy @out of the UEFI memory descriptor @in if it is covered
+ * entirely by a UEFI memory map entry with matching attributes. The virtual
+ * address of @out is set according to the matching entry that was found.
+ */
+static bool entry_is_valid(const efi_memory_desc_t *in, efi_memory_desc_t *out)
+{
+	u64 in_paddr = in->phys_addr;
+	u64 in_size = in->num_pages << EFI_PAGE_SHIFT;
+	efi_memory_desc_t *md;
+
+	*out = *in;
+
+	if (in->type != EFI_RUNTIME_SERVICES_CODE &&
+	    in->type != EFI_RUNTIME_SERVICES_DATA) {
+		pr_warn("Entry type should be RuntimeServiceCode/Data\n");
+		return false;
+	}
+
+	if (!(in->attribute & (EFI_MEMORY_RO | EFI_MEMORY_XP))) {
+		pr_warn("Entry attributes invalid: RO and XP bits both cleared\n");
+		return false;
+	}
+
+	if (PAGE_SIZE > EFI_PAGE_SIZE &&
+	    (!PAGE_ALIGNED(in->phys_addr) ||
+	     !PAGE_ALIGNED(in->num_pages << EFI_PAGE_SHIFT))) {
+		/*
+		 * Since arm64 may execute with page sizes of up to 64 KB, the
+		 * UEFI spec mandates that RuntimeServices memory regions must
+		 * be 64 KB aligned. We need to validate this here since we will
+		 * not be able to tighten permissions on such regions without
+		 * affecting adjacent regions.
+		 */
+		pr_warn("Entry address region misaligned\n");
+		return false;
+	}
+
+	for_each_efi_memory_desc(md) {
+		u64 md_paddr = md->phys_addr;
+		u64 md_size = md->num_pages << EFI_PAGE_SHIFT;
+
+		if (!(md->attribute & EFI_MEMORY_RUNTIME))
+			continue;
+		if (md->virt_addr == 0) {
+			/* no virtual mapping has been installed by the stub */
+			break;
+		}
+
+		if (md_paddr > in_paddr || (in_paddr - md_paddr) >= md_size)
+			continue;
+
+		/*
+		 * This entry covers the start of @in, check whether
+		 * it covers the end as well.
+		 */
+		if (md_paddr + md_size < in_paddr + in_size) {
+			pr_warn("Entry covers multiple EFI memory map regions\n");
+			return false;
+		}
+
+		if (md->type != in->type) {
+			pr_warn("Entry type deviates from EFI memory map region type\n");
+			return false;
+		}
+
+		out->virt_addr = in_paddr + (md->virt_addr - md_paddr);
+
+		return true;
+	}
+
+	pr_warn("No matching entry found in the EFI memory map\n");
+	return false;
+}
+
+/*
+ * To be called after the EFI page tables have been populated. If a memory
+ * attributes table is available, its contents will be used to update the
+ * mappings with tightened permissions as described by the table.
+ * This requires the UEFI memory map to have already been populated with
+ * virtual addresses.
+ */
+int __init efi_memattr_apply_permissions(struct mm_struct *mm,
+					 efi_memattr_perm_setter fn)
+{
+	efi_memory_attributes_table_t *tbl;
+	int i, ret;
+
+	if (tbl_size <= sizeof(*tbl))
+		return 0;
+
+	/*
+	 * We need the EFI memory map to be set up so we can use it to
+	 * look up the virtual addresses of all entries in the EFI
+	 * Memory Attributes table. If it isn't available, this
+	 * function should not be called.
+	 */
+	if (WARN_ON(!efi_enabled(EFI_MEMMAP)))
+		return 0;
+
+	tbl = memremap(efi.mem_attr_table, tbl_size, MEMREMAP_WB);
+	if (!tbl) {
+		pr_err("Failed to map EFI Memory Attributes table @ 0x%lx\n",
+		       efi.mem_attr_table);
+		return -ENOMEM;
+	}
+
+	if (efi_enabled(EFI_DBG))
+		pr_info("Processing EFI Memory Attributes table:\n");
+
+	for (i = ret = 0; ret == 0 && i < tbl->num_entries; i++) {
+		efi_memory_desc_t md;
+		unsigned long size;
+		bool valid;
+		char buf[64];
+
+		valid = entry_is_valid((void *)tbl->entry + i * tbl->desc_size,
+				       &md);
+		size = md.num_pages << EFI_PAGE_SHIFT;
+		if (efi_enabled(EFI_DBG) || !valid)
+			pr_info("%s 0x%012llx-0x%012llx %s\n",
+				valid ? "" : "!", md.phys_addr,
+				md.phys_addr + size - 1,
+				efi_md_typeattr_format(buf, sizeof(buf), &md));
+
+		if (valid)
+			ret = fn(mm, &md);
+	}
+	memunmap(tbl);
+	return ret;
+}
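entry_is_valid() boils down to attribute and containment checks: an entry must carry at least one of RO/XP, and must lie entirely inside a single EFI_MEMORY_RUNTIME region of the same type. A sketch of the containment test alone:

#include <stdbool.h>
#include <stdio.h>

typedef unsigned long long u64;

/*
 * Does [in_paddr, in_paddr + in_size) lie wholly inside
 * [md_paddr, md_paddr + md_size)? Mirrors the overlap checks
 * in entry_is_valid().
 */
static bool covered(u64 in_paddr, u64 in_size, u64 md_paddr, u64 md_size)
{
	if (md_paddr > in_paddr || (in_paddr - md_paddr) >= md_size)
		return false;	/* start not inside this region */
	return md_paddr + md_size >= in_paddr + in_size;
}

int main(void)
{
	printf("%d\n", covered(0x1000, 0x1000, 0x0, 0x4000)); /* 1 */
	printf("%d\n", covered(0x3000, 0x2000, 0x0, 0x4000)); /* 0: spills */
	return 0;
}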
diff --git a/drivers/firmware/efi/reboot.c b/drivers/firmware/efi/reboot.c
index 9c59d1c..62ead9b 100644
--- a/drivers/firmware/efi/reboot.c
+++ b/drivers/firmware/efi/reboot.c
@@ -9,7 +9,8 @@
 
 void efi_reboot(enum reboot_mode reboot_mode, const char *__unused)
 {
-	int efi_mode;
+	const char *str[] = { "cold", "warm", "shutdown", "platform" };
+	int efi_mode, cap_reset_mode;
 
 	if (!efi_enabled(EFI_RUNTIME_SERVICES))
 		return;
@@ -30,6 +31,15 @@
 	if (efi_reboot_quirk_mode != -1)
 		efi_mode = efi_reboot_quirk_mode;
 
+	if (efi_capsule_pending(&cap_reset_mode)) {
+		if (efi_mode != cap_reset_mode)
+			printk(KERN_CRIT "efi: %s reset requested but pending "
+			       "capsule update requires %s reset... Performing "
+			       "%s reset.\n", str[efi_mode], str[cap_reset_mode],
+			       str[cap_reset_mode]);
+		efi_mode = cap_reset_mode;
+	}
+
 	efi.reset_system(efi_mode, EFI_SUCCESS, 0, NULL);
 }
 
diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c
index de69530..23bef6b 100644
--- a/drivers/firmware/efi/runtime-wrappers.c
+++ b/drivers/firmware/efi/runtime-wrappers.c
@@ -16,10 +16,70 @@
 
 #include <linux/bug.h>
 #include <linux/efi.h>
+#include <linux/irqflags.h>
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
+#include <linux/stringify.h>
 #include <asm/efi.h>
 
+static void efi_call_virt_check_flags(unsigned long flags, const char *call)
+{
+	unsigned long cur_flags, mismatch;
+
+	local_save_flags(cur_flags);
+
+	mismatch = flags ^ cur_flags;
+	if (!WARN_ON_ONCE(mismatch & ARCH_EFI_IRQ_FLAGS_MASK))
+		return;
+
+	add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_NOW_UNRELIABLE);
+	pr_err_ratelimited(FW_BUG "IRQ flags corrupted (0x%08lx=>0x%08lx) by EFI %s\n",
+			   flags, cur_flags, call);
+	local_irq_restore(flags);
+}
+
+/*
+ * Arch code can implement the following three template macros, avoiding
+ * repetition for the void/non-void return cases of {__,}efi_call_virt:
+ *
+ *  * arch_efi_call_virt_setup
+ *
+ *    Sets up the environment for the call (e.g. switching page tables,
+ *    allowing kernel-mode use of floating point, if required).
+ *
+ *  * arch_efi_call_virt
+ *
+ *    Performs the call. The last expression in the macro must be the call
+ *    itself, allowing the logic to be shared by the void and non-void
+ *    cases.
+ *
+ *  * arch_efi_call_virt_teardown
+ *
+ *    Restores the usual kernel environment once the call has returned.
+ */
+
+#define efi_call_virt(f, args...)					\
+({									\
+	efi_status_t __s;						\
+	unsigned long flags;						\
+	arch_efi_call_virt_setup();					\
+	local_save_flags(flags);					\
+	__s = arch_efi_call_virt(f, args);				\
+	efi_call_virt_check_flags(flags, __stringify(f));		\
+	arch_efi_call_virt_teardown();					\
+	__s;								\
+})
+
+#define __efi_call_virt(f, args...)					\
+({									\
+	unsigned long flags;						\
+	arch_efi_call_virt_setup();					\
+	local_save_flags(flags);					\
+	arch_efi_call_virt(f, args);					\
+	efi_call_virt_check_flags(flags, __stringify(f));		\
+	arch_efi_call_virt_teardown();					\
+})
+
 /*
  * According to section 7.1 of the UEFI spec, Runtime Services are not fully
  * reentrant, and there are particular combinations of calls that need to be
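efi_call_virt_check_flags() catches firmware that returns with corrupted IRQ flags by XOR-ing the flags saved before the call against the current ones and masking with the architecture's relevant bits. A userspace sketch of the detection step, with a hypothetical constant standing in for ARCH_EFI_IRQ_FLAGS_MASK:

#include <stdbool.h>
#include <stdio.h>

#define IRQ_FLAGS_MASK	0x200UL	/* hypothetical, e.g. the x86 IF bit */

/* Did the callee flip any flag bit we care about? */
static bool flags_corrupted(unsigned long before, unsigned long after)
{
	return (before ^ after) & IRQ_FLAGS_MASK;
}

int main(void)
{
	printf("%d\n", flags_corrupted(0x200, 0x200)); /* 0: intact */
	printf("%d\n", flags_corrupted(0x200, 0x000)); /* 1: IRQs left off */
	return 0;
}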
diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c
index 34b7419..d3b7513 100644
--- a/drivers/firmware/efi/vars.c
+++ b/drivers/firmware/efi/vars.c
@@ -329,39 +329,6 @@
 	return fops->query_variable_store(attributes, size, true);
 }
 
-static int efi_status_to_err(efi_status_t status)
-{
-	int err;
-
-	switch (status) {
-	case EFI_SUCCESS:
-		err = 0;
-		break;
-	case EFI_INVALID_PARAMETER:
-		err = -EINVAL;
-		break;
-	case EFI_OUT_OF_RESOURCES:
-		err = -ENOSPC;
-		break;
-	case EFI_DEVICE_ERROR:
-		err = -EIO;
-		break;
-	case EFI_WRITE_PROTECTED:
-		err = -EROFS;
-		break;
-	case EFI_SECURITY_VIOLATION:
-		err = -EACCES;
-		break;
-	case EFI_NOT_FOUND:
-		err = -ENOENT;
-		break;
-	default:
-		err = -EINVAL;
-	}
-
-	return err;
-}
-
 static bool variable_is_present(efi_char16_t *variable_name, efi_guid_t *vendor,
 				struct list_head *head)
 {
@@ -452,8 +419,7 @@
  * Returns 0 on success, or a kernel error code on failure.
  */
 int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *),
-		void *data, bool atomic, bool duplicates,
-		struct list_head *head)
+		void *data, bool duplicates, struct list_head *head)
 {
 	const struct efivar_operations *ops = __efivars->ops;
 	unsigned long variable_name_size = 1024;
@@ -483,7 +449,7 @@
 						&vendor_guid);
 		switch (status) {
 		case EFI_SUCCESS:
-			if (!atomic)
+			if (duplicates)
 				spin_unlock_irq(&__efivars->lock);
 
 			variable_name_size = var_name_strnsize(variable_name,
@@ -498,21 +464,19 @@
 			 * and may end up looping here forever.
 			 */
 			if (duplicates &&
-			    variable_is_present(variable_name, &vendor_guid, head)) {
+			    variable_is_present(variable_name, &vendor_guid,
+						head)) {
 				dup_variable_bug(variable_name, &vendor_guid,
 						 variable_name_size);
-				if (!atomic)
-					spin_lock_irq(&__efivars->lock);
-
 				status = EFI_NOT_FOUND;
-				break;
+			} else {
+				err = func(variable_name, vendor_guid,
+					   variable_name_size, data);
+				if (err)
+					status = EFI_NOT_FOUND;
 			}
 
-			err = func(variable_name, vendor_guid, variable_name_size, data);
-			if (err)
-				status = EFI_NOT_FOUND;
-
-			if (!atomic)
+			if (duplicates)
 				spin_lock_irq(&__efivars->lock);
 
 			break;
diff --git a/drivers/firmware/psci.c b/drivers/firmware/psci.c
index b5d0580..fa4ea22 100644
--- a/drivers/firmware/psci.c
+++ b/drivers/firmware/psci.c
@@ -355,7 +355,7 @@
 
 /* ARM specific CPU idle operations */
 #ifdef CONFIG_ARM
-static struct cpuidle_ops psci_cpuidle_ops __initdata = {
+static const struct cpuidle_ops psci_cpuidle_ops __initconst = {
 	.suspend = psci_cpu_suspend_enter,
 	.init = psci_dt_cpu_init_idle,
 };
diff --git a/drivers/gpu/drm/drm_cache.c b/drivers/gpu/drm/drm_cache.c
index 6743ff7..059f7c3 100644
--- a/drivers/gpu/drm/drm_cache.c
+++ b/drivers/gpu/drm/drm_cache.c
@@ -72,7 +72,7 @@
 {
 
 #if defined(CONFIG_X86)
-	if (cpu_has_clflush) {
+	if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
 		drm_cache_flush_clflush(pages, num_pages);
 		return;
 	}
@@ -105,7 +105,7 @@
 drm_clflush_sg(struct sg_table *st)
 {
 #if defined(CONFIG_X86)
-	if (cpu_has_clflush) {
+	if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
 		struct sg_page_iter sg_iter;
 
 		mb();
@@ -129,7 +129,7 @@
 drm_clflush_virt_range(void *addr, unsigned long length)
 {
 #if defined(CONFIG_X86)
-	if (cpu_has_clflush) {
+	if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
 		const int size = boot_cpu_data.x86_clflush_size;
 		void *end = addr + length;
 		addr = (void *)(((unsigned long)addr) & -size);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index dabc089..f2cb9a9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1732,7 +1732,7 @@
 	if (args->flags & ~(I915_MMAP_WC))
 		return -EINVAL;
 
-	if (args->flags & I915_MMAP_WC && !cpu_has_pat)
+	if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
 		return -ENODEV;
 
 	obj = drm_gem_object_lookup(dev, file, args->handle);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 1328bc5..b845f46 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -488,7 +488,7 @@
 		ret = relocate_entry_cpu(obj, reloc, target_offset);
 	else if (obj->map_and_fenceable)
 		ret = relocate_entry_gtt(obj, reloc, target_offset);
-	else if (cpu_has_clflush)
+	else if (static_cpu_has(X86_FEATURE_CLFLUSH))
 		ret = relocate_entry_clflush(obj, reloc, target_offset);
 	else {
 		WARN_ONCE(1, "Impossible case in relocation handling\n");
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 5c2d13a..ff94007 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -288,7 +288,7 @@
 
 config SENSORS_FAM15H_POWER
 	tristate "AMD Family 15h processor power"
-	depends on X86 && PCI
+	depends on X86 && PCI && CPU_SUP_AMD
 	help
 	  If you say yes here you get support for processor power
 	  information of your AMD family 15h CPU.
@@ -621,7 +621,8 @@
 	  If you say yes here you get support for ITE IT8705F, IT8712F, IT8716F,
 	  IT8718F, IT8720F, IT8721F, IT8726F, IT8728F, IT8732F, IT8758E,
 	  IT8771E, IT8772E, IT8781F, IT8782F, IT8783E/F, IT8786E, IT8790E,
-	  IT8603E, IT8620E, and IT8623E sensor chips, and the SiS950 clone.
+	  IT8603E, IT8620E, IT8623E, and IT8628E sensor chips, and the SiS950
+	  clone.
 
 	  This driver can also be built as a module.  If so, the module
 	  will be called it87.
@@ -821,6 +822,16 @@
 	  This driver can also be built as a module. If so, the module
 	  will be called max197.
 
+config SENSORS_MAX31722
+tristate "MAX31722 temperature sensor"
+	depends on SPI
+	help
+	  Support for the Maxim Integrated MAX31722/MAX31723 digital
+	  thermometers/thermostats operating over an SPI interface.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called max31722.
+
 config SENSORS_MAX6639
 	tristate "Maxim MAX6639 sensor chip"
 	depends on I2C
diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
index 58cc3ac..2ef5b7c 100644
--- a/drivers/hwmon/Makefile
+++ b/drivers/hwmon/Makefile
@@ -112,6 +112,7 @@
 obj-$(CONFIG_SENSORS_MAX1619)	+= max1619.o
 obj-$(CONFIG_SENSORS_MAX1668)	+= max1668.o
 obj-$(CONFIG_SENSORS_MAX197)	+= max197.o
+obj-$(CONFIG_SENSORS_MAX31722)	+= max31722.o
 obj-$(CONFIG_SENSORS_MAX6639)	+= max6639.o
 obj-$(CONFIG_SENSORS_MAX6642)	+= max6642.o
 obj-$(CONFIG_SENSORS_MAX6650)	+= max6650.o
diff --git a/drivers/hwmon/ads7828.c b/drivers/hwmon/ads7828.c
index 6c99ee7..ee396ff 100644
--- a/drivers/hwmon/ads7828.c
+++ b/drivers/hwmon/ads7828.c
@@ -120,6 +120,7 @@
 	unsigned int vref_mv = ADS7828_INT_VREF_MV;
 	bool diff_input = false;
 	bool ext_vref = false;
+	unsigned int regval;
 
 	data = devm_kzalloc(dev, sizeof(struct ads7828_data), GFP_KERNEL);
 	if (!data)
@@ -154,6 +155,15 @@
 	if (!diff_input)
 		data->cmd_byte |= ADS7828_CMD_SD_SE;
 
+	/*
+	 * The datasheet specifies that the internal reference voltage is
+	 * disabled by default. It needs to be enabled, and the voltage
+	 * needs to settle, before valid ADC data can be read. So perform a
+	 * dummy read to enable the internal reference voltage.
+	 */
+	if (!ext_vref)
+		regmap_read(data->regmap, data->cmd_byte, &regval);
+
 	hwmon_dev = devm_hwmon_device_register_with_groups(dev, client->name,
 							   data,
 							   ads7828_groups);
diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c
index 4f695d8..eb97a92 100644
--- a/drivers/hwmon/fam15h_power.c
+++ b/drivers/hwmon/fam15h_power.c
@@ -1,7 +1,7 @@
 /*
  * fam15h_power.c - AMD Family 15h processor power monitoring
  *
- * Copyright (c) 2011 Advanced Micro Devices, Inc.
+ * Copyright (c) 2011-2016 Advanced Micro Devices, Inc.
  * Author: Andreas Herrmann <herrmann.der.user@googlemail.com>
  *
  *
@@ -25,6 +25,10 @@
 #include <linux/module.h>
 #include <linux/pci.h>
 #include <linux/bitops.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/time.h>
+#include <linux/sched.h>
 #include <asm/processor.h>
 #include <asm/msr.h>
 
@@ -44,8 +48,14 @@
 
 #define FAM15H_MIN_NUM_ATTRS		2
 #define FAM15H_NUM_GROUPS		2
+#define MAX_CUS				8
 
+/* set the maximum interval to 1 second */
+#define MAX_INTERVAL			1000
+
+#define MSR_F15H_CU_PWR_ACCUMULATOR	0xc001007a
 #define MSR_F15H_CU_MAX_PWR_ACCUMULATOR	0xc001007b
+#define MSR_F15H_PTSC			0xc0010280
 
 #define PCI_DEVICE_ID_AMD_15H_M70H_NB_F4 0x15b4
 
@@ -59,8 +69,20 @@
 	struct attribute_group group;
 	/* maximum accumulated power of a compute unit */
 	u64 max_cu_acc_power;
+	/* accumulated power of the compute units */
+	u64 cu_acc_power[MAX_CUS];
+	/* performance timestamp counter */
+	u64 cpu_sw_pwr_ptsc[MAX_CUS];
+	/* online/offline status of current compute unit */
+	int cu_on[MAX_CUS];
+	unsigned long power_period;
 };
 
+static bool is_carrizo_or_later(void)
+{
+	return boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model >= 0x60;
+}
+
 static ssize_t show_power(struct device *dev,
 			  struct device_attribute *attr, char *buf)
 {
@@ -77,7 +99,7 @@
 	 * On Carrizo and later platforms, TdpRunAvgAccCap bit field
 	 * is extended to 4:31 from 4:25.
 	 */
-	if (boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model >= 0x60) {
+	if (is_carrizo_or_later()) {
 		running_avg_capture = val >> 4;
 		running_avg_capture = sign_extend32(running_avg_capture, 27);
 	} else {
@@ -94,7 +116,7 @@
 	 * On Carrizo and later platforms, ApmTdpLimit bit field
 	 * is extended to 16:31 from 16:28.
 	 */
-	if (boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model >= 0x60)
+	if (is_carrizo_or_later())
 		tdp_limit = val >> 16;
 	else
 		tdp_limit = (val >> 16) & 0x1fff;
@@ -125,6 +147,167 @@
 }
 static DEVICE_ATTR(power1_crit, S_IRUGO, show_power_crit, NULL);
 
+static void do_read_registers_on_cu(void *_data)
+{
+	struct fam15h_power_data *data = _data;
+	int cpu, cu;
+
+	cpu = smp_processor_id();
+
+	/*
+	 * With the new x86 topology modelling, the CPU core ID is
+	 * actually the compute unit ID.
+	 */
+	cu = cpu_data(cpu).cpu_core_id;
+
+	rdmsrl_safe(MSR_F15H_CU_PWR_ACCUMULATOR, &data->cu_acc_power[cu]);
+	rdmsrl_safe(MSR_F15H_PTSC, &data->cpu_sw_pwr_ptsc[cu]);
+
+	data->cu_on[cu] = 1;
+}
+
+/*
+ * This function must only be called when CPUID
+ * Fn8000_0007:EDX[12] is set.
+ */
+static int read_registers(struct fam15h_power_data *data)
+{
+	int this_cpu, ret, cpu;
+	int core, this_core;
+	cpumask_var_t mask;
+
+	ret = zalloc_cpumask_var(&mask, GFP_KERNEL);
+	if (!ret)
+		return -ENOMEM;
+
+	memset(data->cu_on, 0, sizeof(int) * MAX_CUS);
+
+	get_online_cpus();
+	this_cpu = smp_processor_id();
+
+	/*
+	 * Choose the first online core of each compute unit, then read
+	 * its power and PTSC MSR values in a single IPI, because a CPU
+	 * core's MSR values represent those of its whole compute unit.
+	 */
+	core = -1;
+
+	for_each_online_cpu(cpu) {
+		this_core = topology_core_id(cpu);
+
+		if (this_core == core)
+			continue;
+
+		core = this_core;
+
+		/* get any CPU on this compute unit */
+		cpumask_set_cpu(cpumask_any(topology_sibling_cpumask(cpu)), mask);
+	}
+
+	if (cpumask_test_cpu(this_cpu, mask))
+		do_read_registers_on_cu(data);
+
+	smp_call_function_many(mask, do_read_registers_on_cu, data, true);
+	put_online_cpus();
+
+	free_cpumask_var(mask);
+
+	return 0;
+}
+
+static ssize_t acc_show_power(struct device *dev,
+			      struct device_attribute *attr,
+			      char *buf)
+{
+	struct fam15h_power_data *data = dev_get_drvdata(dev);
+	u64 prev_cu_acc_power[MAX_CUS], prev_ptsc[MAX_CUS],
+	    jdelta[MAX_CUS];
+	u64 tdelta, avg_acc;
+	int cu, cu_num, ret;
+	signed long leftover;
+
+	/*
+	 * With the new x86 topology modelling, x86_max_cores is the
+	 * number of compute units.
+	 */
+	cu_num = boot_cpu_data.x86_max_cores;
+
+	ret = read_registers(data);
+	if (ret)
+		return 0;
+
+	for (cu = 0; cu < cu_num; cu++) {
+		prev_cu_acc_power[cu] = data->cu_acc_power[cu];
+		prev_ptsc[cu] = data->cpu_sw_pwr_ptsc[cu];
+	}
+
+	leftover = schedule_timeout_interruptible(msecs_to_jiffies(data->power_period));
+	if (leftover)
+		return 0;
+
+	ret = read_registers(data);
+	if (ret)
+		return 0;
+
+	for (cu = 0, avg_acc = 0; cu < cu_num; cu++) {
+		/* check if current compute unit is online */
+		if (data->cu_on[cu] == 0)
+			continue;
+
+		if (data->cu_acc_power[cu] < prev_cu_acc_power[cu]) {
+			jdelta[cu] = data->max_cu_acc_power + data->cu_acc_power[cu];
+			jdelta[cu] -= prev_cu_acc_power[cu];
+		} else {
+			jdelta[cu] = data->cu_acc_power[cu] - prev_cu_acc_power[cu];
+		}
+		tdelta = data->cpu_sw_pwr_ptsc[cu] - prev_ptsc[cu];
+		jdelta[cu] *= data->cpu_pwr_sample_ratio * 1000;
+		do_div(jdelta[cu], tdelta);
+
+		/* the unit is microWatt */
+		avg_acc += jdelta[cu];
+	}
+
+	return sprintf(buf, "%llu\n", (unsigned long long)avg_acc);
+}
+static DEVICE_ATTR(power1_average, S_IRUGO, acc_show_power, NULL);
+
+static ssize_t acc_show_power_period(struct device *dev,
+				     struct device_attribute *attr,
+				     char *buf)
+{
+	struct fam15h_power_data *data = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%lu\n", data->power_period);
+}
+
+static ssize_t acc_set_power_period(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t count)
+{
+	struct fam15h_power_data *data = dev_get_drvdata(dev);
+	unsigned long temp;
+	int ret;
+
+	ret = kstrtoul(buf, 10, &temp);
+	if (ret)
+		return ret;
+
+	if (temp > MAX_INTERVAL)
+		return -EINVAL;
+
+	/* the interval value should be greater than 0 */
+	if (temp <= 0)
+		return -EINVAL;
+
+	data->power_period = temp;
+
+	return count;
+}
+static DEVICE_ATTR(power1_average_interval, S_IRUGO | S_IWUSR,
+		   acc_show_power_period, acc_set_power_period);
+
 static int fam15h_power_init_attrs(struct pci_dev *pdev,
 				   struct fam15h_power_data *data)
 {
@@ -137,6 +320,10 @@
 	     (c->x86_model >= 0x60 && c->x86_model <= 0x7f)))
 		n += 1;
 
+	/* check if processor supports accumulated power */
+	if (boot_cpu_has(X86_FEATURE_ACC_POWER))
+		n += 2;
+
 	fam15h_power_attrs = devm_kcalloc(&pdev->dev, n,
 					  sizeof(*fam15h_power_attrs),
 					  GFP_KERNEL);
@@ -151,6 +338,11 @@
 	     (c->x86_model >= 0x60 && c->x86_model <= 0x7f)))
 		fam15h_power_attrs[n++] = &dev_attr_power1_input.attr;
 
+	if (boot_cpu_has(X86_FEATURE_ACC_POWER)) {
+		fam15h_power_attrs[n++] = &dev_attr_power1_average.attr;
+		fam15h_power_attrs[n++] = &dev_attr_power1_average_interval.attr;
+	}
+
 	data->group.attrs = fam15h_power_attrs;
 
 	return 0;
@@ -216,7 +408,7 @@
 static int fam15h_power_init_data(struct pci_dev *f4,
 				  struct fam15h_power_data *data)
 {
-	u32 val, eax, ebx, ecx, edx;
+	u32 val;
 	u64 tmp;
 	int ret;
 
@@ -243,10 +435,9 @@
 	if (ret)
 		return ret;
 
-	cpuid(0x80000007, &eax, &ebx, &ecx, &edx);
 
 	/* CPUID Fn8000_0007:EDX[12] indicates to support accumulated power */
-	if (!(edx & BIT(12)))
+	if (!boot_cpu_has(X86_FEATURE_ACC_POWER))
 		return 0;
 
 	/*
@@ -254,7 +445,7 @@
 	 * sample period to the PTSC counter period by executing CPUID
 	 * Fn8000_0007:ECX
 	 */
-	data->cpu_pwr_sample_ratio = ecx;
+	data->cpu_pwr_sample_ratio = cpuid_ecx(0x80000007);
 
 	if (rdmsrl_safe(MSR_F15H_CU_MAX_PWR_ACCUMULATOR, &tmp)) {
 		pr_err("Failed to read max compute unit power accumulator MSR\n");
@@ -263,7 +454,15 @@
 
 	data->max_cu_acc_power = tmp;
 
-	return 0;
+	/*
+	 * Milliseconds are a reasonable interval for the measurement,
+	 * but the interval shouldn't be set too long here: several
+	 * seconds would make the read function appear to hang. So set
+	 * the default interval to 10 ms.
+	 */
+	data->power_period = 10;
+
+	return read_registers(data);
 }
 
 static int fam15h_power_probe(struct pci_dev *pdev,
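acc_show_power() samples the accumulator and PTSC twice, scales the accumulator delta by the sample ratio, and divides by the elapsed PTSC ticks, wrapping the accumulator against max_cu_acc_power when it overflows between samples. A standalone sketch of the per-compute-unit arithmetic:

#include <stdio.h>

typedef unsigned long long u64;

/*
 * Average power in microwatts over one sampling window, with
 * accumulator wraparound handling, as in acc_show_power().
 */
static u64 avg_power_uw(u64 acc_prev, u64 acc_now, u64 acc_max,
			u64 ptsc_prev, u64 ptsc_now, u64 ratio)
{
	u64 jdelta, tdelta = ptsc_now - ptsc_prev;

	if (acc_now < acc_prev)		/* accumulator wrapped */
		jdelta = acc_max + acc_now - acc_prev;
	else
		jdelta = acc_now - acc_prev;

	return jdelta * ratio * 1000 / tdelta;
}

int main(void)
{
	/* delta 200 over 4000 ticks at ratio 10 -> 500 uW */
	printf("%llu uW\n", avg_power_uw(100, 300, 1000, 0, 4000, 10));
	return 0;
}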
diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c
index 1896e26..730d840 100644
--- a/drivers/hwmon/it87.c
+++ b/drivers/hwmon/it87.c
@@ -13,6 +13,7 @@
  *  Supports: IT8603E  Super I/O chip w/LPC interface
  *            IT8620E  Super I/O chip w/LPC interface
  *            IT8623E  Super I/O chip w/LPC interface
+ *            IT8628E  Super I/O chip w/LPC interface
  *            IT8705F  Super I/O chip w/LPC interface
  *            IT8712F  Super I/O chip w/LPC interface
  *            IT8716F  Super I/O chip w/LPC interface
@@ -44,14 +45,11 @@
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/bitops.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/slab.h>
@@ -72,17 +70,18 @@
 
 enum chips { it87, it8712, it8716, it8718, it8720, it8721, it8728, it8732,
 	     it8771, it8772, it8781, it8782, it8783, it8786, it8790, it8603,
-	     it8620 };
+	     it8620, it8628 };
 
 static unsigned short force_id;
 module_param(force_id, ushort, 0);
 MODULE_PARM_DESC(force_id, "Override the detected device ID");
 
-static struct platform_device *pdev;
+static struct platform_device *it87_pdev[2];
 
-#define	REG	0x2e	/* The register to read/write */
+#define	REG_2E	0x2e	/* The register to read/write */
+#define	REG_4E	0x4e	/* Secondary register to read/write */
+
 #define	DEV	0x07	/* Register: Logical device select */
-#define	VAL	0x2f	/* The value to read/write */
 #define PME	0x04	/* The device with the fan registers in it */
 
 /* The device with the IT8718F/IT8720F VID value in it */
@@ -91,54 +90,55 @@
 #define	DEVID	0x20	/* Register: Device ID */
 #define	DEVREV	0x22	/* Register: Device Revision */
 
-static inline int superio_inb(int reg)
+static inline int superio_inb(int ioreg, int reg)
 {
-	outb(reg, REG);
-	return inb(VAL);
+	outb(reg, ioreg);
+	return inb(ioreg + 1);
 }
 
-static inline void superio_outb(int reg, int val)
+static inline void superio_outb(int ioreg, int reg, int val)
 {
-	outb(reg, REG);
-	outb(val, VAL);
+	outb(reg, ioreg);
+	outb(val, ioreg + 1);
 }
 
-static int superio_inw(int reg)
+static int superio_inw(int ioreg, int reg)
 {
 	int val;
-	outb(reg++, REG);
-	val = inb(VAL) << 8;
-	outb(reg, REG);
-	val |= inb(VAL);
+
+	outb(reg++, ioreg);
+	val = inb(ioreg + 1) << 8;
+	outb(reg, ioreg);
+	val |= inb(ioreg + 1);
 	return val;
 }
 
-static inline void superio_select(int ldn)
+static inline void superio_select(int ioreg, int ldn)
 {
-	outb(DEV, REG);
-	outb(ldn, VAL);
+	outb(DEV, ioreg);
+	outb(ldn, ioreg + 1);
 }
 
-static inline int superio_enter(void)
+static inline int superio_enter(int ioreg)
 {
 	/*
-	 * Try to reserve REG and REG + 1 for exclusive access.
+	 * Try to reserve ioreg and ioreg + 1 for exclusive access.
 	 */
-	if (!request_muxed_region(REG, 2, DRVNAME))
+	if (!request_muxed_region(ioreg, 2, DRVNAME))
 		return -EBUSY;
 
-	outb(0x87, REG);
-	outb(0x01, REG);
-	outb(0x55, REG);
-	outb(0x55, REG);
+	outb(0x87, ioreg);
+	outb(0x01, ioreg);
+	outb(0x55, ioreg);
+	outb(ioreg == REG_4E ? 0xaa : 0x55, ioreg);
 	return 0;
 }
 
-static inline void superio_exit(void)
+static inline void superio_exit(int ioreg)
 {
-	outb(0x02, REG);
-	outb(0x02, VAL);
-	release_region(REG, 2);
+	outb(0x02, ioreg);
+	outb(0x02, ioreg + 1);
+	release_region(ioreg, 2);
 }
 
 /* Logical device 4 registers */
@@ -161,6 +161,7 @@
 #define IT8603E_DEVID 0x8603
 #define IT8620E_DEVID 0x8620
 #define IT8623E_DEVID 0x8623
+#define IT8628E_DEVID 0x8628
 #define IT87_ACT_REG  0x30
 #define IT87_BASE_REG 0x60
 
@@ -168,6 +169,7 @@
 #define IT87_SIO_GPIO1_REG	0x25
 #define IT87_SIO_GPIO2_REG	0x26
 #define IT87_SIO_GPIO3_REG	0x27
+#define IT87_SIO_GPIO4_REG	0x28
 #define IT87_SIO_GPIO5_REG	0x29
 #define IT87_SIO_PINX1_REG	0x2a	/* Pin selection */
 #define IT87_SIO_PINX2_REG	0x2c	/* Pin selection */
@@ -217,7 +219,12 @@
 #define IT87_REG_FAN_DIV       0x0b
 #define IT87_REG_FAN_16BIT     0x0c
 
-/* Monitors: 9 voltage (0 to 7, battery), 3 temp (1 to 3), 3 fan (1 to 3) */
+/*
+ * Monitors:
+ * - up to 13 voltage (0 to 7, battery, avcc, 10 to 12)
+ * - up to 6 temp (1 to 6)
+ * - up to 6 fan (1 to 6)
+ */
 
 static const u8 IT87_REG_FAN[]         = { 0x0d, 0x0e, 0x0f, 0x80, 0x82, 0x4c };
 static const u8 IT87_REG_FAN_MIN[]     = { 0x10, 0x11, 0x12, 0x84, 0x86, 0x4e };
@@ -227,10 +234,12 @@
 
 #define IT87_REG_FAN_MAIN_CTRL 0x13
 #define IT87_REG_FAN_CTL       0x14
-#define IT87_REG_PWM(nr)       (0x15 + (nr))
-#define IT87_REG_PWM_DUTY(nr)  (0x63 + (nr) * 8)
+static const u8 IT87_REG_PWM[]         = { 0x15, 0x16, 0x17, 0x7f, 0xa7, 0xaf };
+static const u8 IT87_REG_PWM_DUTY[]    = { 0x63, 0x6b, 0x73, 0x7b, 0xa3, 0xab };
 
-#define IT87_REG_VIN(nr)       (0x20 + (nr))
+static const u8 IT87_REG_VIN[]	= { 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26,
+				    0x27, 0x28, 0x2f, 0x2c, 0x2d, 0x2e };
+
 #define IT87_REG_TEMP(nr)      (0x29 + (nr))
 
 #define IT87_REG_VIN_MAX(nr)   (0x30 + (nr) * 2)
@@ -245,30 +254,48 @@
 
 #define IT87_REG_CHIPID        0x58
 
-#define IT87_REG_AUTO_TEMP(nr, i) (0x60 + (nr) * 8 + (i))
-#define IT87_REG_AUTO_PWM(nr, i)  (0x65 + (nr) * 8 + (i))
+static const u8 IT87_REG_AUTO_BASE[] = { 0x60, 0x68, 0x70, 0x78, 0xa0, 0xa8 };
+
+#define IT87_REG_AUTO_TEMP(nr, i) (IT87_REG_AUTO_BASE[nr] + (i))
+#define IT87_REG_AUTO_PWM(nr, i)  (IT87_REG_AUTO_BASE[nr] + 5 + (i))
+
+#define IT87_REG_TEMP456_ENABLE	0x77
+
+#define NUM_VIN			ARRAY_SIZE(IT87_REG_VIN)
+#define NUM_VIN_LIMIT		8
+#define NUM_TEMP		6
+#define NUM_TEMP_OFFSET		ARRAY_SIZE(IT87_REG_TEMP_OFFSET)
+#define NUM_TEMP_LIMIT		3
+#define NUM_FAN			ARRAY_SIZE(IT87_REG_FAN)
+#define NUM_FAN_DIV		3
+#define NUM_PWM			ARRAY_SIZE(IT87_REG_PWM)
+#define NUM_AUTO_PWM		ARRAY_SIZE(IT87_REG_PWM)
 
 struct it87_devices {
 	const char *name;
 	const char * const suffix;
-	u16 features;
+	u32 features;
 	u8 peci_mask;
 	u8 old_peci_mask;
 };
 
-#define FEAT_12MV_ADC		(1 << 0)
-#define FEAT_NEWER_AUTOPWM	(1 << 1)
-#define FEAT_OLD_AUTOPWM	(1 << 2)
-#define FEAT_16BIT_FANS		(1 << 3)
-#define FEAT_TEMP_OFFSET	(1 << 4)
-#define FEAT_TEMP_PECI		(1 << 5)
-#define FEAT_TEMP_OLD_PECI	(1 << 6)
-#define FEAT_FAN16_CONFIG	(1 << 7)	/* Need to enable 16-bit fans */
-#define FEAT_FIVE_FANS		(1 << 8)	/* Supports five fans */
-#define FEAT_VID		(1 << 9)	/* Set if chip supports VID */
-#define FEAT_IN7_INTERNAL	(1 << 10)	/* Set if in7 is internal */
-#define FEAT_SIX_FANS		(1 << 11)	/* Supports six fans */
-#define FEAT_10_9MV_ADC		(1 << 12)
+#define FEAT_12MV_ADC		BIT(0)
+#define FEAT_NEWER_AUTOPWM	BIT(1)
+#define FEAT_OLD_AUTOPWM	BIT(2)
+#define FEAT_16BIT_FANS		BIT(3)
+#define FEAT_TEMP_OFFSET	BIT(4)
+#define FEAT_TEMP_PECI		BIT(5)
+#define FEAT_TEMP_OLD_PECI	BIT(6)
+#define FEAT_FAN16_CONFIG	BIT(7)	/* Need to enable 16-bit fans */
+#define FEAT_FIVE_FANS		BIT(8)	/* Supports five fans */
+#define FEAT_VID		BIT(9)	/* Set if chip supports VID */
+#define FEAT_IN7_INTERNAL	BIT(10)	/* Set if in7 is internal */
+#define FEAT_SIX_FANS		BIT(11)	/* Supports six fans */
+#define FEAT_10_9MV_ADC		BIT(12)
+#define FEAT_AVCC3		BIT(13)	/* Chip supports in9/AVCC3 */
+#define FEAT_SIX_PWM		BIT(14)	/* Chip supports 6 pwm chn */
+#define FEAT_PWM_FREQ2		BIT(15)	/* Separate pwm freq 2 */
+#define FEAT_SIX_TEMP		BIT(16)	/* Up to 6 temp sensors */
 
 static const struct it87_devices it87_devices[] = {
 	[it87] = {
@@ -286,20 +313,22 @@
 		.name = "it8716",
 		.suffix = "F",
 		.features = FEAT_16BIT_FANS | FEAT_TEMP_OFFSET | FEAT_VID
-		  | FEAT_FAN16_CONFIG | FEAT_FIVE_FANS,
+		  | FEAT_FAN16_CONFIG | FEAT_FIVE_FANS | FEAT_PWM_FREQ2,
 	},
 	[it8718] = {
 		.name = "it8718",
 		.suffix = "F",
 		.features = FEAT_16BIT_FANS | FEAT_TEMP_OFFSET | FEAT_VID
-		  | FEAT_TEMP_OLD_PECI | FEAT_FAN16_CONFIG | FEAT_FIVE_FANS,
+		  | FEAT_TEMP_OLD_PECI | FEAT_FAN16_CONFIG | FEAT_FIVE_FANS
+		  | FEAT_PWM_FREQ2,
 		.old_peci_mask = 0x4,
 	},
 	[it8720] = {
 		.name = "it8720",
 		.suffix = "F",
 		.features = FEAT_16BIT_FANS | FEAT_TEMP_OFFSET | FEAT_VID
-		  | FEAT_TEMP_OLD_PECI | FEAT_FAN16_CONFIG | FEAT_FIVE_FANS,
+		  | FEAT_TEMP_OLD_PECI | FEAT_FAN16_CONFIG | FEAT_FIVE_FANS
+		  | FEAT_PWM_FREQ2,
 		.old_peci_mask = 0x4,
 	},
 	[it8721] = {
@@ -307,7 +336,8 @@
 		.suffix = "F",
 		.features = FEAT_NEWER_AUTOPWM | FEAT_12MV_ADC | FEAT_16BIT_FANS
 		  | FEAT_TEMP_OFFSET | FEAT_TEMP_OLD_PECI | FEAT_TEMP_PECI
-		  | FEAT_FAN16_CONFIG | FEAT_FIVE_FANS | FEAT_IN7_INTERNAL,
+		  | FEAT_FAN16_CONFIG | FEAT_FIVE_FANS | FEAT_IN7_INTERNAL
+		  | FEAT_PWM_FREQ2,
 		.peci_mask = 0x05,
 		.old_peci_mask = 0x02,	/* Actually reports PCH */
 	},
@@ -316,7 +346,7 @@
 		.suffix = "F",
 		.features = FEAT_NEWER_AUTOPWM | FEAT_12MV_ADC | FEAT_16BIT_FANS
 		  | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI | FEAT_FIVE_FANS
-		  | FEAT_IN7_INTERNAL,
+		  | FEAT_IN7_INTERNAL | FEAT_PWM_FREQ2,
 		.peci_mask = 0x07,
 	},
 	[it8732] = {
@@ -332,7 +362,8 @@
 		.name = "it8771",
 		.suffix = "E",
 		.features = FEAT_NEWER_AUTOPWM | FEAT_12MV_ADC | FEAT_16BIT_FANS
-		  | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI | FEAT_IN7_INTERNAL,
+		  | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI | FEAT_IN7_INTERNAL
+		  | FEAT_PWM_FREQ2,
 				/* PECI: guesswork */
 				/* 12mV ADC (OHM) */
 				/* 16 bit fans (OHM) */
@@ -343,7 +374,8 @@
 		.name = "it8772",
 		.suffix = "E",
 		.features = FEAT_NEWER_AUTOPWM | FEAT_12MV_ADC | FEAT_16BIT_FANS
-		  | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI | FEAT_IN7_INTERNAL,
+		  | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI | FEAT_IN7_INTERNAL
+		  | FEAT_PWM_FREQ2,
 				/* PECI (coreboot) */
 				/* 12mV ADC (HWSensors4, OHM) */
 				/* 16 bit fans (HWSensors4, OHM) */
@@ -354,42 +386,45 @@
 		.name = "it8781",
 		.suffix = "F",
 		.features = FEAT_16BIT_FANS | FEAT_TEMP_OFFSET
-		  | FEAT_TEMP_OLD_PECI | FEAT_FAN16_CONFIG,
+		  | FEAT_TEMP_OLD_PECI | FEAT_FAN16_CONFIG | FEAT_PWM_FREQ2,
 		.old_peci_mask = 0x4,
 	},
 	[it8782] = {
 		.name = "it8782",
 		.suffix = "F",
 		.features = FEAT_16BIT_FANS | FEAT_TEMP_OFFSET
-		  | FEAT_TEMP_OLD_PECI | FEAT_FAN16_CONFIG,
+		  | FEAT_TEMP_OLD_PECI | FEAT_FAN16_CONFIG | FEAT_PWM_FREQ2,
 		.old_peci_mask = 0x4,
 	},
 	[it8783] = {
 		.name = "it8783",
 		.suffix = "E/F",
 		.features = FEAT_16BIT_FANS | FEAT_TEMP_OFFSET
-		  | FEAT_TEMP_OLD_PECI | FEAT_FAN16_CONFIG,
+		  | FEAT_TEMP_OLD_PECI | FEAT_FAN16_CONFIG | FEAT_PWM_FREQ2,
 		.old_peci_mask = 0x4,
 	},
 	[it8786] = {
 		.name = "it8786",
 		.suffix = "E",
 		.features = FEAT_NEWER_AUTOPWM | FEAT_12MV_ADC | FEAT_16BIT_FANS
-		  | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI | FEAT_IN7_INTERNAL,
+		  | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI | FEAT_IN7_INTERNAL
+		  | FEAT_PWM_FREQ2,
 		.peci_mask = 0x07,
 	},
 	[it8790] = {
 		.name = "it8790",
 		.suffix = "E",
 		.features = FEAT_NEWER_AUTOPWM | FEAT_12MV_ADC | FEAT_16BIT_FANS
-		  | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI | FEAT_IN7_INTERNAL,
+		  | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI | FEAT_IN7_INTERNAL
+		  | FEAT_PWM_FREQ2,
 		.peci_mask = 0x07,
 	},
 	[it8603] = {
 		.name = "it8603",
 		.suffix = "E",
 		.features = FEAT_NEWER_AUTOPWM | FEAT_12MV_ADC | FEAT_16BIT_FANS
-		  | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI | FEAT_IN7_INTERNAL,
+		  | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI | FEAT_IN7_INTERNAL
+		  | FEAT_AVCC3 | FEAT_PWM_FREQ2,
 		.peci_mask = 0x07,
 	},
 	[it8620] = {
@@ -397,7 +432,17 @@
 		.suffix = "E",
 		.features = FEAT_NEWER_AUTOPWM | FEAT_12MV_ADC | FEAT_16BIT_FANS
 		  | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI | FEAT_SIX_FANS
-		  | FEAT_IN7_INTERNAL,
+		  | FEAT_IN7_INTERNAL | FEAT_SIX_PWM | FEAT_PWM_FREQ2
+		  | FEAT_SIX_TEMP,
+		.peci_mask = 0x07,
+	},
+	[it8628] = {
+		.name = "it8628",
+		.suffix = "E",
+		.features = FEAT_NEWER_AUTOPWM | FEAT_12MV_ADC | FEAT_16BIT_FANS
+		  | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI | FEAT_SIX_FANS
+		  | FEAT_IN7_INTERNAL | FEAT_SIX_PWM | FEAT_PWM_FREQ2
+		  | FEAT_SIX_TEMP,
 		.peci_mask = 0x07,
 	},
 };
@@ -409,16 +454,20 @@
 #define has_old_autopwm(data)	((data)->features & FEAT_OLD_AUTOPWM)
 #define has_temp_offset(data)	((data)->features & FEAT_TEMP_OFFSET)
 #define has_temp_peci(data, nr)	(((data)->features & FEAT_TEMP_PECI) && \
-				 ((data)->peci_mask & (1 << nr)))
+				 ((data)->peci_mask & BIT(nr)))
 #define has_temp_old_peci(data, nr) \
 				(((data)->features & FEAT_TEMP_OLD_PECI) && \
-				 ((data)->old_peci_mask & (1 << nr)))
+				 ((data)->old_peci_mask & BIT(nr)))
 #define has_fan16_config(data)	((data)->features & FEAT_FAN16_CONFIG)
 #define has_five_fans(data)	((data)->features & (FEAT_FIVE_FANS | \
 						     FEAT_SIX_FANS))
 #define has_vid(data)		((data)->features & FEAT_VID)
 #define has_in7_internal(data)	((data)->features & FEAT_IN7_INTERNAL)
 #define has_six_fans(data)	((data)->features & FEAT_SIX_FANS)
+#define has_avcc3(data)		((data)->features & FEAT_AVCC3)
+#define has_six_pwm(data)	((data)->features & FEAT_SIX_PWM)
+#define has_pwm_freq2(data)	((data)->features & FEAT_PWM_FREQ2)
+#define has_six_temp(data)	((data)->features & FEAT_SIX_TEMP)
 
 struct it87_sio_data {
 	enum chips type;
@@ -440,7 +489,7 @@
  * The structure is dynamically allocated.
  */
 struct it87_data {
-	struct device *hwmon_dev;
+	const struct attribute_group *groups[7];
 	enum chips type;
 	u16 features;
 	u8 peci_mask;
@@ -453,17 +502,21 @@
 	unsigned long last_updated;	/* In jiffies */
 
 	u16 in_scaled;		/* Internal voltage sensors are scaled */
-	u8 in[10][3];		/* [nr][0]=in, [1]=min, [2]=max */
+	u16 in_internal;	/* Bitfield, internal sensors (for labels) */
+	u16 has_in;		/* Bitfield, voltage sensors enabled */
+	u8 in[NUM_VIN][3];		/* [nr][0]=in, [1]=min, [2]=max */
 	u8 has_fan;		/* Bitfield, fans enabled */
-	u16 fan[6][2];		/* Register values, [nr][0]=fan, [1]=min */
+	u16 fan[NUM_FAN][2];	/* Register values, [nr][0]=fan, [1]=min */
 	u8 has_temp;		/* Bitfield, temp sensors enabled */
-	s8 temp[3][4];		/* [nr][0]=temp, [1]=min, [2]=max, [3]=offset */
+	s8 temp[NUM_TEMP][4];	/* [nr][0]=temp, [1]=min, [2]=max, [3]=offset */
 	u8 sensor;		/* Register value (IT87_REG_TEMP_ENABLE) */
 	u8 extra;		/* Register value (IT87_REG_TEMP_EXTRA) */
-	u8 fan_div[3];		/* Register encoding, shifted right */
+	u8 fan_div[NUM_FAN_DIV];/* Register encoding, shifted right */
+	bool has_vid;		/* True if VID supported */
 	u8 vid;			/* Register encoding, combined */
 	u8 vrm;
 	u32 alarms;		/* Register encoding, combined */
+	bool has_beep;		/* true if beep supported */
 	u8 beeps;		/* Register encoding */
 	u8 fan_main_ctrl;	/* Register value */
 	u8 fan_ctl;		/* Register value */
@@ -478,13 +531,14 @@
 	 * is no longer needed, but it is still done to keep the driver
 	 * simple.
 	 */
-	u8 pwm_ctrl[3];		/* Register value */
-	u8 pwm_duty[3];		/* Manual PWM value set by user */
-	u8 pwm_temp_map[3];	/* PWM to temp. chan. mapping (bits 1-0) */
+	u8 has_pwm;		/* Bitfield, pwm control enabled */
+	u8 pwm_ctrl[NUM_PWM];	/* Register value */
+	u8 pwm_duty[NUM_PWM];	/* Manual PWM value set by user */
+	u8 pwm_temp_map[NUM_PWM];/* PWM to temp. chan. mapping (bits 1-0) */
 
 	/* Automatic fan speed control registers */
-	u8 auto_pwm[3][4];	/* [nr][3] is hard-coded */
-	s8 auto_temp[3][5];	/* [nr][0] is point1_temp_hyst */
+	u8 auto_pwm[NUM_AUTO_PWM][4];	/* [nr][3] is hard-coded */
+	s8 auto_temp[NUM_AUTO_PWM][5];	/* [nr][0] is point1_temp_hyst */
 };
 
 static int adc_lsb(const struct it87_data *data, int nr)
@@ -497,7 +551,7 @@
 		lsb = 109;
 	else
 		lsb = 160;
-	if (data->in_scaled & (1 << nr))
+	if (data->in_scaled & BIT(nr))
 		lsb <<= 1;
 	return lsb;
 }
@@ -554,15 +608,16 @@
 		return (reg & 0x7f) << 1;
 }
 
-
 static int DIV_TO_REG(int val)
 {
 	int answer = 0;
+
 	while (answer < 7 && (val >>= 1))
 		answer++;
 	return answer;
 }
-#define DIV_FROM_REG(val) (1 << (val))
+
+#define DIV_FROM_REG(val) BIT(val)
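+/*
+ * Worked example: a divider of 8 encodes as DIV_TO_REG(8) == 3 and
+ * decodes back as DIV_FROM_REG(3) == 8; requests above 128 saturate at
+ * register code 7.
+ */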
 
 /*
  * PWM base frequencies. The frequency has to be divided by either 128 or 256,
@@ -585,32 +640,204 @@
 	750000,
 };
 
-static int it87_probe(struct platform_device *pdev);
-static int it87_remove(struct platform_device *pdev);
+/*
+ * Must be called with data->update_lock held, except during initialization.
+ * We ignore the IT87 BUSY flag at this moment - waiting for it could
+ * lead to deadlocks, would slow down IT87 accesses, and should not be
+ * necessary.
+ */
+static int it87_read_value(struct it87_data *data, u8 reg)
+{
+	outb_p(reg, data->addr + IT87_ADDR_REG_OFFSET);
+	return inb_p(data->addr + IT87_DATA_REG_OFFSET);
+}
 
-static int it87_read_value(struct it87_data *data, u8 reg);
-static void it87_write_value(struct it87_data *data, u8 reg, u8 value);
-static struct it87_data *it87_update_device(struct device *dev);
-static int it87_check_pwm(struct device *dev);
-static void it87_init_device(struct platform_device *pdev);
+/*
+ * Must be called with data->update_lock held, except during initialization.
+ * We ignore the IT87 BUSY flag at this moment - waiting for it could
+ * lead to deadlocks, would slow down IT87 accesses, and should not be
+ * necessary.
+ */
+static void it87_write_value(struct it87_data *data, u8 reg, u8 value)
+{
+	outb_p(reg, data->addr + IT87_ADDR_REG_OFFSET);
+	outb_p(value, data->addr + IT87_DATA_REG_OFFSET);
+}
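+
+/*
+ * Both helpers use the environment controller's index/data register
+ * pair: the register index goes to base + IT87_ADDR_REG_OFFSET, and the
+ * payload is then read from or written to base + IT87_DATA_REG_OFFSET.
+ * A 16-bit fan count, for example, is assembled from two such reads:
+ *
+ *	count = it87_read_value(data, IT87_REG_FAN[nr]);
+ *	count |= it87_read_value(data, IT87_REG_FANX[nr]) << 8;
+ */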
 
+static void it87_update_pwm_ctrl(struct it87_data *data, int nr)
+{
+	data->pwm_ctrl[nr] = it87_read_value(data, IT87_REG_PWM[nr]);
+	if (has_newer_autopwm(data)) {
+		data->pwm_temp_map[nr] = data->pwm_ctrl[nr] & 0x03;
+		data->pwm_duty[nr] = it87_read_value(data,
+						     IT87_REG_PWM_DUTY[nr]);
+	} else {
+		if (data->pwm_ctrl[nr] & 0x80)	/* Automatic mode */
+			data->pwm_temp_map[nr] = data->pwm_ctrl[nr] & 0x03;
+		else				/* Manual mode */
+			data->pwm_duty[nr] = data->pwm_ctrl[nr] & 0x7f;
+	}
 
-static struct platform_driver it87_driver = {
-	.driver = {
-		.name	= DRVNAME,
-	},
-	.probe	= it87_probe,
-	.remove	= it87_remove,
-};
+	if (has_old_autopwm(data)) {
+		int i;
+
+		for (i = 0; i < 5; i++)
+			data->auto_temp[nr][i] = it87_read_value(data,
+						IT87_REG_AUTO_TEMP(nr, i));
+		for (i = 0; i < 3; i++)
+			data->auto_pwm[nr][i] = it87_read_value(data,
+						IT87_REG_AUTO_PWM(nr, i));
+	} else if (has_newer_autopwm(data)) {
+		int i;
+
+		/*
+		 * 0: temperature hysteresis (base + 5)
+		 * 1: fan off temperature (base + 0)
+		 * 2: fan start temperature (base + 1)
+		 * 3: fan max temperature (base + 2)
+		 */
+		data->auto_temp[nr][0] =
+			it87_read_value(data, IT87_REG_AUTO_TEMP(nr, 5));
+
+		for (i = 0; i < 3; i++)
+			data->auto_temp[nr][i + 1] =
+				it87_read_value(data,
+						IT87_REG_AUTO_TEMP(nr, i));
+		/*
+		 * 0: start pwm value (base + 3)
+		 * 1: pwm slope (base + 4, 1/8th pwm)
+		 */
+		data->auto_pwm[nr][0] =
+			it87_read_value(data, IT87_REG_AUTO_TEMP(nr, 3));
+		data->auto_pwm[nr][1] =
+			it87_read_value(data, IT87_REG_AUTO_TEMP(nr, 4));
+	}
+}
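+
+/*
+ * pwm_ctrl encoding, as cached above: bit 7 selects automatic mode. On
+ * newer-autopwm chips bits 1-0 always hold the temperature mapping and
+ * the duty cycle lives in the separate IT87_REG_PWM_DUTY registers; on
+ * older chips bits 1-0 hold the mapping in automatic mode and bits 6-0
+ * the duty cycle in manual mode.
+ */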
+
+static struct it87_data *it87_update_device(struct device *dev)
+{
+	struct it87_data *data = dev_get_drvdata(dev);
+	int i;
+
+	mutex_lock(&data->update_lock);
+
+	if (time_after(jiffies, data->last_updated + HZ + HZ / 2) ||
+	    !data->valid) {
+		if (update_vbat) {
+			/*
+			 * The bit is cleared after each update, so
+			 * re-enable it. The value returned by this read
+			 * will be the previous value.
+			 */
+			it87_write_value(data, IT87_REG_CONFIG,
+				it87_read_value(data, IT87_REG_CONFIG) | 0x40);
+		}
+		for (i = 0; i < NUM_VIN; i++) {
+			if (!(data->has_in & BIT(i)))
+				continue;
+
+			data->in[i][0] =
+				it87_read_value(data, IT87_REG_VIN[i]);
+
+			/* VBAT and AVCC don't have limit registers */
+			if (i >= NUM_VIN_LIMIT)
+				continue;
+
+			data->in[i][1] =
+				it87_read_value(data, IT87_REG_VIN_MIN(i));
+			data->in[i][2] =
+				it87_read_value(data, IT87_REG_VIN_MAX(i));
+		}
+
+		for (i = 0; i < NUM_FAN; i++) {
+			/* Skip disabled fans */
+			if (!(data->has_fan & BIT(i)))
+				continue;
+
+			data->fan[i][1] =
+				it87_read_value(data, IT87_REG_FAN_MIN[i]);
+			data->fan[i][0] = it87_read_value(data,
+				       IT87_REG_FAN[i]);
+			/* Add high byte if in 16-bit mode */
+			if (has_16bit_fans(data)) {
+				data->fan[i][0] |= it87_read_value(data,
+						IT87_REG_FANX[i]) << 8;
+				data->fan[i][1] |= it87_read_value(data,
+						IT87_REG_FANX_MIN[i]) << 8;
+			}
+		}
+		for (i = 0; i < NUM_TEMP; i++) {
+			if (!(data->has_temp & BIT(i)))
+				continue;
+			data->temp[i][0] =
+				it87_read_value(data, IT87_REG_TEMP(i));
+
+			if (has_temp_offset(data) && i < NUM_TEMP_OFFSET)
+				data->temp[i][3] =
+				  it87_read_value(data,
+						  IT87_REG_TEMP_OFFSET[i]);
+
+			if (i >= NUM_TEMP_LIMIT)
+				continue;
+
+			data->temp[i][1] =
+				it87_read_value(data, IT87_REG_TEMP_LOW(i));
+			data->temp[i][2] =
+				it87_read_value(data, IT87_REG_TEMP_HIGH(i));
+		}
+
+		/* Newer chips don't have clock dividers */
+		if ((data->has_fan & 0x07) && !has_16bit_fans(data)) {
+			i = it87_read_value(data, IT87_REG_FAN_DIV);
+			data->fan_div[0] = i & 0x07;
+			data->fan_div[1] = (i >> 3) & 0x07;
+			data->fan_div[2] = (i & 0x40) ? 3 : 1;
+		}
+
+		data->alarms =
+			it87_read_value(data, IT87_REG_ALARM1) |
+			(it87_read_value(data, IT87_REG_ALARM2) << 8) |
+			(it87_read_value(data, IT87_REG_ALARM3) << 16);
+		data->beeps = it87_read_value(data, IT87_REG_BEEP_ENABLE);
+
+		data->fan_main_ctrl = it87_read_value(data,
+				IT87_REG_FAN_MAIN_CTRL);
+		data->fan_ctl = it87_read_value(data, IT87_REG_FAN_CTL);
+		for (i = 0; i < NUM_PWM; i++) {
+			if (!(data->has_pwm & BIT(i)))
+				continue;
+			it87_update_pwm_ctrl(data, i);
+		}
+
+		data->sensor = it87_read_value(data, IT87_REG_TEMP_ENABLE);
+		data->extra = it87_read_value(data, IT87_REG_TEMP_EXTRA);
+		/*
+		 * The IT8705F does not have VID capability.
+		 * The IT8718F and later don't use IT87_REG_VID for the
+		 * same purpose.
+		 */
+		if (data->type == it8712 || data->type == it8716) {
+			data->vid = it87_read_value(data, IT87_REG_VID);
+			/*
+			 * The older IT8712F revisions had only 5 VID pins,
+			 * but we assume it is always safe to read 6 bits.
+			 */
+			data->vid &= 0x3f;
+		}
+		data->last_updated = jiffies;
+		data->valid = 1;
+	}
+
+	mutex_unlock(&data->update_lock);
+
+	return data;
+}
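+
+/*
+ * Callers therefore see a register snapshot that is at most about 1.5
+ * seconds old (HZ + HZ / 2 jiffies); the sysfs show() callbacks below
+ * refresh this cache via it87_update_device() when it is stale.
+ */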
 
 static ssize_t show_in(struct device *dev, struct device_attribute *attr,
 		       char *buf)
 {
 	struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr);
-	int nr = sattr->nr;
-	int index = sattr->index;
-
 	struct it87_data *data = it87_update_device(dev);
+	int index = sattr->index;
+	int nr = sattr->nr;
+
 	return sprintf(buf, "%d\n", in_from_reg(data, nr, data->in[nr][index]));
 }
 
@@ -618,10 +845,9 @@
 		      const char *buf, size_t count)
 {
 	struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr);
-	int nr = sattr->nr;
-	int index = sattr->index;
-
 	struct it87_data *data = dev_get_drvdata(dev);
+	int index = sattr->index;
+	int nr = sattr->nr;
 	unsigned long val;
 
 	if (kstrtoul(buf, 10, &val) < 0)
@@ -687,8 +913,11 @@
 
 static SENSOR_DEVICE_ATTR_2(in8_input, S_IRUGO, show_in, NULL, 8, 0);
 static SENSOR_DEVICE_ATTR_2(in9_input, S_IRUGO, show_in, NULL, 9, 0);
+static SENSOR_DEVICE_ATTR_2(in10_input, S_IRUGO, show_in, NULL, 10, 0);
+static SENSOR_DEVICE_ATTR_2(in11_input, S_IRUGO, show_in, NULL, 11, 0);
+static SENSOR_DEVICE_ATTR_2(in12_input, S_IRUGO, show_in, NULL, 12, 0);
 
-/* 3 temperatures */
+/* Up to 6 temperatures */
 static ssize_t show_temp(struct device *dev, struct device_attribute *attr,
 			 char *buf)
 {
@@ -761,6 +990,9 @@
 			    2, 2);
 static SENSOR_DEVICE_ATTR_2(temp3_offset, S_IRUGO | S_IWUSR, show_temp,
 			    set_temp, 2, 3);
+static SENSOR_DEVICE_ATTR_2(temp4_input, S_IRUGO, show_temp, NULL, 3, 0);
+static SENSOR_DEVICE_ATTR_2(temp5_input, S_IRUGO, show_temp, NULL, 4, 0);
+static SENSOR_DEVICE_ATTR_2(temp6_input, S_IRUGO, show_temp, NULL, 5, 0);
 
 static ssize_t show_temp_type(struct device *dev, struct device_attribute *attr,
 			      char *buf)
@@ -771,8 +1003,8 @@
 	u8 reg = data->sensor;	    /* In case value is updated while used */
 	u8 extra = data->extra;
 
-	if ((has_temp_peci(data, nr) && (reg >> 6 == nr + 1))
-	    || (has_temp_old_peci(data, nr) && (extra & 0x80)))
+	if ((has_temp_peci(data, nr) && (reg >> 6 == nr + 1)) ||
+	    (has_temp_old_peci(data, nr) && (extra & 0x80)))
 		return sprintf(buf, "6\n");  /* Intel PECI */
 	if (reg & (1 << nr))
 		return sprintf(buf, "3\n");  /* thermal diode */
@@ -837,18 +1069,19 @@
 static SENSOR_DEVICE_ATTR(temp3_type, S_IRUGO | S_IWUSR, show_temp_type,
 			  set_temp_type, 2);
 
-/* 3 Fans */
+/* 6 Fans */
 
 static int pwm_mode(const struct it87_data *data, int nr)
 {
-	int ctrl = data->fan_main_ctrl & (1 << nr);
+	if (data->type != it8603 && nr < 3 && !(data->fan_main_ctrl & BIT(nr)))
+		return 0;				/* Full speed */
+	if (data->pwm_ctrl[nr] & 0x80)
+		return 2;				/* Automatic mode */
+	if ((data->type == it8603 || nr >= 3) &&
+	    data->pwm_duty[nr] == pwm_to_reg(data, 0xff))
+		return 0;			/* Full speed */
 
-	if (ctrl == 0 && data->type != it8603)		/* Full speed */
-		return 0;
-	if (data->pwm_ctrl[nr] & 0x80)			/* Automatic mode */
-		return 2;
-	else						/* Manual mode */
-		return 1;
+	return 1;				/* Manual mode */
 }
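+
+/*
+ * The return values above follow the standard hwmon pwmX_enable
+ * convention: 0 = full speed (no fan speed control), 1 = manual duty
+ * cycle, 2 = automatic fan speed control.
+ */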
 
 static ssize_t show_fan(struct device *dev, struct device_attribute *attr,
@@ -868,39 +1101,49 @@
 }
 
 static ssize_t show_fan_div(struct device *dev, struct device_attribute *attr,
-		char *buf)
+			    char *buf)
 {
 	struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
+	struct it87_data *data = it87_update_device(dev);
 	int nr = sensor_attr->index;
 
-	struct it87_data *data = it87_update_device(dev);
-	return sprintf(buf, "%d\n", DIV_FROM_REG(data->fan_div[nr]));
+	return sprintf(buf, "%lu\n", DIV_FROM_REG(data->fan_div[nr]));
 }
+
 static ssize_t show_pwm_enable(struct device *dev,
-		struct device_attribute *attr, char *buf)
+			       struct device_attribute *attr, char *buf)
 {
 	struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
+	struct it87_data *data = it87_update_device(dev);
 	int nr = sensor_attr->index;
 
-	struct it87_data *data = it87_update_device(dev);
 	return sprintf(buf, "%d\n", pwm_mode(data, nr));
 }
+
 static ssize_t show_pwm(struct device *dev, struct device_attribute *attr,
-		char *buf)
+			char *buf)
 {
 	struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
+	struct it87_data *data = it87_update_device(dev);
 	int nr = sensor_attr->index;
 
-	struct it87_data *data = it87_update_device(dev);
 	return sprintf(buf, "%d\n",
 		       pwm_from_reg(data, data->pwm_duty[nr]));
 }
+
 static ssize_t show_pwm_freq(struct device *dev, struct device_attribute *attr,
-		char *buf)
+			     char *buf)
 {
+	struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
 	struct it87_data *data = it87_update_device(dev);
-	int index = (data->fan_ctl >> 4) & 0x07;
+	int nr = sensor_attr->index;
 	unsigned int freq;
+	int index;
+
+	if (has_pwm_freq2(data) && nr == 1)
+		index = (data->extra >> 4) & 0x07;
+	else
+		index = (data->fan_ctl >> 4) & 0x07;
 
 	freq = pwm_freq[index] / (has_newer_autopwm(data) ? 256 : 128);
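+	/*
+	 * Example: the lowest 750 kHz base clock with a 256-step PWM
+	 * (newer autopwm chips) reports 750000 / 256 = 2929 Hz.
+	 */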
 
@@ -953,12 +1196,11 @@
 }
 
 static ssize_t set_fan_div(struct device *dev, struct device_attribute *attr,
-		const char *buf, size_t count)
+			   const char *buf, size_t count)
 {
 	struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
-	int nr = sensor_attr->index;
-
 	struct it87_data *data = dev_get_drvdata(dev);
+	int nr = sensor_attr->index;
 	unsigned long val;
 	int min;
 	u8 old;
@@ -1013,6 +1255,11 @@
 			if (data->auto_pwm[nr][i] > data->auto_pwm[nr][i + 1])
 				err = -EINVAL;
 		}
+	} else if (has_newer_autopwm(data)) {
+		for (i = 1; i < 3; i++) {
+			if (data->auto_temp[nr][i] > data->auto_temp[nr][i + 1])
+				err = -EINVAL;
+		}
 	}
 
 	if (err) {
@@ -1023,13 +1270,12 @@
 	return err;
 }
 
-static ssize_t set_pwm_enable(struct device *dev,
-		struct device_attribute *attr, const char *buf, size_t count)
+static ssize_t set_pwm_enable(struct device *dev, struct device_attribute *attr,
+			      const char *buf, size_t count)
 {
 	struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
-	int nr = sensor_attr->index;
-
 	struct it87_data *data = dev_get_drvdata(dev);
+	int nr = sensor_attr->index;
 	long val;
 
 	if (kstrtol(buf, 10, &val) < 0 || val < 0 || val > 2)
@@ -1041,21 +1287,30 @@
 			return -EINVAL;
 	}
 
-	/* IT8603E does not have on/off mode */
-	if (val == 0 && data->type == it8603)
-		return -EINVAL;
-
 	mutex_lock(&data->update_lock);
 
 	if (val == 0) {
-		int tmp;
-		/* make sure the fan is on when in on/off mode */
-		tmp = it87_read_value(data, IT87_REG_FAN_CTL);
-		it87_write_value(data, IT87_REG_FAN_CTL, tmp | (1 << nr));
-		/* set on/off mode */
-		data->fan_main_ctrl &= ~(1 << nr);
-		it87_write_value(data, IT87_REG_FAN_MAIN_CTRL,
-				 data->fan_main_ctrl);
+		if (nr < 3 && data->type != it8603) {
+			int tmp;
+			/* make sure the fan is on when in on/off mode */
+			tmp = it87_read_value(data, IT87_REG_FAN_CTL);
+			it87_write_value(data, IT87_REG_FAN_CTL, tmp | BIT(nr));
+			/* set on/off mode */
+			data->fan_main_ctrl &= ~BIT(nr);
+			it87_write_value(data, IT87_REG_FAN_MAIN_CTRL,
+					 data->fan_main_ctrl);
+		} else {
+			/* No on/off mode, set maximum pwm value */
+			data->pwm_duty[nr] = pwm_to_reg(data, 0xff);
+			it87_write_value(data, IT87_REG_PWM_DUTY[nr],
+					 data->pwm_duty[nr]);
+			/* and set manual mode */
+			data->pwm_ctrl[nr] = has_newer_autopwm(data) ?
+					     data->pwm_temp_map[nr] :
+					     data->pwm_duty[nr];
+			it87_write_value(data, IT87_REG_PWM[nr],
+					 data->pwm_ctrl[nr]);
+		}
 	} else {
 		if (val == 1)				/* Manual mode */
 			data->pwm_ctrl[nr] = has_newer_autopwm(data) ?
@@ -1063,11 +1318,11 @@
 					     data->pwm_duty[nr];
 		else					/* Automatic mode */
 			data->pwm_ctrl[nr] = 0x80 | data->pwm_temp_map[nr];
-		it87_write_value(data, IT87_REG_PWM(nr), data->pwm_ctrl[nr]);
+		it87_write_value(data, IT87_REG_PWM[nr], data->pwm_ctrl[nr]);
 
-		if (data->type != it8603) {
+		if (data->type != it8603 && nr < 3) {
 			/* set SmartGuardian mode */
-			data->fan_main_ctrl |= (1 << nr);
+			data->fan_main_ctrl |= BIT(nr);
 			it87_write_value(data, IT87_REG_FAN_MAIN_CTRL,
 					 data->fan_main_ctrl);
 		}
@@ -1076,13 +1331,13 @@
 	mutex_unlock(&data->update_lock);
 	return count;
 }
+
 static ssize_t set_pwm(struct device *dev, struct device_attribute *attr,
-		const char *buf, size_t count)
+		       const char *buf, size_t count)
 {
 	struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
-	int nr = sensor_attr->index;
-
 	struct it87_data *data = dev_get_drvdata(dev);
+	int nr = sensor_attr->index;
 	long val;
 
 	if (kstrtol(buf, 10, &val) < 0 || val < 0 || val > 255)
@@ -1099,7 +1354,7 @@
 			return -EBUSY;
 		}
 		data->pwm_duty[nr] = pwm_to_reg(data, val);
-		it87_write_value(data, IT87_REG_PWM_DUTY(nr),
+		it87_write_value(data, IT87_REG_PWM_DUTY[nr],
 				 data->pwm_duty[nr]);
 	} else {
 		data->pwm_duty[nr] = pwm_to_reg(data, val);
@@ -1109,17 +1364,20 @@
 		 */
 		if (!(data->pwm_ctrl[nr] & 0x80)) {
 			data->pwm_ctrl[nr] = data->pwm_duty[nr];
-			it87_write_value(data, IT87_REG_PWM(nr),
+			it87_write_value(data, IT87_REG_PWM[nr],
 					 data->pwm_ctrl[nr]);
 		}
 	}
 	mutex_unlock(&data->update_lock);
 	return count;
 }
-static ssize_t set_pwm_freq(struct device *dev,
-		struct device_attribute *attr, const char *buf, size_t count)
+
+static ssize_t set_pwm_freq(struct device *dev, struct device_attribute *attr,
+			    const char *buf, size_t count)
 {
+	struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
 	struct it87_data *data = dev_get_drvdata(dev);
+	int nr = sensor_attr->index;
 	unsigned long val;
 	int i;
 
@@ -1131,63 +1389,66 @@
 
 	/* Search for the nearest available frequency */
 	for (i = 0; i < 7; i++) {
-		if (val > (pwm_freq[i] + pwm_freq[i+1]) / 2)
+		if (val > (pwm_freq[i] + pwm_freq[i + 1]) / 2)
 			break;
 	}
 
 	mutex_lock(&data->update_lock);
-	data->fan_ctl = it87_read_value(data, IT87_REG_FAN_CTL) & 0x8f;
-	data->fan_ctl |= i << 4;
-	it87_write_value(data, IT87_REG_FAN_CTL, data->fan_ctl);
+	if (nr == 0) {
+		data->fan_ctl = it87_read_value(data, IT87_REG_FAN_CTL) & 0x8f;
+		data->fan_ctl |= i << 4;
+		it87_write_value(data, IT87_REG_FAN_CTL, data->fan_ctl);
+	} else {
+		data->extra = it87_read_value(data, IT87_REG_TEMP_EXTRA) & 0x8f;
+		data->extra |= i << 4;
+		it87_write_value(data, IT87_REG_TEMP_EXTRA, data->extra);
+	}
 	mutex_unlock(&data->update_lock);
 
 	return count;
 }
+
 static ssize_t show_pwm_temp_map(struct device *dev,
-		struct device_attribute *attr, char *buf)
+				 struct device_attribute *attr, char *buf)
 {
 	struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
-	int nr = sensor_attr->index;
-
 	struct it87_data *data = it87_update_device(dev);
+	int nr = sensor_attr->index;
 	int map;
 
-	if (data->pwm_temp_map[nr] < 3)
-		map = 1 << data->pwm_temp_map[nr];
-	else
-		map = 0;			/* Should never happen */
-	return sprintf(buf, "%d\n", map);
+	map = data->pwm_temp_map[nr];
+	if (map >= 3)
+		map = 0;	/* Should never happen */
+	if (nr >= 3)		/* pwm4..6 map to temp4..6 */
+		map += 3;
+
+	return sprintf(buf, "%d\n", (int)BIT(map));
 }
+
 static ssize_t set_pwm_temp_map(struct device *dev,
-		struct device_attribute *attr, const char *buf, size_t count)
+				struct device_attribute *attr, const char *buf,
+				size_t count)
 {
 	struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
-	int nr = sensor_attr->index;
-
 	struct it87_data *data = dev_get_drvdata(dev);
+	int nr = sensor_attr->index;
 	long val;
 	u8 reg;
 
-	/*
-	 * This check can go away if we ever support automatic fan speed
-	 * control on newer chips.
-	 */
-	if (!has_old_autopwm(data)) {
-		dev_notice(dev, "Mapping change disabled for safety reasons\n");
-		return -EINVAL;
-	}
-
 	if (kstrtol(buf, 10, &val) < 0)
 		return -EINVAL;
 
+	if (nr >= 3)		/* pwm4..6 report the temp4..6 encoding */
+		val >>= 3;
+
 	switch (val) {
-	case (1 << 0):
+	case BIT(0):
 		reg = 0x00;
 		break;
-	case (1 << 1):
+	case BIT(1):
 		reg = 0x01;
 		break;
-	case (1 << 2):
+	case BIT(2):
 		reg = 0x02;
 		break;
 	default:
@@ -1202,14 +1463,14 @@
 	 */
 	if (data->pwm_ctrl[nr] & 0x80) {
 		data->pwm_ctrl[nr] = 0x80 | data->pwm_temp_map[nr];
-		it87_write_value(data, IT87_REG_PWM(nr), data->pwm_ctrl[nr]);
+		it87_write_value(data, IT87_REG_PWM[nr], data->pwm_ctrl[nr]);
 	}
 	mutex_unlock(&data->update_lock);
 	return count;
 }
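+
+/*
+ * Illustration: "echo 2 > pwm1_auto_channels_temp" binds pwm1 to temp2
+ * (BIT(1)), and "echo 16 > pwm5_auto_channels_temp" binds pwm5 to temp5,
+ * since the upper pwm channels use the temp4..6 encoding BIT(3)..BIT(5).
+ */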
 
-static ssize_t show_auto_pwm(struct device *dev,
-		struct device_attribute *attr, char *buf)
+static ssize_t show_auto_pwm(struct device *dev, struct device_attribute *attr,
+			     char *buf)
 {
 	struct it87_data *data = it87_update_device(dev);
 	struct sensor_device_attribute_2 *sensor_attr =
@@ -1221,14 +1482,15 @@
 		       pwm_from_reg(data, data->auto_pwm[nr][point]));
 }
 
-static ssize_t set_auto_pwm(struct device *dev,
-		struct device_attribute *attr, const char *buf, size_t count)
+static ssize_t set_auto_pwm(struct device *dev, struct device_attribute *attr,
+			    const char *buf, size_t count)
 {
 	struct it87_data *data = dev_get_drvdata(dev);
 	struct sensor_device_attribute_2 *sensor_attr =
 			to_sensor_dev_attr_2(attr);
 	int nr = sensor_attr->nr;
 	int point = sensor_attr->index;
+	int regaddr;
 	long val;
 
 	if (kstrtol(buf, 10, &val) < 0 || val < 0 || val > 255)
@@ -1236,26 +1498,65 @@
 
 	mutex_lock(&data->update_lock);
 	data->auto_pwm[nr][point] = pwm_to_reg(data, val);
-	it87_write_value(data, IT87_REG_AUTO_PWM(nr, point),
-			 data->auto_pwm[nr][point]);
+	if (has_newer_autopwm(data))
+		regaddr = IT87_REG_AUTO_TEMP(nr, 3);
+	else
+		regaddr = IT87_REG_AUTO_PWM(nr, point);
+	it87_write_value(data, regaddr, data->auto_pwm[nr][point]);
 	mutex_unlock(&data->update_lock);
 	return count;
 }
 
-static ssize_t show_auto_temp(struct device *dev,
-		struct device_attribute *attr, char *buf)
+static ssize_t show_auto_pwm_slope(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct it87_data *data = it87_update_device(dev);
+	struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
+	int nr = sensor_attr->index;
+
+	return sprintf(buf, "%d\n", data->auto_pwm[nr][1] & 0x7f);
+}
+
+static ssize_t set_auto_pwm_slope(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
+{
+	struct it87_data *data = dev_get_drvdata(dev);
+	struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
+	int nr = sensor_attr->index;
+	unsigned long val;
+
+	if (kstrtoul(buf, 10, &val) < 0 || val > 127)
+		return -EINVAL;
+
+	mutex_lock(&data->update_lock);
+	data->auto_pwm[nr][1] = (data->auto_pwm[nr][1] & 0x80) | val;
+	it87_write_value(data, IT87_REG_AUTO_TEMP(nr, 4),
+			 data->auto_pwm[nr][1]);
+	mutex_unlock(&data->update_lock);
+	return count;
+}
+
+static ssize_t show_auto_temp(struct device *dev, struct device_attribute *attr,
+			      char *buf)
 {
 	struct it87_data *data = it87_update_device(dev);
 	struct sensor_device_attribute_2 *sensor_attr =
 			to_sensor_dev_attr_2(attr);
 	int nr = sensor_attr->nr;
 	int point = sensor_attr->index;
+	int reg;
 
-	return sprintf(buf, "%d\n", TEMP_FROM_REG(data->auto_temp[nr][point]));
+	if (has_old_autopwm(data) || point)
+		reg = data->auto_temp[nr][point];
+	else
+		reg = data->auto_temp[nr][1] - (data->auto_temp[nr][0] & 0x1f);
+
+	return sprintf(buf, "%d\n", TEMP_FROM_REG(reg));
 }
 
-static ssize_t set_auto_temp(struct device *dev,
-		struct device_attribute *attr, const char *buf, size_t count)
+static ssize_t set_auto_temp(struct device *dev, struct device_attribute *attr,
+			     const char *buf, size_t count)
 {
 	struct it87_data *data = dev_get_drvdata(dev);
 	struct sensor_device_attribute_2 *sensor_attr =
@@ -1263,14 +1564,24 @@
 	int nr = sensor_attr->nr;
 	int point = sensor_attr->index;
 	long val;
+	int reg;
 
 	if (kstrtol(buf, 10, &val) < 0 || val < -128000 || val > 127000)
 		return -EINVAL;
 
 	mutex_lock(&data->update_lock);
-	data->auto_temp[nr][point] = TEMP_TO_REG(val);
-	it87_write_value(data, IT87_REG_AUTO_TEMP(nr, point),
-			 data->auto_temp[nr][point]);
+	if (has_newer_autopwm(data) && !point) {
+		reg = data->auto_temp[nr][1] - TEMP_TO_REG(val);
+		reg = clamp_val(reg, 0, 0x1f) | (data->auto_temp[nr][0] & 0xe0);
+		data->auto_temp[nr][0] = reg;
+		it87_write_value(data, IT87_REG_AUTO_TEMP(nr, 5), reg);
+	} else {
+		reg = TEMP_TO_REG(val);
+		data->auto_temp[nr][point] = reg;
+		if (has_newer_autopwm(data))
+			point--;
+		it87_write_value(data, IT87_REG_AUTO_TEMP(nr, point), reg);
+	}
 	mutex_unlock(&data->update_lock);
 	return count;
 }
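+
+/*
+ * On chips with the newer autopwm registers, point 0 (the hysteresis) is
+ * stored as an offset below the fan-off temperature in the low 5 bits of
+ * base + 5: with point1 at 40°C, writing a 36°C hysteresis stores the
+ * value 4.
+ */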
@@ -1308,8 +1619,9 @@
 static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR,
 			  show_pwm_enable, set_pwm_enable, 0);
 static SENSOR_DEVICE_ATTR(pwm1, S_IRUGO | S_IWUSR, show_pwm, set_pwm, 0);
-static DEVICE_ATTR(pwm1_freq, S_IRUGO | S_IWUSR, show_pwm_freq, set_pwm_freq);
-static SENSOR_DEVICE_ATTR(pwm1_auto_channels_temp, S_IRUGO | S_IWUSR,
+static SENSOR_DEVICE_ATTR(pwm1_freq, S_IRUGO | S_IWUSR, show_pwm_freq,
+			  set_pwm_freq, 0);
+static SENSOR_DEVICE_ATTR(pwm1_auto_channels_temp, S_IRUGO,
 			  show_pwm_temp_map, set_pwm_temp_map, 0);
 static SENSOR_DEVICE_ATTR_2(pwm1_auto_point1_pwm, S_IRUGO | S_IWUSR,
 			    show_auto_pwm, set_auto_pwm, 0, 0);
@@ -1329,12 +1641,16 @@
 			    show_auto_temp, set_auto_temp, 0, 3);
 static SENSOR_DEVICE_ATTR_2(pwm1_auto_point4_temp, S_IRUGO | S_IWUSR,
 			    show_auto_temp, set_auto_temp, 0, 4);
+static SENSOR_DEVICE_ATTR_2(pwm1_auto_start, S_IRUGO | S_IWUSR,
+			    show_auto_pwm, set_auto_pwm, 0, 0);
+static SENSOR_DEVICE_ATTR(pwm1_auto_slope, S_IRUGO | S_IWUSR,
+			  show_auto_pwm_slope, set_auto_pwm_slope, 0);
 
 static SENSOR_DEVICE_ATTR(pwm2_enable, S_IRUGO | S_IWUSR,
 			  show_pwm_enable, set_pwm_enable, 1);
 static SENSOR_DEVICE_ATTR(pwm2, S_IRUGO | S_IWUSR, show_pwm, set_pwm, 1);
-static DEVICE_ATTR(pwm2_freq, S_IRUGO, show_pwm_freq, NULL);
-static SENSOR_DEVICE_ATTR(pwm2_auto_channels_temp, S_IRUGO | S_IWUSR,
+static SENSOR_DEVICE_ATTR(pwm2_freq, S_IRUGO, show_pwm_freq, set_pwm_freq, 1);
+static SENSOR_DEVICE_ATTR(pwm2_auto_channels_temp, S_IRUGO,
 			  show_pwm_temp_map, set_pwm_temp_map, 1);
 static SENSOR_DEVICE_ATTR_2(pwm2_auto_point1_pwm, S_IRUGO | S_IWUSR,
 			    show_auto_pwm, set_auto_pwm, 1, 0);
@@ -1354,12 +1670,16 @@
 			    show_auto_temp, set_auto_temp, 1, 3);
 static SENSOR_DEVICE_ATTR_2(pwm2_auto_point4_temp, S_IRUGO | S_IWUSR,
 			    show_auto_temp, set_auto_temp, 1, 4);
+static SENSOR_DEVICE_ATTR_2(pwm2_auto_start, S_IRUGO | S_IWUSR,
+			    show_auto_pwm, set_auto_pwm, 1, 0);
+static SENSOR_DEVICE_ATTR(pwm2_auto_slope, S_IRUGO | S_IWUSR,
+			  show_auto_pwm_slope, set_auto_pwm_slope, 1);
 
 static SENSOR_DEVICE_ATTR(pwm3_enable, S_IRUGO | S_IWUSR,
 			  show_pwm_enable, set_pwm_enable, 2);
 static SENSOR_DEVICE_ATTR(pwm3, S_IRUGO | S_IWUSR, show_pwm, set_pwm, 2);
-static DEVICE_ATTR(pwm3_freq, S_IRUGO, show_pwm_freq, NULL);
-static SENSOR_DEVICE_ATTR(pwm3_auto_channels_temp, S_IRUGO | S_IWUSR,
+static SENSOR_DEVICE_ATTR(pwm3_freq, S_IRUGO, show_pwm_freq, NULL, 2);
+static SENSOR_DEVICE_ATTR(pwm3_auto_channels_temp, S_IRUGO,
 			  show_pwm_temp_map, set_pwm_temp_map, 2);
 static SENSOR_DEVICE_ATTR_2(pwm3_auto_point1_pwm, S_IRUGO | S_IWUSR,
 			    show_auto_pwm, set_auto_pwm, 2, 0);
@@ -1379,30 +1699,94 @@
 			    show_auto_temp, set_auto_temp, 2, 3);
 static SENSOR_DEVICE_ATTR_2(pwm3_auto_point4_temp, S_IRUGO | S_IWUSR,
 			    show_auto_temp, set_auto_temp, 2, 4);
+static SENSOR_DEVICE_ATTR_2(pwm3_auto_start, S_IRUGO | S_IWUSR,
+			    show_auto_pwm, set_auto_pwm, 2, 0);
+static SENSOR_DEVICE_ATTR(pwm3_auto_slope, S_IRUGO | S_IWUSR,
+			  show_auto_pwm_slope, set_auto_pwm_slope, 2);
+
+static SENSOR_DEVICE_ATTR(pwm4_enable, S_IRUGO | S_IWUSR,
+			  show_pwm_enable, set_pwm_enable, 3);
+static SENSOR_DEVICE_ATTR(pwm4, S_IRUGO | S_IWUSR, show_pwm, set_pwm, 3);
+static SENSOR_DEVICE_ATTR(pwm4_freq, S_IRUGO, show_pwm_freq, NULL, 3);
+static SENSOR_DEVICE_ATTR(pwm4_auto_channels_temp, S_IRUGO,
+			  show_pwm_temp_map, set_pwm_temp_map, 3);
+static SENSOR_DEVICE_ATTR_2(pwm4_auto_point1_temp, S_IRUGO | S_IWUSR,
+			    show_auto_temp, set_auto_temp, 3, 1);
+static SENSOR_DEVICE_ATTR_2(pwm4_auto_point1_temp_hyst, S_IRUGO | S_IWUSR,
+			    show_auto_temp, set_auto_temp, 3, 0);
+static SENSOR_DEVICE_ATTR_2(pwm4_auto_point2_temp, S_IRUGO | S_IWUSR,
+			    show_auto_temp, set_auto_temp, 3, 2);
+static SENSOR_DEVICE_ATTR_2(pwm4_auto_point3_temp, S_IRUGO | S_IWUSR,
+			    show_auto_temp, set_auto_temp, 3, 3);
+static SENSOR_DEVICE_ATTR_2(pwm4_auto_start, S_IRUGO | S_IWUSR,
+			    show_auto_pwm, set_auto_pwm, 3, 0);
+static SENSOR_DEVICE_ATTR(pwm4_auto_slope, S_IRUGO | S_IWUSR,
+			  show_auto_pwm_slope, set_auto_pwm_slope, 3);
+
+static SENSOR_DEVICE_ATTR(pwm5_enable, S_IRUGO | S_IWUSR,
+			  show_pwm_enable, set_pwm_enable, 4);
+static SENSOR_DEVICE_ATTR(pwm5, S_IRUGO | S_IWUSR, show_pwm, set_pwm, 4);
+static SENSOR_DEVICE_ATTR(pwm5_freq, S_IRUGO, show_pwm_freq, NULL, 4);
+static SENSOR_DEVICE_ATTR(pwm5_auto_channels_temp, S_IRUGO,
+			  show_pwm_temp_map, set_pwm_temp_map, 4);
+static SENSOR_DEVICE_ATTR_2(pwm5_auto_point1_temp, S_IRUGO | S_IWUSR,
+			    show_auto_temp, set_auto_temp, 4, 1);
+static SENSOR_DEVICE_ATTR_2(pwm5_auto_point1_temp_hyst, S_IRUGO | S_IWUSR,
+			    show_auto_temp, set_auto_temp, 4, 0);
+static SENSOR_DEVICE_ATTR_2(pwm5_auto_point2_temp, S_IRUGO | S_IWUSR,
+			    show_auto_temp, set_auto_temp, 4, 2);
+static SENSOR_DEVICE_ATTR_2(pwm5_auto_point3_temp, S_IRUGO | S_IWUSR,
+			    show_auto_temp, set_auto_temp, 4, 3);
+static SENSOR_DEVICE_ATTR_2(pwm5_auto_start, S_IRUGO | S_IWUSR,
+			    show_auto_pwm, set_auto_pwm, 4, 0);
+static SENSOR_DEVICE_ATTR(pwm5_auto_slope, S_IRUGO | S_IWUSR,
+			  show_auto_pwm_slope, set_auto_pwm_slope, 4);
+
+static SENSOR_DEVICE_ATTR(pwm6_enable, S_IRUGO | S_IWUSR,
+			  show_pwm_enable, set_pwm_enable, 5);
+static SENSOR_DEVICE_ATTR(pwm6, S_IRUGO | S_IWUSR, show_pwm, set_pwm, 5);
+static SENSOR_DEVICE_ATTR(pwm6_freq, S_IRUGO, show_pwm_freq, NULL, 5);
+static SENSOR_DEVICE_ATTR(pwm6_auto_channels_temp, S_IRUGO,
+			  show_pwm_temp_map, set_pwm_temp_map, 5);
+static SENSOR_DEVICE_ATTR_2(pwm6_auto_point1_temp, S_IRUGO | S_IWUSR,
+			    show_auto_temp, set_auto_temp, 5, 1);
+static SENSOR_DEVICE_ATTR_2(pwm6_auto_point1_temp_hyst, S_IRUGO | S_IWUSR,
+			    show_auto_temp, set_auto_temp, 5, 0);
+static SENSOR_DEVICE_ATTR_2(pwm6_auto_point2_temp, S_IRUGO | S_IWUSR,
+			    show_auto_temp, set_auto_temp, 5, 2);
+static SENSOR_DEVICE_ATTR_2(pwm6_auto_point3_temp, S_IRUGO | S_IWUSR,
+			    show_auto_temp, set_auto_temp, 5, 3);
+static SENSOR_DEVICE_ATTR_2(pwm6_auto_start, S_IRUGO | S_IWUSR,
+			    show_auto_pwm, set_auto_pwm, 5, 0);
+static SENSOR_DEVICE_ATTR(pwm6_auto_slope, S_IRUGO | S_IWUSR,
+			  show_auto_pwm_slope, set_auto_pwm_slope, 5);
 
 /* Alarms */
 static ssize_t show_alarms(struct device *dev, struct device_attribute *attr,
-		char *buf)
+			   char *buf)
 {
 	struct it87_data *data = it87_update_device(dev);
+
 	return sprintf(buf, "%u\n", data->alarms);
 }
 static DEVICE_ATTR(alarms, S_IRUGO, show_alarms, NULL);
 
 static ssize_t show_alarm(struct device *dev, struct device_attribute *attr,
-		char *buf)
+			  char *buf)
 {
-	int bitnr = to_sensor_dev_attr(attr)->index;
 	struct it87_data *data = it87_update_device(dev);
+	int bitnr = to_sensor_dev_attr(attr)->index;
+
 	return sprintf(buf, "%u\n", (data->alarms >> bitnr) & 1);
 }
 
-static ssize_t clear_intrusion(struct device *dev, struct device_attribute
-		*attr, const char *buf, size_t count)
+static ssize_t clear_intrusion(struct device *dev,
+			       struct device_attribute *attr, const char *buf,
+			       size_t count)
 {
 	struct it87_data *data = dev_get_drvdata(dev);
-	long val;
 	int config;
+	long val;
 
 	if (kstrtol(buf, 10, &val) < 0 || val != 0)
 		return -EINVAL;
@@ -1412,7 +1796,7 @@
 	if (config < 0) {
 		count = config;
 	} else {
-		config |= 1 << 5;
+		config |= BIT(5);
 		it87_write_value(data, IT87_REG_CONFIG, config);
 		/* Invalidate cache to force re-read */
 		data->valid = 0;
@@ -1443,29 +1827,30 @@
 			  show_alarm, clear_intrusion, 4);
 
 static ssize_t show_beep(struct device *dev, struct device_attribute *attr,
-		char *buf)
+			 char *buf)
 {
-	int bitnr = to_sensor_dev_attr(attr)->index;
 	struct it87_data *data = it87_update_device(dev);
+	int bitnr = to_sensor_dev_attr(attr)->index;
+
 	return sprintf(buf, "%u\n", (data->beeps >> bitnr) & 1);
 }
+
 static ssize_t set_beep(struct device *dev, struct device_attribute *attr,
-		const char *buf, size_t count)
+			const char *buf, size_t count)
 {
 	int bitnr = to_sensor_dev_attr(attr)->index;
 	struct it87_data *data = dev_get_drvdata(dev);
 	long val;
 
-	if (kstrtol(buf, 10, &val) < 0
-	 || (val != 0 && val != 1))
+	if (kstrtol(buf, 10, &val) < 0 || (val != 0 && val != 1))
 		return -EINVAL;
 
 	mutex_lock(&data->update_lock);
 	data->beeps = it87_read_value(data, IT87_REG_BEEP_ENABLE);
 	if (val)
-		data->beeps |= (1 << bitnr);
+		data->beeps |= BIT(bitnr);
 	else
-		data->beeps &= ~(1 << bitnr);
+		data->beeps &= ~BIT(bitnr);
 	it87_write_value(data, IT87_REG_BEEP_ENABLE, data->beeps);
 	mutex_unlock(&data->update_lock);
 	return count;
@@ -1493,13 +1878,15 @@
 static SENSOR_DEVICE_ATTR(temp3_beep, S_IRUGO, show_beep, NULL, 2);
 
 static ssize_t show_vrm_reg(struct device *dev, struct device_attribute *attr,
-		char *buf)
+			    char *buf)
 {
 	struct it87_data *data = dev_get_drvdata(dev);
+
 	return sprintf(buf, "%u\n", data->vrm);
 }
+
 static ssize_t store_vrm_reg(struct device *dev, struct device_attribute *attr,
-		const char *buf, size_t count)
+			     const char *buf, size_t count)
 {
 	struct it87_data *data = dev_get_drvdata(dev);
 	unsigned long val;
@@ -1514,15 +1901,16 @@
 static DEVICE_ATTR(vrm, S_IRUGO | S_IWUSR, show_vrm_reg, store_vrm_reg);
 
 static ssize_t show_vid_reg(struct device *dev, struct device_attribute *attr,
-		char *buf)
+			    char *buf)
 {
 	struct it87_data *data = it87_update_device(dev);
-	return sprintf(buf, "%ld\n", (long) vid_from_reg(data->vid, data->vrm));
+
+	return sprintf(buf, "%ld\n", (long)vid_from_reg(data->vid, data->vrm));
 }
 static DEVICE_ATTR(cpu0_vid, S_IRUGO, show_vid_reg, NULL);
 
 static ssize_t show_label(struct device *dev, struct device_attribute *attr,
-		char *buf)
+			  char *buf)
 {
 	static const char * const labels[] = {
 		"+5V",
@@ -1548,227 +1936,348 @@
 static SENSOR_DEVICE_ATTR(in3_label, S_IRUGO, show_label, NULL, 0);
 static SENSOR_DEVICE_ATTR(in7_label, S_IRUGO, show_label, NULL, 1);
 static SENSOR_DEVICE_ATTR(in8_label, S_IRUGO, show_label, NULL, 2);
-/* special AVCC3 IT8603E in9 */
+/* AVCC3 */
 static SENSOR_DEVICE_ATTR(in9_label, S_IRUGO, show_label, NULL, 0);
 
-static ssize_t show_name(struct device *dev, struct device_attribute
-			 *devattr, char *buf)
+static umode_t it87_in_is_visible(struct kobject *kobj,
+				  struct attribute *attr, int index)
 {
+	struct device *dev = container_of(kobj, struct device, kobj);
 	struct it87_data *data = dev_get_drvdata(dev);
-	return sprintf(buf, "%s\n", data->name);
-}
-static DEVICE_ATTR(name, S_IRUGO, show_name, NULL);
+	int i = index / 5;	/* voltage index */
+	int a = index % 5;	/* attribute index */
 
-static struct attribute *it87_attributes_in[10][5] = {
-{
+	if (index >= 40) {	/* in8 and higher only have input attributes */
+		i = index - 40 + 8;
+		a = 0;
+	}
+
+	if (!(data->has_in & BIT(i)))
+		return 0;
+
+	if (a == 4 && !data->has_beep)
+		return 0;
+
+	return attr->mode;
+}
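+
+/*
+ * Index layout example: index 17 decodes as i = 3, a = 2, i.e. in3_max;
+ * index 41 falls into the input-only tail and maps to in9_input.
+ */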
+
+static struct attribute *it87_attributes_in[] = {
 	&sensor_dev_attr_in0_input.dev_attr.attr,
 	&sensor_dev_attr_in0_min.dev_attr.attr,
 	&sensor_dev_attr_in0_max.dev_attr.attr,
 	&sensor_dev_attr_in0_alarm.dev_attr.attr,
-	NULL
-}, {
+	&sensor_dev_attr_in0_beep.dev_attr.attr,	/* 4 */
+
 	&sensor_dev_attr_in1_input.dev_attr.attr,
 	&sensor_dev_attr_in1_min.dev_attr.attr,
 	&sensor_dev_attr_in1_max.dev_attr.attr,
 	&sensor_dev_attr_in1_alarm.dev_attr.attr,
-	NULL
-}, {
+	&sensor_dev_attr_in1_beep.dev_attr.attr,	/* 9 */
+
 	&sensor_dev_attr_in2_input.dev_attr.attr,
 	&sensor_dev_attr_in2_min.dev_attr.attr,
 	&sensor_dev_attr_in2_max.dev_attr.attr,
 	&sensor_dev_attr_in2_alarm.dev_attr.attr,
-	NULL
-}, {
+	&sensor_dev_attr_in2_beep.dev_attr.attr,	/* 14 */
+
 	&sensor_dev_attr_in3_input.dev_attr.attr,
 	&sensor_dev_attr_in3_min.dev_attr.attr,
 	&sensor_dev_attr_in3_max.dev_attr.attr,
 	&sensor_dev_attr_in3_alarm.dev_attr.attr,
-	NULL
-}, {
+	&sensor_dev_attr_in3_beep.dev_attr.attr,	/* 19 */
+
 	&sensor_dev_attr_in4_input.dev_attr.attr,
 	&sensor_dev_attr_in4_min.dev_attr.attr,
 	&sensor_dev_attr_in4_max.dev_attr.attr,
 	&sensor_dev_attr_in4_alarm.dev_attr.attr,
-	NULL
-}, {
+	&sensor_dev_attr_in4_beep.dev_attr.attr,	/* 24 */
+
 	&sensor_dev_attr_in5_input.dev_attr.attr,
 	&sensor_dev_attr_in5_min.dev_attr.attr,
 	&sensor_dev_attr_in5_max.dev_attr.attr,
 	&sensor_dev_attr_in5_alarm.dev_attr.attr,
-	NULL
-}, {
+	&sensor_dev_attr_in5_beep.dev_attr.attr,	/* 29 */
+
 	&sensor_dev_attr_in6_input.dev_attr.attr,
 	&sensor_dev_attr_in6_min.dev_attr.attr,
 	&sensor_dev_attr_in6_max.dev_attr.attr,
 	&sensor_dev_attr_in6_alarm.dev_attr.attr,
-	NULL
-}, {
+	&sensor_dev_attr_in6_beep.dev_attr.attr,	/* 34 */
+
 	&sensor_dev_attr_in7_input.dev_attr.attr,
 	&sensor_dev_attr_in7_min.dev_attr.attr,
 	&sensor_dev_attr_in7_max.dev_attr.attr,
 	&sensor_dev_attr_in7_alarm.dev_attr.attr,
-	NULL
-}, {
-	&sensor_dev_attr_in8_input.dev_attr.attr,
-	NULL
-}, {
-	&sensor_dev_attr_in9_input.dev_attr.attr,
-	NULL
-} };
+	&sensor_dev_attr_in7_beep.dev_attr.attr,	/* 39 */
 
-static const struct attribute_group it87_group_in[10] = {
-	{ .attrs = it87_attributes_in[0] },
-	{ .attrs = it87_attributes_in[1] },
-	{ .attrs = it87_attributes_in[2] },
-	{ .attrs = it87_attributes_in[3] },
-	{ .attrs = it87_attributes_in[4] },
-	{ .attrs = it87_attributes_in[5] },
-	{ .attrs = it87_attributes_in[6] },
-	{ .attrs = it87_attributes_in[7] },
-	{ .attrs = it87_attributes_in[8] },
-	{ .attrs = it87_attributes_in[9] },
+	&sensor_dev_attr_in8_input.dev_attr.attr,	/* 40 */
+	&sensor_dev_attr_in9_input.dev_attr.attr,	/* 41 */
+	&sensor_dev_attr_in10_input.dev_attr.attr,	/* 42 */
+	&sensor_dev_attr_in11_input.dev_attr.attr,	/* 43 */
+	&sensor_dev_attr_in12_input.dev_attr.attr,	/* 44 */
+	NULL
 };
 
-static struct attribute *it87_attributes_temp[3][6] = {
+static const struct attribute_group it87_group_in = {
+	.attrs = it87_attributes_in,
+	.is_visible = it87_in_is_visible,
+};
+
+static umode_t it87_temp_is_visible(struct kobject *kobj,
+				    struct attribute *attr, int index)
 {
+	struct device *dev = container_of(kobj, struct device, kobj);
+	struct it87_data *data = dev_get_drvdata(dev);
+	int i = index / 7;	/* temperature index */
+	int a = index % 7;	/* attribute index */
+
+	if (index >= 21) {
+		i = index - 21 + 3;
+		a = 0;
+	}
+
+	if (!(data->has_temp & BIT(i)))
+		return 0;
+
+	if (a == 5 && !has_temp_offset(data))
+		return 0;
+
+	if (a == 6 && !data->has_beep)
+		return 0;
+
+	return attr->mode;
+}
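+
+/*
+ * Example: index 12 decodes as i = 1, a = 5 (temp2_offset), visible only
+ * when the chip has temperature offset registers; index 22 maps to
+ * temp5_input in the input-only tail.
+ */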
+
+static struct attribute *it87_attributes_temp[] = {
 	&sensor_dev_attr_temp1_input.dev_attr.attr,
 	&sensor_dev_attr_temp1_max.dev_attr.attr,
 	&sensor_dev_attr_temp1_min.dev_attr.attr,
 	&sensor_dev_attr_temp1_type.dev_attr.attr,
 	&sensor_dev_attr_temp1_alarm.dev_attr.attr,
-	NULL
-} , {
-	&sensor_dev_attr_temp2_input.dev_attr.attr,
+	&sensor_dev_attr_temp1_offset.dev_attr.attr,	/* 5 */
+	&sensor_dev_attr_temp1_beep.dev_attr.attr,	/* 6 */
+
+	&sensor_dev_attr_temp2_input.dev_attr.attr,	/* 7 */
 	&sensor_dev_attr_temp2_max.dev_attr.attr,
 	&sensor_dev_attr_temp2_min.dev_attr.attr,
 	&sensor_dev_attr_temp2_type.dev_attr.attr,
 	&sensor_dev_attr_temp2_alarm.dev_attr.attr,
-	NULL
-} , {
-	&sensor_dev_attr_temp3_input.dev_attr.attr,
+	&sensor_dev_attr_temp2_offset.dev_attr.attr,
+	&sensor_dev_attr_temp2_beep.dev_attr.attr,
+
+	&sensor_dev_attr_temp3_input.dev_attr.attr,	/* 14 */
 	&sensor_dev_attr_temp3_max.dev_attr.attr,
 	&sensor_dev_attr_temp3_min.dev_attr.attr,
 	&sensor_dev_attr_temp3_type.dev_attr.attr,
 	&sensor_dev_attr_temp3_alarm.dev_attr.attr,
-	NULL
-} };
-
-static const struct attribute_group it87_group_temp[3] = {
-	{ .attrs = it87_attributes_temp[0] },
-	{ .attrs = it87_attributes_temp[1] },
-	{ .attrs = it87_attributes_temp[2] },
-};
-
-static struct attribute *it87_attributes_temp_offset[] = {
-	&sensor_dev_attr_temp1_offset.dev_attr.attr,
-	&sensor_dev_attr_temp2_offset.dev_attr.attr,
 	&sensor_dev_attr_temp3_offset.dev_attr.attr,
+	&sensor_dev_attr_temp3_beep.dev_attr.attr,
+
+	&sensor_dev_attr_temp4_input.dev_attr.attr,	/* 21 */
+	&sensor_dev_attr_temp5_input.dev_attr.attr,
+	&sensor_dev_attr_temp6_input.dev_attr.attr,
+	NULL
 };
 
+static const struct attribute_group it87_group_temp = {
+	.attrs = it87_attributes_temp,
+	.is_visible = it87_temp_is_visible,
+};
+
+static umode_t it87_is_visible(struct kobject *kobj,
+			       struct attribute *attr, int index)
+{
+	struct device *dev = container_of(kobj, struct device, kobj);
+	struct it87_data *data = dev_get_drvdata(dev);
+
+	if ((index == 2 || index == 3) && !data->has_vid)
+		return 0;
+
+	if (index > 3 && !(data->in_internal & BIT(index - 4)))
+		return 0;
+
+	return attr->mode;
+}
+
 static struct attribute *it87_attributes[] = {
 	&dev_attr_alarms.attr,
 	&sensor_dev_attr_intrusion0_alarm.dev_attr.attr,
-	&dev_attr_name.attr,
+	&dev_attr_vrm.attr,				/* 2 */
+	&dev_attr_cpu0_vid.attr,			/* 3 */
+	&sensor_dev_attr_in3_label.dev_attr.attr,	/* 4 .. 7 */
+	&sensor_dev_attr_in7_label.dev_attr.attr,
+	&sensor_dev_attr_in8_label.dev_attr.attr,
+	&sensor_dev_attr_in9_label.dev_attr.attr,
 	NULL
 };
 
 static const struct attribute_group it87_group = {
 	.attrs = it87_attributes,
+	.is_visible = it87_is_visible,
 };
 
-static struct attribute *it87_attributes_in_beep[] = {
-	&sensor_dev_attr_in0_beep.dev_attr.attr,
-	&sensor_dev_attr_in1_beep.dev_attr.attr,
-	&sensor_dev_attr_in2_beep.dev_attr.attr,
-	&sensor_dev_attr_in3_beep.dev_attr.attr,
-	&sensor_dev_attr_in4_beep.dev_attr.attr,
-	&sensor_dev_attr_in5_beep.dev_attr.attr,
-	&sensor_dev_attr_in6_beep.dev_attr.attr,
-	&sensor_dev_attr_in7_beep.dev_attr.attr,
-	NULL,
-	NULL,
-};
+static umode_t it87_fan_is_visible(struct kobject *kobj,
+				   struct attribute *attr, int index)
+{
+	struct device *dev = container_of(kobj, struct device, kobj);
+	struct it87_data *data = dev_get_drvdata(dev);
+	int i = index / 5;	/* fan index */
+	int a = index % 5;	/* attribute index */
 
-static struct attribute *it87_attributes_temp_beep[] = {
-	&sensor_dev_attr_temp1_beep.dev_attr.attr,
-	&sensor_dev_attr_temp2_beep.dev_attr.attr,
-	&sensor_dev_attr_temp3_beep.dev_attr.attr,
-};
+	if (index >= 15) {	/* fans 4..6 don't have divisor attributes */
+		i = (index - 15) / 4 + 3;
+		a = (index - 15) % 4;
+	}
 
-static struct attribute *it87_attributes_fan[6][3+1] = { {
+	if (!(data->has_fan & BIT(i)))
+		return 0;
+
+	if (a == 3) {				/* beep */
+		if (!data->has_beep)
+			return 0;
+		/* first fan beep attribute is writable */
+		if (i == __ffs(data->has_fan))
+			return attr->mode | S_IWUSR;
+	}
+
+	if (a == 4 && has_16bit_fans(data))	/* divisor */
+		return 0;
+
+	return attr->mode;
+}
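+
+/*
+ * Example: index 16 decodes as i = 3, a = 1 (fan4_min); the divisor
+ * attribute (a == 4) exists only for fans 1..3 and is hidden on chips
+ * with 16-bit fan counters.
+ */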
+
+static struct attribute *it87_attributes_fan[] = {
 	&sensor_dev_attr_fan1_input.dev_attr.attr,
 	&sensor_dev_attr_fan1_min.dev_attr.attr,
 	&sensor_dev_attr_fan1_alarm.dev_attr.attr,
-	NULL
-}, {
+	&sensor_dev_attr_fan1_beep.dev_attr.attr,	/* 3 */
+	&sensor_dev_attr_fan1_div.dev_attr.attr,	/* 4 */
+
 	&sensor_dev_attr_fan2_input.dev_attr.attr,
 	&sensor_dev_attr_fan2_min.dev_attr.attr,
 	&sensor_dev_attr_fan2_alarm.dev_attr.attr,
-	NULL
-}, {
+	&sensor_dev_attr_fan2_beep.dev_attr.attr,
+	&sensor_dev_attr_fan2_div.dev_attr.attr,	/* 9 */
+
 	&sensor_dev_attr_fan3_input.dev_attr.attr,
 	&sensor_dev_attr_fan3_min.dev_attr.attr,
 	&sensor_dev_attr_fan3_alarm.dev_attr.attr,
-	NULL
-}, {
-	&sensor_dev_attr_fan4_input.dev_attr.attr,
+	&sensor_dev_attr_fan3_beep.dev_attr.attr,
+	&sensor_dev_attr_fan3_div.dev_attr.attr,	/* 14 */
+
+	&sensor_dev_attr_fan4_input.dev_attr.attr,	/* 15 */
 	&sensor_dev_attr_fan4_min.dev_attr.attr,
 	&sensor_dev_attr_fan4_alarm.dev_attr.attr,
-	NULL
-}, {
-	&sensor_dev_attr_fan5_input.dev_attr.attr,
+	&sensor_dev_attr_fan4_beep.dev_attr.attr,
+
+	&sensor_dev_attr_fan5_input.dev_attr.attr,	/* 19 */
 	&sensor_dev_attr_fan5_min.dev_attr.attr,
 	&sensor_dev_attr_fan5_alarm.dev_attr.attr,
-	NULL
-}, {
-	&sensor_dev_attr_fan6_input.dev_attr.attr,
+	&sensor_dev_attr_fan5_beep.dev_attr.attr,
+
+	&sensor_dev_attr_fan6_input.dev_attr.attr,	/* 23 */
 	&sensor_dev_attr_fan6_min.dev_attr.attr,
 	&sensor_dev_attr_fan6_alarm.dev_attr.attr,
+	&sensor_dev_attr_fan6_beep.dev_attr.attr,
 	NULL
-} };
-
-static const struct attribute_group it87_group_fan[6] = {
-	{ .attrs = it87_attributes_fan[0] },
-	{ .attrs = it87_attributes_fan[1] },
-	{ .attrs = it87_attributes_fan[2] },
-	{ .attrs = it87_attributes_fan[3] },
-	{ .attrs = it87_attributes_fan[4] },
-	{ .attrs = it87_attributes_fan[5] },
 };
 
-static const struct attribute *it87_attributes_fan_div[] = {
-	&sensor_dev_attr_fan1_div.dev_attr.attr,
-	&sensor_dev_attr_fan2_div.dev_attr.attr,
-	&sensor_dev_attr_fan3_div.dev_attr.attr,
+static const struct attribute_group it87_group_fan = {
+	.attrs = it87_attributes_fan,
+	.is_visible = it87_fan_is_visible,
 };
 
-static struct attribute *it87_attributes_pwm[3][4+1] = { {
+static umode_t it87_pwm_is_visible(struct kobject *kobj,
+				   struct attribute *attr, int index)
+{
+	struct device *dev = container_of(kobj, struct device, kobj);
+	struct it87_data *data = dev_get_drvdata(dev);
+	int i = index / 4;	/* pwm index */
+	int a = index % 4;	/* attribute index */
+
+	if (!(data->has_pwm & BIT(i)))
+		return 0;
+
+	/* pwmX_auto_channels_temp is only writable if auto pwm is supported */
+	if (a == 3 && (has_old_autopwm(data) || has_newer_autopwm(data)))
+		return attr->mode | S_IWUSR;
+
+	/* pwm2_freq is writable if there are two pwm frequency selects */
+	if (has_pwm_freq2(data) && i == 1 && a == 2)
+		return attr->mode | S_IWUSR;
+
+	return attr->mode;
+}
+
+static struct attribute *it87_attributes_pwm[] = {
 	&sensor_dev_attr_pwm1_enable.dev_attr.attr,
 	&sensor_dev_attr_pwm1.dev_attr.attr,
-	&dev_attr_pwm1_freq.attr,
+	&sensor_dev_attr_pwm1_freq.dev_attr.attr,
 	&sensor_dev_attr_pwm1_auto_channels_temp.dev_attr.attr,
-	NULL
-}, {
+
 	&sensor_dev_attr_pwm2_enable.dev_attr.attr,
 	&sensor_dev_attr_pwm2.dev_attr.attr,
-	&dev_attr_pwm2_freq.attr,
+	&sensor_dev_attr_pwm2_freq.dev_attr.attr,
 	&sensor_dev_attr_pwm2_auto_channels_temp.dev_attr.attr,
-	NULL
-}, {
+
 	&sensor_dev_attr_pwm3_enable.dev_attr.attr,
 	&sensor_dev_attr_pwm3.dev_attr.attr,
-	&dev_attr_pwm3_freq.attr,
+	&sensor_dev_attr_pwm3_freq.dev_attr.attr,
 	&sensor_dev_attr_pwm3_auto_channels_temp.dev_attr.attr,
-	NULL
-} };
 
-static const struct attribute_group it87_group_pwm[3] = {
-	{ .attrs = it87_attributes_pwm[0] },
-	{ .attrs = it87_attributes_pwm[1] },
-	{ .attrs = it87_attributes_pwm[2] },
+	&sensor_dev_attr_pwm4_enable.dev_attr.attr,
+	&sensor_dev_attr_pwm4.dev_attr.attr,
+	&sensor_dev_attr_pwm4_freq.dev_attr.attr,
+	&sensor_dev_attr_pwm4_auto_channels_temp.dev_attr.attr,
+
+	&sensor_dev_attr_pwm5_enable.dev_attr.attr,
+	&sensor_dev_attr_pwm5.dev_attr.attr,
+	&sensor_dev_attr_pwm5_freq.dev_attr.attr,
+	&sensor_dev_attr_pwm5_auto_channels_temp.dev_attr.attr,
+
+	&sensor_dev_attr_pwm6_enable.dev_attr.attr,
+	&sensor_dev_attr_pwm6.dev_attr.attr,
+	&sensor_dev_attr_pwm6_freq.dev_attr.attr,
+	&sensor_dev_attr_pwm6_auto_channels_temp.dev_attr.attr,
+
+	NULL
 };
 
-static struct attribute *it87_attributes_autopwm[3][9+1] = { {
+static const struct attribute_group it87_group_pwm = {
+	.attrs = it87_attributes_pwm,
+	.is_visible = it87_pwm_is_visible,
+};
+
+static umode_t it87_auto_pwm_is_visible(struct kobject *kobj,
+					struct attribute *attr, int index)
+{
+	struct device *dev = container_of(kobj, struct device, kobj);
+	struct it87_data *data = dev_get_drvdata(dev);
+	int i = index / 11;	/* pwm index */
+	int a = index % 11;	/* attribute index */
+
+	if (index >= 33) {	/* pwm 4..6 */
+		i = (index - 33) / 6 + 3;
+		a = (index - 33) % 6 + 4;
+	}
+
+	if (!(data->has_pwm & BIT(i)))
+		return 0;
+
+	if (has_newer_autopwm(data)) {
+		if (a < 4)	/* no auto point pwm */
+			return 0;
+		if (a == 8)	/* no auto_point4 */
+			return 0;
+	}
+	if (has_old_autopwm(data)) {
+		if (a >= 9)	/* no pwm_auto_start, pwm_auto_slope */
+			return 0;
+	}
+
+	return attr->mode;
+}
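+
+/*
+ * Example: index 13 decodes as i = 1, a = 2 (pwm2_auto_point3_pwm),
+ * visible only with the old autopwm scheme; the pwm4..6 entries start
+ * at index 33 and decode to a = 4..9.
+ */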
+
+static struct attribute *it87_attributes_auto_pwm[] = {
 	&sensor_dev_attr_pwm1_auto_point1_pwm.dev_attr.attr,
 	&sensor_dev_attr_pwm1_auto_point2_pwm.dev_attr.attr,
 	&sensor_dev_attr_pwm1_auto_point3_pwm.dev_attr.attr,
@@ -1778,9 +2287,10 @@
 	&sensor_dev_attr_pwm1_auto_point2_temp.dev_attr.attr,
 	&sensor_dev_attr_pwm1_auto_point3_temp.dev_attr.attr,
 	&sensor_dev_attr_pwm1_auto_point4_temp.dev_attr.attr,
-	NULL
-}, {
-	&sensor_dev_attr_pwm2_auto_point1_pwm.dev_attr.attr,
+	&sensor_dev_attr_pwm1_auto_start.dev_attr.attr,
+	&sensor_dev_attr_pwm1_auto_slope.dev_attr.attr,
+
+	&sensor_dev_attr_pwm2_auto_point1_pwm.dev_attr.attr,	/* 11 */
 	&sensor_dev_attr_pwm2_auto_point2_pwm.dev_attr.attr,
 	&sensor_dev_attr_pwm2_auto_point3_pwm.dev_attr.attr,
 	&sensor_dev_attr_pwm2_auto_point4_pwm.dev_attr.attr,
@@ -1789,9 +2299,10 @@
 	&sensor_dev_attr_pwm2_auto_point2_temp.dev_attr.attr,
 	&sensor_dev_attr_pwm2_auto_point3_temp.dev_attr.attr,
 	&sensor_dev_attr_pwm2_auto_point4_temp.dev_attr.attr,
-	NULL
-}, {
-	&sensor_dev_attr_pwm3_auto_point1_pwm.dev_attr.attr,
+	&sensor_dev_attr_pwm2_auto_start.dev_attr.attr,
+	&sensor_dev_attr_pwm2_auto_slope.dev_attr.attr,
+
+	&sensor_dev_attr_pwm3_auto_point1_pwm.dev_attr.attr,	/* 22 */
 	&sensor_dev_attr_pwm3_auto_point2_pwm.dev_attr.attr,
 	&sensor_dev_attr_pwm3_auto_point3_pwm.dev_attr.attr,
 	&sensor_dev_attr_pwm3_auto_point4_pwm.dev_attr.attr,
@@ -1800,61 +2311,53 @@
 	&sensor_dev_attr_pwm3_auto_point2_temp.dev_attr.attr,
 	&sensor_dev_attr_pwm3_auto_point3_temp.dev_attr.attr,
 	&sensor_dev_attr_pwm3_auto_point4_temp.dev_attr.attr,
-	NULL
-} };
+	&sensor_dev_attr_pwm3_auto_start.dev_attr.attr,
+	&sensor_dev_attr_pwm3_auto_slope.dev_attr.attr,
 
-static const struct attribute_group it87_group_autopwm[3] = {
-	{ .attrs = it87_attributes_autopwm[0] },
-	{ .attrs = it87_attributes_autopwm[1] },
-	{ .attrs = it87_attributes_autopwm[2] },
+	&sensor_dev_attr_pwm4_auto_point1_temp.dev_attr.attr,	/* 33 */
+	&sensor_dev_attr_pwm4_auto_point1_temp_hyst.dev_attr.attr,
+	&sensor_dev_attr_pwm4_auto_point2_temp.dev_attr.attr,
+	&sensor_dev_attr_pwm4_auto_point3_temp.dev_attr.attr,
+	&sensor_dev_attr_pwm4_auto_start.dev_attr.attr,
+	&sensor_dev_attr_pwm4_auto_slope.dev_attr.attr,
+
+	&sensor_dev_attr_pwm5_auto_point1_temp.dev_attr.attr,
+	&sensor_dev_attr_pwm5_auto_point1_temp_hyst.dev_attr.attr,
+	&sensor_dev_attr_pwm5_auto_point2_temp.dev_attr.attr,
+	&sensor_dev_attr_pwm5_auto_point3_temp.dev_attr.attr,
+	&sensor_dev_attr_pwm5_auto_start.dev_attr.attr,
+	&sensor_dev_attr_pwm5_auto_slope.dev_attr.attr,
+
+	&sensor_dev_attr_pwm6_auto_point1_temp.dev_attr.attr,
+	&sensor_dev_attr_pwm6_auto_point1_temp_hyst.dev_attr.attr,
+	&sensor_dev_attr_pwm6_auto_point2_temp.dev_attr.attr,
+	&sensor_dev_attr_pwm6_auto_point3_temp.dev_attr.attr,
+	&sensor_dev_attr_pwm6_auto_start.dev_attr.attr,
+	&sensor_dev_attr_pwm6_auto_slope.dev_attr.attr,
+
+	NULL,
 };
 
-static struct attribute *it87_attributes_fan_beep[] = {
-	&sensor_dev_attr_fan1_beep.dev_attr.attr,
-	&sensor_dev_attr_fan2_beep.dev_attr.attr,
-	&sensor_dev_attr_fan3_beep.dev_attr.attr,
-	&sensor_dev_attr_fan4_beep.dev_attr.attr,
-	&sensor_dev_attr_fan5_beep.dev_attr.attr,
-	&sensor_dev_attr_fan6_beep.dev_attr.attr,
-};
-
-static struct attribute *it87_attributes_vid[] = {
-	&dev_attr_vrm.attr,
-	&dev_attr_cpu0_vid.attr,
-	NULL
-};
-
-static const struct attribute_group it87_group_vid = {
-	.attrs = it87_attributes_vid,
-};
-
-static struct attribute *it87_attributes_label[] = {
-	&sensor_dev_attr_in3_label.dev_attr.attr,
-	&sensor_dev_attr_in7_label.dev_attr.attr,
-	&sensor_dev_attr_in8_label.dev_attr.attr,
-	&sensor_dev_attr_in9_label.dev_attr.attr,
-	NULL
-};
-
-static const struct attribute_group it87_group_label = {
-	.attrs = it87_attributes_label,
+static const struct attribute_group it87_group_auto_pwm = {
+	.attrs = it87_attributes_auto_pwm,
+	.is_visible = it87_auto_pwm_is_visible,
 };
 
 /* SuperIO detection - will change isa_address if a chip is found */
-static int __init it87_find(unsigned short *address,
-	struct it87_sio_data *sio_data)
+static int __init it87_find(int sioaddr, unsigned short *address,
+			    struct it87_sio_data *sio_data)
 {
 	int err;
 	u16 chip_type;
 	const char *board_vendor, *board_name;
 	const struct it87_devices *config;
 
-	err = superio_enter();
+	err = superio_enter(sioaddr);
 	if (err)
 		return err;
 
 	err = -ENODEV;
-	chip_type = force_id ? force_id : superio_inw(DEVID);
+	chip_type = force_id ? force_id : superio_inw(sioaddr, DEVID);
 
 	switch (chip_type) {
 	case IT8705F_DEVID:
@@ -1910,6 +2413,9 @@
 	case IT8620E_DEVID:
 		sio_data->type = it8620;
 		break;
+	case IT8628E_DEVID:
+		sio_data->type = it8628;
+		break;
 	case 0xffff:	/* No device at all */
 		goto exit;
 	default:
@@ -1917,20 +2423,20 @@
 		goto exit;
 	}
 
-	superio_select(PME);
-	if (!(superio_inb(IT87_ACT_REG) & 0x01)) {
+	superio_select(sioaddr, PME);
+	if (!(superio_inb(sioaddr, IT87_ACT_REG) & 0x01)) {
 		pr_info("Device not activated, skipping\n");
 		goto exit;
 	}
 
-	*address = superio_inw(IT87_BASE_REG) & ~(IT87_EXTENT - 1);
+	*address = superio_inw(sioaddr, IT87_BASE_REG) & ~(IT87_EXTENT - 1);
 	if (*address == 0) {
 		pr_info("Base address not set, skipping\n");
 		goto exit;
 	}
 
 	err = 0;
-	sio_data->revision = superio_inb(DEVREV) & 0x0f;
+	sio_data->revision = superio_inb(sioaddr, DEVREV) & 0x0f;
 	pr_info("Found IT%04x%s chip at 0x%x, revision %d\n", chip_type,
 		it87_devices[sio_data->type].suffix,
 		*address, sio_data->revision);
@@ -1939,14 +2445,19 @@
 
 	/* in7 (VSB or VCCH5V) is always internal on some chips */
 	if (has_in7_internal(config))
-		sio_data->internal |= (1 << 1);
+		sio_data->internal |= BIT(1);
 
 	/* in8 (Vbat) is always internal */
-	sio_data->internal |= (1 << 2);
+	sio_data->internal |= BIT(2);
 
-	/* Only the IT8603E has in9 */
-	if (sio_data->type != it8603)
-		sio_data->skip_in |= (1 << 9);
+	/* in9 (AVCC3), always internal if supported */
+	if (has_avcc3(config))
+		sio_data->internal |= BIT(3); /* in9 is AVCC */
+	else
+		sio_data->skip_in |= BIT(9);
+
+	if (!has_six_pwm(config))
+		sio_data->skip_pwm |= BIT(3) | BIT(4) | BIT(5);
 
 	if (!has_vid(config))
 		sio_data->skip_vid = 1;
@@ -1954,45 +2465,46 @@
 	/* Read GPIO config and VID value from LDN 7 (GPIO) */
 	if (sio_data->type == it87) {
 		/* The IT8705F has a different LD number for GPIO */
-		superio_select(5);
-		sio_data->beep_pin = superio_inb(IT87_SIO_BEEP_PIN_REG) & 0x3f;
+		superio_select(sioaddr, 5);
+		sio_data->beep_pin = superio_inb(sioaddr,
+						 IT87_SIO_BEEP_PIN_REG) & 0x3f;
 	} else if (sio_data->type == it8783) {
 		int reg25, reg27, reg2a, reg2c, regef;
 
-		superio_select(GPIO);
+		superio_select(sioaddr, GPIO);
 
-		reg25 = superio_inb(IT87_SIO_GPIO1_REG);
-		reg27 = superio_inb(IT87_SIO_GPIO3_REG);
-		reg2a = superio_inb(IT87_SIO_PINX1_REG);
-		reg2c = superio_inb(IT87_SIO_PINX2_REG);
-		regef = superio_inb(IT87_SIO_SPI_REG);
+		reg25 = superio_inb(sioaddr, IT87_SIO_GPIO1_REG);
+		reg27 = superio_inb(sioaddr, IT87_SIO_GPIO3_REG);
+		reg2a = superio_inb(sioaddr, IT87_SIO_PINX1_REG);
+		reg2c = superio_inb(sioaddr, IT87_SIO_PINX2_REG);
+		regef = superio_inb(sioaddr, IT87_SIO_SPI_REG);
 
 		/* Check if fan3 is there or not */
-		if ((reg27 & (1 << 0)) || !(reg2c & (1 << 2)))
-			sio_data->skip_fan |= (1 << 2);
-		if ((reg25 & (1 << 4))
-		    || (!(reg2a & (1 << 1)) && (regef & (1 << 0))))
-			sio_data->skip_pwm |= (1 << 2);
+		if ((reg27 & BIT(0)) || !(reg2c & BIT(2)))
+			sio_data->skip_fan |= BIT(2);
+		if ((reg25 & BIT(4)) ||
+		    (!(reg2a & BIT(1)) && (regef & BIT(0))))
+			sio_data->skip_pwm |= BIT(2);
 
 		/* Check if fan2 is there or not */
-		if (reg27 & (1 << 7))
-			sio_data->skip_fan |= (1 << 1);
-		if (reg27 & (1 << 3))
-			sio_data->skip_pwm |= (1 << 1);
+		if (reg27 & BIT(7))
+			sio_data->skip_fan |= BIT(1);
+		if (reg27 & BIT(3))
+			sio_data->skip_pwm |= BIT(1);
 
 		/* VIN5 */
-		if ((reg27 & (1 << 0)) || (reg2c & (1 << 2)))
-			sio_data->skip_in |= (1 << 5); /* No VIN5 */
+		if ((reg27 & BIT(0)) || (reg2c & BIT(2)))
+			sio_data->skip_in |= BIT(5); /* No VIN5 */
 
 		/* VIN6 */
-		if (reg27 & (1 << 1))
-			sio_data->skip_in |= (1 << 6); /* No VIN6 */
+		if (reg27 & BIT(1))
+			sio_data->skip_in |= BIT(6); /* No VIN6 */
 
 		/*
 		 * VIN7
 		 * Does not depend on bit 2 of Reg2C, contrary to datasheet.
 		 */
-		if (reg27 & (1 << 2)) {
+		if (reg27 & BIT(2)) {
 			/*
 			 * The data sheet is a bit unclear regarding the
 			 * internal voltage divider for VCCH5V. It says
@@ -2006,81 +2518,121 @@
 			 * not the case, and ask the user to report if the
 			 * resulting voltage is sane.
 			 */
-			if (!(reg2c & (1 << 1))) {
-				reg2c |= (1 << 1);
-				superio_outb(IT87_SIO_PINX2_REG, reg2c);
+			if (!(reg2c & BIT(1))) {
+				reg2c |= BIT(1);
+				superio_outb(sioaddr, IT87_SIO_PINX2_REG,
+					     reg2c);
 				pr_notice("Routing internal VCCH5V to in7.\n");
 			}
 			pr_notice("in7 routed to internal voltage divider, with external pin disabled.\n");
 			pr_notice("Please report if it displays a reasonable voltage.\n");
 		}
 
-		if (reg2c & (1 << 0))
-			sio_data->internal |= (1 << 0);
-		if (reg2c & (1 << 1))
-			sio_data->internal |= (1 << 1);
+		if (reg2c & BIT(0))
+			sio_data->internal |= BIT(0);
+		if (reg2c & BIT(1))
+			sio_data->internal |= BIT(1);
 
-		sio_data->beep_pin = superio_inb(IT87_SIO_BEEP_PIN_REG) & 0x3f;
+		sio_data->beep_pin = superio_inb(sioaddr,
+						 IT87_SIO_BEEP_PIN_REG) & 0x3f;
 	} else if (sio_data->type == it8603) {
 		int reg27, reg29;
 
-		superio_select(GPIO);
+		superio_select(sioaddr, GPIO);
 
-		reg27 = superio_inb(IT87_SIO_GPIO3_REG);
+		reg27 = superio_inb(sioaddr, IT87_SIO_GPIO3_REG);
 
 		/* Check if fan3 is there or not */
-		if (reg27 & (1 << 6))
-			sio_data->skip_pwm |= (1 << 2);
-		if (reg27 & (1 << 7))
-			sio_data->skip_fan |= (1 << 2);
+		if (reg27 & BIT(6))
+			sio_data->skip_pwm |= BIT(2);
+		if (reg27 & BIT(7))
+			sio_data->skip_fan |= BIT(2);
 
 		/* Check if fan2 is there or not */
-		reg29 = superio_inb(IT87_SIO_GPIO5_REG);
-		if (reg29 & (1 << 1))
-			sio_data->skip_pwm |= (1 << 1);
-		if (reg29 & (1 << 2))
-			sio_data->skip_fan |= (1 << 1);
+		reg29 = superio_inb(sioaddr, IT87_SIO_GPIO5_REG);
+		if (reg29 & BIT(1))
+			sio_data->skip_pwm |= BIT(1);
+		if (reg29 & BIT(2))
+			sio_data->skip_fan |= BIT(1);
 
-		sio_data->skip_in |= (1 << 5); /* No VIN5 */
-		sio_data->skip_in |= (1 << 6); /* No VIN6 */
+		sio_data->skip_in |= BIT(5); /* No VIN5 */
+		sio_data->skip_in |= BIT(6); /* No VIN6 */
 
-		sio_data->internal |= (1 << 3); /* in9 is AVCC */
-
-		sio_data->beep_pin = superio_inb(IT87_SIO_BEEP_PIN_REG) & 0x3f;
-	} else if (sio_data->type == it8620) {
+		sio_data->beep_pin = superio_inb(sioaddr,
+						 IT87_SIO_BEEP_PIN_REG) & 0x3f;
+	} else if (sio_data->type == it8620 || sio_data->type == it8628) {
 		int reg;
 
-		superio_select(GPIO);
+		superio_select(sioaddr, GPIO);
+
+		/* Check for pwm5 */
+		reg = superio_inb(sioaddr, IT87_SIO_GPIO1_REG);
+		if (reg & BIT(6))
+			sio_data->skip_pwm |= BIT(4);
 
 		/* Check for fan4, fan5 */
-		reg = superio_inb(IT87_SIO_GPIO2_REG);
-		if (!(reg & (1 << 5)))
-			sio_data->skip_fan |= (1 << 3);
-		if (!(reg & (1 << 4)))
-			sio_data->skip_fan |= (1 << 4);
+		reg = superio_inb(sioaddr, IT87_SIO_GPIO2_REG);
+		if (!(reg & BIT(5)))
+			sio_data->skip_fan |= BIT(3);
+		if (!(reg & BIT(4)))
+			sio_data->skip_fan |= BIT(4);
 
 		/* Check for pwm3, fan3 */
-		reg = superio_inb(IT87_SIO_GPIO3_REG);
-		if (reg & (1 << 6))
-			sio_data->skip_pwm |= (1 << 2);
-		if (reg & (1 << 7))
-			sio_data->skip_fan |= (1 << 2);
+		reg = superio_inb(sioaddr, IT87_SIO_GPIO3_REG);
+		if (reg & BIT(6))
+			sio_data->skip_pwm |= BIT(2);
+		if (reg & BIT(7))
+			sio_data->skip_fan |= BIT(2);
+
+		/* Check for pwm4 */
+		reg = superio_inb(sioaddr, IT87_SIO_GPIO4_REG);
+		if (!(reg & BIT(2)))
+			sio_data->skip_pwm |= BIT(3);
 
 		/* Check for pwm2, fan2 */
-		reg = superio_inb(IT87_SIO_GPIO5_REG);
-		if (reg & (1 << 1))
-			sio_data->skip_pwm |= (1 << 1);
-		if (reg & (1 << 2))
-			sio_data->skip_fan |= (1 << 1);
+		reg = superio_inb(sioaddr, IT87_SIO_GPIO5_REG);
+		if (reg & BIT(1))
+			sio_data->skip_pwm |= BIT(1);
+		if (reg & BIT(2))
+			sio_data->skip_fan |= BIT(1);
+		/* Check for pwm6, fan6 */
+		if (!(reg & BIT(7))) {
+			sio_data->skip_pwm |= BIT(5);
+			sio_data->skip_fan |= BIT(5);
+		}
 
-		sio_data->beep_pin = superio_inb(IT87_SIO_BEEP_PIN_REG) & 0x3f;
+		sio_data->beep_pin = superio_inb(sioaddr,
+						 IT87_SIO_BEEP_PIN_REG) & 0x3f;
 	} else {
 		int reg;
 		bool uart6;
 
-		superio_select(GPIO);
+		superio_select(sioaddr, GPIO);
 
-		reg = superio_inb(IT87_SIO_GPIO3_REG);
+		/* Check for fan4, fan5 */
+		if (has_five_fans(config)) {
+			reg = superio_inb(sioaddr, IT87_SIO_GPIO2_REG);
+			switch (sio_data->type) {
+			case it8718:
+				if (reg & BIT(5))
+					sio_data->skip_fan |= BIT(3);
+				if (reg & BIT(4))
+					sio_data->skip_fan |= BIT(4);
+				break;
+			case it8720:
+			case it8721:
+			case it8728:
+				if (!(reg & BIT(5)))
+					sio_data->skip_fan |= BIT(3);
+				if (!(reg & BIT(4)))
+					sio_data->skip_fan |= BIT(4);
+				break;
+			default:
+				break;
+			}
+		}
+
+		reg = superio_inb(sioaddr, IT87_SIO_GPIO3_REG);
 		if (!sio_data->skip_vid) {
 			/* We need at least 4 VID pins */
 			if (reg & 0x0f) {
@@ -2090,25 +2642,26 @@
 		}
 
 		/* Check if fan3 is there or not */
-		if (reg & (1 << 6))
-			sio_data->skip_pwm |= (1 << 2);
-		if (reg & (1 << 7))
-			sio_data->skip_fan |= (1 << 2);
+		if (reg & BIT(6))
+			sio_data->skip_pwm |= BIT(2);
+		if (reg & BIT(7))
+			sio_data->skip_fan |= BIT(2);
 
 		/* Check if fan2 is there or not */
-		reg = superio_inb(IT87_SIO_GPIO5_REG);
-		if (reg & (1 << 1))
-			sio_data->skip_pwm |= (1 << 1);
-		if (reg & (1 << 2))
-			sio_data->skip_fan |= (1 << 1);
+		reg = superio_inb(sioaddr, IT87_SIO_GPIO5_REG);
+		if (reg & BIT(1))
+			sio_data->skip_pwm |= BIT(1);
+		if (reg & BIT(2))
+			sio_data->skip_fan |= BIT(1);
 
-		if ((sio_data->type == it8718 || sio_data->type == it8720)
-		 && !(sio_data->skip_vid))
-			sio_data->vid_value = superio_inb(IT87_SIO_VID_REG);
+		if ((sio_data->type == it8718 || sio_data->type == it8720) &&
+		    !(sio_data->skip_vid))
+			sio_data->vid_value = superio_inb(sioaddr,
+							  IT87_SIO_VID_REG);
 
-		reg = superio_inb(IT87_SIO_PINX2_REG);
+		reg = superio_inb(sioaddr, IT87_SIO_PINX2_REG);
 
-		uart6 = sio_data->type == it8782 && (reg & (1 << 2));
+		uart6 = sio_data->type == it8782 && (reg & BIT(2));
 
 		/*
 		 * The IT8720F has no VIN7 pin, so VCCH should always be
@@ -2124,15 +2677,15 @@
 		 * If UART6 is enabled, re-route VIN7 to the internal divider
 		 * if that is not already the case.
 		 */
-		if ((sio_data->type == it8720 || uart6) && !(reg & (1 << 1))) {
-			reg |= (1 << 1);
-			superio_outb(IT87_SIO_PINX2_REG, reg);
+		if ((sio_data->type == it8720 || uart6) && !(reg & BIT(1))) {
+			reg |= BIT(1);
+			superio_outb(sioaddr, IT87_SIO_PINX2_REG, reg);
 			pr_notice("Routing internal VCCH to in7\n");
 		}
-		if (reg & (1 << 0))
-			sio_data->internal |= (1 << 0);
-		if (reg & (1 << 1))
-			sio_data->internal |= (1 << 1);
+		if (reg & BIT(0))
+			sio_data->internal |= BIT(0);
+		if (reg & BIT(1))
+			sio_data->internal |= BIT(1);
 
 		/*
 		 * On IT8782F, UART6 pins overlap with VIN5, VIN6, and VIN7.
@@ -2144,11 +2697,12 @@
 		 * temperature source here, skip_temp is preliminary.
 		 */
 		if (uart6) {
-			sio_data->skip_in |= (1 << 5) | (1 << 6);
-			sio_data->skip_temp |= (1 << 2);
+			sio_data->skip_in |= BIT(5) | BIT(6);
+			sio_data->skip_temp |= BIT(2);
 		}
 
-		sio_data->beep_pin = superio_inb(IT87_SIO_BEEP_PIN_REG) & 0x3f;
+		sio_data->beep_pin = superio_inb(sioaddr,
+						 IT87_SIO_BEEP_PIN_REG) & 0x3f;
 	}
 	if (sio_data->beep_pin)
 		pr_info("Beeping is supported\n");
@@ -2157,8 +2711,8 @@
 	board_vendor = dmi_get_system_info(DMI_BOARD_VENDOR);
 	board_name = dmi_get_system_info(DMI_BOARD_NAME);
 	if (board_vendor && board_name) {
-		if (strcmp(board_vendor, "nVIDIA") == 0
-		 && strcmp(board_name, "FN68PT") == 0) {
+		if (strcmp(board_vendor, "nVIDIA") == 0 &&
+		    strcmp(board_name, "FN68PT") == 0) {
 			/*
 			 * On the Shuttle SN68PT, FAN_CTL2 is apparently not
 			 * connected to a fan, but to something else. One user
@@ -2168,373 +2722,15 @@
 			 * the same board is ever used in other systems.
 			 */
 			pr_info("Disabling pwm2 due to hardware constraints\n");
-			sio_data->skip_pwm = (1 << 1);
+			sio_data->skip_pwm = BIT(1);
 		}
 	}
 
 exit:
-	superio_exit();
+	superio_exit(sioaddr);
 	return err;
 }
 
-static void it87_remove_files(struct device *dev)
-{
-	struct it87_data *data = platform_get_drvdata(pdev);
-	struct it87_sio_data *sio_data = dev_get_platdata(dev);
-	int i;
-
-	sysfs_remove_group(&dev->kobj, &it87_group);
-	for (i = 0; i < 10; i++) {
-		if (sio_data->skip_in & (1 << i))
-			continue;
-		sysfs_remove_group(&dev->kobj, &it87_group_in[i]);
-		if (it87_attributes_in_beep[i])
-			sysfs_remove_file(&dev->kobj,
-					  it87_attributes_in_beep[i]);
-	}
-	for (i = 0; i < 3; i++) {
-		if (!(data->has_temp & (1 << i)))
-			continue;
-		sysfs_remove_group(&dev->kobj, &it87_group_temp[i]);
-		if (has_temp_offset(data))
-			sysfs_remove_file(&dev->kobj,
-					  it87_attributes_temp_offset[i]);
-		if (sio_data->beep_pin)
-			sysfs_remove_file(&dev->kobj,
-					  it87_attributes_temp_beep[i]);
-	}
-	for (i = 0; i < 6; i++) {
-		if (!(data->has_fan & (1 << i)))
-			continue;
-		sysfs_remove_group(&dev->kobj, &it87_group_fan[i]);
-		if (sio_data->beep_pin)
-			sysfs_remove_file(&dev->kobj,
-					  it87_attributes_fan_beep[i]);
-		if (i < 3 && !has_16bit_fans(data))
-			sysfs_remove_file(&dev->kobj,
-					  it87_attributes_fan_div[i]);
-	}
-	for (i = 0; i < 3; i++) {
-		if (sio_data->skip_pwm & (1 << i))
-			continue;
-		sysfs_remove_group(&dev->kobj, &it87_group_pwm[i]);
-		if (has_old_autopwm(data))
-			sysfs_remove_group(&dev->kobj,
-					   &it87_group_autopwm[i]);
-	}
-	if (!sio_data->skip_vid)
-		sysfs_remove_group(&dev->kobj, &it87_group_vid);
-	sysfs_remove_group(&dev->kobj, &it87_group_label);
-}
-
-static int it87_probe(struct platform_device *pdev)
-{
-	struct it87_data *data;
-	struct resource *res;
-	struct device *dev = &pdev->dev;
-	struct it87_sio_data *sio_data = dev_get_platdata(dev);
-	int err = 0, i;
-	int enable_pwm_interface;
-	int fan_beep_need_rw;
-
-	res = platform_get_resource(pdev, IORESOURCE_IO, 0);
-	if (!devm_request_region(&pdev->dev, res->start, IT87_EC_EXTENT,
-				 DRVNAME)) {
-		dev_err(dev, "Failed to request region 0x%lx-0x%lx\n",
-			(unsigned long)res->start,
-			(unsigned long)(res->start + IT87_EC_EXTENT - 1));
-		return -EBUSY;
-	}
-
-	data = devm_kzalloc(&pdev->dev, sizeof(struct it87_data), GFP_KERNEL);
-	if (!data)
-		return -ENOMEM;
-
-	data->addr = res->start;
-	data->type = sio_data->type;
-	data->features = it87_devices[sio_data->type].features;
-	data->peci_mask = it87_devices[sio_data->type].peci_mask;
-	data->old_peci_mask = it87_devices[sio_data->type].old_peci_mask;
-	data->name = it87_devices[sio_data->type].name;
-	/*
-	 * IT8705F Datasheet 0.4.1, 3h == Version G.
-	 * IT8712F Datasheet 0.9.1, section 8.3.5 indicates 8h == Version J.
-	 * These are the first revisions with 16-bit tachometer support.
-	 */
-	switch (data->type) {
-	case it87:
-		if (sio_data->revision >= 0x03) {
-			data->features &= ~FEAT_OLD_AUTOPWM;
-			data->features |= FEAT_FAN16_CONFIG | FEAT_16BIT_FANS;
-		}
-		break;
-	case it8712:
-		if (sio_data->revision >= 0x08) {
-			data->features &= ~FEAT_OLD_AUTOPWM;
-			data->features |= FEAT_FAN16_CONFIG | FEAT_16BIT_FANS |
-					  FEAT_FIVE_FANS;
-		}
-		break;
-	default:
-		break;
-	}
-
-	/* Now, we do the remaining detection. */
-	if ((it87_read_value(data, IT87_REG_CONFIG) & 0x80)
-	 || it87_read_value(data, IT87_REG_CHIPID) != 0x90)
-		return -ENODEV;
-
-	platform_set_drvdata(pdev, data);
-
-	mutex_init(&data->update_lock);
-
-	/* Check PWM configuration */
-	enable_pwm_interface = it87_check_pwm(dev);
-
-	/* Starting with IT8721F, we handle scaling of internal voltages */
-	if (has_12mv_adc(data)) {
-		if (sio_data->internal & (1 << 0))
-			data->in_scaled |= (1 << 3);	/* in3 is AVCC */
-		if (sio_data->internal & (1 << 1))
-			data->in_scaled |= (1 << 7);	/* in7 is VSB */
-		if (sio_data->internal & (1 << 2))
-			data->in_scaled |= (1 << 8);	/* in8 is Vbat */
-		if (sio_data->internal & (1 << 3))
-			data->in_scaled |= (1 << 9);	/* in9 is AVCC */
-	} else if (sio_data->type == it8781 || sio_data->type == it8782 ||
-		   sio_data->type == it8783) {
-		if (sio_data->internal & (1 << 0))
-			data->in_scaled |= (1 << 3);	/* in3 is VCC5V */
-		if (sio_data->internal & (1 << 1))
-			data->in_scaled |= (1 << 7);	/* in7 is VCCH5V */
-	}
-
-	data->has_temp = 0x07;
-	if (sio_data->skip_temp & (1 << 2)) {
-		if (sio_data->type == it8782
-		    && !(it87_read_value(data, IT87_REG_TEMP_EXTRA) & 0x80))
-			data->has_temp &= ~(1 << 2);
-	}
-
-	/* Initialize the IT87 chip */
-	it87_init_device(pdev);
-
-	/* Register sysfs hooks */
-	err = sysfs_create_group(&dev->kobj, &it87_group);
-	if (err)
-		return err;
-
-	for (i = 0; i < 10; i++) {
-		if (sio_data->skip_in & (1 << i))
-			continue;
-		err = sysfs_create_group(&dev->kobj, &it87_group_in[i]);
-		if (err)
-			goto error;
-		if (sio_data->beep_pin && it87_attributes_in_beep[i]) {
-			err = sysfs_create_file(&dev->kobj,
-						it87_attributes_in_beep[i]);
-			if (err)
-				goto error;
-		}
-	}
-
-	for (i = 0; i < 3; i++) {
-		if (!(data->has_temp & (1 << i)))
-			continue;
-		err = sysfs_create_group(&dev->kobj, &it87_group_temp[i]);
-		if (err)
-			goto error;
-		if (has_temp_offset(data)) {
-			err = sysfs_create_file(&dev->kobj,
-						it87_attributes_temp_offset[i]);
-			if (err)
-				goto error;
-		}
-		if (sio_data->beep_pin) {
-			err = sysfs_create_file(&dev->kobj,
-						it87_attributes_temp_beep[i]);
-			if (err)
-				goto error;
-		}
-	}
-
-	/* Do not create fan files for disabled fans */
-	fan_beep_need_rw = 1;
-	for (i = 0; i < 6; i++) {
-		if (!(data->has_fan & (1 << i)))
-			continue;
-		err = sysfs_create_group(&dev->kobj, &it87_group_fan[i]);
-		if (err)
-			goto error;
-
-		if (i < 3 && !has_16bit_fans(data)) {
-			err = sysfs_create_file(&dev->kobj,
-						it87_attributes_fan_div[i]);
-			if (err)
-				goto error;
-		}
-
-		if (sio_data->beep_pin) {
-			err = sysfs_create_file(&dev->kobj,
-						it87_attributes_fan_beep[i]);
-			if (err)
-				goto error;
-			if (!fan_beep_need_rw)
-				continue;
-
-			/*
-			 * As we have a single beep enable bit for all fans,
-			 * only the first enabled fan has a writable attribute
-			 * for it.
-			 */
-			if (sysfs_chmod_file(&dev->kobj,
-					     it87_attributes_fan_beep[i],
-					     S_IRUGO | S_IWUSR))
-				dev_dbg(dev, "chmod +w fan%d_beep failed\n",
-					i + 1);
-			fan_beep_need_rw = 0;
-		}
-	}
-
-	if (enable_pwm_interface) {
-		for (i = 0; i < 3; i++) {
-			if (sio_data->skip_pwm & (1 << i))
-				continue;
-			err = sysfs_create_group(&dev->kobj,
-						 &it87_group_pwm[i]);
-			if (err)
-				goto error;
-
-			if (!has_old_autopwm(data))
-				continue;
-			err = sysfs_create_group(&dev->kobj,
-						 &it87_group_autopwm[i]);
-			if (err)
-				goto error;
-		}
-	}
-
-	if (!sio_data->skip_vid) {
-		data->vrm = vid_which_vrm();
-		/* VID reading from Super-I/O config space if available */
-		data->vid = sio_data->vid_value;
-		err = sysfs_create_group(&dev->kobj, &it87_group_vid);
-		if (err)
-			goto error;
-	}
-
-	/* Export labels for internal sensors */
-	for (i = 0; i < 4; i++) {
-		if (!(sio_data->internal & (1 << i)))
-			continue;
-		err = sysfs_create_file(&dev->kobj,
-					it87_attributes_label[i]);
-		if (err)
-			goto error;
-	}
-
-	data->hwmon_dev = hwmon_device_register(dev);
-	if (IS_ERR(data->hwmon_dev)) {
-		err = PTR_ERR(data->hwmon_dev);
-		goto error;
-	}
-
-	return 0;
-
-error:
-	it87_remove_files(dev);
-	return err;
-}
-
-static int it87_remove(struct platform_device *pdev)
-{
-	struct it87_data *data = platform_get_drvdata(pdev);
-
-	hwmon_device_unregister(data->hwmon_dev);
-	it87_remove_files(&pdev->dev);
-
-	return 0;
-}
-
-/*
- * Must be called with data->update_lock held, except during initialization.
- * We ignore the IT87 BUSY flag at this moment - it could lead to deadlocks,
- * would slow down the IT87 access and should not be necessary.
- */
-static int it87_read_value(struct it87_data *data, u8 reg)
-{
-	outb_p(reg, data->addr + IT87_ADDR_REG_OFFSET);
-	return inb_p(data->addr + IT87_DATA_REG_OFFSET);
-}
-
-/*
- * Must be called with data->update_lock held, except during initialization.
- * We ignore the IT87 BUSY flag at this moment - it could lead to deadlocks,
- * would slow down the IT87 access and should not be necessary.
- */
-static void it87_write_value(struct it87_data *data, u8 reg, u8 value)
-{
-	outb_p(reg, data->addr + IT87_ADDR_REG_OFFSET);
-	outb_p(value, data->addr + IT87_DATA_REG_OFFSET);
-}
-
-/* Return 1 if and only if the PWM interface is safe to use */
-static int it87_check_pwm(struct device *dev)
-{
-	struct it87_data *data = dev_get_drvdata(dev);
-	/*
-	 * Some BIOSes fail to correctly configure the IT87 fans. All fans off
-	 * and polarity set to active low is sign that this is the case so we
-	 * disable pwm control to protect the user.
-	 */
-	int tmp = it87_read_value(data, IT87_REG_FAN_CTL);
-	if ((tmp & 0x87) == 0) {
-		if (fix_pwm_polarity) {
-			/*
-			 * The user asks us to attempt a chip reconfiguration.
-			 * This means switching to active high polarity and
-			 * inverting all fan speed values.
-			 */
-			int i;
-			u8 pwm[3];
-
-			for (i = 0; i < 3; i++)
-				pwm[i] = it87_read_value(data,
-							 IT87_REG_PWM(i));
-
-			/*
-			 * If any fan is in automatic pwm mode, the polarity
-			 * might be correct, as suspicious as it seems, so we
-			 * better don't change anything (but still disable the
-			 * PWM interface).
-			 */
-			if (!((pwm[0] | pwm[1] | pwm[2]) & 0x80)) {
-				dev_info(dev,
-					 "Reconfiguring PWM to active high polarity\n");
-				it87_write_value(data, IT87_REG_FAN_CTL,
-						 tmp | 0x87);
-				for (i = 0; i < 3; i++)
-					it87_write_value(data,
-							 IT87_REG_PWM(i),
-							 0x7f & ~pwm[i]);
-				return 1;
-			}
-
-			dev_info(dev,
-				 "PWM configuration is too broken to be fixed\n");
-		}
-
-		dev_info(dev,
-			 "Detected broken BIOS defaults, disabling PWM interface\n");
-		return 0;
-	} else if (fix_pwm_polarity) {
-		dev_info(dev,
-			 "PWM configuration looks sane, won't touch\n");
-	}
-
-	return 1;
-}
-
 /* Called when we have found a new IT87. */
 static void it87_init_device(struct platform_device *pdev)
 {
@@ -2556,7 +2752,7 @@
 	 * these have separate registers for the temperature mapping and the
 	 * manual duty cycle.
 	 */
-	for (i = 0; i < 3; i++) {
+	for (i = 0; i < NUM_AUTO_PWM; i++) {
 		data->pwm_temp_map[i] = i;
 		data->pwm_duty[i] = 0x7f;	/* Full speed */
 		data->auto_pwm[i][3] = 0x7f;	/* Full speed, hard-coded */
@@ -2569,12 +2765,12 @@
 	 * means -1 degree C, which surprisingly doesn't trigger an alarm,
 	 * but is still confusing, so change to 127 degrees C.
 	 */
-	for (i = 0; i < 8; i++) {
+	for (i = 0; i < NUM_VIN_LIMIT; i++) {
 		tmp = it87_read_value(data, IT87_REG_VIN_MIN(i));
 		if (tmp == 0xff)
 			it87_write_value(data, IT87_REG_VIN_MIN(i), 0);
 	}
-	for (i = 0; i < 3; i++) {
+	for (i = 0; i < NUM_TEMP_LIMIT; i++) {
 		tmp = it87_read_value(data, IT87_REG_TEMP_HIGH(i));
 		if (tmp == 0xff)
 			it87_write_value(data, IT87_REG_TEMP_HIGH(i), 127);
@@ -2619,158 +2815,245 @@
 
 	/* Check for additional fans */
 	if (has_five_fans(data)) {
-		if (tmp & (1 << 4))
-			data->has_fan |= (1 << 3); /* fan4 enabled */
-		if (tmp & (1 << 5))
-			data->has_fan |= (1 << 4); /* fan5 enabled */
-		if (has_six_fans(data) && (tmp & (1 << 2)))
-			data->has_fan |= (1 << 5); /* fan6 enabled */
+		if (tmp & BIT(4))
+			data->has_fan |= BIT(3); /* fan4 enabled */
+		if (tmp & BIT(5))
+			data->has_fan |= BIT(4); /* fan5 enabled */
+		if (has_six_fans(data) && (tmp & BIT(2)))
+			data->has_fan |= BIT(5); /* fan6 enabled */
 	}
 
 	/* Fan input pins may be used for alternative functions */
 	data->has_fan &= ~sio_data->skip_fan;
 
+	/* Check if pwm5, pwm6 are enabled */
+	if (has_six_pwm(data)) {
+		/* The following code may be IT8620E specific */
+		tmp = it87_read_value(data, IT87_REG_FAN_DIV);
+		if ((tmp & 0xc0) == 0xc0)
+			sio_data->skip_pwm |= BIT(4);
+		if (!(tmp & BIT(3)))
+			sio_data->skip_pwm |= BIT(5);
+	}
+
 	/* Start monitoring */
 	it87_write_value(data, IT87_REG_CONFIG,
 			 (it87_read_value(data, IT87_REG_CONFIG) & 0x3e)
 			 | (update_vbat ? 0x41 : 0x01));
 }
 
-static void it87_update_pwm_ctrl(struct it87_data *data, int nr)
-{
-	data->pwm_ctrl[nr] = it87_read_value(data, IT87_REG_PWM(nr));
-	if (has_newer_autopwm(data)) {
-		data->pwm_temp_map[nr] = data->pwm_ctrl[nr] & 0x03;
-		data->pwm_duty[nr] = it87_read_value(data,
-						     IT87_REG_PWM_DUTY(nr));
-	} else {
-		if (data->pwm_ctrl[nr] & 0x80)	/* Automatic mode */
-			data->pwm_temp_map[nr] = data->pwm_ctrl[nr] & 0x03;
-		else				/* Manual mode */
-			data->pwm_duty[nr] = data->pwm_ctrl[nr] & 0x7f;
-	}
-
-	if (has_old_autopwm(data)) {
-		int i;
-
-		for (i = 0; i < 5 ; i++)
-			data->auto_temp[nr][i] = it87_read_value(data,
-						IT87_REG_AUTO_TEMP(nr, i));
-		for (i = 0; i < 3 ; i++)
-			data->auto_pwm[nr][i] = it87_read_value(data,
-						IT87_REG_AUTO_PWM(nr, i));
-	}
-}
-
-static struct it87_data *it87_update_device(struct device *dev)
+/* Return 1 if and only if the PWM interface is safe to use */
+static int it87_check_pwm(struct device *dev)
 {
 	struct it87_data *data = dev_get_drvdata(dev);
-	int i;
+	/*
+	 * Some BIOSes fail to correctly configure the IT87 fans. All fans off
+	 * and polarity set to active low is a sign that this is the case, so we
+	 * disable pwm control to protect the user.
+	 */
+	int tmp = it87_read_value(data, IT87_REG_FAN_CTL);
 
-	mutex_lock(&data->update_lock);
-
-	if (time_after(jiffies, data->last_updated + HZ + HZ / 2)
-	    || !data->valid) {
-		if (update_vbat) {
+	if ((tmp & 0x87) == 0) {
+		if (fix_pwm_polarity) {
 			/*
-			 * Cleared after each update, so reenable.  Value
-			 * returned by this read will be previous value
+			 * The user asks us to attempt a chip reconfiguration.
+			 * This means switching to active high polarity and
+			 * inverting all fan speed values.
 			 */
-			it87_write_value(data, IT87_REG_CONFIG,
-				it87_read_value(data, IT87_REG_CONFIG) | 0x40);
-		}
-		for (i = 0; i <= 7; i++) {
-			data->in[i][0] =
-				it87_read_value(data, IT87_REG_VIN(i));
-			data->in[i][1] =
-				it87_read_value(data, IT87_REG_VIN_MIN(i));
-			data->in[i][2] =
-				it87_read_value(data, IT87_REG_VIN_MAX(i));
-		}
-		/* in8 (battery) has no limit registers */
-		data->in[8][0] = it87_read_value(data, IT87_REG_VIN(8));
-		if (data->type == it8603)
-			data->in[9][0] = it87_read_value(data, 0x2f);
+			int i;
+			u8 pwm[3];
 
-		for (i = 0; i < 6; i++) {
-			/* Skip disabled fans */
-			if (!(data->has_fan & (1 << i)))
-				continue;
+			for (i = 0; i < ARRAY_SIZE(pwm); i++)
+				pwm[i] = it87_read_value(data,
+							 IT87_REG_PWM[i]);
 
-			data->fan[i][1] =
-				it87_read_value(data, IT87_REG_FAN_MIN[i]);
-			data->fan[i][0] = it87_read_value(data,
-				       IT87_REG_FAN[i]);
-			/* Add high byte if in 16-bit mode */
-			if (has_16bit_fans(data)) {
-				data->fan[i][0] |= it87_read_value(data,
-						IT87_REG_FANX[i]) << 8;
-				data->fan[i][1] |= it87_read_value(data,
-						IT87_REG_FANX_MIN[i]) << 8;
+			/*
+			 * If any fan is in automatic pwm mode, the polarity
+			 * might be correct, as suspicious as it seems, so we
+			 * better don't change anything (but still disable the
+			 * PWM interface).
+			 */
+			if (!((pwm[0] | pwm[1] | pwm[2]) & 0x80)) {
+				dev_info(dev,
+					 "Reconfiguring PWM to active high polarity\n");
+				it87_write_value(data, IT87_REG_FAN_CTL,
+						 tmp | 0x87);
+				for (i = 0; i < 3; i++)
+					it87_write_value(data,
+							 IT87_REG_PWM[i],
+							 0x7f & ~pwm[i]);
+				return 1;
 			}
-		}
-		for (i = 0; i < 3; i++) {
-			if (!(data->has_temp & (1 << i)))
-				continue;
-			data->temp[i][0] =
-				it87_read_value(data, IT87_REG_TEMP(i));
-			data->temp[i][1] =
-				it87_read_value(data, IT87_REG_TEMP_LOW(i));
-			data->temp[i][2] =
-				it87_read_value(data, IT87_REG_TEMP_HIGH(i));
-			if (has_temp_offset(data))
-				data->temp[i][3] =
-				  it87_read_value(data,
-						  IT87_REG_TEMP_OFFSET[i]);
+
+			dev_info(dev,
+				 "PWM configuration is too broken to be fixed\n");
 		}
 
-		/* Newer chips don't have clock dividers */
-		if ((data->has_fan & 0x07) && !has_16bit_fans(data)) {
-			i = it87_read_value(data, IT87_REG_FAN_DIV);
-			data->fan_div[0] = i & 0x07;
-			data->fan_div[1] = (i >> 3) & 0x07;
-			data->fan_div[2] = (i & 0x40) ? 3 : 1;
-		}
-
-		data->alarms =
-			it87_read_value(data, IT87_REG_ALARM1) |
-			(it87_read_value(data, IT87_REG_ALARM2) << 8) |
-			(it87_read_value(data, IT87_REG_ALARM3) << 16);
-		data->beeps = it87_read_value(data, IT87_REG_BEEP_ENABLE);
-
-		data->fan_main_ctrl = it87_read_value(data,
-				IT87_REG_FAN_MAIN_CTRL);
-		data->fan_ctl = it87_read_value(data, IT87_REG_FAN_CTL);
-		for (i = 0; i < 3; i++)
-			it87_update_pwm_ctrl(data, i);
-
-		data->sensor = it87_read_value(data, IT87_REG_TEMP_ENABLE);
-		data->extra = it87_read_value(data, IT87_REG_TEMP_EXTRA);
-		/*
-		 * The IT8705F does not have VID capability.
-		 * The IT8718F and later don't use IT87_REG_VID for the
-		 * same purpose.
-		 */
-		if (data->type == it8712 || data->type == it8716) {
-			data->vid = it87_read_value(data, IT87_REG_VID);
-			/*
-			 * The older IT8712F revisions had only 5 VID pins,
-			 * but we assume it is always safe to read 6 bits.
-			 */
-			data->vid &= 0x3f;
-		}
-		data->last_updated = jiffies;
-		data->valid = 1;
+		dev_info(dev,
+			 "Detected broken BIOS defaults, disabling PWM interface\n");
+		return 0;
+	} else if (fix_pwm_polarity) {
+		dev_info(dev,
+			 "PWM configuration looks sane, won't touch\n");
 	}
 
-	mutex_unlock(&data->update_lock);
-
-	return data;
+	return 1;
 }
 
-static int __init it87_device_add(unsigned short address,
+static int it87_probe(struct platform_device *pdev)
+{
+	struct it87_data *data;
+	struct resource *res;
+	struct device *dev = &pdev->dev;
+	struct it87_sio_data *sio_data = dev_get_platdata(dev);
+	int enable_pwm_interface;
+	struct device *hwmon_dev;
+
+	res = platform_get_resource(pdev, IORESOURCE_IO, 0);
+	if (!devm_request_region(&pdev->dev, res->start, IT87_EC_EXTENT,
+				 DRVNAME)) {
+		dev_err(dev, "Failed to request region 0x%lx-0x%lx\n",
+			(unsigned long)res->start,
+			(unsigned long)(res->start + IT87_EC_EXTENT - 1));
+		return -EBUSY;
+	}
+
+	data = devm_kzalloc(&pdev->dev, sizeof(struct it87_data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->addr = res->start;
+	data->type = sio_data->type;
+	data->features = it87_devices[sio_data->type].features;
+	data->peci_mask = it87_devices[sio_data->type].peci_mask;
+	data->old_peci_mask = it87_devices[sio_data->type].old_peci_mask;
+	/*
+	 * IT8705F Datasheet 0.4.1, 3h == Version G.
+	 * IT8712F Datasheet 0.9.1, section 8.3.5 indicates 8h == Version J.
+	 * These are the first revisions with 16-bit tachometer support.
+	 */
+	switch (data->type) {
+	case it87:
+		if (sio_data->revision >= 0x03) {
+			data->features &= ~FEAT_OLD_AUTOPWM;
+			data->features |= FEAT_FAN16_CONFIG | FEAT_16BIT_FANS;
+		}
+		break;
+	case it8712:
+		if (sio_data->revision >= 0x08) {
+			data->features &= ~FEAT_OLD_AUTOPWM;
+			data->features |= FEAT_FAN16_CONFIG | FEAT_16BIT_FANS |
+					  FEAT_FIVE_FANS;
+		}
+		break;
+	default:
+		break;
+	}
+
+	/* Now, we do the remaining detection. */
+	if ((it87_read_value(data, IT87_REG_CONFIG) & 0x80) ||
+	    it87_read_value(data, IT87_REG_CHIPID) != 0x90)
+		return -ENODEV;
+
+	platform_set_drvdata(pdev, data);
+
+	mutex_init(&data->update_lock);
+
+	/* Check PWM configuration */
+	enable_pwm_interface = it87_check_pwm(dev);
+
+	/* Starting with IT8721F, we handle scaling of internal voltages */
+	if (has_12mv_adc(data)) {
+		if (sio_data->internal & BIT(0))
+			data->in_scaled |= BIT(3);	/* in3 is AVCC */
+		if (sio_data->internal & BIT(1))
+			data->in_scaled |= BIT(7);	/* in7 is VSB */
+		if (sio_data->internal & BIT(2))
+			data->in_scaled |= BIT(8);	/* in8 is Vbat */
+		if (sio_data->internal & BIT(3))
+			data->in_scaled |= BIT(9);	/* in9 is AVCC */
+	} else if (sio_data->type == it8781 || sio_data->type == it8782 ||
+		   sio_data->type == it8783) {
+		if (sio_data->internal & BIT(0))
+			data->in_scaled |= BIT(3);	/* in3 is VCC5V */
+		if (sio_data->internal & BIT(1))
+			data->in_scaled |= BIT(7);	/* in7 is VCCH5V */
+	}
+
+	data->has_temp = 0x07;
+	if (sio_data->skip_temp & BIT(2)) {
+		if (sio_data->type == it8782 &&
+		    !(it87_read_value(data, IT87_REG_TEMP_EXTRA) & 0x80))
+			data->has_temp &= ~BIT(2);
+	}
+
+	data->in_internal = sio_data->internal;
+	data->has_in = 0x3ff & ~sio_data->skip_in;
+
+	if (has_six_temp(data)) {
+		u8 reg = it87_read_value(data, IT87_REG_TEMP456_ENABLE);
+
+		/* Check for additional temperature sensors */
+		if ((reg & 0x03) >= 0x02)
+			data->has_temp |= BIT(3);
+		if (((reg >> 2) & 0x03) >= 0x02)
+			data->has_temp |= BIT(4);
+		if (((reg >> 4) & 0x03) >= 0x02)
+			data->has_temp |= BIT(5);
+
+		/* Check for additional voltage sensors */
+		if ((reg & 0x03) == 0x01)
+			data->has_in |= BIT(10);
+		if (((reg >> 2) & 0x03) == 0x01)
+			data->has_in |= BIT(11);
+		if (((reg >> 4) & 0x03) == 0x01)
+			data->has_in |= BIT(12);
+	}
+
+	data->has_beep = !!sio_data->beep_pin;
+
+	/* Initialize the IT87 chip */
+	it87_init_device(pdev);
+
+	if (!sio_data->skip_vid) {
+		data->has_vid = true;
+		data->vrm = vid_which_vrm();
+		/* VID reading from Super-I/O config space if available */
+		data->vid = sio_data->vid_value;
+	}
+
+	/* Prepare for sysfs hooks */
+	data->groups[0] = &it87_group;
+	data->groups[1] = &it87_group_in;
+	data->groups[2] = &it87_group_temp;
+	data->groups[3] = &it87_group_fan;
+
+	if (enable_pwm_interface) {
+		data->has_pwm = BIT(ARRAY_SIZE(IT87_REG_PWM)) - 1;
+		data->has_pwm &= ~sio_data->skip_pwm;
+
+		data->groups[4] = &it87_group_pwm;
+		if (has_old_autopwm(data) || has_newer_autopwm(data))
+			data->groups[5] = &it87_group_auto_pwm;
+	}
+
+	hwmon_dev = devm_hwmon_device_register_with_groups(dev,
+					it87_devices[sio_data->type].name,
+					data, data->groups);
+	return PTR_ERR_OR_ZERO(hwmon_dev);
+}
+
+static struct platform_driver it87_driver = {
+	.driver = {
+		.name	= DRVNAME,
+	},
+	.probe	= it87_probe,
+};
+
+static int __init it87_device_add(int index, unsigned short address,
 				  const struct it87_sio_data *sio_data)
 {
+	struct platform_device *pdev;
 	struct resource res = {
 		.start	= address + IT87_EC_OFFSET,
 		.end	= address + IT87_EC_OFFSET + IT87_EC_EXTENT - 1,
@@ -2781,14 +3064,11 @@
 
 	err = acpi_check_resource_conflict(&res);
 	if (err)
-		goto exit;
+		return err;
 
 	pdev = platform_device_alloc(DRVNAME, address);
-	if (!pdev) {
-		err = -ENOMEM;
-		pr_err("Device allocation failed\n");
-		goto exit;
-	}
+	if (!pdev)
+		return -ENOMEM;
 
 	err = platform_device_add_resources(pdev, &res, 1);
 	if (err) {
@@ -2809,44 +3089,61 @@
 		goto exit_device_put;
 	}
 
+	it87_pdev[index] = pdev;
 	return 0;
 
 exit_device_put:
 	platform_device_put(pdev);
-exit:
 	return err;
 }
 
 static int __init sm_it87_init(void)
 {
-	int err;
-	unsigned short isa_address = 0;
+	int sioaddr[2] = { REG_2E, REG_4E };
 	struct it87_sio_data sio_data;
+	unsigned short isa_address;
+	bool found = false;
+	int i, err;
 
-	memset(&sio_data, 0, sizeof(struct it87_sio_data));
-	err = it87_find(&isa_address, &sio_data);
-	if (err)
-		return err;
 	err = platform_driver_register(&it87_driver);
 	if (err)
 		return err;
 
-	err = it87_device_add(isa_address, &sio_data);
-	if (err) {
-		platform_driver_unregister(&it87_driver);
-		return err;
+	for (i = 0; i < ARRAY_SIZE(sioaddr); i++) {
+		memset(&sio_data, 0, sizeof(struct it87_sio_data));
+		isa_address = 0;
+		err = it87_find(sioaddr[i], &isa_address, &sio_data);
+		if (err || isa_address == 0)
+			continue;
+
+		err = it87_device_add(i, isa_address, &sio_data);
+		if (err)
+			goto exit_dev_unregister;
+		found = true;
 	}
 
+	if (!found) {
+		err = -ENODEV;
+		goto exit_unregister;
+	}
 	return 0;
+
+exit_dev_unregister:
+	/* NULL check handled by platform_device_unregister */
+	platform_device_unregister(it87_pdev[0]);
+exit_unregister:
+	platform_driver_unregister(&it87_driver);
+	return err;
 }
 
 static void __exit sm_it87_exit(void)
 {
-	platform_device_unregister(pdev);
+	/* NULL check handled by platform_device_unregister */
+	platform_device_unregister(it87_pdev[1]);
+	platform_device_unregister(it87_pdev[0]);
 	platform_driver_unregister(&it87_driver);
 }
 
-
 MODULE_AUTHOR("Chris Gauthron, Jean Delvare <jdelvare@suse.de>");
 MODULE_DESCRIPTION("IT8705F/IT871xF/IT872xF hardware monitoring driver");
 module_param(update_vbat, bool, 0);
diff --git a/drivers/hwmon/max31722.c b/drivers/hwmon/max31722.c
new file mode 100644
index 0000000..30a100e
--- /dev/null
+++ b/drivers/hwmon/max31722.c
@@ -0,0 +1,165 @@
+/*
+ * max31722 - hwmon driver for Maxim Integrated MAX31722/MAX31723 SPI
+ * digital thermometer and thermostats.
+ *
+ * Copyright (c) 2016, Intel Corporation.
+ *
+ * This file is subject to the terms and conditions of version 2 of
+ * the GNU General Public License. See the file COPYING in the main
+ * directory of this archive for more details.
+ */
+
+#include <linux/acpi.h>
+#include <linux/hwmon.h>
+#include <linux/hwmon-sysfs.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/spi/spi.h>
+
+#define MAX31722_REG_CFG				0x00
+#define MAX31722_REG_TEMP_LSB				0x01
+
+#define MAX31722_MODE_CONTINUOUS			0x00
+#define MAX31722_MODE_STANDBY				0x01
+#define MAX31722_MODE_MASK				0xFE
+#define MAX31722_RESOLUTION_12BIT			0x06
+#define MAX31722_WRITE_MASK				0x80
+
+struct max31722_data {
+	struct device *hwmon_dev;
+	struct spi_device *spi_device;
+	u8 mode;
+};
+
+static int max31722_set_mode(struct max31722_data *data, u8 mode)
+{
+	int ret;
+	struct spi_device *spi = data->spi_device;
+	u8 buf[2] = {
+		MAX31722_REG_CFG | MAX31722_WRITE_MASK,
+		(data->mode & MAX31722_MODE_MASK) | mode
+	};
+
+	ret = spi_write(spi, &buf, sizeof(buf));
+	if (ret < 0) {
+		dev_err(&spi->dev, "failed to set sensor mode.\n");
+		return ret;
+	}
+	data->mode = (data->mode & MAX31722_MODE_MASK) | mode;
+
+	return 0;
+}
+
+static ssize_t max31722_show_temp(struct device *dev,
+				  struct device_attribute *attr,
+				  char *buf)
+{
+	ssize_t ret;
+	struct max31722_data *data = dev_get_drvdata(dev);
+
+	ret = spi_w8r16(data->spi_device, MAX31722_REG_TEMP_LSB);
+	if (ret < 0)
+		return ret;
+	/* Keep 12 bits and multiply by the scale of 62.5 millidegrees/bit. */
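+	/*
+	 * Worked example: a raw value of 0x1900 (integer part 0x19 = 25
+	 * degrees C) yields 6400 * 125 / 32 = 25000 millidegrees.
+	 */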
+	return sprintf(buf, "%d\n", (s16)le16_to_cpu(ret) * 125 / 32);
+}
+
+static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
+			  max31722_show_temp, NULL, 0);
+
+static struct attribute *max31722_attrs[] = {
+	&sensor_dev_attr_temp1_input.dev_attr.attr,
+	NULL,
+};
+
+ATTRIBUTE_GROUPS(max31722);
+
+static int max31722_probe(struct spi_device *spi)
+{
+	int ret;
+	struct max31722_data *data;
+
+	data = devm_kzalloc(&spi->dev, sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	spi_set_drvdata(spi, data);
+	data->spi_device = spi;
+	/*
+	 * Set SD bit to 0 so we can have continuous measurements.
+	 * Set resolution to 12 bits for maximum precision.
+	 */
+	data->mode = MAX31722_MODE_CONTINUOUS | MAX31722_RESOLUTION_12BIT;
+	ret = max31722_set_mode(data, MAX31722_MODE_CONTINUOUS);
+	if (ret < 0)
+		return ret;
+
+	data->hwmon_dev = hwmon_device_register_with_groups(&spi->dev,
+							    spi->modalias,
+							    data,
+							    max31722_groups);
+	if (IS_ERR(data->hwmon_dev)) {
+		max31722_set_mode(data, MAX31722_MODE_STANDBY);
+		return PTR_ERR(data->hwmon_dev);
+	}
+
+	return 0;
+}
+
+static int max31722_remove(struct spi_device *spi)
+{
+	struct max31722_data *data = spi_get_drvdata(spi);
+
+	hwmon_device_unregister(data->hwmon_dev);
+
+	return max31722_set_mode(data, MAX31722_MODE_STANDBY);
+}
+
+static int __maybe_unused max31722_suspend(struct device *dev)
+{
+	struct spi_device *spi_device = to_spi_device(dev);
+	struct max31722_data *data = spi_get_drvdata(spi_device);
+
+	return max31722_set_mode(data, MAX31722_MODE_STANDBY);
+}
+
+static int __maybe_unused max31722_resume(struct device *dev)
+{
+	struct spi_device *spi_device = to_spi_device(dev);
+	struct max31722_data *data = spi_get_drvdata(spi_device);
+
+	return max31722_set_mode(data, MAX31722_MODE_CONTINUOUS);
+}
+
+static SIMPLE_DEV_PM_OPS(max31722_pm_ops, max31722_suspend, max31722_resume);
+
+static const struct spi_device_id max31722_spi_id[] = {
+	{"max31722", 0},
+	{"max31723", 0},
+	{}
+};
+
+static const struct acpi_device_id __maybe_unused max31722_acpi_id[] = {
+	{"MAX31722", 0},
+	{"MAX31723", 0},
+	{}
+};
+
+MODULE_DEVICE_TABLE(spi, max31722_spi_id);
+
+static struct spi_driver max31722_driver = {
+	.driver = {
+		.name = "max31722",
+		.pm = &max31722_pm_ops,
+		.acpi_match_table = ACPI_PTR(max31722_acpi_id),
+	},
+	.probe =            max31722_probe,
+	.remove =           max31722_remove,
+	.id_table =         max31722_spi_id,
+};
+
+module_spi_driver(max31722_driver);
+
+MODULE_AUTHOR("Tiberiu Breana <tiberiu.a.breana@intel.com>");
+MODULE_DESCRIPTION("max31722 sensor driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/hwmon/sch5636.c b/drivers/hwmon/sch5636.c
index 131a281..d24d7b6 100644
--- a/drivers/hwmon/sch5636.c
+++ b/drivers/hwmon/sch5636.c
@@ -449,7 +449,7 @@
 		}
 		revision[i] = val;
 	}
-	pr_info("Found %s chip at %#hx, revison: %d.%02d\n", DEVNAME,
+	pr_info("Found %s chip at %#hx, revision: %d.%02d\n", DEVNAME,
 		data->addr, revision[0], revision[1]);
 
 	/* Read all temp + fan ctrl registers to determine which are active */
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index c6935de..c966492 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -766,6 +766,67 @@
 		.enter = NULL }
 };
 
+static struct cpuidle_state bxt_cstates[] = {
+	{
+		.name = "C1-BXT",
+		.desc = "MWAIT 0x00",
+		.flags = MWAIT2flg(0x00),
+		.exit_latency = 2,
+		.target_residency = 2,
+		.enter = &intel_idle,
+		.enter_freeze = intel_idle_freeze, },
+	{
+		.name = "C1E-BXT",
+		.desc = "MWAIT 0x01",
+		.flags = MWAIT2flg(0x01),
+		.exit_latency = 10,
+		.target_residency = 20,
+		.enter = &intel_idle,
+		.enter_freeze = intel_idle_freeze, },
+	{
+		.name = "C6-BXT",
+		.desc = "MWAIT 0x20",
+		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+		.exit_latency = 133,
+		.target_residency = 133,
+		.enter = &intel_idle,
+		.enter_freeze = intel_idle_freeze, },
+	{
+		.name = "C7s-BXT",
+		.desc = "MWAIT 0x31",
+		.flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED,
+		.exit_latency = 155,
+		.target_residency = 155,
+		.enter = &intel_idle,
+		.enter_freeze = intel_idle_freeze, },
+	{
+		.name = "C8-BXT",
+		.desc = "MWAIT 0x40",
+		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
+		.exit_latency = 1000,
+		.target_residency = 1000,
+		.enter = &intel_idle,
+		.enter_freeze = intel_idle_freeze, },
+	{
+		.name = "C9-BXT",
+		.desc = "MWAIT 0x50",
+		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
+		.exit_latency = 2000,
+		.target_residency = 2000,
+		.enter = &intel_idle,
+		.enter_freeze = intel_idle_freeze, },
+	{
+		.name = "C10-BXT",
+		.desc = "MWAIT 0x60",
+		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
+		.exit_latency = 10000,
+		.target_residency = 10000,
+		.enter = &intel_idle,
+		.enter_freeze = intel_idle_freeze, },
+	{
+		.enter = NULL }
+};
+
 /**
  * intel_idle
  * @dev: cpuidle_device
@@ -950,6 +1011,11 @@
 	.state_table = knl_cstates,
 };
 
+static const struct idle_cpu idle_cpu_bxt = {
+	.state_table = bxt_cstates,
+	.disable_promotion_to_c1e = true,
+};
+
 #define ICPU(model, cpu) \
 	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_MWAIT, (unsigned long)&cpu }
 
@@ -985,6 +1051,7 @@
 	ICPU(0x9e, idle_cpu_skl),
 	ICPU(0x55, idle_cpu_skx),
 	ICPU(0x57, idle_cpu_knl),
+	ICPU(0x5c, idle_cpu_bxt),
 	{}
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_idle_ids);
@@ -1075,6 +1142,73 @@
 
 	/* else, 1 and 2 socket systems use default ivt_cstates */
 }
+
+/*
+ * Translate IRTL (Interrupt Response Time Limit) MSR to usec
+ */
+
+static unsigned int irtl_ns_units[] = {
+	1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };
+
+static unsigned long long irtl_2_usec(unsigned long long irtl)
+{
+	unsigned long long ns;
+
+	/* The time-unit field is 3 bits wide (MSR bits 12:10) */
+	ns = irtl_ns_units[(irtl >> 10) & 0x7];
+
+	return div64_u64((irtl & 0x3FF) * ns, 1000);
+}
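+
+/*
+ * For example, an IRTL value of (2 << 10) | 500 selects the 1024 ns time
+ * unit and a 500-unit interval: 500 * 1024 / 1000 = 512 usec.
+ */
+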
+/*
+ * bxt_idle_state_table_update(void)
+ *
+ * On BXT, we trust the IRTL to show the definitive maximum latency.
+ * We use the same value for target_residency.
+ */
+static void bxt_idle_state_table_update(void)
+{
+	unsigned long long msr;
+
+	rdmsrl(MSR_PKGC6_IRTL, msr);
+	if (msr) {
+		unsigned int usec = irtl_2_usec(msr);
+
+		bxt_cstates[2].exit_latency = usec;
+		bxt_cstates[2].target_residency = usec;
+	}
+
+	rdmsrl(MSR_PKGC7_IRTL, msr);
+	if (msr) {
+		unsigned int usec = irtl_2_usec(msr);
+
+		bxt_cstates[3].exit_latency = usec;
+		bxt_cstates[3].target_residency = usec;
+	}
+
+	rdmsrl(MSR_PKGC8_IRTL, msr);
+	if (msr) {
+		unsigned int usec = irtl_2_usec(msr);
+
+		bxt_cstates[4].exit_latency = usec;
+		bxt_cstates[4].target_residency = usec;
+	}
+
+	rdmsrl(MSR_PKGC9_IRTL, msr);
+	if (msr) {
+		unsigned int usec = irtl_2_usec(msr);
+
+		bxt_cstates[5].exit_latency = usec;
+		bxt_cstates[5].target_residency = usec;
+	}
+
+	rdmsrl(MSR_PKGC10_IRTL, msr);
+	if (msr) {
+		unsigned int usec = irtl_2_usec(msr);
+
+		bxt_cstates[6].exit_latency = usec;
+		bxt_cstates[6].target_residency = usec;
+	}
+}
+
 /*
  * sklh_idle_state_table_update(void)
  *
@@ -1130,6 +1264,9 @@
 	case 0x3e: /* IVT */
 		ivt_idle_state_table_update();
 		break;
+	case 0x5c: /* BXT */
+		bxt_idle_state_table_update();
+		break;
 	case 0x5e: /* SKL-H */
 		sklh_idle_state_table_update();
 		break;
diff --git a/drivers/input/joystick/analog.c b/drivers/input/joystick/analog.c
index 6f8b084..3d8ff09 100644
--- a/drivers/input/joystick/analog.c
+++ b/drivers/input/joystick/analog.c
@@ -143,9 +143,9 @@
 
 #include <linux/i8253.h>
 
-#define GET_TIME(x)	do { if (cpu_has_tsc) x = (unsigned int)rdtsc(); else x = get_time_pit(); } while (0)
-#define DELTA(x,y)	(cpu_has_tsc ? ((y) - (x)) : ((x) - (y) + ((x) < (y) ? PIT_TICK_RATE / HZ : 0)))
-#define TIME_NAME	(cpu_has_tsc?"TSC":"PIT")
+#define GET_TIME(x)	do { if (boot_cpu_has(X86_FEATURE_TSC)) x = (unsigned int)rdtsc(); else x = get_time_pit(); } while (0)
+#define DELTA(x,y)	(boot_cpu_has(X86_FEATURE_TSC) ? ((y) - (x)) : ((x) - (y) + ((x) < (y) ? PIT_TICK_RATE / HZ : 0)))
+#define TIME_NAME	(boot_cpu_has(X86_FEATURE_TSC)?"TSC":"PIT")
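+/*
+ * The PIT counter counts down, so for the PIT case DELTA() computes
+ * (x) - (y) and adds one reload period (PIT_TICK_RATE / HZ) when the
+ * counter wrapped between the two samples.
+ */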
 static unsigned int get_time_pit(void)
 {
         unsigned long flags;
diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
index 8adaaea..49721b4 100644
--- a/drivers/iommu/irq_remapping.c
+++ b/drivers/iommu/irq_remapping.c
@@ -36,7 +36,7 @@
 	 * As this gets called during crash dump, keep this simple for
 	 * now.
 	 */
-	if (cpu_has_apic || apic_from_smp_config())
+	if (boot_cpu_has(X86_FEATURE_APIC) || apic_from_smp_config())
 		disconnect_bsp_APIC(0);
 }
 
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 282344b..095bb5b 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -55,7 +55,7 @@
 
 static void gic_check_cpu_features(void)
 {
-	WARN_TAINT_ONCE(cpus_have_cap(ARM64_HAS_SYSREG_GIC_CPUIF),
+	WARN_TAINT_ONCE(this_cpu_has_cap(ARM64_HAS_SYSREG_GIC_CPUIF),
 			TAINT_CPU_OUT_OF_SPEC,
 			"GICv3 system registers enabled, broken firmware!\n");
 }
@@ -490,6 +490,7 @@
 		 * Get what the GIC says our CPU mask is.
 		 */
 		BUG_ON(cpu >= NR_GIC_CPU_IF);
+		gic_check_cpu_features();
 		cpu_mask = gic_get_cpumask(gic);
 		gic_cpu_map[cpu] = cpu_mask;
 
@@ -1021,8 +1022,6 @@
 
 	BUG_ON(gic_nr >= CONFIG_ARM_GIC_MAX_NR);
 
-	gic_check_cpu_features();
-
 	gic = &gic_data[gic_nr];
 
 	/* Initialize irq_chip */
diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index 2251478..5ae2834 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig
@@ -413,10 +413,11 @@
 	tristate "LED driver for Intel NAS SS4200 series"
 	depends on LEDS_CLASS
 	depends on PCI && DMI
+	depends on X86
 	help
 	  This option enables support for the Intel SS4200 series of
-	  Network Attached Storage servers.  You may control the hard
-	  drive or power LEDs on the front panel.  Using this driver
+	  Network Attached Storage servers. You may control the hard
+	  drive or power LEDs on the front panel. Using this driver
 	  can stop the front LED from blinking after startup.
 
 config LEDS_LT3593
diff --git a/drivers/leds/led-triggers.c b/drivers/leds/led-triggers.c
index 2181581..55fa65e 100644
--- a/drivers/leds/led-triggers.c
+++ b/drivers/leds/led-triggers.c
@@ -26,7 +26,7 @@
  * Nests outside led_cdev->trigger_lock
  */
 static DECLARE_RWSEM(triggers_list_lock);
-static LIST_HEAD(trigger_list);
+LIST_HEAD(trigger_list);
 
  /* Used by LED Class */
 
diff --git a/drivers/leds/leds-gpio.c b/drivers/leds/leds-gpio.c
index 61143f5..8229f06 100644
--- a/drivers/leds/leds-gpio.c
+++ b/drivers/leds/leds-gpio.c
@@ -127,6 +127,8 @@
 	led_dat->cdev.brightness = state ? LED_FULL : LED_OFF;
 	if (!template->retain_state_suspended)
 		led_dat->cdev.flags |= LED_CORE_SUSPENDRESUME;
+	if (template->panic_indicator)
+		led_dat->cdev.flags |= LED_PANIC_INDICATOR;
 
 	ret = gpiod_direction_output(led_dat->gpiod, state);
 	if (ret < 0)
@@ -200,6 +202,8 @@
 
 		if (fwnode_property_present(child, "retain-state-suspended"))
 			led.retain_state_suspended = 1;
+		if (fwnode_property_present(child, "panic-indicator"))
+			led.panic_indicator = 1;
 
 		ret = create_gpio_led(&led, &priv->leds[priv->num_leds],
 				      dev, NULL);
diff --git a/drivers/leds/leds-ss4200.c b/drivers/leds/leds-ss4200.c
index 046cb70..732eb86 100644
--- a/drivers/leds/leds-ss4200.c
+++ b/drivers/leds/leds-ss4200.c
@@ -101,6 +101,19 @@
 			DMI_MATCH(DMI_PRODUCT_VERSION, "1.00.00")
 		}
 	},
+	{
+		/*
+		 * FUJITSU SIEMENS SCALEO Home Server/SS4200-E
+		 * BIOS V090L 12/19/2007
+		 */
+		.callback = ss4200_led_dmi_callback,
+		.ident = "Fujitsu Siemens SCALEO Home Server",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "SCALEO Home Server"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "1.00.00")
+		}
+	},
 	{}
 };
 
diff --git a/drivers/leds/leds-tca6507.c b/drivers/leds/leds-tca6507.c
index c548ea1..45222a7 100644
--- a/drivers/leds/leds-tca6507.c
+++ b/drivers/leds/leds-tca6507.c
@@ -327,6 +327,8 @@
 	int result;
 
 	result = choose_times(tca->bank[bank].ontime, &c1, &c2);
+	if (result < 0)
+		return;
 	dev_dbg(&tca->client->dev,
 		"Chose on  times %d(%d) %d(%d) for %dms\n",
 		c1, time_codes[c1],
diff --git a/drivers/leds/leds.h b/drivers/leds/leds.h
index db3f20d..7d38e6b 100644
--- a/drivers/leds/leds.h
+++ b/drivers/leds/leds.h
@@ -30,5 +30,6 @@
 
 extern struct rw_semaphore leds_list_lock;
 extern struct list_head leds_list;
+extern struct list_head trigger_list;
 
 #endif	/* __LEDS_H_INCLUDED */
diff --git a/drivers/leds/trigger/Kconfig b/drivers/leds/trigger/Kconfig
index 5bda6a9..9893d91 100644
--- a/drivers/leds/trigger/Kconfig
+++ b/drivers/leds/trigger/Kconfig
@@ -41,6 +41,14 @@
 	  This allows LEDs to be controlled by IDE disk activity.
 	  If unsure, say Y.
 
+config LEDS_TRIGGER_MTD
+	bool "LED MTD (NAND/NOR) Trigger"
+	depends on MTD
+	depends on LEDS_TRIGGERS
+	help
+	  This allows LEDs to be controlled by MTD activity.
+	  If unsure, say N.
+
 config LEDS_TRIGGER_HEARTBEAT
 	tristate "LED Heartbeat Trigger"
 	depends on LEDS_TRIGGERS
@@ -108,4 +116,14 @@
 	  This enables direct flash/torch on/off by the driver, kernel space.
 	  If unsure, say Y.
 
+config LEDS_TRIGGER_PANIC
+	bool "LED Panic Trigger"
+	depends on LEDS_TRIGGERS
+	help
+	  This allows LEDs to be configured to blink on a kernel panic.
+	  Enabling this option allows you to mark certain LEDs as panic
+	  indicators, so that they blink on a kernel panic even if they are
+	  set to a different trigger.
+	  If unsure, say Y.
+
 endif # LEDS_TRIGGERS
diff --git a/drivers/leds/trigger/Makefile b/drivers/leds/trigger/Makefile
index 1abf48d..8cc64a4 100644
--- a/drivers/leds/trigger/Makefile
+++ b/drivers/leds/trigger/Makefile
@@ -1,6 +1,7 @@
 obj-$(CONFIG_LEDS_TRIGGER_TIMER)	+= ledtrig-timer.o
 obj-$(CONFIG_LEDS_TRIGGER_ONESHOT)	+= ledtrig-oneshot.o
 obj-$(CONFIG_LEDS_TRIGGER_IDE_DISK)	+= ledtrig-ide-disk.o
+obj-$(CONFIG_LEDS_TRIGGER_MTD)		+= ledtrig-mtd.o
 obj-$(CONFIG_LEDS_TRIGGER_HEARTBEAT)	+= ledtrig-heartbeat.o
 obj-$(CONFIG_LEDS_TRIGGER_BACKLIGHT)	+= ledtrig-backlight.o
 obj-$(CONFIG_LEDS_TRIGGER_GPIO)		+= ledtrig-gpio.o
@@ -8,3 +9,4 @@
 obj-$(CONFIG_LEDS_TRIGGER_DEFAULT_ON)	+= ledtrig-default-on.o
 obj-$(CONFIG_LEDS_TRIGGER_TRANSIENT)	+= ledtrig-transient.o
 obj-$(CONFIG_LEDS_TRIGGER_CAMERA)	+= ledtrig-camera.o
+obj-$(CONFIG_LEDS_TRIGGER_PANIC)	+= ledtrig-panic.o
diff --git a/drivers/leds/trigger/ledtrig-ide-disk.c b/drivers/leds/trigger/ledtrig-ide-disk.c
index c02a3ac..15123d3 100644
--- a/drivers/leds/trigger/ledtrig-ide-disk.c
+++ b/drivers/leds/trigger/ledtrig-ide-disk.c
@@ -18,10 +18,11 @@
 #define BLINK_DELAY 30
 
 DEFINE_LED_TRIGGER(ledtrig_ide);
-static unsigned long ide_blink_delay = BLINK_DELAY;
 
 void ledtrig_ide_activity(void)
 {
+	unsigned long ide_blink_delay = BLINK_DELAY;
+
 	led_trigger_blink_oneshot(ledtrig_ide,
 				  &ide_blink_delay, &ide_blink_delay, 0);
 }
diff --git a/drivers/leds/trigger/ledtrig-mtd.c b/drivers/leds/trigger/ledtrig-mtd.c
new file mode 100644
index 0000000..99b5b0a
--- /dev/null
+++ b/drivers/leds/trigger/ledtrig-mtd.c
@@ -0,0 +1,45 @@
+/*
+ * LED MTD trigger
+ *
+ * Copyright 2016 Ezequiel Garcia <ezequiel@vanguardiasur.com.ar>
+ *
+ * Based on LED IDE-Disk Activity Trigger
+ *
+ * Copyright 2006 Openedhand Ltd.
+ *
+ * Author: Richard Purdie <rpurdie@openedhand.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/leds.h>
+
+#define BLINK_DELAY 30
+
+DEFINE_LED_TRIGGER(ledtrig_mtd);
+DEFINE_LED_TRIGGER(ledtrig_nand);
+
+void ledtrig_mtd_activity(void)
+{
+	unsigned long blink_delay = BLINK_DELAY;
+
+	led_trigger_blink_oneshot(ledtrig_mtd,
+				  &blink_delay, &blink_delay, 0);
+	led_trigger_blink_oneshot(ledtrig_nand,
+				  &blink_delay, &blink_delay, 0);
+}
+EXPORT_SYMBOL(ledtrig_mtd_activity);
+
+static int __init ledtrig_mtd_init(void)
+{
+	led_trigger_register_simple("mtd", &ledtrig_mtd);
+	led_trigger_register_simple("nand-disk", &ledtrig_nand);
+
+	return 0;
+}
+device_initcall(ledtrig_mtd_init);
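
Nothing blinks until a driver calls ledtrig_mtd_activity(). A minimal caller sketch — mtd_do_write() is a hypothetical wrapper for illustration; only the ledtrig_mtd_activity() call is the API added above (declared in <linux/leds.h> elsewhere in this series):

	#include <linux/mtd/mtd.h>
	#include <linux/leds.h>

	/* Hypothetical write path: fire a one-shot blink on the mtd and
	 * nand-disk triggers, then do the actual MTD write. */
	static int mtd_do_write(struct mtd_info *mtd, loff_t to, size_t len,
				size_t *retlen, const u_char *buf)
	{
		ledtrig_mtd_activity();
		return mtd->_write(mtd, to, len, retlen, buf);
	}
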
diff --git a/drivers/leds/trigger/ledtrig-panic.c b/drivers/leds/trigger/ledtrig-panic.c
new file mode 100644
index 0000000..d735526
--- /dev/null
+++ b/drivers/leds/trigger/ledtrig-panic.c
@@ -0,0 +1,77 @@
+/*
+ * Kernel Panic LED Trigger
+ *
+ * Copyright 2016 Ezequiel Garcia <ezequiel@vanguardiasur.com.ar>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/notifier.h>
+#include <linux/leds.h>
+#include "../leds.h"
+
+static struct led_trigger *trigger;
+
+/*
+ * This is called in a special context by the atomic panic
+ * notifier. This means the trigger can be changed without
+ * worrying about locking.
+ */
+static void led_trigger_set_panic(struct led_classdev *led_cdev)
+{
+	struct led_trigger *trig;
+
+	list_for_each_entry(trig, &trigger_list, next_trig) {
+		if (strcmp("panic", trig->name))
+			continue;
+		if (led_cdev->trigger)
+			list_del(&led_cdev->trig_list);
+		list_add_tail(&led_cdev->trig_list, &trig->led_cdevs);
+
+		/* Avoid the delayed blink path */
+		led_cdev->blink_delay_on = 0;
+		led_cdev->blink_delay_off = 0;
+
+		led_cdev->trigger = trig;
+		if (trig->activate)
+			trig->activate(led_cdev);
+		break;
+	}
+}
+
+static int led_trigger_panic_notifier(struct notifier_block *nb,
+				      unsigned long code, void *unused)
+{
+	struct led_classdev *led_cdev;
+
+	list_for_each_entry(led_cdev, &leds_list, node)
+		if (led_cdev->flags & LED_PANIC_INDICATOR)
+			led_trigger_set_panic(led_cdev);
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block led_trigger_panic_nb = {
+	.notifier_call = led_trigger_panic_notifier,
+};
+
+static long led_panic_blink(int state)
+{
+	led_trigger_event(trigger, state ? LED_FULL : LED_OFF);
+	return 0;
+}
+
+static int __init ledtrig_panic_init(void)
+{
+	atomic_notifier_chain_register(&panic_notifier_list,
+				       &led_trigger_panic_nb);
+
+	led_trigger_register_simple("panic", &trigger);
+	panic_blink = led_panic_blink;
+	return 0;
+}
+device_initcall(ledtrig_panic_init);
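
The notifier above only touches LEDs flagged LED_PANIC_INDICATOR, so drivers must opt their LEDs in — exactly what the leds-gpio hunk at the top of this series does. A sketch for any other classdev driver; my_led_set() and the surrounding probe context are placeholders:

	/* Sketch: opt a LED into panic blinking before registering it. */
	cdev->name = "status";
	cdev->brightness_set = my_led_set;	/* driver-specific */
	if (device_property_present(dev, "panic-indicator"))
		cdev->flags |= LED_PANIC_INDICATOR;
	ret = led_classdev_register(dev, cdev);
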
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
index adc162c..6e9042e 100644
--- a/drivers/lguest/x86/core.c
+++ b/drivers/lguest/x86/core.c
@@ -603,7 +603,7 @@
 	 * doing this.
 	 */
 	get_online_cpus();
-	if (cpu_has_pge) { /* We have a broader idea of "global". */
+	if (boot_cpu_has(X86_FEATURE_PGE)) { /* We have a broader idea of "global". */
 		/* Remember that this was originally set (for cleanup). */
 		cpu_had_pge = 1;
 		/*
diff --git a/drivers/misc/sgi-gru/grukservices.c b/drivers/misc/sgi-gru/grukservices.c
index 967b9dd..0307690 100644
--- a/drivers/misc/sgi-gru/grukservices.c
+++ b/drivers/misc/sgi-gru/grukservices.c
@@ -718,8 +718,8 @@
 static int send_message_put_nacked(void *cb, struct gru_message_queue_desc *mqd,
 			void *mesg, int lines)
 {
-	unsigned long m, *val = mesg, gpa, save;
-	int ret;
+	unsigned long m;
+	int ret, loops = 200;	/* experimentally determined */
 
 	m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6);
 	if (lines == 2) {
@@ -735,22 +735,28 @@
 		return MQE_OK;
 
 	/*
-	 * Send a cross-partition interrupt to the SSI that contains the target
-	 * message queue. Normally, the interrupt is automatically delivered by
-	 * hardware but some error conditions require explicit delivery.
-	 * Use the GRU to deliver the interrupt. Otherwise partition failures
+	 * Send a noop message in order to deliver a cross-partition interrupt
+	 * to the SSI that contains the target message queue. Normally, the
+	 * interrupt is automatically delivered by hardware following mesq
+	 * operations, but some error conditions require explicit delivery.
+	 * The noop message will trigger delivery. Otherwise partition failures
 	 * could cause unrecovered errors.
 	 */
-	gpa = uv_global_gru_mmr_address(mqd->interrupt_pnode, UVH_IPI_INT);
-	save = *val;
-	*val = uv_hub_ipi_value(mqd->interrupt_apicid, mqd->interrupt_vector,
-				dest_Fixed);
-	gru_vstore_phys(cb, gpa, gru_get_tri(mesg), IAA_REGISTER, IMA);
-	ret = gru_wait(cb);
-	*val = save;
-	if (ret != CBS_IDLE)
-		return MQE_UNEXPECTED_CB_ERR;
-	return MQE_OK;
+	do {
+		ret = send_noop_message(cb, mqd, mesg);
+	} while ((ret == MQIE_AGAIN || ret == MQE_CONGESTION) && (loops-- > 0));
+
+	if (ret == MQIE_AGAIN || ret == MQE_CONGESTION) {
+		/*
+		 * Don't tell the app to resend the message, as it has
+		 * already been sent successfully.  We simply return OK
+		 * (rather than fail the send with MQE_UNEXPECTED_CB_ERR),
+		 * assuming that the other side is receiving enough
+		 * interrupts to get this message processed anyway.
+		 */
+		ret = MQE_OK;
+	}
+	return ret;
 }
 
 /*
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index 8a0147d..5f2a3d69 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -35,6 +35,7 @@
 #include <linux/capability.h>
 #include <linux/compat.h>
 #include <linux/pm_runtime.h>
+#include <linux/idr.h>
 
 #include <linux/mmc/ioctl.h>
 #include <linux/mmc/card.h>
@@ -78,14 +79,14 @@
 /*
  * We've only got one major, so number of mmcblk devices is
  * limited to (1 << 20) / number of minors per device.  It is also
- * currently limited by the size of the static bitmaps below.
+ * limited by the MAX_DEVICES below.
  */
 static int max_devices;
 
 #define MAX_DEVICES 256
 
-/* TODO: Replace these with struct ida */
-static DECLARE_BITMAP(dev_use, MAX_DEVICES);
+static DEFINE_IDA(mmc_blk_ida);
+static DEFINE_SPINLOCK(mmc_blk_lock);
 
 /*
  * There is one mmc_blk_data per slot.
@@ -178,7 +179,9 @@
 		int devidx = mmc_get_devidx(md->disk);
 		blk_cleanup_queue(md->queue.queue);
 
-		__clear_bit(devidx, dev_use);
+		spin_lock(&mmc_blk_lock);
+		ida_remove(&mmc_blk_ida, devidx);
+		spin_unlock(&mmc_blk_lock);
 
 		put_disk(md->disk);
 		kfree(md);
@@ -945,16 +948,22 @@
 			req->rq_disk->disk_name, "timed out", name, status);
 
 		/* If the status cmd initially failed, retry the r/w cmd */
-		if (!status_valid)
+		if (!status_valid) {
+			pr_err("%s: status not valid, retrying timeout\n",
+				req->rq_disk->disk_name);
 			return ERR_RETRY;
+		}
 
 		/*
 		 * If it was a r/w cmd crc error, or illegal command
 		 * (eg, issued in wrong state) then retry - we should
 		 * have corrected the state problem above.
 		 */
-		if (status & (R1_COM_CRC_ERROR | R1_ILLEGAL_COMMAND))
+		if (status & (R1_COM_CRC_ERROR | R1_ILLEGAL_COMMAND)) {
+			pr_err("%s: command error, retrying timeout\n",
+				req->rq_disk->disk_name);
 			return ERR_RETRY;
+		}
 
 		/* Otherwise abort the command */
 		return ERR_ABORT;
@@ -2189,10 +2198,23 @@
 	struct mmc_blk_data *md;
 	int devidx, ret;
 
-	devidx = find_first_zero_bit(dev_use, max_devices);
-	if (devidx >= max_devices)
-		return ERR_PTR(-ENOSPC);
-	__set_bit(devidx, dev_use);
+again:
+	if (!ida_pre_get(&mmc_blk_ida, GFP_KERNEL))
+		return ERR_PTR(-ENOMEM);
+
+	spin_lock(&mmc_blk_lock);
+	ret = ida_get_new(&mmc_blk_ida, &devidx);
+	spin_unlock(&mmc_blk_lock);
+
+	if (ret == -EAGAIN)
+		goto again;
+	else if (ret)
+		return ERR_PTR(ret);
+
+	if (devidx >= max_devices) {
+		ret = -ENOSPC;
+		goto out;
+	}
 
 	md = kzalloc(sizeof(struct mmc_blk_data), GFP_KERNEL);
 	if (!md) {
@@ -2289,6 +2311,9 @@
  err_kfree:
 	kfree(md);
  out:
+	spin_lock(&mmc_blk_lock);
+	ida_remove(&mmc_blk_ida, devidx);
+	spin_unlock(&mmc_blk_lock);
 	return ERR_PTR(ret);
 }
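
The ida_pre_get()/ida_get_new() pairing is the pre-ida_simple_get() idiom: preallocate with GFP_KERNEL outside the spinlock, then retry if another allocator consumed the preallocated node. Condensed into a hypothetical helper (mmc_blk_alloc_devidx() is not in the patch; it just isolates the loop above):

	static int mmc_blk_alloc_devidx(void)
	{
		int devidx, ret;

	again:
		if (!ida_pre_get(&mmc_blk_ida, GFP_KERNEL))
			return -ENOMEM;		/* preallocation failed */

		spin_lock(&mmc_blk_lock);
		ret = ida_get_new(&mmc_blk_ida, &devidx);
		spin_unlock(&mmc_blk_lock);

		if (ret == -EAGAIN)
			goto again;	/* preallocated node was consumed; retry */

		return ret ? ret : devidx;
	}
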
 
diff --git a/drivers/mmc/core/Kconfig b/drivers/mmc/core/Kconfig
index 4c33d76..250f223 100644
--- a/drivers/mmc/core/Kconfig
+++ b/drivers/mmc/core/Kconfig
@@ -1,3 +1,24 @@
 #
 # MMC core configuration
 #
+config PWRSEQ_EMMC
+	tristate "HW reset support for eMMC"
+	default y
+	depends on OF
+	help
+	  This selects hardware reset support (aka pwrseq-emmc) for eMMC
+	  devices. By default this option is set to y.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called pwrseq_emmc.
+
+config PWRSEQ_SIMPLE
+	tristate "Simple HW reset support for MMC"
+	default y
+	depends on OF
+	help
+	  This selects simple hardware reset support (aka pwrseq-simple) for MMC
+	  devices. By default this option is set to y.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called pwrseq_simple.
diff --git a/drivers/mmc/core/Makefile b/drivers/mmc/core/Makefile
index 2c25138..f007151 100644
--- a/drivers/mmc/core/Makefile
+++ b/drivers/mmc/core/Makefile
@@ -8,5 +8,7 @@
 				   sdio.o sdio_ops.o sdio_bus.o \
 				   sdio_cis.o sdio_io.o sdio_irq.o \
 				   quirks.o slot-gpio.o
-mmc_core-$(CONFIG_OF)		+= pwrseq.o pwrseq_simple.o pwrseq_emmc.o
+mmc_core-$(CONFIG_OF)		+= pwrseq.o
+obj-$(CONFIG_PWRSEQ_SIMPLE)	+= pwrseq_simple.o
+obj-$(CONFIG_PWRSEQ_EMMC)	+= pwrseq_emmc.o
 mmc_core-$(CONFIG_DEBUG_FS)	+= debugfs.o
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 41b1e76..99275e4 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -36,6 +36,9 @@
 #include <linux/mmc/sd.h>
 #include <linux/mmc/slot-gpio.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/mmc.h>
+
 #include "core.h"
 #include "bus.h"
 #include "host.h"
@@ -140,6 +143,8 @@
 			cmd->retries = 0;
 	}
 
+	trace_mmc_request_done(host, mrq);
+
 	if (err && cmd->retries && !mmc_card_removed(host->card)) {
 		/*
 		 * Request starter must handle retries - see
@@ -215,6 +220,8 @@
 		}
 	}
 
+	trace_mmc_request_start(host, mrq);
+
 	host->ops->request(host, mrq);
 }
 
@@ -2449,8 +2456,9 @@
 	ret = host->bus_ops->reset(host);
 	mmc_bus_put(host);
 
-	if (ret != -EOPNOTSUPP)
-		pr_warn("%s: tried to reset card\n", mmc_hostname(host));
+	if (ret)
+		pr_warn("%s: tried to reset card, got error %d\n",
+			mmc_hostname(host), ret);
 
 	return ret;
 }
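
The CREATE_TRACE_POINTS definition added to core.c above must appear in exactly one compilation unit before the trace header is included; it turns the header's event declarations into definitions. For illustration only, a hypothetical minimal event declaration — not the actual contents of include/trace/events/mmc.h:

	TRACE_EVENT(mmc_request_start,
		TP_PROTO(struct mmc_host *host, struct mmc_request *mrq),
		TP_ARGS(host, mrq),
		TP_STRUCT__entry(
			__field(u32, opcode)
		),
		TP_fast_assign(
			__entry->opcode = mrq->cmd ? mrq->cmd->opcode : 0;
		),
		TP_printk("opcode=%u", __entry->opcode)
	);
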
diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
index 6e4c55a..e0a3ee1 100644
--- a/drivers/mmc/core/host.c
+++ b/drivers/mmc/core/host.c
@@ -33,14 +33,14 @@
 
 #define cls_dev_to_mmc_host(d)	container_of(d, struct mmc_host, class_dev)
 
-static DEFINE_IDR(mmc_host_idr);
+static DEFINE_IDA(mmc_host_ida);
 static DEFINE_SPINLOCK(mmc_host_lock);
 
 static void mmc_host_classdev_release(struct device *dev)
 {
 	struct mmc_host *host = cls_dev_to_mmc_host(dev);
 	spin_lock(&mmc_host_lock);
-	idr_remove(&mmc_host_idr, host->index);
+	ida_remove(&mmc_host_ida, host->index);
 	spin_unlock(&mmc_host_lock);
 	kfree(host);
 }
@@ -321,14 +321,20 @@
 
 	/* scanning will be enabled when we're ready */
 	host->rescan_disable = 1;
-	idr_preload(GFP_KERNEL);
+
+again:
+	if (!ida_pre_get(&mmc_host_ida, GFP_KERNEL)) {
+		kfree(host);
+		return NULL;
+	}
+
 	spin_lock(&mmc_host_lock);
-	err = idr_alloc(&mmc_host_idr, host, 0, 0, GFP_NOWAIT);
-	if (err >= 0)
-		host->index = err;
+	err = ida_get_new(&mmc_host_ida, &host->index);
 	spin_unlock(&mmc_host_lock);
-	idr_preload_end();
-	if (err < 0) {
+
+	if (err == -EAGAIN) {
+		goto again;
+	} else if (err) {
 		kfree(host);
 		return NULL;
 	}
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 4dbe3df..b81b08f 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -333,6 +333,9 @@
 	}
 }
 
+/* Minimum partition switch timeout in milliseconds */
+#define MMC_MIN_PART_SWITCH_TIME	300
+
 /*
  * Decode extended CSD.
  */
@@ -397,6 +400,10 @@
 
 		/* EXT_CSD value is in units of 10ms, but we store in ms */
 		card->ext_csd.part_time = 10 * ext_csd[EXT_CSD_PART_SWITCH_TIME];
+		/* Some eMMCs set the value too low, so enforce a minimum */
+		if (card->ext_csd.part_time &&
+		    card->ext_csd.part_time < MMC_MIN_PART_SWITCH_TIME)
+			card->ext_csd.part_time = MMC_MIN_PART_SWITCH_TIME;
 
 		/* Sleep / awake timeout in 100ns units */
 		if (sa_shift > 0 && sa_shift <= 0x17)
@@ -1244,10 +1251,11 @@
 {
 	struct mmc_host *host = card->host;
 	bool send_status = true;
-	unsigned int old_timing;
+	unsigned int old_timing, old_signal_voltage;
 	int err = -EINVAL;
 	u8 val;
 
+	old_signal_voltage = host->ios.signal_voltage;
 	if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS200_1_2V)
 		err = __mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_120);
 
@@ -1256,7 +1264,7 @@
 
 	/* If fails try again during next card power cycle */
 	if (err)
-		goto err;
+		return err;
 
 	mmc_select_driver_type(card);
 
@@ -1290,9 +1298,14 @@
 		}
 	}
 err:
-	if (err)
+	if (err) {
+		/* fall back to the old signal voltage; if that fails, report -EIO */
+		if (__mmc_set_signal_voltage(host, old_signal_voltage))
+			err = -EIO;
+
 		pr_err("%s: %s failed, error %d\n", mmc_hostname(card->host),
 		       __func__, err);
+	}
 	return err;
 }
 
@@ -1314,21 +1327,13 @@
 	if (err && err != -EBADMSG)
 		return err;
 
-	if (err) {
-		pr_warn("%s: switch to %s failed\n",
-			mmc_card_hs(card) ? "high-speed" :
-			(mmc_card_hs200(card) ? "hs200" : ""),
-			mmc_hostname(card->host));
-		err = 0;
-	}
-
 bus_speed:
 	/*
 	 * Set the bus speed to the selected bus timing.
 	 * If timing is not selected, backward compatible is the default.
 	 */
 	mmc_set_bus_speed(card);
-	return err;
+	return 0;
 }
 
 /*
@@ -1483,12 +1488,13 @@
 		if (err)
 			goto free_card;
 
-		/* If doing byte addressing, check if required to do sector
+		/*
+		 * If doing byte addressing, check if required to do sector
 		 * addressing.  Handle the case of <2GB cards needing sector
 		 * addressing.  See section 8.1 JEDEC Standard JED84-A441;
 		 * ocr register has bit 30 set for sector addressing.
 		 */
-		if (!(mmc_card_blockaddr(card)) && (rocr & (1<<30)))
+		if (rocr & BIT(30))
 			mmc_card_set_blockaddr(card);
 
 		/* Erase size depends on CSD and Extended CSD */
@@ -1957,19 +1963,23 @@
 {
 	struct mmc_card *card = host->card;
 
-	if (!(host->caps & MMC_CAP_HW_RESET) || !host->ops->hw_reset)
-		return -EOPNOTSUPP;
+	/*
+	 * In the case of recovery, we can't expect flushing the cache to
+	 * always work, but we give it a go and ignore any errors.
+	 */
+	mmc_flush_cache(host->card);
 
-	if (!mmc_can_reset(card))
-		return -EOPNOTSUPP;
-
-	mmc_set_clock(host, host->f_init);
-
-	host->ops->hw_reset(host);
-
-	/* Set initial state and call mmc_set_ios */
-	mmc_set_initial_state(host);
-
+	if ((host->caps & MMC_CAP_HW_RESET) && host->ops->hw_reset &&
+	     mmc_can_reset(card)) {
+		/* If the card accepts the RST_n signal, send it. */
+		mmc_set_clock(host, host->f_init);
+		host->ops->hw_reset(host);
+		/* Set initial state and call mmc_set_ios */
+		mmc_set_initial_state(host);
+	} else {
+		/* Do a brute force power cycle */
+		mmc_power_cycle(host, card->ocr);
+	}
 	return mmc_init_card(host, card->ocr, card);
 }
 
diff --git a/drivers/mmc/core/pwrseq.c b/drivers/mmc/core/pwrseq.c
index 4c1d175..9386c47 100644
--- a/drivers/mmc/core/pwrseq.c
+++ b/drivers/mmc/core/pwrseq.c
@@ -8,88 +8,55 @@
  *  MMC power sequence management
  */
 #include <linux/kernel.h>
-#include <linux/platform_device.h>
 #include <linux/err.h>
+#include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_platform.h>
 
 #include <linux/mmc/host.h>
 
 #include "pwrseq.h"
 
-struct mmc_pwrseq_match {
-	const char *compatible;
-	struct mmc_pwrseq *(*alloc)(struct mmc_host *host, struct device *dev);
-};
-
-static struct mmc_pwrseq_match pwrseq_match[] = {
-	{
-		.compatible = "mmc-pwrseq-simple",
-		.alloc = mmc_pwrseq_simple_alloc,
-	}, {
-		.compatible = "mmc-pwrseq-emmc",
-		.alloc = mmc_pwrseq_emmc_alloc,
-	},
-};
-
-static struct mmc_pwrseq_match *mmc_pwrseq_find(struct device_node *np)
-{
-	struct mmc_pwrseq_match *match = ERR_PTR(-ENODEV);
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(pwrseq_match); i++) {
-		if (of_device_is_compatible(np,	pwrseq_match[i].compatible)) {
-			match = &pwrseq_match[i];
-			break;
-		}
-	}
-
-	return match;
-}
+static DEFINE_MUTEX(pwrseq_list_mutex);
+static LIST_HEAD(pwrseq_list);
 
 int mmc_pwrseq_alloc(struct mmc_host *host)
 {
-	struct platform_device *pdev;
 	struct device_node *np;
-	struct mmc_pwrseq_match *match;
-	struct mmc_pwrseq *pwrseq;
-	int ret = 0;
+	struct mmc_pwrseq *p;
 
 	np = of_parse_phandle(host->parent->of_node, "mmc-pwrseq", 0);
 	if (!np)
 		return 0;
 
-	pdev = of_find_device_by_node(np);
-	if (!pdev) {
-		ret = -ENODEV;
-		goto err;
+	mutex_lock(&pwrseq_list_mutex);
+	list_for_each_entry(p, &pwrseq_list, pwrseq_node) {
+		if (p->dev->of_node == np) {
+			if (!try_module_get(p->owner))
+				dev_err(host->parent,
+					"increasing module refcount failed\n");
+			else
+				host->pwrseq = p;
+
+			break;
+		}
 	}
 
-	match = mmc_pwrseq_find(np);
-	if (IS_ERR(match)) {
-		ret = PTR_ERR(match);
-		goto err;
-	}
+	of_node_put(np);
+	mutex_unlock(&pwrseq_list_mutex);
 
-	pwrseq = match->alloc(host, &pdev->dev);
-	if (IS_ERR(pwrseq)) {
-		ret = PTR_ERR(pwrseq);
-		goto err;
-	}
+	if (!host->pwrseq)
+		return -EPROBE_DEFER;
 
-	host->pwrseq = pwrseq;
 	dev_info(host->parent, "allocated mmc-pwrseq\n");
 
-err:
-	of_node_put(np);
-	return ret;
+	return 0;
 }
 
 void mmc_pwrseq_pre_power_on(struct mmc_host *host)
 {
 	struct mmc_pwrseq *pwrseq = host->pwrseq;
 
-	if (pwrseq && pwrseq->ops && pwrseq->ops->pre_power_on)
+	if (pwrseq && pwrseq->ops->pre_power_on)
 		pwrseq->ops->pre_power_on(host);
 }
 
@@ -97,7 +64,7 @@
 {
 	struct mmc_pwrseq *pwrseq = host->pwrseq;
 
-	if (pwrseq && pwrseq->ops && pwrseq->ops->post_power_on)
+	if (pwrseq && pwrseq->ops->post_power_on)
 		pwrseq->ops->post_power_on(host);
 }
 
@@ -105,7 +72,7 @@
 {
 	struct mmc_pwrseq *pwrseq = host->pwrseq;
 
-	if (pwrseq && pwrseq->ops && pwrseq->ops->power_off)
+	if (pwrseq && pwrseq->ops->power_off)
 		pwrseq->ops->power_off(host);
 }
 
@@ -113,8 +80,31 @@
 {
 	struct mmc_pwrseq *pwrseq = host->pwrseq;
 
-	if (pwrseq && pwrseq->ops && pwrseq->ops->free)
-		pwrseq->ops->free(host);
-
-	host->pwrseq = NULL;
+	if (pwrseq) {
+		module_put(pwrseq->owner);
+		host->pwrseq = NULL;
+	}
 }
+
+int mmc_pwrseq_register(struct mmc_pwrseq *pwrseq)
+{
+	if (!pwrseq || !pwrseq->ops || !pwrseq->dev)
+		return -EINVAL;
+
+	mutex_lock(&pwrseq_list_mutex);
+	list_add(&pwrseq->pwrseq_node, &pwrseq_list);
+	mutex_unlock(&pwrseq_list_mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mmc_pwrseq_register);
+
+void mmc_pwrseq_unregister(struct mmc_pwrseq *pwrseq)
+{
+	if (pwrseq) {
+		mutex_lock(&pwrseq_list_mutex);
+		list_del(&pwrseq->pwrseq_node);
+		mutex_unlock(&pwrseq_list_mutex);
+	}
+}
+EXPORT_SYMBOL_GPL(mmc_pwrseq_unregister);
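
With this rework a pwrseq is a platform device looked up by of_node, so consumers can only defer until the provider has probed. On the host side nothing new is required beyond honoring the error — a sketch, assuming the usual mmc_of_parse() call site (which is where mmc_pwrseq_alloc() is invoked):

	/* Host driver probe, sketch: propagate -EPROBE_DEFER until the
	 * pwrseq platform driver has called mmc_pwrseq_register(). */
	ret = mmc_of_parse(mmc);
	if (ret)
		goto err_free_host;	/* may be -EPROBE_DEFER */
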
diff --git a/drivers/mmc/core/pwrseq.h b/drivers/mmc/core/pwrseq.h
index 133de04..d69e751 100644
--- a/drivers/mmc/core/pwrseq.h
+++ b/drivers/mmc/core/pwrseq.h
@@ -8,32 +8,39 @@
 #ifndef _MMC_CORE_PWRSEQ_H
 #define _MMC_CORE_PWRSEQ_H
 
+#include <linux/mmc/host.h>
+
 struct mmc_pwrseq_ops {
 	void (*pre_power_on)(struct mmc_host *host);
 	void (*post_power_on)(struct mmc_host *host);
 	void (*power_off)(struct mmc_host *host);
-	void (*free)(struct mmc_host *host);
 };
 
 struct mmc_pwrseq {
 	const struct mmc_pwrseq_ops *ops;
+	struct device *dev;
+	struct list_head pwrseq_node;
+	struct module *owner;
 };
 
 #ifdef CONFIG_OF
 
+int mmc_pwrseq_register(struct mmc_pwrseq *pwrseq);
+void mmc_pwrseq_unregister(struct mmc_pwrseq *pwrseq);
+
 int mmc_pwrseq_alloc(struct mmc_host *host);
 void mmc_pwrseq_pre_power_on(struct mmc_host *host);
 void mmc_pwrseq_post_power_on(struct mmc_host *host);
 void mmc_pwrseq_power_off(struct mmc_host *host);
 void mmc_pwrseq_free(struct mmc_host *host);
 
-struct mmc_pwrseq *mmc_pwrseq_simple_alloc(struct mmc_host *host,
-					   struct device *dev);
-struct mmc_pwrseq *mmc_pwrseq_emmc_alloc(struct mmc_host *host,
-					 struct device *dev);
-
 #else
 
+static inline int mmc_pwrseq_register(struct mmc_pwrseq *pwrseq)
+{
+	return -ENOSYS;
+}
+static inline void mmc_pwrseq_unregister(struct mmc_pwrseq *pwrseq) {}
 static inline int mmc_pwrseq_alloc(struct mmc_host *host) { return 0; }
 static inline void mmc_pwrseq_pre_power_on(struct mmc_host *host) {}
 static inline void mmc_pwrseq_post_power_on(struct mmc_host *host) {}
diff --git a/drivers/mmc/core/pwrseq_emmc.c b/drivers/mmc/core/pwrseq_emmc.c
index 4a82bc7..adc9c0c 100644
--- a/drivers/mmc/core/pwrseq_emmc.c
+++ b/drivers/mmc/core/pwrseq_emmc.c
@@ -9,6 +9,9 @@
  */
 #include <linux/delay.h>
 #include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/device.h>
 #include <linux/err.h>
@@ -25,6 +28,8 @@
 	struct gpio_desc *reset_gpio;
 };
 
+#define to_pwrseq_emmc(p) container_of(p, struct mmc_pwrseq_emmc, pwrseq)
+
 static void __mmc_pwrseq_emmc_reset(struct mmc_pwrseq_emmc *pwrseq)
 {
 	gpiod_set_value(pwrseq->reset_gpio, 1);
@@ -35,27 +40,11 @@
 
 static void mmc_pwrseq_emmc_reset(struct mmc_host *host)
 {
-	struct mmc_pwrseq_emmc *pwrseq = container_of(host->pwrseq,
-					struct mmc_pwrseq_emmc, pwrseq);
+	struct mmc_pwrseq_emmc *pwrseq =  to_pwrseq_emmc(host->pwrseq);
 
 	__mmc_pwrseq_emmc_reset(pwrseq);
 }
 
-static void mmc_pwrseq_emmc_free(struct mmc_host *host)
-{
-	struct mmc_pwrseq_emmc *pwrseq = container_of(host->pwrseq,
-					struct mmc_pwrseq_emmc, pwrseq);
-
-	unregister_restart_handler(&pwrseq->reset_nb);
-	gpiod_put(pwrseq->reset_gpio);
-	kfree(pwrseq);
-}
-
-static const struct mmc_pwrseq_ops mmc_pwrseq_emmc_ops = {
-	.post_power_on = mmc_pwrseq_emmc_reset,
-	.free = mmc_pwrseq_emmc_free,
-};
-
 static int mmc_pwrseq_emmc_reset_nb(struct notifier_block *this,
 				    unsigned long mode, void *cmd)
 {
@@ -66,21 +55,22 @@
 	return NOTIFY_DONE;
 }
 
-struct mmc_pwrseq *mmc_pwrseq_emmc_alloc(struct mmc_host *host,
-					 struct device *dev)
+static const struct mmc_pwrseq_ops mmc_pwrseq_emmc_ops = {
+	.post_power_on = mmc_pwrseq_emmc_reset,
+};
+
+static int mmc_pwrseq_emmc_probe(struct platform_device *pdev)
 {
 	struct mmc_pwrseq_emmc *pwrseq;
-	int ret = 0;
+	struct device *dev = &pdev->dev;
 
-	pwrseq = kzalloc(sizeof(struct mmc_pwrseq_emmc), GFP_KERNEL);
+	pwrseq = devm_kzalloc(dev, sizeof(*pwrseq), GFP_KERNEL);
 	if (!pwrseq)
-		return ERR_PTR(-ENOMEM);
+		return -ENOMEM;
 
-	pwrseq->reset_gpio = gpiod_get(dev, "reset", GPIOD_OUT_LOW);
-	if (IS_ERR(pwrseq->reset_gpio)) {
-		ret = PTR_ERR(pwrseq->reset_gpio);
-		goto free;
-	}
+	pwrseq->reset_gpio = devm_gpiod_get(dev, "reset", GPIOD_OUT_LOW);
+	if (IS_ERR(pwrseq->reset_gpio))
+		return PTR_ERR(pwrseq->reset_gpio);
 
 	/*
 	 * register reset handler to ensure emmc reset also from
@@ -92,9 +82,38 @@
 	register_restart_handler(&pwrseq->reset_nb);
 
 	pwrseq->pwrseq.ops = &mmc_pwrseq_emmc_ops;
+	pwrseq->pwrseq.dev = dev;
+	pwrseq->pwrseq.owner = THIS_MODULE;
+	platform_set_drvdata(pdev, pwrseq);
 
-	return &pwrseq->pwrseq;
-free:
-	kfree(pwrseq);
-	return ERR_PTR(ret);
+	return mmc_pwrseq_register(&pwrseq->pwrseq);
 }
+
+static int mmc_pwrseq_emmc_remove(struct platform_device *pdev)
+{
+	struct mmc_pwrseq_emmc *pwrseq = platform_get_drvdata(pdev);
+
+	unregister_restart_handler(&pwrseq->reset_nb);
+	mmc_pwrseq_unregister(&pwrseq->pwrseq);
+
+	return 0;
+}
+
+static const struct of_device_id mmc_pwrseq_emmc_of_match[] = {
+	{ .compatible = "mmc-pwrseq-emmc",},
+	{/* sentinel */},
+};
+
+MODULE_DEVICE_TABLE(of, mmc_pwrseq_emmc_of_match);
+
+static struct platform_driver mmc_pwrseq_emmc_driver = {
+	.probe = mmc_pwrseq_emmc_probe,
+	.remove = mmc_pwrseq_emmc_remove,
+	.driver = {
+		.name = "pwrseq_emmc",
+		.of_match_table = mmc_pwrseq_emmc_of_match,
+	},
+};
+
+module_platform_driver(mmc_pwrseq_emmc_driver);
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/mmc/core/pwrseq_simple.c b/drivers/mmc/core/pwrseq_simple.c
index bc173e1..450d907 100644
--- a/drivers/mmc/core/pwrseq_simple.c
+++ b/drivers/mmc/core/pwrseq_simple.c
@@ -8,7 +8,10 @@
  *  Simple MMC power sequence management
  */
 #include <linux/clk.h>
+#include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/device.h>
 #include <linux/err.h>
@@ -25,6 +28,8 @@
 	struct gpio_descs *reset_gpios;
 };
 
+#define to_pwrseq_simple(p) container_of(p, struct mmc_pwrseq_simple, pwrseq)
+
 static void mmc_pwrseq_simple_set_gpios_value(struct mmc_pwrseq_simple *pwrseq,
 					      int value)
 {
@@ -44,8 +49,7 @@
 
 static void mmc_pwrseq_simple_pre_power_on(struct mmc_host *host)
 {
-	struct mmc_pwrseq_simple *pwrseq = container_of(host->pwrseq,
-					struct mmc_pwrseq_simple, pwrseq);
+	struct mmc_pwrseq_simple *pwrseq = to_pwrseq_simple(host->pwrseq);
 
 	if (!IS_ERR(pwrseq->ext_clk) && !pwrseq->clk_enabled) {
 		clk_prepare_enable(pwrseq->ext_clk);
@@ -57,16 +61,14 @@
 
 static void mmc_pwrseq_simple_post_power_on(struct mmc_host *host)
 {
-	struct mmc_pwrseq_simple *pwrseq = container_of(host->pwrseq,
-					struct mmc_pwrseq_simple, pwrseq);
+	struct mmc_pwrseq_simple *pwrseq = to_pwrseq_simple(host->pwrseq);
 
 	mmc_pwrseq_simple_set_gpios_value(pwrseq, 0);
 }
 
 static void mmc_pwrseq_simple_power_off(struct mmc_host *host)
 {
-	struct mmc_pwrseq_simple *pwrseq = container_of(host->pwrseq,
-					struct mmc_pwrseq_simple, pwrseq);
+	struct mmc_pwrseq_simple *pwrseq = to_pwrseq_simple(host->pwrseq);
 
 	mmc_pwrseq_simple_set_gpios_value(pwrseq, 1);
 
@@ -76,59 +78,64 @@
 	}
 }
 
-static void mmc_pwrseq_simple_free(struct mmc_host *host)
-{
-	struct mmc_pwrseq_simple *pwrseq = container_of(host->pwrseq,
-					struct mmc_pwrseq_simple, pwrseq);
-
-	if (!IS_ERR(pwrseq->reset_gpios))
-		gpiod_put_array(pwrseq->reset_gpios);
-
-	if (!IS_ERR(pwrseq->ext_clk))
-		clk_put(pwrseq->ext_clk);
-
-	kfree(pwrseq);
-}
-
 static const struct mmc_pwrseq_ops mmc_pwrseq_simple_ops = {
 	.pre_power_on = mmc_pwrseq_simple_pre_power_on,
 	.post_power_on = mmc_pwrseq_simple_post_power_on,
 	.power_off = mmc_pwrseq_simple_power_off,
-	.free = mmc_pwrseq_simple_free,
 };
 
-struct mmc_pwrseq *mmc_pwrseq_simple_alloc(struct mmc_host *host,
-					   struct device *dev)
+static const struct of_device_id mmc_pwrseq_simple_of_match[] = {
+	{ .compatible = "mmc-pwrseq-simple",},
+	{/* sentinel */},
+};
+MODULE_DEVICE_TABLE(of, mmc_pwrseq_simple_of_match);
+
+static int mmc_pwrseq_simple_probe(struct platform_device *pdev)
 {
 	struct mmc_pwrseq_simple *pwrseq;
-	int ret = 0;
+	struct device *dev = &pdev->dev;
 
-	pwrseq = kzalloc(sizeof(*pwrseq), GFP_KERNEL);
+	pwrseq = devm_kzalloc(dev, sizeof(*pwrseq), GFP_KERNEL);
 	if (!pwrseq)
-		return ERR_PTR(-ENOMEM);
+		return -ENOMEM;
 
-	pwrseq->ext_clk = clk_get(dev, "ext_clock");
-	if (IS_ERR(pwrseq->ext_clk) &&
-	    PTR_ERR(pwrseq->ext_clk) != -ENOENT) {
-		ret = PTR_ERR(pwrseq->ext_clk);
-		goto free;
-	}
+	pwrseq->ext_clk = devm_clk_get(dev, "ext_clock");
+	if (IS_ERR(pwrseq->ext_clk) && PTR_ERR(pwrseq->ext_clk) != -ENOENT)
+		return PTR_ERR(pwrseq->ext_clk);
 
-	pwrseq->reset_gpios = gpiod_get_array(dev, "reset", GPIOD_OUT_HIGH);
+	pwrseq->reset_gpios = devm_gpiod_get_array(dev, "reset",
+							GPIOD_OUT_HIGH);
 	if (IS_ERR(pwrseq->reset_gpios) &&
 	    PTR_ERR(pwrseq->reset_gpios) != -ENOENT &&
 	    PTR_ERR(pwrseq->reset_gpios) != -ENOSYS) {
-		ret = PTR_ERR(pwrseq->reset_gpios);
-		goto clk_put;
+		return PTR_ERR(pwrseq->reset_gpios);
 	}
 
+	pwrseq->pwrseq.dev = dev;
 	pwrseq->pwrseq.ops = &mmc_pwrseq_simple_ops;
+	pwrseq->pwrseq.owner = THIS_MODULE;
+	platform_set_drvdata(pdev, pwrseq);
 
-	return &pwrseq->pwrseq;
-clk_put:
-	if (!IS_ERR(pwrseq->ext_clk))
-		clk_put(pwrseq->ext_clk);
-free:
-	kfree(pwrseq);
-	return ERR_PTR(ret);
+	return mmc_pwrseq_register(&pwrseq->pwrseq);
 }
+
+static int mmc_pwrseq_simple_remove(struct platform_device *pdev)
+{
+	struct mmc_pwrseq_simple *pwrseq = platform_get_drvdata(pdev);
+
+	mmc_pwrseq_unregister(&pwrseq->pwrseq);
+
+	return 0;
+}
+
+static struct platform_driver mmc_pwrseq_simple_driver = {
+	.probe = mmc_pwrseq_simple_probe,
+	.remove = mmc_pwrseq_simple_remove,
+	.driver = {
+		.name = "pwrseq_simple",
+		.of_match_table = mmc_pwrseq_simple_of_match,
+	},
+};
+
+module_platform_driver(mmc_pwrseq_simple_driver);
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/mmc/core/sdio_cis.c b/drivers/mmc/core/sdio_cis.c
index 6f6fc52..dcb3dee 100644
--- a/drivers/mmc/core/sdio_cis.c
+++ b/drivers/mmc/core/sdio_cis.c
@@ -177,8 +177,13 @@
 	vsn = func->card->cccr.sdio_vsn;
 	min_size = (vsn == SDIO_SDIO_REV_1_00) ? 28 : 42;
 
-	if (size < min_size)
+	if (size == 28 && vsn == SDIO_SDIO_REV_1_10) {
+		pr_warn("%s: card has broken SDIO 1.1 CIS, forcing SDIO 1.0\n",
+			mmc_hostname(card->host));
+		vsn = SDIO_SDIO_REV_1_00;
+	} else if (size < min_size) {
 		return -EINVAL;
+	}
 
 	/* TPLFE_MAX_BLK_SIZE */
 	func->max_blksize = buf[12] | (buf[13] << 8);
diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index e657af0..0aa484c 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -677,9 +677,9 @@
 	depends on HAS_DMA
 	depends on SUPERH || ARCH_RENESAS || COMPILE_TEST
 	help
-	  This selects the MMC Host Interface controller (MMCIF).
+	  This selects the MMC Host Interface controller (MMCIF) found in various
+	  Renesas SoCs for SH and ARM architectures.
 
-	  This driver supports MMCIF in sh7724/sh7757/sh7372.
 
 config MMC_JZ4740
 	tristate "JZ4740 SD/Multimedia Card Interface support"
diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c
index 9268c41..0ad8ef5 100644
--- a/drivers/mmc/host/atmel-mci.c
+++ b/drivers/mmc/host/atmel-mci.c
@@ -1410,8 +1410,6 @@
 	WARN_ON(slot->mrq);
 	dev_dbg(&host->pdev->dev, "MRQ: cmd %u\n", mrq->cmd->opcode);
 
-	pm_runtime_get_sync(&host->pdev->dev);
-
 	/*
 	 * We may "know" the card is gone even though there's still an
 	 * electrical connection. If so, we really need to communicate
@@ -1442,8 +1440,6 @@
 	struct atmel_mci	*host = slot->host;
 	unsigned int		i;
 
-	pm_runtime_get_sync(&host->pdev->dev);
-
 	slot->sdc_reg &= ~ATMCI_SDCBUS_MASK;
 	switch (ios->bus_width) {
 	case MMC_BUS_WIDTH_1:
@@ -1576,8 +1572,6 @@
 		break;
 	}
 
-	pm_runtime_mark_last_busy(&host->pdev->dev);
-	pm_runtime_put_autosuspend(&host->pdev->dev);
 }
 
 static int atmci_get_ro(struct mmc_host *mmc)
@@ -1669,9 +1663,6 @@
 	spin_unlock(&host->lock);
 	mmc_request_done(prev_mmc, mrq);
 	spin_lock(&host->lock);
-
-	pm_runtime_mark_last_busy(&host->pdev->dev);
-	pm_runtime_put_autosuspend(&host->pdev->dev);
 }
 
 static void atmci_command_complete(struct atmel_mci *host,
diff --git a/drivers/mmc/host/davinci_mmc.c b/drivers/mmc/host/davinci_mmc.c
index 693144e..a56373c 100644
--- a/drivers/mmc/host/davinci_mmc.c
+++ b/drivers/mmc/host/davinci_mmc.c
@@ -32,12 +32,10 @@
 #include <linux/delay.h>
 #include <linux/dmaengine.h>
 #include <linux/dma-mapping.h>
-#include <linux/edma.h>
 #include <linux/mmc/mmc.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
 
-#include <linux/platform_data/edma.h>
 #include <linux/platform_data/mmc-davinci.h>
 
 /*
@@ -202,7 +200,6 @@
 	u32 buffer_bytes_left;
 	u32 bytes_left;
 
-	u32 rxdma, txdma;
 	struct dma_chan *dma_tx;
 	struct dma_chan *dma_rx;
 	bool use_dma;
@@ -513,35 +510,20 @@
 
 static int __init davinci_acquire_dma_channels(struct mmc_davinci_host *host)
 {
-	int r;
-	dma_cap_mask_t mask;
-
-	dma_cap_zero(mask);
-	dma_cap_set(DMA_SLAVE, mask);
-
-	host->dma_tx =
-		dma_request_slave_channel_compat(mask, edma_filter_fn,
-				&host->txdma, mmc_dev(host->mmc), "tx");
-	if (!host->dma_tx) {
+	host->dma_tx = dma_request_chan(mmc_dev(host->mmc), "tx");
+	if (IS_ERR(host->dma_tx)) {
 		dev_err(mmc_dev(host->mmc), "Can't get dma_tx channel\n");
-		return -ENODEV;
+		return PTR_ERR(host->dma_tx);
 	}
 
-	host->dma_rx =
-		dma_request_slave_channel_compat(mask, edma_filter_fn,
-				&host->rxdma, mmc_dev(host->mmc), "rx");
-	if (!host->dma_rx) {
+	host->dma_rx = dma_request_chan(mmc_dev(host->mmc), "rx");
+	if (IS_ERR(host->dma_rx)) {
 		dev_err(mmc_dev(host->mmc), "Can't get dma_rx channel\n");
-		r = -ENODEV;
-		goto free_master_write;
+		dma_release_channel(host->dma_tx);
+		return PTR_ERR(host->dma_rx);
 	}
 
 	return 0;
-
-free_master_write:
-	dma_release_channel(host->dma_tx);
-
-	return r;
 }
 
 /*----------------------------------------------------------------------*/
@@ -1223,7 +1205,7 @@
 	struct mmc_davinci_host *host = NULL;
 	struct mmc_host *mmc = NULL;
 	struct resource *r, *mem = NULL;
-	int ret = 0, irq = 0;
+	int ret, irq;
 	size_t mem_size;
 	const struct platform_device_id *id_entry;
 
@@ -1233,50 +1215,40 @@
 		return -ENOENT;
 	}
 
-	ret = -ENODEV;
 	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	irq = platform_get_irq(pdev, 0);
 	if (!r || irq == NO_IRQ)
-		goto out;
+		return -ENODEV;
 
-	ret = -EBUSY;
 	mem_size = resource_size(r);
-	mem = request_mem_region(r->start, mem_size, pdev->name);
+	mem = devm_request_mem_region(&pdev->dev, r->start, mem_size,
+				      pdev->name);
 	if (!mem)
-		goto out;
+		return -EBUSY;
 
-	ret = -ENOMEM;
 	mmc = mmc_alloc_host(sizeof(struct mmc_davinci_host), &pdev->dev);
 	if (!mmc)
-		goto out;
+		return -ENOMEM;
 
 	host = mmc_priv(mmc);
 	host->mmc = mmc;	/* Important */
 
-	r = platform_get_resource(pdev, IORESOURCE_DMA, 0);
-	if (!r)
-		dev_warn(&pdev->dev, "RX DMA resource not specified\n");
-	else
-		host->rxdma = r->start;
-
-	r = platform_get_resource(pdev, IORESOURCE_DMA, 1);
-	if (!r)
-		dev_warn(&pdev->dev, "TX DMA resource not specified\n");
-	else
-		host->txdma = r->start;
-
 	host->mem_res = mem;
-	host->base = ioremap(mem->start, mem_size);
-	if (!host->base)
-		goto out;
+	host->base = devm_ioremap(&pdev->dev, mem->start, mem_size);
+	if (!host->base) {
+		ret = -ENOMEM;
+		goto ioremap_fail;
+	}
 
-	ret = -ENXIO;
-	host->clk = clk_get(&pdev->dev, "MMCSDCLK");
+	host->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(host->clk)) {
 		ret = PTR_ERR(host->clk);
-		goto out;
+		goto clk_get_fail;
 	}
-	clk_enable(host->clk);
+	ret = clk_prepare_enable(host->clk);
+	if (ret)
+		goto clk_prepare_enable_fail;
+
 	host->mmc_input_clk = clk_get_rate(host->clk);
 
 	init_mmcsd_host(host);
@@ -1291,8 +1263,13 @@
 	host->mmc_irq = irq;
 	host->sdio_irq = platform_get_irq(pdev, 1);
 
-	if (host->use_dma && davinci_acquire_dma_channels(host) != 0)
-		host->use_dma = 0;
+	if (host->use_dma) {
+		ret = davinci_acquire_dma_channels(host);
+		if (ret == -EPROBE_DEFER)
+			goto dma_probe_defer;
+		else if (ret)
+			host->use_dma = 0;
+	}
 
 	/* REVISIT:  someday, support IRQ-driven card detection.  */
 	mmc->caps |= MMC_CAP_NEEDS_POLL;
@@ -1346,15 +1323,17 @@
 
 	ret = mmc_add_host(mmc);
 	if (ret < 0)
-		goto out;
+		goto mmc_add_host_fail;
 
-	ret = request_irq(irq, mmc_davinci_irq, 0, mmc_hostname(mmc), host);
+	ret = devm_request_irq(&pdev->dev, irq, mmc_davinci_irq, 0,
+			       mmc_hostname(mmc), host);
 	if (ret)
-		goto out;
+		goto request_irq_fail;
 
 	if (host->sdio_irq >= 0) {
-		ret = request_irq(host->sdio_irq, mmc_davinci_sdio_irq, 0,
-				  mmc_hostname(mmc), host);
+		ret = devm_request_irq(&pdev->dev, host->sdio_irq,
+				       mmc_davinci_sdio_irq, 0,
+				       mmc_hostname(mmc), host);
 		if (!ret)
 			mmc->caps |= MMC_CAP_SDIO_IRQ;
 	}
@@ -1367,28 +1346,18 @@
 
 	return 0;
 
-out:
+request_irq_fail:
+	mmc_remove_host(mmc);
+mmc_add_host_fail:
 	mmc_davinci_cpufreq_deregister(host);
 cpu_freq_fail:
-	if (host) {
-		davinci_release_dma_channels(host);
-
-		if (host->clk) {
-			clk_disable(host->clk);
-			clk_put(host->clk);
-		}
-
-		if (host->base)
-			iounmap(host->base);
-	}
-
-	if (mmc)
-		mmc_free_host(mmc);
-
-	if (mem)
-		release_resource(mem);
-
-	dev_dbg(&pdev->dev, "probe err %d\n", ret);
+	davinci_release_dma_channels(host);
+dma_probe_defer:
+	clk_disable_unprepare(host->clk);
+clk_prepare_enable_fail:
+clk_get_fail:
+ioremap_fail:
+	mmc_free_host(mmc);
 
 	return ret;
 }
@@ -1397,25 +1366,11 @@
 {
 	struct mmc_davinci_host *host = platform_get_drvdata(pdev);
 
-	if (host) {
-		mmc_davinci_cpufreq_deregister(host);
-
-		mmc_remove_host(host->mmc);
-		free_irq(host->mmc_irq, host);
-		if (host->mmc->caps & MMC_CAP_SDIO_IRQ)
-			free_irq(host->sdio_irq, host);
-
-		davinci_release_dma_channels(host);
-
-		clk_disable(host->clk);
-		clk_put(host->clk);
-
-		iounmap(host->base);
-
-		release_resource(host->mem_res);
-
-		mmc_free_host(host->mmc);
-	}
+	mmc_remove_host(host->mmc);
+	mmc_davinci_cpufreq_deregister(host);
+	davinci_release_dma_channels(host);
+	clk_disable_unprepare(host->clk);
+	mmc_free_host(host->mmc);
 
 	return 0;
 }
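
Unlike dma_request_slave_channel_compat(), dma_request_chan() returns an ERR_PTR(), which is what lets these conversions tell "no channel configured" apart from "DMA provider not ready". The recurring shape, also used by the omap and omap_hsmmc conversions below (davinci and omap_hsmmc treat failures as fatal, omap falls back to PIO):

	/* Sketch of the dma_request_chan() probe-time pattern. */
	chan = dma_request_chan(dev, "tx");
	if (IS_ERR(chan)) {
		if (PTR_ERR(chan) == -EPROBE_DEFER)
			return -EPROBE_DEFER;	/* retry once the dmaengine driver is up */
		chan = NULL;			/* optional-DMA drivers fall back to PIO */
	}
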
diff --git a/drivers/mmc/host/dw_mmc-exynos.c b/drivers/mmc/host/dw_mmc-exynos.c
index 8790f2a..7e3a324 100644
--- a/drivers/mmc/host/dw_mmc-exynos.c
+++ b/drivers/mmc/host/dw_mmc-exynos.c
@@ -91,10 +91,14 @@
 		return SDMMC_CLKSEL_GET_DIV(mci_readl(host, CLKSEL)) + 1;
 }
 
-static int dw_mci_exynos_priv_init(struct dw_mci *host)
+static void dw_mci_exynos_config_smu(struct dw_mci *host)
 {
 	struct dw_mci_exynos_priv_data *priv = host->priv;
 
+	/*
+	 * If the Exynos SoC provides security management (SMU),
+	 * configure it for non-encryption mode at this time.
+	 */
 	if (priv->ctrl_type == DW_MCI_TYPE_EXYNOS5420_SMU ||
 		priv->ctrl_type == DW_MCI_TYPE_EXYNOS7_SMU) {
 		mci_writel(host, MPSBEGIN0, 0);
@@ -104,6 +108,13 @@
 			   SDMMC_MPSCTRL_VALID |
 			   SDMMC_MPSCTRL_NON_SECURE_WRITE_BIT);
 	}
+}
+
+static int dw_mci_exynos_priv_init(struct dw_mci *host)
+{
+	struct dw_mci_exynos_priv_data *priv = host->priv;
+
+	dw_mci_exynos_config_smu(host);
 
 	if (priv->ctrl_type >= DW_MCI_TYPE_EXYNOS5420) {
 		priv->saved_strobe_ctrl = mci_readl(host, HS400_DLINE_CTRL);
@@ -115,13 +126,6 @@
 				DQS_CTRL_GET_RD_DELAY(priv->saved_strobe_ctrl);
 	}
 
-	return 0;
-}
-
-static int dw_mci_exynos_setup_clock(struct dw_mci *host)
-{
-	struct dw_mci_exynos_priv_data *priv = host->priv;
-
 	host->bus_hz /= (priv->ciu_div + 1);
 
 	return 0;
@@ -169,7 +173,7 @@
 {
 	struct dw_mci *host = dev_get_drvdata(dev);
 
-	dw_mci_exynos_priv_init(host);
+	dw_mci_exynos_config_smu(host);
 	return dw_mci_resume(host);
 }
 
@@ -489,7 +493,6 @@
 static const struct dw_mci_drv_data exynos_drv_data = {
 	.caps			= exynos_dwmmc_caps,
 	.init			= dw_mci_exynos_priv_init,
-	.setup_clock		= dw_mci_exynos_setup_clock,
 	.set_ios		= dw_mci_exynos_set_ios,
 	.parse_dt		= dw_mci_exynos_parse_dt,
 	.execute_tuning		= dw_mci_exynos_execute_tuning,
diff --git a/drivers/mmc/host/dw_mmc-rockchip.c b/drivers/mmc/host/dw_mmc-rockchip.c
index 84e50f3..8c20b81 100644
--- a/drivers/mmc/host/dw_mmc-rockchip.c
+++ b/drivers/mmc/host/dw_mmc-rockchip.c
@@ -26,13 +26,6 @@
 	int			default_sample_phase;
 };
 
-static int dw_mci_rk3288_setup_clock(struct dw_mci *host)
-{
-	host->bus_hz /= RK3288_CLKGEN_DIV;
-
-	return 0;
-}
-
 static void dw_mci_rk3288_set_ios(struct dw_mci *host, struct mmc_ios *ios)
 {
 	struct dw_mci_rockchip_priv_data *priv = host->priv;
@@ -231,18 +224,30 @@
 	/* It needs this quirk on all Rockchip SoCs */
 	host->pdata->quirks |= DW_MCI_QUIRK_BROKEN_DTO;
 
+	if (of_device_is_compatible(host->dev->of_node,
+				    "rockchip,rk3288-dw-mshc"))
+		host->bus_hz /= RK3288_CLKGEN_DIV;
+
 	return 0;
 }
 
+/* Common capabilities of RK3288 SoC */
+static unsigned long dw_mci_rk3288_dwmmc_caps[4] = {
+	MMC_CAP_ERASE,
+	MMC_CAP_ERASE,
+	MMC_CAP_ERASE,
+	MMC_CAP_ERASE,
+};
+
 static const struct dw_mci_drv_data rk2928_drv_data = {
 	.init			= dw_mci_rockchip_init,
 };
 
 static const struct dw_mci_drv_data rk3288_drv_data = {
+	.caps			= dw_mci_rk3288_dwmmc_caps,
 	.set_ios		= dw_mci_rk3288_set_ios,
 	.execute_tuning		= dw_mci_rk3288_execute_tuning,
 	.parse_dt		= dw_mci_rk3288_parse_dt,
-	.setup_clock    = dw_mci_rk3288_setup_clock,
 	.init			= dw_mci_rockchip_init,
 };
 
@@ -269,33 +274,13 @@
 	return dw_mci_pltfm_register(pdev, drv_data);
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int dw_mci_rockchip_suspend(struct device *dev)
-{
-	struct dw_mci *host = dev_get_drvdata(dev);
-
-	return dw_mci_suspend(host);
-}
-
-static int dw_mci_rockchip_resume(struct device *dev)
-{
-	struct dw_mci *host = dev_get_drvdata(dev);
-
-	return dw_mci_resume(host);
-}
-#endif /* CONFIG_PM_SLEEP */
-
-static SIMPLE_DEV_PM_OPS(dw_mci_rockchip_pmops,
-			 dw_mci_rockchip_suspend,
-			 dw_mci_rockchip_resume);
-
 static struct platform_driver dw_mci_rockchip_pltfm_driver = {
 	.probe		= dw_mci_rockchip_probe,
 	.remove		= dw_mci_pltfm_remove,
 	.driver		= {
 		.name		= "dwmmc_rockchip",
 		.of_match_table	= dw_mci_rockchip_match,
-		.pm		= &dw_mci_rockchip_pmops,
+		.pm		= &dw_mci_pltfm_pmops,
 	},
 };
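
The four identical MMC_CAP_ERASE entries are not a typo: the dw_mmc core indexes the drv_data->caps array by the slot's ctrl_id parsed from the devicetree, so each possible controller instance needs its own entry. Roughly, from the slot init path in dw_mmc.c:

	/* Paraphrased from dw_mci_init_slot() in dw_mmc.c. */
	if (drv_data && drv_data->caps)
		mmc->caps |= drv_data->caps[ctrl_id];
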
 
diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index 242f9a0..9dd1bd3 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -680,7 +680,7 @@
 
 static void dw_mci_edmac_stop_dma(struct dw_mci *host)
 {
-	dmaengine_terminate_all(host->dms->ch);
+	dmaengine_terminate_async(host->dms->ch);
 }
 
 static int dw_mci_edmac_start_dma(struct dw_mci *host,
@@ -3003,15 +3003,6 @@
 		}
 	}
 
-	if (drv_data && drv_data->setup_clock) {
-		ret = drv_data->setup_clock(host);
-		if (ret) {
-			dev_err(host->dev,
-				"implementation specific clock setup failed\n");
-			goto err_clk_ciu;
-		}
-	}
-
 	setup_timer(&host->cmd11_timer,
 		    dw_mci_cmd11_timer, (unsigned long)host);
 
diff --git a/drivers/mmc/host/dw_mmc.h b/drivers/mmc/host/dw_mmc.h
index 68d5da2..1e8d838 100644
--- a/drivers/mmc/host/dw_mmc.h
+++ b/drivers/mmc/host/dw_mmc.h
@@ -277,7 +277,6 @@
  * dw_mci driver data - dw-mshc implementation specific driver data.
  * @caps: mmc subsystem specified capabilities of the controller(s).
  * @init: early implementation specific initialization.
- * @setup_clock: implementation specific clock configuration.
  * @set_ios: handle bus specific extensions.
  * @parse_dt: parse implementation specific device tree properties.
  * @execute_tuning: implementation specific tuning procedure.
@@ -289,7 +288,6 @@
 struct dw_mci_drv_data {
 	unsigned long	*caps;
 	int		(*init)(struct dw_mci *host);
-	int		(*setup_clock)(struct dw_mci *host);
 	void		(*set_ios)(struct dw_mci *host, struct mmc_ios *ios);
 	int		(*parse_dt)(struct dw_mci *host);
 	int		(*execute_tuning)(struct dw_mci_slot *slot, u32 opcode);
diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 2e6c968..df990bb 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -226,16 +226,11 @@
 	unsigned long flags;
 	int busy = 0;
 
-	pm_runtime_get_sync(mmc_dev(mmc));
-
 	spin_lock_irqsave(&host->lock, flags);
 	if (readl(host->base + MMCISTATUS) & MCI_ST_CARDBUSY)
 		busy = 1;
 	spin_unlock_irqrestore(&host->lock, flags);
 
-	pm_runtime_mark_last_busy(mmc_dev(mmc));
-	pm_runtime_put_autosuspend(mmc_dev(mmc));
-
 	return busy;
 }
 
@@ -381,9 +376,6 @@
 	host->cmd = NULL;
 
 	mmc_request_done(host->mmc, mrq);
-
-	pm_runtime_mark_last_busy(mmc_dev(host->mmc));
-	pm_runtime_put_autosuspend(mmc_dev(host->mmc));
 }
 
 static void mmci_set_mask1(struct mmci_host *host, unsigned int mask)
@@ -1290,8 +1282,6 @@
 		return;
 	}
 
-	pm_runtime_get_sync(mmc_dev(mmc));
-
 	spin_lock_irqsave(&host->lock, flags);
 
 	host->mrq = mrq;
@@ -1318,8 +1308,6 @@
 	unsigned long flags;
 	int ret;
 
-	pm_runtime_get_sync(mmc_dev(mmc));
-
 	if (host->plat->ios_handler &&
 		host->plat->ios_handler(mmc_dev(mmc), ios))
 			dev_err(mmc_dev(mmc), "platform ios_handler failed\n");
@@ -1414,9 +1402,6 @@
 	mmci_reg_delay(host);
 
 	spin_unlock_irqrestore(&host->lock, flags);
-
-	pm_runtime_mark_last_busy(mmc_dev(mmc));
-	pm_runtime_put_autosuspend(mmc_dev(mmc));
 }
 
 static int mmci_get_cd(struct mmc_host *mmc)
@@ -1440,8 +1425,6 @@
 
 	if (!IS_ERR(mmc->supply.vqmmc)) {
 
-		pm_runtime_get_sync(mmc_dev(mmc));
-
 		switch (ios->signal_voltage) {
 		case MMC_SIGNAL_VOLTAGE_330:
 			ret = regulator_set_voltage(mmc->supply.vqmmc,
@@ -1459,9 +1442,6 @@
 
 		if (ret)
 			dev_warn(mmc_dev(mmc), "Voltage switch failed\n");
-
-		pm_runtime_mark_last_busy(mmc_dev(mmc));
-		pm_runtime_put_autosuspend(mmc_dev(mmc));
 	}
 
 	return ret;
diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c
index b17f30d..5642f71 100644
--- a/drivers/mmc/host/mtk-sd.c
+++ b/drivers/mmc/host/mtk-sd.c
@@ -736,9 +736,6 @@
 	if (mrq->data)
 		msdc_unprepare_data(host, mrq);
 	mmc_request_done(host->mmc, mrq);
-
-	pm_runtime_mark_last_busy(host->dev);
-	pm_runtime_put_autosuspend(host->dev);
 }
 
 /* returns true if command is fully handled; returns false otherwise */
@@ -886,8 +883,6 @@
 	WARN_ON(host->mrq);
 	host->mrq = mrq;
 
-	pm_runtime_get_sync(host->dev);
-
 	if (mrq->data)
 		msdc_prepare_data(host, mrq);
 
@@ -1201,8 +1196,6 @@
 	struct msdc_host *host = mmc_priv(mmc);
 	int ret;
 
-	pm_runtime_get_sync(host->dev);
-
 	msdc_set_buswidth(host, ios->bus_width);
 
 	/* Suspend/Resume will do power off/on */
@@ -1214,7 +1207,7 @@
 					ios->vdd);
 			if (ret) {
 				dev_err(host->dev, "Failed to set vmmc power!\n");
-				goto end;
+				return;
 			}
 		}
 		break;
@@ -1242,10 +1235,6 @@
 
 	if (host->mclk != ios->clock || host->timing != ios->timing)
 		msdc_set_mclk(host, ios->timing, ios->clock);
-
-end:
-	pm_runtime_mark_last_busy(host->dev);
-	pm_runtime_put_autosuspend(host->dev);
 }
 
 static u32 test_delay_bit(u32 delay, u32 bit)
@@ -1408,19 +1397,15 @@
 	struct msdc_host *host = mmc_priv(mmc);
 	int ret;
 
-	pm_runtime_get_sync(host->dev);
 	ret = msdc_tune_response(mmc, opcode);
 	if (ret == -EIO) {
 		dev_err(host->dev, "Tune response fail!\n");
-		goto out;
+		return ret;
 	}
 	ret = msdc_tune_data(mmc, opcode);
 	if (ret == -EIO)
 		dev_err(host->dev, "Tune data fail!\n");
 
-out:
-	pm_runtime_mark_last_busy(host->dev);
-	pm_runtime_put_autosuspend(host->dev);
 	return ret;
 }
 
diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c
index b9958a1..f23d65e 100644
--- a/drivers/mmc/host/omap.c
+++ b/drivers/mmc/host/omap.c
@@ -23,7 +23,6 @@
 #include <linux/spinlock.h>
 #include <linux/timer.h>
 #include <linux/of.h>
-#include <linux/omap-dma.h>
 #include <linux/mmc/host.h>
 #include <linux/mmc/card.h>
 #include <linux/mmc/mmc.h>
@@ -1321,8 +1320,6 @@
 	struct omap_mmc_platform_data *pdata = pdev->dev.platform_data;
 	struct mmc_omap_host *host = NULL;
 	struct resource *res;
-	dma_cap_mask_t mask;
-	unsigned sig = 0;
 	int i, ret = 0;
 	int irq;
 
@@ -1382,29 +1379,34 @@
 		goto err_free_iclk;
 	}
 
-	dma_cap_zero(mask);
-	dma_cap_set(DMA_SLAVE, mask);
-
 	host->dma_tx_burst = -1;
 	host->dma_rx_burst = -1;
 
-	res = platform_get_resource_byname(pdev, IORESOURCE_DMA, "tx");
-	if (res)
-		sig = res->start;
-	host->dma_tx = dma_request_slave_channel_compat(mask,
-				omap_dma_filter_fn, &sig, &pdev->dev, "tx");
-	if (!host->dma_tx)
-		dev_warn(host->dev, "unable to obtain TX DMA engine channel %u\n",
-			sig);
+	host->dma_tx = dma_request_chan(&pdev->dev, "tx");
+	if (IS_ERR(host->dma_tx)) {
+		ret = PTR_ERR(host->dma_tx);
+		if (ret == -EPROBE_DEFER) {
+			clk_put(host->fclk);
+			goto err_free_iclk;
+		}
 
-	res = platform_get_resource_byname(pdev, IORESOURCE_DMA, "rx");
-	if (res)
-		sig = res->start;
-	host->dma_rx = dma_request_slave_channel_compat(mask,
-				omap_dma_filter_fn, &sig, &pdev->dev, "rx");
-	if (!host->dma_rx)
-		dev_warn(host->dev, "unable to obtain RX DMA engine channel %u\n",
-			sig);
+		host->dma_tx = NULL;
+		dev_warn(host->dev, "TX DMA channel request failed\n");
+	}
+
+	host->dma_rx = dma_request_chan(&pdev->dev, "rx");
+	if (IS_ERR(host->dma_rx)) {
+		ret = PTR_ERR(host->dma_rx);
+		if (ret == -EPROBE_DEFER) {
+			if (host->dma_tx)
+				dma_release_channel(host->dma_tx);
+			clk_put(host->fclk);
+			goto err_free_iclk;
+		}
+
+		host->dma_rx = NULL;
+		dev_warn(host->dev, "RX DMA channel request failed\n");
+	}
 
 	ret = request_irq(host->irq, mmc_omap_irq, 0, DRIVER_NAME, host);
 	if (ret)
diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index f9ac3bb..24ebc9a 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -32,7 +32,6 @@
 #include <linux/of_irq.h>
 #include <linux/of_gpio.h>
 #include <linux/of_device.h>
-#include <linux/omap-dmaengine.h>
 #include <linux/mmc/host.h>
 #include <linux/mmc/core.h>
 #include <linux/mmc/mmc.h>
@@ -351,15 +350,14 @@
 	return 0;
 }
 
-static int omap_hsmmc_set_power(struct device *dev, int power_on, int vdd)
+static int omap_hsmmc_set_power(struct omap_hsmmc_host *host, int power_on,
+				int vdd)
 {
-	struct omap_hsmmc_host *host =
-		platform_get_drvdata(to_platform_device(dev));
 	struct mmc_host *mmc = host->mmc;
 	int ret = 0;
 
 	if (mmc_pdata(host)->set_power)
-		return mmc_pdata(host)->set_power(dev, power_on, vdd);
+		return mmc_pdata(host)->set_power(host->dev, power_on, vdd);
 
 	/*
 	 * If we don't see a Vcc regulator, assume it's a fixed
@@ -369,7 +367,7 @@
 		return 0;
 
 	if (mmc_pdata(host)->before_set_reg)
-		mmc_pdata(host)->before_set_reg(dev, power_on, vdd);
+		mmc_pdata(host)->before_set_reg(host->dev, power_on, vdd);
 
 	ret = omap_hsmmc_set_pbias(host, false, 0);
 	if (ret)
@@ -403,7 +401,7 @@
 	}
 
 	if (mmc_pdata(host)->after_set_reg)
-		mmc_pdata(host)->after_set_reg(dev, power_on, vdd);
+		mmc_pdata(host)->after_set_reg(host->dev, power_on, vdd);
 
 	return 0;
 
@@ -968,8 +966,6 @@
 		return;
 	host->mrq = NULL;
 	mmc_request_done(host->mmc, mrq);
-	pm_runtime_mark_last_busy(host->dev);
-	pm_runtime_put_autosuspend(host->dev);
 }
 
 /*
@@ -1250,17 +1246,15 @@
 	int ret;
 
 	/* Disable the clocks */
-	pm_runtime_put_sync(host->dev);
 	if (host->dbclk)
 		clk_disable_unprepare(host->dbclk);
 
 	/* Turn the power off */
-	ret = omap_hsmmc_set_power(host->dev, 0, 0);
+	ret = omap_hsmmc_set_power(host, 0, 0);
 
 	/* Turn the power ON with given VDD 1.8 or 3.0v */
 	if (!ret)
-		ret = omap_hsmmc_set_power(host->dev, 1, vdd);
-	pm_runtime_get_sync(host->dev);
+		ret = omap_hsmmc_set_power(host, 1, vdd);
 	if (host->dbclk)
 		clk_prepare_enable(host->dbclk);
 
@@ -1368,8 +1362,6 @@
 
 		host->mrq = NULL;
 		mmc_request_done(host->mmc, mrq);
-		pm_runtime_mark_last_busy(host->dev);
-		pm_runtime_put_autosuspend(host->dev);
 	}
 }
 
@@ -1602,7 +1594,6 @@
 
 	BUG_ON(host->req_in_progress);
 	BUG_ON(host->dma_ch != -1);
-	pm_runtime_get_sync(host->dev);
 	if (host->protect_card) {
 		if (host->reqs_blocked < 3) {
 			/*
@@ -1619,8 +1610,6 @@
 			req->data->error = -EBADF;
 		req->cmd->retries = 0;
 		mmc_request_done(mmc, req);
-		pm_runtime_mark_last_busy(host->dev);
-		pm_runtime_put_autosuspend(host->dev);
 		return;
 	} else if (host->reqs_blocked)
 		host->reqs_blocked = 0;
@@ -1634,8 +1623,6 @@
 			req->data->error = err;
 		host->mrq = NULL;
 		mmc_request_done(mmc, req);
-		pm_runtime_mark_last_busy(host->dev);
-		pm_runtime_put_autosuspend(host->dev);
 		return;
 	}
 	if (req->sbc && !(host->flags & AUTO_CMD23)) {
@@ -1653,15 +1640,13 @@
 	struct omap_hsmmc_host *host = mmc_priv(mmc);
 	int do_send_init_stream = 0;
 
-	pm_runtime_get_sync(host->dev);
-
 	if (ios->power_mode != host->power_mode) {
 		switch (ios->power_mode) {
 		case MMC_POWER_OFF:
-			omap_hsmmc_set_power(host->dev, 0, 0);
+			omap_hsmmc_set_power(host, 0, 0);
 			break;
 		case MMC_POWER_UP:
-			omap_hsmmc_set_power(host->dev, 1, ios->vdd);
+			omap_hsmmc_set_power(host, 1, ios->vdd);
 			break;
 		case MMC_POWER_ON:
 			do_send_init_stream = 1;
@@ -1698,8 +1683,6 @@
 		send_init_stream(host);
 
 	omap_hsmmc_set_bus_mode(host);
-
-	pm_runtime_put_autosuspend(host->dev);
 }
 
 static int omap_hsmmc_get_cd(struct mmc_host *mmc)
@@ -1962,13 +1945,17 @@
 
 static struct omap_hsmmc_platform_data *of_get_hsmmc_pdata(struct device *dev)
 {
-	struct omap_hsmmc_platform_data *pdata;
+	struct omap_hsmmc_platform_data *pdata, *legacy;
 	struct device_node *np = dev->of_node;
 
 	pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL);
 	if (!pdata)
 		return ERR_PTR(-ENOMEM); /* out of memory */
 
+	legacy = dev_get_platdata(dev);
+	if (legacy && legacy->name)
+		pdata->name = legacy->name;
+
 	if (of_find_property(np, "ti,dual-volt", NULL))
 		pdata->controller_flags |= OMAP_HSMMC_SUPPORTS_DUAL_VOLT;
 
@@ -2005,8 +1992,6 @@
 	struct resource *res;
 	int ret, irq;
 	const struct of_device_id *match;
-	dma_cap_mask_t mask;
-	unsigned tx_req, rx_req;
 	const struct omap_mmc_of_data *data;
 	void __iomem *base;
 
@@ -2136,44 +2121,17 @@
 
 	omap_hsmmc_conf_bus_power(host);
 
-	if (!pdev->dev.of_node) {
-		res = platform_get_resource_byname(pdev, IORESOURCE_DMA, "tx");
-		if (!res) {
-			dev_err(mmc_dev(host->mmc), "cannot get DMA TX channel\n");
-			ret = -ENXIO;
-			goto err_irq;
-		}
-		tx_req = res->start;
-
-		res = platform_get_resource_byname(pdev, IORESOURCE_DMA, "rx");
-		if (!res) {
-			dev_err(mmc_dev(host->mmc), "cannot get DMA RX channel\n");
-			ret = -ENXIO;
-			goto err_irq;
-		}
-		rx_req = res->start;
-	}
-
-	dma_cap_zero(mask);
-	dma_cap_set(DMA_SLAVE, mask);
-
-	host->rx_chan =
-		dma_request_slave_channel_compat(mask, omap_dma_filter_fn,
-						 &rx_req, &pdev->dev, "rx");
-
-	if (!host->rx_chan) {
-		dev_err(mmc_dev(host->mmc), "unable to obtain RX DMA engine channel\n");
-		ret = -ENXIO;
+	host->rx_chan = dma_request_chan(&pdev->dev, "rx");
+	if (IS_ERR(host->rx_chan)) {
+		dev_err(mmc_dev(host->mmc), "RX DMA channel request failed\n");
+		ret = PTR_ERR(host->rx_chan);
 		goto err_irq;
 	}
 
-	host->tx_chan =
-		dma_request_slave_channel_compat(mask, omap_dma_filter_fn,
-						 &tx_req, &pdev->dev, "tx");
-
-	if (!host->tx_chan) {
-		dev_err(mmc_dev(host->mmc), "unable to obtain TX DMA engine channel\n");
-		ret = -ENXIO;
+	host->tx_chan = dma_request_chan(&pdev->dev, "tx");
+	if (IS_ERR(host->tx_chan)) {
+		dev_err(mmc_dev(host->mmc), "TX DMA channel request failed\n");
+		ret = PTR_ERR(host->tx_chan);
 		goto err_irq;
 	}
 
@@ -2231,9 +2189,9 @@
 	mmc_remove_host(mmc);
 err_irq:
 	device_init_wakeup(&pdev->dev, false);
-	if (host->tx_chan)
+	if (!IS_ERR_OR_NULL(host->tx_chan))
 		dma_release_channel(host->tx_chan);
-	if (host->rx_chan)
+	if (!IS_ERR_OR_NULL(host->rx_chan))
 		dma_release_channel(host->rx_chan);
 	pm_runtime_dont_use_autosuspend(host->dev);
 	pm_runtime_put_sync(host->dev);
diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c
index bed6a49..b2d70ba 100644
--- a/drivers/mmc/host/sdhci-acpi.c
+++ b/drivers/mmc/host/sdhci-acpi.c
@@ -200,8 +200,6 @@
 	if (!gpio_cd)
 		return 0;
 
-	pm_runtime_get_sync(mmc->parent);
-
 	spin_lock_irqsave(&host->lock, flags);
 
 	if (host->flags & SDHCI_DEVICE_DEAD)
@@ -211,9 +209,6 @@
 out:
 	spin_unlock_irqrestore(&host->lock, flags);
 
-	pm_runtime_mark_last_busy(mmc->parent);
-	pm_runtime_put_autosuspend(mmc->parent);
-
 	return ret;
 }
 
@@ -267,8 +262,10 @@
 
 	/* Platform specific code during sd probe slot goes here */
 
-	if (hid && !strcmp(hid, "80865ACA"))
+	if (hid && !strcmp(hid, "80865ACA")) {
 		host->mmc_host_ops.get_cd = bxt_get_cd;
+		host->mmc->caps |= MMC_CAP_AGGRESSIVE_PM;
+	}
 
 	return 0;
 }
diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c
index 2e482b1..b6f4c1d 100644
--- a/drivers/mmc/host/sdhci-of-arasan.c
+++ b/drivers/mmc/host/sdhci-of-arasan.c
@@ -55,8 +55,32 @@
 	return freq;
 }
 
+static void sdhci_arasan_set_clock(struct sdhci_host *host, unsigned int clock)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_arasan_data *sdhci_arasan = sdhci_pltfm_priv(pltfm_host);
+	bool ctrl_phy = false;
+
+	if (clock > MMC_HIGH_52_MAX_DTR && !IS_ERR(sdhci_arasan->phy))
+		ctrl_phy = true;
+
+	if (ctrl_phy) {
+		spin_unlock_irq(&host->lock);
+		phy_power_off(sdhci_arasan->phy);
+		spin_lock_irq(&host->lock);
+	}
+
+	sdhci_set_clock(host, clock);
+
+	if (ctrl_phy) {
+		spin_unlock_irq(&host->lock);
+		phy_power_on(sdhci_arasan->phy);
+		spin_lock_irq(&host->lock);
+	}
+}
+
 static struct sdhci_ops sdhci_arasan_ops = {
-	.set_clock = sdhci_set_clock,
+	.set_clock = sdhci_arasan_set_clock,
 	.get_max_clock = sdhci_pltfm_clk_get_max_clock,
 	.get_timeout_clock = sdhci_arasan_get_timeout_clock,
 	.set_bus_width = sdhci_set_bus_width,
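sdhci_arasan_set_clock() above drops host->lock around the PHY calls because the sdhci core invokes ->set_clock() with that spinlock held (IRQs off), while the generic PHY operations may sleep. A hedged sketch of the pattern in isolation (names assumed):

static void example_sleeping_op_under_sdhci_lock(struct sdhci_host *host,
						 struct phy *phy)
{
	/* entered with host->lock held by the sdhci core */
	spin_unlock_irq(&host->lock);
	phy_power_off(phy);	/* sleeping call, must not hold the lock */
	spin_lock_irq(&host->lock);
}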
diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c
index 2703aa9..25f779e 100644
--- a/drivers/mmc/host/sdhci-of-at91.c
+++ b/drivers/mmc/host/sdhci-of-at91.c
@@ -15,8 +15,10 @@
  */
 
 #include <linux/clk.h>
+#include <linux/delay.h>
 #include <linux/err.h>
 #include <linux/io.h>
+#include <linux/kernel.h>
 #include <linux/mmc/host.h>
 #include <linux/mmc/slot-gpio.h>
 #include <linux/module.h>
@@ -31,14 +33,60 @@
 #define		SDMMC_CACR_CAPWREN	BIT(0)
 #define		SDMMC_CACR_KEY		(0x46 << 8)
 
+#define SDHCI_AT91_PRESET_COMMON_CONF	0x400 /* drv type B, programmable clock mode */
+
 struct sdhci_at91_priv {
 	struct clk *hclock;
 	struct clk *gck;
 	struct clk *mainck;
 };
 
+static void sdhci_at91_set_clock(struct sdhci_host *host, unsigned int clock)
+{
+	u16 clk;
+	unsigned long timeout;
+
+	host->mmc->actual_clock = 0;
+
+	/*
+	 * There is no requirement to disable the internal clock before
+	 * changing the SD clock configuration. Moreover, disabling the
+	 * internal clock, changing the configuration and re-enabling the
+	 * internal clock causes some bugs. It can prevent the internal clock
+	 * stable flag from becoming ready and cause an unexpected switch to
+	 * the base clock when using presets.
+	 */
+	clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL);
+	clk &= SDHCI_CLOCK_INT_EN;
+	sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL);
+
+	if (clock == 0)
+		return;
+
+	clk = sdhci_calc_clk(host, clock, &host->mmc->actual_clock);
+
+	clk |= SDHCI_CLOCK_INT_EN;
+	sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL);
+
+	/* Wait max 20 ms */
+	timeout = 20;
+	while (!((clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL))
+		& SDHCI_CLOCK_INT_STABLE)) {
+		if (timeout == 0) {
+			pr_err("%s: Internal clock never stabilised.\n",
+			       mmc_hostname(host->mmc));
+			return;
+		}
+		timeout--;
+		mdelay(1);
+	}
+
+	clk |= SDHCI_CLOCK_CARD_EN;
+	sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL);
+}
+
 static const struct sdhci_ops sdhci_at91_sama5d2_ops = {
-	.set_clock		= sdhci_set_clock,
+	.set_clock		= sdhci_at91_set_clock,
 	.set_bus_width		= sdhci_set_bus_width,
 	.reset			= sdhci_reset,
 	.set_uhs_signaling	= sdhci_set_uhs_signaling,
@@ -46,7 +94,6 @@
 
 static const struct sdhci_pltfm_data soc_data_sama5d2 = {
 	.ops = &sdhci_at91_sama5d2_ops,
-	.quirks2 = SDHCI_QUIRK2_NEED_DELAY_AFTER_INT_CLK_RST,
 };
 
 static const struct of_device_id sdhci_at91_dt_match[] = {
@@ -119,6 +166,7 @@
 	unsigned int			clk_base, clk_mul;
 	unsigned int			gck_rate, real_gck_rate;
 	int				ret;
+	unsigned int			preset_div;
 
 	match = of_match_device(sdhci_at91_dt_match, &pdev->dev);
 	if (!match)
@@ -186,6 +234,28 @@
 			 clk_mul, real_gck_rate);
 	}
 
+	/*
+	 * We have to set preset values because they depend on the clk_mul
+	 * value. Moreover, SDR104 is supported in a degraded mode since the
+	 * maximum sd clock value is 120 MHz instead of 208 MHz. For that
+	 * reason, we need to use presets to support SDR104.
+	 */
+	preset_div = DIV_ROUND_UP(real_gck_rate, 24000000) - 1;
+	writew(SDHCI_AT91_PRESET_COMMON_CONF | preset_div,
+	       host->ioaddr + SDHCI_PRESET_FOR_SDR12);
+	preset_div = DIV_ROUND_UP(real_gck_rate, 50000000) - 1;
+	writew(SDHCI_AT91_PRESET_COMMON_CONF | preset_div,
+	       host->ioaddr + SDHCI_PRESET_FOR_SDR25);
+	preset_div = DIV_ROUND_UP(real_gck_rate, 100000000) - 1;
+	writew(SDHCI_AT91_PRESET_COMMON_CONF | preset_div,
+	       host->ioaddr + SDHCI_PRESET_FOR_SDR50);
+	preset_div = DIV_ROUND_UP(real_gck_rate, 120000000) - 1;
+	writew(SDHCI_AT91_PRESET_COMMON_CONF | preset_div,
+	       host->ioaddr + SDHCI_PRESET_FOR_SDR104);
+	preset_div = DIV_ROUND_UP(real_gck_rate, 50000000) - 1;
+	writew(SDHCI_AT91_PRESET_COMMON_CONF | preset_div,
+	       host->ioaddr + SDHCI_PRESET_FOR_DDR50);
+
 	clk_prepare_enable(priv->mainck);
 	clk_prepare_enable(priv->gck);
 
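A worked example of the preset computation above (the 480 MHz figure is assumed for illustration, not taken from the patch): in programmable clock mode the card clock is the generated clock divided by (preset_div + 1), so the DIV_ROUND_UP() form guarantees the target is never exceeded.

static unsigned int example_preset_rate(unsigned int gck_rate,
					unsigned int target)
{
	unsigned int preset_div = DIV_ROUND_UP(gck_rate, target) - 1;

	/* e.g. gck_rate = 480000000, target = 120000000 (SDR104):
	 * preset_div = 4 - 1 = 3, rate = 480 MHz / 4 = 120 MHz */
	return gck_rate / (preset_div + 1);	/* never exceeds target */
}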
diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c
index 79e1901..97d4eeb 100644
--- a/drivers/mmc/host/sdhci-pci-core.c
+++ b/drivers/mmc/host/sdhci-pci-core.c
@@ -340,8 +340,6 @@
 	if (!gpio_cd)
 		return 0;
 
-	pm_runtime_get_sync(mmc->parent);
-
 	spin_lock_irqsave(&host->lock, flags);
 
 	if (host->flags & SDHCI_DEVICE_DEAD)
@@ -351,9 +349,6 @@
 out:
 	spin_unlock_irqrestore(&host->lock, flags);
 
-	pm_runtime_mark_last_busy(mmc->parent);
-	pm_runtime_put_autosuspend(mmc->parent);
-
 	return ret;
 }
 
@@ -391,8 +386,10 @@
 	slot->cd_override_level = true;
 	if (slot->chip->pdev->device == PCI_DEVICE_ID_INTEL_BXT_SD ||
 	    slot->chip->pdev->device == PCI_DEVICE_ID_INTEL_BXTM_SD ||
-	    slot->chip->pdev->device == PCI_DEVICE_ID_INTEL_APL_SD)
+	    slot->chip->pdev->device == PCI_DEVICE_ID_INTEL_APL_SD) {
 		slot->host->mmc_host_ops.get_cd = bxt_get_cd;
+		slot->host->mmc->caps |= MMC_CAP_AGGRESSIVE_PM;
+	}
 
 	return 0;
 }
diff --git a/drivers/mmc/host/sdhci-pic32.c b/drivers/mmc/host/sdhci-pic32.c
index 059df70..72c13b6 100644
--- a/drivers/mmc/host/sdhci-pic32.c
+++ b/drivers/mmc/host/sdhci-pic32.c
@@ -243,7 +243,6 @@
 static struct platform_driver pic32_sdhci_driver = {
 	.driver = {
 		.name	= "pic32-sdhci",
-		.owner	= THIS_MODULE,
 		.of_match_table = of_match_ptr(pic32_sdhci_id_table),
 	},
 	.probe		= pic32_sdhci_probe,
diff --git a/drivers/mmc/host/sdhci-pltfm.c b/drivers/mmc/host/sdhci-pltfm.c
index 072bb27..64f287a 100644
--- a/drivers/mmc/host/sdhci-pltfm.c
+++ b/drivers/mmc/host/sdhci-pltfm.c
@@ -119,16 +119,22 @@
 {
 	struct sdhci_host *host;
 	struct resource *iomem;
-	int ret;
+	void __iomem *ioaddr;
+	int irq, ret;
 
 	iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!iomem) {
-		ret = -ENOMEM;
+	ioaddr = devm_ioremap_resource(&pdev->dev, iomem);
+	if (IS_ERR(ioaddr)) {
+		ret = PTR_ERR(ioaddr);
 		goto err;
 	}
 
-	if (resource_size(iomem) < 0x100)
-		dev_err(&pdev->dev, "Invalid iomem size!\n");
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(&pdev->dev, "failed to get IRQ number\n");
+		ret = irq;
+		goto err;
+	}
 
 	host = sdhci_alloc_host(&pdev->dev,
 		sizeof(struct sdhci_pltfm_host) + priv_size);
@@ -138,6 +144,8 @@
 		goto err;
 	}
 
+	host->ioaddr = ioaddr;
+	host->irq = irq;
 	host->hw_name = dev_name(&pdev->dev);
 	if (pdata && pdata->ops)
 		host->ops = pdata->ops;
@@ -148,22 +156,6 @@
 		host->quirks2 = pdata->quirks2;
 	}
 
-	host->irq = platform_get_irq(pdev, 0);
-
-	if (!request_mem_region(iomem->start, resource_size(iomem),
-		mmc_hostname(host->mmc))) {
-		dev_err(&pdev->dev, "cannot request region\n");
-		ret = -EBUSY;
-		goto err_request;
-	}
-
-	host->ioaddr = ioremap(iomem->start, resource_size(iomem));
-	if (!host->ioaddr) {
-		dev_err(&pdev->dev, "failed to remap registers\n");
-		ret = -ENOMEM;
-		goto err_remap;
-	}
-
 	/*
 	 * Some platforms need to probe the controller to be able to
 	 * determine which caps should be used.
@@ -174,11 +166,6 @@
 	platform_set_drvdata(pdev, host);
 
 	return host;
-
-err_remap:
-	release_mem_region(iomem->start, resource_size(iomem));
-err_request:
-	sdhci_free_host(host);
 err:
 	dev_err(&pdev->dev, "%s failed %d\n", __func__, ret);
 	return ERR_PTR(ret);
@@ -188,10 +175,7 @@
 void sdhci_pltfm_free(struct platform_device *pdev)
 {
 	struct sdhci_host *host = platform_get_drvdata(pdev);
-	struct resource *iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 
-	iounmap(host->ioaddr);
-	release_mem_region(iomem->start, resource_size(iomem));
 	sdhci_free_host(host);
 }
 EXPORT_SYMBOL_GPL(sdhci_pltfm_free);
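The rewrite above replaces the hand-rolled request_mem_region()/ioremap() pair, its error labels, and the matching iounmap()/release_mem_region() in sdhci_pltfm_free() with devm_ioremap_resource(), which validates the resource (including res == NULL), claims the region, maps it, and is unwound automatically by the devm core. A minimal sketch of the pattern, assuming a hypothetical probe function:

static int example_probe(struct platform_device *pdev)
{
	struct resource *res;
	void __iomem *base;

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	base = devm_ioremap_resource(&pdev->dev, res);	/* NULL res handled */
	if (IS_ERR(base))
		return PTR_ERR(base);

	/* no explicit unmap/release needed on any exit path */
	return 0;
}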
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 6bd3d17..e010ea4 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -38,11 +38,6 @@
 #define DBG(f, x...) \
 	pr_debug(DRIVER_NAME " [%s()]: " f, __func__,## x)
 
-#if defined(CONFIG_LEDS_CLASS) || (defined(CONFIG_LEDS_CLASS_MODULE) && \
-	defined(CONFIG_MMC_SDHCI_MODULE))
-#define SDHCI_USE_LEDS_CLASS
-#endif
-
 #define MAX_TUNING_LOOP 40
 
 static unsigned int debug_quirks = 0;
@@ -53,29 +48,7 @@
 static void sdhci_finish_command(struct sdhci_host *);
 static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode);
 static void sdhci_enable_preset_value(struct sdhci_host *host, bool enable);
-static int sdhci_do_get_cd(struct sdhci_host *host);
-
-#ifdef CONFIG_PM
-static int sdhci_runtime_pm_get(struct sdhci_host *host);
-static int sdhci_runtime_pm_put(struct sdhci_host *host);
-static void sdhci_runtime_pm_bus_on(struct sdhci_host *host);
-static void sdhci_runtime_pm_bus_off(struct sdhci_host *host);
-#else
-static inline int sdhci_runtime_pm_get(struct sdhci_host *host)
-{
-	return 0;
-}
-static inline int sdhci_runtime_pm_put(struct sdhci_host *host)
-{
-	return 0;
-}
-static void sdhci_runtime_pm_bus_on(struct sdhci_host *host)
-{
-}
-static void sdhci_runtime_pm_bus_off(struct sdhci_host *host)
-{
-}
-#endif
+static int sdhci_get_cd(struct mmc_host *mmc);
 
 static void sdhci_dumpregs(struct sdhci_host *host)
 {
@@ -171,6 +144,22 @@
 	sdhci_set_card_detection(host, false);
 }
 
+static void sdhci_runtime_pm_bus_on(struct sdhci_host *host)
+{
+	if (host->bus_on)
+		return;
+	host->bus_on = true;
+	pm_runtime_get_noresume(host->mmc->parent);
+}
+
+static void sdhci_runtime_pm_bus_off(struct sdhci_host *host)
+{
+	if (!host->bus_on)
+		return;
+	host->bus_on = false;
+	pm_runtime_put_noidle(host->mmc->parent);
+}
+
 void sdhci_reset(struct sdhci_host *host, u8 mask)
 {
 	unsigned long timeout;
@@ -204,7 +193,7 @@
 static void sdhci_do_reset(struct sdhci_host *host, u8 mask)
 {
 	if (host->quirks & SDHCI_QUIRK_NO_CARD_NO_RESET) {
-		if (!sdhci_do_get_cd(host))
+		if (!sdhci_get_cd(host->mmc))
 			return;
 	}
 
@@ -252,7 +241,7 @@
 	sdhci_enable_card_detection(host);
 }
 
-static void sdhci_activate_led(struct sdhci_host *host)
+static void __sdhci_led_activate(struct sdhci_host *host)
 {
 	u8 ctrl;
 
@@ -261,7 +250,7 @@
 	sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
 }
 
-static void sdhci_deactivate_led(struct sdhci_host *host)
+static void __sdhci_led_deactivate(struct sdhci_host *host)
 {
 	u8 ctrl;
 
@@ -270,9 +259,9 @@
 	sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
 }
 
-#ifdef SDHCI_USE_LEDS_CLASS
+#if IS_REACHABLE(CONFIG_LEDS_CLASS)
 static void sdhci_led_control(struct led_classdev *led,
-	enum led_brightness brightness)
+			      enum led_brightness brightness)
 {
 	struct sdhci_host *host = container_of(led, struct sdhci_host, led);
 	unsigned long flags;
@@ -283,12 +272,62 @@
 		goto out;
 
 	if (brightness == LED_OFF)
-		sdhci_deactivate_led(host);
+		__sdhci_led_deactivate(host);
 	else
-		sdhci_activate_led(host);
+		__sdhci_led_activate(host);
 out:
 	spin_unlock_irqrestore(&host->lock, flags);
 }
+
+static int sdhci_led_register(struct sdhci_host *host)
+{
+	struct mmc_host *mmc = host->mmc;
+
+	snprintf(host->led_name, sizeof(host->led_name),
+		 "%s::", mmc_hostname(mmc));
+
+	host->led.name = host->led_name;
+	host->led.brightness = LED_OFF;
+	host->led.default_trigger = mmc_hostname(mmc);
+	host->led.brightness_set = sdhci_led_control;
+
+	return led_classdev_register(mmc_dev(mmc), &host->led);
+}
+
+static void sdhci_led_unregister(struct sdhci_host *host)
+{
+	led_classdev_unregister(&host->led);
+}
+
+static inline void sdhci_led_activate(struct sdhci_host *host)
+{
+}
+
+static inline void sdhci_led_deactivate(struct sdhci_host *host)
+{
+}
+
+#else
+
+static inline int sdhci_led_register(struct sdhci_host *host)
+{
+	return 0;
+}
+
+static inline void sdhci_led_unregister(struct sdhci_host *host)
+{
+}
+
+static inline void sdhci_led_activate(struct sdhci_host *host)
+{
+	__sdhci_led_activate(host);
+}
+
+static inline void sdhci_led_deactivate(struct sdhci_host *host)
+{
+	__sdhci_led_deactivate(host);
+}
+
 #endif
 
 /*****************************************************************************\
@@ -1091,23 +1130,14 @@
 	return preset;
 }
 
-void sdhci_set_clock(struct sdhci_host *host, unsigned int clock)
+u16 sdhci_calc_clk(struct sdhci_host *host, unsigned int clock,
+		   unsigned int *actual_clock)
 {
 	int div = 0; /* Initialized for compiler warning */
 	int real_div = div, clk_mul = 1;
 	u16 clk = 0;
-	unsigned long timeout;
 	bool switch_base_clk = false;
 
-	host->mmc->actual_clock = 0;
-
-	sdhci_writew(host, 0, SDHCI_CLOCK_CONTROL);
-	if (host->quirks2 & SDHCI_QUIRK2_NEED_DELAY_AFTER_INT_CLK_RST)
-		mdelay(1);
-
-	if (clock == 0)
-		return;
-
 	if (host->version >= SDHCI_SPEC_300) {
 		if (host->preset_enabled) {
 			u16 pre_val;
@@ -1184,10 +1214,29 @@
 
 clock_set:
 	if (real_div)
-		host->mmc->actual_clock = (host->max_clk * clk_mul) / real_div;
+		*actual_clock = (host->max_clk * clk_mul) / real_div;
 	clk |= (div & SDHCI_DIV_MASK) << SDHCI_DIVIDER_SHIFT;
 	clk |= ((div & SDHCI_DIV_HI_MASK) >> SDHCI_DIV_MASK_LEN)
 		<< SDHCI_DIVIDER_HI_SHIFT;
+
+	return clk;
+}
+EXPORT_SYMBOL_GPL(sdhci_calc_clk);
+
+void sdhci_set_clock(struct sdhci_host *host, unsigned int clock)
+{
+	u16 clk;
+	unsigned long timeout;
+
+	host->mmc->actual_clock = 0;
+
+	sdhci_writew(host, 0, SDHCI_CLOCK_CONTROL);
+
+	if (clock == 0)
+		return;
+
+	clk = sdhci_calc_clk(host, clock, &host->mmc->actual_clock);
+
 	clk |= SDHCI_CLOCK_INT_EN;
 	sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL);
 
@@ -1319,8 +1368,6 @@
 
 	host = mmc_priv(mmc);
 
-	sdhci_runtime_pm_get(host);
-
 	/* Firstly check card presence */
 	present = mmc->ops->get_cd(mmc);
 
@@ -1328,9 +1375,7 @@
 
 	WARN_ON(host->mrq != NULL);
 
-#ifndef SDHCI_USE_LEDS_CLASS
-	sdhci_activate_led(host);
-#endif
+	sdhci_led_activate(host);
 
 	/*
 	 * Ensure we don't send the STOP for non-SET_BLOCK_COUNTED
@@ -1405,11 +1450,11 @@
 }
 EXPORT_SYMBOL_GPL(sdhci_set_uhs_signaling);
 
-static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios)
+static void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 {
+	struct sdhci_host *host = mmc_priv(mmc);
 	unsigned long flags;
 	u8 ctrl;
-	struct mmc_host *mmc = host->mmc;
 
 	spin_lock_irqsave(&host->lock, flags);
 
@@ -1563,18 +1608,10 @@
 	spin_unlock_irqrestore(&host->lock, flags);
 }
 
-static void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
+static int sdhci_get_cd(struct mmc_host *mmc)
 {
 	struct sdhci_host *host = mmc_priv(mmc);
-
-	sdhci_runtime_pm_get(host);
-	sdhci_do_set_ios(host, ios);
-	sdhci_runtime_pm_put(host);
-}
-
-static int sdhci_do_get_cd(struct sdhci_host *host)
-{
-	int gpio_cd = mmc_gpio_get_cd(host->mmc);
+	int gpio_cd = mmc_gpio_get_cd(mmc);
 
 	if (host->flags & SDHCI_DEVICE_DEAD)
 		return 0;
@@ -1598,17 +1635,6 @@
 	return !!(sdhci_readl(host, SDHCI_PRESENT_STATE) & SDHCI_CARD_PRESENT);
 }
 
-static int sdhci_get_cd(struct mmc_host *mmc)
-{
-	struct sdhci_host *host = mmc_priv(mmc);
-	int ret;
-
-	sdhci_runtime_pm_get(host);
-	ret = sdhci_do_get_cd(host);
-	sdhci_runtime_pm_put(host);
-	return ret;
-}
-
 static int sdhci_check_ro(struct sdhci_host *host)
 {
 	unsigned long flags;
@@ -1633,8 +1659,9 @@
 
 #define SAMPLE_COUNT	5
 
-static int sdhci_do_get_ro(struct sdhci_host *host)
+static int sdhci_get_ro(struct mmc_host *mmc)
 {
+	struct sdhci_host *host = mmc_priv(mmc);
 	int i, ro_count;
 
 	if (!(host->quirks & SDHCI_QUIRK_UNSTABLE_RO_DETECT))
@@ -1659,17 +1686,6 @@
 		host->ops->hw_reset(host);
 }
 
-static int sdhci_get_ro(struct mmc_host *mmc)
-{
-	struct sdhci_host *host = mmc_priv(mmc);
-	int ret;
-
-	sdhci_runtime_pm_get(host);
-	ret = sdhci_do_get_ro(host);
-	sdhci_runtime_pm_put(host);
-	return ret;
-}
-
 static void sdhci_enable_sdio_irq_nolock(struct sdhci_host *host, int enable)
 {
 	if (!(host->flags & SDHCI_DEVICE_DEAD)) {
@@ -1689,8 +1705,6 @@
 	struct sdhci_host *host = mmc_priv(mmc);
 	unsigned long flags;
 
-	sdhci_runtime_pm_get(host);
-
 	spin_lock_irqsave(&host->lock, flags);
 	if (enable)
 		host->flags |= SDHCI_SDIO_IRQ_ENABLED;
@@ -1699,14 +1713,12 @@
 
 	sdhci_enable_sdio_irq_nolock(host, enable);
 	spin_unlock_irqrestore(&host->lock, flags);
-
-	sdhci_runtime_pm_put(host);
 }
 
-static int sdhci_do_start_signal_voltage_switch(struct sdhci_host *host,
-						struct mmc_ios *ios)
+static int sdhci_start_signal_voltage_switch(struct mmc_host *mmc,
+					     struct mmc_ios *ios)
 {
-	struct mmc_host *mmc = host->mmc;
+	struct sdhci_host *host = mmc_priv(mmc);
 	u16 ctrl;
 	int ret;
 
@@ -1794,29 +1806,13 @@
 	}
 }
 
-static int sdhci_start_signal_voltage_switch(struct mmc_host *mmc,
-	struct mmc_ios *ios)
-{
-	struct sdhci_host *host = mmc_priv(mmc);
-	int err;
-
-	if (host->version < SDHCI_SPEC_300)
-		return 0;
-	sdhci_runtime_pm_get(host);
-	err = sdhci_do_start_signal_voltage_switch(host, ios);
-	sdhci_runtime_pm_put(host);
-	return err;
-}
-
 static int sdhci_card_busy(struct mmc_host *mmc)
 {
 	struct sdhci_host *host = mmc_priv(mmc);
 	u32 present_state;
 
-	sdhci_runtime_pm_get(host);
 	/* Check whether DAT[3:0] is 0000 */
 	present_state = sdhci_readl(host, SDHCI_PRESENT_STATE);
-	sdhci_runtime_pm_put(host);
 
 	return !(present_state & SDHCI_DATA_LVL_MASK);
 }
@@ -1843,7 +1839,6 @@
 	unsigned int tuning_count = 0;
 	bool hs400_tuning;
 
-	sdhci_runtime_pm_get(host);
 	spin_lock_irqsave(&host->lock, flags);
 
 	hs400_tuning = host->flags & SDHCI_HS400_TUNING;
@@ -1879,8 +1874,7 @@
 		break;
 
 	case MMC_TIMING_UHS_SDR50:
-		if (host->flags & SDHCI_SDR50_NEEDS_TUNING ||
-		    host->flags & SDHCI_SDR104_NEEDS_TUNING)
+		if (host->flags & SDHCI_SDR50_NEEDS_TUNING)
 			break;
 		/* FALLTHROUGH */
 
@@ -1891,7 +1885,6 @@
 	if (host->ops->platform_execute_tuning) {
 		spin_unlock_irqrestore(&host->lock, flags);
 		err = host->ops->platform_execute_tuning(host, opcode);
-		sdhci_runtime_pm_put(host);
 		return err;
 	}
 
@@ -2023,8 +2016,6 @@
 	sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
 out_unlock:
 	spin_unlock_irqrestore(&host->lock, flags);
-	sdhci_runtime_pm_put(host);
-
 	return err;
 }
 
@@ -2105,7 +2096,7 @@
 	if (host->ops->card_event)
 		host->ops->card_event(host);
 
-	present = sdhci_do_get_cd(host);
+	present = sdhci_get_cd(host->mmc);
 
 	spin_lock_irqsave(&host->lock, flags);
 
@@ -2214,15 +2205,12 @@
 	host->cmd = NULL;
 	host->data = NULL;
 
-#ifndef SDHCI_USE_LEDS_CLASS
-	sdhci_deactivate_led(host);
-#endif
+	sdhci_led_deactivate(host);
 
 	mmiowb();
 	spin_unlock_irqrestore(&host->lock, flags);
 
 	mmc_request_done(host->mmc, mrq);
-	sdhci_runtime_pm_put(host);
 }
 
 static void sdhci_timeout_timer(unsigned long data)
@@ -2679,7 +2667,7 @@
 		sdhci_init(host, 0);
 		host->pwr = 0;
 		host->clock = 0;
-		sdhci_do_set_ios(host, &host->mmc->ios);
+		sdhci_set_ios(host->mmc, &host->mmc->ios);
 	} else {
 		sdhci_init(host, (host->mmc->pm_flags & MMC_PM_KEEP_POWER));
 		mmiowb();
@@ -2703,33 +2691,6 @@
 
 EXPORT_SYMBOL_GPL(sdhci_resume_host);
 
-static int sdhci_runtime_pm_get(struct sdhci_host *host)
-{
-	return pm_runtime_get_sync(host->mmc->parent);
-}
-
-static int sdhci_runtime_pm_put(struct sdhci_host *host)
-{
-	pm_runtime_mark_last_busy(host->mmc->parent);
-	return pm_runtime_put_autosuspend(host->mmc->parent);
-}
-
-static void sdhci_runtime_pm_bus_on(struct sdhci_host *host)
-{
-	if (host->bus_on)
-		return;
-	host->bus_on = true;
-	pm_runtime_get_noresume(host->mmc->parent);
-}
-
-static void sdhci_runtime_pm_bus_off(struct sdhci_host *host)
-{
-	if (!host->bus_on)
-		return;
-	host->bus_on = false;
-	pm_runtime_put_noidle(host->mmc->parent);
-}
-
 int sdhci_runtime_suspend_host(struct sdhci_host *host)
 {
 	unsigned long flags;
@@ -2768,8 +2729,8 @@
 	/* Force clock and power re-program */
 	host->pwr = 0;
 	host->clock = 0;
-	sdhci_do_start_signal_voltage_switch(host, &host->mmc->ios);
-	sdhci_do_set_ios(host, &host->mmc->ios);
+	sdhci_start_signal_voltage_switch(host->mmc, &host->mmc->ios);
+	sdhci_set_ios(host->mmc, &host->mmc->ios);
 
 	if ((host_flags & SDHCI_PV_ENABLED) &&
 		!(host->quirks2 & SDHCI_QUIRK2_PRESET_VALUE_BROKEN)) {
@@ -3014,7 +2975,8 @@
 		if (!host->ops->get_max_clock) {
 			pr_err("%s: Hardware doesn't specify base clock frequency.\n",
 			       mmc_hostname(mmc));
-			return -ENODEV;
+			ret = -ENODEV;
+			goto undma;
 		}
 		host->max_clk = host->ops->get_max_clock(host);
 	}
@@ -3051,7 +3013,7 @@
 	} else
 		mmc->f_min = host->max_clk / SDHCI_MAX_DIV_SPEC_200;
 
-	if (!mmc->f_max || (mmc->f_max && (mmc->f_max > max_clk)))
+	if (!mmc->f_max || mmc->f_max > max_clk)
 		mmc->f_max = max_clk;
 
 	if (!(host->quirks & SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK)) {
@@ -3064,7 +3026,8 @@
 			} else {
 				pr_err("%s: Hardware doesn't specify timeout clock frequency.\n",
 					mmc_hostname(mmc));
-				return -ENODEV;
+				ret = -ENODEV;
+				goto undma;
 			}
 		}
 
@@ -3118,8 +3081,9 @@
 		mmc->caps |= MMC_CAP_NEEDS_POLL;
 
 	/* If there are external regulators, get them */
-	if (mmc_regulator_get_supply(mmc) == -EPROBE_DEFER)
-		return -EPROBE_DEFER;
+	ret = mmc_regulator_get_supply(mmc);
+	if (ret == -EPROBE_DEFER)
+		goto undma;
 
 	/* If vqmmc regulator and no 1.8V signalling, then there's no UHS */
 	if (!IS_ERR(mmc->supply.vqmmc)) {
@@ -3174,10 +3138,6 @@
 	if (caps[1] & SDHCI_USE_SDR50_TUNING)
 		host->flags |= SDHCI_SDR50_NEEDS_TUNING;
 
-	/* Does the host need tuning for SDR104 / HS200? */
-	if (mmc->caps2 & MMC_CAP2_HS200)
-		host->flags |= SDHCI_SDR104_NEEDS_TUNING;
-
 	/* Driver Type(s) (A, C, D) supported by the host */
 	if (caps[1] & SDHCI_DRIVER_TYPE_A)
 		mmc->caps |= MMC_CAP_DRIVER_TYPE_A;
@@ -3276,7 +3236,8 @@
 	if (mmc->ocr_avail == 0) {
 		pr_err("%s: Hardware doesn't report any support voltages.\n",
 		       mmc_hostname(mmc));
-		return -ENODEV;
+		ret = -ENODEV;
+		goto unreg;
 	}
 
 	spin_lock_init(&host->lock);
@@ -3360,25 +3321,18 @@
 	sdhci_dumpregs(host);
 #endif
 
-#ifdef SDHCI_USE_LEDS_CLASS
-	snprintf(host->led_name, sizeof(host->led_name),
-		"%s::", mmc_hostname(mmc));
-	host->led.name = host->led_name;
-	host->led.brightness = LED_OFF;
-	host->led.default_trigger = mmc_hostname(mmc);
-	host->led.brightness_set = sdhci_led_control;
-
-	ret = led_classdev_register(mmc_dev(mmc), &host->led);
+	ret = sdhci_led_register(host);
 	if (ret) {
 		pr_err("%s: Failed to register LED device: %d\n",
 		       mmc_hostname(mmc), ret);
-		goto reset;
+		goto unirq;
 	}
-#endif
 
 	mmiowb();
 
-	mmc_add_host(mmc);
+	ret = mmc_add_host(mmc);
+	if (ret)
+		goto unled;
 
 	pr_info("%s: SDHCI controller on %s [%s] using %s\n",
 		mmc_hostname(mmc), host->hw_name, dev_name(mmc_dev(mmc)),
@@ -3390,15 +3344,25 @@
 
 	return 0;
 
-#ifdef SDHCI_USE_LEDS_CLASS
-reset:
+unled:
+	sdhci_led_unregister(host);
+unirq:
 	sdhci_do_reset(host, SDHCI_RESET_ALL);
 	sdhci_writel(host, 0, SDHCI_INT_ENABLE);
 	sdhci_writel(host, 0, SDHCI_SIGNAL_ENABLE);
 	free_irq(host->irq, host);
-#endif
 untasklet:
 	tasklet_kill(&host->finish_tasklet);
+unreg:
+	if (!IS_ERR(mmc->supply.vqmmc))
+		regulator_disable(mmc->supply.vqmmc);
+undma:
+	if (host->align_buffer)
+		dma_free_coherent(mmc_dev(mmc), host->align_buffer_sz +
+				  host->adma_table_sz, host->align_buffer,
+				  host->align_addr);
+	host->adma_table = NULL;
+	host->align_buffer = NULL;
 
 	return ret;
 }
@@ -3430,9 +3394,7 @@
 
 	mmc_remove_host(mmc);
 
-#ifdef SDHCI_USE_LEDS_CLASS
-	led_classdev_unregister(&host->led);
-#endif
+	sdhci_led_unregister(host);
 
 	if (!dead)
 		sdhci_do_reset(host, SDHCI_RESET_ALL);
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index 0f39f4f..609f87c 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -417,11 +417,6 @@
 #define SDHCI_QUIRK2_ACMD23_BROKEN			(1<<14)
 /* Broken Clock divider zero in controller */
 #define SDHCI_QUIRK2_CLOCK_DIV_ZERO_BROKEN		(1<<15)
-/*
- * When internal clock is disabled, a delay is needed before modifying the
- * SD clock frequency or enabling back the internal clock.
- */
-#define SDHCI_QUIRK2_NEED_DELAY_AFTER_INT_CLK_RST	(1<<16)
 
 	int irq;		/* Device IRQ */
 	void __iomem *ioaddr;	/* Mapped address */
@@ -433,7 +428,7 @@
 	struct mmc_host_ops mmc_host_ops;	/* MMC host ops */
 	u64 dma_mask;		/* custom DMA mask */
 
-#if defined(CONFIG_LEDS_CLASS) || defined(CONFIG_LEDS_CLASS_MODULE)
+#if IS_ENABLED(CONFIG_LEDS_CLASS)
 	struct led_classdev led;	/* LED control */
 	char led_name[32];
 #endif
@@ -450,7 +445,6 @@
 #define SDHCI_AUTO_CMD23	(1<<7)	/* Auto CMD23 support */
 #define SDHCI_PV_ENABLED	(1<<8)	/* Preset value enabled */
 #define SDHCI_SDIO_IRQ_ENABLED	(1<<9)	/* SDIO irq enabled */
-#define SDHCI_SDR104_NEEDS_TUNING (1<<10)	/* SDR104/HS200 needs tuning */
 #define SDHCI_USE_64_BIT_DMA	(1<<12)	/* Use 64-bit DMA */
 #define SDHCI_HS400_TUNING	(1<<13)	/* Tuning for HS400 */
 
@@ -661,6 +655,8 @@
 	return !!(host->flags & SDHCI_SDIO_IRQ_ENABLED);
 }
 
+u16 sdhci_calc_clk(struct sdhci_host *host, unsigned int clock,
+		   unsigned int *actual_clock);
 void sdhci_set_clock(struct sdhci_host *host, unsigned int clock);
 void sdhci_set_power(struct sdhci_host *host, unsigned char mode,
 		     unsigned short vdd);
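Note the deliberate asymmetry above: the led field in struct sdhci_host is guarded by IS_ENABLED(CONFIG_LEDS_CLASS) (true for =y or =m), while the code in sdhci.c uses IS_REACHABLE(), which is only true when the LED core's symbols can actually be linked from that code. Roughly, per include/linux/kconfig.h:

#if IS_ENABLED(CONFIG_LEDS_CLASS)
	/* taken when LEDS_CLASS is =y or =m */
#endif

#if IS_REACHABLE(CONFIG_LEDS_CLASS)
	/* taken when LEDS_CLASS is =y, or when it is =m and the
	 * referencing code is itself modular -- i.e. exactly the cases
	 * the old SDHCI_USE_LEDS_CLASS macro tried to enumerate */
#endif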
diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c
index d9a655f..dd64b86 100644
--- a/drivers/mmc/host/sh_mmcif.c
+++ b/drivers/mmc/host/sh_mmcif.c
@@ -248,7 +248,6 @@
 	int sg_idx;
 	int sg_blkidx;
 	bool power;
-	bool card_present;
 	bool ccs_enable;		/* Command Completion Signal support */
 	bool clk_ctrl2_enable;
 	struct mutex thread_lock;
@@ -1064,16 +1063,6 @@
 		host->mmc->f_max, host->mmc->f_min);
 }
 
-static void sh_mmcif_set_power(struct sh_mmcif_host *host, struct mmc_ios *ios)
-{
-	struct mmc_host *mmc = host->mmc;
-
-	if (!IS_ERR(mmc->supply.vmmc))
-		/* Errors ignored... */
-		mmc_regulator_set_ocr(mmc, mmc->supply.vmmc,
-				      ios->power_mode ? ios->vdd : 0);
-}
-
 static void sh_mmcif_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 {
 	struct sh_mmcif_host *host = mmc_priv(mmc);
@@ -1091,42 +1080,32 @@
 	host->state = STATE_IOS;
 	spin_unlock_irqrestore(&host->lock, flags);
 
-	if (ios->power_mode == MMC_POWER_UP) {
-		if (!host->card_present) {
-			/* See if we also get DMA */
-			sh_mmcif_request_dma(host);
-			host->card_present = true;
-		}
-		sh_mmcif_set_power(host, ios);
-	} else if (ios->power_mode == MMC_POWER_OFF || !ios->clock) {
-		/* clock stop */
-		sh_mmcif_clock_control(host, 0);
-		if (ios->power_mode == MMC_POWER_OFF) {
-			if (host->card_present) {
-				sh_mmcif_release_dma(host);
-				host->card_present = false;
-			}
-		}
-		if (host->power) {
-			pm_runtime_put_sync(dev);
-			clk_disable_unprepare(host->clk);
-			host->power = false;
-			if (ios->power_mode == MMC_POWER_OFF)
-				sh_mmcif_set_power(host, ios);
-		}
-		host->state = STATE_IDLE;
-		return;
-	}
-
-	if (ios->clock) {
+	switch (ios->power_mode) {
+	case MMC_POWER_UP:
+		if (!IS_ERR(mmc->supply.vmmc))
+			mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, ios->vdd);
 		if (!host->power) {
 			clk_prepare_enable(host->clk);
-
 			pm_runtime_get_sync(dev);
-			host->power = true;
 			sh_mmcif_sync_reset(host);
+			sh_mmcif_request_dma(host);
+			host->power = true;
 		}
+		break;
+	case MMC_POWER_OFF:
+		if (!IS_ERR(mmc->supply.vmmc))
+			mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, 0);
+		if (host->power) {
+			sh_mmcif_clock_control(host, 0);
+			sh_mmcif_release_dma(host);
+			pm_runtime_put(dev);
+			clk_disable_unprepare(host->clk);
+			host->power = false;
+		}
+		break;
+	case MMC_POWER_ON:
 		sh_mmcif_clock_control(host, ios->clock);
+		break;
 	}
 
 	host->timing = ios->timing;
@@ -1519,23 +1498,23 @@
 
 	platform_set_drvdata(pdev, host);
 
-	pm_runtime_enable(dev);
-	host->power = false;
-
 	host->clk = devm_clk_get(dev, NULL);
 	if (IS_ERR(host->clk)) {
 		ret = PTR_ERR(host->clk);
 		dev_err(dev, "cannot get clock: %d\n", ret);
-		goto err_pm;
+		goto err_host;
 	}
 
 	ret = clk_prepare_enable(host->clk);
 	if (ret < 0)
-		goto err_pm;
+		goto err_host;
 
 	sh_mmcif_clk_setup(host);
 
-	ret = pm_runtime_resume(dev);
+	pm_runtime_enable(dev);
+	host->power = false;
+
+	ret = pm_runtime_get_sync(dev);
 	if (ret < 0)
 		goto err_clk;
 
@@ -1579,12 +1558,13 @@
 		 sh_mmcif_readl(host->addr, MMCIF_CE_VERSION) & 0xffff,
 		 clk_get_rate(host->clk) / 1000000UL);
 
+	pm_runtime_put(dev);
 	clk_disable_unprepare(host->clk);
 	return ret;
 
 err_clk:
 	clk_disable_unprepare(host->clk);
-err_pm:
+	pm_runtime_put_sync(dev);
 	pm_runtime_disable(dev);
 err_host:
 	mmc_free_host(mmc);
diff --git a/drivers/mmc/host/sh_mobile_sdhi.c b/drivers/mmc/host/sh_mobile_sdhi.c
index 9aa1479..f750f94 100644
--- a/drivers/mmc/host/sh_mobile_sdhi.c
+++ b/drivers/mmc/host/sh_mobile_sdhi.c
@@ -28,10 +28,12 @@
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/mmc/host.h>
-#include <linux/mmc/sh_mobile_sdhi.h>
 #include <linux/mfd/tmio.h>
 #include <linux/sh_dma.h>
 #include <linux/delay.h>
+#include <linux/pinctrl/consumer.h>
+#include <linux/pinctrl/pinctrl-state.h>
+#include <linux/regulator/consumer.h>
 
 #include "tmio_mmc.h"
 
@@ -48,10 +50,8 @@
 	unsigned bus_shift;
 };
 
-static const struct sh_mobile_sdhi_of_data sh_mobile_sdhi_of_cfg[] = {
-	{
-		.tmio_flags = TMIO_MMC_HAS_IDLE_WAIT,
-	},
+static const struct sh_mobile_sdhi_of_data of_default_cfg = {
+	.tmio_flags = TMIO_MMC_HAS_IDLE_WAIT,
 };
 
 static const struct sh_mobile_sdhi_of_data of_rcar_gen1_compatible = {
@@ -62,7 +62,7 @@
 
 static const struct sh_mobile_sdhi_of_data of_rcar_gen2_compatible = {
 	.tmio_flags	= TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_WRPROTECT_DISABLE |
-			  TMIO_MMC_CLK_ACTUAL | TMIO_MMC_FAST_CLK_CHG,
+			  TMIO_MMC_CLK_ACTUAL | TMIO_MMC_MIN_RCAR2,
 	.capabilities	= MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ,
 	.dma_buswidth	= DMA_SLAVE_BUSWIDTH_4_BYTES,
 	.dma_rx_offset	= 0x2000,
@@ -70,17 +70,16 @@
 
 static const struct sh_mobile_sdhi_of_data of_rcar_gen3_compatible = {
 	.tmio_flags	= TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_WRPROTECT_DISABLE |
-			  TMIO_MMC_CLK_ACTUAL | TMIO_MMC_FAST_CLK_CHG,
-	.capabilities	= MMC_CAP_SD_HIGHSPEED,
+			  TMIO_MMC_CLK_ACTUAL | TMIO_MMC_MIN_RCAR2,
+	.capabilities	= MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ,
 	.bus_shift	= 2,
 };
 
 static const struct of_device_id sh_mobile_sdhi_of_match[] = {
 	{ .compatible = "renesas,sdhi-shmobile" },
-	{ .compatible = "renesas,sdhi-sh7372" },
-	{ .compatible = "renesas,sdhi-sh73a0", .data = &sh_mobile_sdhi_of_cfg[0], },
-	{ .compatible = "renesas,sdhi-r8a73a4", .data = &sh_mobile_sdhi_of_cfg[0], },
-	{ .compatible = "renesas,sdhi-r8a7740", .data = &sh_mobile_sdhi_of_cfg[0], },
+	{ .compatible = "renesas,sdhi-sh73a0", .data = &of_default_cfg, },
+	{ .compatible = "renesas,sdhi-r8a73a4", .data = &of_default_cfg, },
+	{ .compatible = "renesas,sdhi-r8a7740", .data = &of_default_cfg, },
 	{ .compatible = "renesas,sdhi-r8a7778", .data = &of_rcar_gen1_compatible, },
 	{ .compatible = "renesas,sdhi-r8a7779", .data = &of_rcar_gen1_compatible, },
 	{ .compatible = "renesas,sdhi-r8a7790", .data = &of_rcar_gen2_compatible, },
@@ -97,6 +96,8 @@
 	struct clk *clk;
 	struct tmio_mmc_data mmc_data;
 	struct tmio_mmc_dma dma_priv;
+	struct pinctrl *pinctrl;
+	struct pinctrl_state *pins_default, *pins_uhs;
 };
 
 static void sh_mobile_sdhi_sdbuf_width(struct tmio_mmc_host *host, int width)
@@ -131,16 +132,28 @@
 	sd_ctrl_write16(host, EXT_ACC, val);
 }
 
-static int sh_mobile_sdhi_clk_enable(struct platform_device *pdev, unsigned int *f)
+static int sh_mobile_sdhi_clk_enable(struct tmio_mmc_host *host)
 {
-	struct mmc_host *mmc = platform_get_drvdata(pdev);
-	struct tmio_mmc_host *host = mmc_priv(mmc);
+	struct mmc_host *mmc = host->mmc;
 	struct sh_mobile_sdhi *priv = host_to_priv(host);
 	int ret = clk_prepare_enable(priv->clk);
 	if (ret < 0)
 		return ret;
 
-	*f = clk_get_rate(priv->clk);
+	/*
+	 * The clock driver may not know what maximum frequency
+	 * actually works, so it should be set with the max-frequency
+	 * property which will already have been read into f_max.  If it
+	 * was missing, assume the current frequency is the maximum.
+	 */
+	if (!mmc->f_max)
+		mmc->f_max = clk_get_rate(priv->clk);
+
+	/*
+	 * Minimum frequency is the minimum input clock frequency
+	 * divided by our maximum divider.
+	 */
+	mmc->f_min = max(clk_round_rate(priv->clk, 1) / 512, 1L);
 
 	/* enable 16bit data access on SDBUF as default */
 	sh_mobile_sdhi_sdbuf_width(host, 16);
@@ -148,19 +161,92 @@
 	return 0;
 }
 
-static void sh_mobile_sdhi_clk_disable(struct platform_device *pdev)
+static unsigned int sh_mobile_sdhi_clk_update(struct tmio_mmc_host *host,
+					      unsigned int new_clock)
 {
-	struct mmc_host *mmc = platform_get_drvdata(pdev);
+	struct sh_mobile_sdhi *priv = host_to_priv(host);
+	unsigned int freq, diff, best_freq = 0, diff_min = ~0;
+	int i, ret;
+
+	/* tested only on RCar Gen2+ currently; may work for others */
+	if (!(host->pdata->flags & TMIO_MMC_MIN_RCAR2))
+		return clk_get_rate(priv->clk);
+
+	/*
+	 * We want the bus clock to be as close as possible to, but no
+	 * greater than, new_clock.  As we can divide by 1 << i for
+	 * any i in [0, 9] we want the input clock to be as close as
+	 * possible, but no greater than, new_clock << i.
+	 */
+	for (i = min(9, ilog2(UINT_MAX / new_clock)); i >= 0; i--) {
+		freq = clk_round_rate(priv->clk, new_clock << i);
+		if (freq > (new_clock << i)) {
+			/* Too fast; look for a slightly slower option */
+			freq = clk_round_rate(priv->clk,
+					      (new_clock << i) / 4 * 3);
+			if (freq > (new_clock << i))
+				continue;
+		}
+
+		diff = new_clock - (freq >> i);
+		if (diff <= diff_min) {
+			best_freq = freq;
+			diff_min = diff;
+		}
+	}
+
+	ret = clk_set_rate(priv->clk, best_freq);
+
+	return ret == 0 ? best_freq : clk_get_rate(priv->clk);
+}
+
+static void sh_mobile_sdhi_clk_disable(struct tmio_mmc_host *host)
+{
+	struct sh_mobile_sdhi *priv = host_to_priv(host);
+
+	clk_disable_unprepare(priv->clk);
+}
+
+static int sh_mobile_sdhi_start_signal_voltage_switch(struct mmc_host *mmc,
+						      struct mmc_ios *ios)
+{
 	struct tmio_mmc_host *host = mmc_priv(mmc);
 	struct sh_mobile_sdhi *priv = host_to_priv(host);
-	clk_disable_unprepare(priv->clk);
+	struct pinctrl_state *pin_state;
+	int ret;
+
+	switch (ios->signal_voltage) {
+	case MMC_SIGNAL_VOLTAGE_330:
+		pin_state = priv->pins_default;
+		break;
+	case MMC_SIGNAL_VOLTAGE_180:
+		pin_state = priv->pins_uhs;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/*
+	 * If anything is missing, assume signal voltage is fixed at
+	 * 3.3V and succeed/fail accordingly.
+	 */
+	if (IS_ERR(priv->pinctrl) || IS_ERR(pin_state))
+		return ios->signal_voltage ==
+			MMC_SIGNAL_VOLTAGE_330 ? 0 : -EINVAL;
+
+	ret = mmc_regulator_set_vqmmc(host->mmc, ios);
+	if (ret)
+		return ret;
+
+	return pinctrl_select_state(priv->pinctrl, pin_state);
 }
 
 static int sh_mobile_sdhi_wait_idle(struct tmio_mmc_host *host)
 {
 	int timeout = 1000;
 
-	while (--timeout && !(sd_ctrl_read16(host, CTL_STATUS2) & (1 << 13)))
+	while (--timeout && !(sd_ctrl_read16_and_16_as_32(host, CTL_STATUS)
+			      & TMIO_STAT_SCLKDIVEN))
 		udelay(1);
 
 	if (!timeout) {
@@ -226,7 +312,6 @@
 	struct tmio_mmc_host *host;
 	struct resource *res;
 	int irq, ret, i = 0;
-	bool multiplexed_isr = true;
 	struct tmio_mmc_dma *dma_priv;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -247,6 +332,14 @@
 		goto eprobe;
 	}
 
+	priv->pinctrl = devm_pinctrl_get(&pdev->dev);
+	if (!IS_ERR(priv->pinctrl)) {
+		priv->pins_default = pinctrl_lookup_state(priv->pinctrl,
+						PINCTRL_STATE_DEFAULT);
+		priv->pins_uhs = pinctrl_lookup_state(priv->pinctrl,
+						"state_uhs");
+	}
+
 	host = tmio_mmc_host_alloc(pdev);
 	if (!host) {
 		ret = -ENOMEM;
@@ -267,8 +360,10 @@
 	host->dma		= dma_priv;
 	host->write16_hook	= sh_mobile_sdhi_write16_hook;
 	host->clk_enable	= sh_mobile_sdhi_clk_enable;
+	host->clk_update	= sh_mobile_sdhi_clk_update;
 	host->clk_disable	= sh_mobile_sdhi_clk_disable;
 	host->multi_io_quirk	= sh_mobile_sdhi_multi_io_quirk;
+	host->start_signal_voltage_switch = sh_mobile_sdhi_start_signal_voltage_switch;
 
 	/* Originally registers were 16 bits apart, could be 32 or 64 nowadays */
 	if (!host->bus_shift && resource_size(res) > 0x100) /* old way to determine the shift */
@@ -308,63 +403,24 @@
 	if (ret < 0)
 		goto efree;
 
-	/*
-	 * Allow one or more specific (named) ISRs or
-	 * one or more multiplexed (un-named) ISRs.
-	 */
-
-	irq = platform_get_irq_byname(pdev, SH_MOBILE_SDHI_IRQ_CARD_DETECT);
-	if (irq >= 0) {
-		multiplexed_isr = false;
-		ret = devm_request_irq(&pdev->dev, irq, tmio_mmc_card_detect_irq, 0,
+	while (1) {
+		irq = platform_get_irq(pdev, i);
+		if (irq < 0)
+			break;
+		i++;
+		ret = devm_request_irq(&pdev->dev, irq, tmio_mmc_irq, 0,
 				  dev_name(&pdev->dev), host);
 		if (ret)
 			goto eirq;
 	}
 
-	irq = platform_get_irq_byname(pdev, SH_MOBILE_SDHI_IRQ_SDIO);
-	if (irq >= 0) {
-		multiplexed_isr = false;
-		ret = devm_request_irq(&pdev->dev, irq, tmio_mmc_sdio_irq, 0,
-				  dev_name(&pdev->dev), host);
-		if (ret)
-			goto eirq;
-	}
-
-	irq = platform_get_irq_byname(pdev, SH_MOBILE_SDHI_IRQ_SDCARD);
-	if (irq >= 0) {
-		multiplexed_isr = false;
-		ret = devm_request_irq(&pdev->dev, irq, tmio_mmc_sdcard_irq, 0,
-				  dev_name(&pdev->dev), host);
-		if (ret)
-			goto eirq;
-	} else if (!multiplexed_isr) {
-		dev_err(&pdev->dev,
-			"Principal SD-card IRQ is missing among named interrupts\n");
+	/* There must be at least one IRQ source */
+	if (!i) {
 		ret = irq;
 		goto eirq;
 	}
 
-	if (multiplexed_isr) {
-		while (1) {
-			irq = platform_get_irq(pdev, i);
-			if (irq < 0)
-				break;
-			i++;
-			ret = devm_request_irq(&pdev->dev, irq, tmio_mmc_irq, 0,
-					  dev_name(&pdev->dev), host);
-			if (ret)
-				goto eirq;
-		}
-
-		/* There must be at least one IRQ source */
-		if (!i) {
-			ret = irq;
-			goto eirq;
-		}
-	}
-
-	dev_info(&pdev->dev, "%s base at 0x%08lx clock rate %u MHz\n",
+	dev_info(&pdev->dev, "%s base at 0x%08lx max clock rate %u MHz\n",
 		 mmc_hostname(host->mmc), (unsigned long)
 		 (platform_get_resource(pdev, IORESOURCE_MEM, 0)->start),
 		 host->mmc->f_max / 1000000);
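To make the divider search in sh_mobile_sdhi_clk_update() concrete, here is a standalone userspace model (illustrative only: round_rate() stands in for clk_round_rate() and simply snaps down to multiples of 13 MHz, and the 3/4-rate retry is elided):

#include <stdio.h>

static unsigned long round_rate(unsigned long rate)
{
	return rate - (rate % 13000000UL);	/* hypothetical clock domain */
}

int main(void)
{
	unsigned long new_clock = 400000;	/* 400 kHz bus clock wanted */
	unsigned long best_freq = 0, diff_min = ~0UL;
	int i;

	for (i = 9; i >= 0; i--) {
		unsigned long freq = round_rate(new_clock << i);
		unsigned long diff;

		if (freq > (new_clock << i))
			continue;		/* driver retries at 3/4 rate */

		diff = new_clock - (freq >> i);
		if (diff <= diff_min) {		/* closest without exceeding */
			best_freq = freq;
			diff_min = diff;
		}
	}

	/* picks 195 MHz here: 195000000 >> 9 = 380859 Hz <= 400 kHz */
	printf("best input clock: %lu Hz\n", best_freq);
	return 0;
}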
diff --git a/drivers/mmc/host/tmio_mmc.h b/drivers/mmc/host/tmio_mmc.h
index 4a597f5a..1aac2ad 100644
--- a/drivers/mmc/host/tmio_mmc.h
+++ b/drivers/mmc/host/tmio_mmc.h
@@ -1,6 +1,8 @@
 /*
  * linux/drivers/mmc/host/tmio_mmc.h
  *
+ * Copyright (C) 2016 Sang Engineering, Wolfram Sang
+ * Copyright (C) 2015-16 Renesas Electronics Corporation
  * Copyright (C) 2007 Ian Molton
  * Copyright (C) 2004 Ian Molton
  *
@@ -18,12 +20,67 @@
 
 #include <linux/dmaengine.h>
 #include <linux/highmem.h>
-#include <linux/mmc/tmio.h>
 #include <linux/mutex.h>
 #include <linux/pagemap.h>
 #include <linux/scatterlist.h>
 #include <linux/spinlock.h>
 
+#define CTL_SD_CMD 0x00
+#define CTL_ARG_REG 0x04
+#define CTL_STOP_INTERNAL_ACTION 0x08
+#define CTL_XFER_BLK_COUNT 0xa
+#define CTL_RESPONSE 0x0c
+/* driver merges STATUS and following STATUS2 */
+#define CTL_STATUS 0x1c
+/* driver merges IRQ_MASK and following IRQ_MASK2 */
+#define CTL_IRQ_MASK 0x20
+#define CTL_SD_CARD_CLK_CTL 0x24
+#define CTL_SD_XFER_LEN 0x26
+#define CTL_SD_MEM_CARD_OPT 0x28
+#define CTL_SD_ERROR_DETAIL_STATUS 0x2c
+#define CTL_SD_DATA_PORT 0x30
+#define CTL_TRANSACTION_CTL 0x34
+#define CTL_SDIO_STATUS 0x36
+#define CTL_SDIO_IRQ_MASK 0x38
+#define CTL_DMA_ENABLE 0xd8
+#define CTL_RESET_SD 0xe0
+#define CTL_VERSION 0xe2
+#define CTL_SDIO_REGS 0x100
+#define CTL_CLK_AND_WAIT_CTL 0x138
+#define CTL_RESET_SDIO 0x1e0
+
+/* Definitions for values the CTL_STATUS register can take. */
+#define TMIO_STAT_CMDRESPEND    BIT(0)
+#define TMIO_STAT_DATAEND       BIT(2)
+#define TMIO_STAT_CARD_REMOVE   BIT(3)
+#define TMIO_STAT_CARD_INSERT   BIT(4)
+#define TMIO_STAT_SIGSTATE      BIT(5)
+#define TMIO_STAT_WRPROTECT     BIT(7)
+#define TMIO_STAT_CARD_REMOVE_A BIT(8)
+#define TMIO_STAT_CARD_INSERT_A BIT(9)
+#define TMIO_STAT_SIGSTATE_A    BIT(10)
+
+/* These technically belong to CTL_STATUS2, but the driver merges them */
+#define TMIO_STAT_CMD_IDX_ERR   BIT(16)
+#define TMIO_STAT_CRCFAIL       BIT(17)
+#define TMIO_STAT_STOPBIT_ERR   BIT(18)
+#define TMIO_STAT_DATATIMEOUT   BIT(19)
+#define TMIO_STAT_RXOVERFLOW    BIT(20)
+#define TMIO_STAT_TXUNDERRUN    BIT(21)
+#define TMIO_STAT_CMDTIMEOUT    BIT(22)
+#define TMIO_STAT_DAT0		BIT(23)	/* only known on R-Car so far */
+#define TMIO_STAT_RXRDY         BIT(24)
+#define TMIO_STAT_TXRQ          BIT(25)
+#define TMIO_STAT_ILL_FUNC      BIT(29) /* only when !TMIO_MMC_HAS_IDLE_WAIT */
+#define TMIO_STAT_SCLKDIVEN     BIT(29) /* only when TMIO_MMC_HAS_IDLE_WAIT */
+#define TMIO_STAT_CMD_BUSY      BIT(30)
+#define TMIO_STAT_ILL_ACCESS    BIT(31)
+
+#define	CLK_CTL_DIV_MASK	0xff
+#define	CLK_CTL_SCLKEN		BIT(8)
+
+#define TMIO_BBS		512		/* Boot block size */
+
 /* Definitions for values the CTRL_SDIO_STATUS register can take. */
 #define TMIO_SDIO_STAT_IOIRQ	0x0001
 #define TMIO_SDIO_STAT_EXPUB52	0x4000
@@ -95,10 +152,14 @@
 	bool			sdio_irq_enabled;
 
 	int (*write16_hook)(struct tmio_mmc_host *host, int addr);
-	int (*clk_enable)(struct platform_device *pdev, unsigned int *f);
-	void (*clk_disable)(struct platform_device *pdev);
+	int (*clk_enable)(struct tmio_mmc_host *host);
+	unsigned int (*clk_update)(struct tmio_mmc_host *host,
+				   unsigned int new_clock);
+	void (*clk_disable)(struct tmio_mmc_host *host);
 	int (*multi_io_quirk)(struct mmc_card *card,
 			      unsigned int direction, int blk_size);
+	int (*start_signal_voltage_switch)(struct mmc_host *mmc,
+					   struct mmc_ios *ios);
 };
 
 struct tmio_mmc_host *tmio_mmc_host_alloc(struct platform_device *pdev);
@@ -111,9 +172,6 @@
 void tmio_mmc_enable_mmc_irqs(struct tmio_mmc_host *host, u32 i);
 void tmio_mmc_disable_mmc_irqs(struct tmio_mmc_host *host, u32 i);
 irqreturn_t tmio_mmc_irq(int irq, void *devid);
-irqreturn_t tmio_mmc_sdcard_irq(int irq, void *devid);
-irqreturn_t tmio_mmc_card_detect_irq(int irq, void *devid);
-irqreturn_t tmio_mmc_sdio_irq(int irq, void *devid);
 
 static inline char *tmio_mmc_kmap_atomic(struct scatterlist *sg,
 					 unsigned long *flags)
@@ -177,7 +235,7 @@
 	readsw(host->ctl + (addr << host->bus_shift), buf, count);
 }
 
-static inline u32 sd_ctrl_read32(struct tmio_mmc_host *host, int addr)
+static inline u32 sd_ctrl_read16_and_16_as_32(struct tmio_mmc_host *host, int addr)
 {
 	return readw(host->ctl + (addr << host->bus_shift)) |
 	       readw(host->ctl + ((addr + 2) << host->bus_shift)) << 16;
@@ -199,11 +257,10 @@
 	writesw(host->ctl + (addr << host->bus_shift), buf, count);
 }
 
-static inline void sd_ctrl_write32(struct tmio_mmc_host *host, int addr, u32 val)
+static inline void sd_ctrl_write32_as_16_and_16(struct tmio_mmc_host *host, int addr, u32 val)
 {
 	writew(val, host->ctl + (addr << host->bus_shift));
 	writew(val >> 16, host->ctl + ((addr + 2) << host->bus_shift));
 }
 
-
 #endif
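Since several hunks above rely on it: sd_ctrl_read16_and_16_as_32() treats two adjacent 16-bit registers as one 32-bit word, so bit n of the second register shows up as bit (n + 16) of the merged value -- e.g. TMIO_STAT_CRCFAIL (BIT(17)) is really bit 1 of STATUS2. A small illustration with made-up values:

static u32 example_merge_status(u16 status, u16 status2)
{
	/* status = 0x0001 (CMDRESPEND), status2 = 0x0002 (CRCFAIL)
	 * => merged = 0x00020001 */
	return status | (u32)status2 << 16;
}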
diff --git a/drivers/mmc/host/tmio_mmc_dma.c b/drivers/mmc/host/tmio_mmc_dma.c
index 7fb0c03..fa8a936 100644
--- a/drivers/mmc/host/tmio_mmc_dma.c
+++ b/drivers/mmc/host/tmio_mmc_dma.c
@@ -15,7 +15,6 @@
 #include <linux/dmaengine.h>
 #include <linux/mfd/tmio.h>
 #include <linux/mmc/host.h>
-#include <linux/mmc/tmio.h>
 #include <linux/pagemap.h>
 #include <linux/scatterlist.h>
 
diff --git a/drivers/mmc/host/tmio_mmc_pio.c b/drivers/mmc/host/tmio_mmc_pio.c
index 0521b46..f44e2ab 100644
--- a/drivers/mmc/host/tmio_mmc_pio.c
+++ b/drivers/mmc/host/tmio_mmc_pio.c
@@ -39,7 +39,6 @@
 #include <linux/mmc/host.h>
 #include <linux/mmc/mmc.h>
 #include <linux/mmc/slot-gpio.h>
-#include <linux/mmc/tmio.h>
 #include <linux/module.h>
 #include <linux/pagemap.h>
 #include <linux/platform_device.h>
@@ -56,18 +55,18 @@
 void tmio_mmc_enable_mmc_irqs(struct tmio_mmc_host *host, u32 i)
 {
 	host->sdcard_irq_mask &= ~(i & TMIO_MASK_IRQ);
-	sd_ctrl_write32(host, CTL_IRQ_MASK, host->sdcard_irq_mask);
+	sd_ctrl_write32_as_16_and_16(host, CTL_IRQ_MASK, host->sdcard_irq_mask);
 }
 
 void tmio_mmc_disable_mmc_irqs(struct tmio_mmc_host *host, u32 i)
 {
 	host->sdcard_irq_mask |= (i & TMIO_MASK_IRQ);
-	sd_ctrl_write32(host, CTL_IRQ_MASK, host->sdcard_irq_mask);
+	sd_ctrl_write32_as_16_and_16(host, CTL_IRQ_MASK, host->sdcard_irq_mask);
 }
 
 static void tmio_mmc_ack_mmc_irqs(struct tmio_mmc_host *host, u32 i)
 {
-	sd_ctrl_write32(host, CTL_STATUS, ~i);
+	sd_ctrl_write32_as_16_and_16(host, CTL_STATUS, ~i);
 }
 
 static void tmio_mmc_init_sg(struct tmio_mmc_host *host, struct mmc_data *data)
@@ -154,31 +153,16 @@
 	}
 }
 
-static void tmio_mmc_set_clock(struct tmio_mmc_host *host,
-				unsigned int new_clock)
+static void tmio_mmc_clk_start(struct tmio_mmc_host *host)
 {
-	u32 clk = 0, clock;
+	sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, CLK_CTL_SCLKEN |
+		sd_ctrl_read16(host, CTL_SD_CARD_CLK_CTL));
+	msleep(host->pdata->flags & TMIO_MMC_MIN_RCAR2 ? 1 : 10);
 
-	if (new_clock) {
-		for (clock = host->mmc->f_min, clk = 0x80000080;
-		     new_clock >= (clock << 1);
-		     clk >>= 1)
-			clock <<= 1;
-
-		/* 1/1 clock is option */
-		if ((host->pdata->flags & TMIO_MMC_CLK_ACTUAL) &&
-		   ((clk >> 22) & 0x1))
-			clk |= 0xff;
-	}
-
-	if (host->set_clk_div)
-		host->set_clk_div(host->pdev, (clk >> 22) & 1);
-
-	sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, ~CLK_CTL_SCLKEN &
-			sd_ctrl_read16(host, CTL_SD_CARD_CLK_CTL));
-	sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, clk & CLK_CTL_DIV_MASK);
-	if (!(host->pdata->flags & TMIO_MMC_FAST_CLK_CHG))
+	if (host->pdata->flags & TMIO_MMC_HAVE_HIGH_REG) {
+		sd_ctrl_write16(host, CTL_CLK_AND_WAIT_CTL, 0x0100);
 		msleep(10);
+	}
 }
 
 static void tmio_mmc_clk_stop(struct tmio_mmc_host *host)
@@ -190,19 +174,41 @@
 
 	sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, ~CLK_CTL_SCLKEN &
 		sd_ctrl_read16(host, CTL_SD_CARD_CLK_CTL));
-	msleep(host->pdata->flags & TMIO_MMC_FAST_CLK_CHG ? 5 : 10);
+	msleep(host->pdata->flags & TMIO_MMC_MIN_RCAR2 ? 5 : 10);
 }
 
-static void tmio_mmc_clk_start(struct tmio_mmc_host *host)
+static void tmio_mmc_set_clock(struct tmio_mmc_host *host,
+				unsigned int new_clock)
 {
-	sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, CLK_CTL_SCLKEN |
-		sd_ctrl_read16(host, CTL_SD_CARD_CLK_CTL));
-	msleep(host->pdata->flags & TMIO_MMC_FAST_CLK_CHG ? 1 : 10);
+	u32 clk = 0, clock;
 
-	if (host->pdata->flags & TMIO_MMC_HAVE_HIGH_REG) {
-		sd_ctrl_write16(host, CTL_CLK_AND_WAIT_CTL, 0x0100);
-		msleep(10);
+	if (new_clock == 0) {
+		tmio_mmc_clk_stop(host);
+		return;
 	}
+
+	if (host->clk_update)
+		clock = host->clk_update(host, new_clock) / 512;
+	else
+		clock = host->mmc->f_min;
+
+	for (clk = 0x80000080; new_clock >= (clock << 1); clk >>= 1)
+		clock <<= 1;
+
+	/* 1/1 clock is an option */
+	if ((host->pdata->flags & TMIO_MMC_CLK_ACTUAL) && ((clk >> 22) & 0x1))
+		clk |= 0xff;
+
+	if (host->set_clk_div)
+		host->set_clk_div(host->pdev, (clk >> 22) & 1);
+
+	sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, ~CLK_CTL_SCLKEN &
+			sd_ctrl_read16(host, CTL_SD_CARD_CLK_CTL));
+	sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, clk & CLK_CTL_DIV_MASK);
+	if (!(host->pdata->flags & TMIO_MMC_MIN_RCAR2))
+		msleep(10);
+
+	tmio_mmc_clk_start(host);
 }
 
 static void tmio_mmc_reset(struct tmio_mmc_host *host)
@@ -264,9 +270,6 @@
 
 	tmio_mmc_abort_dma(host);
 	mmc_request_done(host->mmc, mrq);
-
-	pm_runtime_mark_last_busy(mmc_dev(host->mmc));
-	pm_runtime_put_autosuspend(mmc_dev(host->mmc));
 }
 
 /* called with host->lock held, interrupts disabled */
@@ -296,9 +299,6 @@
 		tmio_mmc_abort_dma(host);
 
 	mmc_request_done(host->mmc, mrq);
-
-	pm_runtime_mark_last_busy(mmc_dev(host->mmc));
-	pm_runtime_put_autosuspend(mmc_dev(host->mmc));
 }
 
 static void tmio_mmc_done_work(struct work_struct *work)
@@ -375,7 +375,7 @@
 	tmio_mmc_enable_mmc_irqs(host, irq_mask);
 
 	/* Fire off the command */
-	sd_ctrl_write32(host, CTL_ARG_REG, cmd->arg);
+	sd_ctrl_write32_as_16_and_16(host, CTL_ARG_REG, cmd->arg);
 	sd_ctrl_write16(host, CTL_SD_CMD, c);
 
 	return 0;
@@ -530,7 +530,7 @@
 		goto out;
 
 	if (host->chan_tx && (data->flags & MMC_DATA_WRITE) && !host->force_pio) {
-		u32 status = sd_ctrl_read32(host, CTL_STATUS);
+		u32 status = sd_ctrl_read16_and_16_as_32(host, CTL_STATUS);
 		bool done = false;
 
 		/*
@@ -542,7 +542,7 @@
 		 * waiting for one more interrupt fixes the problem.
 		 */
 		if (host->pdata->flags & TMIO_MMC_HAS_IDLE_WAIT) {
-			if (status & TMIO_STAT_ILL_FUNC)
+			if (status & TMIO_STAT_SCLKDIVEN)
 				done = true;
 		} else {
 			if (!(status & TMIO_STAT_CMD_BUSY))
@@ -585,7 +585,7 @@
 	 */
 
 	for (i = 3, addr = CTL_RESPONSE ; i >= 0 ; i--, addr += 4)
-		cmd->resp[i] = sd_ctrl_read32(host, addr);
+		cmd->resp[i] = sd_ctrl_read16_and_16_as_32(host, addr);
 
 	if (cmd->flags &  MMC_RSP_136) {
 		cmd->resp[0] = (cmd->resp[0] << 8) | (cmd->resp[1] >> 24);
@@ -625,19 +625,6 @@
 	spin_unlock(&host->lock);
 }
 
-static void tmio_mmc_card_irq_status(struct tmio_mmc_host *host,
-				       int *ireg, int *status)
-{
-	*status = sd_ctrl_read32(host, CTL_STATUS);
-	*ireg = *status & TMIO_MASK_IRQ & ~host->sdcard_irq_mask;
-
-	pr_debug_status(*status);
-	pr_debug_status(*ireg);
-
-	/* Clear the status except the interrupt status */
-	sd_ctrl_write32(host, CTL_STATUS, TMIO_MASK_IRQ);
-}
-
 static bool __tmio_mmc_card_detect_irq(struct tmio_mmc_host *host,
 				      int ireg, int status)
 {
@@ -657,18 +644,6 @@
 	return false;
 }
 
-irqreturn_t tmio_mmc_card_detect_irq(int irq, void *devid)
-{
-	unsigned int ireg, status;
-	struct tmio_mmc_host *host = devid;
-
-	tmio_mmc_card_irq_status(host, &ireg, &status);
-	__tmio_mmc_card_detect_irq(host, ireg, status);
-
-	return IRQ_HANDLED;
-}
-EXPORT_SYMBOL(tmio_mmc_card_detect_irq);
-
 static bool __tmio_mmc_sdcard_irq(struct tmio_mmc_host *host,
 				 int ireg, int status)
 {
@@ -698,19 +673,7 @@
 	return false;
 }
 
-irqreturn_t tmio_mmc_sdcard_irq(int irq, void *devid)
-{
-	unsigned int ireg, status;
-	struct tmio_mmc_host *host = devid;
-
-	tmio_mmc_card_irq_status(host, &ireg, &status);
-	__tmio_mmc_sdcard_irq(host, ireg, status);
-
-	return IRQ_HANDLED;
-}
-EXPORT_SYMBOL(tmio_mmc_sdcard_irq);
-
-irqreturn_t tmio_mmc_sdio_irq(int irq, void *devid)
+static void tmio_mmc_sdio_irq(int irq, void *devid)
 {
 	struct tmio_mmc_host *host = devid;
 	struct mmc_host *mmc = host->mmc;
@@ -719,7 +682,7 @@
 	unsigned int sdio_status;
 
 	if (!(pdata->flags & TMIO_MMC_SDIO_IRQ))
-		return IRQ_HANDLED;
+		return;
 
 	status = sd_ctrl_read16(host, CTL_SDIO_STATUS);
 	ireg = status & TMIO_SDIO_MASK_ALL & ~host->sdcard_irq_mask;
@@ -732,19 +695,22 @@
 
 	if (mmc->caps & MMC_CAP_SDIO_IRQ && ireg & TMIO_SDIO_STAT_IOIRQ)
 		mmc_signal_sdio_irq(mmc);
-
-	return IRQ_HANDLED;
 }
-EXPORT_SYMBOL(tmio_mmc_sdio_irq);
 
 irqreturn_t tmio_mmc_irq(int irq, void *devid)
 {
 	struct tmio_mmc_host *host = devid;
 	unsigned int ireg, status;
 
-	pr_debug("MMC IRQ begin\n");
+	status = sd_ctrl_read16_and_16_as_32(host, CTL_STATUS);
+	ireg = status & TMIO_MASK_IRQ & ~host->sdcard_irq_mask;
 
-	tmio_mmc_card_irq_status(host, &ireg, &status);
+	pr_debug_status(status);
+	pr_debug_status(ireg);
+
+	/* Clear the status except the interrupt status */
+	sd_ctrl_write32_as_16_and_16(host, CTL_STATUS, TMIO_MASK_IRQ);
+
 	if (__tmio_mmc_card_detect_irq(host, ireg, status))
 		return IRQ_HANDLED;
 	if (__tmio_mmc_sdcard_irq(host, ireg, status))
@@ -812,8 +778,6 @@
 
 	spin_unlock_irqrestore(&host->lock, flags);
 
-	pm_runtime_get_sync(mmc_dev(mmc));
-
 	if (mrq->data) {
 		ret = tmio_mmc_start_data(host, mrq->data);
 		if (ret)
@@ -832,24 +796,14 @@
 	host->mrq = NULL;
 	mrq->cmd->error = ret;
 	mmc_request_done(mmc, mrq);
-
-	pm_runtime_mark_last_busy(mmc_dev(mmc));
-	pm_runtime_put_autosuspend(mmc_dev(mmc));
 }
 
-static int tmio_mmc_clk_update(struct tmio_mmc_host *host)
+static int tmio_mmc_clk_enable(struct tmio_mmc_host *host)
 {
-	struct mmc_host *mmc = host->mmc;
-	int ret;
-
 	if (!host->clk_enable)
 		return -ENOTSUPP;
 
-	ret = host->clk_enable(host->pdev, &mmc->f_max);
-	if (!ret)
-		mmc->f_min = mmc->f_max / 512;
-
-	return ret;
+	return host->clk_enable(host);
 }
 
 static void tmio_mmc_power_on(struct tmio_mmc_host *host, unsigned short vdd)
@@ -925,8 +879,6 @@
 	struct device *dev = &host->pdev->dev;
 	unsigned long flags;
 
-	pm_runtime_get_sync(mmc_dev(mmc));
-
 	mutex_lock(&host->ios_lock);
 
 	spin_lock_irqsave(&host->lock, flags);
@@ -959,14 +911,12 @@
 		tmio_mmc_clk_stop(host);
 		break;
 	case MMC_POWER_UP:
-		tmio_mmc_set_clock(host, ios->clock);
 		tmio_mmc_power_on(host, ios->vdd);
-		tmio_mmc_clk_start(host);
+		tmio_mmc_set_clock(host, ios->clock);
 		tmio_mmc_set_bus_width(host, ios->bus_width);
 		break;
 	case MMC_POWER_ON:
 		tmio_mmc_set_clock(host, ios->clock);
-		tmio_mmc_clk_start(host);
 		tmio_mmc_set_bus_width(host, ios->bus_width);
 		break;
 	}
@@ -983,9 +933,6 @@
 	host->clk_cache = ios->clock;
 
 	mutex_unlock(&host->ios_lock);
-
-	pm_runtime_mark_last_busy(mmc_dev(mmc));
-	pm_runtime_put_autosuspend(mmc_dev(mmc));
 }
 
 static int tmio_mmc_get_ro(struct mmc_host *mmc)
@@ -996,11 +943,8 @@
 	if (ret >= 0)
 		return ret;
 
-	pm_runtime_get_sync(mmc_dev(mmc));
 	ret = !((pdata->flags & TMIO_MMC_WRPROTECT_DISABLE) ||
-		(sd_ctrl_read32(host, CTL_STATUS) & TMIO_STAT_WRPROTECT));
-	pm_runtime_mark_last_busy(mmc_dev(mmc));
-	pm_runtime_put_autosuspend(mmc_dev(mmc));
+		(sd_ctrl_read16_and_16_as_32(host, CTL_STATUS) & TMIO_STAT_WRPROTECT));
 
 	return ret;
 }
@@ -1016,12 +960,20 @@
 	return blk_size;
 }
 
-static const struct mmc_host_ops tmio_mmc_ops = {
+static int tmio_mmc_card_busy(struct mmc_host *mmc)
+{
+	struct tmio_mmc_host *host = mmc_priv(mmc);
+
+	return !(sd_ctrl_read16_and_16_as_32(host, CTL_STATUS) & TMIO_STAT_DAT0);
+}
+
+static struct mmc_host_ops tmio_mmc_ops = {
 	.request	= tmio_mmc_request,
 	.set_ios	= tmio_mmc_set_ios,
 	.get_ro         = tmio_mmc_get_ro,
 	.get_cd		= mmc_gpio_get_cd,
 	.enable_sdio_irq = tmio_mmc_enable_sdio_irq,
+	.card_busy	= tmio_mmc_card_busy,
 	.multi_io_quirk	= tmio_multi_io_quirk,
 };
 
@@ -1120,7 +1072,9 @@
 		goto host_free;
 	}
 
+	tmio_mmc_ops.start_signal_voltage_switch = _host->start_signal_voltage_switch;
 	mmc->ops = &tmio_mmc_ops;
+
 	mmc->caps |= MMC_CAP_4_BIT_DATA | pdata->capabilities;
 	mmc->caps2 |= pdata->capabilities2;
 	mmc->max_segs = 32;
@@ -1135,7 +1089,7 @@
 				  mmc->caps & MMC_CAP_NONREMOVABLE ||
 				  mmc->slot.cd_irq >= 0);
 
-	if (tmio_mmc_clk_update(_host) < 0) {
+	if (tmio_mmc_clk_enable(_host) < 0) {
 		mmc->f_max = pdata->hclk;
 		mmc->f_min = mmc->f_max / 512;
 	}
@@ -1159,7 +1113,7 @@
 	tmio_mmc_clk_stop(_host);
 	tmio_mmc_reset(_host);
 
-	_host->sdcard_irq_mask = sd_ctrl_read32(_host, CTL_IRQ_MASK);
+	_host->sdcard_irq_mask = sd_ctrl_read16_and_16_as_32(_host, CTL_IRQ_MASK);
 	tmio_mmc_disable_mmc_irqs(_host, TMIO_MASK_ALL);
 
 	/* Unmask the IRQs we want to know about */
@@ -1251,7 +1205,7 @@
 		tmio_mmc_clk_stop(host);
 
 	if (host->clk_disable)
-		host->clk_disable(host->pdev);
+		host->clk_disable(host);
 
 	return 0;
 }
@@ -1263,12 +1217,10 @@
 	struct tmio_mmc_host *host = mmc_priv(mmc);
 
 	tmio_mmc_reset(host);
-	tmio_mmc_clk_update(host);
+	tmio_mmc_clk_enable(host);
 
-	if (host->clk_cache) {
+	if (host->clk_cache)
 		tmio_mmc_set_clock(host, host->clk_cache);
-		tmio_mmc_clk_start(host);
-	}
 
 	tmio_mmc_enable_dma(host, true);
 
diff --git a/drivers/mmc/host/toshsd.c b/drivers/mmc/host/toshsd.c
index e2cdd5f..553ef41 100644
--- a/drivers/mmc/host/toshsd.c
+++ b/drivers/mmc/host/toshsd.c
@@ -21,6 +21,7 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/pm.h>
+#include <linux/pm_runtime.h>
 #include <linux/mmc/host.h>
 #include <linux/mmc/mmc.h>
 
diff --git a/drivers/mmc/host/usdhi6rol0.c b/drivers/mmc/host/usdhi6rol0.c
index 807c06e..1bd5f1a 100644
--- a/drivers/mmc/host/usdhi6rol0.c
+++ b/drivers/mmc/host/usdhi6rol0.c
@@ -22,6 +22,7 @@
 #include <linux/mmc/sdio.h>
 #include <linux/module.h>
 #include <linux/pagemap.h>
+#include <linux/pinctrl/consumer.h>
 #include <linux/platform_device.h>
 #include <linux/scatterlist.h>
 #include <linux/string.h>
@@ -198,6 +199,11 @@
 	struct dma_chan *chan_rx;
 	struct dma_chan *chan_tx;
 	bool dma_active;
+
+	/* Pin control */
+	struct pinctrl *pinctrl;
+	struct pinctrl_state *pins_default;
+	struct pinctrl_state *pins_uhs;
 };
 
 /*			I/O primitives					*/
@@ -1147,12 +1153,45 @@
 	}
 }
 
+static int usdhi6_set_pinstates(struct usdhi6_host *host, int voltage)
+{
+	if (IS_ERR(host->pins_uhs))
+		return 0;
+
+	switch (voltage) {
+	case MMC_SIGNAL_VOLTAGE_180:
+	case MMC_SIGNAL_VOLTAGE_120:
+		return pinctrl_select_state(host->pinctrl,
+					    host->pins_uhs);
+
+	default:
+		return pinctrl_select_state(host->pinctrl,
+					    host->pins_default);
+	}
+}
+
+static int usdhi6_sig_volt_switch(struct mmc_host *mmc, struct mmc_ios *ios)
+{
+	int ret;
+
+	ret = mmc_regulator_set_vqmmc(mmc, ios);
+	if (ret < 0)
+		return ret;
+
+	ret = usdhi6_set_pinstates(mmc_priv(mmc), ios->signal_voltage);
+	if (ret)
+		dev_warn_once(mmc_dev(mmc),
+			      "Failed to set pinstate err=%d\n", ret);
+	return ret;
+}
+
 static struct mmc_host_ops usdhi6_ops = {
 	.request	= usdhi6_request,
 	.set_ios	= usdhi6_set_ios,
 	.get_cd		= usdhi6_get_cd,
 	.get_ro		= usdhi6_get_ro,
 	.enable_sdio_irq = usdhi6_enable_sdio_irq,
+	.start_signal_voltage_switch = usdhi6_sig_volt_switch,
 };
 
 /*			State machine handlers				*/
@@ -1730,6 +1769,25 @@
 	host->wait	= USDHI6_WAIT_FOR_REQUEST;
 	host->timeout	= msecs_to_jiffies(4000);
 
+	host->pinctrl = devm_pinctrl_get(&pdev->dev);
+	if (IS_ERR(host->pinctrl)) {
+		ret = PTR_ERR(host->pinctrl);
+		goto e_free_mmc;
+	}
+
+	host->pins_uhs = pinctrl_lookup_state(host->pinctrl, "state_uhs");
+	if (!IS_ERR(host->pins_uhs)) {
+		host->pins_default = pinctrl_lookup_state(host->pinctrl,
+							  PINCTRL_STATE_DEFAULT);
+
+		if (IS_ERR(host->pins_default)) {
+			dev_err(dev,
+				"UHS pinctrl requires a default pin state.\n");
+			ret = PTR_ERR(host->pins_default);
+			goto e_free_mmc;
+		}
+	}
+
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	host->base = devm_ioremap_resource(dev, res);
 	if (IS_ERR(host->base)) {
@@ -1785,7 +1843,7 @@
 
 	mmc->ops = &usdhi6_ops;
 	mmc->caps |= MMC_CAP_SD_HIGHSPEED | MMC_CAP_MMC_HIGHSPEED |
-		MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_DDR50 | MMC_CAP_SDIO_IRQ;
+		     MMC_CAP_SDIO_IRQ;
 	/* Set .max_segs to some random number. Feel free to adjust. */
 	mmc->max_segs = 32;
 	mmc->max_blk_size = 512;
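
The pinctrl lookups added above follow the generic pinctrl binding: a board that supports UHS modes provides a "state_uhs" pin state alongside the default one, and usdhi6_set_pinstates() flips between them on signal-voltage changes. A hypothetical device-tree fragment (node and pin-group names invented for illustration):

	sd0: sd@ab000000 {
		compatible = "renesas,usdhi6rol0";
		pinctrl-names = "default", "state_uhs";
		pinctrl-0 = <&sd0_pins>;
		pinctrl-1 = <&sd0_pins_uhs>;
		vmmc-supply = <&vcc_sd0>;
		vqmmc-supply = <&vccq_sd0>;
	};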
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 3096251..bee180bd 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -40,6 +40,7 @@
 #include <linux/slab.h>
 #include <linux/reboot.h>
 #include <linux/kconfig.h>
+#include <linux/leds.h>
 
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
@@ -862,6 +863,7 @@
 		mtd_erase_callback(instr);
 		return 0;
 	}
+	ledtrig_mtd_activity();
 	return mtd->_erase(mtd, instr);
 }
 EXPORT_SYMBOL_GPL(mtd_erase);
@@ -925,6 +927,7 @@
 	if (!len)
 		return 0;
 
+	ledtrig_mtd_activity();
 	/*
 	 * In the absence of an error, drivers return a non-negative integer
 	 * representing the maximum number of bitflips that were corrected on
@@ -949,6 +952,7 @@
 		return -EROFS;
 	if (!len)
 		return 0;
+	ledtrig_mtd_activity();
 	return mtd->_write(mtd, to, len, retlen, buf);
 }
 EXPORT_SYMBOL_GPL(mtd_write);
@@ -982,6 +986,8 @@
 	ops->retlen = ops->oobretlen = 0;
 	if (!mtd->_read_oob)
 		return -EOPNOTSUPP;
+
+	ledtrig_mtd_activity();
 	/*
 	 * In cases where ops->datbuf != NULL, mtd->_read_oob() has semantics
 	 * similar to mtd->_read(), returning a non-negative integer
@@ -997,6 +1003,19 @@
 }
 EXPORT_SYMBOL_GPL(mtd_read_oob);
 
+int mtd_write_oob(struct mtd_info *mtd, loff_t to,
+				struct mtd_oob_ops *ops)
+{
+	ops->retlen = ops->oobretlen = 0;
+	if (!mtd->_write_oob)
+		return -EOPNOTSUPP;
+	if (!(mtd->flags & MTD_WRITEABLE))
+		return -EROFS;
+	ledtrig_mtd_activity();
+	return mtd->_write_oob(mtd, to, ops);
+}
+EXPORT_SYMBOL_GPL(mtd_write_oob);
+
 /*
  * Method to access the protection register area, present in some flash
  * devices. The user data is one time programmable but the factory data is read
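
mtd_write_oob() above mirrors the checks in mtd_read_oob() and takes the same struct mtd_oob_ops. A minimal caller sketch (buffer and length values are illustrative):

	u8 oob_data[8] = { /* caller-prepared OOB bytes */ };
	struct mtd_oob_ops ops = {
		.mode	= MTD_OPS_PLACE_OOB,
		.ooblen	= sizeof(oob_data),
		.oobbuf	= oob_data,
		.datbuf	= NULL,		/* OOB-only write, no page data */
	};
	int err = mtd_write_oob(mtd, offset, &ops);
	if (!err)
		pr_debug("wrote %zu OOB bytes\n", ops.oobretlen);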
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 557b846..ba4f603 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -43,7 +43,6 @@
 #include <linux/mtd/nand_bch.h>
 #include <linux/interrupt.h>
 #include <linux/bitops.h>
-#include <linux/leds.h>
 #include <linux/io.h>
 #include <linux/mtd/partitions.h>
 #include <linux/of_mtd.h>
@@ -97,12 +96,6 @@
 static int nand_do_write_oob(struct mtd_info *mtd, loff_t to,
 			     struct mtd_oob_ops *ops);
 
-/*
- * For devices which display every fart in the system on a separate LED. Is
- * compiled away when LED support is disabled.
- */
-DEFINE_LED_TRIGGER(nand_led_trigger);
-
 static int check_offs_len(struct mtd_info *mtd,
 					loff_t ofs, uint64_t len)
 {
@@ -540,19 +533,16 @@
 	if (in_interrupt() || oops_in_progress)
 		return panic_nand_wait_ready(mtd, timeo);
 
-	led_trigger_event(nand_led_trigger, LED_FULL);
 	/* Wait until command is processed or timeout occurs */
 	timeo = jiffies + msecs_to_jiffies(timeo);
 	do {
 		if (chip->dev_ready(mtd))
-			goto out;
+			return;
 		cond_resched();
 	} while (time_before(jiffies, timeo));
 
 	if (!chip->dev_ready(mtd))
 		pr_warn_ratelimited("timeout while waiting for chip to become ready\n");
-out:
-	led_trigger_event(nand_led_trigger, LED_OFF);
 }
 EXPORT_SYMBOL_GPL(nand_wait_ready);
 
@@ -885,8 +875,6 @@
 	int status;
 	unsigned long timeo = 400;
 
-	led_trigger_event(nand_led_trigger, LED_FULL);
-
 	/*
 	 * Apply this short delay always to ensure that we do wait tWB in any
 	 * case on any machine.
@@ -910,7 +898,6 @@
 			cond_resched();
 		} while (time_before(jiffies, timeo));
 	}
-	led_trigger_event(nand_led_trigger, LED_OFF);
 
 	status = (int)chip->read_byte(mtd);
 	/* This can happen if in case of timeout or buggy dev_ready */
@@ -4466,20 +4453,6 @@
 }
 EXPORT_SYMBOL_GPL(nand_release);
 
-static int __init nand_base_init(void)
-{
-	led_trigger_register_simple("nand-disk", &nand_led_trigger);
-	return 0;
-}
-
-static void __exit nand_base_exit(void)
-{
-	led_trigger_unregister_simple(nand_led_trigger);
-}
-
-module_init(nand_base_init);
-module_exit(nand_base_exit);
-
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Steven J. Hill <sjhill@realitydiluted.com>");
 MODULE_AUTHOR("Thomas Gleixner <tglx@linutronix.de>");
diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c
index 72c9f1f..7c78307 100644
--- a/drivers/net/hamradio/baycom_epp.c
+++ b/drivers/net/hamradio/baycom_epp.c
@@ -635,10 +635,10 @@
 
 #ifdef __i386__
 #include <asm/msr.h>
-#define GETTICK(x)                                                \
-({                                                                \
-	if (cpu_has_tsc)                                          \
-		x = (unsigned int)rdtsc();		  \
+#define GETTICK(x)						\
+({								\
+	if (boot_cpu_has(X86_FEATURE_TSC))			\
+		x = (unsigned int)rdtsc();			\
 })
 #else /* __i386__ */
 #define GETTICK(x)
diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig
index e2a4841..b3bec3a 100644
--- a/drivers/of/Kconfig
+++ b/drivers/of/Kconfig
@@ -112,4 +112,7 @@
 	  While this option is selected automatically when needed, you can
 	  enable it manually to improve device tree unit test coverage.
 
+config OF_NUMA
+	bool
+
 endif # OF
diff --git a/drivers/of/Makefile b/drivers/of/Makefile
index 156c072..bee3fa9 100644
--- a/drivers/of/Makefile
+++ b/drivers/of/Makefile
@@ -14,5 +14,6 @@
 obj-$(CONFIG_OF_RESERVED_MEM) += of_reserved_mem.o
 obj-$(CONFIG_OF_RESOLVE)  += resolver.o
 obj-$(CONFIG_OF_OVERLAY) += overlay.o
+obj-$(CONFIG_OF_NUMA) += of_numa.o
 
 obj-$(CONFIG_OF_UNITTEST) += unittest-data/
diff --git a/drivers/of/of_numa.c b/drivers/of/of_numa.c
new file mode 100644
index 0000000..0f2784b
--- /dev/null
+++ b/drivers/of/of_numa.c
@@ -0,0 +1,211 @@
+/*
+ * OF NUMA Parsing support.
+ *
+ * Copyright (C) 2015 - 2016 Cavium Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/nodemask.h>
+
+#include <asm/numa.h>
+
+/* define default numa node to 0 */
+#define DEFAULT_NODE 0
+
+/*
+ * Even though we connect cpus to numa domains later in SMP
+ * init, we need to know the node ids now for all cpus.
+ */
+static void __init of_numa_parse_cpu_nodes(void)
+{
+	u32 nid;
+	int r;
+	struct device_node *cpus;
+	struct device_node *np = NULL;
+
+	cpus = of_find_node_by_path("/cpus");
+	if (!cpus)
+		return;
+
+	for_each_child_of_node(cpus, np) {
+		/* Skip things that are not CPUs */
+		if (of_node_cmp(np->type, "cpu") != 0)
+			continue;
+
+		r = of_property_read_u32(np, "numa-node-id", &nid);
+		if (r)
+			continue;
+
+		pr_debug("NUMA: CPU on %u\n", nid);
+		if (nid >= MAX_NUMNODES)
+			pr_warn("NUMA: Node id %u exceeds maximum value\n",
+				nid);
+		else
+			node_set(nid, numa_nodes_parsed);
+	}
+}
+
+static int __init of_numa_parse_memory_nodes(void)
+{
+	struct device_node *np = NULL;
+	struct resource rsrc;
+	u32 nid;
+	int r = 0;
+
+	for (;;) {
+		np = of_find_node_by_type(np, "memory");
+		if (!np)
+			break;
+
+		r = of_property_read_u32(np, "numa-node-id", &nid);
+		if (r == -EINVAL)
+			/*
+			 * property doesn't exist if -EINVAL, continue
+			 * looking for more memory nodes with
+			 * "numa-node-id" property
+			 */
+			continue;
+		else if (r)
+			/* some other error */
+			break;
+
+		r = of_address_to_resource(np, 0, &rsrc);
+		if (r) {
+			pr_err("NUMA: bad reg property in memory node\n");
+			break;
+		}
+
+		pr_debug("NUMA:  base = %llx len = %llx, node = %u\n",
+			 rsrc.start, rsrc.end - rsrc.start + 1, nid);
+
+		r = numa_add_memblk(nid, rsrc.start,
+				    rsrc.end - rsrc.start + 1);
+		if (r)
+			break;
+	}
+	of_node_put(np);
+
+	return r;
+}
+
+static int __init of_numa_parse_distance_map_v1(struct device_node *map)
+{
+	const __be32 *matrix;
+	int entry_count;
+	int i;
+
+	pr_info("NUMA: parsing numa-distance-map-v1\n");
+
+	matrix = of_get_property(map, "distance-matrix", NULL);
+	if (!matrix) {
+		pr_err("NUMA: No distance-matrix property in distance-map\n");
+		return -EINVAL;
+	}
+
+	entry_count = of_property_count_u32_elems(map, "distance-matrix");
+	if (entry_count <= 0) {
+		pr_err("NUMA: Invalid distance-matrix\n");
+		return -EINVAL;
+	}
+
+	for (i = 0; i + 2 < entry_count; i += 3) {
+		u32 nodea, nodeb, distance;
+
+		nodea = of_read_number(matrix, 1);
+		matrix++;
+		nodeb = of_read_number(matrix, 1);
+		matrix++;
+		distance = of_read_number(matrix, 1);
+		matrix++;
+
+		numa_set_distance(nodea, nodeb, distance);
+		pr_debug("NUMA:  distance[node%d -> node%d] = %d\n",
+			 nodea, nodeb, distance);
+
+		/* Set the default distance of node B->A to the same as A->B */
+		if (nodeb > nodea)
+			numa_set_distance(nodeb, nodea, distance);
+	}
+
+	return 0;
+}
+
+static int __init of_numa_parse_distance_map(void)
+{
+	int ret = 0;
+	struct device_node *np;
+
+	np = of_find_compatible_node(NULL, NULL,
+				     "numa-distance-map-v1");
+	if (np)
+		ret = of_numa_parse_distance_map_v1(np);
+
+	of_node_put(np);
+	return ret;
+}
+
+int of_node_to_nid(struct device_node *device)
+{
+	struct device_node *np;
+	u32 nid;
+	int r = -ENODATA;
+
+	np = of_node_get(device);
+
+	while (np) {
+		struct device_node *parent;
+
+		r = of_property_read_u32(np, "numa-node-id", &nid);
+		/*
+		 * -EINVAL indicates the property was not found, and
+		 *  we walk up the tree trying to find a parent with a
+		 *  "numa-node-id".  Any other type of error indicates
+		 *  a bad device tree and we give up.
+		 */
+		if (r != -EINVAL)
+			break;
+
+		parent = of_get_parent(np);
+		of_node_put(np);
+		np = parent;
+	}
+	if (np && r)
+		pr_warn("NUMA: Invalid \"numa-node-id\" property in node %s\n",
+			np->name);
+	of_node_put(np);
+
+	if (!r) {
+		if (nid >= MAX_NUMNODES)
+			pr_warn("NUMA: Node id %u exceeds maximum value\n",
+				nid);
+		else
+			return nid;
+	}
+
+	return NUMA_NO_NODE;
+}
+EXPORT_SYMBOL(of_node_to_nid);
+
+int __init of_numa_init(void)
+{
+	int r;
+
+	of_numa_parse_cpu_nodes();
+	r = of_numa_parse_memory_nodes();
+	if (r)
+		return r;
+	return of_numa_parse_distance_map();
+}
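
The three parsers above map one-to-one onto the device-tree NUMA binding: "numa-node-id" on cpu and memory nodes, and a "numa-distance-map-v1" node holding (nodeA, nodeB, distance) triples. A hypothetical two-node fragment, trimmed to the NUMA-relevant properties (addresses and sizes invented for illustration):

	cpus {
		cpu@0 { device_type = "cpu"; numa-node-id = <0>; };
		cpu@100 { device_type = "cpu"; numa-node-id = <1>; };
	};

	memory@80000000 {
		device_type = "memory";
		reg = <0x0 0x80000000 0x0 0x40000000>;
		numa-node-id = <0>;
	};

	distance-map {
		compatible = "numa-distance-map-v1";
		/* A->A = 10, A->B = 20; B->A defaults to A->B per the code above */
		distance-matrix = <0 0 10>, <0 1 20>, <1 1 10>;
	};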
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index f700908..f2d01d4 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -847,6 +847,14 @@
 	if (!platform_get_irq(cpu_pmu->plat_device, 0))
 		cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
 
+	/*
+	 * This is a CPU PMU potentially in a heterogeneous configuration (e.g.
+	 * big.LITTLE). This is not an uncore PMU, and we have taken ctx
+	 * sharing into account (e.g. with our pmu::filter_match callback and
+	 * pmu::event_init group validation).
+	 */
+	cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_HETEROGENEOUS_CPUS;
+
 	return 0;
 
 out_unregister:
diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c
index 1062fa4..79d64ea 100644
--- a/drivers/platform/x86/acer-wmi.c
+++ b/drivers/platform/x86/acer-wmi.c
@@ -793,15 +793,6 @@
 	return AE_OK;
 }
 
-static int AMW0_set_cap_acpi_check_device_found __initdata;
-
-static acpi_status __init AMW0_set_cap_acpi_check_device_cb(acpi_handle handle,
-	u32 level, void *context, void **retval)
-{
-	AMW0_set_cap_acpi_check_device_found = 1;
-	return AE_OK;
-}
-
 static const struct acpi_device_id norfkill_ids[] __initconst = {
 	{ "VPC2004", 0},
 	{ "IBM0068", 0},
@@ -816,9 +807,10 @@
 	const struct acpi_device_id *id;
 
 	for (id = norfkill_ids; id->id[0]; id++)
-		acpi_get_devices(id->id, AMW0_set_cap_acpi_check_device_cb,
-				NULL, NULL);
-	return AMW0_set_cap_acpi_check_device_found;
+		if (acpi_dev_found(id->id))
+			return true;
+
+	return false;
 }
 
 static acpi_status __init AMW0_set_capabilities(void)
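
acpi_dev_found() returns true if any device with the given _HID exists in the ACPI namespace, which is exactly what the open-coded acpi_get_devices() callbacks here (and in eeepc-wmi below) were computing. A minimal usage sketch (the _HID is just an example, "PNP0C09" being the standard embedded-controller ID):

	#include <linux/acpi.h>

	static bool have_ec(void)
	{
		return acpi_dev_found("PNP0C09");
	}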
diff --git a/drivers/platform/x86/eeepc-wmi.c b/drivers/platform/x86/eeepc-wmi.c
index 14fd2ec..17b365f 100644
--- a/drivers/platform/x86/eeepc-wmi.c
+++ b/drivers/platform/x86/eeepc-wmi.c
@@ -204,30 +204,10 @@
 	}
 }
 
-static acpi_status eeepc_wmi_parse_device(acpi_handle handle, u32 level,
-						 void *context, void **retval)
-{
-	pr_warn("Found legacy ATKD device (%s)\n", EEEPC_ACPI_HID);
-	*(bool *)context = true;
-	return AE_CTRL_TERMINATE;
-}
-
-static int eeepc_wmi_check_atkd(void)
-{
-	acpi_status status;
-	bool found = false;
-
-	status = acpi_get_devices(EEEPC_ACPI_HID, eeepc_wmi_parse_device,
-				  &found, NULL);
-
-	if (ACPI_FAILURE(status) || !found)
-		return 0;
-	return -1;
-}
-
 static int eeepc_wmi_probe(struct platform_device *pdev)
 {
-	if (eeepc_wmi_check_atkd()) {
+	if (acpi_dev_found(EEEPC_ACPI_HID)) {
+		pr_warn("Found legacy ATKD device (%s)\n", EEEPC_ACPI_HID);
 		pr_warn("WMI device present, but legacy ATKD device is also "
 			"present and enabled\n");
 		pr_warn("You probably booted with acpi_osi=\"Linux\" or "
diff --git a/drivers/pnp/pnpbios/core.c b/drivers/pnp/pnpbios/core.c
index facd43b..81603d9 100644
--- a/drivers/pnp/pnpbios/core.c
+++ b/drivers/pnp/pnpbios/core.c
@@ -521,10 +521,11 @@
 	int ret;
 
 	if (pnpbios_disabled || dmi_check_system(pnpbios_dmi_table) ||
-	    paravirt_enabled()) {
+	    arch_pnpbios_disabled()) {
 		printk(KERN_INFO "PnPBIOS: Disabled\n");
 		return -ENODEV;
 	}
+
 #ifdef CONFIG_PNPACPI
 	if (!acpi_disabled && !pnpacpi_disabled) {
 		pnpbios_disabled = 1;
diff --git a/drivers/power/avs/rockchip-io-domain.c b/drivers/power/avs/rockchip-io-domain.c
index 8986382..01b6d3f 100644
--- a/drivers/power/avs/rockchip-io-domain.c
+++ b/drivers/power/avs/rockchip-io-domain.c
@@ -336,6 +336,7 @@
 	struct device_node *np = pdev->dev.of_node;
 	const struct of_device_id *match;
 	struct rockchip_iodomain *iod;
+	struct device *parent;
 	int i, ret = 0;
 
 	if (!np)
@@ -351,7 +352,14 @@
 	match = of_match_node(rockchip_iodomain_match, np);
 	iod->soc_data = (struct rockchip_iodomain_soc_data *)match->data;
 
-	iod->grf = syscon_regmap_lookup_by_phandle(np, "rockchip,grf");
+	parent = pdev->dev.parent;
+	if (parent && parent->of_node) {
+		iod->grf = syscon_node_to_regmap(parent->of_node);
+	} else {
+		dev_dbg(&pdev->dev, "falling back to old binding\n");
+		iod->grf = syscon_regmap_lookup_by_phandle(np, "rockchip,grf");
+	}
+
 	if (IS_ERR(iod->grf)) {
 		dev_err(&pdev->dev, "couldn't find grf regmap\n");
 		return PTR_ERR(iod->grf);
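
The parent->of_node path added above supports the updated binding in which the io-domain node is a child of the GRF syscon rather than referencing it through a "rockchip,grf" phandle. A hypothetical fragment of the new layout:

	grf: syscon@ff770000 {
		compatible = "rockchip,rk3288-grf", "syscon", "simple-mfd";

		io-domains {
			compatible = "rockchip,rk3288-io-voltage-domain";
			sdcard-supply = <&vccio_sd>;
		};
	};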
diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c
index 8fad0a7..b2766b8 100644
--- a/drivers/powercap/intel_rapl.c
+++ b/drivers/powercap/intel_rapl.c
@@ -34,6 +34,9 @@
 #include <asm/processor.h>
 #include <asm/cpu_device_id.h>
 
+/* Local defines */
+#define MSR_PLATFORM_POWER_LIMIT	0x0000065C
+
 /* bitmasks for RAPL MSRs, used by primitive access functions */
 #define ENERGY_STATUS_MASK      0xffffffff
 
@@ -86,6 +89,7 @@
 	RAPL_DOMAIN_PP0, /* core power plane */
 	RAPL_DOMAIN_PP1, /* graphics uncore */
 	RAPL_DOMAIN_DRAM,/* DRAM control_type */
+	RAPL_DOMAIN_PLATFORM, /* PSys control_type */
 	RAPL_DOMAIN_MAX,
 };
 
@@ -251,9 +255,11 @@
 	"core",
 	"uncore",
 	"dram",
+	"psys",
 };
 
 static struct powercap_control_type *control_type; /* PowerCap Controller */
+static struct rapl_domain *platform_rapl_domain; /* Platform (PSys) domain */
 
 /* caller to ensure CPU hotplug lock is held */
 static struct rapl_package *find_package_by_id(int id)
@@ -409,6 +415,14 @@
 		.set_enable = set_domain_enable,
 		.get_enable = get_domain_enable,
 	},
+	/* RAPL_DOMAIN_PLATFORM */
+	{
+		.get_energy_uj = get_energy_counter,
+		.get_max_energy_range_uj = get_max_energy_counter,
+		.release = release_zone,
+		.set_enable = set_domain_enable,
+		.get_enable = get_domain_enable,
+	},
 };
 
 static int set_power_limit(struct powercap_zone *power_zone, int id,
@@ -1101,6 +1115,8 @@
 	RAPL_CPU(0X5C, rapl_defaults_core),/* Broxton */
 	RAPL_CPU(0x5E, rapl_defaults_core),/* Skylake-H/S */
 	RAPL_CPU(0x57, rapl_defaults_hsw_server),/* Knights Landing */
+	RAPL_CPU(0x8E, rapl_defaults_core),/* Kabylake */
+	RAPL_CPU(0x9E, rapl_defaults_core),/* Kabylake */
 	{}
 };
 MODULE_DEVICE_TABLE(x86cpu, rapl_ids);
@@ -1160,6 +1176,13 @@
 			powercap_unregister_zone(control_type,
 						&rd_package->power_zone);
 	}
+
+	if (platform_rapl_domain) {
+		powercap_unregister_zone(control_type,
+					 &platform_rapl_domain->power_zone);
+		kfree(platform_rapl_domain);
+	}
+
 	powercap_unregister_control_type(control_type);
 
 	return 0;
@@ -1239,6 +1262,47 @@
 	return ret;
 }
 
+static int rapl_register_psys(void)
+{
+	struct rapl_domain *rd;
+	struct powercap_zone *power_zone;
+	u64 val;
+
+	if (rdmsrl_safe_on_cpu(0, MSR_PLATFORM_ENERGY_STATUS, &val) || !val)
+		return -ENODEV;
+
+	if (rdmsrl_safe_on_cpu(0, MSR_PLATFORM_POWER_LIMIT, &val) || !val)
+		return -ENODEV;
+
+	rd = kzalloc(sizeof(*rd), GFP_KERNEL);
+	if (!rd)
+		return -ENOMEM;
+
+	rd->name = rapl_domain_names[RAPL_DOMAIN_PLATFORM];
+	rd->id = RAPL_DOMAIN_PLATFORM;
+	rd->msrs[0] = MSR_PLATFORM_POWER_LIMIT;
+	rd->msrs[1] = MSR_PLATFORM_ENERGY_STATUS;
+	rd->rpl[0].prim_id = PL1_ENABLE;
+	rd->rpl[0].name = pl1_name;
+	rd->rpl[1].prim_id = PL2_ENABLE;
+	rd->rpl[1].name = pl2_name;
+	rd->rp = find_package_by_id(0);
+
+	power_zone = powercap_register_zone(&rd->power_zone, control_type,
+					    "psys", NULL,
+					    &zone_ops[RAPL_DOMAIN_PLATFORM],
+					    2, &constraint_ops);
+
+	if (IS_ERR(power_zone)) {
+		kfree(rd);
+		return PTR_ERR(power_zone);
+	}
+
+	platform_rapl_domain = rd;
+
+	return 0;
+}
+
 static int rapl_register_powercap(void)
 {
 	struct rapl_domain *rd;
@@ -1255,6 +1319,10 @@
 	list_for_each_entry(rp, &rapl_packages, plist)
 		if (rapl_package_register_powercap(rp))
 			goto err_cleanup_package;
+
+	/* Don't bail out if PSys is not supported */
+	rapl_register_psys();
+
 	return ret;
 
 err_cleanup_package:
@@ -1289,6 +1357,9 @@
 	case RAPL_DOMAIN_DRAM:
 		msr = MSR_DRAM_ENERGY_STATUS;
 		break;
+	case RAPL_DOMAIN_PLATFORM:
+		/* PSYS(PLATFORM) is not a CPU domain, so avoid printing an error */
+		return -EINVAL;
 	default:
 		pr_err("invalid domain id %d\n", domain);
 		return -EINVAL;
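
When rapl_register_psys() succeeds, the platform domain appears as one more top-level powercap zone with the two limits named by pl1_name/pl2_name. A sketch of the resulting sysfs layout (the zone index depends on how many packages were registered first):

	/sys/class/powercap/intel-rapl/intel-rapl:1/
		name              -> "psys"
		energy_uj         -> cumulative platform energy in microjoules
		constraint_0_name -> "long_term"
		constraint_1_name -> "short_term"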
diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c
index 7831bc6..680fbc7 100644
--- a/drivers/pwm/core.c
+++ b/drivers/pwm/core.c
@@ -128,6 +128,13 @@
 	set_bit(PWMF_REQUESTED, &pwm->flags);
 	pwm->label = label;
 
+	/*
+	 * FIXME: This should be removed once all PWM users properly make use
+	 * of struct pwm_args to initialize the PWM device. As long as this is
+	 * here, the PWM state and hardware state can get out of sync.
+	 */
+	pwm_apply_args(pwm);
+
 	return 0;
 }
 
@@ -146,12 +153,12 @@
 	if (IS_ERR(pwm))
 		return pwm;
 
-	pwm_set_period(pwm, args->args[1]);
+	pwm->args.period = args->args[1];
 
 	if (args->args[2] & PWM_POLARITY_INVERTED)
-		pwm_set_polarity(pwm, PWM_POLARITY_INVERSED);
+		pwm->args.polarity = PWM_POLARITY_INVERSED;
 	else
-		pwm_set_polarity(pwm, PWM_POLARITY_NORMAL);
+		pwm->args.polarity = PWM_POLARITY_NORMAL;
 
 	return pwm;
 }
@@ -172,7 +179,7 @@
 	if (IS_ERR(pwm))
 		return pwm;
 
-	pwm_set_period(pwm, args->args[1]);
+	pwm->args.period = args->args[1];
 
 	return pwm;
 }
@@ -747,13 +754,13 @@
 	if (!chip)
 		goto out;
 
+	pwm->args.period = chosen->period;
+	pwm->args.polarity = chosen->polarity;
+
 	pwm = pwm_request_from_chip(chip, chosen->index, con_id ?: dev_id);
 	if (IS_ERR(pwm))
 		goto out;
 
-	pwm_set_period(pwm, chosen->period);
-	pwm_set_polarity(pwm, chosen->polarity);
-
 out:
 	mutex_unlock(&pwm_lookup_lock);
 	return pwm;
diff --git a/drivers/pwm/pwm-clps711x.c b/drivers/pwm/pwm-clps711x.c
index a80c108..7d33542 100644
--- a/drivers/pwm/pwm-clps711x.c
+++ b/drivers/pwm/pwm-clps711x.c
@@ -60,7 +60,7 @@
 		return -EINVAL;
 
 	/* Store constant period value */
-	pwm_set_period(pwm, DIV_ROUND_CLOSEST(NSEC_PER_SEC, freq));
+	pwm->args.period = DIV_ROUND_CLOSEST(NSEC_PER_SEC, freq);
 
 	return 0;
 }
diff --git a/drivers/pwm/pwm-pxa.c b/drivers/pwm/pwm-pxa.c
index cb2f702..58b709f 100644
--- a/drivers/pwm/pwm-pxa.c
+++ b/drivers/pwm/pwm-pxa.c
@@ -160,7 +160,7 @@
 	if (IS_ERR(pwm))
 		return pwm;
 
-	pwm_set_period(pwm, args->args[0]);
+	pwm->args.period = args->args[0];
 
 	return pwm;
 }
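
All three conversions above (core, clps711x, pxa) stop pushing the DT/board reference values into the live PWM state and store them in pwm->args instead, with pwm_apply_args() papering over legacy users until they are converted. A minimal consumer sketch (assuming the pwm_get_args() accessor from the same series):

	struct pwm_args args;

	pwm_get_args(pwm, &args);			/* reference period/polarity */
	pwm_config(pwm, args.period / 2, args.period);	/* ~50% duty cycle */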
diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index c77dc08..144cbf5 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -321,6 +321,15 @@
 	help
 	  This driver supports LP8720/LP8725 PMIC
 
+config REGULATOR_LP873X
+	tristate "TI LP873X Power regulators"
+	depends on MFD_LP873X && OF
+	help
+	  This driver supports LP873X voltage regulator chips. LP873X
+	  provides two step-down converters and two general-purpose LDO
+	  voltage regulators. It supports software-based voltage control
+	  for different voltage domains.
+
 config REGULATOR_LP8755
 	tristate "TI LP8755 High Performance PMU driver"
 	depends on I2C
@@ -409,6 +418,7 @@
 config REGULATOR_MAX8973
 	tristate "Maxim MAX8973 voltage regulator "
 	depends on I2C
+	depends on THERMAL && THERMAL_OF
 	select REGMAP_I2C
 	help
 	  The MAXIM MAX8973 high-efficiency. three phase, DC-DC step-down
@@ -548,6 +558,13 @@
 	  Say y here to support the voltage regulators and convertors
 	  PV88060
 
+config REGULATOR_PV88080
+	tristate "Powerventure Semiconductor PV88080 regulator"
+	depends on I2C
+	select REGMAP_I2C
+	help
+	  Say y here to support the buck converters on PV88080
+
 config REGULATOR_PV88090
 	tristate "Powerventure Semiconductor PV88090 regulator"
 	depends on I2C
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index 61bfbb9..85a1d44 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -42,11 +42,12 @@
 obj-$(CONFIG_REGULATOR_LP3971) += lp3971.o
 obj-$(CONFIG_REGULATOR_LP3972) += lp3972.o
 obj-$(CONFIG_REGULATOR_LP872X) += lp872x.o
+obj-$(CONFIG_REGULATOR_LP873X) += lp873x-regulator.o
 obj-$(CONFIG_REGULATOR_LP8788) += lp8788-buck.o
 obj-$(CONFIG_REGULATOR_LP8788) += lp8788-ldo.o
 obj-$(CONFIG_REGULATOR_LP8755) += lp8755.o
 obj-$(CONFIG_REGULATOR_LTC3589) += ltc3589.o
-obj-$(CONFIG_REGULATOR_MAX14577) += max14577.o
+obj-$(CONFIG_REGULATOR_MAX14577) += max14577-regulator.o
 obj-$(CONFIG_REGULATOR_MAX1586) += max1586.o
 obj-$(CONFIG_REGULATOR_MAX77620) += max77620-regulator.o
 obj-$(CONFIG_REGULATOR_MAX8649)	+= max8649.o
@@ -55,10 +56,10 @@
 obj-$(CONFIG_REGULATOR_MAX8925) += max8925-regulator.o
 obj-$(CONFIG_REGULATOR_MAX8952) += max8952.o
 obj-$(CONFIG_REGULATOR_MAX8973) += max8973-regulator.o
-obj-$(CONFIG_REGULATOR_MAX8997) += max8997.o
+obj-$(CONFIG_REGULATOR_MAX8997) += max8997-regulator.o
 obj-$(CONFIG_REGULATOR_MAX8998) += max8998.o
 obj-$(CONFIG_REGULATOR_MAX77686) += max77686-regulator.o
-obj-$(CONFIG_REGULATOR_MAX77693) += max77693.o
+obj-$(CONFIG_REGULATOR_MAX77693) += max77693-regulator.o
 obj-$(CONFIG_REGULATOR_MAX77802) += max77802-regulator.o
 obj-$(CONFIG_REGULATOR_MC13783) += mc13783-regulator.o
 obj-$(CONFIG_REGULATOR_MC13892) += mc13892-regulator.o
@@ -71,6 +72,7 @@
 obj-$(CONFIG_REGULATOR_PALMAS) += palmas-regulator.o
 obj-$(CONFIG_REGULATOR_PFUZE100) += pfuze100-regulator.o
 obj-$(CONFIG_REGULATOR_PV88060) += pv88060-regulator.o
+obj-$(CONFIG_REGULATOR_PV88080) += pv88080-regulator.o
 obj-$(CONFIG_REGULATOR_PV88090) += pv88090-regulator.o
 obj-$(CONFIG_REGULATOR_PWM) += pwm-regulator.o
 obj-$(CONFIG_REGULATOR_TPS51632) += tps51632-regulator.o
diff --git a/drivers/regulator/act8865-regulator.c b/drivers/regulator/act8865-regulator.c
index 000d566..a1cd0d4 100644
--- a/drivers/regulator/act8865-regulator.c
+++ b/drivers/regulator/act8865-regulator.c
@@ -139,6 +139,74 @@
 	int off_mask;
 };
 
+static const struct regmap_range act8600_reg_ranges[] = {
+	regmap_reg_range(0x00, 0x01),
+	regmap_reg_range(0x10, 0x10),
+	regmap_reg_range(0x12, 0x12),
+	regmap_reg_range(0x20, 0x20),
+	regmap_reg_range(0x22, 0x22),
+	regmap_reg_range(0x30, 0x30),
+	regmap_reg_range(0x32, 0x32),
+	regmap_reg_range(0x40, 0x41),
+	regmap_reg_range(0x50, 0x51),
+	regmap_reg_range(0x60, 0x61),
+	regmap_reg_range(0x70, 0x71),
+	regmap_reg_range(0x80, 0x81),
+	regmap_reg_range(0x91, 0x91),
+	regmap_reg_range(0xA1, 0xA1),
+	regmap_reg_range(0xA8, 0xAA),
+	regmap_reg_range(0xB0, 0xB0),
+	regmap_reg_range(0xB2, 0xB2),
+	regmap_reg_range(0xC1, 0xC1),
+};
+
+static const struct regmap_range act8600_reg_ro_ranges[] = {
+	regmap_reg_range(0xAA, 0xAA),
+	regmap_reg_range(0xC1, 0xC1),
+};
+
+static const struct regmap_range act8600_reg_volatile_ranges[] = {
+	regmap_reg_range(0x00, 0x01),
+	regmap_reg_range(0x12, 0x12),
+	regmap_reg_range(0x22, 0x22),
+	regmap_reg_range(0x32, 0x32),
+	regmap_reg_range(0x41, 0x41),
+	regmap_reg_range(0x51, 0x51),
+	regmap_reg_range(0x61, 0x61),
+	regmap_reg_range(0x71, 0x71),
+	regmap_reg_range(0x81, 0x81),
+	regmap_reg_range(0xA8, 0xA8),
+	regmap_reg_range(0xAA, 0xAA),
+	regmap_reg_range(0xB0, 0xB0),
+	regmap_reg_range(0xC1, 0xC1),
+};
+
+static const struct regmap_access_table act8600_write_ranges_table = {
+	.yes_ranges	= act8600_reg_ranges,
+	.n_yes_ranges	= ARRAY_SIZE(act8600_reg_ranges),
+	.no_ranges	= act8600_reg_ro_ranges,
+	.n_no_ranges	= ARRAY_SIZE(act8600_reg_ro_ranges),
+};
+
+static const struct regmap_access_table act8600_read_ranges_table = {
+	.yes_ranges	= act8600_reg_ranges,
+	.n_yes_ranges	= ARRAY_SIZE(act8600_reg_ranges),
+};
+
+static const struct regmap_access_table act8600_volatile_ranges_table = {
+	.yes_ranges	= act8600_reg_volatile_ranges,
+	.n_yes_ranges	= ARRAY_SIZE(act8600_reg_volatile_ranges),
+};
+
+static const struct regmap_config act8600_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+	.max_register = 0xFF,
+	.wr_table = &act8600_write_ranges_table,
+	.rd_table = &act8600_read_ranges_table,
+	.volatile_table = &act8600_volatile_ranges_table,
+};
+
 static const struct regmap_config act8865_regmap_config = {
 	.reg_bits = 8,
 	.val_bits = 8,
@@ -319,7 +387,6 @@
 };
 
 static int act8865_pdata_from_dt(struct device *dev,
-				 struct device_node **of_node,
 				 struct act8865_platform_data *pdata,
 				 unsigned long type)
 {
@@ -370,7 +437,7 @@
 		regulator->id = i;
 		regulator->name = matches[i].name;
 		regulator->init_data = matches[i].init_data;
-		of_node[i] = matches[i].of_node;
+		regulator->of_node = matches[i].of_node;
 		regulator++;
 	}
 
@@ -378,7 +445,6 @@
 }
 #else
 static inline int act8865_pdata_from_dt(struct device *dev,
-					struct device_node **of_node,
 					struct act8865_platform_data *pdata,
 					unsigned long type)
 {
@@ -386,8 +452,8 @@
 }
 #endif
 
-static struct regulator_init_data
-*act8865_get_init_data(int id, struct act8865_platform_data *pdata)
+static struct act8865_regulator_data *act8865_get_regulator_data(
+		int id, struct act8865_platform_data *pdata)
 {
 	int i;
 
@@ -396,7 +462,7 @@
 
 	for (i = 0; i < pdata->num_regulators; i++) {
 		if (pdata->regulators[i].id == id)
-			return pdata->regulators[i].init_data;
+			return &pdata->regulators[i];
 	}
 
 	return NULL;
@@ -418,9 +484,9 @@
 	const struct regulator_desc *regulators;
 	struct act8865_platform_data pdata_of, *pdata;
 	struct device *dev = &client->dev;
-	struct device_node **of_node;
 	int i, ret, num_regulators;
 	struct act8865 *act8865;
+	const struct regmap_config *regmap_config;
 	unsigned long type;
 	int off_reg, off_mask;
 	int voltage_select = 0;
@@ -447,12 +513,14 @@
 	case ACT8600:
 		regulators = act8600_regulators;
 		num_regulators = ARRAY_SIZE(act8600_regulators);
+		regmap_config = &act8600_regmap_config;
 		off_reg = -1;
 		off_mask = -1;
 		break;
 	case ACT8846:
 		regulators = act8846_regulators;
 		num_regulators = ARRAY_SIZE(act8846_regulators);
+		regmap_config = &act8865_regmap_config;
 		off_reg = ACT8846_GLB_OFF_CTRL;
 		off_mask = ACT8846_OFF_SYSMASK;
 		break;
@@ -464,6 +532,7 @@
 			regulators = act8865_regulators;
 			num_regulators = ARRAY_SIZE(act8865_regulators);
 		}
+		regmap_config = &act8865_regmap_config;
 		off_reg = ACT8865_SYS_CTRL;
 		off_mask = ACT8865_MSTROFF;
 		break;
@@ -472,34 +541,22 @@
 		return -EINVAL;
 	}
 
-	of_node = devm_kzalloc(dev, sizeof(struct device_node *) *
-			       num_regulators, GFP_KERNEL);
-	if (!of_node)
-		return -ENOMEM;
-
 	if (dev->of_node && !pdata) {
-		ret = act8865_pdata_from_dt(dev, of_node, &pdata_of, type);
+		ret = act8865_pdata_from_dt(dev, &pdata_of, type);
 		if (ret < 0)
 			return ret;
 
 		pdata = &pdata_of;
 	}
 
-	if (pdata->num_regulators > num_regulators) {
-		dev_err(dev, "too many regulators: %d\n",
-			pdata->num_regulators);
-		return -EINVAL;
-	}
-
 	act8865 = devm_kzalloc(dev, sizeof(struct act8865), GFP_KERNEL);
 	if (!act8865)
 		return -ENOMEM;
 
-	act8865->regmap = devm_regmap_init_i2c(client, &act8865_regmap_config);
+	act8865->regmap = devm_regmap_init_i2c(client, regmap_config);
 	if (IS_ERR(act8865->regmap)) {
 		ret = PTR_ERR(act8865->regmap);
-		dev_err(&client->dev, "Failed to allocate register map: %d\n",
-			ret);
+		dev_err(dev, "Failed to allocate register map: %d\n", ret);
 		return ret;
 	}
 
@@ -518,15 +575,20 @@
 	for (i = 0; i < num_regulators; i++) {
 		const struct regulator_desc *desc = &regulators[i];
 		struct regulator_config config = { };
+		struct act8865_regulator_data *rdata;
 		struct regulator_dev *rdev;
 
 		config.dev = dev;
-		config.init_data = act8865_get_init_data(desc->id, pdata);
-		config.of_node = of_node[i];
 		config.driver_data = act8865;
 		config.regmap = act8865->regmap;
 
-		rdev = devm_regulator_register(&client->dev, desc, &config);
+		rdata = act8865_get_regulator_data(desc->id, pdata);
+		if (rdata) {
+			config.init_data = rdata->init_data;
+			config.of_node = rdata->of_node;
+		}
+
+		rdev = devm_regulator_register(dev, desc, &config);
 		if (IS_ERR(rdev)) {
 			dev_err(dev, "failed to register %s\n", desc->name);
 			return PTR_ERR(rdev);
@@ -534,7 +596,6 @@
 	}
 
 	i2c_set_clientdata(client, act8865);
-	devm_kfree(dev, of_node);
 
 	return 0;
 }
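
One subtlety in the access tables above: in a regmap_access_table, no_ranges take precedence over yes_ranges, so 0xAA and 0xC1 remain readable but are rejected for writes even though they also appear in act8600_reg_ranges. Conceptually (a simplified sketch, not regmap's real implementation):

	/* Simplified sketch of how regmap evaluates act8600_write_ranges_table. */
	static bool act8600_writeable(unsigned int reg)
	{
		if (regmap_reg_in_ranges(reg, act8600_reg_ro_ranges,
					 ARRAY_SIZE(act8600_reg_ro_ranges)))
			return false;	/* no_ranges win over yes_ranges */

		return regmap_reg_in_ranges(reg, act8600_reg_ranges,
					    ARRAY_SIZE(act8600_reg_ranges));
	}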
diff --git a/drivers/regulator/as3722-regulator.c b/drivers/regulator/as3722-regulator.c
index 8b046ee..66337e1 100644
--- a/drivers/regulator/as3722-regulator.c
+++ b/drivers/regulator/as3722-regulator.c
@@ -372,7 +372,7 @@
 			AS3722_LDO_ILIMIT_MASK, reg);
 }
 
-static struct regulator_ops as3722_ldo0_ops = {
+static const struct regulator_ops as3722_ldo0_ops = {
 	.is_enabled = regulator_is_enabled_regmap,
 	.enable = regulator_enable_regmap,
 	.disable = regulator_disable_regmap,
@@ -383,7 +383,7 @@
 	.set_current_limit = as3722_ldo_set_current_limit,
 };
 
-static struct regulator_ops as3722_ldo0_extcntrl_ops = {
+static const struct regulator_ops as3722_ldo0_extcntrl_ops = {
 	.list_voltage = regulator_list_voltage_linear,
 	.get_voltage_sel = regulator_get_voltage_sel_regmap,
 	.set_voltage_sel = regulator_set_voltage_sel_regmap,
@@ -415,7 +415,7 @@
 	return 150000;
 }
 
-static struct regulator_ops as3722_ldo3_ops = {
+static const struct regulator_ops as3722_ldo3_ops = {
 	.is_enabled = regulator_is_enabled_regmap,
 	.enable = regulator_enable_regmap,
 	.disable = regulator_disable_regmap,
@@ -425,20 +425,45 @@
 	.get_current_limit = as3722_ldo3_get_current_limit,
 };
 
-static struct regulator_ops as3722_ldo3_extcntrl_ops = {
+static const struct regulator_ops as3722_ldo3_extcntrl_ops = {
 	.list_voltage = regulator_list_voltage_linear,
 	.get_voltage_sel = regulator_get_voltage_sel_regmap,
 	.set_voltage_sel = regulator_set_voltage_sel_regmap,
 	.get_current_limit = as3722_ldo3_get_current_limit,
 };
 
+static const struct regulator_ops as3722_ldo6_ops = {
+	.is_enabled = regulator_is_enabled_regmap,
+	.enable = regulator_enable_regmap,
+	.disable = regulator_disable_regmap,
+	.map_voltage = regulator_map_voltage_linear_range,
+	.set_voltage_sel = regulator_set_voltage_sel_regmap,
+	.get_voltage_sel = regulator_get_voltage_sel_regmap,
+	.list_voltage = regulator_list_voltage_linear_range,
+	.get_current_limit = as3722_ldo_get_current_limit,
+	.set_current_limit = as3722_ldo_set_current_limit,
+	.get_bypass = regulator_get_bypass_regmap,
+	.set_bypass = regulator_set_bypass_regmap,
+};
+
+static const struct regulator_ops as3722_ldo6_extcntrl_ops = {
+	.map_voltage = regulator_map_voltage_linear_range,
+	.set_voltage_sel = regulator_set_voltage_sel_regmap,
+	.get_voltage_sel = regulator_get_voltage_sel_regmap,
+	.list_voltage = regulator_list_voltage_linear_range,
+	.get_current_limit = as3722_ldo_get_current_limit,
+	.set_current_limit = as3722_ldo_set_current_limit,
+	.get_bypass = regulator_get_bypass_regmap,
+	.set_bypass = regulator_set_bypass_regmap,
+};
+
 static const struct regulator_linear_range as3722_ldo_ranges[] = {
 	REGULATOR_LINEAR_RANGE(0, 0x00, 0x00, 0),
 	REGULATOR_LINEAR_RANGE(825000, 0x01, 0x24, 25000),
 	REGULATOR_LINEAR_RANGE(1725000, 0x40, 0x7F, 25000),
 };
 
-static struct regulator_ops as3722_ldo_ops = {
+static const struct regulator_ops as3722_ldo_ops = {
 	.is_enabled = regulator_is_enabled_regmap,
 	.enable = regulator_enable_regmap,
 	.disable = regulator_disable_regmap,
@@ -450,7 +475,7 @@
 	.set_current_limit = as3722_ldo_set_current_limit,
 };
 
-static struct regulator_ops as3722_ldo_extcntrl_ops = {
+static const struct regulator_ops as3722_ldo_extcntrl_ops = {
 	.map_voltage = regulator_map_voltage_linear_range,
 	.set_voltage_sel = regulator_set_voltage_sel_regmap,
 	.get_voltage_sel = regulator_get_voltage_sel_regmap,
@@ -616,7 +641,7 @@
 	REGULATOR_LINEAR_RANGE(2650000, 0x71, 0x7F, 50000),
 };
 
-static struct regulator_ops as3722_sd016_ops = {
+static const struct regulator_ops as3722_sd016_ops = {
 	.is_enabled = regulator_is_enabled_regmap,
 	.enable = regulator_enable_regmap,
 	.disable = regulator_disable_regmap,
@@ -630,7 +655,7 @@
 	.set_mode = as3722_sd_set_mode,
 };
 
-static struct regulator_ops as3722_sd016_extcntrl_ops = {
+static const struct regulator_ops as3722_sd016_extcntrl_ops = {
 	.list_voltage = regulator_list_voltage_linear,
 	.map_voltage = regulator_map_voltage_linear,
 	.get_voltage_sel = regulator_get_voltage_sel_regmap,
@@ -641,7 +666,7 @@
 	.set_mode = as3722_sd_set_mode,
 };
 
-static struct regulator_ops as3722_sd2345_ops = {
+static const struct regulator_ops as3722_sd2345_ops = {
 	.is_enabled = regulator_is_enabled_regmap,
 	.enable = regulator_enable_regmap,
 	.disable = regulator_disable_regmap,
@@ -653,7 +678,7 @@
 	.set_mode = as3722_sd_set_mode,
 };
 
-static struct regulator_ops as3722_sd2345_extcntrl_ops = {
+static const struct regulator_ops as3722_sd2345_extcntrl_ops = {
 	.list_voltage = regulator_list_voltage_linear_range,
 	.map_voltage = regulator_map_voltage_linear_range,
 	.set_voltage_sel = regulator_set_voltage_sel_regmap,
@@ -760,7 +785,7 @@
 	struct as3722_regulator_config_data *reg_config;
 	struct regulator_dev *rdev;
 	struct regulator_config config = { };
-	struct regulator_ops *ops;
+	const struct regulator_ops *ops;
 	int id;
 	int ret;
 
@@ -829,6 +854,24 @@
 				}
 			}
 			break;
+		case AS3722_REGULATOR_ID_LDO6:
+			if (reg_config->ext_control)
+				ops = &as3722_ldo6_extcntrl_ops;
+			else
+				ops = &as3722_ldo6_ops;
+			as3722_regs->desc[id].enable_time = 500;
+			as3722_regs->desc[id].bypass_reg =
+						AS3722_LDO6_VOLTAGE_REG;
+			as3722_regs->desc[id].bypass_mask =
+						AS3722_LDO_VSEL_MASK;
+			as3722_regs->desc[id].bypass_val_on =
+						AS3722_LDO6_VSEL_BYPASS;
+			as3722_regs->desc[id].bypass_val_off =
+						AS3722_LDO6_VSEL_BYPASS;
+			as3722_regs->desc[id].linear_ranges = as3722_ldo_ranges;
+			as3722_regs->desc[id].n_linear_ranges =
+						ARRAY_SIZE(as3722_ldo_ranges);
+			break;
 		case AS3722_REGULATOR_ID_SD0:
 		case AS3722_REGULATOR_ID_SD1:
 		case AS3722_REGULATOR_ID_SD6:
diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index e0b7642..ec8184d5 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -132,6 +132,19 @@
 	return has_full_constraints || of_have_populated_dt();
 }
 
+static bool regulator_ops_is_valid(struct regulator_dev *rdev, int ops)
+{
+	if (!rdev->constraints) {
+		rdev_err(rdev, "no constraints\n");
+		return false;
+	}
+
+	if (rdev->constraints->valid_ops_mask & ops)
+		return true;
+
+	return false;
+}
+
 static inline struct regulator_dev *rdev_get_supply(struct regulator_dev *rdev)
 {
 	if (rdev && rdev->supply)
@@ -198,28 +211,13 @@
 	return regnode;
 }
 
-static int _regulator_can_change_status(struct regulator_dev *rdev)
-{
-	if (!rdev->constraints)
-		return 0;
-
-	if (rdev->constraints->valid_ops_mask & REGULATOR_CHANGE_STATUS)
-		return 1;
-	else
-		return 0;
-}
-
 /* Platform voltage constraint check */
 static int regulator_check_voltage(struct regulator_dev *rdev,
 				   int *min_uV, int *max_uV)
 {
 	BUG_ON(*min_uV > *max_uV);
 
-	if (!rdev->constraints) {
-		rdev_err(rdev, "no constraints\n");
-		return -ENODEV;
-	}
-	if (!(rdev->constraints->valid_ops_mask & REGULATOR_CHANGE_VOLTAGE)) {
+	if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_VOLTAGE)) {
 		rdev_err(rdev, "voltage operation not allowed\n");
 		return -EPERM;
 	}
@@ -275,11 +273,7 @@
 {
 	BUG_ON(*min_uA > *max_uA);
 
-	if (!rdev->constraints) {
-		rdev_err(rdev, "no constraints\n");
-		return -ENODEV;
-	}
-	if (!(rdev->constraints->valid_ops_mask & REGULATOR_CHANGE_CURRENT)) {
+	if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_CURRENT)) {
 		rdev_err(rdev, "current operation not allowed\n");
 		return -EPERM;
 	}
@@ -312,11 +306,7 @@
 		return -EINVAL;
 	}
 
-	if (!rdev->constraints) {
-		rdev_err(rdev, "no constraints\n");
-		return -ENODEV;
-	}
-	if (!(rdev->constraints->valid_ops_mask & REGULATOR_CHANGE_MODE)) {
+	if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_MODE)) {
 		rdev_err(rdev, "mode operation not allowed\n");
 		return -EPERM;
 	}
@@ -333,20 +323,6 @@
 	return -EINVAL;
 }
 
-/* dynamic regulator mode switching constraint check */
-static int regulator_check_drms(struct regulator_dev *rdev)
-{
-	if (!rdev->constraints) {
-		rdev_err(rdev, "no constraints\n");
-		return -ENODEV;
-	}
-	if (!(rdev->constraints->valid_ops_mask & REGULATOR_CHANGE_DRMS)) {
-		rdev_dbg(rdev, "drms operation not allowed\n");
-		return -EPERM;
-	}
-	return 0;
-}
-
 static ssize_t regulator_uV_show(struct device *dev,
 				struct device_attribute *attr, char *buf)
 {
@@ -692,8 +668,7 @@
 	 * first check to see if we can set modes at all, otherwise just
 	 * tell the consumer everything is OK.
 	 */
-	err = regulator_check_drms(rdev);
-	if (err < 0)
+	if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_DRMS))
 		return 0;
 
 	if (!rdev->desc->ops->get_optimum_mode &&
@@ -808,8 +783,6 @@
 /* locks held by caller */
 static int suspend_prepare(struct regulator_dev *rdev, suspend_state_t state)
 {
-	lockdep_assert_held_once(&rdev->mutex);
-
 	if (!rdev->constraints)
 		return -EINVAL;
 
@@ -893,7 +866,7 @@
 	rdev_dbg(rdev, "%s\n", buf);
 
 	if ((constraints->min_uV != constraints->max_uV) &&
-	    !(constraints->valid_ops_mask & REGULATOR_CHANGE_VOLTAGE))
+	    !regulator_ops_is_valid(rdev, REGULATOR_CHANGE_VOLTAGE))
 		rdev_warn(rdev,
 			  "Voltage range but no REGULATOR_CHANGE_VOLTAGE\n");
 }
@@ -906,7 +879,8 @@
 
 	/* do we need to apply the constraint voltage */
 	if (rdev->constraints->apply_uV &&
-	    rdev->constraints->min_uV == rdev->constraints->max_uV) {
+	    rdev->constraints->min_uV && rdev->constraints->max_uV) {
+		int target_min, target_max;
 		int current_uV = _regulator_get_voltage(rdev);
 		if (current_uV < 0) {
 			rdev_err(rdev,
@@ -914,15 +888,34 @@
 				 current_uV);
 			return current_uV;
 		}
-		if (current_uV < rdev->constraints->min_uV ||
-		    current_uV > rdev->constraints->max_uV) {
+
+		/*
+		 * If we're below the minimum voltage move up to the
+		 * minimum voltage, if we're above the maximum voltage
+		 * then move down to the maximum.
+		 */
+		target_min = current_uV;
+		target_max = current_uV;
+
+		if (current_uV < rdev->constraints->min_uV) {
+			target_min = rdev->constraints->min_uV;
+			target_max = rdev->constraints->min_uV;
+		}
+
+		if (current_uV > rdev->constraints->max_uV) {
+			target_min = rdev->constraints->max_uV;
+			target_max = rdev->constraints->max_uV;
+		}
+
+		if (target_min != current_uV || target_max != current_uV) {
+			rdev_info(rdev, "Bringing %duV into %d-%duV\n",
+				  current_uV, target_min, target_max);
 			ret = _regulator_do_set_voltage(
-				rdev, rdev->constraints->min_uV,
-				rdev->constraints->max_uV);
+				rdev, target_min, target_max);
 			if (ret < 0) {
 				rdev_err(rdev,
-					"failed to apply %duV constraint(%d)\n",
-					rdev->constraints->min_uV, ret);
+					"failed to apply %d-%duV constraint(%d)\n",
+					target_min, target_max, ret);
 				return ret;
 			}
 		}
@@ -1150,17 +1143,6 @@
 		}
 	}
 
-	if (rdev->constraints->active_discharge && ops->set_active_discharge) {
-		bool ad_state = (rdev->constraints->active_discharge ==
-			      REGULATOR_ACTIVE_DISCHARGE_ENABLE) ? true : false;
-
-		ret = ops->set_active_discharge(rdev, ad_state);
-		if (ret < 0) {
-			rdev_err(rdev, "failed to set active discharge\n");
-			return ret;
-		}
-	}
-
 	print_constraints(rdev);
 	return 0;
 }
@@ -1272,6 +1254,55 @@
 	}
 }
 
+#ifdef CONFIG_DEBUG_FS
+static ssize_t constraint_flags_read_file(struct file *file,
+					  char __user *user_buf,
+					  size_t count, loff_t *ppos)
+{
+	const struct regulator *regulator = file->private_data;
+	const struct regulation_constraints *c = regulator->rdev->constraints;
+	char *buf;
+	ssize_t ret;
+
+	if (!c)
+		return 0;
+
+	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	ret = snprintf(buf, PAGE_SIZE,
+			"always_on: %u\n"
+			"boot_on: %u\n"
+			"apply_uV: %u\n"
+			"ramp_disable: %u\n"
+			"soft_start: %u\n"
+			"pull_down: %u\n"
+			"over_current_protection: %u\n",
+			c->always_on,
+			c->boot_on,
+			c->apply_uV,
+			c->ramp_disable,
+			c->soft_start,
+			c->pull_down,
+			c->over_current_protection);
+
+	ret = simple_read_from_buffer(user_buf, count, ppos, buf, ret);
+	kfree(buf);
+
+	return ret;
+}
+
+#endif
+
+static const struct file_operations constraint_flags_fops = {
+#ifdef CONFIG_DEBUG_FS
+	.open = simple_open,
+	.read = constraint_flags_read_file,
+	.llseek = default_llseek,
+#endif
+};
+
 #define REG_STR_SIZE	64
 
 static struct regulator *create_regulator(struct regulator_dev *rdev,
@@ -1327,6 +1358,9 @@
 				   &regulator->min_uV);
 		debugfs_create_u32("max_uV", 0444, regulator->debugfs,
 				   &regulator->max_uV);
+		debugfs_create_file("constraint_flags", 0444,
+				    regulator->debugfs, regulator,
+				    &constraint_flags_fops);
 	}
 
 	/*
@@ -1334,7 +1368,7 @@
 	 * it is then we don't need to do nearly so much work for
 	 * enable/disable calls.
 	 */
-	if (!_regulator_can_change_status(rdev) &&
+	if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_STATUS) &&
 	    _regulator_is_enabled(rdev))
 		regulator->always_on = true;
 
@@ -1532,10 +1566,11 @@
 	}
 
 	/* Cascade always-on state to supply */
-	if (_regulator_is_enabled(rdev) && rdev->supply) {
+	if (_regulator_is_enabled(rdev)) {
 		ret = regulator_enable(rdev->supply);
 		if (ret < 0) {
 			_regulator_put(rdev->supply);
+			rdev->supply = NULL;
 			return ret;
 		}
 	}
@@ -2111,15 +2146,15 @@
 	lockdep_assert_held_once(&rdev->mutex);
 
 	/* check voltage and requested load before enabling */
-	if (rdev->constraints &&
-	    (rdev->constraints->valid_ops_mask & REGULATOR_CHANGE_DRMS))
+	if (regulator_ops_is_valid(rdev, REGULATOR_CHANGE_DRMS))
 		drms_uA_update(rdev);
 
 	if (rdev->use_count == 0) {
 		/* The regulator may on if it's not switchable or left on */
 		ret = _regulator_is_enabled(rdev);
 		if (ret == -EINVAL || ret == 0) {
-			if (!_regulator_can_change_status(rdev))
+			if (!regulator_ops_is_valid(rdev,
+					REGULATOR_CHANGE_STATUS))
 				return -EPERM;
 
 			ret = _regulator_do_enable(rdev);
@@ -2221,7 +2256,7 @@
 	    (rdev->constraints && !rdev->constraints->always_on)) {
 
 		/* we are last user */
-		if (_regulator_can_change_status(rdev)) {
+		if (regulator_ops_is_valid(rdev, REGULATOR_CHANGE_STATUS)) {
 			ret = _notifier_call_chain(rdev,
 						   REGULATOR_EVENT_PRE_DISABLE,
 						   NULL);
@@ -2242,10 +2277,7 @@
 
 		rdev->use_count = 0;
 	} else if (rdev->use_count > 1) {
-
-		if (rdev->constraints &&
-			(rdev->constraints->valid_ops_mask &
-			REGULATOR_CHANGE_DRMS))
+		if (regulator_ops_is_valid(rdev, REGULATOR_CHANGE_DRMS))
 			drms_uA_update(rdev);
 
 		rdev->use_count--;
@@ -2489,8 +2521,7 @@
 {
 	struct regulator_dev	*rdev = regulator->rdev;
 
-	if (rdev->constraints &&
-	    (rdev->constraints->valid_ops_mask & REGULATOR_CHANGE_VOLTAGE)) {
+	if (regulator_ops_is_valid(rdev, REGULATOR_CHANGE_VOLTAGE)) {
 		if (rdev->desc->n_voltages - rdev->desc->linear_min_sel > 1)
 			return 1;
 
@@ -2644,7 +2675,7 @@
 	int i, voltages, ret;
 
 	/* If we can't change voltage check the current voltage */
-	if (!(rdev->constraints->valid_ops_mask & REGULATOR_CHANGE_VOLTAGE)) {
+	if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_VOLTAGE)) {
 		ret = regulator_get_voltage(regulator);
 		if (ret >= 0)
 			return min_uV <= ret && ret <= max_uV;
@@ -2850,7 +2881,7 @@
 	 * return successfully even though the regulator does not support
 	 * changing the voltage.
 	 */
-	if (!(rdev->constraints->valid_ops_mask & REGULATOR_CHANGE_VOLTAGE)) {
+	if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_VOLTAGE)) {
 		current_uV = _regulator_get_voltage(rdev);
 		if (min_uV <= current_uV && current_uV <= max_uV) {
 			regulator->min_uV = min_uV;
@@ -3109,6 +3140,23 @@
 static int _regulator_get_voltage(struct regulator_dev *rdev)
 {
 	int sel, ret;
+	bool bypassed;
+
+	if (rdev->desc->ops->get_bypass) {
+		ret = rdev->desc->ops->get_bypass(rdev, &bypassed);
+		if (ret < 0)
+			return ret;
+		if (bypassed) {
+			/* if bypassed the regulator must have a supply */
+			if (!rdev->supply) {
+				rdev_err(rdev,
+					 "bypassed regulator has no supply!\n");
+				return -EPROBE_DEFER;
+			}
+
+			return _regulator_get_voltage(rdev->supply->rdev);
+		}
+	}
 
 	if (rdev->desc->ops->get_voltage_sel) {
 		sel = rdev->desc->ops->get_voltage_sel(rdev);
@@ -3365,8 +3413,7 @@
 	if (!rdev->desc->ops->set_bypass)
 		return 0;
 
-	if (rdev->constraints &&
-	    !(rdev->constraints->valid_ops_mask & REGULATOR_CHANGE_BYPASS))
+	if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_BYPASS))
 		return 0;
 
 	mutex_lock(&rdev->mutex);
@@ -3840,6 +3887,16 @@
 			   &rdev->bypass_count);
 }
 
+static int regulator_register_resolve_supply(struct device *dev, void *data)
+{
+	struct regulator_dev *rdev = dev_to_rdev(dev);
+
+	if (regulator_resolve_supply(rdev))
+		rdev_dbg(rdev, "unable to resolve supply\n");
+
+	return 0;
+}
+
 /**
  * regulator_register - register regulator
  * @regulator_desc: regulator to register
@@ -3911,8 +3968,6 @@
 		rdev->dev.of_node = of_node_get(config->of_node);
 	}
 
-	mutex_lock(&regulator_list_mutex);
-
 	mutex_init(&rdev->mutex);
 	rdev->reg_data = config->driver_data;
 	rdev->owner = regulator_desc->owner;
@@ -3937,7 +3992,9 @@
 
 	if ((config->ena_gpio || config->ena_gpio_initialized) &&
 	    gpio_is_valid(config->ena_gpio)) {
+		mutex_lock(&regulator_list_mutex);
 		ret = regulator_ena_gpio_request(rdev, config);
+		mutex_unlock(&regulator_list_mutex);
 		if (ret != 0) {
 			rdev_err(rdev, "Failed to request enable GPIO%d: %d\n",
 				 config->ena_gpio, ret);
@@ -3950,63 +4007,73 @@
 	rdev->dev.parent = dev;
 	dev_set_name(&rdev->dev, "regulator.%lu",
 		    (unsigned long) atomic_inc_return(&regulator_no));
-	ret = device_register(&rdev->dev);
-	if (ret != 0) {
-		put_device(&rdev->dev);
-		goto wash;
-	}
-
-	dev_set_drvdata(&rdev->dev, rdev);
 
 	/* set regulator constraints */
 	if (init_data)
 		constraints = &init_data->constraints;
 
-	ret = set_machine_constraints(rdev, constraints);
-	if (ret < 0)
-		goto scrub;
-
 	if (init_data && init_data->supply_regulator)
 		rdev->supply_name = init_data->supply_regulator;
 	else if (regulator_desc->supply_name)
 		rdev->supply_name = regulator_desc->supply_name;
 
+	/*
+	 * Attempt to resolve the regulator supply, if specified,
+	 * but don't return an error if we fail because we will try
+	 * to resolve it again later as more regulators are added.
+	 */
+	if (regulator_resolve_supply(rdev))
+		rdev_dbg(rdev, "unable to resolve supply\n");
+
+	ret = set_machine_constraints(rdev, constraints);
+	if (ret < 0)
+		goto wash;
+
 	/* add consumers devices */
 	if (init_data) {
+		mutex_lock(&regulator_list_mutex);
 		for (i = 0; i < init_data->num_consumer_supplies; i++) {
 			ret = set_consumer_device_supply(rdev,
 				init_data->consumer_supplies[i].dev_name,
 				init_data->consumer_supplies[i].supply);
 			if (ret < 0) {
+				mutex_unlock(&regulator_list_mutex);
 				dev_err(dev, "Failed to set supply %s\n",
 					init_data->consumer_supplies[i].supply);
 				goto unset_supplies;
 			}
 		}
+		mutex_unlock(&regulator_list_mutex);
 	}
 
+	ret = device_register(&rdev->dev);
+	if (ret != 0) {
+		put_device(&rdev->dev);
+		goto unset_supplies;
+	}
+
+	dev_set_drvdata(&rdev->dev, rdev);
 	rdev_init_debugfs(rdev);
-out:
-	mutex_unlock(&regulator_list_mutex);
+
+	/* try to resolve regulators supply since a new one was registered */
+	class_for_each_device(&regulator_class, NULL, NULL,
+			      regulator_register_resolve_supply);
 	kfree(config);
 	return rdev;
 
 unset_supplies:
+	mutex_lock(&regulator_list_mutex);
 	unset_regulator_supplies(rdev);
-
-scrub:
-	regulator_ena_gpio_free(rdev);
-	device_unregister(&rdev->dev);
-	/* device core frees rdev */
-	rdev = ERR_PTR(ret);
-	goto out;
-
+	mutex_unlock(&regulator_list_mutex);
 wash:
+	kfree(rdev->constraints);
+	mutex_lock(&regulator_list_mutex);
 	regulator_ena_gpio_free(rdev);
+	mutex_unlock(&regulator_list_mutex);
 clean:
 	kfree(rdev);
-	rdev = ERR_PTR(ret);
-	goto out;
+	kfree(config);
+	return ERR_PTR(ret);
 }
 EXPORT_SYMBOL_GPL(regulator_register);
 
@@ -4032,8 +4099,8 @@
 	WARN_ON(rdev->open_count);
 	unset_regulator_supplies(rdev);
 	list_del(&rdev->list);
-	mutex_unlock(&regulator_list_mutex);
 	regulator_ena_gpio_free(rdev);
+	mutex_unlock(&regulator_list_mutex);
 	device_unregister(&rdev->dev);
 }
 EXPORT_SYMBOL_GPL(regulator_unregister);
@@ -4386,7 +4453,7 @@
 	if (c && c->always_on)
 		return 0;
 
-	if (c && !(c->valid_ops_mask & REGULATOR_CHANGE_STATUS))
+	if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_STATUS))
 		return 0;
 
 	mutex_lock(&rdev->mutex);
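
Most of the churn above funnels the scattered constraint checks through regulator_ops_is_valid(). For a concrete picture of what it gates, here is an illustrative machine-constraints snippet: without REGULATOR_CHANGE_VOLTAGE in valid_ops_mask, consumer calls such as regulator_set_voltage() on this rail now fail with -EPERM (names and values invented for illustration):

	static struct regulator_init_data vcc_sd_init = {
		.constraints = {
			.min_uV		= 1800000,
			.max_uV		= 3300000,
			.valid_ops_mask	= REGULATOR_CHANGE_VOLTAGE |
					  REGULATOR_CHANGE_STATUS,
		},
	};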
diff --git a/drivers/regulator/fan53555.c b/drivers/regulator/fan53555.c
index 2cb5cc3..d7da81a 100644
--- a/drivers/regulator/fan53555.c
+++ b/drivers/regulator/fan53555.c
@@ -65,6 +65,13 @@
 	FAN53555_CHIP_ID_03,
 	FAN53555_CHIP_ID_04,
 	FAN53555_CHIP_ID_05,
+	FAN53555_CHIP_ID_08 = 8,
+};
+
+/* IC mask revision */
+enum {
+	FAN53555_CHIP_REV_00 = 0x3,
+	FAN53555_CHIP_REV_13 = 0xf,
 };
 
 enum {
@@ -217,9 +224,26 @@
 	/* Init voltage range and step */
 	switch (di->chip_id) {
 	case FAN53555_CHIP_ID_00:
+		switch (di->chip_rev) {
+		case FAN53555_CHIP_REV_00:
+			di->vsel_min = 600000;
+			di->vsel_step = 10000;
+			break;
+		case FAN53555_CHIP_REV_13:
+			di->vsel_min = 800000;
+			di->vsel_step = 10000;
+			break;
+		default:
+			dev_err(di->dev,
+				"Chip ID %d with rev %d not supported!\n",
+				di->chip_id, di->chip_rev);
+			return -EINVAL;
+		}
+		break;
 	case FAN53555_CHIP_ID_01:
 	case FAN53555_CHIP_ID_03:
 	case FAN53555_CHIP_ID_05:
+	case FAN53555_CHIP_ID_08:
 		di->vsel_min = 600000;
 		di->vsel_step = 10000;
 		break;
diff --git a/drivers/regulator/helpers.c b/drivers/regulator/helpers.c
index b1e32e7..bcf38fd 100644
--- a/drivers/regulator/helpers.c
+++ b/drivers/regulator/helpers.c
@@ -460,7 +460,7 @@
 	if (ret != 0)
 		return ret;
 
-	*enable = val & rdev->desc->bypass_mask;
+	*enable = (val & rdev->desc->bypass_mask) == rdev->desc->bypass_val_on;
 
 	return 0;
 }
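
The one-line fix above matters for parts like the AS3722 LDO6 added below, where bypass_mask covers the whole voltage-select field and bypass_val_on is one specific code within it. With illustrative values:

	/* Illustrative values: bypass_mask = 0x7f, bypass_val_on = 0x3f.
	 *
	 *   old: *enable = val & 0x7f;             true for ANY non-zero selector
	 *   new: *enable = (val & 0x7f) == 0x3f;   true only in actual bypass
	 */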
diff --git a/drivers/regulator/lp3971.c b/drivers/regulator/lp3971.c
index 15c25c6..204b5c5 100644
--- a/drivers/regulator/lp3971.c
+++ b/drivers/regulator/lp3971.c
@@ -365,8 +365,8 @@
 	mutex_lock(&lp3971->io_lock);
 
 	ret = lp3971_i2c_read(lp3971->i2c, reg, 1, &tmp);
-	tmp = (tmp & ~mask) | val;
 	if (ret == 0) {
+		tmp = (tmp & ~mask) | val;
 		ret = lp3971_i2c_write(lp3971->i2c, reg, 1, &tmp);
 		dev_dbg(lp3971->dev, "reg write 0x%02x -> 0x%02x\n", (int)reg,
 			(unsigned)val&0xff);
diff --git a/drivers/regulator/lp3972.c b/drivers/regulator/lp3972.c
index 3a7e96e..ff0c275 100644
--- a/drivers/regulator/lp3972.c
+++ b/drivers/regulator/lp3972.c
@@ -211,8 +211,8 @@
 	mutex_lock(&lp3972->io_lock);
 
 	ret = lp3972_i2c_read(lp3972->i2c, reg, 1, &tmp);
-	tmp = (tmp & ~mask) | val;
 	if (ret == 0) {
+		tmp = (tmp & ~mask) | val;
 		ret = lp3972_i2c_write(lp3972->i2c, reg, 1, &tmp);
 		dev_dbg(lp3972->dev, "reg write 0x%02x -> 0x%02x\n", (int)reg,
 			(unsigned)val & 0xff);
diff --git a/drivers/regulator/lp873x-regulator.c b/drivers/regulator/lp873x-regulator.c
new file mode 100644
index 0000000..b4ffd11
--- /dev/null
+++ b/drivers/regulator/lp873x-regulator.c
@@ -0,0 +1,241 @@
+/*
+ * Regulator driver for LP873X PMIC
+ *
+ * Copyright (C) 2016 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether expressed or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License version 2 for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+
+#include <linux/mfd/lp873x.h>
+
+#define LP873X_REGULATOR(_name, _id, _of, _ops, _n, _vr, _vm, _er, _em, \
+			 _delay, _lr, _nlr, _cr)			\
+	[_id] = {							\
+		.desc = {						\
+			.name			= _name,		\
+			.id			= _id,			\
+			.of_match		= of_match_ptr(_of),	\
+			.regulators_node	= of_match_ptr("regulators"),\
+			.ops			= &_ops,		\
+			.n_voltages		= _n,			\
+			.type			= REGULATOR_VOLTAGE,	\
+			.owner			= THIS_MODULE,		\
+			.vsel_reg		= _vr,			\
+			.vsel_mask		= _vm,			\
+			.enable_reg		= _er,			\
+			.enable_mask		= _em,			\
+			.ramp_delay		= _delay,		\
+			.linear_ranges		= _lr,			\
+			.n_linear_ranges	= _nlr,			\
+		},							\
+		.ctrl2_reg = _cr,					\
+	}
+
+struct lp873x_regulator {
+	struct regulator_desc desc;
+	unsigned int ctrl2_reg;
+};
+
+static const struct lp873x_regulator regulators[];
+
+static const struct regulator_linear_range buck0_buck1_ranges[] = {
+	REGULATOR_LINEAR_RANGE(0, 0x0, 0x13, 0),
+	REGULATOR_LINEAR_RANGE(700000, 0x14, 0x17, 10000),
+	REGULATOR_LINEAR_RANGE(735000, 0x18, 0x9d, 5000),
+	REGULATOR_LINEAR_RANGE(1420000, 0x9e, 0xff, 20000),
+};
+
+static const struct regulator_linear_range ldo0_ldo1_ranges[] = {
+	REGULATOR_LINEAR_RANGE(800000, 0x0, 0x19, 100000),
+};
+
+static unsigned int lp873x_buck_ramp_delay[] = {
+	30000, 15000, 10000, 7500, 3800, 1900, 940, 470
+};
+
+/* LP873X BUCK current limit */
+static const unsigned int lp873x_buck_uA[] = {
+	1500000, 2000000, 2500000, 3000000, 3500000, 4000000,
+};
+
+static int lp873x_buck_set_ramp_delay(struct regulator_dev *rdev,
+				      int ramp_delay)
+{
+	int id = rdev_get_id(rdev);
+	struct lp873x *lp873 = rdev_get_drvdata(rdev);
+	unsigned int reg;
+	int ret;
+
+	if (ramp_delay <= 470)
+		reg = 7;
+	else if (ramp_delay <= 940)
+		reg = 6;
+	else if (ramp_delay <= 1900)
+		reg = 5;
+	else if (ramp_delay <= 3800)
+		reg = 4;
+	else if (ramp_delay <= 7500)
+		reg = 3;
+	else if (ramp_delay <= 10000)
+		reg = 2;
+	else if (ramp_delay <= 15000)
+		reg = 1;
+	else
+		reg = 0;
+
+	ret = regmap_update_bits(lp873->regmap, regulators[id].ctrl2_reg,
+				 LP873X_BUCK0_CTRL_2_BUCK0_SLEW_RATE,
+				 reg << __ffs(LP873X_BUCK0_CTRL_2_BUCK0_SLEW_RATE));
+	if (ret) {
+		dev_err(lp873->dev, "SLEW RATE write failed: %d\n", ret);
+		return ret;
+	}
+
+	rdev->constraints->ramp_delay = lp873x_buck_ramp_delay[reg];
+
+	return 0;
+}
+
+static int lp873x_buck_set_current_limit(struct regulator_dev *rdev,
+					 int min_uA, int max_uA)
+{
+	int id = rdev_get_id(rdev);
+	struct lp873x *lp873 = rdev_get_drvdata(rdev);
+	int i;
+
+	for (i = ARRAY_SIZE(lp873x_buck_uA) - 1; i >= 0; i--) {
+		if (lp873x_buck_uA[i] >= min_uA &&
+		    lp873x_buck_uA[i] <= max_uA)
+			return regmap_update_bits(lp873->regmap,
+						  regulators[id].ctrl2_reg,
+						  LP873X_BUCK0_CTRL_2_BUCK0_ILIM,
+						  i << __ffs(LP873X_BUCK0_CTRL_2_BUCK0_ILIM));
+	}
+
+	return -EINVAL;
+}
+
+static int lp873x_buck_get_current_limit(struct regulator_dev *rdev)
+{
+	int id = rdev_get_id(rdev);
+	struct lp873x *lp873 = rdev_get_drvdata(rdev);
+	int ret;
+	unsigned int val;
+
+	ret = regmap_read(lp873->regmap, regulators[id].ctrl2_reg, &val);
+	if (ret)
+		return ret;
+
+	val = (val & LP873X_BUCK0_CTRL_2_BUCK0_ILIM) >>
+	       __ffs(LP873X_BUCK0_CTRL_2_BUCK0_ILIM);
+
+	return (val < ARRAY_SIZE(lp873x_buck_uA)) ?
+			lp873x_buck_uA[val] : -EINVAL;
+}
+
+/* Operations permitted on BUCK0, BUCK1 */
+static struct regulator_ops lp873x_buck01_ops = {
+	.is_enabled		= regulator_is_enabled_regmap,
+	.enable			= regulator_enable_regmap,
+	.disable		= regulator_disable_regmap,
+	.get_voltage_sel	= regulator_get_voltage_sel_regmap,
+	.set_voltage_sel	= regulator_set_voltage_sel_regmap,
+	.list_voltage		= regulator_list_voltage_linear_range,
+	.map_voltage		= regulator_map_voltage_linear_range,
+	.set_voltage_time_sel	= regulator_set_voltage_time_sel,
+	.set_ramp_delay		= lp873x_buck_set_ramp_delay,
+	.set_current_limit	= lp873x_buck_set_current_limit,
+	.get_current_limit	= lp873x_buck_get_current_limit,
+};
+
+/* Operations permitted on LDO0 and LDO1 */
+static struct regulator_ops lp873x_ldo01_ops = {
+	.is_enabled		= regulator_is_enabled_regmap,
+	.enable			= regulator_enable_regmap,
+	.disable		= regulator_disable_regmap,
+	.get_voltage_sel	= regulator_get_voltage_sel_regmap,
+	.set_voltage_sel	= regulator_set_voltage_sel_regmap,
+	.list_voltage		= regulator_list_voltage_linear_range,
+	.map_voltage		= regulator_map_voltage_linear_range,
+};
+
+static const struct lp873x_regulator regulators[] = {
+	LP873X_REGULATOR("BUCK0", LP873X_BUCK_0, "buck0", lp873x_buck01_ops,
+			 256, LP873X_REG_BUCK0_VOUT,
+			 LP873X_BUCK0_VOUT_BUCK0_VSET, LP873X_REG_BUCK0_CTRL_1,
+			 LP873X_BUCK0_CTRL_1_BUCK0_EN, 10000,
+			 buck0_buck1_ranges, 4, LP873X_REG_BUCK0_CTRL_2),
+	LP873X_REGULATOR("BUCK1", LP873X_BUCK_1, "buck1", lp873x_buck01_ops,
+			 256, LP873X_REG_BUCK1_VOUT,
+			 LP873X_BUCK1_VOUT_BUCK1_VSET, LP873X_REG_BUCK1_CTRL_1,
+			 LP873X_BUCK1_CTRL_1_BUCK1_EN, 10000,
+			 buck0_buck1_ranges, 4, LP873X_REG_BUCK1_CTRL_2),
+	LP873X_REGULATOR("LDO0", LP873X_LDO_0, "ldo0", lp873x_ldo01_ops, 26,
+			 LP873X_REG_LDO0_VOUT, LP873X_LDO0_VOUT_LDO0_VSET,
+			 LP873X_REG_LDO0_CTRL,
+			 LP873X_LDO0_CTRL_LDO0_EN, 0, ldo0_ldo1_ranges, 1,
+			 0xFF),
+	LP873X_REGULATOR("LDO1", LP873X_LDO_1, "ldo1", lp873x_ldo01_ops, 26,
+			 LP873X_REG_LDO1_VOUT, LP873X_LDO1_VOUT_LDO1_VSET,
+			 LP873X_REG_LDO1_CTRL,
+			 LP873X_LDO1_CTRL_LDO1_EN, 0, ldo0_ldo1_ranges, 1,
+			 0xFF),
+};
+
+static int lp873x_regulator_probe(struct platform_device *pdev)
+{
+	struct lp873x *lp873 = dev_get_drvdata(pdev->dev.parent);
+	struct regulator_config config = { };
+	struct regulator_dev *rdev;
+	int i;
+
+	platform_set_drvdata(pdev, lp873);
+
+	config.dev = &pdev->dev;
+	config.dev->of_node = lp873->dev->of_node;
+	config.driver_data = lp873;
+	config.regmap = lp873->regmap;
+
+	for (i = 0; i < ARRAY_SIZE(regulators); i++) {
+		rdev = devm_regulator_register(&pdev->dev, &regulators[i].desc,
+					       &config);
+		if (IS_ERR(rdev)) {
+			dev_err(lp873->dev, "failed to register %s regulator\n",
+				pdev->name);
+			return PTR_ERR(rdev);
+		}
+	}
+
+	return 0;
+}
+
+static const struct platform_device_id lp873x_regulator_id_table[] = {
+	{ "lp873x-regulator", },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(platform, lp873x_regulator_id_table);
+
+static struct platform_driver lp873x_regulator_driver = {
+	.driver = {
+		.name = "lp873x-pmic",
+	},
+	.probe = lp873x_regulator_probe,
+	.id_table = lp873x_regulator_id_table,
+};
+module_platform_driver(lp873x_regulator_driver);
+
+MODULE_AUTHOR("J Keerthy <j-keerthy@ti.com>");
+MODULE_DESCRIPTION("LP873X voltage regulator driver");
+MODULE_ALIAS("platform:lp873x-pmic");
+MODULE_LICENSE("GPL v2");
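
The ramp-delay ladder in lp873x_buck_set_ramp_delay() picks the lowest supported slew rate that still meets or exceeds the request, then reports the rate actually programmed back through the constraints. A standalone sketch of that mapping (the table mirrors lp873x_buck_ramp_delay[] above):

#include <stdio.h>

static const unsigned int ramp_table[] = {
	30000, 15000, 10000, 7500, 3800, 1900, 940, 470
};

static unsigned int ramp_to_reg(int ramp_delay)
{
	unsigned int reg;

	/* The highest code selects the slowest rate; pick the lowest
	 * rate that is still >= the request, saturating at code 0. */
	for (reg = 7; reg > 0; reg--)
		if (ramp_delay <= (int)ramp_table[reg])
			return reg;
	return 0;
}

int main(void)
{
	printf("%u uV/us -> code %u (%u uV/us)\n",
	       5000u, ramp_to_reg(5000), ramp_table[ramp_to_reg(5000)]);
	return 0;	/* 5000 uV/us -> code 3 (7500 uV/us) */
}
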
diff --git a/drivers/regulator/max14577.c b/drivers/regulator/max14577-regulator.c
similarity index 100%
rename from drivers/regulator/max14577.c
rename to drivers/regulator/max14577-regulator.c
diff --git a/drivers/regulator/max77620-regulator.c b/drivers/regulator/max77620-regulator.c
index 73a3356..321e804 100644
--- a/drivers/regulator/max77620-regulator.c
+++ b/drivers/regulator/max77620-regulator.c
@@ -81,6 +81,7 @@
 	int suspend_fps_pd_slot;
 	int suspend_fps_pu_slot;
 	int current_mode;
+	int ramp_rate_setting;
 };
 
 struct max77620_regulator {
@@ -307,6 +308,43 @@
 	return 0;
 }
 
+static int max77620_set_slew_rate(struct max77620_regulator *pmic, int id,
+				  int slew_rate)
+{
+	struct max77620_regulator_info *rinfo = pmic->rinfo[id];
+	unsigned int val;
+	int ret;
+	u8 mask;
+
+	if (rinfo->type == MAX77620_REGULATOR_TYPE_SD) {
+		if (slew_rate <= 13750)
+			val = 0;
+		else if (slew_rate <= 27500)
+			val = 1;
+		else if (slew_rate <= 55000)
+			val = 2;
+		else
+			val = 3;
+		val <<= MAX77620_SD_SR_SHIFT;
+		mask = MAX77620_SD_SR_MASK;
+	} else {
+		if (slew_rate <= 5000)
+			val = 1;
+		else
+			val = 0;
+		mask = MAX77620_LDO_SLEW_RATE_MASK;
+	}
+
+	ret = regmap_update_bits(pmic->rmap, rinfo->cfg_addr, mask, val);
+	if (ret < 0) {
+		dev_err(pmic->dev, "Regulator %d slew rate set failed: %d\n",
+			id, ret);
+		return ret;
+	}
+
+	return 0;
+}
+
 static int max77620_init_pmic(struct max77620_regulator *pmic, int id)
 {
 	struct max77620_regulator_pdata *rpdata = &pmic->reg_pdata[id];
@@ -351,6 +389,13 @@
 	if (ret < 0)
 		return ret;
 
+	if (rpdata->ramp_rate_setting) {
+		ret = max77620_set_slew_rate(pmic, id,
+					     rpdata->ramp_rate_setting);
+		if (ret < 0)
+			return ret;
+	}
+
 	return 0;
 }
 
@@ -502,35 +547,16 @@
 {
 	struct max77620_regulator *pmic = rdev_get_drvdata(rdev);
 	int id = rdev_get_id(rdev);
-	struct max77620_regulator_info *rinfo = pmic->rinfo[id];
-	int ret, val;
-	u8 mask;
+	struct max77620_regulator_pdata *rpdata = &pmic->reg_pdata[id];
 
-	if (rinfo->type == MAX77620_REGULATOR_TYPE_SD) {
-		if (ramp_delay <= 13750)
-			val = 0;
-		else if (ramp_delay <= 27500)
-			val = 1;
-		else if (ramp_delay <= 55000)
-			val = 2;
-		else
-			val = 3;
-		val <<= MAX77620_SD_SR_SHIFT;
-		mask = MAX77620_SD_SR_MASK;
-	} else {
-		if (ramp_delay <= 5000)
-			val = 1;
-		else
-			val = 0;
-		mask = MAX77620_LDO_SLEW_RATE_MASK;
-	}
+	/* A device-specific ramp rate setting means the platform uses a
+	 * ramp rate different from the advertised value. In this case,
+	 * do not configure anything and just return success.
+	 */
+	if (rpdata->ramp_rate_setting)
+		return 0;
 
-	ret = regmap_update_bits(pmic->rmap, rinfo->cfg_addr, mask, val);
-	if (ret < 0)
-		dev_err(pmic->dev, "Reg 0x%02x update failed: %d\n",
-			rinfo->cfg_addr, ret);
-
-	return ret;
+	return max77620_set_slew_rate(pmic, id, ramp_delay);
 }
 
 static int max77620_of_parse_cb(struct device_node *np,
@@ -563,6 +589,9 @@
 			np, "maxim,suspend-fps-power-down-slot", &pval);
 	rpdata->suspend_fps_pd_slot = (!ret) ? pval : -1;
 
+	ret = of_property_read_u32(np, "maxim,ramp-rate-setting", &pval);
+	rpdata->ramp_rate_setting = (!ret) ? pval : 0;
+
 	return max77620_init_pmic(pmic, desc->id);
 }
 
diff --git a/drivers/regulator/max77686-regulator.c b/drivers/regulator/max77686-regulator.c
index 17ccf36..ac4fa58 100644
--- a/drivers/regulator/max77686-regulator.c
+++ b/drivers/regulator/max77686-regulator.c
@@ -41,6 +41,8 @@
 #define MAX77686_LDO_LOW_UVSTEP	25000
 #define MAX77686_BUCK_MINUV	750000
 #define MAX77686_BUCK_UVSTEP	50000
+#define MAX77686_BUCK_ENABLE_TIME	40		/* us */
+#define MAX77686_DVS_ENABLE_TIME	22		/* us */
 #define MAX77686_RAMP_DELAY	100000			/* uV/us */
 #define MAX77686_DVS_RAMP_DELAY	27500			/* uV/us */
 #define MAX77686_DVS_MINUV	600000
@@ -422,6 +424,7 @@
 	.min_uV		= MAX77686_BUCK_MINUV,				\
 	.uV_step	= MAX77686_BUCK_UVSTEP,				\
 	.ramp_delay	= MAX77686_RAMP_DELAY,				\
+	.enable_time	= MAX77686_BUCK_ENABLE_TIME,			\
 	.n_voltages	= MAX77686_VSEL_MASK + 1,			\
 	.vsel_reg	= MAX77686_REG_BUCK5OUT + (num - 5) * 2,	\
 	.vsel_mask	= MAX77686_VSEL_MASK,				\
@@ -439,6 +442,7 @@
 	.min_uV		= MAX77686_BUCK_MINUV,				\
 	.uV_step	= MAX77686_BUCK_UVSTEP,				\
 	.ramp_delay	= MAX77686_RAMP_DELAY,				\
+	.enable_time	= MAX77686_BUCK_ENABLE_TIME,			\
 	.n_voltages	= MAX77686_VSEL_MASK + 1,			\
 	.vsel_reg	= MAX77686_REG_BUCK1OUT,			\
 	.vsel_mask	= MAX77686_VSEL_MASK,				\
@@ -456,6 +460,7 @@
 	.min_uV		= MAX77686_DVS_MINUV,				\
 	.uV_step	= MAX77686_DVS_UVSTEP,				\
 	.ramp_delay	= MAX77686_DVS_RAMP_DELAY,			\
+	.enable_time	= MAX77686_DVS_ENABLE_TIME,			\
 	.n_voltages	= MAX77686_DVS_VSEL_MASK + 1,			\
 	.vsel_reg	= MAX77686_REG_BUCK2DVS1 + (num - 2) * 10,	\
 	.vsel_mask	= MAX77686_DVS_VSEL_MASK,			\
@@ -553,17 +558,7 @@
 	.id_table = max77686_pmic_id,
 };
 
-static int __init max77686_pmic_init(void)
-{
-	return platform_driver_register(&max77686_pmic_driver);
-}
-subsys_initcall(max77686_pmic_init);
-
-static void __exit max77686_pmic_cleanup(void)
-{
-	platform_driver_unregister(&max77686_pmic_driver);
-}
-module_exit(max77686_pmic_cleanup);
+module_platform_driver(max77686_pmic_driver);
 
 MODULE_DESCRIPTION("MAXIM 77686 Regulator Driver");
 MODULE_AUTHOR("Chiwoong Byun <woong.byun@samsung.com>");
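
The max77686 hunk above drops the hand-rolled init/exit pair in favour of module_platform_driver(); note this also moves registration from subsys_initcall to module_init, which is safe now that probe deferral handles init ordering. Roughly, the macro expands to:

static int __init max77686_pmic_driver_init(void)
{
	return platform_driver_register(&max77686_pmic_driver);
}
module_init(max77686_pmic_driver_init);

static void __exit max77686_pmic_driver_exit(void)
{
	platform_driver_unregister(&max77686_pmic_driver);
}
module_exit(max77686_pmic_driver_exit);
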
diff --git a/drivers/regulator/max77693.c b/drivers/regulator/max77693-regulator.c
similarity index 100%
rename from drivers/regulator/max77693.c
rename to drivers/regulator/max77693-regulator.c
diff --git a/drivers/regulator/max77802-regulator.c b/drivers/regulator/max77802-regulator.c
index c07ee13..1d35393 100644
--- a/drivers/regulator/max77802-regulator.c
+++ b/drivers/regulator/max77802-regulator.c
@@ -5,7 +5,7 @@
  * Simon Glass <sjg@chromium.org>
  *
  * Copyright (C) 2012 Samsung Electronics
- * Chiwoong Byun <woong.byun@smasung.com>
+ * Chiwoong Byun <woong.byun@samsung.com>
  * Jonghwa Lee <jonghwa3.lee@samsung.com>
  *
  * This program is free software; you can redistribute it and/or modify
diff --git a/drivers/regulator/max8973-regulator.c b/drivers/regulator/max8973-regulator.c
index 5b75b7c..08d2f13 100644
--- a/drivers/regulator/max8973-regulator.c
+++ b/drivers/regulator/max8973-regulator.c
@@ -38,6 +38,9 @@
 #include <linux/i2c.h>
 #include <linux/slab.h>
 #include <linux/regmap.h>
+#include <linux/thermal.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
 
 /* Register definitions */
 #define MAX8973_VOUT					0x0
@@ -74,6 +77,7 @@
 #define MAX8973_WDTMR_ENABLE				BIT(6)
 #define MAX8973_DISCH_ENBABLE				BIT(5)
 #define MAX8973_FT_ENABLE				BIT(4)
+#define MAX77621_T_JUNCTION_120				BIT(7)
 
 #define MAX8973_CKKADV_TRIP_MASK			0xC
 #define MAX8973_CKKADV_TRIP_DISABLE			0xC
@@ -93,6 +97,12 @@
 #define MAX8973_VOLATGE_STEP				6250
 #define MAX8973_BUCK_N_VOLTAGE				0x80
 
+#define MAX77621_CHIPID_TJINT_S				BIT(0)
+
+#define MAX77621_NORMAL_OPERATING_TEMP			100000
+#define MAX77621_TJINT_WARNING_TEMP_120			120000
+#define MAX77621_TJINT_WARNING_TEMP_140			140000
+
 enum device_id {
 	MAX8973,
 	MAX77621
@@ -112,6 +122,9 @@
 	int curr_gpio_val;
 	struct regulator_ops ops;
 	enum device_id id;
+	int junction_temp_warning;
+	int irq;
+	struct thermal_zone_device *tz_device;
 };
 
 /*
@@ -391,6 +404,10 @@
 	if (pdata->control_flags & MAX8973_CONTROL_FREQ_SHIFT_9PER_ENABLE)
 		control1 |= MAX8973_FREQSHIFT_9PER;
 
+	if ((pdata->junction_temp_warning == MAX77621_TJINT_WARNING_TEMP_120) &&
+	    (max->id == MAX77621))
+		control2 |= MAX77621_T_JUNCTION_120;
+
 	if (!(pdata->control_flags & MAX8973_CONTROL_PULL_DOWN_ENABLE))
 		control2 |= MAX8973_DISCH_ENBABLE;
 
@@ -457,6 +474,79 @@
 	return ret;
 }
 
+static int max8973_thermal_read_temp(void *data, int *temp)
+{
+	struct max8973_chip *mchip = data;
+	unsigned int val;
+	int ret;
+
+	ret = regmap_read(mchip->regmap, MAX8973_CHIPID1, &val);
+	if (ret < 0) {
+		dev_err(mchip->dev, "Failed to read register CHIPID1, %d", ret);
+		return ret;
+	}
+
+	/* +1 degC to trigger cool device */
+	if (val & MAX77621_CHIPID_TJINT_S)
+		*temp = mchip->junction_temp_warning + 1000;
+	else
+		*temp = MAX77621_NORMAL_OPERATING_TEMP;
+
+	return 0;
+}
+
+static irqreturn_t max8973_thermal_irq(int irq, void *data)
+{
+	struct max8973_chip *mchip = data;
+
+	thermal_zone_device_update(mchip->tz_device);
+
+	return IRQ_HANDLED;
+}
+
+static const struct thermal_zone_of_device_ops max77621_tz_ops = {
+	.get_temp = max8973_thermal_read_temp,
+};
+
+static int max8973_thermal_init(struct max8973_chip *mchip)
+{
+	struct thermal_zone_device *tzd;
+	struct irq_data *irq_data;
+	unsigned long irq_flags = 0;
+	int ret;
+
+	if (mchip->id != MAX77621)
+		return 0;
+
+	tzd = devm_thermal_zone_of_sensor_register(mchip->dev, 0, mchip,
+						   &max77621_tz_ops);
+	if (IS_ERR(tzd)) {
+		ret = PTR_ERR(tzd);
+		dev_err(mchip->dev, "Failed to register thermal sensor: %d\n",
+			ret);
+		return ret;
+	}
+
+	if (mchip->irq <= 0)
+		return 0;
+
+	irq_data = irq_get_irq_data(mchip->irq);
+	if (irq_data)
+		irq_flags = irqd_get_trigger_type(irq_data);
+
+	ret = devm_request_threaded_irq(mchip->dev, mchip->irq, NULL,
+					max8973_thermal_irq,
+					IRQF_ONESHOT | IRQF_SHARED | irq_flags,
+					dev_name(mchip->dev), mchip);
+	if (ret < 0) {
+		dev_err(mchip->dev, "Failed to request irq %d, %d\n",
+			mchip->irq, ret);
+		return ret;
+	}
+
+	return 0;
+}
+
 static const struct regmap_config max8973_regmap_config = {
 	.reg_bits		= 8,
 	.val_bits		= 8,
@@ -521,6 +611,11 @@
 		pdata->control_flags |= MAX8973_CONTROL_CLKADV_TRIP_DISABLED;
 	}
 
+	pdata->junction_temp_warning = MAX77621_TJINT_WARNING_TEMP_140;
+	ret = of_property_read_u32(np, "junction-warn-millicelsius", &pval);
+	if (!ret && (pval <= MAX77621_TJINT_WARNING_TEMP_120))
+		pdata->junction_temp_warning = MAX77621_TJINT_WARNING_TEMP_120;
+
 	return pdata;
 }
 
@@ -608,6 +703,7 @@
 	max->enable_external_control = pdata->enable_ext_control;
 	max->curr_gpio_val = pdata->dvs_def_state;
 	max->curr_vout_reg = MAX8973_VOUT + pdata->dvs_def_state;
+	max->junction_temp_warning = pdata->junction_temp_warning;
 
 	if (gpio_is_valid(max->enable_gpio))
 		max->enable_external_control = true;
@@ -718,6 +814,7 @@
 		return ret;
 	}
 
+	max8973_thermal_init(max);
 	return 0;
 }
 
diff --git a/drivers/regulator/max8997.c b/drivers/regulator/max8997-regulator.c
similarity index 99%
rename from drivers/regulator/max8997.c
rename to drivers/regulator/max8997-regulator.c
index ea0196d..efabc0e 100644
--- a/drivers/regulator/max8997.c
+++ b/drivers/regulator/max8997-regulator.c
@@ -2,7 +2,7 @@
  * max8997.c - Regulator driver for the Maxim 8997/8966
  *
  * Copyright (C) 2011 Samsung Electronics
- * MyungJoo Ham <myungjoo.ham@smasung.com>
+ * MyungJoo Ham <myungjoo.ham@samsung.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
diff --git a/drivers/regulator/of_regulator.c b/drivers/regulator/of_regulator.c
index 6b0aa80..cd828db 100644
--- a/drivers/regulator/of_regulator.c
+++ b/drivers/regulator/of_regulator.c
@@ -45,9 +45,9 @@
 	/* Voltage change possible? */
 	if (constraints->min_uV != constraints->max_uV)
 		constraints->valid_ops_mask |= REGULATOR_CHANGE_VOLTAGE;
-	/* Only one voltage?  Then make sure it's set. */
-	if (constraints->min_uV && constraints->max_uV &&
-	    constraints->min_uV == constraints->max_uV)
+
+	/* Do we have a voltage range? If so, try to apply it. */
+	if (constraints->min_uV && constraints->max_uV)
 		constraints->apply_uV = true;
 
 	if (!of_property_read_u32(np, "regulator-microvolt-offset", &pval))
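
The of_regulator.c change widens when apply_uV is set: previously only an exactly-fixed voltage (min == max) requested application at registration, now any fully specified range does. A pared-down sketch of the new decision, assuming a simplified constraints struct:

#include <stdbool.h>
#include <stdio.h>

struct constraints {
	int min_uV, max_uV;
	bool change_voltage, apply_uV;
};

static void set_voltage_flags(struct constraints *c)
{
	/* Voltage change possible? */
	c->change_voltage = c->min_uV != c->max_uV;

	/* Any fully specified range now asks for an initial voltage to
	 * be applied, not just min == max as before. */
	c->apply_uV = c->min_uV && c->max_uV;
}

int main(void)
{
	struct constraints c = { .min_uV = 1100000, .max_uV = 1300000 };

	set_voltage_flags(&c);
	printf("change=%d apply=%d\n", c.change_voltage, c.apply_uV);
	return 0;	/* both flags are now true */
}
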
diff --git a/drivers/regulator/palmas-regulator.c b/drivers/regulator/palmas-regulator.c
index 6efc7ee..f11d41d 100644
--- a/drivers/regulator/palmas-regulator.c
+++ b/drivers/regulator/palmas-regulator.c
@@ -944,6 +944,8 @@
 			if (id == PALMAS_REG_LDO9) {
 				desc->ops = &palmas_ops_ldo9;
 				desc->bypass_reg = desc->enable_reg;
+				desc->bypass_val_on =
+						PALMAS_LDO9_CTRL_LDO_BYPASS_EN;
 				desc->bypass_mask =
 						PALMAS_LDO9_CTRL_LDO_BYPASS_EN;
 			}
@@ -1055,6 +1057,8 @@
 			    id == TPS65917_REG_LDO2) {
 				desc->ops = &tps65917_ops_ldo_1_2;
 				desc->bypass_reg = desc->enable_reg;
+				desc->bypass_val_on =
+						TPS65917_LDO1_CTRL_BYPASS_EN;
 				desc->bypass_mask =
 						TPS65917_LDO1_CTRL_BYPASS_EN;
 			}
@@ -1206,6 +1210,7 @@
 				desc->enable_mask = SMPS10_BOOST_EN;
 			desc->bypass_reg = PALMAS_BASE_TO_REG(PALMAS_SMPS_BASE,
 							    PALMAS_SMPS10_CTRL);
+			desc->bypass_val_on = SMPS10_BYPASS_EN;
 			desc->bypass_mask = SMPS10_BYPASS_EN;
 			desc->min_uV = 3750000;
 			desc->uV_step = 1250000;
@@ -1462,10 +1467,10 @@
 	.ldo_register = tps65917_ldo_registration,
 };
 
-static void palmas_dt_to_pdata(struct device *dev,
-			       struct device_node *node,
-			       struct palmas_pmic_platform_data *pdata,
-			       struct palmas_pmic_driver_data *ddata)
+static int palmas_dt_to_pdata(struct device *dev,
+			      struct device_node *node,
+			      struct palmas_pmic_platform_data *pdata,
+			      struct palmas_pmic_driver_data *ddata)
 {
 	struct device_node *regulators;
 	u32 prop;
@@ -1474,7 +1479,7 @@
 	regulators = of_get_child_by_name(node, "regulators");
 	if (!regulators) {
 		dev_info(dev, "regulator node not found\n");
-		return;
+		return 0;
 	}
 
 	ret = of_regulator_match(dev, regulators, ddata->palmas_matches,
@@ -1482,25 +1487,29 @@
 	of_node_put(regulators);
 	if (ret < 0) {
 		dev_err(dev, "Error parsing regulator init data: %d\n", ret);
-		return;
+		return 0;
 	}
 
 	for (idx = 0; idx < ddata->max_reg; idx++) {
-		if (!ddata->palmas_matches[idx].init_data ||
-		    !ddata->palmas_matches[idx].of_node)
+		struct of_regulator_match *match;
+		struct palmas_reg_init *rinit;
+		struct device_node *np;
+
+		match = &ddata->palmas_matches[idx];
+		np = match->of_node;
+
+		if (!match->init_data || !np)
 			continue;
 
-		pdata->reg_data[idx] = ddata->palmas_matches[idx].init_data;
+		rinit = devm_kzalloc(dev, sizeof(*rinit), GFP_KERNEL);
+		if (!rinit)
+			return -ENOMEM;
 
-		pdata->reg_init[idx] = devm_kzalloc(dev,
-				sizeof(struct palmas_reg_init), GFP_KERNEL);
+		pdata->reg_data[idx] = match->init_data;
+		pdata->reg_init[idx] = rinit;
 
-		pdata->reg_init[idx]->warm_reset =
-			of_property_read_bool(ddata->palmas_matches[idx].of_node,
-					      "ti,warm-reset");
-
-		ret = of_property_read_u32(ddata->palmas_matches[idx].of_node,
-					   "ti,roof-floor", &prop);
+		rinit->warm_reset = of_property_read_bool(np, "ti,warm-reset");
+		ret = of_property_read_u32(np, "ti,roof-floor", &prop);
 		/* EINVAL: Property not found */
 		if (ret != -EINVAL) {
 			int econtrol;
@@ -1522,31 +1531,29 @@
 					WARN_ON(1);
 					dev_warn(dev,
 						 "%s: Invalid roof-floor option: %u\n",
-					     palmas_matches[idx].name, prop);
+						 match->name, prop);
 					break;
 				}
 			}
-			pdata->reg_init[idx]->roof_floor = econtrol;
+			rinit->roof_floor = econtrol;
 		}
 
-		ret = of_property_read_u32(ddata->palmas_matches[idx].of_node,
-					   "ti,mode-sleep", &prop);
+		ret = of_property_read_u32(np, "ti,mode-sleep", &prop);
 		if (!ret)
-			pdata->reg_init[idx]->mode_sleep = prop;
+			rinit->mode_sleep = prop;
 
-		ret = of_property_read_bool(ddata->palmas_matches[idx].of_node,
-					    "ti,smps-range");
+		ret = of_property_read_bool(np, "ti,smps-range");
 		if (ret)
-			pdata->reg_init[idx]->vsel =
-				PALMAS_SMPS12_VOLTAGE_RANGE;
+			rinit->vsel = PALMAS_SMPS12_VOLTAGE_RANGE;
 
 		if (idx == PALMAS_REG_LDO8)
 			pdata->enable_ldo8_tracking = of_property_read_bool(
-						ddata->palmas_matches[idx].of_node,
-						"ti,enable-ldo8-tracking");
+						np, "ti,enable-ldo8-tracking");
 	}
 
 	pdata->ldo6_vibrator = of_property_read_bool(node, "ti,ldo6-vibrator");
+
+	return 0;
 }
 
 static const struct of_device_id of_palmas_match_tbl[] = {
@@ -1628,7 +1635,9 @@
 	platform_set_drvdata(pdev, pmic);
 	pmic->palmas->pmic_ddata = driver_data;
 
-	palmas_dt_to_pdata(&pdev->dev, node, pdata, driver_data);
+	ret = palmas_dt_to_pdata(&pdev->dev, node, pdata, driver_data);
+	if (ret)
+		return ret;
 
 	ret = palmas_smps_read(palmas, PALMAS_SMPS_CTRL, &reg);
 	if (ret)
diff --git a/drivers/regulator/pv88080-regulator.c b/drivers/regulator/pv88080-regulator.c
new file mode 100644
index 0000000..d710756
--- /dev/null
+++ b/drivers/regulator/pv88080-regulator.c
@@ -0,0 +1,419 @@
+/*
+ * pv88080-regulator.c - Regulator device driver for PV88080
+ * Copyright (C) 2016  Powerventure Semiconductor Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/err.h>
+#include <linux/gpio.h>
+#include <linux/i2c.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/machine.h>
+#include <linux/regmap.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/regulator/of_regulator.h>
+#include <linux/proc_fs.h>
+#include <linux/uaccess.h>
+#include "pv88080-regulator.h"
+
+#define PV88080_MAX_REGULATORS	3
+
+/* PV88080 REGULATOR IDs */
+enum {
+	/* BUCKs */
+	PV88080_ID_BUCK1,
+	PV88080_ID_BUCK2,
+	PV88080_ID_BUCK3,
+};
+
+struct pv88080_regulator {
+	struct regulator_desc desc;
+	/* Current limiting */
+	unsigned int n_current_limits;
+	const int *current_limits;
+	unsigned int limit_mask;
+	unsigned int conf;
+	unsigned int conf2;
+	unsigned int conf5;
+};
+
+struct pv88080 {
+	struct device *dev;
+	struct regmap *regmap;
+	struct regulator_dev *rdev[PV88080_MAX_REGULATORS];
+};
+
+struct pv88080_buck_voltage {
+	int min_uV;
+	int max_uV;
+	int uV_step;
+};
+
+static const struct regmap_config pv88080_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+};
+
+/* Current limits array (in uA) for BUCK1, BUCK2, BUCK3.
+ * Entry indices correspond to register values.
+ */
+
+static const int pv88080_buck1_limits[] = {
+	3230000, 5130000, 6960000, 8790000
+};
+
+static const int pv88080_buck23_limits[] = {
+	1496000, 2393000, 3291000, 4189000
+};
+
+static const struct pv88080_buck_voltage pv88080_buck_vol[2] = {
+	{
+		.min_uV = 600000,
+		.max_uV = 1393750,
+		.uV_step = 6250,
+	},
+	{
+		.min_uV = 1400000,
+		.max_uV = 2193750,
+		.uV_step = 6250,
+	},
+};
+
+static unsigned int pv88080_buck_get_mode(struct regulator_dev *rdev)
+{
+	struct pv88080_regulator *info = rdev_get_drvdata(rdev);
+	unsigned int data;
+	int ret, mode = 0;
+
+	ret = regmap_read(rdev->regmap, info->conf, &data);
+	if (ret < 0)
+		return ret;
+
+	switch (data & PV88080_BUCK1_MODE_MASK) {
+	case PV88080_BUCK_MODE_SYNC:
+		mode = REGULATOR_MODE_FAST;
+		break;
+	case PV88080_BUCK_MODE_AUTO:
+		mode = REGULATOR_MODE_NORMAL;
+		break;
+	case PV88080_BUCK_MODE_SLEEP:
+		mode = REGULATOR_MODE_STANDBY;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return mode;
+}
+
+static int pv88080_buck_set_mode(struct regulator_dev *rdev,
+					unsigned int mode)
+{
+	struct pv88080_regulator *info = rdev_get_drvdata(rdev);
+	int val = 0;
+
+	switch (mode) {
+	case REGULATOR_MODE_FAST:
+		val = PV88080_BUCK_MODE_SYNC;
+		break;
+	case REGULATOR_MODE_NORMAL:
+		val = PV88080_BUCK_MODE_AUTO;
+		break;
+	case REGULATOR_MODE_STANDBY:
+		val = PV88080_BUCK_MODE_SLEEP;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return regmap_update_bits(rdev->regmap, info->conf,
+					PV88080_BUCK1_MODE_MASK, val);
+}
+
+static int pv88080_set_current_limit(struct regulator_dev *rdev, int min,
+				    int max)
+{
+	struct pv88080_regulator *info = rdev_get_drvdata(rdev);
+	int i;
+
+	/* search for closest to maximum */
+	for (i = info->n_current_limits - 1; i >= 0; i--) {
+		if (min <= info->current_limits[i]
+			&& max >= info->current_limits[i]) {
+				return regmap_update_bits(rdev->regmap,
+					info->conf,
+					info->limit_mask,
+					i << PV88080_BUCK1_ILIM_SHIFT);
+		}
+	}
+
+	return -EINVAL;
+}
+
+static int pv88080_get_current_limit(struct regulator_dev *rdev)
+{
+	struct pv88080_regulator *info = rdev_get_drvdata(rdev);
+	unsigned int data;
+	int ret;
+
+	ret = regmap_read(rdev->regmap, info->conf, &data);
+	if (ret < 0)
+		return ret;
+
+	data = (data & info->limit_mask) >> PV88080_BUCK1_ILIM_SHIFT;
+	return info->current_limits[data];
+}
+
+static struct regulator_ops pv88080_buck_ops = {
+	.get_mode = pv88080_buck_get_mode,
+	.set_mode = pv88080_buck_set_mode,
+	.enable = regulator_enable_regmap,
+	.disable = regulator_disable_regmap,
+	.is_enabled = regulator_is_enabled_regmap,
+	.set_voltage_sel = regulator_set_voltage_sel_regmap,
+	.get_voltage_sel = regulator_get_voltage_sel_regmap,
+	.list_voltage = regulator_list_voltage_linear,
+	.set_current_limit = pv88080_set_current_limit,
+	.get_current_limit = pv88080_get_current_limit,
+};
+
+#define PV88080_BUCK(chip, regl_name, min, step, max, limits_array) \
+{\
+	.desc	=	{\
+		.id = chip##_ID_##regl_name,\
+		.name = __stringify(chip##_##regl_name),\
+		.of_match = of_match_ptr(#regl_name),\
+		.regulators_node = of_match_ptr("regulators"),\
+		.type = REGULATOR_VOLTAGE,\
+		.owner = THIS_MODULE,\
+		.ops = &pv88080_buck_ops,\
+		.min_uV = min, \
+		.uV_step = step, \
+		.n_voltages = ((max) - (min))/(step) + 1, \
+		.enable_reg = PV88080_REG_##regl_name##_CONF0, \
+		.enable_mask = PV88080_##regl_name##_EN, \
+		.vsel_reg = PV88080_REG_##regl_name##_CONF0, \
+		.vsel_mask = PV88080_V##regl_name##_MASK, \
+	},\
+	.current_limits = limits_array, \
+	.n_current_limits = ARRAY_SIZE(limits_array), \
+	.limit_mask = PV88080_##regl_name##_ILIM_MASK, \
+	.conf = PV88080_REG_##regl_name##_CONF1, \
+	.conf2 = PV88080_REG_##regl_name##_CONF2, \
+	.conf5 = PV88080_REG_##regl_name##_CONF5, \
+}
+
+static struct pv88080_regulator pv88080_regulator_info[] = {
+	PV88080_BUCK(PV88080, BUCK1, 600000, 6250, 1393750,
+		pv88080_buck1_limits),
+	PV88080_BUCK(PV88080, BUCK2, 600000, 6250, 1393750,
+		pv88080_buck23_limits),
+	PV88080_BUCK(PV88080, BUCK3, 600000, 6250, 1393750,
+		pv88080_buck23_limits),
+};
+
+static irqreturn_t pv88080_irq_handler(int irq, void *data)
+{
+	struct pv88080 *chip = data;
+	int i, reg_val, err, ret = IRQ_NONE;
+
+	err = regmap_read(chip->regmap, PV88080_REG_EVENT_A, &reg_val);
+	if (err < 0)
+		goto error_i2c;
+
+	if (reg_val & PV88080_E_VDD_FLT) {
+		for (i = 0; i < PV88080_MAX_REGULATORS; i++) {
+			if (chip->rdev[i] != NULL) {
+				regulator_notifier_call_chain(chip->rdev[i],
+					REGULATOR_EVENT_UNDER_VOLTAGE,
+					NULL);
+			}
+		}
+
+		err = regmap_write(chip->regmap, PV88080_REG_EVENT_A,
+			PV88080_E_VDD_FLT);
+		if (err < 0)
+			goto error_i2c;
+
+		ret = IRQ_HANDLED;
+	}
+
+	if (reg_val & PV88080_E_OVER_TEMP) {
+		for (i = 0; i < PV88080_MAX_REGULATORS; i++) {
+			if (chip->rdev[i] != NULL) {
+				regulator_notifier_call_chain(chip->rdev[i],
+					REGULATOR_EVENT_OVER_TEMP,
+					NULL);
+			}
+		}
+
+		err = regmap_write(chip->regmap, PV88080_REG_EVENT_A,
+			PV88080_E_OVER_TEMP);
+		if (err < 0)
+			goto error_i2c;
+
+		ret = IRQ_HANDLED;
+	}
+
+	return ret;
+
+error_i2c:
+	dev_err(chip->dev, "I2C error: %d\n", err);
+	return IRQ_NONE;
+}
+
+/*
+ * I2C driver interface functions
+ */
+static int pv88080_i2c_probe(struct i2c_client *i2c,
+		const struct i2c_device_id *id)
+{
+	struct regulator_init_data *init_data = dev_get_platdata(&i2c->dev);
+	struct pv88080 *chip;
+	struct regulator_config config = { };
+	int i, error, ret;
+	unsigned int conf2, conf5;
+
+	chip = devm_kzalloc(&i2c->dev, sizeof(struct pv88080), GFP_KERNEL);
+	if (!chip)
+		return -ENOMEM;
+
+	chip->dev = &i2c->dev;
+	chip->regmap = devm_regmap_init_i2c(i2c, &pv88080_regmap_config);
+	if (IS_ERR(chip->regmap)) {
+		error = PTR_ERR(chip->regmap);
+		dev_err(chip->dev, "Failed to allocate register map: %d\n",
+			error);
+		return error;
+	}
+
+	i2c_set_clientdata(i2c, chip);
+
+	if (i2c->irq != 0) {
+		ret = regmap_write(chip->regmap, PV88080_REG_MASK_A, 0xFF);
+		if (ret < 0) {
+			dev_err(chip->dev,
+				"Failed to mask A reg: %d\n", ret);
+			return ret;
+		}
+		ret = regmap_write(chip->regmap, PV88080_REG_MASK_B, 0xFF);
+		if (ret < 0) {
+			dev_err(chip->dev,
+				"Failed to mask B reg: %d\n", ret);
+			return ret;
+		}
+		ret = regmap_write(chip->regmap, PV88080_REG_MASK_C, 0xFF);
+		if (ret < 0) {
+			dev_err(chip->dev,
+				"Failed to mask C reg: %d\n", ret);
+			return ret;
+		}
+
+		ret = devm_request_threaded_irq(&i2c->dev, i2c->irq, NULL,
+					pv88080_irq_handler,
+					IRQF_TRIGGER_LOW|IRQF_ONESHOT,
+					"pv88080", chip);
+		if (ret != 0) {
+			dev_err(chip->dev, "Failed to request IRQ: %d\n",
+				i2c->irq);
+			return ret;
+		}
+
+		ret = regmap_update_bits(chip->regmap, PV88080_REG_MASK_A,
+			PV88080_M_VDD_FLT | PV88080_M_OVER_TEMP, 0);
+		if (ret < 0) {
+			dev_err(chip->dev,
+				"Failed to update mask reg: %d\n", ret);
+			return ret;
+		}
+
+	} else {
+		dev_warn(chip->dev, "No IRQ configured\n");
+	}
+
+	config.dev = chip->dev;
+	config.regmap = chip->regmap;
+
+	for (i = 0; i < PV88080_MAX_REGULATORS; i++) {
+		if (init_data)
+			config.init_data = &init_data[i];
+
+		ret = regmap_read(chip->regmap,
+			pv88080_regulator_info[i].conf2, &conf2);
+		if (ret < 0)
+			return ret;
+
+		conf2 = ((conf2 >> PV88080_BUCK_VDAC_RANGE_SHIFT) &
+			PV88080_BUCK_VDAC_RANGE_MASK);
+
+		ret = regmap_read(chip->regmap,
+			pv88080_regulator_info[i].conf5, &conf5);
+		if (ret < 0)
+			return ret;
+
+		conf5 = ((conf5 >> PV88080_BUCK_VRANGE_GAIN_SHIFT) &
+			PV88080_BUCK_VRANGE_GAIN_MASK);
+
+		pv88080_regulator_info[i].desc.min_uV =
+			pv88080_buck_vol[conf2].min_uV * (conf5+1);
+		pv88080_regulator_info[i].desc.uV_step =
+			pv88080_buck_vol[conf2].uV_step * (conf5+1);
+		pv88080_regulator_info[i].desc.n_voltages =
+			((pv88080_buck_vol[conf2].max_uV * (conf5+1))
+			- (pv88080_regulator_info[i].desc.min_uV))
+			/(pv88080_regulator_info[i].desc.uV_step) + 1;
+
+		config.driver_data = (void *)&pv88080_regulator_info[i];
+		chip->rdev[i] = devm_regulator_register(chip->dev,
+			&pv88080_regulator_info[i].desc, &config);
+		if (IS_ERR(chip->rdev[i])) {
+			dev_err(chip->dev,
+				"Failed to register PV88080 regulator\n");
+			return PTR_ERR(chip->rdev[i]);
+		}
+	}
+
+	return 0;
+}
+
+static const struct i2c_device_id pv88080_i2c_id[] = {
+	{"pv88080", 0},
+	{},
+};
+MODULE_DEVICE_TABLE(i2c, pv88080_i2c_id);
+
+#ifdef CONFIG_OF
+static const struct of_device_id pv88080_dt_ids[] = {
+	{ .compatible = "pvs,pv88080", .data = &pv88080_i2c_id[0] },
+	{},
+};
+MODULE_DEVICE_TABLE(of, pv88080_dt_ids);
+#endif
+
+static struct i2c_driver pv88080_regulator_driver = {
+	.driver = {
+		.name = "pv88080",
+		.of_match_table = of_match_ptr(pv88080_dt_ids),
+	},
+	.probe = pv88080_i2c_probe,
+	.id_table = pv88080_i2c_id,
+};
+
+module_i2c_driver(pv88080_regulator_driver);
+
+MODULE_AUTHOR("James Ban <James.Ban.opensource@diasemi.com>");
+MODULE_DESCRIPTION("Regulator device driver for Powerventure PV88080");
+MODULE_LICENSE("GPL");
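
A standalone sketch of the current-limit selection in pv88080_set_current_limit(): it searches from the highest supported limit downward for one inside [min, max], and the table index doubles as the register field value (note the loop starts at n - 1, matching the bounds fix applied above). The table mirrors pv88080_buck1_limits[]:

#include <stdio.h>

static const int buck1_limits[] = { 3230000, 5130000, 6960000, 8790000 };

static int pick_current_limit(const int *limits, int n, int min, int max)
{
	int i;

	/* Largest supported limit inside the requested window wins. */
	for (i = n - 1; i >= 0; i--)
		if (limits[i] >= min && limits[i] <= max)
			return i;
	return -1;	/* nothing representable in the window */
}

int main(void)
{
	printf("%d\n", pick_current_limit(buck1_limits, 4,
					  3000000, 6000000));
	return 0;	/* index 1, i.e. 5130000 uA */
}
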
diff --git a/drivers/regulator/pv88080-regulator.h b/drivers/regulator/pv88080-regulator.h
new file mode 100644
index 0000000..5e9afde
--- /dev/null
+++ b/drivers/regulator/pv88080-regulator.h
@@ -0,0 +1,92 @@
+/*
+ * pv88080-regulator.h - Regulator definitions for PV88080
+ * Copyright (C) 2016 Powerventure Semiconductor Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __PV88080_REGISTERS_H__
+#define __PV88080_REGISTERS_H__
+
+/* System Control and Event Registers */
+#define	PV88080_REG_EVENT_A			0x04
+#define	PV88080_REG_MASK_A			0x09
+#define	PV88080_REG_MASK_B			0x0a
+#define	PV88080_REG_MASK_C			0x0b
+
+/* Regulator Registers */
+#define	PV88080_REG_BUCK1_CONF0			0x27
+#define	PV88080_REG_BUCK1_CONF1			0x28
+#define	PV88080_REG_BUCK1_CONF2			0x59
+#define	PV88080_REG_BUCK1_CONF5			0x5c
+#define	PV88080_REG_BUCK2_CONF0			0x29
+#define	PV88080_REG_BUCK2_CONF1			0x2a
+#define	PV88080_REG_BUCK2_CONF2			0x61
+#define	PV88080_REG_BUCK2_CONF5			0x64
+#define	PV88080_REG_BUCK3_CONF0			0x2b
+#define	PV88080_REG_BUCK3_CONF1			0x2c
+#define	PV88080_REG_BUCK3_CONF2			0x69
+#define	PV88080_REG_BUCK3_CONF5			0x6c
+
+/* PV88080_REG_EVENT_A (addr=0x04) */
+#define	PV88080_E_VDD_FLT				0x01
+#define	PV88080_E_OVER_TEMP			0x02
+
+/* PV88080_REG_MASK_A (addr=0x09) */
+#define	PV88080_M_VDD_FLT				0x01
+#define	PV88080_M_OVER_TEMP			0x02
+
+/* PV88080_REG_BUCK1_CONF0 (addr=0x27) */
+#define	PV88080_BUCK1_EN				0x80
+#define PV88080_VBUCK1_MASK			0x7F
+/* PV88080_REG_BUCK2_CONF0 (addr=0x29) */
+#define	PV88080_BUCK2_EN				0x80
+#define PV88080_VBUCK2_MASK			0x7F
+/* PV88080_REG_BUCK3_CONF0 (addr=0x2b) */
+#define	PV88080_BUCK3_EN				0x80
+#define PV88080_VBUCK3_MASK			0x7F
+
+/* PV88080_REG_BUCK1_CONF1 (addr=0x28) */
+#define PV88080_BUCK1_ILIM_SHIFT			2
+#define PV88080_BUCK1_ILIM_MASK			0x0C
+#define PV88080_BUCK1_MODE_MASK			0x03
+
+/* PV88080_REG_BUCK2_CONF1 (addr=0x2a) */
+#define PV88080_BUCK2_ILIM_SHIFT			2
+#define PV88080_BUCK2_ILIM_MASK			0x0C
+#define PV88080_BUCK2_MODE_MASK			0x03
+
+/* PV88080_REG_BUCK3_CONF1 (addr=0x2c) */
+#define PV88080_BUCK3_ILIM_SHIFT			2
+#define PV88080_BUCK3_ILIM_MASK			0x0C
+#define PV88080_BUCK3_MODE_MASK			0x03
+
+#define	PV88080_BUCK_MODE_SLEEP			0x00
+#define	PV88080_BUCK_MODE_AUTO			0x01
+#define	PV88080_BUCK_MODE_SYNC			0x02
+
+/* PV88080_REG_BUCK2_CONF2 (addr=0x61) */
+/* PV88080_REG_BUCK3_CONF2 (addr=0x69) */
+#define PV88080_BUCK_VDAC_RANGE_SHIFT			7
+#define PV88080_BUCK_VDAC_RANGE_MASK			0x01
+
+#define PV88080_BUCK_VDAC_RANGE_1			0x00
+#define PV88080_BUCK_VDAC_RANGE_2			0x01
+
+/* PV88080_REG_BUCK2_CONF5 (addr=0x64) */
+/* PV88080_REG_BUCK3_CONF5 (addr=0x6c) */
+#define PV88080_BUCK_VRANGE_GAIN_SHIFT			0
+#define PV88080_BUCK_VRANGE_GAIN_MASK			0x01
+
+#define PV88080_BUCK_VRANGE_GAIN_1			0x00
+#define PV88080_BUCK_VRANGE_GAIN_2			0x01
+
+#endif	/* __PV88080_REGISTERS_H__ */
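
The probe code in pv88080-regulator.c combines these fields at runtime: CONF2's VDAC range bit selects one of the two base voltage tables and CONF5's gain bit scales everything by (gain + 1). A worked sketch of how min_uV, uV_step and n_voltages come out (tables mirror pv88080_buck_vol[] above):

#include <stdio.h>

struct vrange { int min_uV, max_uV, uV_step; };

static const struct vrange buck_vol[2] = {
	{  600000, 1393750, 6250 },
	{ 1400000, 2193750, 6250 },
};

int main(void)
{
	unsigned int range = 1;	/* VDAC_RANGE bit read from CONF2 */
	unsigned int gain  = 1;	/* VRANGE_GAIN bit read from CONF5 */
	int min_uV  = buck_vol[range].min_uV * (gain + 1);
	int uV_step = buck_vol[range].uV_step * (gain + 1);
	int n = (buck_vol[range].max_uV * (gain + 1) - min_uV) / uV_step + 1;

	printf("min=%d step=%d n_voltages=%d\n", min_uV, uV_step, n);
	return 0;	/* min=2800000 step=12500 n_voltages=128 */
}
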
diff --git a/drivers/regulator/pwm-regulator.c b/drivers/regulator/pwm-regulator.c
index 4689d62..fafa348 100644
--- a/drivers/regulator/pwm-regulator.c
+++ b/drivers/regulator/pwm-regulator.c
@@ -59,18 +59,18 @@
 					 unsigned selector)
 {
 	struct pwm_regulator_data *drvdata = rdev_get_drvdata(rdev);
-	unsigned int pwm_reg_period;
+	struct pwm_args pargs;
 	int dutycycle;
 	int ret;
 
-	pwm_reg_period = pwm_get_period(drvdata->pwm);
+	pwm_get_args(drvdata->pwm, &pargs);
 
-	dutycycle = (pwm_reg_period *
+	dutycycle = (pargs.period *
 		    drvdata->duty_cycle_table[selector].dutycycle) / 100;
 
-	ret = pwm_config(drvdata->pwm, dutycycle, pwm_reg_period);
+	ret = pwm_config(drvdata->pwm, dutycycle, pargs.period);
 	if (ret) {
-		dev_err(&rdev->dev, "Failed to configure PWM\n");
+		dev_err(&rdev->dev, "Failed to configure PWM: %d\n", ret);
 		return ret;
 	}
 
@@ -113,18 +113,6 @@
 	return pwm_is_enabled(drvdata->pwm);
 }
 
-/**
- * Continuous voltage call-backs
- */
-static int pwm_voltage_to_duty_cycle_percentage(struct regulator_dev *rdev, int req_uV)
-{
-	int min_uV = rdev->constraints->min_uV;
-	int max_uV = rdev->constraints->max_uV;
-	int diff = max_uV - min_uV;
-
-	return ((req_uV * 100) - (min_uV * 100)) / diff;
-}
-
 static int pwm_regulator_get_voltage(struct regulator_dev *rdev)
 {
 	struct pwm_regulator_data *drvdata = rdev_get_drvdata(rdev);
@@ -138,21 +126,42 @@
 {
 	struct pwm_regulator_data *drvdata = rdev_get_drvdata(rdev);
 	unsigned int ramp_delay = rdev->constraints->ramp_delay;
-	unsigned int period = pwm_get_period(drvdata->pwm);
-	int duty_cycle;
+	struct pwm_args pargs;
+	unsigned int req_diff = min_uV - rdev->constraints->min_uV;
+	unsigned int diff;
+	unsigned int duty_pulse;
+	u64 req_period;
+	u32 rem;
 	int ret;
 
-	duty_cycle = pwm_voltage_to_duty_cycle_percentage(rdev, min_uV);
+	pwm_get_args(drvdata->pwm, &pargs);
+	diff = rdev->constraints->max_uV - rdev->constraints->min_uV;
 
-	ret = pwm_config(drvdata->pwm, (period / 100) * duty_cycle, period);
+	/* First try to find a duty cycle time that is an exact factor of
+	 * the PWM period: if (request_diff_to_min * pwm_period) is evenly
+	 * divisible by voltage_range_diff, such a duty cycle time exists
+	 * and brings the output voltage closer to the requested value,
+	 * since nothing is lost to integer truncation.
+	 */
+	req_period = req_diff * pargs.period;
+	div_u64_rem(req_period, diff, &rem);
+	if (!rem) {
+		do_div(req_period, diff);
+		duty_pulse = (unsigned int)req_period;
+	} else {
+		duty_pulse = (pargs.period / 100) * ((req_diff * 100) / diff);
+	}
+
+	ret = pwm_config(drvdata->pwm, duty_pulse, pargs.period);
 	if (ret) {
-		dev_err(&rdev->dev, "Failed to configure PWM\n");
+		dev_err(&rdev->dev, "Failed to configure PWM: %d\n", ret);
 		return ret;
 	}
 
 	ret = pwm_enable(drvdata->pwm);
 	if (ret) {
-		dev_err(&rdev->dev, "Failed to enable PWM\n");
+		dev_err(&rdev->dev, "Failed to enable PWM: %d\n", ret);
 		return ret;
 	}
 	drvdata->volt_uV = min_uV;
@@ -200,8 +209,7 @@
 
 	if ((length < sizeof(*duty_cycle_table)) ||
 	    (length % sizeof(*duty_cycle_table))) {
-		dev_err(&pdev->dev,
-			"voltage-table length(%d) is invalid\n",
+		dev_err(&pdev->dev, "voltage-table length(%d) is invalid\n",
 			length);
 		return -EINVAL;
 	}
@@ -214,7 +222,7 @@
 					 (u32 *)duty_cycle_table,
 					 length / sizeof(u32));
 	if (ret) {
-		dev_err(&pdev->dev, "Failed to read voltage-table\n");
+		dev_err(&pdev->dev, "Failed to read voltage-table: %d\n", ret);
 		return ret;
 	}
 
@@ -277,16 +285,24 @@
 
 	drvdata->pwm = devm_pwm_get(&pdev->dev, NULL);
 	if (IS_ERR(drvdata->pwm)) {
-		dev_err(&pdev->dev, "Failed to get PWM\n");
-		return PTR_ERR(drvdata->pwm);
+		ret = PTR_ERR(drvdata->pwm);
+		dev_err(&pdev->dev, "Failed to get PWM: %d\n", ret);
+		return ret;
 	}
 
+	/*
+	 * FIXME: pwm_apply_args() should be removed when switching to the
+	 * atomic PWM API.
+	 */
+	pwm_apply_args(drvdata->pwm);
+
 	regulator = devm_regulator_register(&pdev->dev,
 					    &drvdata->desc, &config);
 	if (IS_ERR(regulator)) {
-		dev_err(&pdev->dev, "Failed to register regulator %s\n",
-			drvdata->desc.name);
-		return PTR_ERR(regulator);
+		ret = PTR_ERR(regulator);
+		dev_err(&pdev->dev, "Failed to register regulator %s: %d\n",
+			drvdata->desc.name, ret);
+		return ret;
 	}
 
 	return 0;
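
A standalone sketch of the duty-pulse computation in pwm_regulator_set_voltage() above: use the exact integer quotient when (req_diff * period) divides evenly by the voltage span, otherwise fall back to the lossy percentage-based approximation. The example values are illustrative:

#include <stdint.h>
#include <stdio.h>

static unsigned int duty_pulse(unsigned int period_ns, unsigned int min_uV,
			       unsigned int max_uV, unsigned int req_uV)
{
	unsigned int req_diff = req_uV - min_uV;
	unsigned int diff = max_uV - min_uV;
	uint64_t req_period = (uint64_t)req_diff * period_ns;

	if (req_period % diff == 0)
		return (unsigned int)(req_period / diff);

	/* Lossy fallback, mirroring (period / 100) * (req_diff * 100 / diff). */
	return (period_ns / 100) * ((req_diff * 100) / diff);
}

int main(void)
{
	printf("%u\n", duty_pulse(10000, 1000000, 2000000, 1500000));
	return 0;	/* 5000 ns: exactly half the period, no rounding loss */
}
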
diff --git a/drivers/regulator/qcom_spmi-regulator.c b/drivers/regulator/qcom_spmi-regulator.c
index 88a5dc8..84cce21 100644
--- a/drivers/regulator/qcom_spmi-regulator.c
+++ b/drivers/regulator/qcom_spmi-regulator.c
@@ -246,6 +246,7 @@
 
 /* Minimum voltage stepper delay for each step. */
 #define SPMI_FTSMPS_STEP_DELAY		8
+#define SPMI_DEFAULT_STEP_DELAY		20
 
 /*
  * The ratio SPMI_FTSMPS_STEP_MARGIN_NUM/SPMI_FTSMPS_STEP_MARGIN_DEN is used to
@@ -254,13 +255,6 @@
 #define SPMI_FTSMPS_STEP_MARGIN_NUM	4
 #define SPMI_FTSMPS_STEP_MARGIN_DEN	5
 
-/*
- * This voltage in uV is returned by get_voltage functions when there is no way
- * to determine the current voltage level.  It is needed because the regulator
- * framework treats a 0 uV voltage as an error.
- */
-#define VOLTAGE_UNKNOWN 1
-
 /* VSET value to decide the range of ULT SMPS */
 #define ULT_SMPS_RANGE_SPLIT 0x60
 
@@ -539,12 +533,12 @@
 }
 
 static int spmi_regulator_select_voltage(struct spmi_regulator *vreg,
-		int min_uV, int max_uV, u8 *range_sel, u8 *voltage_sel,
-		unsigned *selector)
+					 int min_uV, int max_uV)
 {
 	const struct spmi_voltage_range *range;
 	int uV = min_uV;
 	int lim_min_uV, lim_max_uV, i, range_id, range_max_uV;
+	int selector, voltage_sel;
 
 	/* Check if request voltage is outside of physically settable range. */
 	lim_min_uV = vreg->set_points->range[0].set_point_min_uV;
@@ -570,14 +564,13 @@
 
 	range_id = i;
 	range = &vreg->set_points->range[range_id];
-	*range_sel = range->range_sel;
 
 	/*
 	 * Force uV to be an allowed set point by applying a ceiling function to
 	 * the uV value.
 	 */
-	*voltage_sel = DIV_ROUND_UP(uV - range->min_uV, range->step_uV);
-	uV = *voltage_sel * range->step_uV + range->min_uV;
+	voltage_sel = DIV_ROUND_UP(uV - range->min_uV, range->step_uV);
+	uV = voltage_sel * range->step_uV + range->min_uV;
 
 	if (uV > max_uV) {
 		dev_err(vreg->dev,
@@ -587,12 +580,48 @@
 		return -EINVAL;
 	}
 
-	*selector = 0;
+	selector = 0;
 	for (i = 0; i < range_id; i++)
-		*selector += vreg->set_points->range[i].n_voltages;
-	*selector += (uV - range->set_point_min_uV) / range->step_uV;
+		selector += vreg->set_points->range[i].n_voltages;
+	selector += (uV - range->set_point_min_uV) / range->step_uV;
 
-	return 0;
+	return selector;
+}
+
+static int spmi_sw_selector_to_hw(struct spmi_regulator *vreg,
+				  unsigned selector, u8 *range_sel,
+				  u8 *voltage_sel)
+{
+	const struct spmi_voltage_range *range, *end;
+
+	range = vreg->set_points->range;
+	end = range + vreg->set_points->count;
+
+	for (; range < end; range++) {
+		if (selector < range->n_voltages) {
+			*voltage_sel = selector;
+			*range_sel = range->range_sel;
+			return 0;
+		}
+
+		selector -= range->n_voltages;
+	}
+
+	return -EINVAL;
+}
+
+static int spmi_hw_selector_to_sw(struct spmi_regulator *vreg, u8 hw_sel,
+				  const struct spmi_voltage_range *range)
+{
+	int sw_sel = hw_sel;
+	const struct spmi_voltage_range *r = vreg->set_points->range;
+
+	while (r != range) {
+		sw_sel += r->n_voltages;
+		r++;
+	}
+
+	return sw_sel;
 }
 
 static const struct spmi_voltage_range *
@@ -614,12 +643,11 @@
 }
 
 static int spmi_regulator_select_voltage_same_range(struct spmi_regulator *vreg,
-		int min_uV, int max_uV, u8 *range_sel, u8 *voltage_sel,
-		unsigned *selector)
+		int min_uV, int max_uV)
 {
 	const struct spmi_voltage_range *range;
 	int uV = min_uV;
-	int i;
+	int i, selector;
 
 	range = spmi_regulator_find_range(vreg);
 	if (!range)
@@ -637,8 +665,8 @@
 	 * Force uV to be an allowed set point by applying a ceiling function to
 	 * the uV value.
 	 */
-	*voltage_sel = DIV_ROUND_UP(uV - range->min_uV, range->step_uV);
-	uV = *voltage_sel * range->step_uV + range->min_uV;
+	uV = DIV_ROUND_UP(uV - range->min_uV, range->step_uV);
+	uV = uV * range->step_uV + range->min_uV;
 
 	if (uV > max_uV) {
 		/*
@@ -648,43 +676,49 @@
 		goto different_range;
 	}
 
-	*selector = 0;
+	selector = 0;
 	for (i = 0; i < vreg->set_points->count; i++) {
 		if (uV >= vreg->set_points->range[i].set_point_min_uV
 		    && uV <= vreg->set_points->range[i].set_point_max_uV) {
-			*selector +=
+			selector +=
 			    (uV - vreg->set_points->range[i].set_point_min_uV)
 				/ vreg->set_points->range[i].step_uV;
 			break;
 		}
 
-		*selector += vreg->set_points->range[i].n_voltages;
+		selector += vreg->set_points->range[i].n_voltages;
 	}
 
-	if (*selector >= vreg->set_points->n_voltages)
+	if (selector >= vreg->set_points->n_voltages)
 		goto different_range;
 
-	return 0;
+	return selector;
 
 different_range:
-	return spmi_regulator_select_voltage(vreg, min_uV, max_uV,
-			range_sel, voltage_sel, selector);
+	return spmi_regulator_select_voltage(vreg, min_uV, max_uV);
 }
 
-static int spmi_regulator_common_set_voltage(struct regulator_dev *rdev,
-		int min_uV, int max_uV, unsigned *selector)
+static int spmi_regulator_common_map_voltage(struct regulator_dev *rdev,
+					     int min_uV, int max_uV)
+{
+	struct spmi_regulator *vreg = rdev_get_drvdata(rdev);
+
+	/*
+	 * Favor staying in the current voltage range if possible.  This avoids
+	 * voltage spikes that occur when changing the voltage range.
+	 */
+	return spmi_regulator_select_voltage_same_range(vreg, min_uV, max_uV);
+}
+
+static int
+spmi_regulator_common_set_voltage(struct regulator_dev *rdev, unsigned selector)
 {
 	struct spmi_regulator *vreg = rdev_get_drvdata(rdev);
 	int ret;
 	u8 buf[2];
 	u8 range_sel, voltage_sel;
 
-	/*
-	 * Favor staying in the current voltage range if possible.  This avoids
-	 * voltage spikes that occur when changing the voltage range.
-	 */
-	ret = spmi_regulator_select_voltage_same_range(vreg, min_uV, max_uV,
-		&range_sel, &voltage_sel, selector);
+	ret = spmi_sw_selector_to_hw(vreg, selector, &range_sel, &voltage_sel);
 	if (ret)
 		return ret;
 
@@ -719,24 +753,24 @@
 
 	range = spmi_regulator_find_range(vreg);
 	if (!range)
-		return VOLTAGE_UNKNOWN;
+		return -EINVAL;
 
-	return range->step_uV * voltage_sel + range->min_uV;
+	return spmi_hw_selector_to_sw(vreg, voltage_sel, range);
+}
+
+static int spmi_regulator_single_map_voltage(struct regulator_dev *rdev,
+		int min_uV, int max_uV)
+{
+	struct spmi_regulator *vreg = rdev_get_drvdata(rdev);
+
+	return spmi_regulator_select_voltage(vreg, min_uV, max_uV);
 }
 
 static int spmi_regulator_single_range_set_voltage(struct regulator_dev *rdev,
-		int min_uV, int max_uV, unsigned *selector)
+						   unsigned selector)
 {
 	struct spmi_regulator *vreg = rdev_get_drvdata(rdev);
-	int ret;
-	u8 range_sel, sel;
-
-	ret = spmi_regulator_select_voltage(vreg, min_uV, max_uV, &range_sel,
-		&sel, selector);
-	if (ret) {
-		dev_err(vreg->dev, "could not set voltage, ret=%d\n", ret);
-		return ret;
-	}
+	u8 sel = selector;
 
 	/*
 	 * Certain types of regulators do not have a range select register so
@@ -748,27 +782,24 @@
 static int spmi_regulator_single_range_get_voltage(struct regulator_dev *rdev)
 {
 	struct spmi_regulator *vreg = rdev_get_drvdata(rdev);
-	const struct spmi_voltage_range *range = vreg->set_points->range;
-	u8 voltage_sel;
+	u8 selector;
+	int ret;
 
-	spmi_vreg_read(vreg, SPMI_COMMON_REG_VOLTAGE_SET, &voltage_sel, 1);
+	ret = spmi_vreg_read(vreg, SPMI_COMMON_REG_VOLTAGE_SET, &selector, 1);
+	if (ret)
+		return ret;
 
-	return range->step_uV * voltage_sel + range->min_uV;
+	return selector;
 }
 
 static int spmi_regulator_ult_lo_smps_set_voltage(struct regulator_dev *rdev,
-		int min_uV, int max_uV, unsigned *selector)
+						  unsigned selector)
 {
 	struct spmi_regulator *vreg = rdev_get_drvdata(rdev);
 	int ret;
 	u8 range_sel, voltage_sel;
 
-	/*
-	 * Favor staying in the current voltage range if possible. This avoids
-	 * voltage spikes that occur when changing the voltage range.
-	 */
-	ret = spmi_regulator_select_voltage_same_range(vreg, min_uV, max_uV,
-		&range_sel, &voltage_sel, selector);
+	ret = spmi_sw_selector_to_hw(vreg, selector, &range_sel, &voltage_sel);
 	if (ret)
 		return ret;
 
@@ -783,7 +814,7 @@
 		voltage_sel |= ULT_SMPS_RANGE_SPLIT;
 
 	return spmi_vreg_update_bits(vreg, SPMI_COMMON_REG_VOLTAGE_SET,
-	       voltage_sel, 0xff);
+				     voltage_sel, 0xff);
 }
 
 static int spmi_regulator_ult_lo_smps_get_voltage(struct regulator_dev *rdev)
@@ -796,12 +827,12 @@
 
 	range = spmi_regulator_find_range(vreg);
 	if (!range)
-		return VOLTAGE_UNKNOWN;
+		return -EINVAL;
 
 	if (range->range_sel == 1)
 		voltage_sel &= ~ULT_SMPS_RANGE_SPLIT;
 
-	return range->step_uV * voltage_sel + range->min_uV;
+	return spmi_hw_selector_to_sw(vreg, voltage_sel, range);
 }
 
 static int spmi_regulator_common_list_voltage(struct regulator_dev *rdev,
@@ -1007,8 +1038,10 @@
 	.enable			= spmi_regulator_common_enable,
 	.disable		= spmi_regulator_common_disable,
 	.is_enabled		= spmi_regulator_common_is_enabled,
-	.set_voltage		= spmi_regulator_common_set_voltage,
-	.get_voltage		= spmi_regulator_common_get_voltage,
+	.set_voltage_sel	= spmi_regulator_common_set_voltage,
+	.set_voltage_time_sel	= spmi_regulator_set_voltage_time_sel,
+	.get_voltage_sel	= spmi_regulator_common_get_voltage,
+	.map_voltage		= spmi_regulator_common_map_voltage,
 	.list_voltage		= spmi_regulator_common_list_voltage,
 	.set_mode		= spmi_regulator_common_set_mode,
 	.get_mode		= spmi_regulator_common_get_mode,
@@ -1020,8 +1053,9 @@
 	.enable			= spmi_regulator_common_enable,
 	.disable		= spmi_regulator_common_disable,
 	.is_enabled		= spmi_regulator_common_is_enabled,
-	.set_voltage		= spmi_regulator_common_set_voltage,
-	.get_voltage		= spmi_regulator_common_get_voltage,
+	.set_voltage_sel	= spmi_regulator_common_set_voltage,
+	.get_voltage_sel	= spmi_regulator_common_get_voltage,
+	.map_voltage		= spmi_regulator_common_map_voltage,
 	.list_voltage		= spmi_regulator_common_list_voltage,
 	.set_mode		= spmi_regulator_common_set_mode,
 	.get_mode		= spmi_regulator_common_get_mode,
@@ -1036,8 +1070,9 @@
 	.enable			= spmi_regulator_common_enable,
 	.disable		= spmi_regulator_common_disable,
 	.is_enabled		= spmi_regulator_common_is_enabled,
-	.set_voltage		= spmi_regulator_common_set_voltage,
-	.get_voltage		= spmi_regulator_common_get_voltage,
+	.set_voltage_sel	= spmi_regulator_common_set_voltage,
+	.get_voltage_sel	= spmi_regulator_common_get_voltage,
+	.map_voltage		= spmi_regulator_common_map_voltage,
 	.list_voltage		= spmi_regulator_common_list_voltage,
 	.set_bypass		= spmi_regulator_common_set_bypass,
 	.get_bypass		= spmi_regulator_common_get_bypass,
@@ -1056,8 +1091,9 @@
 	.enable			= spmi_regulator_common_enable,
 	.disable		= spmi_regulator_common_disable,
 	.is_enabled		= spmi_regulator_common_is_enabled,
-	.set_voltage		= spmi_regulator_single_range_set_voltage,
-	.get_voltage		= spmi_regulator_single_range_get_voltage,
+	.set_voltage_sel	= spmi_regulator_single_range_set_voltage,
+	.get_voltage_sel	= spmi_regulator_single_range_get_voltage,
+	.map_voltage		= spmi_regulator_single_map_voltage,
 	.list_voltage		= spmi_regulator_common_list_voltage,
 	.set_input_current_limit = spmi_regulator_set_ilim,
 };
@@ -1066,9 +1102,10 @@
 	.enable			= spmi_regulator_common_enable,
 	.disable		= spmi_regulator_common_disable,
 	.is_enabled		= spmi_regulator_common_is_enabled,
-	.set_voltage		= spmi_regulator_common_set_voltage,
+	.set_voltage_sel	= spmi_regulator_common_set_voltage,
 	.set_voltage_time_sel	= spmi_regulator_set_voltage_time_sel,
-	.get_voltage		= spmi_regulator_common_get_voltage,
+	.get_voltage_sel	= spmi_regulator_common_get_voltage,
+	.map_voltage		= spmi_regulator_common_map_voltage,
 	.list_voltage		= spmi_regulator_common_list_voltage,
 	.set_mode		= spmi_regulator_common_set_mode,
 	.get_mode		= spmi_regulator_common_get_mode,
@@ -1080,8 +1117,9 @@
 	.enable			= spmi_regulator_common_enable,
 	.disable		= spmi_regulator_common_disable,
 	.is_enabled		= spmi_regulator_common_is_enabled,
-	.set_voltage		= spmi_regulator_ult_lo_smps_set_voltage,
-	.get_voltage		= spmi_regulator_ult_lo_smps_get_voltage,
+	.set_voltage_sel	= spmi_regulator_ult_lo_smps_set_voltage,
+	.set_voltage_time_sel	= spmi_regulator_set_voltage_time_sel,
+	.get_voltage_sel	= spmi_regulator_ult_lo_smps_get_voltage,
 	.list_voltage		= spmi_regulator_common_list_voltage,
 	.set_mode		= spmi_regulator_common_set_mode,
 	.get_mode		= spmi_regulator_common_get_mode,
@@ -1093,8 +1131,10 @@
 	.enable			= spmi_regulator_common_enable,
 	.disable		= spmi_regulator_common_disable,
 	.is_enabled		= spmi_regulator_common_is_enabled,
-	.set_voltage		= spmi_regulator_single_range_set_voltage,
-	.get_voltage		= spmi_regulator_single_range_get_voltage,
+	.set_voltage_sel	= spmi_regulator_single_range_set_voltage,
+	.set_voltage_time_sel	= spmi_regulator_set_voltage_time_sel,
+	.get_voltage_sel	= spmi_regulator_single_range_get_voltage,
+	.map_voltage		= spmi_regulator_single_map_voltage,
 	.list_voltage		= spmi_regulator_common_list_voltage,
 	.set_mode		= spmi_regulator_common_set_mode,
 	.get_mode		= spmi_regulator_common_get_mode,
@@ -1106,8 +1146,9 @@
 	.enable			= spmi_regulator_common_enable,
 	.disable		= spmi_regulator_common_disable,
 	.is_enabled		= spmi_regulator_common_is_enabled,
-	.set_voltage		= spmi_regulator_single_range_set_voltage,
-	.get_voltage		= spmi_regulator_single_range_get_voltage,
+	.set_voltage_sel	= spmi_regulator_single_range_set_voltage,
+	.get_voltage_sel	= spmi_regulator_single_range_get_voltage,
+	.map_voltage		= spmi_regulator_single_map_voltage,
 	.list_voltage		= spmi_regulator_common_list_voltage,
 	.set_mode		= spmi_regulator_common_set_mode,
 	.get_mode		= spmi_regulator_common_get_mode,
@@ -1201,7 +1242,7 @@
 	ret = spmi_vreg_read(vreg, SPMI_COMMON_REG_DIG_MAJOR_REV, version,
 		ARRAY_SIZE(version));
 	if (ret) {
-		dev_err(vreg->dev, "could not read version registers\n");
+		dev_dbg(vreg->dev, "could not read version registers\n");
 		return ret;
 	}
 	dig_major_rev	= version[SPMI_COMMON_REG_DIG_MAJOR_REV
@@ -1245,11 +1286,11 @@
 	return 0;
 }
 
-static int spmi_regulator_ftsmps_init_slew_rate(struct spmi_regulator *vreg)
+static int spmi_regulator_init_slew_rate(struct spmi_regulator *vreg)
 {
 	int ret;
 	u8 reg = 0;
-	int step, delay, slew_rate;
+	int step, delay, slew_rate, step_delay;
 	const struct spmi_voltage_range *range;
 
 	ret = spmi_vreg_read(vreg, SPMI_COMMON_REG_STEP_CTRL, &reg, 1);
@@ -1262,6 +1303,15 @@
 	if (!range)
 		return -EINVAL;
 
+	switch (vreg->logical_type) {
+	case SPMI_REGULATOR_LOGICAL_TYPE_FTSMPS:
+		step_delay = SPMI_FTSMPS_STEP_DELAY;
+		break;
+	default:
+		step_delay = SPMI_DEFAULT_STEP_DELAY;
+		break;
+	}
+
 	step = reg & SPMI_FTSMPS_STEP_CTRL_STEP_MASK;
 	step >>= SPMI_FTSMPS_STEP_CTRL_STEP_SHIFT;
 
@@ -1270,7 +1320,7 @@
 
 	/* slew_rate has units of uV/us */
 	slew_rate = SPMI_FTSMPS_CLOCK_RATE * range->step_uV * (1 << step);
-	slew_rate /= 1000 * (SPMI_FTSMPS_STEP_DELAY << delay);
+	slew_rate /= 1000 * (step_delay << delay);
 	slew_rate *= SPMI_FTSMPS_STEP_MARGIN_NUM;
 	slew_rate /= SPMI_FTSMPS_STEP_MARGIN_DEN;
 
@@ -1411,10 +1461,16 @@
 		return ret;
 	}
 
-	if (vreg->logical_type == SPMI_REGULATOR_LOGICAL_TYPE_FTSMPS) {
-		ret = spmi_regulator_ftsmps_init_slew_rate(vreg);
+	switch (vreg->logical_type) {
+	case SPMI_REGULATOR_LOGICAL_TYPE_FTSMPS:
+	case SPMI_REGULATOR_LOGICAL_TYPE_ULT_LO_SMPS:
+	case SPMI_REGULATOR_LOGICAL_TYPE_ULT_HO_SMPS:
+	case SPMI_REGULATOR_LOGICAL_TYPE_SMPS:
+		ret = spmi_regulator_init_slew_rate(vreg);
 		if (ret)
 			return ret;
+	default:
+		break;
 	}
 
 	if (vreg->logical_type != SPMI_REGULATOR_LOGICAL_TYPE_VS)
@@ -1510,10 +1566,61 @@
 	{ }
 };
 
+static const struct spmi_regulator_data pm8994_regulators[] = {
+	{ "s1", 0x1400, "vdd_s1", },
+	{ "s2", 0x1700, "vdd_s2", },
+	{ "s3", 0x1a00, "vdd_s3", },
+	{ "s4", 0x1d00, "vdd_s4", },
+	{ "s5", 0x2000, "vdd_s5", },
+	{ "s6", 0x2300, "vdd_s6", },
+	{ "s7", 0x2600, "vdd_s7", },
+	{ "s8", 0x2900, "vdd_s8", },
+	{ "s9", 0x2c00, "vdd_s9", },
+	{ "s10", 0x2f00, "vdd_s10", },
+	{ "s11", 0x3200, "vdd_s11", },
+	{ "s12", 0x3500, "vdd_s12", },
+	{ "l1", 0x4000, "vdd_l1", },
+	{ "l2", 0x4100, "vdd_l2_l26_l28", },
+	{ "l3", 0x4200, "vdd_l3_l11", },
+	{ "l4", 0x4300, "vdd_l4_l27_l31", },
+	{ "l5", 0x4400, "vdd_l5_l7", },
+	{ "l6", 0x4500, "vdd_l6_l12_l32", },
+	{ "l7", 0x4600, "vdd_l5_l7", },
+	{ "l8", 0x4700, "vdd_l8_l16_l30", },
+	{ "l9", 0x4800, "vdd_l9_l10_l18_l22", },
+	{ "l10", 0x4900, "vdd_l9_l10_l18_l22", },
+	{ "l11", 0x4a00, "vdd_l3_l11", },
+	{ "l12", 0x4b00, "vdd_l6_l12_l32", },
+	{ "l13", 0x4c00, "vdd_l13_l19_l23_l24", },
+	{ "l14", 0x4d00, "vdd_l14_l15", },
+	{ "l15", 0x4e00, "vdd_l14_l15", },
+	{ "l16", 0x4f00, "vdd_l8_l16_l30", },
+	{ "l17", 0x5000, "vdd_l17_l29", },
+	{ "l18", 0x5100, "vdd_l9_l10_l18_l22", },
+	{ "l19", 0x5200, "vdd_l13_l19_l23_l24", },
+	{ "l20", 0x5300, "vdd_l20_l21", },
+	{ "l21", 0x5400, "vdd_l20_l21", },
+	{ "l22", 0x5500, "vdd_l9_l10_l18_l22", },
+	{ "l23", 0x5600, "vdd_l13_l19_l23_l24", },
+	{ "l24", 0x5700, "vdd_l13_l19_l23_l24", },
+	{ "l25", 0x5800, "vdd_l25", },
+	{ "l26", 0x5900, "vdd_l2_l26_l28", },
+	{ "l27", 0x5a00, "vdd_l4_l27_l31", },
+	{ "l28", 0x5b00, "vdd_l2_l26_l28", },
+	{ "l29", 0x5c00, "vdd_l17_l29", },
+	{ "l30", 0x5d00, "vdd_l8_l16_l30", },
+	{ "l31", 0x5e00, "vdd_l4_l27_l31", },
+	{ "l32", 0x5f00, "vdd_l6_l12_l32", },
+	{ "lvs1", 0x8000, "vdd_lvs_1_2", },
+	{ "lvs2", 0x8100, "vdd_lvs_1_2", },
+	{ }
+};
+
 static const struct of_device_id qcom_spmi_regulator_match[] = {
 	{ .compatible = "qcom,pm8841-regulators", .data = &pm8841_regulators },
 	{ .compatible = "qcom,pm8916-regulators", .data = &pm8916_regulators },
 	{ .compatible = "qcom,pm8941-regulators", .data = &pm8941_regulators },
+	{ .compatible = "qcom,pm8994-regulators", .data = &pm8994_regulators },
 	{ }
 };
 MODULE_DEVICE_TABLE(of, qcom_spmi_regulator_match);
@@ -1573,7 +1680,7 @@
 
 		ret = spmi_regulator_match(vreg, reg->force_type);
 		if (ret)
-			goto err;
+			continue;
 
 		config.dev = dev;
 		config.driver_data = vreg;
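
The slew-rate hunk above reduces to one integer expression: rate in uV/us = clock * step_uV * 2^step / (1000 * (step_delay << delay)), derated by a safety margin; the point of the change is that step_delay becomes a parameter (FTSMPS parts keep SPMI_FTSMPS_STEP_DELAY, the other SMPS types get SPMI_DEFAULT_STEP_DELAY). A standalone sketch of that arithmetic follows; every numeric input is a hypothetical stand-in, not one of the driver's real #defines:

	#include <stdio.h>

	static int compute_slew_rate(int clock_rate, int step_uV, int step,
				     int delay, int step_delay,
				     int margin_num, int margin_den)
	{
		int slew_rate;

		/* slew_rate has units of uV/us */
		slew_rate = clock_rate * step_uV * (1 << step);
		slew_rate /= 1000 * (step_delay << delay);
		slew_rate *= margin_num;
		slew_rate /= margin_den;
		return slew_rate;
	}

	int main(void)
	{
		/* all values here are hypothetical register fields/constants */
		printf("%d uV/us\n",
		       compute_slew_rate(19200, 12500, 2, 1, 8, 97, 100));
		return 0;
	}
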
diff --git a/drivers/regulator/rk808-regulator.c b/drivers/regulator/rk808-regulator.c
index d86a3dc..40d07ba0 100644
--- a/drivers/regulator/rk808-regulator.c
+++ b/drivers/regulator/rk808-regulator.c
@@ -55,6 +55,42 @@
 /* max steps for increasing the voltage of Buck1/2, equal to 100 mV */
 #define MAX_STEPS_ONE_TIME 8
 
+#define RK8XX_DESC(_id, _match, _supply, _min, _max, _step, _vreg,	\
+	_vmask, _ereg, _emask, _etime)					\
+	[_id] = {							\
+		.name		= (_match),				\
+		.supply_name	= (_supply),				\
+		.of_match	= of_match_ptr(_match),			\
+		.regulators_node = of_match_ptr("regulators"),		\
+		.type		= REGULATOR_VOLTAGE,			\
+		.id		= (_id),				\
+		.n_voltages	= (((_max) - (_min)) / (_step) + 1),	\
+		.owner		= THIS_MODULE,				\
+		.min_uV		= (_min) * 1000,			\
+		.uV_step	= (_step) * 1000,			\
+		.vsel_reg	= (_vreg),				\
+		.vsel_mask	= (_vmask),				\
+		.enable_reg	= (_ereg),				\
+		.enable_mask	= (_emask),				\
+		.enable_time	= (_etime),				\
+		.ops		= &rk808_reg_ops,			\
+	}
+
+#define RK8XX_DESC_SWITCH(_id, _match, _supply, _ereg, _emask)		\
+	[_id] = {							\
+		.name		= (_match),				\
+		.supply_name	= (_supply),				\
+		.of_match	= of_match_ptr(_match),			\
+		.regulators_node = of_match_ptr("regulators"),		\
+		.type		= REGULATOR_VOLTAGE,			\
+		.id		= (_id),				\
+		.enable_reg	= (_ereg),				\
+		.enable_mask	= (_emask),				\
+		.owner		= THIS_MODULE,				\
+		.ops		= &rk808_switch_ops			\
+	}
+
 struct rk808_regulator_data {
 	struct gpio_desc *dvs_gpio[2];
 };
@@ -66,27 +102,11 @@
 	RK808_BUCK4_CONFIG_REG,
 };
 
-static const struct regulator_linear_range rk808_buck_voltage_ranges[] = {
-	REGULATOR_LINEAR_RANGE(712500, 0, 63, 12500),
-};
-
-static const struct regulator_linear_range rk808_buck4_voltage_ranges[] = {
-	REGULATOR_LINEAR_RANGE(1800000, 0, 15, 100000),
-};
-
-static const struct regulator_linear_range rk808_ldo_voltage_ranges[] = {
-	REGULATOR_LINEAR_RANGE(1800000, 0, 16, 100000),
-};
-
 static const struct regulator_linear_range rk808_ldo3_voltage_ranges[] = {
 	REGULATOR_LINEAR_RANGE(800000, 0, 13, 100000),
 	REGULATOR_LINEAR_RANGE(2500000, 15, 15, 0),
 };
 
-static const struct regulator_linear_range rk808_ldo6_voltage_ranges[] = {
-	REGULATOR_LINEAR_RANGE(800000, 0, 17, 100000),
-};
-
 static int rk808_buck1_2_get_voltage_sel_regmap(struct regulator_dev *rdev)
 {
 	struct rk808_regulator_data *pdata = rdev_get_drvdata(rdev);
@@ -242,6 +262,21 @@
 static int rk808_set_suspend_voltage(struct regulator_dev *rdev, int uv)
 {
 	unsigned int reg;
+	int sel = regulator_map_voltage_linear(rdev, uv, uv);
+
+	if (sel < 0)
+		return -EINVAL;
+
+	reg = rdev->desc->vsel_reg + RK808_SLP_REG_OFFSET;
+
+	return regmap_update_bits(rdev->regmap, reg,
+				  rdev->desc->vsel_mask,
+				  sel);
+}
+
+static int rk808_set_suspend_voltage_range(struct regulator_dev *rdev, int uv)
+{
+	unsigned int reg;
 	int sel = regulator_map_voltage_linear_range(rdev, uv, uv);
 
 	if (sel < 0)
@@ -277,8 +312,8 @@
 }
 
 static struct regulator_ops rk808_buck1_2_ops = {
-	.list_voltage		= regulator_list_voltage_linear_range,
-	.map_voltage		= regulator_map_voltage_linear_range,
+	.list_voltage		= regulator_list_voltage_linear,
+	.map_voltage		= regulator_map_voltage_linear,
 	.get_voltage_sel	= rk808_buck1_2_get_voltage_sel_regmap,
 	.set_voltage_sel	= rk808_buck1_2_set_voltage_sel,
 	.set_voltage_time_sel	= rk808_buck1_2_set_voltage_time_sel,
@@ -292,6 +327,19 @@
 };
 
 static struct regulator_ops rk808_reg_ops = {
+	.list_voltage		= regulator_list_voltage_linear,
+	.map_voltage		= regulator_map_voltage_linear,
+	.get_voltage_sel	= regulator_get_voltage_sel_regmap,
+	.set_voltage_sel	= regulator_set_voltage_sel_regmap,
+	.enable			= regulator_enable_regmap,
+	.disable		= regulator_disable_regmap,
+	.is_enabled		= regulator_is_enabled_regmap,
+	.set_suspend_voltage	= rk808_set_suspend_voltage,
+	.set_suspend_enable	= rk808_set_suspend_enable,
+	.set_suspend_disable	= rk808_set_suspend_disable,
+};
+
+static struct regulator_ops rk808_reg_ops_ranges = {
 	.list_voltage		= regulator_list_voltage_linear_range,
 	.map_voltage		= regulator_map_voltage_linear_range,
 	.get_voltage_sel	= regulator_get_voltage_sel_regmap,
@@ -299,7 +347,7 @@
 	.enable			= regulator_enable_regmap,
 	.disable		= regulator_disable_regmap,
 	.is_enabled		= regulator_is_enabled_regmap,
-	.set_suspend_voltage	= rk808_set_suspend_voltage,
+	.set_suspend_voltage	= rk808_set_suspend_voltage_range,
 	.set_suspend_enable	= rk808_set_suspend_enable,
 	.set_suspend_disable	= rk808_set_suspend_disable,
 };
@@ -316,12 +364,14 @@
 	{
 		.name = "DCDC_REG1",
 		.supply_name = "vcc1",
+		.of_match = of_match_ptr("DCDC_REG1"),
+		.regulators_node = of_match_ptr("regulators"),
 		.id = RK808_ID_DCDC1,
 		.ops = &rk808_buck1_2_ops,
 		.type = REGULATOR_VOLTAGE,
+		.min_uV = 712500,
+		.uV_step = 12500,
 		.n_voltages = 64,
-		.linear_ranges = rk808_buck_voltage_ranges,
-		.n_linear_ranges = ARRAY_SIZE(rk808_buck_voltage_ranges),
 		.vsel_reg = RK808_BUCK1_ON_VSEL_REG,
 		.vsel_mask = RK808_BUCK_VSEL_MASK,
 		.enable_reg = RK808_DCDC_EN_REG,
@@ -330,12 +380,14 @@
 	}, {
 		.name = "DCDC_REG2",
 		.supply_name = "vcc2",
+		.of_match = of_match_ptr("DCDC_REG2"),
+		.regulators_node = of_match_ptr("regulators"),
 		.id = RK808_ID_DCDC2,
 		.ops = &rk808_buck1_2_ops,
 		.type = REGULATOR_VOLTAGE,
+		.min_uV = 712500,
+		.uV_step = 12500,
 		.n_voltages = 64,
-		.linear_ranges = rk808_buck_voltage_ranges,
-		.n_linear_ranges = ARRAY_SIZE(rk808_buck_voltage_ranges),
 		.vsel_reg = RK808_BUCK2_ON_VSEL_REG,
 		.vsel_mask = RK808_BUCK_VSEL_MASK,
 		.enable_reg = RK808_DCDC_EN_REG,
@@ -344,6 +396,8 @@
 	}, {
 		.name = "DCDC_REG3",
 		.supply_name = "vcc3",
+		.of_match = of_match_ptr("DCDC_REG3"),
+		.regulators_node = of_match_ptr("regulators"),
 		.id = RK808_ID_DCDC3,
 		.ops = &rk808_switch_ops,
 		.type = REGULATOR_VOLTAGE,
@@ -351,55 +405,23 @@
 		.enable_reg = RK808_DCDC_EN_REG,
 		.enable_mask = BIT(2),
 		.owner = THIS_MODULE,
-	}, {
-		.name = "DCDC_REG4",
-		.supply_name = "vcc4",
-		.id = RK808_ID_DCDC4,
-		.ops = &rk808_reg_ops,
-		.type = REGULATOR_VOLTAGE,
-		.n_voltages = 16,
-		.linear_ranges = rk808_buck4_voltage_ranges,
-		.n_linear_ranges = ARRAY_SIZE(rk808_buck4_voltage_ranges),
-		.vsel_reg = RK808_BUCK4_ON_VSEL_REG,
-		.vsel_mask = RK808_BUCK4_VSEL_MASK,
-		.enable_reg = RK808_DCDC_EN_REG,
-		.enable_mask = BIT(3),
-		.owner = THIS_MODULE,
-	}, {
-		.name = "LDO_REG1",
-		.supply_name = "vcc6",
-		.id = RK808_ID_LDO1,
-		.ops = &rk808_reg_ops,
-		.type = REGULATOR_VOLTAGE,
-		.n_voltages = 17,
-		.linear_ranges = rk808_ldo_voltage_ranges,
-		.n_linear_ranges = ARRAY_SIZE(rk808_ldo_voltage_ranges),
-		.vsel_reg = RK808_LDO1_ON_VSEL_REG,
-		.vsel_mask = RK808_LDO_VSEL_MASK,
-		.enable_reg = RK808_LDO_EN_REG,
-		.enable_mask = BIT(0),
-		.enable_time = 400,
-		.owner = THIS_MODULE,
-	}, {
-		.name = "LDO_REG2",
-		.supply_name = "vcc6",
-		.id = RK808_ID_LDO2,
-		.ops = &rk808_reg_ops,
-		.type = REGULATOR_VOLTAGE,
-		.n_voltages = 17,
-		.linear_ranges = rk808_ldo_voltage_ranges,
-		.n_linear_ranges = ARRAY_SIZE(rk808_ldo_voltage_ranges),
-		.vsel_reg = RK808_LDO2_ON_VSEL_REG,
-		.vsel_mask = RK808_LDO_VSEL_MASK,
-		.enable_reg = RK808_LDO_EN_REG,
-		.enable_mask = BIT(1),
-		.enable_time = 400,
-		.owner = THIS_MODULE,
-	}, {
+	},
+	RK8XX_DESC(RK808_ID_DCDC4, "DCDC_REG4", "vcc4", 1800, 3300, 100,
+		RK808_BUCK4_ON_VSEL_REG, RK808_BUCK4_VSEL_MASK,
+		RK808_DCDC_EN_REG, BIT(3), 0),
+	RK8XX_DESC(RK808_ID_LDO1, "LDO_REG1", "vcc6", 1800, 3400, 100,
+		RK808_LDO1_ON_VSEL_REG, RK808_LDO_VSEL_MASK, RK808_LDO_EN_REG,
+		BIT(0), 400),
+	RK8XX_DESC(RK808_ID_LDO2, "LDO_REG2", "vcc6", 1800, 3400, 100,
+		RK808_LDO2_ON_VSEL_REG, RK808_LDO_VSEL_MASK, RK808_LDO_EN_REG,
+		BIT(1), 400),
+	{
 		.name = "LDO_REG3",
 		.supply_name = "vcc7",
+		.of_match = of_match_ptr("LDO_REG3"),
+		.regulators_node = of_match_ptr("regulators"),
 		.id = RK808_ID_LDO3,
-		.ops = &rk808_reg_ops,
+		.ops = &rk808_reg_ops_ranges,
 		.type = REGULATOR_VOLTAGE,
 		.n_voltages = 16,
 		.linear_ranges = rk808_ldo3_voltage_ranges,
@@ -410,117 +432,26 @@
 		.enable_mask = BIT(2),
 		.enable_time = 400,
 		.owner = THIS_MODULE,
-	}, {
-		.name = "LDO_REG4",
-		.supply_name = "vcc9",
-		.id = RK808_ID_LDO4,
-		.ops = &rk808_reg_ops,
-		.type = REGULATOR_VOLTAGE,
-		.n_voltages = 17,
-		.linear_ranges = rk808_ldo_voltage_ranges,
-		.n_linear_ranges = ARRAY_SIZE(rk808_ldo_voltage_ranges),
-		.vsel_reg = RK808_LDO4_ON_VSEL_REG,
-		.vsel_mask = RK808_LDO_VSEL_MASK,
-		.enable_reg = RK808_LDO_EN_REG,
-		.enable_mask = BIT(3),
-		.enable_time = 400,
-		.owner = THIS_MODULE,
-	}, {
-		.name = "LDO_REG5",
-		.supply_name = "vcc9",
-		.id = RK808_ID_LDO5,
-		.ops = &rk808_reg_ops,
-		.type = REGULATOR_VOLTAGE,
-		.n_voltages = 17,
-		.linear_ranges = rk808_ldo_voltage_ranges,
-		.n_linear_ranges = ARRAY_SIZE(rk808_ldo_voltage_ranges),
-		.vsel_reg = RK808_LDO5_ON_VSEL_REG,
-		.vsel_mask = RK808_LDO_VSEL_MASK,
-		.enable_reg = RK808_LDO_EN_REG,
-		.enable_mask = BIT(4),
-		.enable_time = 400,
-		.owner = THIS_MODULE,
-	}, {
-		.name = "LDO_REG6",
-		.supply_name = "vcc10",
-		.id = RK808_ID_LDO6,
-		.ops = &rk808_reg_ops,
-		.type = REGULATOR_VOLTAGE,
-		.n_voltages = 18,
-		.linear_ranges = rk808_ldo6_voltage_ranges,
-		.n_linear_ranges = ARRAY_SIZE(rk808_ldo6_voltage_ranges),
-		.vsel_reg = RK808_LDO6_ON_VSEL_REG,
-		.vsel_mask = RK808_LDO_VSEL_MASK,
-		.enable_reg = RK808_LDO_EN_REG,
-		.enable_mask = BIT(5),
-		.enable_time = 400,
-		.owner = THIS_MODULE,
-	}, {
-		.name = "LDO_REG7",
-		.supply_name = "vcc7",
-		.id = RK808_ID_LDO7,
-		.ops = &rk808_reg_ops,
-		.type = REGULATOR_VOLTAGE,
-		.n_voltages = 18,
-		.linear_ranges = rk808_ldo6_voltage_ranges,
-		.n_linear_ranges = ARRAY_SIZE(rk808_ldo6_voltage_ranges),
-		.vsel_reg = RK808_LDO7_ON_VSEL_REG,
-		.vsel_mask = RK808_LDO_VSEL_MASK,
-		.enable_reg = RK808_LDO_EN_REG,
-		.enable_mask = BIT(6),
-		.enable_time = 400,
-		.owner = THIS_MODULE,
-	}, {
-		.name = "LDO_REG8",
-		.supply_name = "vcc11",
-		.id = RK808_ID_LDO8,
-		.ops = &rk808_reg_ops,
-		.type = REGULATOR_VOLTAGE,
-		.n_voltages = 17,
-		.linear_ranges = rk808_ldo_voltage_ranges,
-		.n_linear_ranges = ARRAY_SIZE(rk808_ldo_voltage_ranges),
-		.vsel_reg = RK808_LDO8_ON_VSEL_REG,
-		.vsel_mask = RK808_LDO_VSEL_MASK,
-		.enable_reg = RK808_LDO_EN_REG,
-		.enable_mask = BIT(7),
-		.enable_time = 400,
-		.owner = THIS_MODULE,
-	}, {
-		.name = "SWITCH_REG1",
-		.supply_name = "vcc8",
-		.id = RK808_ID_SWITCH1,
-		.ops = &rk808_switch_ops,
-		.type = REGULATOR_VOLTAGE,
-		.enable_reg = RK808_DCDC_EN_REG,
-		.enable_mask = BIT(5),
-		.owner = THIS_MODULE,
-	}, {
-		.name = "SWITCH_REG2",
-		.supply_name = "vcc12",
-		.id = RK808_ID_SWITCH2,
-		.ops = &rk808_switch_ops,
-		.type = REGULATOR_VOLTAGE,
-		.enable_reg = RK808_DCDC_EN_REG,
-		.enable_mask = BIT(6),
-		.owner = THIS_MODULE,
 	},
-};
-
-static struct of_regulator_match rk808_reg_matches[] = {
-	[RK808_ID_DCDC1]	= { .name = "DCDC_REG1" },
-	[RK808_ID_DCDC2]	= { .name = "DCDC_REG2" },
-	[RK808_ID_DCDC3]	= { .name = "DCDC_REG3" },
-	[RK808_ID_DCDC4]	= { .name = "DCDC_REG4" },
-	[RK808_ID_LDO1]		= { .name = "LDO_REG1" },
-	[RK808_ID_LDO2]		= { .name = "LDO_REG2" },
-	[RK808_ID_LDO3]		= { .name = "LDO_REG3" },
-	[RK808_ID_LDO4]		= { .name = "LDO_REG4" },
-	[RK808_ID_LDO5]		= { .name = "LDO_REG5" },
-	[RK808_ID_LDO6]		= { .name = "LDO_REG6" },
-	[RK808_ID_LDO7]		= { .name = "LDO_REG7" },
-	[RK808_ID_LDO8]		= { .name = "LDO_REG8" },
-	[RK808_ID_SWITCH1]	= { .name = "SWITCH_REG1" },
-	[RK808_ID_SWITCH2]	= { .name = "SWITCH_REG2" },
+	RK8XX_DESC(RK808_ID_LDO4, "LDO_REG4", "vcc9", 1800, 3400, 100,
+		RK808_LDO4_ON_VSEL_REG, RK808_LDO_VSEL_MASK, RK808_LDO_EN_REG,
+		BIT(3), 400),
+	RK8XX_DESC(RK808_ID_LDO5, "LDO_REG5", "vcc9", 1800, 3400, 100,
+		RK808_LDO5_ON_VSEL_REG, RK808_LDO_VSEL_MASK, RK808_LDO_EN_REG,
+		BIT(4), 400),
+	RK8XX_DESC(RK808_ID_LDO6, "LDO_REG6", "vcc10", 800, 2500, 100,
+		RK808_LDO6_ON_VSEL_REG, RK808_LDO_VSEL_MASK, RK808_LDO_EN_REG,
+		BIT(5), 400),
+	RK8XX_DESC(RK808_ID_LDO7, "LDO_REG7", "vcc7", 800, 2500, 100,
+		RK808_LDO7_ON_VSEL_REG, RK808_LDO_VSEL_MASK, RK808_LDO_EN_REG,
+		BIT(6), 400),
+	RK8XX_DESC(RK808_ID_LDO8, "LDO_REG8", "vcc11", 1800, 3400, 100,
+		RK808_LDO8_ON_VSEL_REG, RK808_LDO_VSEL_MASK, RK808_LDO_EN_REG,
+		BIT(7), 400),
+	RK8XX_DESC_SWITCH(RK808_ID_SWITCH1, "SWITCH_REG1", "vcc8",
+		RK808_DCDC_EN_REG, BIT(5)),
+	RK8XX_DESC_SWITCH(RK808_ID_SWITCH2, "SWITCH_REG2", "vcc12",
+		RK808_DCDC_EN_REG, BIT(6)),
 };
 
 static int rk808_regulator_dt_parse_pdata(struct device *dev,
@@ -529,17 +460,12 @@
 				   struct rk808_regulator_data *pdata)
 {
 	struct device_node *np;
-	int tmp, ret, i;
+	int tmp, ret = 0, i;
 
 	np = of_get_child_by_name(client_dev->of_node, "regulators");
 	if (!np)
 		return -ENXIO;
 
-	ret = of_regulator_match(dev, np, rk808_reg_matches,
-				 RK808_NUM_REGULATORS);
-	if (ret < 0)
-		goto dt_parse_end;
-
 	for (i = 0; i < ARRAY_SIZE(pdata->dvs_gpio); i++) {
 		pdata->dvs_gpio[i] =
 			devm_gpiod_get_index_optional(client_dev, "dvs", i,
@@ -586,18 +512,12 @@
 
 	platform_set_drvdata(pdev, pdata);
 
+	config.dev = &client->dev;
+	config.driver_data = pdata;
+	config.regmap = rk808->regmap;
+
 	/* Instantiate the regulators */
 	for (i = 0; i < RK808_NUM_REGULATORS; i++) {
-		if (!rk808_reg_matches[i].init_data ||
-		    !rk808_reg_matches[i].of_node)
-			continue;
-
-		config.dev = &client->dev;
-		config.driver_data = pdata;
-		config.regmap = rk808->regmap;
-		config.of_node = rk808_reg_matches[i].of_node;
-		config.init_data = rk808_reg_matches[i].init_data;
-
 		rk808_rdev = devm_regulator_register(&pdev->dev,
 						     &rk808_reg[i], &config);
 		if (IS_ERR(rk808_rdev)) {
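
The RK8XX_DESC() entries above drop the single-entry linear_ranges tables in favor of plain min_uV/uV_step descriptors, so the regulator core can do the selector math with its generic linear helpers. Below is a simplified standalone model of that mapping; it mirrors the idea behind regulator_{list,map}_voltage_linear() but is not the core's exact implementation:

	#include <stdio.h>

	struct linear_desc {
		int min_uV;
		int uV_step;
		int n_voltages;
	};

	static int list_voltage(const struct linear_desc *d, int sel)
	{
		return d->min_uV + sel * d->uV_step;
	}

	static int map_voltage(const struct linear_desc *d, int uV)
	{
		int sel;

		if (uV < d->min_uV)
			return -1;

		/* round up so the result never undershoots the request */
		sel = (uV - d->min_uV + d->uV_step - 1) / d->uV_step;
		return sel < d->n_voltages ? sel : -1;
	}

	int main(void)
	{
		/* LDO_REG1 above: 1800..3400 mV in 100 mV steps, 17 selectors */
		struct linear_desc ldo1 = { 1800000, 100000,
					    (3400 - 1800) / 100 + 1 };
		int sel = map_voltage(&ldo1, 2500000);

		printf("2.5 V -> sel %d -> %d uV\n",
		       sel, list_voltage(&ldo1, sel));
		return 0;
	}
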
diff --git a/drivers/regulator/s2mps11.c b/drivers/regulator/s2mps11.c
index 6dfa350..02fb6b4 100644
--- a/drivers/regulator/s2mps11.c
+++ b/drivers/regulator/s2mps11.c
@@ -267,6 +267,7 @@
 	.ops		= &s2mps11_ldo_ops,		\
 	.type		= REGULATOR_VOLTAGE,		\
 	.owner		= THIS_MODULE,			\
+	.ramp_delay	= RAMP_DELAY_12_MVUS,		\
 	.min_uV		= MIN_800_MV,			\
 	.uV_step	= step,				\
 	.n_voltages	= S2MPS11_LDO_N_VOLTAGES,	\
@@ -1237,17 +1238,7 @@
 	.id_table = s2mps11_pmic_id,
 };
 
-static int __init s2mps11_pmic_init(void)
-{
-	return platform_driver_register(&s2mps11_pmic_driver);
-}
-subsys_initcall(s2mps11_pmic_init);
-
-static void __exit s2mps11_pmic_exit(void)
-{
-	platform_driver_unregister(&s2mps11_pmic_driver);
-}
-module_exit(s2mps11_pmic_exit);
+module_platform_driver(s2mps11_pmic_driver);
 
 /* Module information */
 MODULE_AUTHOR("Sangbeom Kim <sbkim73@samsung.com>");
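
module_platform_driver() above removes the hand-rolled registration boilerplate. Roughly, the macro expands to the pair below (a sketch of its effect via module_driver(), not the literal kernel definition); note the initcall level moves from subsys_initcall to module_init, i.e. device_initcall when built in:

	/* references the s2mps11_pmic_driver defined in the file above */
	static int __init s2mps11_pmic_driver_init(void)
	{
		return platform_driver_register(&s2mps11_pmic_driver);
	}
	module_init(s2mps11_pmic_driver_init);

	static void __exit s2mps11_pmic_driver_exit(void)
	{
		platform_driver_unregister(&s2mps11_pmic_driver);
	}
	module_exit(s2mps11_pmic_driver_exit);
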
diff --git a/drivers/regulator/tps6524x-regulator.c b/drivers/regulator/tps6524x-regulator.c
index 9d6ea3a..67cac26 100644
--- a/drivers/regulator/tps6524x-regulator.c
+++ b/drivers/regulator/tps6524x-regulator.c
@@ -600,7 +600,7 @@
 
 	memset(hw, 0, sizeof(struct tps6524x));
 	hw->dev = dev;
-	hw->spi = spi_dev_get(spi);
+	hw->spi = spi;
 	mutex_init(&hw->lock);
 
 	for (i = 0; i < N_REGULATORS; i++, info++, init_data++) {
diff --git a/drivers/regulator/twl-regulator.c b/drivers/regulator/twl-regulator.c
index 955a6fb..faeb5ee 100644
--- a/drivers/regulator/twl-regulator.c
+++ b/drivers/regulator/twl-regulator.c
@@ -21,7 +21,7 @@
 #include <linux/regulator/machine.h>
 #include <linux/regulator/of_regulator.h>
 #include <linux/i2c/twl.h>
-
+#include <linux/delay.h>
 
 /*
  * The TWL4030/TWL5030/TPS659x0/TWL6030 family chips include power management, a
@@ -188,6 +188,74 @@
 	return grp && (val == TWL6030_CFG_STATE_ON);
 }
 
+#define PB_I2C_BUSY	BIT(0)
+#define PB_I2C_BWEN	BIT(1)
+
+/* Wait until the buffer is ready to send a word on the power bus. */
+static int twl4030_wait_pb_ready(void)
+{
+	int	ret;
+	int	timeout = 10;
+	u8	val;
+
+	do {
+		ret = twl_i2c_read_u8(TWL_MODULE_PM_MASTER, &val,
+				      TWL4030_PM_MASTER_PB_CFG);
+		if (ret < 0)
+			return ret;
+
+		if (!(val & PB_I2C_BUSY))
+			return 0;
+
+		mdelay(1);
+		timeout--;
+	} while (timeout);
+
+	return -ETIMEDOUT;
+}
+
+/* Send a word over the powerbus */
+static int twl4030_send_pb_msg(unsigned msg)
+{
+	u8	val;
+	int	ret;
+
+	/* save powerbus configuration */
+	ret = twl_i2c_read_u8(TWL_MODULE_PM_MASTER, &val,
+			      TWL4030_PM_MASTER_PB_CFG);
+	if (ret < 0)
+		return ret;
+
+	/* Enable i2c access to powerbus */
+	ret = twl_i2c_write_u8(TWL_MODULE_PM_MASTER, val | PB_I2C_BWEN,
+			       TWL4030_PM_MASTER_PB_CFG);
+	if (ret < 0)
+		return ret;
+
+	ret = twl4030_wait_pb_ready();
+	if (ret < 0)
+		return ret;
+
+	ret = twl_i2c_write_u8(TWL_MODULE_PM_MASTER, msg >> 8,
+			       TWL4030_PM_MASTER_PB_WORD_MSB);
+	if (ret < 0)
+		return ret;
+
+	ret = twl_i2c_write_u8(TWL_MODULE_PM_MASTER, msg & 0xff,
+			       TWL4030_PM_MASTER_PB_WORD_LSB);
+	if (ret < 0)
+		return ret;
+
+	ret = twl4030_wait_pb_ready();
+	if (ret < 0)
+		return ret;
+
+	/* Restore powerbus configuration */
+	return twl_i2c_write_u8(TWL_MODULE_PM_MASTER, val,
+				TWL4030_PM_MASTER_PB_CFG);
+}
+
 static int twl4030reg_enable(struct regulator_dev *rdev)
 {
 	struct twlreg_info	*info = rdev_get_drvdata(rdev);
@@ -303,7 +371,6 @@
 {
 	struct twlreg_info	*info = rdev_get_drvdata(rdev);
 	unsigned		message;
-	int			status;
 
 	/* We can only set the mode through state machine commands... */
 	switch (mode) {
@@ -317,20 +384,19 @@
 		return -EINVAL;
 	}
 
-	/* Ensure the resource is associated with some group */
-	status = twlreg_grp(rdev);
-	if (status < 0)
-		return status;
-	if (!(status & (P3_GRP_4030 | P2_GRP_4030 | P1_GRP_4030)))
-		return -EACCES;
+	return twl4030_send_pb_msg(message);
+}
 
-	status = twl_i2c_write_u8(TWL_MODULE_PM_MASTER,
-			message >> 8, TWL4030_PM_MASTER_PB_WORD_MSB);
-	if (status < 0)
-		return status;
-
-	return twl_i2c_write_u8(TWL_MODULE_PM_MASTER,
-			message & 0xff, TWL4030_PM_MASTER_PB_WORD_LSB);
+static inline unsigned int twl4030reg_map_mode(unsigned int mode)
+{
+	switch (mode) {
+	case RES_STATE_ACTIVE:
+		return REGULATOR_MODE_NORMAL;
+	case RES_STATE_SLEEP:
+		return REGULATOR_MODE_STANDBY;
+	default:
+		return -EINVAL;
+	}
 }
 
 static int twl6030reg_set_mode(struct regulator_dev *rdev, unsigned mode)
@@ -835,10 +901,11 @@
 #define TWL4030_FIXED_LDO(label, offset, mVolts, num, turnon_delay, \
 			remap_conf) \
 		TWL_FIXED_LDO(label, offset, mVolts, num, turnon_delay, \
-			remap_conf, TWL4030, twl4030fixed_ops)
+			remap_conf, TWL4030, twl4030fixed_ops, \
+			twl4030reg_map_mode)
 #define TWL6030_FIXED_LDO(label, offset, mVolts, turnon_delay) \
 		TWL_FIXED_LDO(label, offset, mVolts, 0x0, turnon_delay, \
-			0x0, TWL6030, twl6030fixed_ops)
+			0x0, TWL6030, twl6030fixed_ops, 0x0)
 
 #define TWL4030_ADJUSTABLE_LDO(label, offset, num, turnon_delay, remap_conf) \
 static const struct twlreg_info TWL4030_INFO_##label = { \
@@ -855,6 +922,7 @@
 		.type = REGULATOR_VOLTAGE, \
 		.owner = THIS_MODULE, \
 		.enable_time = turnon_delay, \
+		.of_map_mode = twl4030reg_map_mode, \
 		}, \
 	}
 
@@ -870,6 +938,7 @@
 		.type = REGULATOR_VOLTAGE, \
 		.owner = THIS_MODULE, \
 		.enable_time = turnon_delay, \
+		.of_map_mode = twl4030reg_map_mode, \
 		}, \
 	}
 
@@ -915,7 +984,7 @@
 	}
 
 #define TWL_FIXED_LDO(label, offset, mVolts, num, turnon_delay, remap_conf, \
-		family, operations) \
+		family, operations, map_mode) \
 static const struct twlreg_info TWLFIXED_INFO_##label = { \
 	.base = offset, \
 	.id = num, \
@@ -930,6 +999,7 @@
 		.owner = THIS_MODULE, \
 		.min_uV = mVolts * 1000, \
 		.enable_time = turnon_delay, \
+		.of_map_mode = map_mode, \
 		}, \
 	}
 
diff --git a/drivers/soc/qcom/spm.c b/drivers/soc/qcom/spm.c
index 5548a31..1fcbb22 100644
--- a/drivers/soc/qcom/spm.c
+++ b/drivers/soc/qcom/spm.c
@@ -274,7 +274,7 @@
 	return per_cpu(cpu_spm_drv, cpu) ? 0 : -ENXIO;
 }
 
-static struct cpuidle_ops qcom_cpuidle_ops __initdata = {
+static const struct cpuidle_ops qcom_cpuidle_ops __initconst = {
 	.suspend = qcom_idle_enter,
 	.init = qcom_cpuidle_init,
 };
diff --git a/drivers/staging/unisys/visorbus/visorchipset.c b/drivers/staging/unisys/visorbus/visorchipset.c
index 5fbda7b..9cf4f84 100644
--- a/drivers/staging/unisys/visorbus/visorchipset.c
+++ b/drivers/staging/unisys/visorbus/visorchipset.c
@@ -2425,7 +2425,7 @@
 {
 	unsigned int eax, ebx, ecx, edx;
 
-	if (cpu_has_hypervisor) {
+	if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
 		/* check the ID */
 		cpuid(UNISYS_SPAR_LEAF_ID, &eax, &ebx, &ecx, &edx);
 		return  (ebx == UNISYS_SPAR_ID_EBX) &&
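
boot_cpu_has(X86_FEATURE_HYPERVISOR) above tests CPUID leaf 1, ECX bit 31, which hypervisors set to advertise themselves; the driver then reads a vendor leaf to check for sPAR specifically. A userspace analogue using the GCC/Clang cpuid.h helpers (x86 only, illustrative, not the kernel API):

	#include <cpuid.h>
	#include <stdio.h>

	int main(void)
	{
		unsigned int eax, ebx, ecx, edx;

		if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
			return 1;

		if (ecx & (1u << 31)) {
			/* vendor leaf 0x40000000 carries the signature */
			__cpuid(0x40000000, eax, ebx, ecx, edx);
			printf("hypervisor signature: %.4s%.4s%.4s\n",
			       (char *)&ebx, (char *)&ecx, (char *)&edx);
		} else {
			printf("bare metal\n");
		}
		return 0;
	}
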
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index 3c3dc4a..d89d60c 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -338,31 +338,9 @@
 	  hot & critical. The critical trip point default value is set by
 	  underlying BIOS/Firmware.
 
-config INT340X_THERMAL
-	tristate "ACPI INT340X thermal drivers"
-	depends on X86 && ACPI
-	select THERMAL_GOV_USER_SPACE
-	select ACPI_THERMAL_REL
-	select ACPI_FAN
-	select INTEL_SOC_DTS_IOSF_CORE
-	select THERMAL_WRITABLE_TRIPS
-	help
-	  Newer laptops and tablets that use ACPI may have thermal sensors and
-	  other devices with thermal control capabilities outside the core
-	  CPU/SOC, for thermal safety reasons.
-	  They are exposed for the OS to use via the INT3400 ACPI device object
-	  as the master, and INT3401~INT340B ACPI device objects as the slaves.
-	  Enable this to expose the temperature information and cooling ability
-	  from these objects to userspace via the normal thermal framework.
-	  This means that a wide range of applications and GUI widgets can show
-	  the information to the user or use this information for making
-	  decisions. For example, the Intel Thermal Daemon can use this
-	  information to allow the user to select his laptop to run without
-	  turning on the fans.
-
-config ACPI_THERMAL_REL
-	tristate
-	depends on ACPI
+menu "ACPI INT340X thermal drivers"
+source drivers/thermal/int340x_thermal/Kconfig
+endmenu
 
 config INTEL_PCH_THERMAL
 	tristate "Intel PCH Thermal Reporting Driver"
diff --git a/drivers/thermal/int340x_thermal/Kconfig b/drivers/thermal/int340x_thermal/Kconfig
new file mode 100644
index 0000000..0582bd1
--- /dev/null
+++ b/drivers/thermal/int340x_thermal/Kconfig
@@ -0,0 +1,42 @@
+#
+# ACPI INT340x thermal drivers configuration
+#
+
+config INT340X_THERMAL
+	tristate "ACPI INT340X thermal drivers"
+	depends on X86 && ACPI
+	select THERMAL_GOV_USER_SPACE
+	select ACPI_THERMAL_REL
+	select ACPI_FAN
+	select INTEL_SOC_DTS_IOSF_CORE
+	help
+	  Newer laptops and tablets that use ACPI may have thermal sensors and
+	  other devices with thermal control capabilities outside the core
+	  CPU/SOC, for thermal safety reasons.
+	  They are exposed for the OS to use via the INT3400 ACPI device object
+	  as the master, and INT3401~INT340B ACPI device objects as the slaves.
+	  Enable this to expose the temperature information and cooling ability
+	  from these objects to userspace via the normal thermal framework.
+	  This means that a wide range of applications and GUI widgets can show
+	  the information to the user or use this information for making
+	  decisions. For example, the Intel Thermal Daemon can use this
+	  information to let the user run the laptop without turning on
+	  the fans.
+
+config ACPI_THERMAL_REL
+	tristate
+	depends on ACPI
+
+if INT340X_THERMAL
+
+config INT3406_THERMAL
+	tristate "ACPI INT3406 display thermal driver"
+	depends on ACPI_VIDEO
+	help
+	  The display thermal device represents the LED/LCD display panel
+	  that may or may not include touch support. The main function of
+	  the display thermal device is to allow control of the display
+	  brightness in order to address a thermal condition or to reduce
+	  power consumed by the display device.
+
+endif
diff --git a/drivers/thermal/int340x_thermal/Makefile b/drivers/thermal/int340x_thermal/Makefile
index ba77a34..df0df05 100644
--- a/drivers/thermal/int340x_thermal/Makefile
+++ b/drivers/thermal/int340x_thermal/Makefile
@@ -3,4 +3,5 @@
 obj-$(CONFIG_INT340X_THERMAL)	+= int3402_thermal.o
 obj-$(CONFIG_INT340X_THERMAL)	+= int3403_thermal.o
 obj-$(CONFIG_INT340X_THERMAL)	+= processor_thermal_device.o
+obj-$(CONFIG_INT3406_THERMAL)	+= int3406_thermal.o
 obj-$(CONFIG_ACPI_THERMAL_REL)	+= acpi_thermal_rel.o
diff --git a/drivers/thermal/int340x_thermal/int3406_thermal.c b/drivers/thermal/int340x_thermal/int3406_thermal.c
new file mode 100644
index 0000000..13d431c
--- /dev/null
+++ b/drivers/thermal/int340x_thermal/int3406_thermal.c
@@ -0,0 +1,236 @@
+/*
+ * INT3406 thermal driver for display participant device
+ *
+ * Copyright (C) 2016, Intel Corporation
+ * Authors: Aaron Lu <aaron.lu@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/acpi.h>
+#include <linux/backlight.h>
+#include <linux/thermal.h>
+#include <acpi/video.h>
+
+#define INT3406_BRIGHTNESS_LIMITS_CHANGED	0x80
+
+struct int3406_thermal_data {
+	int upper_limit;
+	int upper_limit_index;
+	int lower_limit;
+	int lower_limit_index;
+	acpi_handle handle;
+	struct acpi_video_device_brightness *br;
+	struct backlight_device *raw_bd;
+	struct thermal_cooling_device *cooling_dev;
+};
+
+static int int3406_thermal_to_raw(int level, struct int3406_thermal_data *d)
+{
+	int max_level = d->br->levels[d->br->count - 1];
+	int raw_max = d->raw_bd->props.max_brightness;
+
+	return level * raw_max / max_level;
+}
+
+static int int3406_thermal_to_acpi(int level, struct int3406_thermal_data *d)
+{
+	int raw_max = d->raw_bd->props.max_brightness;
+	int max_level = d->br->levels[d->br->count - 1];
+
+	return level * max_level / raw_max;
+}
+
+static int
+int3406_thermal_get_max_state(struct thermal_cooling_device *cooling_dev,
+			      unsigned long *state)
+{
+	struct int3406_thermal_data *d = cooling_dev->devdata;
+	int index = d->lower_limit_index ? d->lower_limit_index : 2;
+
+	*state = d->br->count - 1 - index;
+	return 0;
+}
+
+static int
+int3406_thermal_set_cur_state(struct thermal_cooling_device *cooling_dev,
+			      unsigned long state)
+{
+	struct int3406_thermal_data *d = cooling_dev->devdata;
+	int level, raw_level;
+
+	if (state > d->br->count - 3)
+		return -EINVAL;
+
+	state = d->br->count - 1 - state;
+	level = d->br->levels[state];
+
+	if ((d->upper_limit && level > d->upper_limit) ||
+	    (d->lower_limit && level < d->lower_limit))
+		return -EINVAL;
+
+	raw_level = int3406_thermal_to_raw(level, d);
+	return backlight_device_set_brightness(d->raw_bd, raw_level);
+}
+
+static int
+int3406_thermal_get_cur_state(struct thermal_cooling_device *cooling_dev,
+			      unsigned long *state)
+{
+	struct int3406_thermal_data *d = cooling_dev->devdata;
+	int raw_level, level, i;
+	int *levels = d->br->levels;
+
+	raw_level = d->raw_bd->props.brightness;
+	level = int3406_thermal_to_acpi(raw_level, d);
+
+	/*
+	 * There is no 1:1 mapping between the firmware interface levels and
+	 * the raw interface levels, so we have to find one that is close enough.
+	 */
+	for (i = 2; i < d->br->count; i++) {
+		if (level < levels[i]) {
+			if (i == 2)
+				break;
+			if ((level - levels[i - 1]) < (levels[i] - level))
+				i--;
+			break;
+		}
+	}
+
+	*state = d->br->count - 1 - i;
+	return 0;
+}
+
+static const struct thermal_cooling_device_ops video_cooling_ops = {
+	.get_max_state = int3406_thermal_get_max_state,
+	.get_cur_state = int3406_thermal_get_cur_state,
+	.set_cur_state = int3406_thermal_set_cur_state,
+};
+
+static int int3406_thermal_get_index(int *array, int nr, int value)
+{
+	int i;
+
+	for (i = 0; i < nr; i++) {
+		if (array[i] == value)
+			break;
+	}
+	return i == nr ? -ENOENT : i;
+}
+
+static void int3406_thermal_get_limit(struct int3406_thermal_data *d)
+{
+	acpi_status status;
+	unsigned long long lower_limit, upper_limit;
+	int index;
+
+	status = acpi_evaluate_integer(d->handle, "DDDL", NULL, &lower_limit);
+	if (ACPI_SUCCESS(status)) {
+		index = int3406_thermal_get_index(d->br->levels, d->br->count,
+						  lower_limit);
+		if (index > 0) {
+			d->lower_limit = (int)lower_limit;
+			d->lower_limit_index = index;
+		}
+	}
+
+	status = acpi_evaluate_integer(d->handle, "DDPC", NULL, &upper_limit);
+	if (ACPI_SUCCESS(status)) {
+		index = int3406_thermal_get_index(d->br->levels, d->br->count,
+						  upper_limit);
+		if (index > 0) {
+			d->upper_limit = (int)upper_limit;
+			d->upper_limit_index = index;
+		}
+	}
+}
+
+static void int3406_notify(acpi_handle handle, u32 event, void *data)
+{
+	if (event == INT3406_BRIGHTNESS_LIMITS_CHANGED)
+		int3406_thermal_get_limit(data);
+}
+
+static int int3406_thermal_probe(struct platform_device *pdev)
+{
+	struct acpi_device *adev = ACPI_COMPANION(&pdev->dev);
+	struct int3406_thermal_data *d;
+	struct backlight_device *bd;
+	int ret;
+
+	if (!ACPI_HANDLE(&pdev->dev))
+		return -ENODEV;
+
+	d = devm_kzalloc(&pdev->dev, sizeof(*d), GFP_KERNEL);
+	if (!d)
+		return -ENOMEM;
+	d->handle = ACPI_HANDLE(&pdev->dev);
+
+	bd = backlight_device_get_by_type(BACKLIGHT_RAW);
+	if (!bd)
+		return -ENODEV;
+	d->raw_bd = bd;
+
+	ret = acpi_video_get_levels(ACPI_COMPANION(&pdev->dev), &d->br);
+	if (ret)
+		return ret;
+
+	int3406_thermal_get_limit(d);
+
+	d->cooling_dev = thermal_cooling_device_register(acpi_device_bid(adev),
+							 d, &video_cooling_ops);
+	if (IS_ERR(d->cooling_dev))
+		goto err;
+
+	ret = acpi_install_notify_handler(adev->handle, ACPI_DEVICE_NOTIFY,
+					  int3406_notify, d);
+	if (ret)
+		goto err_cdev;
+
+	platform_set_drvdata(pdev, d);
+
+	return 0;
+
+err_cdev:
+	thermal_cooling_device_unregister(d->cooling_dev);
+err:
+	kfree(d->br);
+	return -ENODEV;
+}
+
+static int int3406_thermal_remove(struct platform_device *pdev)
+{
+	struct int3406_thermal_data *d = platform_get_drvdata(pdev);
+
+	thermal_cooling_device_unregister(d->cooling_dev);
+	kfree(d->br);
+	return 0;
+}
+
+static const struct acpi_device_id int3406_thermal_match[] = {
+	{"INT3406", 0},
+	{}
+};
+
+MODULE_DEVICE_TABLE(acpi, int3406_thermal_match);
+
+static struct platform_driver int3406_thermal_driver = {
+	.probe = int3406_thermal_probe,
+	.remove = int3406_thermal_remove,
+	.driver = {
+		   .name = "int3406 thermal",
+		   .owner = THIS_MODULE,
+		   .acpi_match_table = int3406_thermal_match,
+		   },
+};
+
+module_platform_driver(int3406_thermal_driver);
+
+MODULE_DESCRIPTION("INT3406 Thermal driver");
+MODULE_LICENSE("GPL v2");
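
The cooling-device callbacks above map thermal states onto the ACPI brightness table inversely: state 0 selects the brightest usable level and larger states dim further, while levels[0] and levels[1] are skipped as the AC/battery defaults (hence the count - 3 bound). A standalone sketch of that mapping with an illustrative _BCL-style level list:

	#include <stdio.h>

	/* illustrative list: entries 0/1 are defaults, 2.. ascend */
	static const int levels[] = { 100, 50, 10, 25, 50, 75, 100 };
	#define NLEVELS ((int)(sizeof(levels) / sizeof(levels[0])))

	int main(void)
	{
		int state;

		/* state 0 is the brightest level; larger states dim further */
		for (state = 0; state <= NLEVELS - 3; state++)
			printf("state %d -> level %d%%\n",
			       state, levels[NLEVELS - 1 - state]);
		return 0;
	}
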
diff --git a/drivers/video/backlight/backlight.c b/drivers/video/backlight/backlight.c
index bddc8b1..288318a 100644
--- a/drivers/video/backlight/backlight.c
+++ b/drivers/video/backlight/backlight.c
@@ -164,6 +164,30 @@
 	return sprintf(buf, "%d\n", bd->props.brightness);
 }
 
+int backlight_device_set_brightness(struct backlight_device *bd,
+				    unsigned long brightness)
+{
+	int rc = -ENXIO;
+
+	mutex_lock(&bd->ops_lock);
+	if (bd->ops) {
+		if (brightness > bd->props.max_brightness)
+			rc = -EINVAL;
+		else {
+			pr_debug("set brightness to %lu\n", brightness);
+			bd->props.brightness = brightness;
+			backlight_update_status(bd);
+			rc = 0;
+		}
+	}
+	mutex_unlock(&bd->ops_lock);
+
+	backlight_generate_event(bd, BACKLIGHT_UPDATE_SYSFS);
+
+	return rc;
+}
+EXPORT_SYMBOL(backlight_device_set_brightness);
+
 static ssize_t brightness_store(struct device *dev,
 		struct device_attribute *attr, const char *buf, size_t count)
 {
@@ -175,24 +199,9 @@
 	if (rc)
 		return rc;
 
-	rc = -ENXIO;
+	rc = backlight_device_set_brightness(bd, brightness);
 
-	mutex_lock(&bd->ops_lock);
-	if (bd->ops) {
-		if (brightness > bd->props.max_brightness)
-			rc = -EINVAL;
-		else {
-			pr_debug("set brightness to %lu\n", brightness);
-			bd->props.brightness = brightness;
-			backlight_update_status(bd);
-			rc = count;
-		}
-	}
-	mutex_unlock(&bd->ops_lock);
-
-	backlight_generate_event(bd, BACKLIGHT_UPDATE_SYSFS);
-
-	return rc;
+	return rc ? rc : count;
 }
 static DEVICE_ATTR_RW(brightness);
 
@@ -380,7 +389,7 @@
 }
 EXPORT_SYMBOL(backlight_device_register);
 
-bool backlight_device_registered(enum backlight_type type)
+struct backlight_device *backlight_device_get_by_type(enum backlight_type type)
 {
 	bool found = false;
 	struct backlight_device *bd;
@@ -394,9 +403,9 @@
 	}
 	mutex_unlock(&backlight_dev_list_mutex);
 
-	return found;
+	return found ? bd : NULL;
 }
-EXPORT_SYMBOL(backlight_device_registered);
+EXPORT_SYMBOL(backlight_device_get_by_type);
 
 /**
  * backlight_device_unregister - unregisters a backlight device object.
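
Together, the two exported interfaces above let a kernel consumer locate a raw backlight and program it directly, which is what the INT3406 driver earlier in this series does. A minimal sketch of such a consumer (dim_raw_backlight() is a hypothetical helper, not an existing kernel function):

	#include <linux/errno.h>
	#include <linux/backlight.h>

	static int dim_raw_backlight(unsigned long brightness)
	{
		struct backlight_device *bd;

		bd = backlight_device_get_by_type(BACKLIGHT_RAW);
		if (!bd)
			return -ENODEV;

		/* values above props.max_brightness come back as -EINVAL */
		return backlight_device_set_brightness(bd, brightness);
	}
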
diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig
index 983280e..e5a391a 100644
--- a/drivers/video/fbdev/Kconfig
+++ b/drivers/video/fbdev/Kconfig
@@ -761,7 +761,7 @@
 
 config FB_EFI
 	bool "EFI-based Framebuffer Support"
-	depends on (FB = y) && X86 && EFI
+	depends on (FB = y) && !IA64 && EFI
 	select FB_CFB_FILLRECT
 	select FB_CFB_COPYAREA
 	select FB_CFB_IMAGEBLIT
diff --git a/drivers/video/fbdev/efifb.c b/drivers/video/fbdev/efifb.c
index 95d293b..f4c045c 100644
--- a/drivers/video/fbdev/efifb.c
+++ b/drivers/video/fbdev/efifb.c
@@ -6,16 +6,14 @@
  *
  */
 
-#include <linux/module.h>
 #include <linux/kernel.h>
+#include <linux/efi.h>
 #include <linux/errno.h>
 #include <linux/fb.h>
 #include <linux/platform_device.h>
 #include <linux/screen_info.h>
-#include <linux/dmi.h>
-#include <linux/pci.h>
 #include <video/vga.h>
-#include <asm/sysfb.h>
+#include <asm/efi.h>
 
 static bool request_mem_succeeded = false;
 
@@ -85,21 +83,13 @@
 static int efifb_setup(char *options)
 {
 	char *this_opt;
-	int i;
 
 	if (options && *options) {
 		while ((this_opt = strsep(&options, ",")) != NULL) {
 			if (!*this_opt) continue;
 
-			for (i = 0; i < M_UNKNOWN; i++) {
-				if (efifb_dmi_list[i].base != 0 &&
-				    !strcmp(this_opt, efifb_dmi_list[i].optname)) {
-					screen_info.lfb_base = efifb_dmi_list[i].base;
-					screen_info.lfb_linelength = efifb_dmi_list[i].stride;
-					screen_info.lfb_width = efifb_dmi_list[i].width;
-					screen_info.lfb_height = efifb_dmi_list[i].height;
-				}
-			}
+			efifb_setup_from_dmi(&screen_info, this_opt);
+
 			if (!strncmp(this_opt, "base:", 5))
 				screen_info.lfb_base = simple_strtoul(this_opt+5, NULL, 0);
 			else if (!strncmp(this_opt, "stride:", 7))
@@ -338,5 +328,4 @@
 	.remove = efifb_remove,
 };
 
-module_platform_driver(efifb_driver);
-MODULE_LICENSE("GPL");
+builtin_platform_driver(efifb_driver);
diff --git a/drivers/xen/efi.c b/drivers/xen/efi.c
index be7e56a..e9d2135 100644
--- a/drivers/xen/efi.c
+++ b/drivers/xen/efi.c
@@ -316,7 +316,6 @@
 	.get_next_high_mono_count = xen_efi_get_next_high_mono_count,
 	.reset_system             = NULL, /* Functionality provided by Xen. */
 	.set_virtual_address_map  = NULL, /* Not used under Xen. */
-	.memmap                   = NULL, /* Not used under Xen. */
 	.flags			  = 0     /* Initialized later. */
 };
 
diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c
index d48e0d2..5f22e74 100644
--- a/fs/efivarfs/file.c
+++ b/fs/efivarfs/file.c
@@ -157,7 +157,7 @@
 	return 0;
 }
 
-long
+static long
 efivarfs_file_ioctl(struct file *file, unsigned int cmd, unsigned long p)
 {
 	void __user *arg = (void __user *)p;
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index 553c5d2..9cb54a3 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -216,8 +216,7 @@
 
 	INIT_LIST_HEAD(&efivarfs_list);
 
-	err = efivar_init(efivarfs_callback, (void *)sb, false,
-			  true, &efivarfs_list);
+	err = efivar_init(efivarfs_callback, (void *)sb, true, &efivarfs_list);
 	if (err)
 		__efivar_entry_iter(efivarfs_destroy, &efivarfs_list, NULL, NULL);
 
diff --git a/fs/namei.c b/fs/namei.c
index 30145f8..42f8ca0 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1794,30 +1794,49 @@
 	return hash_64(hash, 32);
 }
 
+/*
+ * This is George Marsaglia's XORSHIFT generator.
+ * It implements a maximum-period LFSR in only a few
+ * instructions.  It also has the property (required
+ * by hash_name()) that mix_hash(0) = 0.
+ */
+static inline unsigned long mix_hash(unsigned long hash)
+{
+	hash ^= hash << 13;
+	hash ^= hash >> 7;
+	hash ^= hash << 17;
+	return hash;
+}
+
 #else	/* 32-bit case */
 
 #define fold_hash(x) (x)
 
+static inline unsigned long mix_hash(unsigned long hash)
+{
+	hash ^= hash << 13;
+	hash ^= hash >> 17;
+	hash ^= hash << 5;
+	return hash;
+}
+
 #endif
 
 unsigned int full_name_hash(const unsigned char *name, unsigned int len)
 {
-	unsigned long a, mask;
-	unsigned long hash = 0;
+	unsigned long a, hash = 0;
 
 	for (;;) {
 		a = load_unaligned_zeropad(name);
 		if (len < sizeof(unsigned long))
 			break;
-		hash += a;
-		hash *= 9;
+		hash = mix_hash(hash + a);
 		name += sizeof(unsigned long);
 		len -= sizeof(unsigned long);
 		if (!len)
 			goto done;
 	}
-	mask = bytemask_from_count(len);
-	hash += mask & a;
+	hash += a & bytemask_from_count(len);
 done:
 	return fold_hash(hash);
 }
@@ -1835,7 +1854,7 @@
 	hash = a = 0;
 	len = -sizeof(unsigned long);
 	do {
-		hash = (hash + a) * 9;
+		hash = mix_hash(hash + a);
 		len += sizeof(unsigned long);
 		a = load_unaligned_zeropad(name+len);
 		b = a ^ REPEAT_BYTE('/');
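
mix_hash() above is Marsaglia's 64-bit xorshift with the classic 13/7/17 shift triple: zero maps to zero (the property hash_name() relies on) and every nonzero state sits on a single maximal-length cycle. A standalone demonstration:

	#include <stdio.h>
	#include <stdint.h>

	static uint64_t mix_hash(uint64_t hash)
	{
		hash ^= hash << 13;
		hash ^= hash >> 7;
		hash ^= hash << 17;
		return hash;
	}

	int main(void)
	{
		uint64_t h = 1;
		int i;

		/* mixing zero yields zero */
		printf("mix_hash(0) = %llu\n",
		       (unsigned long long)mix_hash(0));

		for (i = 1; i <= 4; i++) {
			h = mix_hash(h);
			printf("step %d: %016llx\n", i, (unsigned long long)h);
		}
		return 0;
	}
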
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 3a93250..4d40e9b 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -87,7 +87,7 @@
 	  .package.elements = (eles)			\
 	}
 
-bool acpi_dev_present(const char *hid);
+bool acpi_dev_found(const char *hid);
 
 #ifdef CONFIG_ACPI
 
diff --git a/include/acpi/acpi_drivers.h b/include/acpi/acpi_drivers.h
index 29c6912..797ae2e 100644
--- a/include/acpi/acpi_drivers.h
+++ b/include/acpi/acpi_drivers.h
@@ -78,7 +78,6 @@
 
 /* ACPI PCI Interrupt Link (pci_link.c) */
 
-int acpi_irq_penalty_init(void);
 int acpi_pci_link_allocate_irq(acpi_handle handle, int index, int *triggering,
 			       int *polarity, char **name);
 int acpi_pci_link_free_irq(acpi_handle handle);
diff --git a/include/acpi/acpiosxf.h b/include/acpi/acpiosxf.h
index d1e34d1..562603d 100644
--- a/include/acpi/acpiosxf.h
+++ b/include/acpi/acpiosxf.h
@@ -96,7 +96,7 @@
 #ifndef ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_predefined_override
 acpi_status
 acpi_os_predefined_override(const struct acpi_predefined_names *init_val,
-			    char **new_val);
+			    acpi_string *new_val);
 #endif
 
 #ifndef ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_table_override
@@ -108,7 +108,7 @@
 #ifndef ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_physical_table_override
 acpi_status
 acpi_os_physical_table_override(struct acpi_table_header *existing_table,
-				acpi_physical_address * new_address,
+				acpi_physical_address *new_address,
 				u32 *new_table_length);
 #endif
 
@@ -203,7 +203,7 @@
 #ifndef ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_get_physical_address
 acpi_status
 acpi_os_get_physical_address(void *logical_address,
-			     acpi_physical_address * physical_address);
+			     acpi_physical_address *physical_address);
 #endif
 
 /*
@@ -379,14 +379,14 @@
 acpi_os_get_table_by_name(char *signature,
 			  u32 instance,
 			  struct acpi_table_header **table,
-			  acpi_physical_address * address);
+			  acpi_physical_address *address);
 #endif
 
 #ifndef ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_get_table_by_index
 acpi_status
 acpi_os_get_table_by_index(u32 index,
 			   struct acpi_table_header **table,
-			   u32 *instance, acpi_physical_address * address);
+			   u32 *instance, acpi_physical_address *address);
 #endif
 
 #ifndef ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_get_table_by_address
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 1755697..4e4c214 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -46,7 +46,7 @@
 
 /* Current ACPICA subsystem version in YYYYMMDD format */
 
-#define ACPI_CA_VERSION                 0x20160108
+#define ACPI_CA_VERSION                 0x20160422
 
 #include <acpi/acconfig.h>
 #include <acpi/actypes.h>
@@ -192,7 +192,7 @@
 /*
  * Optionally support group module level code.
  */
-ACPI_INIT_GLOBAL(u8, acpi_gbl_group_module_level_code, TRUE);
+ACPI_INIT_GLOBAL(u8, acpi_gbl_group_module_level_code, FALSE);
 
 /*
  * Optionally use 32-bit FADT addresses if and when there is a conflict
@@ -484,8 +484,8 @@
 ACPI_EXTERNAL_RETURN_STATUS(acpi_status __init acpi_reallocate_root_table(void))
 
 ACPI_EXTERNAL_RETURN_STATUS(acpi_status __init
-			    acpi_find_root_pointer(acpi_physical_address *
-						   rsdp_address))
+			    acpi_find_root_pointer(acpi_physical_address
+						   *rsdp_address))
 ACPI_EXTERNAL_RETURN_STATUS(acpi_status
 			     acpi_get_table_header(acpi_string signature,
 						   u32 instance,
@@ -530,7 +530,7 @@
 ACPI_EXTERNAL_RETURN_STATUS(acpi_status
 			     acpi_get_handle(acpi_handle parent,
 					     acpi_string pathname,
-					     acpi_handle * ret_handle))
+					     acpi_handle *ret_handle))
 ACPI_EXTERNAL_RETURN_STATUS(acpi_status
 			     acpi_attach_data(acpi_handle object,
 					      acpi_object_handler handler,
@@ -575,15 +575,15 @@
 			    acpi_get_next_object(acpi_object_type type,
 						 acpi_handle parent,
 						 acpi_handle child,
-						 acpi_handle * out_handle))
+						 acpi_handle *out_handle))
 
 ACPI_EXTERNAL_RETURN_STATUS(acpi_status
 			    acpi_get_type(acpi_handle object,
-					  acpi_object_type * out_type))
+					  acpi_object_type *out_type))
 
 ACPI_EXTERNAL_RETURN_STATUS(acpi_status
 			    acpi_get_parent(acpi_handle object,
-					    acpi_handle * out_handle))
+					    acpi_handle *out_handle))
 
 /*
  * Handler interfaces
@@ -755,7 +755,7 @@
 
 ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status
 				acpi_get_gpe_device(u32 gpe_index,
-						    acpi_handle * gpe_device))
+						    acpi_handle *gpe_device))
 
 ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status
 				acpi_install_gpe_block(acpi_handle gpe_device,
@@ -771,8 +771,8 @@
  * Resource interfaces
  */
 typedef
-acpi_status(*acpi_walk_resource_callback) (struct acpi_resource * resource,
-					   void *context);
+acpi_status (*acpi_walk_resource_callback) (struct acpi_resource * resource,
+					    void *context);
 
 ACPI_EXTERNAL_RETURN_STATUS(acpi_status
 			    acpi_get_vendor_resource(acpi_handle device,
@@ -938,7 +938,8 @@
 ACPI_APP_DEPENDENT_RETURN_VOID(ACPI_PRINTF_LIKE(1)
 				void ACPI_INTERNAL_VAR_XFACE
 				acpi_log_error(const char *format, ...))
- acpi_status acpi_initialize_debugger(void);
+
+acpi_status acpi_initialize_debugger(void);
 
 void acpi_terminate_debugger(void);
 
diff --git a/include/acpi/acrestyp.h b/include/acpi/acrestyp.h
index cf2acb8..16c1892 100644
--- a/include/acpi/acrestyp.h
+++ b/include/acpi/acrestyp.h
@@ -417,6 +417,7 @@
 	u8                                      type; \
 	u8                                      producer_consumer;   /* For values, see Producer/Consumer above */\
 	u8                                      slave_mode; \
+	u8                                      connection_sharing; \
 	u8                                      type_revision_id; \
 	u16                                     type_data_length; \
 	u16                                     vendor_length; \
diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h
index 0cb1a00..c19700e 100644
--- a/include/acpi/actbl.h
+++ b/include/acpi/actbl.h
@@ -223,7 +223,7 @@
 /*******************************************************************************
  *
  * FADT - Fixed ACPI Description Table (Signature "FACP")
- *        Version 4
+ *        Version 6
  *
  ******************************************************************************/
 
@@ -413,4 +413,6 @@
 #define ACPI_FADT_V5_SIZE       (u32) (ACPI_FADT_OFFSET (hypervisor_id))
 #define ACPI_FADT_V6_SIZE       (u32) (sizeof (struct acpi_table_fadt))
 
+#define ACPI_FADT_CONFORMANCE   "ACPI 6.1 (FADT version 6)"
+
 #endif				/* __ACTBL_H__ */
diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h
index 16e0136..796d6ba 100644
--- a/include/acpi/actbl1.h
+++ b/include/acpi/actbl1.h
@@ -236,7 +236,8 @@
 	ACPI_EINJ_CHECK_BUSY_STATUS = 6,
 	ACPI_EINJ_GET_COMMAND_STATUS = 7,
 	ACPI_EINJ_SET_ERROR_TYPE_WITH_ADDRESS = 8,
-	ACPI_EINJ_ACTION_RESERVED = 9,	/* 9 and greater are reserved */
+	ACPI_EINJ_GET_EXECUTE_TIMINGS = 9,
+	ACPI_EINJ_ACTION_RESERVED = 10,	/* 10 and greater are reserved */
 	ACPI_EINJ_TRIGGER_ERROR = 0xFF	/* Except for this value */
 };
 
@@ -348,7 +349,8 @@
 	ACPI_ERST_GET_ERROR_RANGE = 13,
 	ACPI_ERST_GET_ERROR_LENGTH = 14,
 	ACPI_ERST_GET_ERROR_ATTRIBUTES = 15,
-	ACPI_ERST_ACTION_RESERVED = 16	/* 16 and greater are reserved */
+	ACPI_ERST_EXECUTE_TIMINGS = 16,
+	ACPI_ERST_ACTION_RESERVED = 17	/* 17 and greater are reserved */
 };
 
 /* Values for Instruction field above */
@@ -427,7 +429,8 @@
 	ACPI_HEST_TYPE_AER_ENDPOINT = 7,
 	ACPI_HEST_TYPE_AER_BRIDGE = 8,
 	ACPI_HEST_TYPE_GENERIC_ERROR = 9,
-	ACPI_HEST_TYPE_RESERVED = 10	/* 10 and greater are reserved */
+	ACPI_HEST_TYPE_GENERIC_ERROR_V2 = 10,
+	ACPI_HEST_TYPE_RESERVED = 11	/* 11 and greater are reserved */
 };
 
 /*
@@ -506,7 +509,11 @@
 	ACPI_HEST_NOTIFY_NMI = 4,
 	ACPI_HEST_NOTIFY_CMCI = 5,	/* ACPI 5.0 */
 	ACPI_HEST_NOTIFY_MCE = 6,	/* ACPI 5.0 */
-	ACPI_HEST_NOTIFY_RESERVED = 7	/* 7 and greater are reserved */
+	ACPI_HEST_NOTIFY_GPIO = 7,	/* ACPI 6.0 */
+	ACPI_HEST_NOTIFY_SEA = 8,	/* ACPI 6.1 */
+	ACPI_HEST_NOTIFY_SEI = 9,	/* ACPI 6.1 */
+	ACPI_HEST_NOTIFY_GSIV = 10,	/* ACPI 6.1 */
+	ACPI_HEST_NOTIFY_RESERVED = 11	/* 11 and greater are reserved */
 };
 
 /* Values for config_write_enable bitfield above */
@@ -603,6 +610,24 @@
 	u32 error_block_length;
 };
 
+/* 10: Generic Hardware Error Source, version 2 */
+
+struct acpi_hest_generic_v2 {
+	struct acpi_hest_header header;
+	u16 related_source_id;
+	u8 reserved;
+	u8 enabled;
+	u32 records_to_preallocate;
+	u32 max_sections_per_record;
+	u32 max_raw_data_length;
+	struct acpi_generic_address error_status_address;
+	struct acpi_hest_notify notify;
+	u32 error_block_length;
+	struct acpi_generic_address read_ack_register;
+	u64 read_ack_preserve;
+	u64 read_ack_write;
+};
+
 /* Generic Error Status block */
 
 struct acpi_hest_generic_status {
@@ -634,6 +659,33 @@
 	u8 fru_text[20];
 };
 
+/* Extension for revision 0x0300 */
+
+struct acpi_hest_generic_data_v300 {
+	u8 section_type[16];
+	u32 error_severity;
+	u16 revision;
+	u8 validation_bits;
+	u8 flags;
+	u32 error_data_length;
+	u8 fru_id[16];
+	u8 fru_text[20];
+	u64 time_stamp;
+};
+
+/* Values for error_severity above */
+
+#define ACPI_HEST_GEN_ERROR_RECOVERABLE     0
+#define ACPI_HEST_GEN_ERROR_FATAL           1
+#define ACPI_HEST_GEN_ERROR_CORRECTED       2
+#define ACPI_HEST_GEN_ERROR_NONE            3
+
+/* Flags for validation_bits above */
+
+#define ACPI_HEST_GEN_VALID_FRU_ID          (1)
+#define ACPI_HEST_GEN_VALID_FRU_STRING      (1<<1)
+#define ACPI_HEST_GEN_VALID_TIMESTAMP       (1<<2)
+
 /*******************************************************************************
  *
  * MADT - Multiple APIC Description Table
@@ -934,7 +986,7 @@
 
 /*******************************************************************************
  *
- * NFIT - NVDIMM Interface Table (ACPI 6.0)
+ * NFIT - NVDIMM Interface Table (ACPI 6.0+)
  *        Version 1
  *
  ******************************************************************************/
@@ -1015,6 +1067,7 @@
 #define ACPI_NFIT_MEM_NOT_ARMED         (1<<3)	/* 03: Memory Device is not armed */
 #define ACPI_NFIT_MEM_HEALTH_OBSERVED   (1<<4)	/* 04: Memory Device observed SMART/health events */
 #define ACPI_NFIT_MEM_HEALTH_ENABLED    (1<<5)	/* 05: SMART/health events enabled */
+#define ACPI_NFIT_MEM_MAP_FAILED        (1<<6)	/* 06: Mapping to SPA failed */
 
 /* 2: Interleave Structure */
 
@@ -1046,7 +1099,10 @@
 	u16 subsystem_vendor_id;
 	u16 subsystem_device_id;
 	u16 subsystem_revision_id;
-	u8 reserved[6];		/* Reserved, must be zero */
+	u8 valid_fields;
+	u8 manufacturing_location;
+	u16 manufacturing_date;
+	u8 reserved[2];		/* Reserved, must be zero */
 	u32 serial_number;
 	u16 code;
 	u16 windows;
@@ -1061,7 +1117,11 @@
 
 /* Flags */
 
-#define ACPI_NFIT_CONTROL_BUFFERED      (1)	/* Block Data Windows implementation is buffered */
+#define ACPI_NFIT_CONTROL_BUFFERED          (1)	/* Block Data Windows implementation is buffered */
+
+/* valid_fields bits */
+
+#define ACPI_NFIT_CONTROL_MFG_INFO_VALID    (1)	/* Manufacturing fields are valid */
 
 /* 5: NVDIMM Block Data Window Region Structure */
 
diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h
index a4ef625..c93dbad 100644
--- a/include/acpi/actbl2.h
+++ b/include/acpi/actbl2.h
@@ -321,7 +321,7 @@
  * DBG2 - Debug Port Table 2
  *        Version 0 (Both main table and subtables)
  *
- * Conforms to "Microsoft Debug Port Table 2 (DBG2)", May 22 2012.
+ * Conforms to "Microsoft Debug Port Table 2 (DBG2)", December 10, 2015
  *
  ******************************************************************************/
 
@@ -371,6 +371,11 @@
 
 #define ACPI_DBG2_16550_COMPATIBLE  0x0000
 #define ACPI_DBG2_16550_SUBSET      0x0001
+#define ACPI_DBG2_ARM_PL011         0x0003
+#define ACPI_DBG2_ARM_SBSA_32BIT    0x000D
+#define ACPI_DBG2_ARM_SBSA_GENERIC  0x000E
+#define ACPI_DBG2_ARM_DCC           0x000F
+#define ACPI_DBG2_BCM2835           0x0010
 
 #define ACPI_DBG2_1394_STANDARD     0x0000
 
@@ -399,7 +404,7 @@
  *        Version 1
  *
  * Conforms to "Intel Virtualization Technology for Directed I/O",
- * Version 2.2, Sept. 2013
+ * Version 2.3, October 2014
  *
  ******************************************************************************/
 
@@ -413,6 +418,8 @@
 /* Masks for Flags field above */
 
 #define ACPI_DMAR_INTR_REMAP        (1)
+#define ACPI_DMAR_X2APIC_OPT_OUT    (1<<1)
+#define ACPI_DMAR_X2APIC_MODE       (1<<2)
 
 /* DMAR subtable header */
 
@@ -655,7 +662,7 @@
  * IORT - IO Remapping Table
  *
  * Conforms to "IO Remapping Table System Software on ARM Platforms",
- * Document number: ARM DEN 0049A, 2015
+ * Document number: ARM DEN 0049B, October 2015
  *
  ******************************************************************************/
 
@@ -685,7 +692,8 @@
 	ACPI_IORT_NODE_ITS_GROUP = 0x00,
 	ACPI_IORT_NODE_NAMED_COMPONENT = 0x01,
 	ACPI_IORT_NODE_PCI_ROOT_COMPLEX = 0x02,
-	ACPI_IORT_NODE_SMMU = 0x03
+	ACPI_IORT_NODE_SMMU = 0x03,
+	ACPI_IORT_NODE_SMMU_V3 = 0x04
 };
 
 struct acpi_iort_id_mapping {
@@ -775,6 +783,23 @@
 #define ACPI_IORT_SMMU_DVM_SUPPORTED    (1)
 #define ACPI_IORT_SMMU_COHERENT_WALK    (1<<1)
 
+struct acpi_iort_smmu_v3 {
+	u64 base_address;	/* SMMUv3 base address */
+	u32 flags;
+	u32 reserved;
+	u64 vatos_address;
+	u32 model;		/* O: generic SMMUv3 */
+	u32 event_gsiv;
+	u32 pri_gsiv;
+	u32 gerr_gsiv;
+	u32 sync_gsiv;
+};
+
+/* Masks for Flags field above */
+
+#define ACPI_IORT_SMMU_V3_COHACC_OVERRIDE   (1)
+#define ACPI_IORT_SMMU_V3_HTTU_OVERRIDE     (1<<1)
+
 /*******************************************************************************
  *
  * IVRS - I/O Virtualization Reporting Structure
@@ -1102,10 +1127,10 @@
 /*******************************************************************************
  *
  * SPCR - Serial Port Console Redirection table
- *        Version 1
+ *        Version 2
  *
  * Conforms to "Serial Port Console Redirection Table",
- * Version 1.00, January 11, 2002
+ * Version 1.03, August 10, 2015
  *
  ******************************************************************************/
 
@@ -1137,6 +1162,8 @@
 
 #define ACPI_SPCR_DO_NOT_DISABLE    (1)
 
+/* Values for Interface Type: See the definition of the DBG2 table */
+
 /*******************************************************************************
  *
  * SPMI - Server Platform Management Interface table
diff --git a/include/acpi/actbl3.h b/include/acpi/actbl3.h
index ddf5e66..ebc1f4f 100644
--- a/include/acpi/actbl3.h
+++ b/include/acpi/actbl3.h
@@ -184,7 +184,7 @@
 	struct acpi_table_header header;	/* Common ACPI table header */
 };
 
-/* FPDT subtable header */
+/* FPDT subtable header (Performance Record Structure) */
 
 struct acpi_fpdt_header {
 	u16 type;
@@ -205,6 +205,57 @@
 
 /* 0: Firmware Basic Boot Performance Record */
 
+struct acpi_fpdt_boot_pointer {
+	struct acpi_fpdt_header header;
+	u8 reserved[4];
+	u64 address;
+};
+
+/* 1: S3 Performance Table Pointer Record */
+
+struct acpi_fpdt_s3pt_pointer {
+	struct acpi_fpdt_header header;
+	u8 reserved[4];
+	u64 address;
+};
+
+/*
+ * S3PT - S3 Performance Table. This table is pointed to by the
+ * S3 Pointer Record above.
+ */
+struct acpi_table_s3pt {
+	u8 signature[4];	/* "S3PT" */
+	u32 length;
+};
+
+/*
+ * S3PT Subtables (Not part of the actual FPDT)
+ */
+
+/* Values for Type field in S3PT header */
+
+enum acpi_s3pt_type {
+	ACPI_S3PT_TYPE_RESUME = 0,
+	ACPI_S3PT_TYPE_SUSPEND = 1,
+	ACPI_FPDT_BOOT_PERFORMANCE = 2
+};
+
+struct acpi_s3pt_resume {
+	struct acpi_fpdt_header header;
+	u32 resume_count;
+	u64 full_resume;
+	u64 average_resume;
+};
+
+struct acpi_s3pt_suspend {
+	struct acpi_fpdt_header header;
+	u64 suspend_start;
+	u64 suspend_end;
+};
+
+/*
+ * FPDT Boot Performance Record (Not part of the actual FPDT)
+ */
 struct acpi_fpdt_boot {
 	struct acpi_fpdt_header header;
 	u8 reserved[4];
@@ -215,52 +266,6 @@
 	u64 exit_services_exit;
 };
 
-/* 1: S3 Performance Table Pointer Record */
-
-struct acpi_fpdt_s3pt_ptr {
-	struct acpi_fpdt_header header;
-	u8 reserved[4];
-	u64 address;
-};
-
-/*
- * S3PT - S3 Performance Table. This table is pointed to by the
- * FPDT S3 Pointer Record above.
- */
-struct acpi_table_s3pt {
-	u8 signature[4];	/* "S3PT" */
-	u32 length;
-};
-
-/*
- * S3PT Subtables
- */
-struct acpi_s3pt_header {
-	u16 type;
-	u8 length;
-	u8 revision;
-};
-
-/* Values for Type field above */
-
-enum acpi_s3pt_type {
-	ACPI_S3PT_TYPE_RESUME = 0,
-	ACPI_S3PT_TYPE_SUSPEND = 1
-};
-
-struct acpi_s3pt_resume {
-	struct acpi_s3pt_header header;
-	u32 resume_count;
-	u64 full_resume;
-	u64 average_resume;
-};
-
-struct acpi_s3pt_suspend {
-	struct acpi_s3pt_header header;
-	u64 suspend_start;
-	u64 suspend_end;
-};
-
 /*******************************************************************************
  *
  * GTDT - Generic Timer Description Table (ACPI 5.1)
@@ -476,7 +481,8 @@
 enum acpi_pcct_type {
 	ACPI_PCCT_TYPE_GENERIC_SUBSPACE = 0,
 	ACPI_PCCT_TYPE_HW_REDUCED_SUBSPACE = 1,
-	ACPI_PCCT_TYPE_RESERVED = 2	/* 2 and greater are reserved */
+	ACPI_PCCT_TYPE_HW_REDUCED_SUBSPACE_TYPE2 = 2,	/* ACPI 6.1 */
+	ACPI_PCCT_TYPE_RESERVED = 3	/* 3 and greater are reserved */
 };
 
 /*
@@ -515,6 +521,26 @@
 	u16 min_turnaround_time;
 };
 
+/* 2: HW-reduced Communications Subspace Type 2 (ACPI 6.1) */
+
+struct acpi_pcct_hw_reduced_type2 {
+	struct acpi_subtable_header header;
+	u32 doorbell_interrupt;
+	u8 flags;
+	u8 reserved;
+	u64 base_address;
+	u64 length;
+	struct acpi_generic_address doorbell_register;
+	u64 preserve_mask;
+	u64 write_mask;
+	u32 latency;
+	u32 max_access_rate;
+	u16 min_turnaround_time;
+	struct acpi_generic_address doorbell_ack_register;
+	u64 ack_preserve_mask;
+	u64 ack_write_mask;
+};
+
 /* Values for doorbell flags above */
 
 #define ACPI_PCCT_INTERRUPT_POLARITY    (1)
diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h
index db46546..cb389ef 100644
--- a/include/acpi/actypes.h
+++ b/include/acpi/actypes.h
@@ -630,7 +630,8 @@
 #define ACPI_NOTIFY_SHUTDOWN_REQUEST    (u8) 0x0C
 #define ACPI_NOTIFY_AFFINITY_UPDATE     (u8) 0x0D
 
-#define ACPI_NOTIFY_MAX                 0x0D
+#define ACPI_GENERIC_NOTIFY_MAX         0x0D
+#define ACPI_SPECIFIC_NOTIFY_MAX        0x84
 
 /*
  * Types associated with ACPI names and objects. The first group of
@@ -892,7 +893,7 @@
 
 /* Sleep function dispatch */
 
-typedef acpi_status(*acpi_sleep_function) (u8 sleep_state);
+typedef acpi_status (*acpi_sleep_function) (u8 sleep_state);
 
 struct acpi_sleep_functions {
 	acpi_sleep_function legacy_function;
@@ -994,7 +995,7 @@
  * Predefined Namespace items
  */
 struct acpi_predefined_names {
-	char *name;
+	const char *name;
 	u8 type;
 	char *val;
 };
@@ -1071,20 +1072,21 @@
 typedef
 void (*acpi_object_handler) (acpi_handle object, void *data);
 
-typedef acpi_status(*acpi_init_handler) (acpi_handle object, u32 function);
+typedef
+acpi_status (*acpi_init_handler) (acpi_handle object, u32 function);
 
 #define ACPI_INIT_DEVICE_INI        1
 
 typedef
-acpi_status(*acpi_exception_handler) (acpi_status aml_status,
-				      acpi_name name,
-				      u16 opcode,
-				      u32 aml_offset, void *context);
+acpi_status (*acpi_exception_handler) (acpi_status aml_status,
+				       acpi_name name,
+				       u16 opcode,
+				       u32 aml_offset, void *context);
 
 /* Table Event handler (Load, load_table, etc.) and types */
 
 typedef
-acpi_status(*acpi_table_handler) (u32 event, void *table, void *context);
+acpi_status (*acpi_table_handler) (u32 event, void *table, void *context);
 
 #define ACPI_TABLE_LOAD             0x0
 #define ACPI_TABLE_UNLOAD           0x1
@@ -1093,12 +1095,12 @@
 /* Address Spaces (For Operation Regions) */
 
 typedef
-acpi_status(*acpi_adr_space_handler) (u32 function,
-				      acpi_physical_address address,
-				      u32 bit_width,
-				      u64 *value,
-				      void *handler_context,
-				      void *region_context);
+acpi_status (*acpi_adr_space_handler) (u32 function,
+				       acpi_physical_address address,
+				       u32 bit_width,
+				       u64 *value,
+				       void *handler_context,
+				       void *region_context);
 
 #define ACPI_DEFAULT_HANDLER            NULL
 
@@ -1111,18 +1113,18 @@
 };
 
 typedef
-acpi_status(*acpi_adr_space_setup) (acpi_handle region_handle,
-				    u32 function,
-				    void *handler_context,
-				    void **region_context);
+acpi_status (*acpi_adr_space_setup) (acpi_handle region_handle,
+				     u32 function,
+				     void *handler_context,
+				     void **region_context);
 
 #define ACPI_REGION_ACTIVATE    0
 #define ACPI_REGION_DEACTIVATE  1
 
 typedef
-acpi_status(*acpi_walk_callback) (acpi_handle object,
-				  u32 nesting_level,
-				  void *context, void **return_value);
+acpi_status (*acpi_walk_callback) (acpi_handle object,
+				   u32 nesting_level,
+				   void *context, void **return_value);
 
 typedef
 u32 (*acpi_interface_handler) (acpi_string interface_name, u32 supported);
@@ -1227,7 +1229,7 @@
  * struct acpi_memory_list is used only if the ACPICA local cache is enabled
  */
 struct acpi_memory_list {
-	char *list_name;
+	const char *list_name;
 	void *list_head;
 	u16 object_size;
 	u16 max_depth;
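
The typedef changes above are whitespace-only, so existing callback implementations compile unchanged. For reference, a callback matching the acpi_walk_callback signature might look like this (a hypothetical node counter, not taken from the patch):

    static acpi_status count_nodes(acpi_handle object, u32 nesting_level,
    			       void *context, void **return_value)
    {
    	u32 *count = context;

    	(*count)++;		/* one namespace node visited */
    	return AE_OK;
    }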
diff --git a/include/acpi/platform/acenv.h b/include/acpi/platform/acenv.h
index 7c0595b..86b5a84 100644
--- a/include/acpi/platform/acenv.h
+++ b/include/acpi/platform/acenv.h
@@ -66,17 +66,28 @@
  *
  *****************************************************************************/
 
+/* Common application configuration. All single threaded except for acpi_exec. */
+
+#if (defined ACPI_ASL_COMPILER) || \
+	(defined ACPI_BIN_APP)      || \
+	(defined ACPI_DUMP_APP)     || \
+	(defined ACPI_HELP_APP)     || \
+	(defined ACPI_NAMES_APP)    || \
+	(defined ACPI_SRC_APP)      || \
+	(defined ACPI_XTRACT_APP)   || \
+	(defined ACPI_EXAMPLE_APP)
+#define ACPI_APPLICATION
+#define ACPI_SINGLE_THREADED
+#endif
+
 /* iASL configuration */
 
 #ifdef ACPI_ASL_COMPILER
-#define ACPI_APPLICATION
 #define ACPI_DEBUG_OUTPUT
 #define ACPI_CONSTANT_EVAL_ONLY
 #define ACPI_LARGE_NAMESPACE_NODE
 #define ACPI_DATA_TABLE_DISASSEMBLY
-#define ACPI_SINGLE_THREADED
 #define ACPI_32BIT_PHYSICAL_ADDRESS
-
 #define ACPI_DISASSEMBLER 1
 #endif
 
@@ -89,21 +100,6 @@
 #define ACPI_DBG_TRACK_ALLOCATIONS
 #endif
 
-/*
- * acpi_bin/acpi_dump/acpi_help/acpi_names/acpi_src/acpi_xtract/Example
- * configuration. All single threaded.
- */
-#if (defined ACPI_BIN_APP)      || \
-	(defined ACPI_DUMP_APP)     || \
-	(defined ACPI_HELP_APP)     || \
-	(defined ACPI_NAMES_APP)    || \
-	(defined ACPI_SRC_APP)      || \
-	(defined ACPI_XTRACT_APP)   || \
-	(defined ACPI_EXAMPLE_APP)
-#define ACPI_APPLICATION
-#define ACPI_SINGLE_THREADED
-#endif
-
 /* acpi_help configuration. Error messages disabled. */
 
 #ifdef ACPI_HELP_APP
@@ -138,11 +134,16 @@
 #define ACPI_REDUCED_HARDWARE 1
 #endif
 
-/* Linkable ACPICA library */
+/* Linkable ACPICA library. Two versions, one with full debug. */
 
 #ifdef ACPI_LIBRARY
 #define ACPI_USE_LOCAL_CACHE
-#define ACPI_FULL_DEBUG
+#define ACPI_DEBUGGER 1
+#define ACPI_DISASSEMBLER 1
+
+#ifdef _DEBUG
+#define ACPI_DEBUG_OUTPUT
+#endif
 #endif
 
 /* Common for all ACPICA applications */
@@ -218,6 +219,9 @@
 #elif defined(__HAIKU__)
 #include "achaiku.h"
 
+#elif defined(__QNX__)
+#include "acqnx.h"
+
 #else
 
 /* Unknown environment */
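
With the consolidation above, any one of the listed tool defines now pulls in both ACPI_APPLICATION and ACPI_SINGLE_THREADED from a single place. A compile-time sanity check illustrates the effect (a sketch; the include path is the usual ACPICA one):

    #define ACPI_DUMP_APP		/* building acpidump, for example */
    #include <acpi/platform/acenv.h>

    #if !defined(ACPI_APPLICATION) || !defined(ACPI_SINGLE_THREADED)
    #error "common application configuration was not applied"
    #endif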
diff --git a/include/acpi/platform/acmsvcex.h b/include/acpi/platform/acmsvcex.h
deleted file mode 100644
index 28084a1..0000000
--- a/include/acpi/platform/acmsvcex.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/******************************************************************************
- *
- * Name: acmsvcex.h - Extra VC specific defines, etc.
- *
- *****************************************************************************/
-
-/*
- * Copyright (C) 2000 - 2016, Intel Corp.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions, and the following disclaimer,
- *    without modification.
- * 2. Redistributions in binary form must reproduce at minimum a disclaimer
- *    substantially similar to the "NO WARRANTY" disclaimer below
- *    ("Disclaimer") and any redistribution must be conditioned upon
- *    including a substantially similar Disclaimer requirement for further
- *    binary redistribution.
- * 3. Neither the names of the above-listed copyright holders nor the names
- *    of any contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * NO WARRANTY
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
- * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGES.
- */
-
-#ifndef __ACMSVCEX_H__
-#define __ACMSVCEX_H__
-
-/* Debug support. */
-
-#ifdef _DEBUG
-#define _CRTDBG_MAP_ALLOC	/* Enables specific file/lineno for leaks */
-#include <crtdbg.h>
-#endif
-
-#endif				/* __ACMSVCEX_H__ */
diff --git a/include/acpi/platform/acwinex.h b/include/acpi/platform/acwinex.h
deleted file mode 100644
index a00b3e4..0000000
--- a/include/acpi/platform/acwinex.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/******************************************************************************
- *
- * Name: acwinex.h - Extra OS specific defines, etc.
- *
- *****************************************************************************/
-
-/*
- * Copyright (C) 2000 - 2016, Intel Corp.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions, and the following disclaimer,
- *    without modification.
- * 2. Redistributions in binary form must reproduce at minimum a disclaimer
- *    substantially similar to the "NO WARRANTY" disclaimer below
- *    ("Disclaimer") and any redistribution must be conditioned upon
- *    including a substantially similar Disclaimer requirement for further
- *    binary redistribution.
- * 3. Neither the names of the above-listed copyright holders nor the names
- *    of any contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * NO WARRANTY
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
- * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGES.
- */
-
-#ifndef __ACWINEX_H__
-#define __ACWINEX_H__
-
-/* Windows uses VC */
-
-#endif				/* __ACWINEX_H__ */
diff --git a/include/acpi/video.h b/include/acpi/video.h
index 5ca2f2c..70a41f7 100644
--- a/include/acpi/video.h
+++ b/include/acpi/video.h
@@ -4,6 +4,19 @@
 #include <linux/errno.h> /* for ENODEV */
 #include <linux/types.h> /* for bool */
 
+struct acpi_video_brightness_flags {
+	u8 _BCL_no_ac_battery_levels:1;	/* no AC/Battery levels in _BCL */
+	u8 _BCL_reversed:1;		/* _BCL package is in a reversed order */
+	u8 _BQC_use_index:1;		/* _BQC returns an index value */
+};
+
+struct acpi_video_device_brightness {
+	int curr;
+	int count;
+	int *levels;
+	struct acpi_video_brightness_flags flags;
+};
+
 struct acpi_device;
 
 #define ACPI_VIDEO_CLASS	"video"
@@ -37,6 +50,8 @@
  * may change over time and should not be cached.
  */
 extern bool acpi_video_handles_brightness_key_presses(void);
+extern int acpi_video_get_levels(struct acpi_device *device,
+				 struct acpi_video_device_brightness **dev_br);
 #else
 static inline int acpi_video_register(void) { return 0; }
 static inline void acpi_video_unregister(void) { return; }
@@ -56,6 +71,11 @@
 {
 	return false;
 }
+static inline int acpi_video_get_levels(struct acpi_device *device,
+			struct acpi_video_device_brightness **dev_br)
+{
+	return -ENODEV;
+}
 #endif
 
 #endif
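
A caller of the newly exported acpi_video_get_levels() receives the level array and its flags in one struct. A minimal sketch (report_levels is hypothetical; ownership of the returned buffer is not defined by this header, so it is only read here):

    static int report_levels(struct acpi_device *adev)
    {
    	struct acpi_video_device_brightness *br;
    	int i, ret;

    	ret = acpi_video_get_levels(adev, &br);
    	if (ret)
    		return ret;	/* -ENODEV without ACPI video support */

    	for (i = 0; i < br->count; i++)
    		pr_info("brightness level %d: %d\n", i, br->levels[i]);

    	return 0;
    }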
diff --git a/include/asm-generic/rwsem.h b/include/asm-generic/rwsem.h
index d6d5dc9..3fc94a0 100644
--- a/include/asm-generic/rwsem.h
+++ b/include/asm-generic/rwsem.h
@@ -53,7 +53,7 @@
 /*
  * lock for writing
  */
-static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
+static inline void __down_write(struct rw_semaphore *sem)
 {
 	long tmp;
 
@@ -63,9 +63,16 @@
 		rwsem_down_write_failed(sem);
 }
 
-static inline void __down_write(struct rw_semaphore *sem)
+static inline int __down_write_killable(struct rw_semaphore *sem)
 {
-	__down_write_nested(sem, 0);
+	long tmp;
+
+	tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS,
+				     (atomic_long_t *)&sem->count);
+	if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS))
+		if (IS_ERR(rwsem_down_write_failed_killable(sem)))
+			return -EINTR;
+	return 0;
 }
 
 static inline int __down_write_trylock(struct rw_semaphore *sem)
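
The killable variant is meant to back a down_write_killable() API at the rwsem level, letting writers bail out on fatal signals instead of blocking uninterruptibly. The expected calling pattern, sketched (adjust_mappings is hypothetical):

    static int adjust_mappings(struct mm_struct *mm)
    {
    	if (down_write_killable(&mm->mmap_sem))
    		return -EINTR;	/* a fatal signal arrived while waiting */

    	/* ... modify the address space under the write lock ... */

    	up_write(&mm->mmap_sem);
    	return 0;
    }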
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 339125b..6a67ab9 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -245,7 +245,9 @@
 
 #define INIT_TASK_DATA(align)						\
 	. = ALIGN(align);						\
-	*(.data..init_task)
+	VMLINUX_SYMBOL(__start_init_task) = .;				\
+	*(.data..init_task)						\
+	VMLINUX_SYMBOL(__end_init_task) = .;
 
 /*
  * Read only Data
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 06ed7e5..288fac5 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -190,14 +190,6 @@
 }
 #endif
 
-#ifdef CONFIG_ACPI_INITRD_TABLE_OVERRIDE
-void acpi_initrd_override(void *data, size_t size);
-#else
-static inline void acpi_initrd_override(void *data, size_t size)
-{
-}
-#endif
-
 #define BAD_MADT_ENTRY(entry, end) (					    \
 		(!entry) || (unsigned long)entry + sizeof(*entry) > end ||  \
 		((struct acpi_subtable_header *)entry)->length < sizeof(*entry))
@@ -216,6 +208,7 @@
 int acpi_mps_check (void);
 int acpi_numa_init (void);
 
+void early_acpi_table_init(void *data, size_t size);
 int acpi_table_init (void);
 int acpi_table_parse(char *id, acpi_tbl_table_handler handler);
 int __init acpi_parse_entries(char *id, unsigned long table_size,
@@ -278,6 +271,7 @@
 extern u32 acpi_irq_handled;
 extern u32 acpi_irq_not_handled;
 extern unsigned int acpi_sci_irq;
+extern bool acpi_no_s5;
 #define INVALID_ACPI_IRQ	((unsigned)-1)
 static inline bool acpi_sci_irq_valid(void)
 {
@@ -311,7 +305,6 @@
 int acpi_pci_irq_enable (struct pci_dev *dev);
 void acpi_penalize_isa_irq(int irq, int active);
 bool acpi_isa_irq_available(int irq);
-void acpi_penalize_sci_irq(int irq, int trigger, int polarity);
 void acpi_pci_irq_disable (struct pci_dev *dev);
 
 extern int ec_read(u8 addr, u8 *val);
@@ -359,7 +352,6 @@
 extern char acpi_video_backlight_string[];
 extern long acpi_is_video_device(acpi_handle handle);
 extern int acpi_blacklisted(void);
-extern void acpi_dmi_osi_linux(int enable, const struct dmi_system_id *d);
 extern void acpi_osi_setup(char *str);
 extern bool acpi_osi_is_win8(void);
 
@@ -596,6 +588,7 @@
 	return NULL;
 }
 
+static inline void early_acpi_table_init(void *data, size_t size) { }
 static inline void acpi_early_init(void) { }
 static inline void acpi_subsystem_init(void) { }
 
diff --git a/include/linux/apple-gmux.h b/include/linux/apple-gmux.h
index b2d32e0..714186d 100644
--- a/include/linux/apple-gmux.h
+++ b/include/linux/apple-gmux.h
@@ -35,7 +35,7 @@
  */
 static inline bool apple_gmux_present(void)
 {
-	return acpi_dev_present(GMUX_ACPI_HID);
+	return acpi_dev_found(GMUX_ACPI_HID);
 }
 
 #else  /* !CONFIG_APPLE_GMUX */
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 506c353..e451534 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -560,11 +560,11 @@
 
 /**
  * atomic_fetch_or - perform *p |= mask and return old value of *p
- * @p: pointer to atomic_t
  * @mask: mask to OR on the atomic_t
+ * @p: pointer to atomic_t
  */
 #ifndef atomic_fetch_or
-static inline int atomic_fetch_or(atomic_t *p, int mask)
+static inline int atomic_fetch_or(int mask, atomic_t *p)
 {
 	int old, val = atomic_read(p);
 
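
After the swap, the mask comes first and the atomic_t pointer second, matching the other bitwise atomics. A short usage sketch (mark_pending is hypothetical):

    static bool mark_pending(atomic_t *flags)
    {
    	/* returns true if bit 0 was already set before this call */
    	return atomic_fetch_or(1, flags) & 1;
    }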
diff --git a/include/linux/backlight.h b/include/linux/backlight.h
index 1e7a69a..5f2fd61 100644
--- a/include/linux/backlight.h
+++ b/include/linux/backlight.h
@@ -141,9 +141,10 @@
 					struct backlight_device *bd);
 extern void backlight_force_update(struct backlight_device *bd,
 				   enum backlight_update_reason reason);
-extern bool backlight_device_registered(enum backlight_type type);
 extern int backlight_register_notifier(struct notifier_block *nb);
 extern int backlight_unregister_notifier(struct notifier_block *nb);
+extern struct backlight_device *backlight_device_get_by_type(enum backlight_type type);
+extern int backlight_device_set_brightness(struct backlight_device *bd, unsigned long brightness);
 
 #define to_backlight_device(obj) container_of(obj, struct backlight_device, dev)
 
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index f9b1fab..21597dc 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -59,25 +59,7 @@
  * CPU notifier priorities.
  */
 enum {
-	/*
-	 * SCHED_ACTIVE marks a cpu which is coming up active during
-	 * CPU_ONLINE and CPU_DOWN_FAILED and must be the first
-	 * notifier.  CPUSET_ACTIVE adjusts cpuset according to
-	 * cpu_active mask right after SCHED_ACTIVE.  During
-	 * CPU_DOWN_PREPARE, SCHED_INACTIVE and CPUSET_INACTIVE are
-	 * ordered in the similar way.
-	 *
-	 * This ordering guarantees consistent cpu_active mask and
-	 * migration behavior to all cpu notifiers.
-	 */
-	CPU_PRI_SCHED_ACTIVE	= INT_MAX,
-	CPU_PRI_CPUSET_ACTIVE	= INT_MAX - 1,
-	CPU_PRI_SCHED_INACTIVE	= INT_MIN + 1,
-	CPU_PRI_CPUSET_INACTIVE	= INT_MIN,
-
-	/* migration should happen before other stuff but after perf */
 	CPU_PRI_PERF		= 20,
-	CPU_PRI_MIGRATION	= 10,
 
 	/* bring up workqueues before normal notifiers and down after */
 	CPU_PRI_WORKQUEUE_UP	= 5,
diff --git a/include/linux/cpufreq-dt.h b/include/linux/cpufreq-dt.h
deleted file mode 100644
index 0414009..0000000
--- a/include/linux/cpufreq-dt.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright (C) 2014 Marvell
- * Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef __CPUFREQ_DT_H__
-#define __CPUFREQ_DT_H__
-
-struct cpufreq_dt_platform_data {
-	/*
-	 * True when each CPU has its own clock to control its
-	 * frequency, false when all CPUs are controlled by a single
-	 * clock.
-	 */
-	bool independent_clocks;
-};
-
-#endif /* __CPUFREQ_DT_H__ */
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 718e872..4e81e08 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -102,6 +102,17 @@
 	 */
 	struct rw_semaphore	rwsem;
 
+	/*
+	 * Fast switch flags:
+	 * - fast_switch_possible should be set by the driver if it can
+	 *   guarantee that the frequency can be changed on any CPU sharing the
+	 *   policy and that the change will then affect all of the policy CPUs.
+	 * - fast_switch_enabled is to be set by governors that support fast
+	 *   frequency switching with the help of cpufreq_enable_fast_switch().
+	 */
+	bool			fast_switch_possible;
+	bool			fast_switch_enabled;
+
 	/* Synchronization for frequency transitions */
 	bool			transition_ongoing; /* Tracks transition status */
 	spinlock_t		transition_lock;
@@ -156,6 +167,8 @@
 int cpufreq_update_policy(unsigned int cpu);
 bool have_governor_per_policy(void);
 struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy);
+void cpufreq_enable_fast_switch(struct cpufreq_policy *policy);
+void cpufreq_disable_fast_switch(struct cpufreq_policy *policy);
 #else
 static inline unsigned int cpufreq_get(unsigned int cpu)
 {
@@ -236,6 +249,8 @@
 				  unsigned int relation);	/* Deprecated */
 	int		(*target_index)(struct cpufreq_policy *policy,
 					unsigned int index);
+	unsigned int	(*fast_switch)(struct cpufreq_policy *policy,
+				       unsigned int target_freq);
 	/*
 	 * Only for drivers with target_index() and CPUFREQ_ASYNC_NOTIFICATION
 	 * unset.
@@ -426,6 +441,20 @@
 #define CPUFREQ_POLICY_POWERSAVE	(1)
 #define CPUFREQ_POLICY_PERFORMANCE	(2)
 
+/*
+ * The polling frequency depends on the capability of the processor. Default
+ * polling frequency is 1000 times the transition latency of the processor. The
+ * ondemand governor will work on any processor with transition latency <= 10ms,
+ * using appropriate sampling rate.
+ *
+ * For CPUs with transition latency > 10ms (mostly drivers with CPUFREQ_ETERNAL)
+ * the ondemand governor will not work. All times here are in us (microseconds).
+ */
+#define MIN_SAMPLING_RATE_RATIO		(2)
+#define LATENCY_MULTIPLIER		(1000)
+#define MIN_LATENCY_MULTIPLIER		(20)
+#define TRANSITION_LATENCY_LIMIT	(10 * 1000 * 1000)
+
 /* Governor Events */
 #define CPUFREQ_GOV_START	1
 #define CPUFREQ_GOV_STOP	2
@@ -450,6 +479,8 @@
 };
 
 /* Pass a target to the cpufreq driver */
+unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy,
+					unsigned int target_freq);
 int cpufreq_driver_target(struct cpufreq_policy *policy,
 				 unsigned int target_freq,
 				 unsigned int relation);
@@ -462,6 +493,29 @@
 struct cpufreq_governor *cpufreq_default_governor(void);
 struct cpufreq_governor *cpufreq_fallback_governor(void);
 
+/* Governor attribute set */
+struct gov_attr_set {
+	struct kobject kobj;
+	struct list_head policy_list;
+	struct mutex update_lock;
+	int usage_count;
+};
+
+/* sysfs ops for cpufreq governors */
+extern const struct sysfs_ops governor_sysfs_ops;
+
+void gov_attr_set_init(struct gov_attr_set *attr_set, struct list_head *list_node);
+void gov_attr_set_get(struct gov_attr_set *attr_set, struct list_head *list_node);
+unsigned int gov_attr_set_put(struct gov_attr_set *attr_set, struct list_head *list_node);
+
+/* Governor sysfs attribute */
+struct governor_attr {
+	struct attribute attr;
+	ssize_t (*show)(struct gov_attr_set *attr_set, char *buf);
+	ssize_t (*store)(struct gov_attr_set *attr_set, const char *buf,
+			 size_t count);
+};
+
 /*********************************************************************
  *                     FREQUENCY TABLE HELPERS                       *
  *********************************************************************/
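
A governor built on the shared gov_attr_set plumbing exposes each tunable as a governor_attr whose show/store take the attribute set instead of a policy. A sketch of one such tunable (the sampling_rate name and its storage are illustrative, not part of this patch):

    static unsigned int sampling_rate = LATENCY_MULTIPLIER;

    static ssize_t sampling_rate_show(struct gov_attr_set *attr_set, char *buf)
    {
    	return sprintf(buf, "%u\n", sampling_rate);
    }

    static ssize_t sampling_rate_store(struct gov_attr_set *attr_set,
    				   const char *buf, size_t count)
    {
    	if (kstrtouint(buf, 10, &sampling_rate))
    		return -EINVAL;
    	return count;
    }

    static struct governor_attr sampling_rate_attr = {
    	.attr	= { .name = "sampling_rate", .mode = 0644 },
    	.show	= sampling_rate_show,
    	.store	= sampling_rate_store,
    };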
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 5d68e15..386374d 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -8,6 +8,7 @@
 	CPUHP_BRINGUP_CPU,
 	CPUHP_AP_IDLE_DEAD,
 	CPUHP_AP_OFFLINE,
+	CPUHP_AP_SCHED_STARTING,
 	CPUHP_AP_NOTIFY_STARTING,
 	CPUHP_AP_ONLINE,
 	CPUHP_TEARDOWN_CPU,
@@ -16,6 +17,7 @@
 	CPUHP_AP_NOTIFY_ONLINE,
 	CPUHP_AP_ONLINE_DYN,
 	CPUHP_AP_ONLINE_DYN_END		= CPUHP_AP_ONLINE_DYN + 30,
+	CPUHP_AP_ACTIVE,
 	CPUHP_ONLINE,
 };
 
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 40cee6b..e828cf6 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -743,12 +743,10 @@
 static inline void
 set_cpu_online(unsigned int cpu, bool online)
 {
-	if (online) {
+	if (online)
 		cpumask_set_cpu(cpu, &__cpu_online_mask);
-		cpumask_set_cpu(cpu, &__cpu_active_mask);
-	} else {
+	else
 		cpumask_clear_cpu(cpu, &__cpu_online_mask);
-	}
 }
 
 static inline void
diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index 6fa02a2..2de4e2e 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -19,6 +19,13 @@
 
 #define DEVFREQ_NAME_LEN 16
 
+/* DEVFREQ notifier interface */
+#define DEVFREQ_TRANSITION_NOTIFIER	(0)
+
+/* Transition notifiers of DEVFREQ_TRANSITION_NOTIFIER */
+#define	DEVFREQ_PRECHANGE		(0)
+#define DEVFREQ_POSTCHANGE		(1)
+
 struct devfreq;
 
 /**
@@ -143,6 +150,7 @@
  * @trans_table:	Statistics of devfreq transitions
  * @time_in_state:	Statistics of devfreq states
  * @last_stat_updated:	The last time stat updated
+ * @transition_notifier_list: list head of DEVFREQ_TRANSITION_NOTIFIER notifiers
  *
  * This structure stores the devfreq information for a give device.
  *
@@ -177,6 +185,13 @@
 	unsigned int *trans_table;
 	unsigned long *time_in_state;
 	unsigned long last_stat_updated;
+
+	struct srcu_notifier_head transition_notifier_list;
+};
+
+struct devfreq_freqs {
+	unsigned long old;
+	unsigned long new;
 };
 
 #if defined(CONFIG_PM_DEVFREQ)
@@ -207,6 +222,22 @@
 					      struct devfreq *devfreq);
 extern void devm_devfreq_unregister_opp_notifier(struct device *dev,
 						struct devfreq *devfreq);
+extern int devfreq_register_notifier(struct devfreq *devfreq,
+					struct notifier_block *nb,
+					unsigned int list);
+extern int devfreq_unregister_notifier(struct devfreq *devfreq,
+					struct notifier_block *nb,
+					unsigned int list);
+extern int devm_devfreq_register_notifier(struct device *dev,
+				struct devfreq *devfreq,
+				struct notifier_block *nb,
+				unsigned int list);
+extern void devm_devfreq_unregister_notifier(struct device *dev,
+				struct devfreq *devfreq,
+				struct notifier_block *nb,
+				unsigned int list);
+extern struct devfreq *devfreq_get_devfreq_by_phandle(struct device *dev,
+						int index);
 
 /**
  * devfreq_update_stats() - update the last_status pointer in struct devfreq
@@ -241,6 +272,39 @@
 };
 #endif
 
+#if IS_ENABLED(CONFIG_DEVFREQ_GOV_PASSIVE)
+/**
+ * struct devfreq_passive_data - void *data fed to struct devfreq
+ *	and devfreq_add_device
+ * @parent:	the devfreq instance of parent device.
+ * @get_target_freq:	Optional callback, Returns desired operating frequency
+ *			for the device using passive governor. That is called
+ *			when passive governor should decide the next frequency
+ *			by using the new frequency of parent devfreq device
+ *			using governors except for passive governor.
+ *			If the devfreq device has the specific method to decide
+ *			the next frequency, should use this callback.
+ * @this:	the devfreq instance of own device.
+ * @nb:		the notifier block for DEVFREQ_TRANSITION_NOTIFIER list
+ *
+ * The devfreq_passive_data must set @parent to the devfreq instance of the
+ * parent device, which runs a governor other than the passive governor. The
+ * 'this' and 'nb' fields need not be initialized; the devfreq core handles
+ * them.
+ */
+struct devfreq_passive_data {
+	/* Should set the devfreq instance of parent device */
+	struct devfreq *parent;
+
+	/* Optional callback to decide the next frequency of the passive device */
+	int (*get_target_freq)(struct devfreq *this, unsigned long *freq);
+
+	/* For passive governor's internal use. Don't need to set them */
+	struct devfreq *this;
+	struct notifier_block nb;
+};
+#endif
+
 #else /* !CONFIG_PM_DEVFREQ */
 static inline struct devfreq *devfreq_add_device(struct device *dev,
 					  struct devfreq_dev_profile *profile,
@@ -307,6 +371,41 @@
 {
 }
 
+static inline int devfreq_register_notifier(struct devfreq *devfreq,
+					struct notifier_block *nb,
+					unsigned int list)
+{
+	return 0;
+}
+
+static inline int devfreq_unregister_notifier(struct devfreq *devfreq,
+					struct notifier_block *nb,
+					unsigned int list)
+{
+	return 0;
+}
+
+static inline int devm_devfreq_register_notifier(struct device *dev,
+				struct devfreq *devfreq,
+				struct notifier_block *nb,
+				unsigned int list)
+{
+	return 0;
+}
+
+static inline void devm_devfreq_unregister_notifier(struct device *dev,
+				struct devfreq *devfreq,
+				struct notifier_block *nb,
+				unsigned int list)
+{
+}
+
+static inline struct devfreq *devfreq_get_devfreq_by_phandle(struct device *dev,
+							int index)
+{
+	return ERR_PTR(-ENODEV);
+}
+
 static inline int devfreq_update_stats(struct devfreq *df)
 {
 	return -EINVAL;
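
A driver tracking another device's frequency registers on the new transition list; DEVFREQ_PRECHANGE and DEVFREQ_POSTCHANGE arrive with a devfreq_freqs payload. A sketch (freq_transition_cb is hypothetical; the devm_ call in probe follows the usual pattern):

    static int freq_transition_cb(struct notifier_block *nb,
    			      unsigned long event, void *data)
    {
    	struct devfreq_freqs *freqs = data;

    	if (event == DEVFREQ_POSTCHANGE)
    		pr_debug("devfreq changed: %lu -> %lu Hz\n",
    			 freqs->old, freqs->new);

    	return NOTIFY_OK;
    }

    static struct notifier_block freq_nb = {
    	.notifier_call = freq_transition_cb,
    };

    /* in probe():
     *	devm_devfreq_register_notifier(dev, df, &freq_nb,
     *				       DEVFREQ_TRANSITION_NOTIFIER);
     */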
diff --git a/include/linux/device.h b/include/linux/device.h
index 002c597..b130304 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -956,11 +956,6 @@
 	return !!dev->power.async_suspend;
 }
 
-static inline void pm_suspend_ignore_children(struct device *dev, bool enable)
-{
-	dev->power.ignore_children = enable;
-}
-
 static inline void dev_pm_syscore_device(struct device *dev, bool val)
 {
 #ifdef CONFIG_PM_SLEEP
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 1626474..df7acb5 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -21,6 +21,7 @@
 #include <linux/pfn.h>
 #include <linux/pstore.h>
 #include <linux/reboot.h>
+#include <linux/screen_info.h>
 
 #include <asm/page.h>
 
@@ -124,6 +125,13 @@
 } efi_capsule_header_t;
 
 /*
+ * EFI capsule flags
+ */
+#define EFI_CAPSULE_PERSIST_ACROSS_RESET	0x00010000
+#define EFI_CAPSULE_POPULATE_SYSTEM_TABLE	0x00020000
+#define EFI_CAPSULE_INITIATE_RESET		0x00040000
+
+/*
  * Allocation types for calls to boottime->allocate_pages.
  */
 #define EFI_ALLOCATE_ANY_PAGES		0
@@ -282,9 +290,10 @@
 	efi_status_t (*handle_protocol)(efi_handle_t, efi_guid_t *, void **);
 	void *__reserved;
 	void *register_protocol_notify;
-	void *locate_handle;
+	efi_status_t (*locate_handle)(int, efi_guid_t *, void *,
+				      unsigned long *, efi_handle_t *);
 	void *locate_device_path;
-	void *install_configuration_table;
+	efi_status_t (*install_configuration_table)(efi_guid_t *, void *);
 	void *load_image;
 	void *start_image;
 	void *exit;
@@ -623,6 +632,27 @@
 	EFI_GUID(0x3152bca5, 0xeade, 0x433d, \
 		 0x86, 0x2e, 0xc0, 0x1c, 0xdc, 0x29, 0x1f, 0x44)
 
+#define EFI_MEMORY_ATTRIBUTES_TABLE_GUID \
+	EFI_GUID(0xdcfa911d, 0x26eb, 0x469f, \
+		 0xa2, 0x20, 0x38, 0xb7, 0xdc, 0x46, 0x12, 0x20)
+
+#define EFI_CONSOLE_OUT_DEVICE_GUID \
+	EFI_GUID(0xd3b36f2c, 0xd551, 0x11d4, \
+		 0x9a, 0x46, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d)
+
+/*
+ * This GUID is used to pass to the kernel proper the struct screen_info
+ * structure that was populated by the stub based on the GOP protocol instance
+ * associated with ConOut
+ */
+#define LINUX_EFI_ARM_SCREEN_INFO_TABLE_GUID \
+	EFI_GUID(0xe03fc20a, 0x85dc, 0x406e, \
+		 0xb9, 0xe, 0x4a, 0xb5, 0x02, 0x37, 0x1d, 0x95)
+
+#define LINUX_EFI_LOADER_ENTRY_GUID \
+	EFI_GUID(0x4a67b082, 0x0a4c, 0x41cf, \
+		 0xb6, 0xc7, 0x44, 0x0b, 0x29, 0xbb, 0x8c, 0x4f)
+
 typedef struct {
 	efi_guid_t guid;
 	u64 table;
@@ -847,6 +877,14 @@
 
 #define EFI_INVALID_TABLE_ADDR		(~0UL)
 
+typedef struct {
+	u32 version;
+	u32 num_entries;
+	u32 desc_size;
+	u32 reserved;
+	efi_memory_desc_t entry[0];
+} efi_memory_attributes_table_t;
+
 /*
  * All runtime access to EFI goes through this structure:
  */
@@ -868,6 +906,7 @@
 	unsigned long config_table;	/* config tables */
 	unsigned long esrt;		/* ESRT table */
 	unsigned long properties_table;	/* properties table */
+	unsigned long mem_attr_table;	/* memory attributes table */
 	efi_get_time_t *get_time;
 	efi_set_time_t *set_time;
 	efi_get_wakeup_time_t *get_wakeup_time;
@@ -883,7 +922,7 @@
 	efi_get_next_high_mono_count_t *get_next_high_mono_count;
 	efi_reset_system_t *reset_system;
 	efi_set_virtual_address_map_t *set_virtual_address_map;
-	struct efi_memory_map *memmap;
+	struct efi_memory_map memmap;
 	unsigned long flags;
 } efi;
 
@@ -945,7 +984,6 @@
 extern void efi_get_time(struct timespec *now);
 extern void efi_reserve_boot_services(void);
 extern int efi_get_fdt_params(struct efi_fdt_params *params);
-extern struct efi_memory_map memmap;
 extern struct kobject *efi_kobj;
 
 extern int efi_reboot_quirk_mode;
@@ -957,12 +995,34 @@
 static inline void efi_fake_memmap(void) { }
 #endif
 
+/*
+ * efi_memattr_perm_setter - arch specific callback function passed into
+ *                           efi_memattr_apply_permissions() that updates the
+ *                           mapping permissions described by the second
+ *                           argument in the page tables referred to by the
+ *                           first argument.
+ */
+typedef int (*efi_memattr_perm_setter)(struct mm_struct *, efi_memory_desc_t *);
+
+extern int efi_memattr_init(void);
+extern int efi_memattr_apply_permissions(struct mm_struct *mm,
+					 efi_memattr_perm_setter fn);
+
 /* Iterate through an efi_memory_map */
-#define for_each_efi_memory_desc(m, md)					   \
+#define for_each_efi_memory_desc_in_map(m, md)				   \
 	for ((md) = (m)->map;						   \
 	     (md) <= (efi_memory_desc_t *)((m)->map_end - (m)->desc_size); \
 	     (md) = (void *)(md) + (m)->desc_size)
 
+/**
+ * for_each_efi_memory_desc - iterate over descriptors in efi.memmap
+ * @md: the efi_memory_desc_t * iterator
+ *
+ * Once the loop finishes @md must not be accessed.
+ */
+#define for_each_efi_memory_desc(md) \
+	for_each_efi_memory_desc_in_map(&efi.memmap, md)
+
 /*
  * Format an EFI memory descriptor's type and attributes to a user-provided
  * character buffer, as per snprintf(), and return the buffer.
@@ -1000,7 +1060,6 @@
  * possible, remove EFI-related code altogether.
  */
 #define EFI_BOOT		0	/* Were we booted from EFI? */
-#define EFI_SYSTEM_TABLES	1	/* Can we use EFI system tables? */
 #define EFI_CONFIG_TABLES	2	/* Can we use EFI config tables? */
 #define EFI_RUNTIME_SERVICES	3	/* Can we use runtime services? */
 #define EFI_MEMMAP		4	/* Can we use EFI memory map? */
@@ -1026,8 +1085,16 @@
 }
 static inline void
 efi_reboot(enum reboot_mode reboot_mode, const char *__unused) {}
+
+static inline bool
+efi_capsule_pending(int *reset_type)
+{
+	return false;
+}
 #endif
 
+extern int efi_status_to_err(efi_status_t status);
+
 /*
  * Variable Attributes
  */
@@ -1180,6 +1247,80 @@
 	void *test_string;
 };
 
+#define PIXEL_RGB_RESERVED_8BIT_PER_COLOR		0
+#define PIXEL_BGR_RESERVED_8BIT_PER_COLOR		1
+#define PIXEL_BIT_MASK					2
+#define PIXEL_BLT_ONLY					3
+#define PIXEL_FORMAT_MAX				4
+
+struct efi_pixel_bitmask {
+	u32 red_mask;
+	u32 green_mask;
+	u32 blue_mask;
+	u32 reserved_mask;
+};
+
+struct efi_graphics_output_mode_info {
+	u32 version;
+	u32 horizontal_resolution;
+	u32 vertical_resolution;
+	int pixel_format;
+	struct efi_pixel_bitmask pixel_information;
+	u32 pixels_per_scan_line;
+} __packed;
+
+struct efi_graphics_output_protocol_mode_32 {
+	u32 max_mode;
+	u32 mode;
+	u32 info;
+	u32 size_of_info;
+	u64 frame_buffer_base;
+	u32 frame_buffer_size;
+} __packed;
+
+struct efi_graphics_output_protocol_mode_64 {
+	u32 max_mode;
+	u32 mode;
+	u64 info;
+	u64 size_of_info;
+	u64 frame_buffer_base;
+	u64 frame_buffer_size;
+} __packed;
+
+struct efi_graphics_output_protocol_mode {
+	u32 max_mode;
+	u32 mode;
+	unsigned long info;
+	unsigned long size_of_info;
+	u64 frame_buffer_base;
+	unsigned long frame_buffer_size;
+} __packed;
+
+struct efi_graphics_output_protocol_32 {
+	u32 query_mode;
+	u32 set_mode;
+	u32 blt;
+	u32 mode;
+};
+
+struct efi_graphics_output_protocol_64 {
+	u64 query_mode;
+	u64 set_mode;
+	u64 blt;
+	u64 mode;
+};
+
+struct efi_graphics_output_protocol {
+	unsigned long query_mode;
+	unsigned long set_mode;
+	unsigned long blt;
+	struct efi_graphics_output_protocol_mode *mode;
+};
+
+typedef efi_status_t (*efi_graphics_output_protocol_query_mode)(
+	struct efi_graphics_output_protocol *, u32, unsigned long *,
+	struct efi_graphics_output_mode_info **);
+
 extern struct list_head efivar_sysfs_list;
 
 static inline void
@@ -1195,8 +1336,7 @@
 struct kobject *efivars_kobject(void);
 
 int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *),
-		void *data, bool atomic, bool duplicates,
-		struct list_head *head);
+		void *data, bool duplicates, struct list_head *head);
 
 void efivar_entry_add(struct efivar_entry *entry, struct list_head *head);
 void efivar_entry_remove(struct efivar_entry *entry);
@@ -1242,6 +1382,13 @@
 #define EFIVARS_DATA_SIZE_MAX 1024
 
 #endif /* CONFIG_EFI_VARS */
+extern bool efi_capsule_pending(int *reset_type);
+
+extern int efi_capsule_supported(efi_guid_t guid, u32 flags,
+				 size_t size, int *reset);
+
+extern int efi_capsule_update(efi_capsule_header_t *capsule,
+			      struct page **pages);
 
 #ifdef CONFIG_EFI_RUNTIME_MAP
 int efi_runtime_map_init(struct kobject *);
@@ -1319,5 +1466,9 @@
 
 efi_status_t efi_parse_options(char *cmdline);
 
+efi_status_t efi_setup_gop(efi_system_table_t *sys_table_arg,
+			   struct screen_info *si, efi_guid_t *proto,
+			   unsigned long size);
+
 bool efi_runtime_disabled(void);
 #endif /* _LINUX_EFI_H */
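
With struct efi_memory_map embedded in the global efi object, the shorter iterator covers the common case. A sketch counting conventional memory (conventional_pages is hypothetical):

    static u64 conventional_pages(void)
    {
    	efi_memory_desc_t *md;
    	u64 pages = 0;

    	for_each_efi_memory_desc(md)
    		if (md->type == EFI_CONVENTIONAL_MEMORY)
    			pages += md->num_pages;

    	return pages;
    }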
diff --git a/include/linux/leds.h b/include/linux/leds.h
index f203a8f..d2b1306 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -50,6 +50,7 @@
 #define LED_SYSFS_DISABLE	(1 << 22)
 #define LED_DEV_CAP_FLASH	(1 << 23)
 #define LED_HW_PLUGGABLE	(1 << 24)
+#define LED_PANIC_INDICATOR	(1 << 25)
 
 	/* Set LED brightness level
 	 * Must not sleep. Use brightness_set_blocking for drivers
@@ -329,6 +330,12 @@
 static inline void ledtrig_ide_activity(void) {}
 #endif
 
+#ifdef CONFIG_LEDS_TRIGGER_MTD
+extern void ledtrig_mtd_activity(void);
+#else
+static inline void ledtrig_mtd_activity(void) {}
+#endif
+
 #if defined(CONFIG_LEDS_TRIGGER_CAMERA) || defined(CONFIG_LEDS_TRIGGER_CAMERA_MODULE)
 extern void ledtrig_flash_ctrl(bool on);
 extern void ledtrig_torch_ctrl(bool on);
@@ -358,6 +365,7 @@
 	unsigned 	gpio;
 	unsigned	active_low : 1;
 	unsigned	retain_state_suspended : 1;
+	unsigned	panic_indicator : 1;
 	unsigned	default_state : 2;
 	/* default_state should be one of LEDS_GPIO_DEFSTATE_(ON|OFF|KEEP) */
 	struct gpio_desc *gpiod;
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index d10ef06..eabe013 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -356,8 +356,13 @@
 extern void lockdep_clear_current_reclaim_state(void);
 extern void lockdep_trace_alloc(gfp_t mask);
 
-extern void lock_pin_lock(struct lockdep_map *lock);
-extern void lock_unpin_lock(struct lockdep_map *lock);
+struct pin_cookie { unsigned int val; };
+
+#define NIL_COOKIE (struct pin_cookie){ .val = 0U, }
+
+extern struct pin_cookie lock_pin_lock(struct lockdep_map *lock);
+extern void lock_repin_lock(struct lockdep_map *lock, struct pin_cookie);
+extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie);
 
 # define INIT_LOCKDEP				.lockdep_recursion = 0, .lockdep_reclaim_gfp = 0,
 
@@ -373,8 +378,9 @@
 
 #define lockdep_recursing(tsk)	((tsk)->lockdep_recursion)
 
-#define lockdep_pin_lock(l)		lock_pin_lock(&(l)->dep_map)
-#define lockdep_unpin_lock(l)	lock_unpin_lock(&(l)->dep_map)
+#define lockdep_pin_lock(l)	lock_pin_lock(&(l)->dep_map)
+#define lockdep_repin_lock(l,c)	lock_repin_lock(&(l)->dep_map, (c))
+#define lockdep_unpin_lock(l,c)	lock_unpin_lock(&(l)->dep_map, (c))
 
 #else /* !CONFIG_LOCKDEP */
 
@@ -427,8 +433,13 @@
 
 #define lockdep_recursing(tsk)			(0)
 
-#define lockdep_pin_lock(l)				do { (void)(l); } while (0)
-#define lockdep_unpin_lock(l)			do { (void)(l); } while (0)
+struct pin_cookie { };
+
+#define NIL_COOKIE (struct pin_cookie){ }
+
+#define lockdep_pin_lock(l)			({ struct pin_cookie cookie; cookie; })
+#define lockdep_repin_lock(l, c)		do { (void)(l); (void)(c); } while (0)
+#define lockdep_unpin_lock(l, c)		do { (void)(l); (void)(c); } while (0)
 
 #endif /* !LOCKDEP */
 
@@ -446,6 +457,18 @@
 	lock_acquired(&(_lock)->dep_map, _RET_IP_);			\
 } while (0)
 
+#define LOCK_CONTENDED_RETURN(_lock, try, lock)			\
+({								\
+	int ____err = 0;					\
+	if (!try(_lock)) {					\
+		lock_contended(&(_lock)->dep_map, _RET_IP_);	\
+		____err = lock(_lock);				\
+	}							\
+	if (!____err)						\
+		lock_acquired(&(_lock)->dep_map, _RET_IP_);	\
+	____err;						\
+})
+
 #else /* CONFIG_LOCK_STAT */
 
 #define lock_contended(lockdep_map, ip) do {} while (0)
@@ -454,6 +477,9 @@
 #define LOCK_CONTENDED(_lock, try, lock) \
 	lock(_lock)
 
+#define LOCK_CONTENDED_RETURN(_lock, try, lock) \
+	lock(_lock)
+
 #endif /* CONFIG_LOCK_STAT */
 
 #ifdef CONFIG_LOCKDEP
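
The cookie makes pin/unpin a matched pair: only the holder of the cookie returned by lockdep_pin_lock() may unpin, and lockdep_repin_lock() re-arms the same cookie after a legitimate drop. The intended pattern, sketched against a runqueue-style lock with a dep_map (guarded_section is hypothetical):

    static void guarded_section(struct rq *rq)
    {
    	struct pin_cookie cookie;

    	cookie = lockdep_pin_lock(&rq->lock);	/* rq->lock must stay held */

    	/* ... code that is not allowed to release rq->lock ... */

    	lockdep_unpin_lock(&rq->lock, cookie);	/* pairs with the pin above */
    }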
diff --git a/include/linux/mfd/as3722.h b/include/linux/mfd/as3722.h
index 8d43e9f..51e6f94 100644
--- a/include/linux/mfd/as3722.h
+++ b/include/linux/mfd/as3722.h
@@ -196,6 +196,7 @@
 #define AS3722_LDO3_VSEL_MIN				0x01
 #define AS3722_LDO3_VSEL_MAX				0x2D
 #define AS3722_LDO3_NUM_VOLT				0x2D
+#define AS3722_LDO6_VSEL_BYPASS 			0x3F
 #define AS3722_LDO_VSEL_MASK				0x7F
 #define AS3722_LDO_VSEL_MIN				0x01
 #define AS3722_LDO_VSEL_MAX				0x7F
diff --git a/include/linux/mfd/samsung/core.h b/include/linux/mfd/samsung/core.h
index 6bc4bcd..5a23dd4 100644
--- a/include/linux/mfd/samsung/core.h
+++ b/include/linux/mfd/samsung/core.h
@@ -30,6 +30,9 @@
 #define MIN_600_MV		600000
 #define MIN_500_MV		500000
 
+/* Ramp delay in uV/us */
+#define RAMP_DELAY_12_MVUS	12000
+
 /* Macros to represent steps for LDO/BUCK */
 #define STEP_50_MV		50000
 #define STEP_25_MV		25000
diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h
index 05d58ee..7a26286 100644
--- a/include/linux/mfd/tmio.h
+++ b/include/linux/mfd/tmio.h
@@ -66,8 +66,8 @@
  */
 #define TMIO_MMC_SDIO_IRQ		(1 << 2)
 
-/* Some controllers don't need to wait 10ms for clock changes */
-#define TMIO_MMC_FAST_CLK_CHG		(1 << 3)
+/* Some features are only available or tested on RCar Gen2 or later */
+#define TMIO_MMC_MIN_RCAR2		(1 << 3)
 
 /*
  * Some controllers require waiting for the SD bus to become
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8f468e0..727f7997 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -72,6 +72,10 @@
 #define __pa_symbol(x)  __pa(RELOC_HIDE((unsigned long)(x), 0))
 #endif
 
+#ifndef page_to_virt
+#define page_to_virt(x)	__va(PFN_PHYS(page_to_pfn(x)))
+#endif
+
 /*
  * To prevent common memory management code establishing
  * a zero page mapping on a read fault.
@@ -957,7 +961,7 @@
 
 static __always_inline void *lowmem_page_address(const struct page *page)
 {
-	return __va(PFN_PHYS(page_to_pfn(page)));
+	return page_to_virt(page);
 }
 
 #if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL)
diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h
index 7b41c6d..f7ed271 100644
--- a/include/linux/mmc/dw_mmc.h
+++ b/include/linux/mmc/dw_mmc.h
@@ -36,7 +36,6 @@
 	EVENT_XFER_COMPLETE,
 	EVENT_DATA_COMPLETE,
 	EVENT_DATA_ERROR,
-	EVENT_XFER_ERROR
 };
 
 struct mmc_data;
@@ -55,6 +54,7 @@
 /**
  * struct dw_mci - MMC controller state shared between all slots
  * @lock: Spinlock protecting the queue and associated data.
+ * @irq_lock: Spinlock protecting the INTMASK setting.
  * @regs: Pointer to MMIO registers.
  * @fifo_reg: Pointer to MMIO registers for data FIFO
  * @sg: Scatterlist entry currently being processed by PIO code, if any.
@@ -65,6 +65,9 @@
  * @cmd: The command currently being sent to the card, or NULL.
  * @data: The data currently being transferred, or NULL if no data
  *	transfer is in progress.
+ * @stop_abort: The command currently prepared for stopping the transfer.
+ * @prev_blksz: The previous transfer block size record.
+ * @timing: Record of current ios timing.
  * @use_dma: Whether DMA channel is initialized or not.
  * @using_dma: Whether DMA is in use for the current transfer.
  * @dma_64bit_address: Whether DMA supports 64-bit address mode or not.
@@ -72,7 +75,10 @@
  * @sg_cpu: Virtual address of DMA buffer.
  * @dma_ops: Pointer to platform-specific DMA callbacks.
  * @cmd_status: Snapshot of SR taken upon completion of the current
  *	command. Only valid when EVENT_CMD_COMPLETE is pending.
+ * @ring_size: Buffer size for idma descriptors.
+ * @dms: Structure of slave-dma private data.
+ * @phy_regs: Physical address of the controller's register map.
  * @data_status: Snapshot of SR taken upon completion of the current
  *	data transfer. Only valid when EVENT_DATA_COMPLETE or
  *	EVENT_DATA_ERROR is pending.
@@ -80,7 +86,6 @@
  *	to be sent.
  * @dir_status: Direction of current transfer.
  * @tasklet: Tasklet running the request state machine.
- * @card_tasklet: Tasklet handling card detect.
  * @pending_events: Bitmask of events flagged by the interrupt handler
  *	to be processed by the tasklet.
  * @completed_events: Bitmask of events which the state machine has
@@ -91,6 +96,7 @@
  *	rate and timeout calculations.
  * @current_speed: Configured rate of the controller.
  * @num_slots: Number of slots available.
+ * @fifoth_val: The value of the FIFOTH register.
  * @verid: Denote Version ID.
  * @dev: Device associated with the MMC controller.
  * @pdata: Platform data associated with the MMC controller.
@@ -107,9 +113,11 @@
  * @push_data: Pointer to FIFO push function.
  * @pull_data: Pointer to FIFO pull function.
  * @quirks: Set of quirks that apply to specific versions of the IP.
+ * @vqmmc_enabled: Status of vqmmc, should be true or false.
  * @irq_flags: The flags to be passed to request_irq.
  * @irq: The irq value to be passed to request_irq.
  * @sdio_id0: Number of slot0 in the SDIO interrupt registers.
+ * @cmd11_timer: Timer for SD3.0 voltage switch over scheme.
  * @dto_timer: Timer for broken data transfer over scheme.
  *
  * Locking
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 8dd4d29..85800b4 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -93,28 +93,39 @@
 	void	(*pre_req)(struct mmc_host *host, struct mmc_request *req,
 			   bool is_first_req);
 	void	(*request)(struct mmc_host *host, struct mmc_request *req);
+
 	/*
-	 * Avoid calling these three functions too often or in a "fast path",
-	 * since underlaying controller might implement them in an expensive
-	 * and/or slow way.
-	 *
-	 * Also note that these functions might sleep, so don't call them
-	 * in the atomic contexts!
-	 *
+	 * Avoid calling the next three functions too often or in a "fast
+	 * path", since underlaying controller might implement them in an
+	 * expensive and/or slow way. Also note that these functions might
+	 * sleep, so don't call them in the atomic contexts!
+	 */
+
+	/*
+	 * Notes to the set_ios callback:
+	 * ios->clock might be 0. For some controllers, setting 0Hz
+	 * works like any other frequency. However, some controllers
+	 * explicitly need to disable the clock. Otherwise e.g. voltage
+	 * switching might fail because the SDCLK is not really quiet.
+	 */
+	void	(*set_ios)(struct mmc_host *host, struct mmc_ios *ios);
+
+	/*
 	 * Return values for the get_ro callback should be:
 	 *   0 for a read/write card
 	 *   1 for a read-only card
 	 *   -ENOSYS when not supported (equal to NULL callback)
 	 *   or a negative errno value when something bad happened
-	 *
+	 */
+	int	(*get_ro)(struct mmc_host *host);
+
+	/*
 	 * Return values for the get_cd callback should be:
 	 *   0 for a absent card
 	 *   1 for a present card
 	 *   -ENOSYS when not supported (equal to NULL callback)
 	 *   or a negative errno value when something bad happened
 	 */
-	void	(*set_ios)(struct mmc_host *host, struct mmc_ios *ios);
-	int	(*get_ro)(struct mmc_host *host);
 	int	(*get_cd)(struct mmc_host *host);
 
 	void	(*enable_sdio_irq)(struct mmc_host *host, int enable);
diff --git a/include/linux/mmc/sh_mobile_sdhi.h b/include/linux/mmc/sh_mobile_sdhi.h
deleted file mode 100644
index 95d6f03..0000000
--- a/include/linux/mmc/sh_mobile_sdhi.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef LINUX_MMC_SH_MOBILE_SDHI_H
-#define LINUX_MMC_SH_MOBILE_SDHI_H
-
-#include <linux/types.h>
-
-#define SH_MOBILE_SDHI_IRQ_CARD_DETECT	"card_detect"
-#define SH_MOBILE_SDHI_IRQ_SDCARD	"sdcard"
-#define SH_MOBILE_SDHI_IRQ_SDIO		"sdio"
-
-#endif /* LINUX_MMC_SH_MOBILE_SDHI_H */
diff --git a/include/linux/mmc/tmio.h b/include/linux/mmc/tmio.h
deleted file mode 100644
index 5f5cd80..0000000
--- a/include/linux/mmc/tmio.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * include/linux/mmc/tmio.h
- *
- * Copyright (C) 2016 Sang Engineering, Wolfram Sang
- * Copyright (C) 2015-16 Renesas Electronics Corporation
- * Copyright (C) 2007 Ian Molton
- * Copyright (C) 2004 Ian Molton
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Driver for the MMC / SD / SDIO cell found in:
- *
- * TC6393XB TC6391XB TC6387XB T7L66XB ASIC3
- */
-#ifndef LINUX_MMC_TMIO_H
-#define LINUX_MMC_TMIO_H
-
-#define CTL_SD_CMD 0x00
-#define CTL_ARG_REG 0x04
-#define CTL_STOP_INTERNAL_ACTION 0x08
-#define CTL_XFER_BLK_COUNT 0xa
-#define CTL_RESPONSE 0x0c
-#define CTL_STATUS 0x1c
-#define CTL_STATUS2 0x1e
-#define CTL_IRQ_MASK 0x20
-#define CTL_SD_CARD_CLK_CTL 0x24
-#define CTL_SD_XFER_LEN 0x26
-#define CTL_SD_MEM_CARD_OPT 0x28
-#define CTL_SD_ERROR_DETAIL_STATUS 0x2c
-#define CTL_SD_DATA_PORT 0x30
-#define CTL_TRANSACTION_CTL 0x34
-#define CTL_SDIO_STATUS 0x36
-#define CTL_SDIO_IRQ_MASK 0x38
-#define CTL_DMA_ENABLE 0xd8
-#define CTL_RESET_SD 0xe0
-#define CTL_VERSION 0xe2
-#define CTL_SDIO_REGS 0x100
-#define CTL_CLK_AND_WAIT_CTL 0x138
-#define CTL_RESET_SDIO 0x1e0
-
-/* Definitions for values the CTRL_STATUS register can take. */
-#define TMIO_STAT_CMDRESPEND    0x00000001
-#define TMIO_STAT_DATAEND       0x00000004
-#define TMIO_STAT_CARD_REMOVE   0x00000008
-#define TMIO_STAT_CARD_INSERT   0x00000010
-#define TMIO_STAT_SIGSTATE      0x00000020
-#define TMIO_STAT_WRPROTECT     0x00000080
-#define TMIO_STAT_CARD_REMOVE_A 0x00000100
-#define TMIO_STAT_CARD_INSERT_A 0x00000200
-#define TMIO_STAT_SIGSTATE_A    0x00000400
-#define TMIO_STAT_CMD_IDX_ERR   0x00010000
-#define TMIO_STAT_CRCFAIL       0x00020000
-#define TMIO_STAT_STOPBIT_ERR   0x00040000
-#define TMIO_STAT_DATATIMEOUT   0x00080000
-#define TMIO_STAT_RXOVERFLOW    0x00100000
-#define TMIO_STAT_TXUNDERRUN    0x00200000
-#define TMIO_STAT_CMDTIMEOUT    0x00400000
-#define TMIO_STAT_RXRDY         0x01000000
-#define TMIO_STAT_TXRQ          0x02000000
-#define TMIO_STAT_ILL_FUNC      0x20000000
-#define TMIO_STAT_CMD_BUSY      0x40000000
-#define TMIO_STAT_ILL_ACCESS    0x80000000
-
-#define	CLK_CTL_DIV_MASK	0xff
-#define	CLK_CTL_SCLKEN		BIT(8)
-
-#define TMIO_BBS		512		/* Boot block size */
-
-#endif /* LINUX_MMC_TMIO_H */
diff --git a/include/linux/mmu_context.h b/include/linux/mmu_context.h
index 70fffeb..a444178 100644
--- a/include/linux/mmu_context.h
+++ b/include/linux/mmu_context.h
@@ -1,9 +1,16 @@
 #ifndef _LINUX_MMU_CONTEXT_H
 #define _LINUX_MMU_CONTEXT_H
 
+#include <asm/mmu_context.h>
+
 struct mm_struct;
 
 void use_mm(struct mm_struct *mm);
 void unuse_mm(struct mm_struct *mm);
 
+/* Architectures that care about IRQ state in switch_mm can override this. */
+#ifndef switch_mm_irqs_off
+# define switch_mm_irqs_off switch_mm
+#endif
+
 #endif
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 7712721..ef9fea4 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -283,17 +283,7 @@
 		    const u_char *buf);
 
 int mtd_read_oob(struct mtd_info *mtd, loff_t from, struct mtd_oob_ops *ops);
-
-static inline int mtd_write_oob(struct mtd_info *mtd, loff_t to,
-				struct mtd_oob_ops *ops)
-{
-	ops->retlen = ops->oobretlen = 0;
-	if (!mtd->_write_oob)
-		return -EOPNOTSUPP;
-	if (!(mtd->flags & MTD_WRITEABLE))
-		return -EROFS;
-	return mtd->_write_oob(mtd, to, ops);
-}
+int mtd_write_oob(struct mtd_info *mtd, loff_t to, struct mtd_oob_ops *ops);
 
 int mtd_get_fact_prot_info(struct mtd_info *mtd, size_t len, size_t *retlen,
 			   struct otp_info *buf);
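
Uninlining mtd_write_oob() leaves its contract unchanged; callers still fill an mtd_oob_ops and check the returned lengths afterwards. A sketch (write_oob_bytes is hypothetical):

    static int write_oob_bytes(struct mtd_info *mtd, loff_t to,
    			   u8 *oobbuf, size_t ooblen)
    {
    	struct mtd_oob_ops ops = {
    		.mode	= MTD_OPS_PLACE_OOB,
    		.oobbuf	= oobbuf,
    		.ooblen	= ooblen,
    	};
    	int ret = mtd_write_oob(mtd, to, &ops);

    	return ret ? ret : (ops.oobretlen == ooblen ? 0 : -EIO);
    }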
diff --git a/include/linux/of.h b/include/linux/of.h
index 3175803..77ddace 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -685,6 +685,15 @@
 }
 #endif
 
+#ifdef CONFIG_OF_NUMA
+extern int of_numa_init(void);
+#else
+static inline int of_numa_init(void)
+{
+	return -ENOSYS;
+}
+#endif
+
 static inline struct device_node *of_find_matching_node(
 	struct device_node *from,
 	const struct of_device_id *matches)
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index 4196c90..d28ac05 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -105,6 +105,8 @@
 	struct mutex	reserve_mutex;
 	u64		max_period;
 	bool		secure_access; /* 32-bit ARM only */
+#define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40
+	DECLARE_BITMAP(pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS);
 	struct platform_device	*plat_device;
 	struct pmu_hw_events	__percpu *hw_events;
 	struct notifier_block	hotplug_nb;
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index f291275..9e1c3ad 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -58,7 +58,7 @@
 
 struct perf_callchain_entry {
 	__u64				nr;
-	__u64				ip[PERF_MAX_STACK_DEPTH];
+	__u64				ip[0]; /* /proc/sys/kernel/perf_event_max_stack */
 };
 
 struct perf_raw_record {
@@ -151,6 +151,15 @@
 	 */
 	struct task_struct		*target;
 
+	/*
+	 * The PMU stores its hardware address filter configuration
+	 * here.
+	 */
+	void				*addr_filters;
+
+	/* Last sync'ed generation of filters */
+	unsigned long			addr_filters_gen;
+
 /*
  * hw_perf_event::state flags; used to track the PERF_EF_* state.
  */
@@ -216,6 +225,7 @@
 #define PERF_PMU_CAP_AUX_SW_DOUBLEBUF		0x08
 #define PERF_PMU_CAP_EXCLUSIVE			0x10
 #define PERF_PMU_CAP_ITRACE			0x20
+#define PERF_PMU_CAP_HETEROGENEOUS_CPUS		0x40
 
 /**
  * struct pmu - generic performance monitoring unit
@@ -240,6 +250,9 @@
 	int				task_ctx_nr;
 	int				hrtimer_interval_ms;
 
+	/* number of address filters this PMU can do */
+	unsigned int			nr_addr_filters;
+
 	/*
 	 * Fully disable/enable this PMU, can be used to protect from the PMI
 	 * as well as for lazy/batch writing of the MSRs.
@@ -393,12 +406,71 @@
 	void (*free_aux)		(void *aux); /* optional */
 
 	/*
+	 * Validate address range filters: make sure the HW supports the
+	 * requested configuration and number of filters; return 0 if the
+	 * supplied filters are valid, -errno otherwise.
+	 *
+	 * Runs in the context of the ioctl()ing process and is not serialized
+	 * with the rest of the PMU callbacks.
+	 */
+	int (*addr_filters_validate)	(struct list_head *filters);
+					/* optional */
+
+	/*
+	 * Synchronize address range filter configuration:
+	 * translate hw-agnostic filters into hardware configuration in
+	 * event::hw::addr_filters.
+	 *
+	 * Runs as part of the filter sync sequence that is done in the
+	 * ->start() callback by calling perf_event_addr_filters_sync().
+	 * callback by calling perf_event_addr_filters_sync().
+	 *
+	 * May (and should) traverse event::addr_filters::list, for which its
+	 * caller provides necessary serialization.
+	 */
+	void (*addr_filters_sync)	(struct perf_event *event);
+					/* optional */
+
+	/*
 	 * Filter events for PMU-specific reasons.
 	 */
 	int (*filter_match)		(struct perf_event *event); /* optional */
 };
 
 /**
+ * struct perf_addr_filter - address range filter definition
+ * @entry:	event's filter list linkage
+ * @inode:	object file's inode for file-based filters
+ * @offset:	filter range offset
+ * @size:	filter range size
+ * @range:	1: range, 0: address
+ * @filter:	1: filter/start, 0: stop
+ *
+ * This is a hardware-agnostic filter configuration as specified by the user.
+ */
+struct perf_addr_filter {
+	struct list_head	entry;
+	struct inode		*inode;
+	unsigned long		offset;
+	unsigned long		size;
+	unsigned int		range	: 1,
+				filter	: 1;
+};
+
+/**
+ * struct perf_addr_filters_head - container for address range filters
+ * @list:	list of filters for this event
+ * @lock:	spinlock that serializes accesses to the @list and event's
+ *		(and its children's) filter generations.
+ *
+ * A child event will use parent's @list (and therefore @lock), so they are
+ * bundled together; see perf_event_addr_filters().
+ */
+struct perf_addr_filters_head {
+	struct list_head	list;
+	raw_spinlock_t		lock;
+};
+
+/**
  * enum perf_event_active_state - the states of a event
  */
 enum perf_event_active_state {
@@ -566,6 +638,12 @@
 
 	atomic_t			event_limit;
 
+	/* address range filters */
+	struct perf_addr_filters_head	addr_filters;
+	/* vma address array for file-based filters */
+	unsigned long			*addr_filters_offs;
+	unsigned long			addr_filters_gen;
+
 	void (*destroy)(struct perf_event *);
 	struct rcu_head			rcu_head;
 
@@ -834,9 +912,25 @@
 				 struct perf_sample_data *data,
 				 struct pt_regs *regs);
 
+extern void perf_event_output_forward(struct perf_event *event,
+				     struct perf_sample_data *data,
+				     struct pt_regs *regs);
+extern void perf_event_output_backward(struct perf_event *event,
+				       struct perf_sample_data *data,
+				       struct pt_regs *regs);
 extern void perf_event_output(struct perf_event *event,
-				struct perf_sample_data *data,
-				struct pt_regs *regs);
+			      struct perf_sample_data *data,
+			      struct pt_regs *regs);
+
+static inline bool
+is_default_overflow_handler(struct perf_event *event)
+{
+	if (likely(event->overflow_handler == perf_event_output_forward))
+		return true;
+	if (unlikely(event->overflow_handler == perf_event_output_backward))
+		return true;
+	return false;
+}
 
 extern void
 perf_event_header__init_id(struct perf_event_header *header,
@@ -977,9 +1071,11 @@
 extern int get_callchain_buffers(void);
 extern void put_callchain_buffers(void);
 
+extern int sysctl_perf_event_max_stack;
+
 static inline int perf_callchain_store(struct perf_callchain_entry *entry, u64 ip)
 {
-	if (entry->nr < PERF_MAX_STACK_DEPTH) {
+	if (entry->nr < sysctl_perf_event_max_stack) {
 		entry->ip[entry->nr++] = ip;
 		return 0;
 	} else {
@@ -1001,6 +1097,8 @@
 		void __user *buffer, size_t *lenp,
 		loff_t *ppos);
 
+int perf_event_max_stack_handler(struct ctl_table *table, int write,
+				 void __user *buffer, size_t *lenp, loff_t *ppos);
 
 static inline bool perf_paranoid_tracepoint_raw(void)
 {
@@ -1045,8 +1143,41 @@
 	return event->pmu->setup_aux;
 }
 
+static inline bool is_write_backward(struct perf_event *event)
+{
+	return !!event->attr.write_backward;
+}
+
+static inline bool has_addr_filter(struct perf_event *event)
+{
+	return event->pmu->nr_addr_filters;
+}
+
+/*
+ * An inherited event uses its parent's filters
+ */
+static inline struct perf_addr_filters_head *
+perf_event_addr_filters(struct perf_event *event)
+{
+	struct perf_addr_filters_head *ifh = &event->addr_filters;
+
+	if (event->parent)
+		ifh = &event->parent->addr_filters;
+
+	return ifh;
+}
+
+extern void perf_event_addr_filters_sync(struct perf_event *event);
+
 extern int perf_output_begin(struct perf_output_handle *handle,
 			     struct perf_event *event, unsigned int size);
+extern int perf_output_begin_forward(struct perf_output_handle *handle,
+				    struct perf_event *event,
+				    unsigned int size);
+extern int perf_output_begin_backward(struct perf_output_handle *handle,
+				      struct perf_event *event,
+				      unsigned int size);
+
 extern void perf_output_end(struct perf_output_handle *handle);
 extern unsigned int perf_output_copy(struct perf_output_handle *handle,
 			     const void *buf, unsigned int len);
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 6a5d654..06eb353 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -563,7 +563,6 @@
 	bool			is_suspended:1;	/* Ditto */
 	bool			is_noirq_suspended:1;
 	bool			is_late_suspended:1;
-	bool			ignore_children:1;
 	bool			early_init:1;	/* Owned by the PM core */
 	bool			direct_complete:1;	/* Owned by the PM core */
 	spinlock_t		lock;
@@ -591,6 +590,7 @@
 	unsigned int		deferred_resume:1;
 	unsigned int		run_wake:1;
 	unsigned int		runtime_auto:1;
+	bool			ignore_children:1;
 	unsigned int		no_callbacks:1;
 	unsigned int		irq_safe:1;
 	unsigned int		use_autosuspend:1;
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 49cd889..39285c7 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -28,14 +28,12 @@
 
 struct dev_power_governor {
 	bool (*power_down_ok)(struct dev_pm_domain *domain);
-	bool (*stop_ok)(struct device *dev);
+	bool (*suspend_ok)(struct device *dev);
 };
 
 struct gpd_dev_ops {
 	int (*start)(struct device *dev);
 	int (*stop)(struct device *dev);
-	int (*save_state)(struct device *dev);
-	int (*restore_state)(struct device *dev);
 	bool (*active_wakeup)(struct device *dev);
 };
 
@@ -94,7 +92,7 @@
 	s64 resume_latency_ns;
 	s64 effective_constraint_ns;
 	bool constraint_changed;
-	bool cached_stop_ok;
+	bool cached_suspend_ok;
 };
 
 struct pm_domain_data {
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index cccaf4a..bca2615 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -65,6 +65,10 @@
 int dev_pm_opp_set_regulator(struct device *dev, const char *name);
 void dev_pm_opp_put_regulator(struct device *dev);
 int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq);
+int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const struct cpumask *cpumask);
+int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask);
+void dev_pm_opp_remove_table(struct device *dev);
+void dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask);
 #else
 static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp)
 {
@@ -109,25 +113,25 @@
 static inline struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev,
 					unsigned long freq, bool available)
 {
-	return ERR_PTR(-EINVAL);
+	return ERR_PTR(-ENOTSUPP);
 }
 
 static inline struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev,
 					unsigned long *freq)
 {
-	return ERR_PTR(-EINVAL);
+	return ERR_PTR(-ENOTSUPP);
 }
 
 static inline struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev,
 					unsigned long *freq)
 {
-	return ERR_PTR(-EINVAL);
+	return ERR_PTR(-ENOTSUPP);
 }
 
 static inline int dev_pm_opp_add(struct device *dev, unsigned long freq,
 					unsigned long u_volt)
 {
-	return -EINVAL;
+	return -ENOTSUPP;
 }
 
 static inline void dev_pm_opp_remove(struct device *dev, unsigned long freq)
@@ -147,73 +151,85 @@
 static inline struct srcu_notifier_head *dev_pm_opp_get_notifier(
 							struct device *dev)
 {
-	return ERR_PTR(-EINVAL);
+	return ERR_PTR(-ENOTSUPP);
 }
 
 static inline int dev_pm_opp_set_supported_hw(struct device *dev,
 					      const u32 *versions,
 					      unsigned int count)
 {
-	return -EINVAL;
+	return -ENOTSUPP;
 }
 
 static inline void dev_pm_opp_put_supported_hw(struct device *dev) {}
 
 static inline int dev_pm_opp_set_prop_name(struct device *dev, const char *name)
 {
-	return -EINVAL;
+	return -ENOTSUPP;
 }
 
 static inline void dev_pm_opp_put_prop_name(struct device *dev) {}
 
 static inline int dev_pm_opp_set_regulator(struct device *dev, const char *name)
 {
-	return -EINVAL;
+	return -ENOTSUPP;
 }
 
 static inline void dev_pm_opp_put_regulator(struct device *dev) {}
 
 static inline int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq)
 {
+	return -ENOTSUPP;
+}
+
+static inline int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const struct cpumask *cpumask)
+{
+	return -ENOTSUPP;
+}
+
+static inline int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask)
+{
 	return -EINVAL;
 }
 
+static inline void dev_pm_opp_remove_table(struct device *dev)
+{
+}
+
+static inline void dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask)
+{
+}
+
 #endif		/* CONFIG_PM_OPP */
 
 #if defined(CONFIG_PM_OPP) && defined(CONFIG_OF)
 int dev_pm_opp_of_add_table(struct device *dev);
 void dev_pm_opp_of_remove_table(struct device *dev);
-int dev_pm_opp_of_cpumask_add_table(cpumask_var_t cpumask);
-void dev_pm_opp_of_cpumask_remove_table(cpumask_var_t cpumask);
-int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask);
-int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask);
+int dev_pm_opp_of_cpumask_add_table(const struct cpumask *cpumask);
+void dev_pm_opp_of_cpumask_remove_table(const struct cpumask *cpumask);
+int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask);
 #else
 static inline int dev_pm_opp_of_add_table(struct device *dev)
 {
-	return -EINVAL;
+	return -ENOTSUPP;
 }
 
 static inline void dev_pm_opp_of_remove_table(struct device *dev)
 {
 }
 
-static inline int dev_pm_opp_of_cpumask_add_table(cpumask_var_t cpumask)
+static inline int dev_pm_opp_of_cpumask_add_table(const struct cpumask *cpumask)
 {
-	return -ENOSYS;
+	return -ENOTSUPP;
 }
 
-static inline void dev_pm_opp_of_cpumask_remove_table(cpumask_var_t cpumask)
+static inline void dev_pm_opp_of_cpumask_remove_table(const struct cpumask *cpumask)
 {
 }
 
-static inline int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask)
+static inline int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask)
 {
-	return -ENOSYS;
-}
-
-static inline int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask)
-{
-	return -ENOSYS;
+	return -ENOTSUPP;
 }
 #endif
 
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 7af093d..2e14d26 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -56,6 +56,11 @@
 						 s64 delta_ns);
 extern void pm_runtime_set_memalloc_noio(struct device *dev, bool enable);
 
+static inline void pm_suspend_ignore_children(struct device *dev, bool enable)
+{
+	dev->power.ignore_children = enable;
+}
+
 static inline bool pm_children_suspended(struct device *dev)
 {
 	return dev->power.ignore_children
@@ -156,6 +161,7 @@
 static inline void pm_runtime_allow(struct device *dev) {}
 static inline void pm_runtime_forbid(struct device *dev) {}
 
+static inline void pm_suspend_ignore_children(struct device *dev, bool enable) {}
 static inline bool pm_children_suspended(struct device *dev) { return false; }
 static inline void pm_runtime_get_noresume(struct device *dev) {}
 static inline void pm_runtime_put_noidle(struct device *dev) {}
diff --git a/include/linux/pnp.h b/include/linux/pnp.h
index 5df733b..2588ca6 100644
--- a/include/linux/pnp.h
+++ b/include/linux/pnp.h
@@ -337,9 +337,11 @@
 
 #ifdef CONFIG_PNPBIOS
 extern struct pnp_protocol pnpbios_protocol;
+extern bool arch_pnpbios_disabled(void);
 #define pnp_device_is_pnpbios(dev) ((dev)->protocol == (&pnpbios_protocol))
 #else
 #define pnp_device_is_pnpbios(dev) 0
+#define arch_pnpbios_disabled()	false
 #endif
 
 #ifdef CONFIG_PNPACPI
diff --git a/include/linux/proportions.h b/include/linux/proportions.h
deleted file mode 100644
index 2122133..0000000
--- a/include/linux/proportions.h
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * FLoating proportions
- *
- *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
- *
- * This file contains the public data structure and API definitions.
- */
-
-#ifndef _LINUX_PROPORTIONS_H
-#define _LINUX_PROPORTIONS_H
-
-#include <linux/percpu_counter.h>
-#include <linux/spinlock.h>
-#include <linux/mutex.h>
-#include <linux/gfp.h>
-
-struct prop_global {
-	/*
-	 * The period over which we differentiate
-	 *
-	 *   period = 2^shift
-	 */
-	int shift;
-	/*
-	 * The total event counter aka 'time'.
-	 *
-	 * Treated as an unsigned long; the lower 'shift - 1' bits are the
-	 * counter bits, the remaining upper bits the period counter.
-	 */
-	struct percpu_counter events;
-};
-
-/*
- * global proportion descriptor
- *
- * this is needed to consistently flip prop_global structures.
- */
-struct prop_descriptor {
-	int index;
-	struct prop_global pg[2];
-	struct mutex mutex;		/* serialize the prop_global switch */
-};
-
-int prop_descriptor_init(struct prop_descriptor *pd, int shift, gfp_t gfp);
-void prop_change_shift(struct prop_descriptor *pd, int new_shift);
-
-/*
- * ----- PERCPU ------
- */
-
-struct prop_local_percpu {
-	/*
-	 * the local events counter
-	 */
-	struct percpu_counter events;
-
-	/*
-	 * snapshot of the last seen global state
-	 */
-	int shift;
-	unsigned long period;
-	raw_spinlock_t lock;		/* protect the snapshot state */
-};
-
-int prop_local_init_percpu(struct prop_local_percpu *pl, gfp_t gfp);
-void prop_local_destroy_percpu(struct prop_local_percpu *pl);
-void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl);
-void prop_fraction_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl,
-		long *numerator, long *denominator);
-
-static inline
-void prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	__prop_inc_percpu(pd, pl);
-	local_irq_restore(flags);
-}
-
-/*
- * Limit the time part in order to ensure there are some bits left for the
- * cycle counter and fraction multiply.
- */
-#if BITS_PER_LONG == 32
-#define PROP_MAX_SHIFT (3*BITS_PER_LONG/4)
-#else
-#define PROP_MAX_SHIFT (BITS_PER_LONG/2)
-#endif
-
-#define PROP_FRAC_SHIFT		(BITS_PER_LONG - PROP_MAX_SHIFT - 1)
-#define PROP_FRAC_BASE		(1UL << PROP_FRAC_SHIFT)
-
-void __prop_inc_percpu_max(struct prop_descriptor *pd,
-			   struct prop_local_percpu *pl, long frac);
-
-
-/*
- * ----- SINGLE ------
- */
-
-struct prop_local_single {
-	/*
-	 * the local events counter
-	 */
-	unsigned long events;
-
-	/*
-	 * snapshot of the last seen global state
-	 * and a lock protecting this state
-	 */
-	unsigned long period;
-	int shift;
-	raw_spinlock_t lock;		/* protect the snapshot state */
-};
-
-#define INIT_PROP_LOCAL_SINGLE(name)			\
-{	.lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock),	\
-}
-
-int prop_local_init_single(struct prop_local_single *pl);
-void prop_local_destroy_single(struct prop_local_single *pl);
-void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl);
-void prop_fraction_single(struct prop_descriptor *pd, struct prop_local_single *pl,
-		long *numerator, long *denominator);
-
-static inline
-void prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	__prop_inc_single(pd, pl);
-	local_irq_restore(flags);
-}
-
-#endif /* _LINUX_PROPORTIONS_H */
diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index cfc3ed4..b78d27c 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -74,6 +74,24 @@
 	PWM_POLARITY_INVERSED,
 };
 
+/**
+ * struct pwm_args - board-dependent PWM arguments
+ * @period: reference period
+ * @polarity: reference polarity
+ *
+ * This structure describes board-dependent arguments attached to a PWM
+ * device. These arguments are usually retrieved from the PWM lookup table or
+ * device tree.
+ *
+ * Do not confuse this with the PWM state: PWM arguments represent the initial
+ * configuration that users want to use on this PWM device rather than the
+ * current PWM hardware state.
+ */
+struct pwm_args {
+	unsigned int period;
+	enum pwm_polarity polarity;
+};
+
 enum {
 	PWMF_REQUESTED = 1 << 0,
 	PWMF_ENABLED = 1 << 1,
@@ -92,6 +110,7 @@
  * @period: period of the PWM signal (in nanoseconds)
  * @duty_cycle: duty cycle of the PWM signal (in nanoseconds)
  * @polarity: polarity of the PWM signal
+ * @args: PWM arguments
  */
 struct pwm_device {
 	const char *label;
@@ -105,6 +124,8 @@
 	unsigned int period;
 	unsigned int duty_cycle;
 	enum pwm_polarity polarity;
+
+	struct pwm_args args;
 };
 
 static inline bool pwm_is_enabled(const struct pwm_device *pwm)
@@ -144,6 +165,18 @@
 	return pwm ? pwm->polarity : PWM_POLARITY_NORMAL;
 }
 
+static inline void pwm_get_args(const struct pwm_device *pwm,
+				struct pwm_args *args)
+{
+	*args = pwm->args;
+}
+
+static inline void pwm_apply_args(struct pwm_device *pwm)
+{
+	pwm_set_period(pwm, pwm->args.period);
+	pwm_set_polarity(pwm, pwm->args.polarity);
+}
+
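+
A short usage sketch for the new args API (the client-driver context is
illustrative; pwm_config() is the pre-existing legacy call):

	#include <linux/pwm.h>

	static int example_setup_pwm(struct pwm_device *pwm)
	{
		struct pwm_args args;

		/* read the board-supplied reference configuration */
		pwm_get_args(pwm, &args);

		/* adopt the reference period and polarity as initial state */
		pwm_apply_args(pwm);

		/* e.g. start at 50% duty cycle of the reference period */
		return pwm_config(pwm, args.period / 2, args.period);
	}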
 /**
  * struct pwm_ops - PWM controller operations
  * @request: optional hook for requesting a PWM
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 2657aff..5f1533e 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -508,14 +508,7 @@
  * CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side
  * critical section unless it can prove otherwise.
  */
-#ifdef CONFIG_PREEMPT_COUNT
 int rcu_read_lock_sched_held(void);
-#else /* #ifdef CONFIG_PREEMPT_COUNT */
-static inline int rcu_read_lock_sched_held(void)
-{
-	return 1;
-}
-#endif /* #else #ifdef CONFIG_PREEMPT_COUNT */
 
 #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
@@ -532,18 +525,10 @@
 	return 1;
 }
 
-#ifdef CONFIG_PREEMPT_COUNT
 static inline int rcu_read_lock_sched_held(void)
 {
-	return preempt_count() != 0 || irqs_disabled();
+	return !preemptible();
 }
-#else /* #ifdef CONFIG_PREEMPT_COUNT */
-static inline int rcu_read_lock_sched_held(void)
-{
-	return 1;
-}
-#endif /* #else #ifdef CONFIG_PREEMPT_COUNT */
-
 #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
 #ifdef CONFIG_PROVE_RCU
@@ -1144,4 +1129,17 @@
 #endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
 
 
+/*
+ * Dump the ftrace buffer, but only one time per callsite per boot.
+ */
+#define rcu_ftrace_dump(oops_dump_mode) \
+do { \
+	static atomic_t ___rfd_beenhere = ATOMIC_INIT(0); \
+	\
+	if (!atomic_read(&___rfd_beenhere) && \
+	    !atomic_xchg(&___rfd_beenhere, 1)) \
+		ftrace_dump(oops_dump_mode); \
+} while (0)
+
+
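+
Usage is a single statement at the suspect callsite; a hedged sketch (the
stall condition is made up):

	static void example_check_stall(bool gp_appears_stalled)
	{
		/* dumps the ftrace buffer at most once per boot from here */
		if (gp_appears_stalled)
			rcu_ftrace_dump(DUMP_ALL);
	}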
 #endif /* __LINUX_RCUPDATE_H */
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 64809ae..93aea75 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -149,6 +149,22 @@
 	return 0;
 }
 
+/*
+ * Return the number of expedited grace periods completed.
+ */
+static inline unsigned long rcu_exp_batches_completed(void)
+{
+	return 0;
+}
+
+/*
+ * Return the number of expedited sched grace periods completed.
+ */
+static inline unsigned long rcu_exp_batches_completed_sched(void)
+{
+	return 0;
+}
+
 static inline void rcu_force_quiescent_state(void)
 {
 }
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index ad1eda9..5043cb8 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -87,6 +87,8 @@
 unsigned long rcu_batches_completed(void);
 unsigned long rcu_batches_completed_bh(void);
 unsigned long rcu_batches_completed_sched(void);
+unsigned long rcu_exp_batches_completed(void);
+unsigned long rcu_exp_batches_completed_sched(void);
 void show_rcu_gp_kthreads(void);
 
 void rcu_force_quiescent_state(void);
diff --git a/include/linux/regulator/act8865.h b/include/linux/regulator/act8865.h
index 2eb3860..113d861 100644
--- a/include/linux/regulator/act8865.h
+++ b/include/linux/regulator/act8865.h
@@ -69,11 +69,13 @@
  * @id: regulator id
  * @name: regulator name
  * @init_data: regulator init data
+ * @of_node: device tree node (optional)
  */
 struct act8865_regulator_data {
 	int id;
 	const char *name;
 	struct regulator_init_data *init_data;
+	struct device_node *of_node;
 };
 
 /**
diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h
index 4860350..80dc4e5 100644
--- a/include/linux/regulator/consumer.h
+++ b/include/linux/regulator/consumer.h
@@ -224,7 +224,7 @@
 void regulator_bulk_free(int num_consumers,
 			 struct regulator_bulk_data *consumers);
 
-int regulator_can_change_voltage(struct regulator *regulator);
+int __deprecated regulator_can_change_voltage(struct regulator *regulator);
 int regulator_count_voltages(struct regulator *regulator);
 int regulator_list_voltage(struct regulator *regulator, unsigned selector);
 int regulator_is_supported_voltage(struct regulator *regulator,
@@ -436,7 +436,7 @@
 {
 }
 
-static inline int regulator_can_change_voltage(struct regulator *regulator)
+static inline int __deprecated regulator_can_change_voltage(struct regulator *regulator)
 {
 	return 0;
 }
diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index cd271e8..fcfa40a 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -93,6 +93,9 @@
  * @get_current_limit: Get the configured limit for a current-limited regulator.
  * @set_input_current_limit: Configure an input limit.
  *
+ * @set_over_current_protection: Enable automatic shutdown when an
+ *                               over-current event is detected.
+ *
  * @set_active_discharge: Set active discharge enable/disable of regulators.
  *
  * @set_mode: Set the configured operating mode for the regulator.
@@ -255,6 +258,8 @@
  *
  * @vsel_reg: Register for selector when using regulator_regmap_X_voltage_
  * @vsel_mask: Mask for register bitfield used for selector
+ * @csel_reg: Register for TPS65218 LS3 current regulator
+ * @csel_mask: Mask for TPS65218 LS3 current regulator
  * @apply_reg: Register for initiate voltage change on the output when
  *                using regulator_set_voltage_sel_regmap
  * @apply_bit: Register bitfield used for initiate voltage change on the
@@ -292,7 +297,7 @@
 			    const struct regulator_desc *,
 			    struct regulator_config *);
 	int id;
-	bool continuous_voltage_range;
+	unsigned int continuous_voltage_range:1;
 	unsigned n_voltages;
 	const struct regulator_ops *ops;
 	int irq;
diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h
index 5d627c8..ad3e5158 100644
--- a/include/linux/regulator/machine.h
+++ b/include/linux/regulator/machine.h
@@ -97,6 +97,7 @@
  * @ramp_disable: Disable ramp delay when initialising or when setting voltage.
  * @soft_start: Enable soft start so that voltage ramps slowly.
  * @pull_down: Enable pull down when regulator is disabled.
+ * @over_current_protection: Automatically disable on an over-current event.
  *
  * @input_uV: Input voltage for regulator when supplied by another regulator.
  *
diff --git a/include/linux/regulator/max8973-regulator.h b/include/linux/regulator/max8973-regulator.h
index f6a8a16..2fcb998 100644
--- a/include/linux/regulator/max8973-regulator.h
+++ b/include/linux/regulator/max8973-regulator.h
@@ -54,6 +54,10 @@
  * @reg_init_data: The regulator init data.
  * @control_flags: Control flags which are ORed value of above flags to
  *		configure device.
+ * @junction_temp_warning: Junction temperature in millicelsius at which a
+ *			   warning is to be raised. Thermal functionality is
+ *			   only supported on MAX77621. The warning thresholds
+ *			   supported by MAX77621 are 120C and 140C.
  * @enable_ext_control: Enable the voltage enable/disable through external
  *		control signal from EN input pin. If it is false then
  *		voltage output will be enabled/disabled through EN bit of
@@ -67,6 +71,7 @@
 struct max8973_regulator_platform_data {
 	struct regulator_init_data *reg_init_data;
 	unsigned long control_flags;
+	unsigned long junction_temp_warning;
 	bool enable_ext_control;
 	int enable_gpio;
 	int dvs_gpio;
diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h
index 561e861..ae0528b 100644
--- a/include/linux/rwsem-spinlock.h
+++ b/include/linux/rwsem-spinlock.h
@@ -34,7 +34,7 @@
 extern void __down_read(struct rw_semaphore *sem);
 extern int __down_read_trylock(struct rw_semaphore *sem);
 extern void __down_write(struct rw_semaphore *sem);
-extern void __down_write_nested(struct rw_semaphore *sem, int subclass);
+extern int __must_check __down_write_killable(struct rw_semaphore *sem);
 extern int __down_write_trylock(struct rw_semaphore *sem);
 extern void __up_read(struct rw_semaphore *sem);
 extern void __up_write(struct rw_semaphore *sem);
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 8f498cd..d1c12d1 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -14,6 +14,7 @@
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/atomic.h>
+#include <linux/err.h>
 #ifdef CONFIG_RWSEM_SPIN_ON_OWNER
 #include <linux/osq_lock.h>
 #endif
@@ -43,6 +44,7 @@
 
 extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
 extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
+extern struct rw_semaphore *rwsem_down_write_failed_killable(struct rw_semaphore *sem);
 extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *);
 extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
 
@@ -116,6 +118,7 @@
  * lock for writing
  */
 extern void down_write(struct rw_semaphore *sem);
+extern int __must_check down_write_killable(struct rw_semaphore *sem);
 
 /*
  * trylock for writing -- returns 1 if successful, 0 if contention
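
The intended calling convention for the new killable variant, as a sketch
(the function is illustrative; -EINTR follows the usual convention for
killable locking primitives):

	static int example_update(struct rw_semaphore *sem)
	{
		if (down_write_killable(sem))
			return -EINTR;	/* interrupted by a fatal signal */

		/* ... modify the data protected by @sem ... */

		up_write(sem);
		return 0;
	}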
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 52c4847..31bd0d9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -40,7 +40,6 @@
 #include <linux/pid.h>
 #include <linux/percpu.h>
 #include <linux/topology.h>
-#include <linux/proportions.h>
 #include <linux/seccomp.h>
 #include <linux/rcupdate.h>
 #include <linux/rculist.h>
@@ -178,9 +177,11 @@
 extern void calc_global_load(unsigned long ticks);
 
 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
-extern void update_cpu_load_nohz(int active);
+extern void cpu_load_update_nohz_start(void);
+extern void cpu_load_update_nohz_stop(void);
 #else
-static inline void update_cpu_load_nohz(int active) { }
+static inline void cpu_load_update_nohz_start(void) { }
+static inline void cpu_load_update_nohz_stop(void) { }
 #endif
 
 extern void dump_cpu_task(int cpu);
@@ -372,6 +373,15 @@
 extern void trap_init(void);
 extern void update_process_times(int user);
 extern void scheduler_tick(void);
+extern int sched_cpu_starting(unsigned int cpu);
+extern int sched_cpu_activate(unsigned int cpu);
+extern int sched_cpu_deactivate(unsigned int cpu);
+
+#ifdef CONFIG_HOTPLUG_CPU
+extern int sched_cpu_dying(unsigned int cpu);
+#else
+# define sched_cpu_dying	NULL
+#endif
 
 extern void sched_show_task(struct task_struct *p);
 
@@ -935,9 +945,19 @@
 };
 
 /*
+ * Integer metrics need fixed point arithmetic, e.g., sched/fair
+ * has a few: load, load_avg, util_avg, freq, and capacity.
+ *
+ * We define a basic fixed point arithmetic range, and then formalize
+ * all these metrics based on that basic range.
+ */
+# define SCHED_FIXEDPOINT_SHIFT	10
+# define SCHED_FIXEDPOINT_SCALE	(1L << SCHED_FIXEDPOINT_SHIFT)
+
+/*
  * Increase resolution of cpu_capacity calculations
  */
-#define SCHED_CAPACITY_SHIFT	10
+#define SCHED_CAPACITY_SHIFT	SCHED_FIXEDPOINT_SHIFT
 #define SCHED_CAPACITY_SCALE	(1L << SCHED_CAPACITY_SHIFT)
 
 /*
@@ -1199,18 +1219,56 @@
 };
 
 /*
- * The load_avg/util_avg accumulates an infinite geometric series.
- * 1) load_avg factors frequency scaling into the amount of time that a
- * sched_entity is runnable on a rq into its weight. For cfs_rq, it is the
- * aggregated such weights of all runnable and blocked sched_entities.
- * 2) util_avg factors frequency and cpu scaling into the amount of time
- * that a sched_entity is running on a CPU, in the range [0..SCHED_LOAD_SCALE].
- * For cfs_rq, it is the aggregated such times of all runnable and
+ * The load_avg/util_avg accumulates an infinite geometric series
+ * (see __update_load_avg() in kernel/sched/fair.c).
+ *
+ * [load_avg definition]
+ *
+ *   load_avg = runnable% * scale_load_down(load)
+ *
+ * where runnable% is the time ratio that a sched_entity is runnable.
+ * For cfs_rq, it is the aggregated load_avg of all runnable and
  * blocked sched_entities.
- * The 64 bit load_sum can:
- * 1) for cfs_rq, afford 4353082796 (=2^64/47742/88761) entities with
- * the highest weight (=88761) always runnable, we should not overflow
- * 2) for entity, support any load.weight always runnable
+ *
+ * load_avg may also take frequency scaling into account:
+ *
+ *   load_avg = runnable% * scale_load_down(load) * freq%
+ *
+ * where freq% is the CPU frequency normalized to the highest frequency.
+ *
+ * [util_avg definition]
+ *
+ *   util_avg = running% * SCHED_CAPACITY_SCALE
+ *
+ * where running% is the time ratio that a sched_entity is running on
+ * a CPU. For cfs_rq, it is the aggregated util_avg of all runnable
+ * and blocked sched_entities.
+ *
+ * util_avg may also factor frequency scaling and CPU capacity scaling:
+ *
+ *   util_avg = running% * SCHED_CAPACITY_SCALE * freq% * capacity%
+ *
+ * where freq% is the same as above, and capacity% is the CPU capacity
+ * normalized to the greatest capacity (due to uarch differences, etc).
+ *
+ * N.B., the above ratios (runnable%, running%, freq%, and capacity%)
+ * themselves are in the range of [0, 1]. To do fixed point arithmetic,
+ * we therefore scale them to as large a range as necessary. This is for
+ * example reflected by util_avg's SCHED_CAPACITY_SCALE.
+ *
+ * [Overflow issue]
+ *
+ * The 64-bit load_sum can have 4353082796 (=2^64/47742/88761) entities
+ * with the highest load (=88761), always runnable on a single cfs_rq,
+ * and should not overflow as the number already hits PID_MAX_LIMIT.
+ *
+ * For all other cases (including 32-bit kernels), struct load_weight's
+ * weight will overflow first before we do, because:
+ *
+ *    Max(load_avg) <= Max(load.weight)
+ *
+ * Then it is the load_weight's responsibility to consider overflow
+ * issues.
  */
 struct sched_avg {
 	u64 last_update_time, load_sum;
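
A worked example of the fixed point convention above (numbers are
illustrative): a ratio of 0.75 is stored as 0.75 * 2^10 = 768, and a
product of two such ratios needs one corrective right shift:

	#define EX_FIXEDPOINT_SHIFT	10	/* mirrors SCHED_FIXEDPOINT_SHIFT */

	static inline unsigned long ex_fp_mul(unsigned long a, unsigned long b)
	{
		/* 768 (0.75) * 512 (0.50) >> 10 == 384, i.e. 0.375 */
		return (a * b) >> EX_FIXEDPOINT_SHIFT;
	}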
@@ -1596,6 +1654,7 @@
 
 	unsigned long sas_ss_sp;
 	size_t sas_ss_size;
+	unsigned sas_ss_flags;
 
 	struct callback_head *task_works;
 
@@ -1871,6 +1930,11 @@
 /* Future-safe accessor for struct task_struct's cpus_allowed. */
 #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
 
+static inline int tsk_nr_cpus_allowed(struct task_struct *p)
+{
+	return p->nr_cpus_allowed;
+}
+
 #define TNF_MIGRATED	0x01
 #define TNF_NO_GROUP	0x02
 #define TNF_SHARED	0x04
@@ -2303,8 +2367,6 @@
 /*
  * See the comment in kernel/sched/clock.c
  */
-extern u64 cpu_clock(int cpu);
-extern u64 local_clock(void);
 extern u64 running_clock(void);
 extern u64 sched_clock_cpu(int cpu);
 
@@ -2323,6 +2385,16 @@
 static inline void sched_clock_idle_wakeup_event(u64 delta_ns)
 {
 }
+
+static inline u64 cpu_clock(int cpu)
+{
+	return sched_clock();
+}
+
+static inline u64 local_clock(void)
+{
+	return sched_clock();
+}
 #else
 /*
  * Architectures can set this to 1 if they have specified
@@ -2337,6 +2409,26 @@
 extern void sched_clock_tick(void);
 extern void sched_clock_idle_sleep_event(void);
 extern void sched_clock_idle_wakeup_event(u64 delta_ns);
+
+/*
+ * As outlined in clock.c, this provides a fast, high-resolution, nanosecond
+ * time source that is monotonic for a given cpu argument and has bounded
+ * drift between cpus.
+ *
+ * ######################### BIG FAT WARNING ##########################
+ * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can #
+ * # go backwards !!                                                  #
+ * ####################################################################
+ */
+static inline u64 cpu_clock(int cpu)
+{
+	return sched_clock_cpu(cpu);
+}
+
+static inline u64 local_clock(void)
+{
+	return sched_clock_cpu(raw_smp_processor_id());
+}
 #endif
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
@@ -2575,6 +2667,18 @@
  */
 static inline int on_sig_stack(unsigned long sp)
 {
+	/*
+	 * If the signal stack is SS_AUTODISARM then, by construction, we
+	 * can't be on the signal stack unless user code deliberately set
+	 * SS_AUTODISARM when we were already on it.
+	 *
+	 * This improves reliability: if user state gets corrupted such that
+	 * the stack pointer points very close to the end of the signal stack,
+	 * then this check will enable the signal to be handled anyway.
+	 */
+	if (current->sas_ss_flags & SS_AUTODISARM)
+		return 0;
+
 #ifdef CONFIG_STACK_GROWSUP
 	return sp >= current->sas_ss_sp &&
 		sp - current->sas_ss_sp < current->sas_ss_size;
@@ -2592,6 +2696,13 @@
 	return on_sig_stack(sp) ? SS_ONSTACK : 0;
 }
 
+static inline void sas_ss_reset(struct task_struct *p)
+{
+	p->sas_ss_sp = 0;
+	p->sas_ss_size = 0;
+	p->sas_ss_flags = SS_DISABLE;
+}
+
 static inline unsigned long sigsp(unsigned long sp, struct ksignal *ksig)
 {
 	if (unlikely((ksig->ka.sa.sa_flags & SA_ONSTACK)) && ! sas_ss_flags(sp))
@@ -3240,7 +3351,10 @@
 		     u64 time, unsigned long util, unsigned long max);
 };
 
-void cpufreq_set_update_util_data(int cpu, struct update_util_data *data);
+void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
+			void (*func)(struct update_util_data *data, u64 time,
+				     unsigned long util, unsigned long max));
+void cpufreq_remove_update_util_hook(int cpu);
 #endif /* CONFIG_CPU_FREQ */
 
 #endif
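
A sketch of a governor using the new hook registration that replaces
cpufreq_set_update_util_data() (struct example_gov_data and the callback
body are illustrative):

	struct example_gov_data {
		struct update_util_data update_util;
		/* governor-private state ... */
	};

	static void example_update_util(struct update_util_data *data, u64 time,
					unsigned long util, unsigned long max)
	{
		struct example_gov_data *gd =
			container_of(data, struct example_gov_data, update_util);

		/* react to the new utilization sample, e.g. pick a frequency */
		(void)gd;
	}

	static void example_gov_start(int cpu, struct example_gov_data *gd)
	{
		cpufreq_add_update_util_hook(cpu, &gd->update_util,
					     example_update_util);
	}

	static void example_gov_stop(int cpu)
	{
		cpufreq_remove_update_util_hook(cpu);
	}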
diff --git a/include/linux/signal.h b/include/linux/signal.h
index 92557bb..3fbe814 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -432,8 +432,10 @@
 	stack_t __user *__uss = uss; \
 	struct task_struct *t = current; \
 	put_user_ex((void __user *)t->sas_ss_sp, &__uss->ss_sp); \
-	put_user_ex(sas_ss_flags(sp), &__uss->ss_flags); \
+	put_user_ex(t->sas_ss_flags, &__uss->ss_flags); \
 	put_user_ex(t->sas_ss_size, &__uss->ss_size); \
+	if (t->sas_ss_flags & SS_AUTODISARM) \
+		sas_ss_reset(t); \
 } while (0);
 
 #ifdef CONFIG_PROC_FS
diff --git a/include/trace/events/mmc.h b/include/trace/events/mmc.h
new file mode 100644
index 0000000..a72f9b9
--- /dev/null
+++ b/include/trace/events/mmc.h
@@ -0,0 +1,182 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mmc
+
+#if !defined(_TRACE_MMC_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MMC_H
+
+#include <linux/blkdev.h>
+#include <linux/mmc/core.h>
+#include <linux/mmc/host.h>
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(mmc_request_start,
+
+	TP_PROTO(struct mmc_host *host, struct mmc_request *mrq),
+
+	TP_ARGS(host, mrq),
+
+	TP_STRUCT__entry(
+		__field(u32,			cmd_opcode)
+		__field(u32,			cmd_arg)
+		__field(unsigned int,		cmd_flags)
+		__field(unsigned int,		cmd_retries)
+		__field(u32,			stop_opcode)
+		__field(u32,			stop_arg)
+		__field(unsigned int,		stop_flags)
+		__field(unsigned int,		stop_retries)
+		__field(u32,			sbc_opcode)
+		__field(u32,			sbc_arg)
+		__field(unsigned int,		sbc_flags)
+		__field(unsigned int,		sbc_retries)
+		__field(unsigned int,		blocks)
+		__field(unsigned int,		blksz)
+		__field(unsigned int,		data_flags)
+		__field(unsigned int,		can_retune)
+		__field(unsigned int,		doing_retune)
+		__field(unsigned int,		retune_now)
+		__field(int,			need_retune)
+		__field(int,			hold_retune)
+		__field(unsigned int,		retune_period)
+		__field(struct mmc_request *,	mrq)
+		__string(name,			mmc_hostname(host))
+	),
+
+	TP_fast_assign(
+		__entry->cmd_opcode = mrq->cmd->opcode;
+		__entry->cmd_arg = mrq->cmd->arg;
+		__entry->cmd_flags = mrq->cmd->flags;
+		__entry->cmd_retries = mrq->cmd->retries;
+		__entry->stop_opcode = mrq->stop ? mrq->stop->opcode : 0;
+		__entry->stop_arg = mrq->stop ? mrq->stop->arg : 0;
+		__entry->stop_flags = mrq->stop ? mrq->stop->flags : 0;
+		__entry->stop_retries = mrq->stop ? mrq->stop->retries : 0;
+		__entry->sbc_opcode = mrq->sbc ? mrq->sbc->opcode : 0;
+		__entry->sbc_arg = mrq->sbc ? mrq->sbc->arg : 0;
+		__entry->sbc_flags = mrq->sbc ? mrq->sbc->flags : 0;
+		__entry->sbc_retries = mrq->sbc ? mrq->sbc->retries : 0;
+		__entry->blksz = mrq->data ? mrq->data->blksz : 0;
+		__entry->blocks = mrq->data ? mrq->data->blocks : 0;
+		__entry->data_flags = mrq->data ? mrq->data->flags : 0;
+		__entry->can_retune = host->can_retune;
+		__entry->doing_retune = host->doing_retune;
+		__entry->retune_now = host->retune_now;
+		__entry->need_retune = host->need_retune;
+		__entry->hold_retune = host->hold_retune;
+		__entry->retune_period = host->retune_period;
+		__assign_str(name, mmc_hostname(host));
+		__entry->mrq = mrq;
+	),
+
+	TP_printk("%s: start struct mmc_request[%p]: "
+		  "cmd_opcode=%u cmd_arg=0x%x cmd_flags=0x%x cmd_retries=%u "
+		  "stop_opcode=%u stop_arg=0x%x stop_flags=0x%x stop_retries=%u "
+		  "sbc_opcode=%u sbc_arg=0x%x sbc_flags=0x%x sbc_retries=%u "
+		  "blocks=%u block_size=%u data_flags=0x%x "
+		  "can_retune=%u doing_retune=%u retune_now=%u "
+		  "need_retune=%d hold_retune=%d retune_period=%u",
+		  __get_str(name), __entry->mrq,
+		  __entry->cmd_opcode, __entry->cmd_arg,
+		  __entry->cmd_flags, __entry->cmd_retries,
+		  __entry->stop_opcode, __entry->stop_arg,
+		  __entry->stop_flags, __entry->stop_retries,
+		  __entry->sbc_opcode, __entry->sbc_arg,
+		  __entry->sbc_flags, __entry->sbc_retries,
+		  __entry->blocks, __entry->blksz, __entry->data_flags,
+		  __entry->can_retune, __entry->doing_retune,
+		  __entry->retune_now, __entry->need_retune,
+		  __entry->hold_retune, __entry->retune_period)
+);
+
+TRACE_EVENT(mmc_request_done,
+
+	TP_PROTO(struct mmc_host *host, struct mmc_request *mrq),
+
+	TP_ARGS(host, mrq),
+
+	TP_STRUCT__entry(
+		__field(u32,			cmd_opcode)
+		__field(int,			cmd_err)
+		__array(u32,			cmd_resp,	4)
+		__field(unsigned int,		cmd_retries)
+		__field(u32,			stop_opcode)
+		__field(int,			stop_err)
+		__array(u32,			stop_resp,	4)
+		__field(unsigned int,		stop_retries)
+		__field(u32,			sbc_opcode)
+		__field(int,			sbc_err)
+		__array(u32,			sbc_resp,	4)
+		__field(unsigned int,		sbc_retries)
+		__field(unsigned int,		bytes_xfered)
+		__field(int,			data_err)
+		__field(unsigned int,		can_retune)
+		__field(unsigned int,		doing_retune)
+		__field(unsigned int,		retune_now)
+		__field(int,			need_retune)
+		__field(int,			hold_retune)
+		__field(unsigned int,		retune_period)
+		__field(struct mmc_request *,	mrq)
+		__string(name,			mmc_hostname(host))
+	),
+
+	TP_fast_assign(
+		__entry->cmd_opcode = mrq->cmd->opcode;
+		__entry->cmd_err = mrq->cmd->error;
+		memcpy(__entry->cmd_resp, mrq->cmd->resp, 4);
+		__entry->cmd_retries = mrq->cmd->retries;
+		__entry->stop_opcode = mrq->stop ? mrq->stop->opcode : 0;
+		__entry->stop_err = mrq->stop ? mrq->stop->error : 0;
+		__entry->stop_resp[0] = mrq->stop ? mrq->stop->resp[0] : 0;
+		__entry->stop_resp[1] = mrq->stop ? mrq->stop->resp[1] : 0;
+		__entry->stop_resp[2] = mrq->stop ? mrq->stop->resp[2] : 0;
+		__entry->stop_resp[3] = mrq->stop ? mrq->stop->resp[3] : 0;
+		__entry->stop_retries = mrq->stop ? mrq->stop->retries : 0;
+		__entry->sbc_opcode = mrq->sbc ? mrq->sbc->opcode : 0;
+		__entry->sbc_err = mrq->sbc ? mrq->sbc->error : 0;
+		__entry->sbc_resp[0] = mrq->sbc ? mrq->sbc->resp[0] : 0;
+		__entry->sbc_resp[1] = mrq->sbc ? mrq->sbc->resp[1] : 0;
+		__entry->sbc_resp[2] = mrq->sbc ? mrq->sbc->resp[2] : 0;
+		__entry->sbc_resp[3] = mrq->sbc ? mrq->sbc->resp[3] : 0;
+		__entry->sbc_retries = mrq->sbc ? mrq->sbc->retries : 0;
+		__entry->bytes_xfered = mrq->data ? mrq->data->bytes_xfered : 0;
+		__entry->data_err = mrq->data ? mrq->data->error : 0;
+		__entry->can_retune = host->can_retune;
+		__entry->doing_retune = host->doing_retune;
+		__entry->retune_now = host->retune_now;
+		__entry->need_retune = host->need_retune;
+		__entry->hold_retune = host->hold_retune;
+		__entry->retune_period = host->retune_period;
+		__assign_str(name, mmc_hostname(host));
+		__entry->mrq = mrq;
+	),
+
+	TP_printk("%s: end struct mmc_request[%p]: "
+		  "cmd_opcode=%u cmd_err=%d cmd_resp=0x%x 0x%x 0x%x 0x%x "
+		  "cmd_retries=%u stop_opcode=%u stop_err=%d "
+		  "stop_resp=0x%x 0x%x 0x%x 0x%x stop_retries=%u "
+		  "sbc_opcode=%u sbc_err=%d sbc_resp=0x%x 0x%x 0x%x 0x%x "
+		  "sbc_retries=%u bytes_xfered=%u data_err=%d "
+		  "can_retune=%u doing_retune=%u retune_now=%u need_retune=%d "
+		  "hold_retune=%d retune_period=%u",
+		  __get_str(name), __entry->mrq,
+		  __entry->cmd_opcode, __entry->cmd_err,
+		  __entry->cmd_resp[0], __entry->cmd_resp[1],
+		  __entry->cmd_resp[2], __entry->cmd_resp[3],
+		  __entry->cmd_retries,
+		  __entry->stop_opcode, __entry->stop_err,
+		  __entry->stop_resp[0], __entry->stop_resp[1],
+		  __entry->stop_resp[2], __entry->stop_resp[3],
+		  __entry->stop_retries,
+		  __entry->sbc_opcode, __entry->sbc_err,
+		  __entry->sbc_resp[0], __entry->sbc_resp[1],
+		  __entry->sbc_resp[2], __entry->sbc_resp[3],
+		  __entry->sbc_retries,
+		  __entry->bytes_xfered, __entry->data_err,
+		  __entry->can_retune, __entry->doing_retune,
+		  __entry->retune_now, __entry->need_retune,
+		  __entry->hold_retune, __entry->retune_period)
+);
+
+#endif /* _TRACE_MMC_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
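+
These events can then be consumed like any other tracepoint; a hedged
sketch (the tracefs path follows standard convention and is not part of
this patch):

	#include <stdio.h>

	static int enable_mmc_tracing(void)
	{
		FILE *f = fopen("/sys/kernel/debug/tracing/events/mmc/enable", "w");

		if (!f)
			return -1;
		fputc('1', f);
		return fclose(f);
	}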
diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index ef72c4a..d3e7565 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -172,6 +172,77 @@
 );
 
 /*
+ * Tracepoint for expedited grace-period events.  Takes a string identifying
+ * the RCU flavor, the expedited grace-period sequence number, and a string
+ * identifying the grace-period-related event as follows:
+ *
+ *	"snap": Captured snapshot of expedited grace period sequence number.
+ *	"start": Started a real expedited grace period.
+ *	"end": Ended a real expedited grace period.
+ *	"endwake": Woke piggybackers up.
+ *	"done": Someone else did the expedited grace period for us.
+ */
+TRACE_EVENT(rcu_exp_grace_period,
+
+	TP_PROTO(const char *rcuname, unsigned long gpseq, const char *gpevent),
+
+	TP_ARGS(rcuname, gpseq, gpevent),
+
+	TP_STRUCT__entry(
+		__field(const char *, rcuname)
+		__field(unsigned long, gpseq)
+		__field(const char *, gpevent)
+	),
+
+	TP_fast_assign(
+		__entry->rcuname = rcuname;
+		__entry->gpseq = gpseq;
+		__entry->gpevent = gpevent;
+	),
+
+	TP_printk("%s %lu %s",
+		  __entry->rcuname, __entry->gpseq, __entry->gpevent)
+);
+
+/*
+ * Tracepoint for expedited grace-period funnel-locking events.  Takes a
+ * string identifying the RCU flavor, an integer identifying the rcu_node
+ * combining-tree level, another pair of integers identifying the lowest-
+ * and highest-numbered CPU associated with the current rcu_node structure,
+ * and a string identifying the grace-period-related event as follows:
+ *
+ *	"nxtlvl": Advance to next level of rcu_node funnel
+ *	"wait": Wait for someone else to do expedited GP
+ */
+TRACE_EVENT(rcu_exp_funnel_lock,
+
+	TP_PROTO(const char *rcuname, u8 level, int grplo, int grphi,
+		 const char *gpevent),
+
+	TP_ARGS(rcuname, level, grplo, grphi, gpevent),
+
+	TP_STRUCT__entry(
+		__field(const char *, rcuname)
+		__field(u8, level)
+		__field(int, grplo)
+		__field(int, grphi)
+		__field(const char *, gpevent)
+	),
+
+	TP_fast_assign(
+		__entry->rcuname = rcuname;
+		__entry->level = level;
+		__entry->grplo = grplo;
+		__entry->grphi = grphi;
+		__entry->gpevent = gpevent;
+	),
+
+	TP_printk("%s %d %d %d %s",
+		  __entry->rcuname, __entry->level, __entry->grplo,
+		  __entry->grphi, __entry->gpevent)
+);
+
+/*
  * Tracepoint for RCU no-CBs CPU callback handoffs.  This event is intended
  * to assist debugging of these handoffs.
  *
@@ -704,11 +775,15 @@
 #else /* #ifdef CONFIG_RCU_TRACE */
 
 #define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0)
-#define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, \
-				    qsmask) do { } while (0)
 #define trace_rcu_future_grace_period(rcuname, gpnum, completed, c, \
 				      level, grplo, grphi, event) \
 				      do { } while (0)
+#define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, \
+				    qsmask) do { } while (0)
+#define trace_rcu_exp_grace_period(rcuname, gpseq, gpevent) \
+	do { } while (0)
+#define trace_rcu_exp_funnel_lock(rcuname, level, grplo, grphi, gpevent) \
+	do { } while (0)
 #define trace_rcu_nocb_wake(rcuname, cpu, reason) do { } while (0)
 #define trace_rcu_preempt_task(rcuname, pid, gpnum) do { } while (0)
 #define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0)
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 1afe962..43fc8d2 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -340,7 +340,8 @@
 				comm_exec      :  1, /* flag comm events that are due to an exec */
 				use_clockid    :  1, /* use @clockid for time fields */
 				context_switch :  1, /* context switch data */
-				__reserved_1   : 37;
+				write_backward :  1, /* Write ring buffer from end to beginning */
+				__reserved_1   : 36;
 
 	union {
 		__u32		wakeup_events;	  /* wakeup every n events */
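
A sketch of requesting a backward-written ring buffer with the new
attribute bit (the raw perf_event_open() wrapper and the software-dummy
event choice are illustrative; such buffers are conventionally mapped
read-only):

	#include <string.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/perf_event.h>

	static int open_backward_event(void)
	{
		struct perf_event_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.size = sizeof(attr);
		attr.type = PERF_TYPE_SOFTWARE;
		attr.config = PERF_COUNT_SW_DUMMY;
		attr.sample_period = 1;
		attr.write_backward = 1;	/* newest records end up at the tail */

		return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	}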
@@ -401,6 +402,7 @@
 #define PERF_EVENT_IOC_SET_FILTER	_IOW('$', 6, char *)
 #define PERF_EVENT_IOC_ID		_IOR('$', 7, __u64 *)
 #define PERF_EVENT_IOC_SET_BPF		_IOW('$', 8, __u32)
+#define PERF_EVENT_IOC_PAUSE_OUTPUT	_IOW('$', 9, __u32)
 
 enum perf_event_ioc_flags {
 	PERF_IOC_FLAG_GROUP		= 1U << 0,
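
The new ioctl maps onto the rb_toggle_paused() handler added to
kernel/events/core.c later in this patch; a minimal sketch:

	#include <sys/ioctl.h>
	#include <linux/perf_event.h>

	/* arg != 0 pauses ring-buffer output, arg == 0 resumes it */
	static int pause_output(int perf_fd, int pause)
	{
		return ioctl(perf_fd, PERF_EVENT_IOC_PAUSE_OUTPUT, pause);
	}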
diff --git a/include/uapi/linux/signal.h b/include/uapi/linux/signal.h
index e1bd50c2..cd0804b 100644
--- a/include/uapi/linux/signal.h
+++ b/include/uapi/linux/signal.h
@@ -7,4 +7,9 @@
 #define SS_ONSTACK	1
 #define SS_DISABLE	2
 
+/* bit-flags */
+#define SS_AUTODISARM	(1U << 31)	/* disable sas during sighandling */
+/* mask for all SS_xxx flags */
+#define SS_FLAG_BITS	SS_AUTODISARM
+
 #endif /* _UAPI_LINUX_SIGNAL_H */
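
A sketch of the flag's intended use (error handling trimmed): with
SS_AUTODISARM the kernel disarms the alternate stack on handler entry, so
the handler may swapcontext() away without the stack being reused:

	#include <signal.h>
	#include <stdlib.h>

	static int setup_autodisarm_stack(void)
	{
		stack_t ss;

		ss.ss_sp = malloc(SIGSTKSZ);
		if (!ss.ss_sp)
			return -1;
		ss.ss_size = SIGSTKSZ;
		ss.ss_flags = SS_AUTODISARM;	/* new bit-flag */

		return sigaltstack(&ss, NULL);
	}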
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 499d9e9..f5a1954 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -66,7 +66,7 @@
 	/* check sanity of attributes */
 	if (attr->max_entries == 0 || attr->key_size != 4 ||
 	    value_size < 8 || value_size % 8 ||
-	    value_size / 8 > PERF_MAX_STACK_DEPTH)
+	    value_size / 8 > sysctl_perf_event_max_stack)
 		return ERR_PTR(-EINVAL);
 
 	/* hash table size must be power of 2 */
@@ -124,8 +124,8 @@
 	struct perf_callchain_entry *trace;
 	struct stack_map_bucket *bucket, *new_bucket, *old_bucket;
 	u32 max_depth = map->value_size / 8;
-	/* stack_map_alloc() checks that max_depth <= PERF_MAX_STACK_DEPTH */
-	u32 init_nr = PERF_MAX_STACK_DEPTH - max_depth;
+	/* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */
+	u32 init_nr = sysctl_perf_event_max_stack - max_depth;
 	u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
 	u32 hash, id, trace_nr, trace_len;
 	bool user = flags & BPF_F_USER_STACK;
@@ -143,7 +143,7 @@
 		return -EFAULT;
 
 	/* get_perf_callchain() guarantees that trace->nr >= init_nr
-	 * and trace-nr <= PERF_MAX_STACK_DEPTH, so trace_nr <= max_depth
+	 * and trace->nr <= sysctl_perf_event_max_stack, so trace_nr <= max_depth
 	 */
 	trace_nr = trace->nr - init_nr;
 
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 3e3f6e4..d948e44 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -703,21 +703,6 @@
 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 	int err;
 
-	/*
-	 * By now we've cleared cpu_active_mask, wait for all preempt-disabled
-	 * and RCU users of this state to go away such that all new such users
-	 * will observe it.
-	 *
-	 * For CONFIG_PREEMPT we have preemptible RCU and its sync_rcu() might
-	 * not imply sync_sched(), so wait for both.
-	 *
-	 * Do sync before park smpboot threads to take care the rcu boost case.
-	 */
-	if (IS_ENABLED(CONFIG_PREEMPT))
-		synchronize_rcu_mult(call_rcu, call_rcu_sched);
-	else
-		synchronize_rcu();
-
 	/* Park the smpboot threads */
 	kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread);
 	smpboot_park_threads(cpu);
@@ -923,8 +908,6 @@
 
 	st->state = CPUHP_AP_ONLINE_IDLE;
 
-	/* The cpu is marked online, set it active now */
-	set_cpu_active(cpu, true);
 	/* Unpark the stopper thread and the hotplug thread of this cpu */
 	stop_machine_unpark(cpu);
 	kthread_unpark(st->thread);
@@ -1236,6 +1219,12 @@
 		.name			= "ap:offline",
 		.cant_stop		= true,
 	},
+	/* First state is scheduler control. Interrupts are disabled */
+	[CPUHP_AP_SCHED_STARTING] = {
+		.name			= "sched:starting",
+		.startup		= sched_cpu_starting,
+		.teardown		= sched_cpu_dying,
+	},
 	/*
 	 * Low level startup/teardown notifiers. Run with interrupts
 	 * disabled. Will be removed once the notifiers are converted to
@@ -1274,6 +1263,15 @@
 	 * The dynamically registered state space is here
 	 */
 
+#ifdef CONFIG_SMP
+	/* Last state is scheduler control setting the cpu active */
+	[CPUHP_AP_ACTIVE] = {
+		.name			= "sched:active",
+		.startup		= sched_cpu_activate,
+		.teardown		= sched_cpu_deactivate,
+	},
+#endif
+
 	/* CPU is fully up and running. */
 	[CPUHP_ONLINE] = {
 		.name			= "online",
diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
index 343c22f..b9325e7 100644
--- a/kernel/events/callchain.c
+++ b/kernel/events/callchain.c
@@ -18,6 +18,14 @@
 	struct perf_callchain_entry	*cpu_entries[0];
 };
 
+int sysctl_perf_event_max_stack __read_mostly = PERF_MAX_STACK_DEPTH;
+
+static inline size_t perf_callchain_entry__sizeof(void)
+{
+	return (sizeof(struct perf_callchain_entry) +
+		sizeof(__u64) * sysctl_perf_event_max_stack);
+}
+
 static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]);
 static atomic_t nr_callchain_events;
 static DEFINE_MUTEX(callchain_mutex);
@@ -73,7 +81,7 @@
 	if (!entries)
 		return -ENOMEM;
 
-	size = sizeof(struct perf_callchain_entry) * PERF_NR_CONTEXTS;
+	size = perf_callchain_entry__sizeof() * PERF_NR_CONTEXTS;
 
 	for_each_possible_cpu(cpu) {
 		entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL,
@@ -147,7 +155,8 @@
 
 	cpu = smp_processor_id();
 
-	return &entries->cpu_entries[cpu][*rctx];
+	return (((void *)entries->cpu_entries[cpu]) +
+		(*rctx * perf_callchain_entry__sizeof()));
 }
 
 static void
@@ -215,3 +224,25 @@
 
 	return entry;
 }
+
+int perf_event_max_stack_handler(struct ctl_table *table, int write,
+				 void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int new_value = sysctl_perf_event_max_stack, ret;
+	struct ctl_table new_table = *table;
+
+	new_table.data = &new_value;
+	ret = proc_dointvec_minmax(&new_table, write, buffer, lenp, ppos);
+	if (ret || !write)
+		return ret;
+
+	mutex_lock(&callchain_mutex);
+	if (atomic_read(&nr_callchain_events))
+		ret = -EBUSY;
+	else
+		sysctl_perf_event_max_stack = new_value;
+
+	mutex_unlock(&callchain_mutex);
+
+	return ret;
+}
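+
From userspace the knob is reached through the usual sysctl-to-procfs
mapping (the exact path is an assumption based on that convention):

	#include <stdio.h>

	static int set_perf_max_stack(int depth)
	{
		FILE *f = fopen("/proc/sys/kernel/perf_event_max_stack", "w");

		if (!f)
			return -1;
		fprintf(f, "%d\n", depth);
		return fclose(f);
	}

Note that the handler above refuses the write with -EBUSY while any
callchain-using events exist, since the per-CPU entry buffers are sized at
allocation time.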
diff --git a/kernel/events/core.c b/kernel/events/core.c
index c0ded24..050a290 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -44,6 +44,8 @@
 #include <linux/compat.h>
 #include <linux/bpf.h>
 #include <linux/filter.h>
+#include <linux/namei.h>
+#include <linux/parser.h>
 
 #include "internal.h"
 
@@ -1927,8 +1929,13 @@
 	if (event->state <= PERF_EVENT_STATE_OFF)
 		return 0;
 
-	event->state = PERF_EVENT_STATE_ACTIVE;
-	event->oncpu = smp_processor_id();
+	WRITE_ONCE(event->oncpu, smp_processor_id());
+	/*
+	 * Order event::oncpu write to happen before the ACTIVE state
+	 * is visible.
+	 */
+	smp_wmb();
+	WRITE_ONCE(event->state, PERF_EVENT_STATE_ACTIVE);
 
 	/*
 	 * Unthrottle events, since we scheduled we might have missed several
@@ -2360,6 +2367,112 @@
 }
 EXPORT_SYMBOL_GPL(perf_event_enable);
 
+struct stop_event_data {
+	struct perf_event	*event;
+	unsigned int		restart;
+};
+
+static int __perf_event_stop(void *info)
+{
+	struct stop_event_data *sd = info;
+	struct perf_event *event = sd->event;
+
+	/* if it's already INACTIVE, do nothing */
+	if (READ_ONCE(event->state) != PERF_EVENT_STATE_ACTIVE)
+		return 0;
+
+	/* matches smp_wmb() in event_sched_in() */
+	smp_rmb();
+
+	/*
+	 * There is a window with interrupts enabled before we get here,
+	 * so we need to check again lest we try to stop another CPU's event.
+	 */
+	if (READ_ONCE(event->oncpu) != smp_processor_id())
+		return -EAGAIN;
+
+	event->pmu->stop(event, PERF_EF_UPDATE);
+
+	/*
+	 * May race with the actual stop (through perf_pmu_output_stop()),
+	 * but it is only used for events with AUX ring buffer, and such
+	 * events will refuse to restart because of rb::aux_mmap_count==0,
+	 * see comments in perf_aux_output_begin().
+	 *
+	 * Since this is happening on an event-local CPU, no trace is lost
+	 * while restarting.
+	 */
+	if (sd->restart)
+		event->pmu->start(event, PERF_EF_START);
+
+	return 0;
+}
+
+static int perf_event_restart(struct perf_event *event)
+{
+	struct stop_event_data sd = {
+		.event		= event,
+		.restart	= 1,
+	};
+	int ret = 0;
+
+	do {
+		if (READ_ONCE(event->state) != PERF_EVENT_STATE_ACTIVE)
+			return 0;
+
+		/* matches smp_wmb() in event_sched_in() */
+		smp_rmb();
+
+		/*
+		 * We only want to restart ACTIVE events, so if the event goes
+		 * inactive here (event->oncpu==-1), there's nothing more to do;
+		 * fall through with ret==-ENXIO.
+		 */
+		ret = cpu_function_call(READ_ONCE(event->oncpu),
+					__perf_event_stop, &sd);
+	} while (ret == -EAGAIN);
+
+	return ret;
+}
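+
The ordering contract between event_sched_in() and __perf_event_stop()
distilled into a standalone sketch (illustrative types, not kernel code):

	struct ex_event {
		int state;
		int oncpu;
	};

	static void ex_publish(struct ex_event *e, int cpu, int active_state)
	{
		WRITE_ONCE(e->oncpu, cpu);
		smp_wmb();		/* order oncpu before state */
		WRITE_ONCE(e->state, active_state);
	}

	static bool ex_running_here(struct ex_event *e, int active_state, int me)
	{
		if (READ_ONCE(e->state) != active_state)
			return false;
		smp_rmb();		/* pairs with smp_wmb() in ex_publish() */
		return READ_ONCE(e->oncpu) == me;
	}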
+
+/*
+ * In order to contain the amount of raciness and trickiness in the address
+ * filter configuration management, it is a two-part process:
+ *
+ * (p1) when userspace mappings change as a result of (1) or (2) or (3) below,
+ *      we update the addresses of corresponding vmas in
+ *	event::addr_filters_offs array and bump the event::addr_filters_gen;
+ * (p2) when an event is scheduled in (pmu::add), it calls
+ *      perf_event_addr_filters_sync() which calls pmu::addr_filters_sync()
+ *      if the generation has changed since the previous call.
+ *
+ * If (p1) happens while the event is active, we restart it to force (p2).
+ *
+ * (1) perf_addr_filters_apply(): adjusting filters' offsets based on
+ *     pre-existing mappings, called once when new filters arrive via SET_FILTER
+ *     ioctl;
+ * (2) perf_addr_filters_adjust(): adjusting filters' offsets based on newly
+ *     registered mapping, called for every new mmap(), with mm::mmap_sem down
+ *     for reading;
+ * (3) perf_event_addr_filters_exec(): clearing filters' offsets in the process
+ *     of exec.
+ */
+void perf_event_addr_filters_sync(struct perf_event *event)
+{
+	struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
+
+	if (!has_addr_filter(event))
+		return;
+
+	raw_spin_lock(&ifh->lock);
+	if (event->addr_filters_gen != event->hw.addr_filters_gen) {
+		event->pmu->addr_filters_sync(event);
+		event->hw.addr_filters_gen = event->addr_filters_gen;
+	}
+	raw_spin_unlock(&ifh->lock);
+}
+EXPORT_SYMBOL_GPL(perf_event_addr_filters_sync);
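+
What a PMU's ->addr_filters_sync() callback might look like on the other
side of this contract (everything named example_* is hypothetical; the
caller holds ifh->lock and runs in pmu::add() context):

	/* stub standing in for real range-programming hardware code */
	static void example_hw_set_range(int idx, unsigned long start,
					 unsigned long end, bool is_start)
	{
	}

	static void example_pmu_addr_filters_sync(struct perf_event *event)
	{
		struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
		struct perf_addr_filter *filter;
		int i = 0;

		list_for_each_entry(filter, &ifh->list, entry) {
			/* addr_filters_offs[] holds the vma base for file filters */
			unsigned long start = event->addr_filters_offs[i] +
					      filter->offset;

			example_hw_set_range(i, start, start + filter->size,
					     filter->filter);
			i++;
		}
	}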
+
 static int _perf_event_refresh(struct perf_event *event, int refresh)
 {
 	/*
@@ -3209,16 +3322,6 @@
 		put_ctx(clone_ctx);
 }
 
-void perf_event_exec(void)
-{
-	int ctxn;
-
-	rcu_read_lock();
-	for_each_task_context_nr(ctxn)
-		perf_event_enable_on_exec(ctxn);
-	rcu_read_unlock();
-}
-
 struct perf_read_data {
 	struct perf_event *event;
 	bool group;
@@ -3720,6 +3823,9 @@
 	return true;
 }
 
+static void perf_addr_filters_splice(struct perf_event *event,
+				       struct list_head *head);
+
 static void _free_event(struct perf_event *event)
 {
 	irq_work_sync(&event->pending);
@@ -3747,6 +3853,8 @@
 	}
 
 	perf_event_free_bpf_prog(event);
+	perf_addr_filters_splice(event, NULL);
+	kfree(event->addr_filters_offs);
 
 	if (event->destroy)
 		event->destroy(event);
@@ -4343,6 +4451,19 @@
 	case PERF_EVENT_IOC_SET_BPF:
 		return perf_event_set_bpf_prog(event, arg);
 
+	case PERF_EVENT_IOC_PAUSE_OUTPUT: {
+		struct ring_buffer *rb;
+
+		rcu_read_lock();
+		rb = rcu_dereference(event->rb);
+		if (!rb || !rb->nr_pages) {
+			rcu_read_unlock();
+			return -EINVAL;
+		}
+		rb_toggle_paused(rb, !!arg);
+		rcu_read_unlock();
+		return 0;
+	}
 	default:
 		return -ENOTTY;
 	}
@@ -4659,6 +4780,8 @@
 		event->pmu->event_mapped(event);
 }
 
+static void perf_pmu_output_stop(struct perf_event *event);
+
 /*
  * A buffer can be mmap()ed multiple times; either directly through the same
  * event, or through other events by use of perf_event_set_output().
@@ -4686,10 +4809,22 @@
 	 */
 	if (rb_has_aux(rb) && vma->vm_pgoff == rb->aux_pgoff &&
 	    atomic_dec_and_mutex_lock(&rb->aux_mmap_count, &event->mmap_mutex)) {
+		/*
+		 * Stop all AUX events that are writing to this buffer,
+		 * so that we can free its AUX pages and corresponding PMU
+		 * data. Note that after rb::aux_mmap_count has dropped to zero,
+		 * they won't start any more (see perf_aux_output_begin()).
+		 */
+		perf_pmu_output_stop(event);
+
+		/* now it's safe to free the pages */
 		atomic_long_sub(rb->aux_nr_pages, &mmap_user->locked_vm);
 		vma->vm_mm->pinned_vm -= rb->aux_mmap_locked;
 
+		/* this has to be the last one */
 		rb_free_aux(rb);
+		WARN_ON_ONCE(atomic_read(&rb->aux_refcount));
+
 		mutex_unlock(&event->mmap_mutex);
 	}
 
@@ -5630,9 +5765,13 @@
 	}
 }
 
-void perf_event_output(struct perf_event *event,
-			struct perf_sample_data *data,
-			struct pt_regs *regs)
+static void __always_inline
+__perf_event_output(struct perf_event *event,
+		    struct perf_sample_data *data,
+		    struct pt_regs *regs,
+		    int (*output_begin)(struct perf_output_handle *,
+					struct perf_event *,
+					unsigned int))
 {
 	struct perf_output_handle handle;
 	struct perf_event_header header;
@@ -5642,7 +5781,7 @@
 
 	perf_prepare_sample(&header, data, event, regs);
 
-	if (perf_output_begin(&handle, event, header.size))
+	if (output_begin(&handle, event, header.size))
 		goto exit;
 
 	perf_output_sample(&handle, &header, data, event);
@@ -5653,6 +5792,30 @@
 	rcu_read_unlock();
 }
 
+void
+perf_event_output_forward(struct perf_event *event,
+			 struct perf_sample_data *data,
+			 struct pt_regs *regs)
+{
+	__perf_event_output(event, data, regs, perf_output_begin_forward);
+}
+
+void
+perf_event_output_backward(struct perf_event *event,
+			   struct perf_sample_data *data,
+			   struct pt_regs *regs)
+{
+	__perf_event_output(event, data, regs, perf_output_begin_backward);
+}
+
+void
+perf_event_output(struct perf_event *event,
+		  struct perf_sample_data *data,
+		  struct pt_regs *regs)
+{
+	__perf_event_output(event, data, regs, perf_output_begin);
+}
+
 /*
  * read event_id
  */
@@ -5698,15 +5861,18 @@
 static void
 perf_event_aux_ctx(struct perf_event_context *ctx,
 		   perf_event_aux_output_cb output,
-		   void *data)
+		   void *data, bool all)
 {
 	struct perf_event *event;
 
 	list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
-		if (event->state < PERF_EVENT_STATE_INACTIVE)
-			continue;
-		if (!event_filter_match(event))
-			continue;
+		if (!all) {
+			if (event->state < PERF_EVENT_STATE_INACTIVE)
+				continue;
+			if (!event_filter_match(event))
+				continue;
+		}
+
 		output(event, data);
 	}
 }
@@ -5717,7 +5883,7 @@
 {
 	rcu_read_lock();
 	preempt_disable();
-	perf_event_aux_ctx(task_ctx, output, data);
+	perf_event_aux_ctx(task_ctx, output, data, false);
 	preempt_enable();
 	rcu_read_unlock();
 }
@@ -5747,13 +5913,13 @@
 		cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
 		if (cpuctx->unique_pmu != pmu)
 			goto next;
-		perf_event_aux_ctx(&cpuctx->ctx, output, data);
+		perf_event_aux_ctx(&cpuctx->ctx, output, data, false);
 		ctxn = pmu->task_ctx_nr;
 		if (ctxn < 0)
 			goto next;
 		ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
 		if (ctx)
-			perf_event_aux_ctx(ctx, output, data);
+			perf_event_aux_ctx(ctx, output, data, false);
 next:
 		put_cpu_ptr(pmu->pmu_cpu_context);
 	}
@@ -5761,6 +5927,134 @@
 }
 
 /*
+ * Clear all file-based filters at exec, they'll have to be
+ * re-instated when/if these objects are mmapped again.
+ */
+static void perf_event_addr_filters_exec(struct perf_event *event, void *data)
+{
+	struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
+	struct perf_addr_filter *filter;
+	unsigned int restart = 0, count = 0;
+	unsigned long flags;
+
+	if (!has_addr_filter(event))
+		return;
+
+	raw_spin_lock_irqsave(&ifh->lock, flags);
+	list_for_each_entry(filter, &ifh->list, entry) {
+		if (filter->inode) {
+			event->addr_filters_offs[count] = 0;
+			restart++;
+		}
+
+		count++;
+	}
+
+	if (restart)
+		event->addr_filters_gen++;
+	raw_spin_unlock_irqrestore(&ifh->lock, flags);
+
+	if (restart)
+		perf_event_restart(event);
+}
+
+void perf_event_exec(void)
+{
+	struct perf_event_context *ctx;
+	int ctxn;
+
+	rcu_read_lock();
+	for_each_task_context_nr(ctxn) {
+		ctx = current->perf_event_ctxp[ctxn];
+		if (!ctx)
+			continue;
+
+		perf_event_enable_on_exec(ctxn);
+
+		perf_event_aux_ctx(ctx, perf_event_addr_filters_exec, NULL,
+				   true);
+	}
+	rcu_read_unlock();
+}
+
+struct remote_output {
+	struct ring_buffer	*rb;
+	int			err;
+};
+
+static void __perf_event_output_stop(struct perf_event *event, void *data)
+{
+	struct perf_event *parent = event->parent;
+	struct remote_output *ro = data;
+	struct ring_buffer *rb = ro->rb;
+	struct stop_event_data sd = {
+		.event	= event,
+	};
+
+	if (!has_aux(event))
+		return;
+
+	if (!parent)
+		parent = event;
+
+	/*
+	 * In case of inheritance, it will be the parent that links to the
+	 * ring-buffer, but it will be the child that's actually using it:
+	 */
+	if (rcu_dereference(parent->rb) == rb)
+		ro->err = __perf_event_stop(&sd);
+}
+
+static int __perf_pmu_output_stop(void *info)
+{
+	struct perf_event *event = info;
+	struct pmu *pmu = event->pmu;
+	struct perf_cpu_context *cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
+	struct remote_output ro = {
+		.rb	= event->rb,
+	};
+
+	rcu_read_lock();
+	perf_event_aux_ctx(&cpuctx->ctx, __perf_event_output_stop, &ro, false);
+	if (cpuctx->task_ctx)
+		perf_event_aux_ctx(cpuctx->task_ctx, __perf_event_output_stop,
+				   &ro, false);
+	rcu_read_unlock();
+
+	return ro.err;
+}
+
+static void perf_pmu_output_stop(struct perf_event *event)
+{
+	struct perf_event *iter;
+	int err, cpu;
+
+restart:
+	rcu_read_lock();
+	list_for_each_entry_rcu(iter, &event->rb->event_list, rb_entry) {
+		/*
+		 * For per-CPU events, we need to make sure that neither they
+		 * nor their children are running; for cpu==-1 events it's
+		 * sufficient to stop the event itself if it's active, since
+		 * it can't have children.
+		 */
+		cpu = iter->cpu;
+		if (cpu == -1)
+			cpu = READ_ONCE(iter->oncpu);
+
+		if (cpu == -1)
+			continue;
+
+		err = cpu_function_call(cpu, __perf_pmu_output_stop, event);
+		if (err == -EAGAIN) {
+			rcu_read_unlock();
+			goto restart;
+		}
+	}
+	rcu_read_unlock();
+}
+
+/*
  * task tracking -- fork/exit
  *
  * enabled by: attr.comm | attr.mmap | attr.mmap2 | attr.mmap_data | attr.task
@@ -6169,6 +6463,87 @@
 	kfree(buf);
 }
 
+/*
+ * Whether this @filter depends on a dynamic object which is not loaded
+ * yet, or whose load addresses are not yet known.
+ */
+static bool perf_addr_filter_needs_mmap(struct perf_addr_filter *filter)
+{
+	return filter->filter && filter->inode;
+}
+
+/*
+ * Check whether inode and address range match filter criteria.
+ */
+static bool perf_addr_filter_match(struct perf_addr_filter *filter,
+				     struct file *file, unsigned long offset,
+				     unsigned long size)
+{
+	if (filter->inode != file->f_inode)
+		return false;
+
+	if (filter->offset > offset + size)
+		return false;
+
+	if (filter->offset + filter->size < offset)
+		return false;
+
+	return true;
+}
+
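+/*
+ * For example (illustrative numbers): a filter covering file offsets
+ * [0x1000, 0x3000) matches a VMA mapping offsets [0x2000, 0x4000) of
+ * the same inode, since the two ranges overlap; a VMA mapping
+ * [0x4000, 0x6000) does not.
+ */
+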
+static void __perf_addr_filters_adjust(struct perf_event *event, void *data)
+{
+	struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
+	struct vm_area_struct *vma = data;
+	unsigned long off = vma->vm_pgoff << PAGE_SHIFT, flags;
+	struct file *file = vma->vm_file;
+	struct perf_addr_filter *filter;
+	unsigned int restart = 0, count = 0;
+
+	if (!has_addr_filter(event))
+		return;
+
+	if (!file)
+		return;
+
+	raw_spin_lock_irqsave(&ifh->lock, flags);
+	list_for_each_entry(filter, &ifh->list, entry) {
+		if (perf_addr_filter_match(filter, file, off,
+					     vma->vm_end - vma->vm_start)) {
+			event->addr_filters_offs[count] = vma->vm_start;
+			restart++;
+		}
+
+		count++;
+	}
+
+	if (restart)
+		event->addr_filters_gen++;
+	raw_spin_unlock_irqrestore(&ifh->lock, flags);
+
+	if (restart)
+		perf_event_restart(event);
+}
+
+/*
+ * Adjust all task's events' filters to the new vma
+ */
+static void perf_addr_filters_adjust(struct vm_area_struct *vma)
+{
+	struct perf_event_context *ctx;
+	int ctxn;
+
+	rcu_read_lock();
+	for_each_task_context_nr(ctxn) {
+		ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
+		if (!ctx)
+			continue;
+
+		perf_event_aux_ctx(ctx, __perf_addr_filters_adjust, vma, true);
+	}
+	rcu_read_unlock();
+}
+
 void perf_event_mmap(struct vm_area_struct *vma)
 {
 	struct perf_mmap_event mmap_event;
@@ -6200,6 +6575,7 @@
 		/* .flags (attr_mmap2 only) */
 	};
 
+	perf_addr_filters_adjust(vma);
 	perf_event_mmap_event(&mmap_event);
 }
 
@@ -6491,10 +6867,7 @@
 		irq_work_queue(&event->pending);
 	}
 
-	if (event->overflow_handler)
-		event->overflow_handler(event, data, regs);
-	else
-		perf_event_output(event, data, regs);
+	event->overflow_handler(event, data, regs);
 
 	if (*perf_event_fasync(event) && event->pending_kill) {
 		event->pending_wakeup = 1;
@@ -7081,24 +7454,6 @@
 	perf_pmu_register(&perf_tracepoint, "tracepoint", PERF_TYPE_TRACEPOINT);
 }
 
-static int perf_event_set_filter(struct perf_event *event, void __user *arg)
-{
-	char *filter_str;
-	int ret;
-
-	if (event->attr.type != PERF_TYPE_TRACEPOINT)
-		return -EINVAL;
-
-	filter_str = strndup_user(arg, PAGE_SIZE);
-	if (IS_ERR(filter_str))
-		return PTR_ERR(filter_str);
-
-	ret = ftrace_profile_set_filter(event, event->attr.config, filter_str);
-
-	kfree(filter_str);
-	return ret;
-}
-
 static void perf_event_free_filter(struct perf_event *event)
 {
 	ftrace_profile_free_filter(event);
@@ -7153,11 +7508,6 @@
 {
 }
 
-static int perf_event_set_filter(struct perf_event *event, void __user *arg)
-{
-	return -ENOENT;
-}
-
 static void perf_event_free_filter(struct perf_event *event)
 {
 }
@@ -7186,6 +7536,387 @@
 #endif
 
 /*
+ * Allocate a new address filter
+ */
+static struct perf_addr_filter *
+perf_addr_filter_new(struct perf_event *event, struct list_head *filters)
+{
+	int node = cpu_to_node(event->cpu == -1 ? 0 : event->cpu);
+	struct perf_addr_filter *filter;
+
+	filter = kzalloc_node(sizeof(*filter), GFP_KERNEL, node);
+	if (!filter)
+		return NULL;
+
+	INIT_LIST_HEAD(&filter->entry);
+	list_add_tail(&filter->entry, filters);
+
+	return filter;
+}
+
+static void free_filters_list(struct list_head *filters)
+{
+	struct perf_addr_filter *filter, *iter;
+
+	list_for_each_entry_safe(filter, iter, filters, entry) {
+		if (filter->inode)
+			iput(filter->inode);
+		list_del(&filter->entry);
+		kfree(filter);
+	}
+}
+
+/*
+ * Free existing address filters and optionally install new ones
+ */
+static void perf_addr_filters_splice(struct perf_event *event,
+				     struct list_head *head)
+{
+	unsigned long flags;
+	LIST_HEAD(list);
+
+	if (!has_addr_filter(event))
+		return;
+
+	/* don't bother with children, they don't have their own filters */
+	if (event->parent)
+		return;
+
+	raw_spin_lock_irqsave(&event->addr_filters.lock, flags);
+
+	list_splice_init(&event->addr_filters.list, &list);
+	if (head)
+		list_splice(head, &event->addr_filters.list);
+
+	raw_spin_unlock_irqrestore(&event->addr_filters.lock, flags);
+
+	free_filters_list(&list);
+}
+
+/*
+ * Scan through mm's vmas and see if one of them matches the
+ * @filter; if so, adjust the filter's address range.
+ * Called with mm::mmap_sem down for reading.
+ */
+static unsigned long perf_addr_filter_apply(struct perf_addr_filter *filter,
+					    struct mm_struct *mm)
+{
+	struct vm_area_struct *vma;
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		struct file *file = vma->vm_file;
+		unsigned long off = vma->vm_pgoff << PAGE_SHIFT;
+		unsigned long vma_size = vma->vm_end - vma->vm_start;
+
+		if (!file)
+			continue;
+
+		if (!perf_addr_filter_match(filter, file, off, vma_size))
+			continue;
+
+		return vma->vm_start;
+	}
+
+	return 0;
+}
+
+/*
+ * Update event's address range filters based on the
+ * task's existing mappings, if any.
+ */
+static void perf_event_addr_filters_apply(struct perf_event *event)
+{
+	struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
+	struct task_struct *task = READ_ONCE(event->ctx->task);
+	struct perf_addr_filter *filter;
+	struct mm_struct *mm = NULL;
+	unsigned int count = 0;
+	unsigned long flags;
+
+	/*
+	 * We may observe TASK_TOMBSTONE, which means that the event tear-down
+	 * will stop on the parent's child_mutex that our caller is also holding
+	 */
+	if (task == TASK_TOMBSTONE)
+		return;
+
+	mm = get_task_mm(task);
+	if (!mm)
+		goto restart;
+
+	down_read(&mm->mmap_sem);
+
+	raw_spin_lock_irqsave(&ifh->lock, flags);
+	list_for_each_entry(filter, &ifh->list, entry) {
+		event->addr_filters_offs[count] = 0;
+
+		if (perf_addr_filter_needs_mmap(filter))
+			event->addr_filters_offs[count] =
+				perf_addr_filter_apply(filter, mm);
+
+		count++;
+	}
+
+	event->addr_filters_gen++;
+	raw_spin_unlock_irqrestore(&ifh->lock, flags);
+
+	up_read(&mm->mmap_sem);
+
+	mmput(mm);
+
+restart:
+	perf_event_restart(event);
+}
+
+/*
+ * Address range filtering: limiting the data to certain
+ * instruction address ranges. Filters are ioctl()ed to us from
+ * userspace as ascii strings.
+ *
+ * Filter string format:
+ *
+ * ACTION RANGE_SPEC
+ * where ACTION is one of the
+ *  * "filter": limit the trace to this region
+ *  * "start": start tracing from this address
+ *  * "stop": stop tracing at this address/region;
+ * RANGE_SPEC is
+ *  * for kernel addresses: <start address>[/<size>]
+ *  * for object files:     <start address>[/<size>]@</path/to/object/file>
+ *
+ * If <size> is not specified, the range is treated as a single address.
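+ *
+ * Examples (illustrative only; the object path is hypothetical):
+ *	filter 0x42000/0x1000			- limit trace to this
+ *						  kernel address range
+ *	start 0x4000@/usr/lib/libfoo.so		- start tracing at this
+ *						  address within libfoo.so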
+ */
+enum {
+	IF_ACT_FILTER,
+	IF_ACT_START,
+	IF_ACT_STOP,
+	IF_SRC_FILE,
+	IF_SRC_KERNEL,
+	IF_SRC_FILEADDR,
+	IF_SRC_KERNELADDR,
+};
+
+enum {
+	IF_STATE_ACTION = 0,
+	IF_STATE_SOURCE,
+	IF_STATE_END,
+};
+
+static const match_table_t if_tokens = {
+	{ IF_ACT_FILTER,	"filter" },
+	{ IF_ACT_START,		"start" },
+	{ IF_ACT_STOP,		"stop" },
+	{ IF_SRC_FILE,		"%u/%u@%s" },
+	{ IF_SRC_KERNEL,	"%u/%u" },
+	{ IF_SRC_FILEADDR,	"%u@%s" },
+	{ IF_SRC_KERNELADDR,	"%u" },
+};
+
+/*
+ * Address filter string parser
+ */
+static int
+perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
+			     struct list_head *filters)
+{
+	struct perf_addr_filter *filter = NULL;
+	char *start, *orig, *filename = NULL;
+	struct path path;
+	substring_t args[MAX_OPT_ARGS];
+	int state = IF_STATE_ACTION, token;
+	unsigned int kernel = 0;
+	int ret = -EINVAL;
+
+	orig = fstr = kstrdup(fstr, GFP_KERNEL);
+	if (!fstr)
+		return -ENOMEM;
+
+	while ((start = strsep(&fstr, " ,\n")) != NULL) {
+		ret = -EINVAL;
+
+		if (!*start)
+			continue;
+
+		/* filter definition begins */
+		if (state == IF_STATE_ACTION) {
+			filter = perf_addr_filter_new(event, filters);
+			if (!filter)
+				goto fail;
+		}
+
+		token = match_token(start, if_tokens, args);
+		switch (token) {
+		case IF_ACT_FILTER:
+		case IF_ACT_START:
+			filter->filter = 1;
+
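+			/* fall through */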
+		case IF_ACT_STOP:
+			if (state != IF_STATE_ACTION)
+				goto fail;
+
+			state = IF_STATE_SOURCE;
+			break;
+
+		case IF_SRC_KERNELADDR:
+		case IF_SRC_KERNEL:
+			kernel = 1;
+
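+			/* fall through */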
+		case IF_SRC_FILEADDR:
+		case IF_SRC_FILE:
+			if (state != IF_STATE_SOURCE)
+				goto fail;
+
+			if (token == IF_SRC_FILE || token == IF_SRC_KERNEL)
+				filter->range = 1;
+
+			*args[0].to = 0;
+			ret = kstrtoul(args[0].from, 0, &filter->offset);
+			if (ret)
+				goto fail;
+
+			if (filter->range) {
+				*args[1].to = 0;
+				ret = kstrtoul(args[1].from, 0, &filter->size);
+				if (ret)
+					goto fail;
+			}
+
+			if (token == IF_SRC_FILE) {
+				filename = match_strdup(&args[2]);
+				if (!filename) {
+					ret = -ENOMEM;
+					goto fail;
+				}
+			}
+
+			state = IF_STATE_END;
+			break;
+
+		default:
+			goto fail;
+		}
+
+		/*
+		 * Filter definition is fully parsed, validate and install it.
+		 * Make sure that it doesn't contradict itself or the event's
+		 * attribute.
+		 */
+		if (state == IF_STATE_END) {
+			if (kernel && event->attr.exclude_kernel)
+				goto fail;
+
+			if (!kernel) {
+				if (!filename)
+					goto fail;
+
+				/* look up the path and grab its inode */
+				ret = kern_path(filename, LOOKUP_FOLLOW, &path);
+				if (ret)
+					goto fail_free_name;
+
+				filter->inode = igrab(d_inode(path.dentry));
+				path_put(&path);
+				kfree(filename);
+				filename = NULL;
+
+				ret = -EINVAL;
+				if (!filter->inode ||
+				    !S_ISREG(filter->inode->i_mode))
+					/* free_filters_list() will iput() */
+					goto fail;
+			}
+
+			/* ready to consume more filters */
+			state = IF_STATE_ACTION;
+			filter = NULL;
+		}
+	}
+
+	if (state != IF_STATE_ACTION)
+		goto fail;
+
+	kfree(orig);
+
+	return 0;
+
+fail_free_name:
+	kfree(filename);
+fail:
+	free_filters_list(filters);
+	kfree(orig);
+
+	return ret;
+}
+
+static int
+perf_event_set_addr_filter(struct perf_event *event, char *filter_str)
+{
+	LIST_HEAD(filters);
+	int ret;
+
+	/*
+	 * Since this is called in perf_ioctl() path, we're already holding
+	 * ctx::mutex.
+	 */
+	lockdep_assert_held(&event->ctx->mutex);
+
+	if (WARN_ON_ONCE(event->parent))
+		return -EINVAL;
+
+	/*
+	 * For now, we only support filtering in per-task events; doing so
+	 * for CPU-wide events requires additional context switching trickery,
+	 * since the same object code will be mapped at different virtual
+	 * addresses in different processes.
+	 */
+	if (!event->ctx->task)
+		return -EOPNOTSUPP;
+
+	ret = perf_event_parse_addr_filter(event, filter_str, &filters);
+	if (ret)
+		return ret;
+
+	ret = event->pmu->addr_filters_validate(&filters);
+	if (ret) {
+		free_filters_list(&filters);
+		return ret;
+	}
+
+	/* remove existing filters, if any */
+	perf_addr_filters_splice(event, &filters);
+
+	/* install new filters */
+	perf_event_for_each_child(event, perf_event_addr_filters_apply);
+
+	return ret;
+}
+
+static int perf_event_set_filter(struct perf_event *event, void __user *arg)
+{
+	char *filter_str;
+	int ret = -EINVAL;
+
+	if ((event->attr.type != PERF_TYPE_TRACEPOINT ||
+	    !IS_ENABLED(CONFIG_EVENT_TRACING)) &&
+	    !has_addr_filter(event))
+		return -EINVAL;
+
+	filter_str = strndup_user(arg, PAGE_SIZE);
+	if (IS_ERR(filter_str))
+		return PTR_ERR(filter_str);
+
+	if (IS_ENABLED(CONFIG_EVENT_TRACING) &&
+	    event->attr.type == PERF_TYPE_TRACEPOINT)
+		ret = ftrace_profile_set_filter(event, event->attr.config,
+						filter_str);
+	else if (has_addr_filter(event))
+		ret = perf_event_set_addr_filter(event, filter_str);
+
+	kfree(filter_str);
+	return ret;
+}
+
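+/*
+ * Illustrative only: userspace reaches the above via the existing
+ * PERF_EVENT_IOC_SET_FILTER ioctl, e.g. (fd, path and range are
+ * hypothetical):
+ *
+ *	ioctl(perf_fd, PERF_EVENT_IOC_SET_FILTER,
+ *	      "filter 0x1000/0x2000@/usr/lib/libfoo.so");
+ */
+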
+/*
  * hrtimer based swevent callback
  */
 
@@ -7542,6 +8273,20 @@
 out:
 	mutex_unlock(&pmus_lock);
 }
+
+/*
+ * Let userspace know that this PMU supports address range filtering:
+ */
+static ssize_t nr_addr_filters_show(struct device *dev,
+				    struct device_attribute *attr,
+				    char *page)
+{
+	struct pmu *pmu = dev_get_drvdata(dev);
+
+	return snprintf(page, PAGE_SIZE - 1, "%d\n", pmu->nr_addr_filters);
+}
+static DEVICE_ATTR_RO(nr_addr_filters);
+
 static struct idr pmu_idr;
 
 static ssize_t
@@ -7643,9 +8388,19 @@
 	if (ret)
 		goto free_dev;
 
+	/* For PMUs with address filters, throw in an extra attribute: */
+	if (pmu->nr_addr_filters)
+		ret = device_create_file(pmu->dev, &dev_attr_nr_addr_filters);
+
+	if (ret)
+		goto del_dev;
+
 out:
 	return ret;
 
+del_dev:
+	device_del(pmu->dev);
+
 free_dev:
 	put_device(pmu->dev);
 	goto out;
@@ -7685,6 +8440,21 @@
 	}
 
 skip_type:
+	if (pmu->task_ctx_nr == perf_hw_context) {
+		static int hw_context_taken = 0;
+
+		/*
+		 * Other than systems with heterogeneous CPUs, it never makes
+		 * sense for two PMUs to share perf_hw_context. PMUs which are
+		 * uncore must use perf_invalid_context.
+		 */
+		if (WARN_ON_ONCE(hw_context_taken &&
+		    !(pmu->capabilities & PERF_PMU_CAP_HETEROGENEOUS_CPUS)))
+			pmu->task_ctx_nr = perf_invalid_context;
+
+		hw_context_taken = 1;
+	}
+
 	pmu->pmu_cpu_context = find_pmu_context(pmu->task_ctx_nr);
 	if (pmu->pmu_cpu_context)
 		goto got_cpu_context;
@@ -7772,6 +8542,8 @@
 	free_percpu(pmu->pmu_disable_count);
 	if (pmu->type >= PERF_TYPE_MAX)
 		idr_remove(&pmu_idr, pmu->type);
+	if (pmu->nr_addr_filters)
+		device_remove_file(pmu->dev, &dev_attr_nr_addr_filters);
 	device_del(pmu->dev);
 	put_device(pmu->dev);
 	free_pmu_context(pmu);
@@ -7965,6 +8737,7 @@
 	INIT_LIST_HEAD(&event->sibling_list);
 	INIT_LIST_HEAD(&event->rb_entry);
 	INIT_LIST_HEAD(&event->active_entry);
+	INIT_LIST_HEAD(&event->addr_filters.list);
 	INIT_HLIST_NODE(&event->hlist_entry);
 
 
@@ -7972,6 +8745,7 @@
 	init_irq_work(&event->pending, perf_pending_event);
 
 	mutex_init(&event->mmap_mutex);
+	raw_spin_lock_init(&event->addr_filters.lock);
 
 	atomic_long_set(&event->refcount, 1);
 	event->cpu		= cpu;
@@ -8006,8 +8780,16 @@
 		context = parent_event->overflow_handler_context;
 	}
 
-	event->overflow_handler	= overflow_handler;
-	event->overflow_handler_context = context;
+	if (overflow_handler) {
+		event->overflow_handler	= overflow_handler;
+		event->overflow_handler_context = context;
+	} else if (is_write_backward(event)) {
+		event->overflow_handler = perf_event_output_backward;
+		event->overflow_handler_context = NULL;
+	} else {
+		event->overflow_handler = perf_event_output_forward;
+		event->overflow_handler_context = NULL;
+	}
 
 	perf_event__state_init(event);
 
@@ -8048,11 +8830,22 @@
 	if (err)
 		goto err_pmu;
 
+	if (has_addr_filter(event)) {
+		event->addr_filters_offs = kcalloc(pmu->nr_addr_filters,
+						   sizeof(unsigned long),
+						   GFP_KERNEL);
+		if (!event->addr_filters_offs)
+			goto err_per_task;
+
+		/* force hw sync on the address filters */
+		event->addr_filters_gen = 1;
+	}
+
 	if (!event->parent) {
 		if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
 			err = get_callchain_buffers();
 			if (err)
-				goto err_per_task;
+				goto err_addr_filters;
 		}
 	}
 
@@ -8061,6 +8854,9 @@
 
 	return event;
 
+err_addr_filters:
+	kfree(event->addr_filters_offs);
+
 err_per_task:
 	exclusive_event_destroy(event);
 
@@ -8240,6 +9036,13 @@
 		goto out;
 
 	/*
+	 * Either writing ring buffer from beginning or from end.
+	 * Mixing is not allowed.
+	 */
+	if (is_write_backward(output_event) != is_write_backward(event))
+		goto out;
+
+	/*
 	 * If both events generate aux data, they must be on the same PMU
 	 */
 	if (has_aux(event) && has_aux(output_event) &&
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 4199b6d..05f9f6d 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -11,13 +11,13 @@
 struct ring_buffer {
 	atomic_t			refcount;
 	struct rcu_head			rcu_head;
-	struct irq_work			irq_work;
 #ifdef CONFIG_PERF_USE_VMALLOC
 	struct work_struct		work;
 	int				page_order;	/* allocation order  */
 #endif
 	int				nr_pages;	/* nr of data pages  */
 	int				overwrite;	/* can overwrite itself */
+	int				paused;		/* writing is disabled */
 
 	atomic_t			poll;		/* POLL_ for wakeups */
 
@@ -65,6 +65,14 @@
 	rb_free(rb);
 }
 
+static inline void rb_toggle_paused(struct ring_buffer *rb, bool pause)
+{
+	if (!pause && rb->nr_pages)
+		rb->paused = 0;
+	else
+		rb->paused = 1;
+}
+
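+/*
+ * Illustrative only: a pause/resume path (e.g. an ioctl handler) would
+ * bracket the region in which no records should be written:
+ *
+ *	rb_toggle_paused(rb, true);	- new records are counted as lost
+ *	...
+ *	rb_toggle_paused(rb, false);	- resumes output iff rb->nr_pages
+ */
+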
 extern struct ring_buffer *
 rb_alloc(int nr_pages, long watermark, int cpu, int flags);
 extern void perf_event_wakeup(struct perf_event *event);
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 7611d0f..ae9b90d 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -102,8 +102,21 @@
 	preempt_enable();
 }
 
-int perf_output_begin(struct perf_output_handle *handle,
-		      struct perf_event *event, unsigned int size)
+static __always_inline bool
+ring_buffer_has_space(unsigned long head, unsigned long tail,
+		      unsigned long data_size, unsigned int size,
+		      bool backward)
+{
+	if (!backward)
+		return CIRC_SPACE(head, tail, data_size) >= size;
+	else
+		return CIRC_SPACE(tail, head, data_size) >= size;
+}
+
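+/*
+ * Example (illustrative figures): with data_size = 8, head = 6 and
+ * tail = 1, a forward writer has CIRC_SPACE(6, 1, 8) = 2 bytes left
+ * before overrunning tail, while a backward writer (head moving down
+ * towards tail) has CIRC_SPACE(1, 6, 8) = 4 bytes.
+ */
+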
+static __always_inline int
+__perf_output_begin(struct perf_output_handle *handle,
+		    struct perf_event *event, unsigned int size,
+		    bool backward)
 {
 	struct ring_buffer *rb;
 	unsigned long tail, offset, head;
@@ -125,8 +138,11 @@
 	if (unlikely(!rb))
 		goto out;
 
-	if (unlikely(!rb->nr_pages))
+	if (unlikely(rb->paused)) {
+		if (rb->nr_pages)
+			local_inc(&rb->lost);
 		goto out;
+	}
 
 	handle->rb    = rb;
 	handle->event = event;
@@ -143,9 +159,12 @@
 	do {
 		tail = READ_ONCE(rb->user_page->data_tail);
 		offset = head = local_read(&rb->head);
-		if (!rb->overwrite &&
-		    unlikely(CIRC_SPACE(head, tail, perf_data_size(rb)) < size))
-			goto fail;
+		if (!rb->overwrite) {
+			if (unlikely(!ring_buffer_has_space(head, tail,
+							    perf_data_size(rb),
+							    size, backward)))
+				goto fail;
+		}
 
 		/*
 		 * The above forms a control dependency barrier separating the
@@ -159,9 +178,17 @@
 		 * See perf_output_put_handle().
 		 */
 
-		head += size;
+		if (!backward)
+			head += size;
+		else
+			head -= size;
 	} while (local_cmpxchg(&rb->head, offset, head) != offset);
 
+	if (backward) {
+		offset = head;
+		head = (u64)(-head);
+	}
+
 	/*
 	 * We rely on the implied barrier() by local_cmpxchg() to ensure
 	 * none of the data stores below can be lifted up by the compiler.
@@ -203,6 +230,26 @@
 	return -ENOSPC;
 }
 
+int perf_output_begin_forward(struct perf_output_handle *handle,
+			     struct perf_event *event, unsigned int size)
+{
+	return __perf_output_begin(handle, event, size, false);
+}
+
+int perf_output_begin_backward(struct perf_output_handle *handle,
+			       struct perf_event *event, unsigned int size)
+{
+	return __perf_output_begin(handle, event, size, true);
+}
+
+int perf_output_begin(struct perf_output_handle *handle,
+		      struct perf_event *event, unsigned int size)
+{
+	return __perf_output_begin(handle, event, size,
+				   unlikely(is_write_backward(event)));
+}
+
 unsigned int perf_output_copy(struct perf_output_handle *handle,
 		      const void *buf, unsigned int len)
 {
@@ -221,8 +268,6 @@
 	rcu_read_unlock();
 }
 
-static void rb_irq_work(struct irq_work *work);
-
 static void
 ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
 {
@@ -243,16 +288,13 @@
 
 	INIT_LIST_HEAD(&rb->event_list);
 	spin_lock_init(&rb->event_lock);
-	init_irq_work(&rb->irq_work, rb_irq_work);
-}
 
-static void ring_buffer_put_async(struct ring_buffer *rb)
-{
-	if (!atomic_dec_and_test(&rb->refcount))
-		return;
-
-	rb->rcu_head.next = (void *)rb;
-	irq_work_queue(&rb->irq_work);
+	/*
+	 * perf_output_begin() only checks rb->paused, therefore
+	 * rb->paused must be true if we have no pages for output.
+	 */
+	if (!rb->nr_pages)
+		rb->paused = 1;
 }
 
 /*
@@ -264,6 +306,10 @@
  * The ordering is similar to that of perf_output_{begin,end}, with
  * the exception of (B), which should be taken care of by the pmu
  * driver, since ordering rules will differ depending on hardware.
+ *
+ * Call this from pmu::start(); see the comment in perf_aux_output_end()
+ * about its use in pmu callbacks. Both can also be called from the PMI
+ * handler if needed.
  */
 void *perf_aux_output_begin(struct perf_output_handle *handle,
 			    struct perf_event *event)
@@ -288,6 +334,13 @@
 		goto err;
 
 	/*
+	 * If rb::aux_mmap_count is zero (and rb_has_aux() above went through),
+	 * the aux buffer is in perf_mmap_close(), about to get freed.
+	 */
+	if (!atomic_read(&rb->aux_mmap_count))
+		goto err_put;
+
+	/*
 	 * Nesting is not supported for AUX area, make sure nested
 	 * writers are caught early
 	 */
@@ -328,10 +381,11 @@
 	return handle->rb->aux_priv;
 
 err_put:
+	/* can't be last */
 	rb_free_aux(rb);
 
 err:
-	ring_buffer_put_async(rb);
+	ring_buffer_put(rb);
 	handle->event = NULL;
 
 	return NULL;
@@ -342,6 +396,10 @@
  * aux_head and posting a PERF_RECORD_AUX into the perf buffer. It is the
  * pmu driver's responsibility to observe ordering rules of the hardware,
  * so that all the data is externally visible before this is called.
+ *
+ * Note: this has to be called from the pmu::stop() callback, as the assumption
+ * of the AUX buffer management code is that after pmu::stop(), the AUX
+ * transaction must be stopped and therefore drop the AUX reference count.
  */
 void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
 			 bool truncated)
@@ -389,8 +447,9 @@
 	handle->event = NULL;
 
 	local_set(&rb->aux_nest, 0);
+	/* can't be last */
 	rb_free_aux(rb);
-	ring_buffer_put_async(rb);
+	ring_buffer_put(rb);
 }
 
 /*
@@ -471,6 +530,14 @@
 {
 	int pg;
 
+	/*
+	 * Should never happen, the last reference should be dropped from
+	 * perf_mmap_close() path, which first stops aux transactions (which
+	 * in turn are the atomic holders of aux_refcount) and then does the
+	 * last rb_free_aux().
+	 */
+	WARN_ON_ONCE(in_atomic());
+
 	if (rb->aux_priv) {
 		rb->free_aux(rb->aux_priv);
 		rb->free_aux = NULL;
@@ -582,18 +649,7 @@
 void rb_free_aux(struct ring_buffer *rb)
 {
 	if (atomic_dec_and_test(&rb->aux_refcount))
-		irq_work_queue(&rb->irq_work);
-}
-
-static void rb_irq_work(struct irq_work *work)
-{
-	struct ring_buffer *rb = container_of(work, struct ring_buffer, irq_work);
-
-	if (!atomic_read(&rb->aux_refcount))
 		__rb_free_aux(rb);
-
-	if (rb->rcu_head.next == (void *)rb)
-		call_rcu(&rb->rcu_head, rb_free_rcu);
 }
 
 #ifndef CONFIG_PERF_USE_VMALLOC
diff --git a/kernel/fork.c b/kernel/fork.c
index d277e83..3e84515 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1494,7 +1494,7 @@
 	 * sigaltstack should be cleared when sharing the same VM
 	 */
 	if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM)
-		p->sas_ss_sp = p->sas_ss_size = 0;
+		sas_ss_reset(p);
 
 	/*
 	 * Syscall tracing and stepping should be turned off in the
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 78c1c0e..81f1a71 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -45,6 +45,7 @@
 #include <linux/bitops.h>
 #include <linux/gfp.h>
 #include <linux/kmemcheck.h>
+#include <linux/random.h>
 
 #include <asm/sections.h>
 
@@ -708,7 +709,7 @@
  * yet. Otherwise we look it up. We cache the result in the lock object
  * itself, so actual lookup of the hash should be once per lock object.
  */
-static inline struct lock_class *
+static struct lock_class *
 register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
 {
 	struct lockdep_subclass_key *key;
@@ -3585,7 +3586,35 @@
 	return 0;
 }
 
-static void __lock_pin_lock(struct lockdep_map *lock)
+static struct pin_cookie __lock_pin_lock(struct lockdep_map *lock)
+{
+	struct pin_cookie cookie = NIL_COOKIE;
+	struct task_struct *curr = current;
+	int i;
+
+	if (unlikely(!debug_locks))
+		return cookie;
+
+	for (i = 0; i < curr->lockdep_depth; i++) {
+		struct held_lock *hlock = curr->held_locks + i;
+
+		if (match_held_lock(hlock, lock)) {
+			/*
+			 * Grab 16 bits of randomness; this is sufficient to not
+			 * be guessable and still allows some pin nesting in
+			 * our u32 pin_count.
+			 */
+			cookie.val = 1 + (prandom_u32() >> 16);
+			hlock->pin_count += cookie.val;
+			return cookie;
+		}
+	}
+
+	WARN(1, "pinning an unheld lock\n");
+	return cookie;
+}
+
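+/*
+ * Illustrative usage via the lockdep_pin_lock()/lockdep_unpin_lock()
+ * wrappers (the scheduler's rq->lock being the typical user); the
+ * caller must already hold the lock:
+ *
+ *	struct pin_cookie cookie = lockdep_pin_lock(&rq->lock);
+ *	...			- rq->lock must not be dropped here
+ *	lockdep_unpin_lock(&rq->lock, cookie);
+ */
+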
+static void __lock_repin_lock(struct lockdep_map *lock, struct pin_cookie cookie)
 {
 	struct task_struct *curr = current;
 	int i;
@@ -3597,7 +3626,7 @@
 		struct held_lock *hlock = curr->held_locks + i;
 
 		if (match_held_lock(hlock, lock)) {
-			hlock->pin_count++;
+			hlock->pin_count += cookie.val;
 			return;
 		}
 	}
@@ -3605,7 +3634,7 @@
 	WARN(1, "pinning an unheld lock\n");
 }
 
-static void __lock_unpin_lock(struct lockdep_map *lock)
+static void __lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie cookie)
 {
 	struct task_struct *curr = current;
 	int i;
@@ -3620,7 +3649,11 @@
 			if (WARN(!hlock->pin_count, "unpinning an unpinned lock\n"))
 				return;
 
-			hlock->pin_count--;
+			hlock->pin_count -= cookie.val;
+
+			if (WARN((int)hlock->pin_count < 0, "pin count corrupted\n"))
+				hlock->pin_count = 0;
+
 			return;
 		}
 	}
@@ -3751,24 +3784,27 @@
 }
 EXPORT_SYMBOL_GPL(lock_is_held);
 
-void lock_pin_lock(struct lockdep_map *lock)
+struct pin_cookie lock_pin_lock(struct lockdep_map *lock)
 {
+	struct pin_cookie cookie = NIL_COOKIE;
 	unsigned long flags;
 
 	if (unlikely(current->lockdep_recursion))
-		return;
+		return cookie;
 
 	raw_local_irq_save(flags);
 	check_flags(flags);
 
 	current->lockdep_recursion = 1;
-	__lock_pin_lock(lock);
+	cookie = __lock_pin_lock(lock);
 	current->lockdep_recursion = 0;
 	raw_local_irq_restore(flags);
+
+	return cookie;
 }
 EXPORT_SYMBOL_GPL(lock_pin_lock);
 
-void lock_unpin_lock(struct lockdep_map *lock)
+void lock_repin_lock(struct lockdep_map *lock, struct pin_cookie cookie)
 {
 	unsigned long flags;
 
@@ -3779,7 +3815,24 @@
 	check_flags(flags);
 
 	current->lockdep_recursion = 1;
-	__lock_unpin_lock(lock);
+	__lock_repin_lock(lock, cookie);
+	current->lockdep_recursion = 0;
+	raw_local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(lock_repin_lock);
+
+void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie cookie)
+{
+	unsigned long flags;
+
+	if (unlikely(current->lockdep_recursion))
+		return;
+
+	raw_local_irq_save(flags);
+	check_flags(flags);
+
+	current->lockdep_recursion = 1;
+	__lock_unpin_lock(lock, cookie);
 	current->lockdep_recursion = 0;
 	raw_local_irq_restore(flags);
 }
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index 8ef1919..f8c5af5 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -75,12 +75,7 @@
 	long n_lock_acquired;
 };
 
-#if defined(MODULE)
-#define LOCKTORTURE_RUNNABLE_INIT 1
-#else
-#define LOCKTORTURE_RUNNABLE_INIT 0
-#endif
-int torture_runnable = LOCKTORTURE_RUNNABLE_INIT;
+int torture_runnable = IS_ENABLED(MODULE);
 module_param(torture_runnable, int, 0444);
 MODULE_PARM_DESC(torture_runnable, "Start locktorture at module init");
 
@@ -394,12 +389,12 @@
 
 	if (!rt_task(current)) {
 		/*
-		 * (1) Boost priority once every ~50k operations. When the
+		 * Boost priority once every ~50k operations. When the
 		 * task tries to take the lock, the rtmutex will account
 		 * for the new priority, and do any corresponding pi-dance.
 		 */
-		if (!(torture_random(trsp) %
-		      (cxt.nrealwriters_stress * factor))) {
+		if (trsp && !(torture_random(trsp) %
+			      (cxt.nrealwriters_stress * factor))) {
 			policy = SCHED_FIFO;
 			param.sched_priority = MAX_RT_PRIO - 1;
 		} else /* common case, do nothing */
@@ -748,6 +743,15 @@
 	if (torture_cleanup_begin())
 		return;
 
+	/*
+	 * Indicates early cleanup, meaning that the test has not run,
+	 * such as when passing bogus args when loading the module. As
+	 * such, only perform the underlying torture-specific cleanups,
+	 * and avoid anything related to locktorture.
+	 */
+	if (!cxt.lwsa)
+		goto end;
+
 	if (writer_tasks) {
 		for (i = 0; i < cxt.nrealwriters_stress; i++)
 			torture_stop_kthread(lock_torture_writer,
@@ -776,6 +780,7 @@
 	else
 		lock_torture_print_module_parms(cxt.cur_ops,
 						"End of test: SUCCESS");
+end:
 	torture_cleanup_end();
 }
 
@@ -870,6 +875,7 @@
 			VERBOSE_TOROUT_STRING("cxt.lrsa: Out of memory");
 			firsterr = -ENOMEM;
 			kfree(cxt.lwsa);
+			cxt.lwsa = NULL;
 			goto unwind;
 		}
 
@@ -878,6 +884,7 @@
 			cxt.lrsa[i].n_lock_acquired = 0;
 		}
 	}
+
 	lock_torture_print_module_parms(cxt.cur_ops, "Start of test");
 
 	/* Prepare torture context. */
diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h
index d734b75..22e0253 100644
--- a/kernel/locking/qspinlock_stat.h
+++ b/kernel/locking/qspinlock_stat.h
@@ -191,8 +191,6 @@
 
 		for (i = 0 ; i < qstat_num; i++)
 			WRITE_ONCE(ptr[i], 0);
-		for (i = 0 ; i < qstat_num; i++)
-			WRITE_ONCE(ptr[i], 0);
 	}
 	return count;
 }
@@ -214,10 +212,8 @@
 	struct dentry *d_qstat = debugfs_create_dir("qlockstat", NULL);
 	int i;
 
-	if (!d_qstat) {
-		pr_warn("Could not create 'qlockstat' debugfs directory\n");
-		return 0;
-	}
+	if (!d_qstat)
+		goto out;
 
 	/*
 	 * Create the debugfs files
@@ -227,12 +223,20 @@
 	 * performance.
 	 */
 	for (i = 0; i < qstat_num; i++)
-		debugfs_create_file(qstat_names[i], 0400, d_qstat,
-				   (void *)(long)i, &fops_qstat);
+		if (!debugfs_create_file(qstat_names[i], 0400, d_qstat,
+					 (void *)(long)i, &fops_qstat))
+			goto fail_undo;
 
-	debugfs_create_file(qstat_names[qstat_reset_cnts], 0200, d_qstat,
-			   (void *)(long)qstat_reset_cnts, &fops_qstat);
+	if (!debugfs_create_file(qstat_names[qstat_reset_cnts], 0200, d_qstat,
+				 (void *)(long)qstat_reset_cnts, &fops_qstat))
+		goto fail_undo;
+
 	return 0;
+fail_undo:
+	debugfs_remove_recursive(d_qstat);
+out:
+	pr_warn("Could not create 'qlockstat' debugfs entries\n");
+	return -ENOMEM;
 }
 fs_initcall(init_qspinlock_stat);
 
diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c
index 3a50485..1591f6b 100644
--- a/kernel/locking/rwsem-spinlock.c
+++ b/kernel/locking/rwsem-spinlock.c
@@ -191,11 +191,12 @@
 /*
  * get a write lock on the semaphore
  */
-void __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
+int __sched __down_write_common(struct rw_semaphore *sem, int state)
 {
 	struct rwsem_waiter waiter;
 	struct task_struct *tsk;
 	unsigned long flags;
+	int ret = 0;
 
 	raw_spin_lock_irqsave(&sem->wait_lock, flags);
 
@@ -215,21 +216,33 @@
 		 */
 		if (sem->count == 0)
 			break;
-		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+		if (signal_pending_state(state, current)) {
+			ret = -EINTR;
+			goto out;
+		}
+		set_task_state(tsk, state);
 		raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
 		schedule();
 		raw_spin_lock_irqsave(&sem->wait_lock, flags);
 	}
 	/* got the lock */
 	sem->count = -1;
+out:
 	list_del(&waiter.list);
 
 	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
+
+	return ret;
 }
 
 void __sched __down_write(struct rw_semaphore *sem)
 {
-	__down_write_nested(sem, 0);
+	__down_write_common(sem, TASK_UNINTERRUPTIBLE);
+}
+
+int __sched __down_write_killable(struct rw_semaphore *sem)
+{
+	return __down_write_common(sem, TASK_KILLABLE);
 }
 
 /*
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index a4d4de0..09e30c6 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -433,12 +433,13 @@
 /*
  * Wait until we successfully acquire the write lock
  */
-__visible
-struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
+static inline struct rw_semaphore *
+__rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
 {
 	long count;
 	bool waiting = true; /* any queued threads before us */
 	struct rwsem_waiter waiter;
+	struct rw_semaphore *ret = sem;
 
 	/* undo write bias from down_write operation, stop active locking */
 	count = rwsem_atomic_update(-RWSEM_ACTIVE_WRITE_BIAS, sem);
@@ -478,7 +479,7 @@
 		count = rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
 
 	/* wait until we successfully acquire the lock */
-	set_current_state(TASK_UNINTERRUPTIBLE);
+	set_current_state(state);
 	while (true) {
 		if (rwsem_try_write_lock(count, sem))
 			break;
@@ -486,21 +487,48 @@
 
 		/* Block until there are no active lockers. */
 		do {
+			if (signal_pending_state(state, current))
+				goto out_nolock;
+
 			schedule();
-			set_current_state(TASK_UNINTERRUPTIBLE);
+			set_current_state(state);
 		} while ((count = sem->count) & RWSEM_ACTIVE_MASK);
 
 		raw_spin_lock_irq(&sem->wait_lock);
 	}
 	__set_current_state(TASK_RUNNING);
-
 	list_del(&waiter.list);
 	raw_spin_unlock_irq(&sem->wait_lock);
 
-	return sem;
+	return ret;
+
+out_nolock:
+	__set_current_state(TASK_RUNNING);
+	raw_spin_lock_irq(&sem->wait_lock);
+	list_del(&waiter.list);
+	if (list_empty(&sem->wait_list))
+		rwsem_atomic_update(-RWSEM_WAITING_BIAS, sem);
+	else
+		__rwsem_do_wake(sem, RWSEM_WAKE_ANY);
+	raw_spin_unlock_irq(&sem->wait_lock);
+
+	return ERR_PTR(-EINTR);
+}
+
+__visible struct rw_semaphore * __sched
+rwsem_down_write_failed(struct rw_semaphore *sem)
+{
+	return __rwsem_down_write_failed_common(sem, TASK_UNINTERRUPTIBLE);
 }
 EXPORT_SYMBOL(rwsem_down_write_failed);
 
+__visible struct rw_semaphore * __sched
+rwsem_down_write_failed_killable(struct rw_semaphore *sem)
+{
+	return __rwsem_down_write_failed_common(sem, TASK_KILLABLE);
+}
+EXPORT_SYMBOL(rwsem_down_write_failed_killable);
+
 /*
  * handle waking up a waiter on the semaphore
  * - up_read/up_write has decremented the active part of count if we come here
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 205be0c..c817216 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -55,6 +55,25 @@
 EXPORT_SYMBOL(down_write);
 
 /*
+ * lock for writing
+ */
+int __sched down_write_killable(struct rw_semaphore *sem)
+{
+	might_sleep();
+	rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
+
+	if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock, __down_write_killable)) {
+		rwsem_release(&sem->dep_map, 1, _RET_IP_);
+		return -EINTR;
+	}
+
+	rwsem_set_owner(sem);
+	return 0;
+}
+EXPORT_SYMBOL(down_write_killable);
+
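+/*
+ * Illustrative caller, not part of this change: a path that should not
+ * block unkillably on mmap_sem could now do:
+ *
+ *	if (down_write_killable(&mm->mmap_sem))
+ *		return -EINTR;
+ *	...
+ *	up_write(&mm->mmap_sem);
+ */
+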
+/*
  * trylock for writing -- returns 1 if successful, 0 if contention
  */
 int down_write_trylock(struct rw_semaphore *sem)
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 12cd989..160e100 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -37,6 +37,14 @@
 #define HIBERNATE_SIG	"S1SUSPEND"
 
 /*
+ * When reading an {un,}compressed image, we may restore pages in place,
+ * in which case some architectures need these pages cleaned before they
+ * can be executed. We don't know which pages these may be, so clean the lot.
+ */
+static bool clean_pages_on_read;
+static bool clean_pages_on_decompress;
+
+/*
  *	The swap map is a data structure used for keeping track of each page
  *	written to a swap partition.  It consists of many swap_map_page
  *	structures that contain each an array of MAP_PAGE_ENTRIES swap entries.
@@ -241,6 +249,9 @@
 
 	if (bio_data_dir(bio) == WRITE)
 		put_page(page);
+	else if (clean_pages_on_read)
+		flush_icache_range((unsigned long)page_address(page),
+				   (unsigned long)page_address(page) + PAGE_SIZE);
 
 	if (bio->bi_error && !hb->error)
 		hb->error = bio->bi_error;
@@ -1049,6 +1060,7 @@
 
 	hib_init_batch(&hb);
 
+	clean_pages_on_read = true;
 	printk(KERN_INFO "PM: Loading image data pages (%u pages)...\n",
 		nr_to_read);
 	m = nr_to_read / 10;
@@ -1124,6 +1136,10 @@
 		d->unc_len = LZO_UNC_SIZE;
 		d->ret = lzo1x_decompress_safe(d->cmp + LZO_HEADER, d->cmp_len,
 		                               d->unc, &d->unc_len);
+		if (clean_pages_on_decompress)
+			flush_icache_range((unsigned long)d->unc,
+					   (unsigned long)d->unc + d->unc_len);
+
 		atomic_set(&d->stop, 1);
 		wake_up(&d->done);
 	}
@@ -1189,6 +1205,8 @@
 	}
 	memset(crc, 0, offsetof(struct crc_data, go));
 
+	clean_pages_on_decompress = true;
+
 	/*
 	 * Start the decompression threads.
 	 */
diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile
index 032b2c0..18dfc48 100644
--- a/kernel/rcu/Makefile
+++ b/kernel/rcu/Makefile
@@ -5,6 +5,7 @@
 obj-y += update.o sync.o
 obj-$(CONFIG_SRCU) += srcu.o
 obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
+obj-$(CONFIG_RCU_PERF_TEST) += rcuperf.o
 obj-$(CONFIG_TREE_RCU) += tree.o
 obj-$(CONFIG_PREEMPT_RCU) += tree.o
 obj-$(CONFIG_TREE_RCU_TRACE) += tree_trace.o
diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c
new file mode 100644
index 0000000..3cee0d8
--- /dev/null
+++ b/kernel/rcu/rcuperf.c
@@ -0,0 +1,655 @@
+/*
+ * Read-Copy Update module-based performance-test facility
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * Copyright (C) IBM Corporation, 2015
+ *
+ * Authors: Paul E. McKenney <paulmck@us.ibm.com>
+ */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kthread.h>
+#include <linux/err.h>
+#include <linux/spinlock.h>
+#include <linux/smp.h>
+#include <linux/rcupdate.h>
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+#include <linux/atomic.h>
+#include <linux/bitops.h>
+#include <linux/completion.h>
+#include <linux/moduleparam.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/reboot.h>
+#include <linux/freezer.h>
+#include <linux/cpu.h>
+#include <linux/delay.h>
+#include <linux/stat.h>
+#include <linux/srcu.h>
+#include <linux/slab.h>
+#include <asm/byteorder.h>
+#include <linux/torture.h>
+#include <linux/vmalloc.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.vnet.ibm.com>");
+
+#define PERF_FLAG "-perf:"
+#define PERFOUT_STRING(s) \
+	pr_alert("%s" PERF_FLAG s "\n", perf_type)
+#define VERBOSE_PERFOUT_STRING(s) \
+	do { if (verbose) pr_alert("%s" PERF_FLAG " %s\n", perf_type, s); } while (0)
+#define VERBOSE_PERFOUT_ERRSTRING(s) \
+	do { if (verbose) pr_alert("%s" PERF_FLAG "!!! %s\n", perf_type, s); } while (0)
+
+torture_param(bool, gp_exp, true, "Use expedited GP wait primitives");
+torture_param(int, holdoff, 10, "Holdoff time before test start (s)");
+torture_param(int, nreaders, -1, "Number of RCU reader threads");
+torture_param(int, nwriters, -1, "Number of RCU updater threads");
+torture_param(bool, shutdown, false, "Shutdown at end of performance tests.");
+torture_param(bool, verbose, true, "Enable verbose debugging printk()s");
+
+static char *perf_type = "rcu";
+module_param(perf_type, charp, 0444);
+MODULE_PARM_DESC(perf_type, "Type of RCU to performance-test (rcu, rcu_bh, ...)");
+
+static int nrealreaders;
+static int nrealwriters;
+static struct task_struct **writer_tasks;
+static struct task_struct **reader_tasks;
+static struct task_struct *shutdown_task;
+
+static u64 **writer_durations;
+static int *writer_n_durations;
+static atomic_t n_rcu_perf_reader_started;
+static atomic_t n_rcu_perf_writer_started;
+static atomic_t n_rcu_perf_writer_finished;
+static wait_queue_head_t shutdown_wq;
+static u64 t_rcu_perf_writer_started;
+static u64 t_rcu_perf_writer_finished;
+static unsigned long b_rcu_perf_writer_started;
+static unsigned long b_rcu_perf_writer_finished;
+
+static int rcu_perf_writer_state;
+#define RTWS_INIT		0
+#define RTWS_EXP_SYNC		1
+#define RTWS_SYNC		2
+#define RTWS_IDLE		3
+#define RTWS_STOPPING		4
+
+#define MAX_MEAS 10000
+#define MIN_MEAS 100
+
+#if defined(MODULE) || defined(CONFIG_RCU_PERF_TEST_RUNNABLE)
+#define RCUPERF_RUNNABLE_INIT 1
+#else
+#define RCUPERF_RUNNABLE_INIT 0
+#endif
+static int perf_runnable = RCUPERF_RUNNABLE_INIT;
+module_param(perf_runnable, int, 0444);
+MODULE_PARM_DESC(perf_runnable, "Start rcuperf at boot");
+
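+/*
+ * Illustrative invocations (parameter values are examples only): boot
+ * with "rcuperf.perf_type=srcu rcuperf.nwriters=4 rcuperf.shutdown=1",
+ * or "modprobe rcuperf nreaders=0 gp_exp=0" for a normal-GP,
+ * updater-only run.
+ */
+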
+/*
+ * Operations vector for selecting different types of tests.
+ */
+
+struct rcu_perf_ops {
+	int ptype;
+	void (*init)(void);
+	void (*cleanup)(void);
+	int (*readlock)(void);
+	void (*readunlock)(int idx);
+	unsigned long (*started)(void);
+	unsigned long (*completed)(void);
+	unsigned long (*exp_completed)(void);
+	void (*sync)(void);
+	void (*exp_sync)(void);
+	const char *name;
+};
+
+static struct rcu_perf_ops *cur_ops;
+
+/*
+ * Definitions for rcu perf testing.
+ */
+
+static int rcu_perf_read_lock(void) __acquires(RCU)
+{
+	rcu_read_lock();
+	return 0;
+}
+
+static void rcu_perf_read_unlock(int idx) __releases(RCU)
+{
+	rcu_read_unlock();
+}
+
+static unsigned long __maybe_unused rcu_no_completed(void)
+{
+	return 0;
+}
+
+static void rcu_sync_perf_init(void)
+{
+}
+
+static struct rcu_perf_ops rcu_ops = {
+	.ptype		= RCU_FLAVOR,
+	.init		= rcu_sync_perf_init,
+	.readlock	= rcu_perf_read_lock,
+	.readunlock	= rcu_perf_read_unlock,
+	.started	= rcu_batches_started,
+	.completed	= rcu_batches_completed,
+	.exp_completed	= rcu_exp_batches_completed,
+	.sync		= synchronize_rcu,
+	.exp_sync	= synchronize_rcu_expedited,
+	.name		= "rcu"
+};
+
+/*
+ * Definitions for rcu_bh perf testing.
+ */
+
+static int rcu_bh_perf_read_lock(void) __acquires(RCU_BH)
+{
+	rcu_read_lock_bh();
+	return 0;
+}
+
+static void rcu_bh_perf_read_unlock(int idx) __releases(RCU_BH)
+{
+	rcu_read_unlock_bh();
+}
+
+static struct rcu_perf_ops rcu_bh_ops = {
+	.ptype		= RCU_BH_FLAVOR,
+	.init		= rcu_sync_perf_init,
+	.readlock	= rcu_bh_perf_read_lock,
+	.readunlock	= rcu_bh_perf_read_unlock,
+	.started	= rcu_batches_started_bh,
+	.completed	= rcu_batches_completed_bh,
+	.exp_completed	= rcu_exp_batches_completed_sched,
+	.sync		= synchronize_rcu_bh,
+	.exp_sync	= synchronize_rcu_bh_expedited,
+	.name		= "rcu_bh"
+};
+
+/*
+ * Definitions for srcu perf testing.
+ */
+
+DEFINE_STATIC_SRCU(srcu_ctl_perf);
+static struct srcu_struct *srcu_ctlp = &srcu_ctl_perf;
+
+static int srcu_perf_read_lock(void) __acquires(srcu_ctlp)
+{
+	return srcu_read_lock(srcu_ctlp);
+}
+
+static void srcu_perf_read_unlock(int idx) __releases(srcu_ctlp)
+{
+	srcu_read_unlock(srcu_ctlp, idx);
+}
+
+static unsigned long srcu_perf_completed(void)
+{
+	return srcu_batches_completed(srcu_ctlp);
+}
+
+static void srcu_perf_synchronize(void)
+{
+	synchronize_srcu(srcu_ctlp);
+}
+
+static void srcu_perf_synchronize_expedited(void)
+{
+	synchronize_srcu_expedited(srcu_ctlp);
+}
+
+static struct rcu_perf_ops srcu_ops = {
+	.ptype		= SRCU_FLAVOR,
+	.init		= rcu_sync_perf_init,
+	.readlock	= srcu_perf_read_lock,
+	.readunlock	= srcu_perf_read_unlock,
+	.started	= NULL,
+	.completed	= srcu_perf_completed,
+	.exp_completed	= srcu_perf_completed,
+	.sync		= srcu_perf_synchronize,
+	.exp_sync	= srcu_perf_synchronize_expedited,
+	.name		= "srcu"
+};
+
+/*
+ * Definitions for sched perf testing.
+ */
+
+static int sched_perf_read_lock(void)
+{
+	preempt_disable();
+	return 0;
+}
+
+static void sched_perf_read_unlock(int idx)
+{
+	preempt_enable();
+}
+
+static struct rcu_perf_ops sched_ops = {
+	.ptype		= RCU_SCHED_FLAVOR,
+	.init		= rcu_sync_perf_init,
+	.readlock	= sched_perf_read_lock,
+	.readunlock	= sched_perf_read_unlock,
+	.started	= rcu_batches_started_sched,
+	.completed	= rcu_batches_completed_sched,
+	.exp_completed	= rcu_exp_batches_completed_sched,
+	.sync		= synchronize_sched,
+	.exp_sync	= synchronize_sched_expedited,
+	.name		= "sched"
+};
+
+#ifdef CONFIG_TASKS_RCU
+
+/*
+ * Definitions for RCU-tasks perf testing.
+ */
+
+static int tasks_perf_read_lock(void)
+{
+	return 0;
+}
+
+static void tasks_perf_read_unlock(int idx)
+{
+}
+
+static struct rcu_perf_ops tasks_ops = {
+	.ptype		= RCU_TASKS_FLAVOR,
+	.init		= rcu_sync_perf_init,
+	.readlock	= tasks_perf_read_lock,
+	.readunlock	= tasks_perf_read_unlock,
+	.started	= rcu_no_completed,
+	.completed	= rcu_no_completed,
+	.sync		= synchronize_rcu_tasks,
+	.exp_sync	= synchronize_rcu_tasks,
+	.name		= "tasks"
+};
+
+#define RCUPERF_TASKS_OPS &tasks_ops,
+
+static bool __maybe_unused torturing_tasks(void)
+{
+	return cur_ops == &tasks_ops;
+}
+
+#else /* #ifdef CONFIG_TASKS_RCU */
+
+#define RCUPERF_TASKS_OPS
+
+static bool __maybe_unused torturing_tasks(void)
+{
+	return false;
+}
+
+#endif /* #else #ifdef CONFIG_TASKS_RCU */
+
+/*
+ * If performance tests complete, wait for shutdown to commence.
+ */
+static void rcu_perf_wait_shutdown(void)
+{
+	cond_resched_rcu_qs();
+	if (atomic_read(&n_rcu_perf_writer_finished) < nrealwriters)
+		return;
+	while (!torture_must_stop())
+		schedule_timeout_uninterruptible(1);
+}
+
+/*
+ * RCU perf reader kthread.  Repeatedly does an empty RCU read-side
+ * critical section, minimizing update-side interference.
+ */
+static int
+rcu_perf_reader(void *arg)
+{
+	unsigned long flags;
+	int idx;
+	long me = (long)arg;
+
+	VERBOSE_PERFOUT_STRING("rcu_perf_reader task started");
+	set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids));
+	set_user_nice(current, MAX_NICE);
+	atomic_inc(&n_rcu_perf_reader_started);
+
+	do {
+		local_irq_save(flags);
+		idx = cur_ops->readlock();
+		cur_ops->readunlock(idx);
+		local_irq_restore(flags);
+		rcu_perf_wait_shutdown();
+	} while (!torture_must_stop());
+	torture_kthread_stopping("rcu_perf_reader");
+	return 0;
+}
+
+/*
+ * RCU perf writer kthread.  Repeatedly does a grace period.
+ */
+static int
+rcu_perf_writer(void *arg)
+{
+	int i = 0;
+	int i_max;
+	long me = (long)arg;
+	struct sched_param sp;
+	bool started = false, done = false, alldone = false;
+	u64 t;
+	u64 *wdp;
+	u64 *wdpp = writer_durations[me];
+
+	VERBOSE_PERFOUT_STRING("rcu_perf_writer task started");
+	WARN_ON(rcu_gp_is_expedited() && !rcu_gp_is_normal() && !gp_exp);
+	WARN_ON(rcu_gp_is_normal() && gp_exp);
+	WARN_ON(!wdpp);
+	set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids));
+	sp.sched_priority = 1;
+	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
+
+	if (holdoff)
+		schedule_timeout_uninterruptible(holdoff * HZ);
+
+	t = ktime_get_mono_fast_ns();
+	if (atomic_inc_return(&n_rcu_perf_writer_started) >= nrealwriters) {
+		t_rcu_perf_writer_started = t;
+		if (gp_exp) {
+			b_rcu_perf_writer_started =
+				cur_ops->exp_completed() / 2;
+		} else {
+			b_rcu_perf_writer_started =
+				cur_ops->completed();
+		}
+	}
+
+	do {
+		wdp = &wdpp[i];
+		*wdp = ktime_get_mono_fast_ns();
+		if (gp_exp) {
+			rcu_perf_writer_state = RTWS_EXP_SYNC;
+			cur_ops->exp_sync();
+		} else {
+			rcu_perf_writer_state = RTWS_SYNC;
+			cur_ops->sync();
+		}
+		rcu_perf_writer_state = RTWS_IDLE;
+		t = ktime_get_mono_fast_ns();
+		*wdp = t - *wdp;
+		i_max = i;
+		if (!started &&
+		    atomic_read(&n_rcu_perf_writer_started) >= nrealwriters)
+			started = true;
+		if (!done && i >= MIN_MEAS) {
+			done = true;
+			sp.sched_priority = 0;
+			sched_setscheduler_nocheck(current,
+						   SCHED_NORMAL, &sp);
+			pr_alert("%s" PERF_FLAG
+				 "rcu_perf_writer %ld has %d measurements\n",
+				 perf_type, me, MIN_MEAS);
+			if (atomic_inc_return(&n_rcu_perf_writer_finished) >=
+			    nrealwriters) {
+				schedule_timeout_interruptible(10);
+				rcu_ftrace_dump(DUMP_ALL);
+				PERFOUT_STRING("Test complete");
+				t_rcu_perf_writer_finished = t;
+				if (gp_exp) {
+					b_rcu_perf_writer_finished =
+						cur_ops->exp_completed() / 2;
+				} else {
+					b_rcu_perf_writer_finished =
+						cur_ops->completed();
+				}
+				if (shutdown) {
+					smp_mb(); /* Assign before wake. */
+					wake_up(&shutdown_wq);
+				}
+			}
+		}
+		if (done && !alldone &&
+		    atomic_read(&n_rcu_perf_writer_finished) >= nrealwriters)
+			alldone = true;
+		if (started && !alldone && i < MAX_MEAS - 1)
+			i++;
+		rcu_perf_wait_shutdown();
+	} while (!torture_must_stop());
+	rcu_perf_writer_state = RTWS_STOPPING;
+	writer_n_durations[me] = i_max;
+	torture_kthread_stopping("rcu_perf_writer");
+	return 0;
+}
+
+static inline void
+rcu_perf_print_module_parms(struct rcu_perf_ops *cur_ops, const char *tag)
+{
+	pr_alert("%s" PERF_FLAG
+		 "--- %s: nreaders=%d nwriters=%d verbose=%d shutdown=%d\n",
+		 perf_type, tag, nrealreaders, nrealwriters, verbose, shutdown);
+}
+
+static void
+rcu_perf_cleanup(void)
+{
+	int i;
+	int j;
+	int ngps = 0;
+	u64 *wdp;
+	u64 *wdpp;
+
+	if (torture_cleanup_begin())
+		return;
+
+	if (reader_tasks) {
+		for (i = 0; i < nrealreaders; i++)
+			torture_stop_kthread(rcu_perf_reader,
+					     reader_tasks[i]);
+		kfree(reader_tasks);
+	}
+
+	if (writer_tasks) {
+		for (i = 0; i < nrealwriters; i++) {
+			torture_stop_kthread(rcu_perf_writer,
+					     writer_tasks[i]);
+			if (!writer_n_durations)
+				continue;
+			j = writer_n_durations[i];
+			pr_alert("%s%s writer %d gps: %d\n",
+				 perf_type, PERF_FLAG, i, j);
+			ngps += j;
+		}
+		pr_alert("%s%s start: %llu end: %llu duration: %llu gps: %d batches: %ld\n",
+			 perf_type, PERF_FLAG,
+			 t_rcu_perf_writer_started, t_rcu_perf_writer_finished,
+			 t_rcu_perf_writer_finished -
+			 t_rcu_perf_writer_started,
+			 ngps,
+			 b_rcu_perf_writer_finished -
+			 b_rcu_perf_writer_started);
+		for (i = 0; i < nrealwriters; i++) {
+			if (!writer_durations)
+				break;
+			if (!writer_n_durations)
+				continue;
+			wdpp = writer_durations[i];
+			if (!wdpp)
+				continue;
+			for (j = 0; j <= writer_n_durations[i]; j++) {
+				wdp = &wdpp[j];
+				pr_alert("%s%s %4d writer-duration: %5d %llu\n",
+					perf_type, PERF_FLAG,
+					i, j, *wdp);
+				if (j % 100 == 0)
+					schedule_timeout_uninterruptible(1);
+			}
+			kfree(writer_durations[i]);
+		}
+		kfree(writer_tasks);
+		kfree(writer_durations);
+		kfree(writer_n_durations);
+	}
+
+	/* Do flavor-specific cleanup operations.  */
+	if (cur_ops->cleanup != NULL)
+		cur_ops->cleanup();
+
+	torture_cleanup_end();
+}
+
+/*
+ * Return the number if non-negative.  If -1, the number of CPUs.
+ * If less than -1, the number of CPUs plus one plus the (negative)
+ * value: -2 yields one fewer than the number of CPUs, -3 two fewer,
+ * and so on, but always at least one.
+ */
+static int compute_real(int n)
+{
+	int nr;
+
+	if (n >= 0) {
+		nr = n;
+	} else {
+		nr = num_online_cpus() + 1 + n;
+		if (nr <= 0)
+			nr = 1;
+	}
+	return nr;
+}
+
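+/*
+ * For example, on an 8-CPU system: compute_real(4) == 4,
+ * compute_real(-1) == 8, compute_real(-2) == 7, and very negative
+ * values clamp to 1.
+ */
+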
+/*
+ * RCU perf shutdown kthread.  Just waits to be awakened, then shuts
+ * down the system.
+ */
+static int
+rcu_perf_shutdown(void *arg)
+{
+	do {
+		wait_event(shutdown_wq,
+			   atomic_read(&n_rcu_perf_writer_finished) >=
+			   nrealwriters);
+	} while (atomic_read(&n_rcu_perf_writer_finished) < nrealwriters);
+	smp_mb(); /* Wake before output. */
+	rcu_perf_cleanup();
+	kernel_power_off();
+	return -EINVAL;
+}
+
+static int __init
+rcu_perf_init(void)
+{
+	long i;
+	int firsterr = 0;
+	static struct rcu_perf_ops *perf_ops[] = {
+		&rcu_ops, &rcu_bh_ops, &srcu_ops, &sched_ops,
+		RCUPERF_TASKS_OPS
+	};
+
+	if (!torture_init_begin(perf_type, verbose, &perf_runnable))
+		return -EBUSY;
+
+	/* Process args and tell the world that the perf'er is on the job. */
+	for (i = 0; i < ARRAY_SIZE(perf_ops); i++) {
+		cur_ops = perf_ops[i];
+		if (strcmp(perf_type, cur_ops->name) == 0)
+			break;
+	}
+	if (i == ARRAY_SIZE(perf_ops)) {
+		pr_alert("rcu-perf: invalid perf type: \"%s\"\n",
+			 perf_type);
+		pr_alert("rcu-perf types:");
+		for (i = 0; i < ARRAY_SIZE(perf_ops); i++)
+			pr_alert(" %s", perf_ops[i]->name);
+		pr_alert("\n");
+		firsterr = -EINVAL;
+		goto unwind;
+	}
+	if (cur_ops->init)
+		cur_ops->init();
+
+	nrealwriters = compute_real(nwriters);
+	nrealreaders = compute_real(nreaders);
+	atomic_set(&n_rcu_perf_reader_started, 0);
+	atomic_set(&n_rcu_perf_writer_started, 0);
+	atomic_set(&n_rcu_perf_writer_finished, 0);
+	rcu_perf_print_module_parms(cur_ops, "Start of test");
+
+	/* Start up the kthreads. */
+
+	if (shutdown) {
+		init_waitqueue_head(&shutdown_wq);
+		firsterr = torture_create_kthread(rcu_perf_shutdown, NULL,
+						  shutdown_task);
+		if (firsterr)
+			goto unwind;
+		schedule_timeout_uninterruptible(1);
+	}
+	reader_tasks = kcalloc(nrealreaders, sizeof(reader_tasks[0]),
+			       GFP_KERNEL);
+	if (reader_tasks == NULL) {
+		VERBOSE_PERFOUT_ERRSTRING("out of memory");
+		firsterr = -ENOMEM;
+		goto unwind;
+	}
+	for (i = 0; i < nrealreaders; i++) {
+		firsterr = torture_create_kthread(rcu_perf_reader, (void *)i,
+						  reader_tasks[i]);
+		if (firsterr)
+			goto unwind;
+	}
+	while (atomic_read(&n_rcu_perf_reader_started) < nrealreaders)
+		schedule_timeout_uninterruptible(1);
+	writer_tasks = kcalloc(nrealwriters, sizeof(writer_tasks[0]),
+			       GFP_KERNEL);
+	writer_durations = kcalloc(nrealwriters, sizeof(*writer_durations),
+				   GFP_KERNEL);
+	writer_n_durations =
+		kcalloc(nrealwriters, sizeof(*writer_n_durations),
+			GFP_KERNEL);
+	if (!writer_tasks || !writer_durations || !writer_n_durations) {
+		VERBOSE_PERFOUT_ERRSTRING("out of memory");
+		firsterr = -ENOMEM;
+		goto unwind;
+	}
+	for (i = 0; i < nrealwriters; i++) {
+		writer_durations[i] =
+			kcalloc(MAX_MEAS, sizeof(*writer_durations[i]),
+				GFP_KERNEL);
+		if (!writer_durations[i]) {
+			firsterr = -ENOMEM;
+			goto unwind;
+		}
+		firsterr = torture_create_kthread(rcu_perf_writer, (void *)i,
+						  writer_tasks[i]);
+		if (firsterr)
+			goto unwind;
+	}
+	torture_init_end();
+	return 0;
+
+unwind:
+	torture_init_end();
+	rcu_perf_cleanup();
+	return firsterr;
+}
+
+module_init(rcu_perf_init);
+module_exit(rcu_perf_cleanup);
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 250ea67..084a28a 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -130,8 +130,8 @@
 static unsigned long rcu_torture_current_version;
 static struct rcu_torture rcu_tortures[10 * RCU_TORTURE_PIPE_LEN];
 static DEFINE_SPINLOCK(rcu_torture_lock);
-static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_count) = { 0 };
-static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_batch) = { 0 };
+static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_count);
+static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_batch);
 static atomic_t rcu_torture_wcount[RCU_TORTURE_PIPE_LEN + 1];
 static atomic_t n_rcu_torture_alloc;
 static atomic_t n_rcu_torture_alloc_fail;
@@ -916,7 +916,7 @@
 static int
 rcu_torture_writer(void *arg)
 {
-	bool can_expedite = !rcu_gp_is_expedited();
+	bool can_expedite = !rcu_gp_is_expedited() && !rcu_gp_is_normal();
 	int expediting = 0;
 	unsigned long gp_snap;
 	bool gp_cond1 = gp_cond, gp_exp1 = gp_exp, gp_normal1 = gp_normal;
@@ -932,7 +932,7 @@
 	VERBOSE_TOROUT_STRING("rcu_torture_writer task started");
 	if (!can_expedite) {
 		pr_alert("%s" TORTURE_FLAG
-			 " Grace periods expedited from boot/sysfs for %s,\n",
+			 " GP expediting controlled from boot/sysfs for %s,\n",
 			 torture_type, cur_ops->name);
 		pr_alert("%s" TORTURE_FLAG
 			 " Disabled dynamic grace-period expediting.\n",
@@ -1082,17 +1082,6 @@
 	return 0;
 }
 
-static void rcutorture_trace_dump(void)
-{
-	static atomic_t beenhere = ATOMIC_INIT(0);
-
-	if (atomic_read(&beenhere))
-		return;
-	if (atomic_xchg(&beenhere, 1) != 0)
-		return;
-	ftrace_dump(DUMP_ALL);
-}
-
 /*
  * RCU torture reader from timer handler.  Dereferences rcu_torture_current,
  * incrementing the corresponding element of the pipeline array.  The
@@ -1142,7 +1131,7 @@
 	if (pipe_count > 1) {
 		do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu, ts,
 					  started, completed);
-		rcutorture_trace_dump();
+		rcu_ftrace_dump(DUMP_ALL);
 	}
 	__this_cpu_inc(rcu_torture_count[pipe_count]);
 	completed = completed - started;
@@ -1215,7 +1204,7 @@
 		if (pipe_count > 1) {
 			do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu,
 						  ts, started, completed);
-			rcutorture_trace_dump();
+			rcu_ftrace_dump(DUMP_ALL);
 		}
 		__this_cpu_inc(rcu_torture_count[pipe_count]);
 		completed = completed - started;
@@ -1333,7 +1322,7 @@
 			 rcu_torture_writer_state,
 			 gpnum, completed, flags);
 		show_rcu_gp_kthreads();
-		rcutorture_trace_dump();
+		rcu_ftrace_dump(DUMP_ALL);
 	}
 	rtcv_snap = rcu_torture_current_version;
 }
@@ -1489,7 +1478,9 @@
 		 * The above smp_load_acquire() ensures barrier_phase load
 		 * is ordered before the following ->call().
 		 */
+		local_irq_disable(); /* Just to test no-irq call_rcu(). */
 		cur_ops->call(&rcu, rcu_torture_barrier_cbf);
+		local_irq_enable();
 		if (atomic_dec_and_test(&barrier_cbs_count))
 			wake_up(&barrier_wq);
 	} while (!torture_must_stop());
@@ -1596,7 +1587,7 @@
 {
 	long cpu = (long)hcpu;
 
-	switch (action) {
+	switch (action & ~CPU_TASKS_FROZEN) {
 	case CPU_ONLINE:
 	case CPU_DOWN_FAILED:
 		(void)rcutorture_booster_init(cpu);
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 9a535a8..c7f1bc4 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -102,6 +102,8 @@
 	.barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
 	.name = RCU_STATE_NAME(sname), \
 	.abbr = sabbr, \
+	.exp_mutex = __MUTEX_INITIALIZER(sname##_state.exp_mutex), \
+	.exp_wake_mutex = __MUTEX_INITIALIZER(sname##_state.exp_wake_mutex), \
 }
 
 RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched);
@@ -370,6 +372,21 @@
 		rcu_momentary_dyntick_idle();
 		local_irq_restore(flags);
 	}
+	if (unlikely(raw_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))) {
+		/*
+		 * Yes, we just checked a per-CPU variable with preemption
+		 * enabled, so we might be migrated to some other CPU at
+		 * this point.  That is OK because in that case, the
+		 * migration will supply the needed quiescent state.
+		 * We might end up needlessly disabling preemption and
+		 * invoking rcu_sched_qs() on the destination CPU, but
+		 * the probability and cost are both quite low, so this
+		 * should not be a problem in practice.
+		 */
+		preempt_disable();
+		rcu_sched_qs();
+		preempt_enable();
+	}
 	this_cpu_inc(rcu_qs_ctr);
 	barrier(); /* Avoid RCU read-side critical sections leaking up. */
 }
@@ -385,9 +402,11 @@
 
 static ulong jiffies_till_first_fqs = ULONG_MAX;
 static ulong jiffies_till_next_fqs = ULONG_MAX;
+static bool rcu_kick_kthreads;
 
 module_param(jiffies_till_first_fqs, ulong, 0644);
 module_param(jiffies_till_next_fqs, ulong, 0644);
+module_param(rcu_kick_kthreads, bool, 0644);
 
 /*
  * How long the grace period must be before we start recruiting
@@ -460,6 +479,28 @@
 EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
 
 /*
+ * Return the number of RCU expedited batches completed thus far for
+ * debug & stats.  Odd numbers mean that a batch is in progress, even
+ * numbers mean idle.  The value returned will thus be roughly double
+ * the cumulative batches since boot.
+ */
+unsigned long rcu_exp_batches_completed(void)
+{
+	return rcu_state_p->expedited_sequence;
+}
+EXPORT_SYMBOL_GPL(rcu_exp_batches_completed);
+
+/*
+ * Return the number of RCU-sched expedited batches completed thus far
+ * for debug & stats.  Similar to rcu_exp_batches_completed().
+ */
+unsigned long rcu_exp_batches_completed_sched(void)
+{
+	return rcu_sched_state.expedited_sequence;
+}
+EXPORT_SYMBOL_GPL(rcu_exp_batches_completed_sched);
+
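The "odd means in progress" encoding above is the usual sequence-count trick: the counter is bumped once when a batch starts (going odd) and once when it ends (going even). Two sketch helpers, not kernel API, make the decoding explicit:

    #include <stdbool.h>

    static inline bool exp_batch_in_progress(unsigned long seq)
    {
    	return seq & 0x1;	/* odd => an expedited batch is running */
    }

    static inline unsigned long exp_batches_done(unsigned long seq)
    {
    	return seq >> 1;	/* hence "roughly double" in the comment */
    }

This is also why the tree_trace.c hunk below prints rsp->expedited_sequence / 2 as the batch count.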
+/*
  * Force a quiescent state.
  */
 void rcu_force_quiescent_state(void)
@@ -637,7 +678,7 @@
 			idle_task(smp_processor_id());
 
 		trace_rcu_dyntick(TPS("Error on entry: not idle task"), oldval, 0);
-		ftrace_dump(DUMP_ORIG);
+		rcu_ftrace_dump(DUMP_ORIG);
 		WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
 			  current->pid, current->comm,
 			  idle->pid, idle->comm); /* must be idle task! */
@@ -799,7 +840,7 @@
 
 		trace_rcu_dyntick(TPS("Error on exit: not idle task"),
 				  oldval, rdtp->dynticks_nesting);
-		ftrace_dump(DUMP_ORIG);
+		rcu_ftrace_dump(DUMP_ORIG);
 		WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
 			  current->pid, current->comm,
 			  idle->pid, idle->comm); /* must be idle task! */
@@ -1224,8 +1265,10 @@
 		       rsp->gp_flags,
 		       gp_state_getname(rsp->gp_state), rsp->gp_state,
 		       rsp->gp_kthread ? rsp->gp_kthread->state : ~0);
-		if (rsp->gp_kthread)
+		if (rsp->gp_kthread) {
 			sched_show_task(rsp->gp_kthread);
+			wake_up_process(rsp->gp_kthread);
+		}
 	}
 }
 
@@ -1249,6 +1292,25 @@
 	}
 }
 
+/*
+ * If too much time has passed in the current grace period, and if
+ * so configured, go kick the relevant kthreads.
+ */
+static void rcu_stall_kick_kthreads(struct rcu_state *rsp)
+{
+	unsigned long j;
+
+	if (!rcu_kick_kthreads)
+		return;
+	j = READ_ONCE(rsp->jiffies_kick_kthreads);
+	if (time_after(jiffies, j) && rsp->gp_kthread) {
+		WARN_ONCE(1, "Kicking %s grace-period kthread\n", rsp->name);
+		rcu_ftrace_dump(DUMP_ALL);
+		wake_up_process(rsp->gp_kthread);
+		WRITE_ONCE(rsp->jiffies_kick_kthreads, j + HZ);
+	}
+}
+
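Note the throttle built into rcu_stall_kick_kthreads(): the deadline is re-armed a full second (HZ jiffies) past the old one, so even a hopelessly stalled grace period produces at most roughly one kick, splat, and ftrace dump per second. A userspace sketch of the idiom, with a time_after() macro matching the kernel's wrap-safe comparison:

    #define time_after(a, b)	((long)((b) - (a)) < 0)	/* wrap-safe compare */
    #define HZ			1000			/* assumption: 1 kHz tick */

    static unsigned long jiffies;	/* stand-in for the kernel tick counter */
    static unsigned long next_kick;

    static int should_kick(void)
    {
    	if (!time_after(jiffies, next_kick))
    		return 0;		/* kicked too recently */
    	next_kick += HZ;		/* re-armed from the old deadline, as above */
    	return 1;
    }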
 static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum)
 {
 	int cpu;
@@ -1260,6 +1322,11 @@
 	struct rcu_node *rnp = rcu_get_root(rsp);
 	long totqlen = 0;
 
+	/* Kick and suppress, if so configured. */
+	rcu_stall_kick_kthreads(rsp);
+	if (rcu_cpu_stall_suppress)
+		return;
+
 	/* Only let one CPU complain about others per time interval. */
 
 	raw_spin_lock_irqsave_rcu_node(rnp, flags);
@@ -1333,6 +1400,11 @@
 	struct rcu_node *rnp = rcu_get_root(rsp);
 	long totqlen = 0;
 
+	/* Kick and suppress, if so configured. */
+	rcu_stall_kick_kthreads(rsp);
+	if (rcu_cpu_stall_suppress)
+		return;
+
 	/*
 	 * OK, time to rat on ourselves...
 	 * See Documentation/RCU/stallwarn.txt for info on how to debug
@@ -1377,8 +1449,10 @@
 	unsigned long js;
 	struct rcu_node *rnp;
 
-	if (rcu_cpu_stall_suppress || !rcu_gp_in_progress(rsp))
+	if ((rcu_cpu_stall_suppress && !rcu_kick_kthreads) ||
+	    !rcu_gp_in_progress(rsp))
 		return;
+	rcu_stall_kick_kthreads(rsp);
 	j = jiffies;
 
 	/*
@@ -2117,8 +2191,11 @@
 		}
 		ret = 0;
 		for (;;) {
-			if (!ret)
+			if (!ret) {
 				rsp->jiffies_force_qs = jiffies + j;
+				WRITE_ONCE(rsp->jiffies_kick_kthreads,
+					   jiffies + 3 * j);
+			}
 			trace_rcu_grace_period(rsp->name,
 					       READ_ONCE(rsp->gpnum),
 					       TPS("fqswait"));
@@ -2144,6 +2221,15 @@
 						       TPS("fqsend"));
 				cond_resched_rcu_qs();
 				WRITE_ONCE(rsp->gp_activity, jiffies);
+				ret = 0; /* Force full wait till next FQS. */
+				j = jiffies_till_next_fqs;
+				if (j > HZ) {
+					j = HZ;
+					jiffies_till_next_fqs = HZ;
+				} else if (j < 1) {
+					j = 1;
+					jiffies_till_next_fqs = 1;
+				}
 			} else {
 				/* Deal with stray signal. */
 				cond_resched_rcu_qs();
@@ -2152,14 +2238,12 @@
 				trace_rcu_grace_period(rsp->name,
 						       READ_ONCE(rsp->gpnum),
 						       TPS("fqswaitsig"));
-			}
-			j = jiffies_till_next_fqs;
-			if (j > HZ) {
-				j = HZ;
-				jiffies_till_next_fqs = HZ;
-			} else if (j < 1) {
-				j = 1;
-				jiffies_till_next_fqs = 1;
+				ret = 1; /* Keep old FQS timing. */
+				j = jiffies;
+				if (time_after(jiffies, rsp->jiffies_force_qs))
+					j = 1;
+				else
+					j = rsp->jiffies_force_qs - j;
 			}
 		}
 
@@ -3376,8 +3460,12 @@
 }
 static unsigned long rcu_exp_gp_seq_snap(struct rcu_state *rsp)
 {
+	unsigned long s;
+
 	smp_mb(); /* Caller's modifications seen first by other CPUs. */
-	return rcu_seq_snap(&rsp->expedited_sequence);
+	s = rcu_seq_snap(&rsp->expedited_sequence);
+	trace_rcu_exp_grace_period(rsp->name, s, TPS("snap"));
+	return s;
 }
 static bool rcu_exp_gp_seq_done(struct rcu_state *rsp, unsigned long s)
 {
@@ -3469,7 +3557,7 @@
  * for the current expedited grace period.  Works only for preemptible
  * RCU -- other RCU implementation use other means.
  *
- * Caller must hold the root rcu_node's exp_funnel_mutex.
+ * Caller must hold the rcu_state's exp_mutex.
  */
 static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
 {
@@ -3485,8 +3573,8 @@
  * recursively up the tree.  (Calm down, calm down, we do the recursion
  * iteratively!)
  *
- * Caller must hold the root rcu_node's exp_funnel_mutex and the
- * specified rcu_node structure's ->lock.
+ * Caller must hold the rcu_state's exp_mutex and the specified rcu_node
+ * structure's ->lock.
  */
 static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
 				 bool wake, unsigned long flags)
@@ -3523,7 +3611,7 @@
  * Report expedited quiescent state for specified node.  This is a
  * lock-acquisition wrapper function for __rcu_report_exp_rnp().
  *
- * Caller must hold the root rcu_node's exp_funnel_mutex.
+ * Caller must hold the rcu_state's exp_mutex.
  */
 static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp,
 					      struct rcu_node *rnp, bool wake)
@@ -3536,8 +3624,8 @@
 
 /*
  * Report expedited quiescent state for multiple CPUs, all covered by the
- * specified leaf rcu_node structure.  Caller must hold the root
- * rcu_node's exp_funnel_mutex.
+ * specified leaf rcu_node structure.  Caller must hold the rcu_state's
+ * exp_mutex.
  */
 static void rcu_report_exp_cpu_mult(struct rcu_state *rsp, struct rcu_node *rnp,
 				    unsigned long mask, bool wake)
@@ -3555,7 +3643,6 @@
 
 /*
  * Report expedited quiescent state for specified rcu_data (CPU).
- * Caller must hold the root rcu_node's exp_funnel_mutex.
  */
 static void rcu_report_exp_rdp(struct rcu_state *rsp, struct rcu_data *rdp,
 			       bool wake)
@@ -3564,15 +3651,11 @@
 }
 
 /* Common code for synchronize_{rcu,sched}_expedited() work-done checking. */
-static bool sync_exp_work_done(struct rcu_state *rsp, struct rcu_node *rnp,
-			       struct rcu_data *rdp,
-			       atomic_long_t *stat, unsigned long s)
+static bool sync_exp_work_done(struct rcu_state *rsp, atomic_long_t *stat,
+			       unsigned long s)
 {
 	if (rcu_exp_gp_seq_done(rsp, s)) {
-		if (rnp)
-			mutex_unlock(&rnp->exp_funnel_mutex);
-		else if (rdp)
-			mutex_unlock(&rdp->exp_funnel_mutex);
+		trace_rcu_exp_grace_period(rsp->name, s, TPS("done"));
 		/* Ensure test happens before caller kfree(). */
 		smp_mb__before_atomic(); /* ^^^ */
 		atomic_long_inc(stat);
@@ -3582,59 +3665,65 @@
 }
 
 /*
- * Funnel-lock acquisition for expedited grace periods.  Returns a
- * pointer to the root rcu_node structure, or NULL if some other
- * task did the expedited grace period for us.
+ * Funnel-lock acquisition for expedited grace periods.  Returns true
+ * if some other task completed an expedited grace period that this task
+ * can piggy-back on, in which case no mutex is held.  Otherwise,
+ * returns false with the mutex held, indicating that the caller must
+ * actually do the expedited grace period.
  */
-static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
+static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
 {
 	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
-	struct rcu_node *rnp0;
-	struct rcu_node *rnp1 = NULL;
+	struct rcu_node *rnp = rdp->mynode;
+	struct rcu_node *rnp_root = rcu_get_root(rsp);
+
+	/* Low-contention fastpath. */
+	if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s) &&
+	    (rnp == rnp_root ||
+	     ULONG_CMP_LT(READ_ONCE(rnp_root->exp_seq_rq), s)) &&
+	    !mutex_is_locked(&rsp->exp_mutex) &&
+	    mutex_trylock(&rsp->exp_mutex))
+		goto fastpath;
 
 	/*
-	 * First try directly acquiring the root lock in order to reduce
-	 * latency in the common case where expedited grace periods are
-	 * rare.  We check mutex_is_locked() to avoid pathological levels of
-	 * memory contention on ->exp_funnel_mutex in the heavy-load case.
+	 * Each pass through the following loop works its way up
+	 * the rcu_node tree, returning if others have done the work,
+	 * otherwise falling through to acquire rsp->exp_mutex.  The mapping
+	 * from CPU to rcu_node structure can be inexact, as it is just
+	 * promoting locality and is not strictly needed for correctness.
 	 */
-	rnp0 = rcu_get_root(rsp);
-	if (!mutex_is_locked(&rnp0->exp_funnel_mutex)) {
-		if (mutex_trylock(&rnp0->exp_funnel_mutex)) {
-			if (sync_exp_work_done(rsp, rnp0, NULL,
-					       &rdp->expedited_workdone0, s))
-				return NULL;
-			return rnp0;
+	for (; rnp != NULL; rnp = rnp->parent) {
+		if (sync_exp_work_done(rsp, &rdp->exp_workdone1, s))
+			return true;
+
+		/* Work not done, either wait here or go up. */
+		spin_lock(&rnp->exp_lock);
+		if (ULONG_CMP_GE(rnp->exp_seq_rq, s)) {
+
+			/* Someone else doing GP, so wait for them. */
+			spin_unlock(&rnp->exp_lock);
+			trace_rcu_exp_funnel_lock(rsp->name, rnp->level,
+						  rnp->grplo, rnp->grphi,
+						  TPS("wait"));
+			wait_event(rnp->exp_wq[(s >> 1) & 0x3],
+				   sync_exp_work_done(rsp,
+						      &rdp->exp_workdone2, s));
+			return true;
 		}
+		rnp->exp_seq_rq = s; /* Followers can wait on us. */
+		spin_unlock(&rnp->exp_lock);
+		trace_rcu_exp_funnel_lock(rsp->name, rnp->level, rnp->grplo,
+					  rnp->grphi, TPS("nxtlvl"));
 	}
-
-	/*
-	 * Each pass through the following loop works its way
-	 * up the rcu_node tree, returning if others have done the
-	 * work or otherwise falls through holding the root rnp's
-	 * ->exp_funnel_mutex.  The mapping from CPU to rcu_node structure
-	 * can be inexact, as it is just promoting locality and is not
-	 * strictly needed for correctness.
-	 */
-	if (sync_exp_work_done(rsp, NULL, NULL, &rdp->expedited_workdone1, s))
-		return NULL;
-	mutex_lock(&rdp->exp_funnel_mutex);
-	rnp0 = rdp->mynode;
-	for (; rnp0 != NULL; rnp0 = rnp0->parent) {
-		if (sync_exp_work_done(rsp, rnp1, rdp,
-				       &rdp->expedited_workdone2, s))
-			return NULL;
-		mutex_lock(&rnp0->exp_funnel_mutex);
-		if (rnp1)
-			mutex_unlock(&rnp1->exp_funnel_mutex);
-		else
-			mutex_unlock(&rdp->exp_funnel_mutex);
-		rnp1 = rnp0;
+	mutex_lock(&rsp->exp_mutex);
+fastpath:
+	if (sync_exp_work_done(rsp, &rdp->exp_workdone3, s)) {
+		mutex_unlock(&rsp->exp_mutex);
+		return true;
 	}
-	if (sync_exp_work_done(rsp, rnp1, rdp,
-			       &rdp->expedited_workdone3, s))
-		return NULL;
-	return rnp1;
+	rcu_exp_gp_seq_start(rsp);
+	trace_rcu_exp_grace_period(rsp->name, s, TPS("start"));
+	return false;
 }
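The rewritten funnel above trades the per-level exp_funnel_mutex chain for a single rsp->exp_mutex guarded by per-level ->exp_seq_rq "tickets": a climber records the sequence number it needs at each rcu_node it passes, so any later arrival needing the same or an earlier grace period parks on that node's wait queue instead of climbing further. All the sequence comparisons are wrap-safe; a sketch equivalent to the kernel's ULONG_CMP_LT() from rcupdate.h:

    #include <limits.h>

    /* true iff a precedes b on a free-running, wrapping counter */
    #define ULONG_CMP_LT(a, b)	(ULONG_MAX / 2 < (unsigned long)((a) - (b)))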
 
 /* Invoked on each online non-idle CPU for expedited quiescent state. */
@@ -3649,6 +3738,11 @@
 	if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) ||
 	    __this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))
 		return;
+	if (rcu_is_cpu_rrupt_from_idle()) {
+		rcu_report_exp_rdp(&rcu_sched_state,
+				   this_cpu_ptr(&rcu_sched_data), true);
+		return;
+	}
 	__this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, true);
 	resched_cpu(smp_processor_id());
 }
@@ -3773,7 +3867,7 @@
 		       rsp->name);
 		ndetected = 0;
 		rcu_for_each_leaf_node(rsp, rnp) {
-			ndetected = rcu_print_task_exp_stall(rnp);
+			ndetected += rcu_print_task_exp_stall(rnp);
 			mask = 1;
 			for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask <<= 1) {
 				struct rcu_data *rdp;
@@ -3783,7 +3877,7 @@
 				ndetected++;
 				rdp = per_cpu_ptr(rsp->rda, cpu);
 				pr_cont(" %d-%c%c%c", cpu,
-					"O."[cpu_online(cpu)],
+					"O."[!!cpu_online(cpu)],
 					"o."[!!(rdp->grpmask & rnp->expmaskinit)],
 					"N."[!!(rdp->grpmask & rnp->expmaskinitnext)]);
 			}
@@ -3792,7 +3886,7 @@
 		pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n",
 			jiffies - jiffies_start, rsp->expedited_sequence,
 			rnp_root->expmask, ".T"[!!rnp_root->exp_tasks]);
-		if (!ndetected) {
+		if (ndetected) {
 			pr_err("blocking rcu_node structures:");
 			rcu_for_each_node_breadth_first(rsp, rnp) {
 				if (rnp == rnp_root)
@@ -3818,6 +3912,41 @@
 	}
 }
 
+/*
+ * Wait for the current expedited grace period to complete, and then
+ * wake up everyone who piggybacked on the just-completed expedited
+ * grace period.  Also update all the ->exp_seq_rq counters as needed
+ * in order to avoid counter-wrap problems.
+ */
+static void rcu_exp_wait_wake(struct rcu_state *rsp, unsigned long s)
+{
+	struct rcu_node *rnp;
+
+	synchronize_sched_expedited_wait(rsp);
+	rcu_exp_gp_seq_end(rsp);
+	trace_rcu_exp_grace_period(rsp->name, s, TPS("end"));
+
+	/*
+	 * Switch over to wakeup mode, allowing the next GP, but -only- the
+	 * next GP, to proceed.
+	 */
+	mutex_lock(&rsp->exp_wake_mutex);
+	mutex_unlock(&rsp->exp_mutex);
+
+	rcu_for_each_node_breadth_first(rsp, rnp) {
+		if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s)) {
+			spin_lock(&rnp->exp_lock);
+			/* Recheck, avoid hang in case someone just arrived. */
+			if (ULONG_CMP_LT(rnp->exp_seq_rq, s))
+				rnp->exp_seq_rq = s;
+			spin_unlock(&rnp->exp_lock);
+		}
+		wake_up_all(&rnp->exp_wq[(rsp->expedited_sequence >> 1) & 0x3]);
+	}
+	trace_rcu_exp_grace_period(rsp->name, s, TPS("endwake"));
+	mutex_unlock(&rsp->exp_wake_mutex);
+}
+
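The wait-queue arithmetic deserves a note: expedited sequence numbers advance by two per grace period (bit 0 marks one in flight), so each GP generation hashes to one of the four per-node queues. Because exp_mutex serializes grace periods and exp_wake_mutex serializes wakeups, only a few generations can have sleepers at any time, which is why four queues appear to suffice. The index computation, pulled out as a sketch helper:

    /* one wait queue per recent GP generation; matches exp_wq[4] above */
    static inline unsigned int exp_wq_index(unsigned long s)
    {
    	return (unsigned int)(s >> 1) & 0x3;
    }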
 /**
  * synchronize_sched_expedited - Brute-force RCU-sched grace period
  *
@@ -3837,7 +3966,6 @@
 void synchronize_sched_expedited(void)
 {
 	unsigned long s;
-	struct rcu_node *rnp;
 	struct rcu_state *rsp = &rcu_sched_state;
 
 	/* If only one CPU, this is automatically a grace period. */
@@ -3852,17 +3980,14 @@
 
 	/* Take a snapshot of the sequence number.  */
 	s = rcu_exp_gp_seq_snap(rsp);
-
-	rnp = exp_funnel_lock(rsp, s);
-	if (rnp == NULL)
+	if (exp_funnel_lock(rsp, s))
 		return;  /* Someone else did our work for us. */
 
-	rcu_exp_gp_seq_start(rsp);
+	/* Initialize the rcu_node tree in preparation for the wait. */
 	sync_rcu_exp_select_cpus(rsp, sync_sched_exp_handler);
-	synchronize_sched_expedited_wait(rsp);
 
-	rcu_exp_gp_seq_end(rsp);
-	mutex_unlock(&rnp->exp_funnel_mutex);
+	/* Wait and clean up, including waking everyone. */
+	rcu_exp_wait_wake(rsp, s);
 }
 EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
 
@@ -4162,7 +4287,6 @@
 	WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
 	rdp->cpu = cpu;
 	rdp->rsp = rsp;
-	mutex_init(&rdp->exp_funnel_mutex);
 	rcu_boot_init_nocb_percpu_data(rdp);
 	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 }
@@ -4420,10 +4544,8 @@
 {
 	static const char * const buf[] = RCU_NODE_NAME_INIT;
 	static const char * const fqs[] = RCU_FQS_NAME_INIT;
-	static const char * const exp[] = RCU_EXP_NAME_INIT;
 	static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
 	static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
-	static struct lock_class_key rcu_exp_class[RCU_NUM_LVLS];
 	static u8 fl_mask = 0x1;
 
 	int levelcnt[RCU_NUM_LVLS];		/* # nodes in each level. */
@@ -4482,9 +4604,11 @@
 			rnp->level = i;
 			INIT_LIST_HEAD(&rnp->blkd_tasks);
 			rcu_init_one_nocb(rnp);
-			mutex_init(&rnp->exp_funnel_mutex);
-			lockdep_set_class_and_name(&rnp->exp_funnel_mutex,
-						   &rcu_exp_class[i], exp[i]);
+			init_waitqueue_head(&rnp->exp_wq[0]);
+			init_waitqueue_head(&rnp->exp_wq[1]);
+			init_waitqueue_head(&rnp->exp_wq[2]);
+			init_waitqueue_head(&rnp->exp_wq[3]);
+			spin_lock_init(&rnp->exp_lock);
 		}
 	}
 
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index df668c0..e3959f5 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -70,7 +70,6 @@
 #  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0 }
 #  define RCU_NODE_NAME_INIT  { "rcu_node_0" }
 #  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0" }
-#  define RCU_EXP_NAME_INIT   { "rcu_node_exp_0" }
 #elif NR_CPUS <= RCU_FANOUT_2
 #  define RCU_NUM_LVLS	      2
 #  define NUM_RCU_LVL_0	      1
@@ -79,7 +78,6 @@
 #  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0, NUM_RCU_LVL_1 }
 #  define RCU_NODE_NAME_INIT  { "rcu_node_0", "rcu_node_1" }
 #  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0", "rcu_node_fqs_1" }
-#  define RCU_EXP_NAME_INIT   { "rcu_node_exp_0", "rcu_node_exp_1" }
 #elif NR_CPUS <= RCU_FANOUT_3
 #  define RCU_NUM_LVLS	      3
 #  define NUM_RCU_LVL_0	      1
@@ -89,7 +87,6 @@
 #  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2 }
 #  define RCU_NODE_NAME_INIT  { "rcu_node_0", "rcu_node_1", "rcu_node_2" }
 #  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2" }
-#  define RCU_EXP_NAME_INIT   { "rcu_node_exp_0", "rcu_node_exp_1", "rcu_node_exp_2" }
 #elif NR_CPUS <= RCU_FANOUT_4
 #  define RCU_NUM_LVLS	      4
 #  define NUM_RCU_LVL_0	      1
@@ -100,7 +97,6 @@
 #  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2, NUM_RCU_LVL_3 }
 #  define RCU_NODE_NAME_INIT  { "rcu_node_0", "rcu_node_1", "rcu_node_2", "rcu_node_3" }
 #  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2", "rcu_node_fqs_3" }
-#  define RCU_EXP_NAME_INIT   { "rcu_node_exp_0", "rcu_node_exp_1", "rcu_node_exp_2", "rcu_node_exp_3" }
 #else
 # error "CONFIG_RCU_FANOUT insufficient for NR_CPUS"
 #endif /* #if (NR_CPUS) <= RCU_FANOUT_1 */
@@ -252,7 +248,9 @@
 				/* Counts of upcoming no-CB GP requests. */
 	raw_spinlock_t fqslock ____cacheline_internodealigned_in_smp;
 
-	struct mutex exp_funnel_mutex ____cacheline_internodealigned_in_smp;
+	spinlock_t exp_lock ____cacheline_internodealigned_in_smp;
+	unsigned long exp_seq_rq;
+	wait_queue_head_t exp_wq[4];
 } ____cacheline_internodealigned_in_smp;
 
 /*
@@ -387,11 +385,9 @@
 #ifdef CONFIG_RCU_FAST_NO_HZ
 	struct rcu_head oom_head;
 #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
-	struct mutex exp_funnel_mutex;
-	atomic_long_t expedited_workdone0;	/* # done by others #0. */
-	atomic_long_t expedited_workdone1;	/* # done by others #1. */
-	atomic_long_t expedited_workdone2;	/* # done by others #2. */
-	atomic_long_t expedited_workdone3;	/* # done by others #3. */
+	atomic_long_t exp_workdone1;	/* # done by others #1. */
+	atomic_long_t exp_workdone2;	/* # done by others #2. */
+	atomic_long_t exp_workdone3;	/* # done by others #3. */
 
 	/* 7) Callback offloading. */
 #ifdef CONFIG_RCU_NOCB_CPU
@@ -505,6 +501,8 @@
 						/*  _rcu_barrier(). */
 	/* End of fields guarded by barrier_mutex. */
 
+	struct mutex exp_mutex;			/* Serialize expedited GP. */
+	struct mutex exp_wake_mutex;		/* Serialize wakeup. */
 	unsigned long expedited_sequence;	/* Take a ticket. */
 	atomic_long_t expedited_normal;		/* # fallbacks to normal. */
 	atomic_t expedited_need_qs;		/* # CPUs left to check in. */
@@ -513,6 +511,8 @@
 
 	unsigned long jiffies_force_qs;		/* Time at which to invoke */
 						/*  force_quiescent_state(). */
+	unsigned long jiffies_kick_kthreads;	/* Time at which to kick */
+						/*  kthreads, if configured. */
 	unsigned long n_force_qs;		/* Number of calls to */
 						/*  force_quiescent_state(). */
 	unsigned long n_force_qs_lh;		/* ~Number of calls leaving */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index efdf7b6..ff1cd4e 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -722,18 +722,22 @@
  * synchronize_rcu_expedited - Brute-force RCU grace period
  *
  * Wait for an RCU-preempt grace period, but expedite it.  The basic
- * idea is to invoke synchronize_sched_expedited() to push all the tasks to
- * the ->blkd_tasks lists and wait for this list to drain.  This consumes
- * significant time on all CPUs and is unfriendly to real-time workloads,
- * so is thus not recommended for any sort of common-case code.
- * In fact, if you are using synchronize_rcu_expedited() in a loop,
- * please restructure your code to batch your updates, and then Use a
- * single synchronize_rcu() instead.
+ * idea is to IPI all non-idle non-nohz online CPUs.  The IPI handler
+ * checks whether the CPU is in an RCU-preempt critical section, and
+ * if so, it sets a flag that causes the outermost rcu_read_unlock()
+ * to report the quiescent state.  On the other hand, if the CPU is
+ * not in an RCU read-side critical section, the IPI handler reports
+ * the quiescent state immediately.
+ *
+ * Although this is a great improvement over previous expedited
+ * implementations, it is still unfriendly to real-time workloads, and
+ * is thus not recommended for any sort of common-case code.  In fact,
+ * if you are using synchronize_rcu_expedited() in a loop, please
+ * restructure your code to batch your updates, and then use a single
+ * synchronize_rcu() instead.
  */
 void synchronize_rcu_expedited(void)
 {
-	struct rcu_node *rnp;
-	struct rcu_node *rnp_unlock;
 	struct rcu_state *rsp = rcu_state_p;
 	unsigned long s;
 
@@ -744,23 +748,14 @@
 	}
 
 	s = rcu_exp_gp_seq_snap(rsp);
-
-	rnp_unlock = exp_funnel_lock(rsp, s);
-	if (rnp_unlock == NULL)
+	if (exp_funnel_lock(rsp, s))
 		return;  /* Someone else did our work for us. */
 
-	rcu_exp_gp_seq_start(rsp);
-
 	/* Initialize the rcu_node tree in preparation for the wait. */
 	sync_rcu_exp_select_cpus(rsp, sync_rcu_exp_handler);
 
-	/* Wait for snapshotted ->blkd_tasks lists to drain. */
-	rnp = rcu_get_root(rsp);
-	synchronize_sched_expedited_wait(rsp);
-
-	/* Clean up and exit. */
-	rcu_exp_gp_seq_end(rsp);
-	mutex_unlock(&rnp_unlock->exp_funnel_mutex);
+	/* Wait for ->blkd_tasks lists to drain, then wake everyone up. */
+	rcu_exp_wait_wake(rsp, s);
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
 
diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c
index 1088e64..86782f9a 100644
--- a/kernel/rcu/tree_trace.c
+++ b/kernel/rcu/tree_trace.c
@@ -185,17 +185,16 @@
 	int cpu;
 	struct rcu_state *rsp = (struct rcu_state *)m->private;
 	struct rcu_data *rdp;
-	unsigned long s0 = 0, s1 = 0, s2 = 0, s3 = 0;
+	unsigned long s1 = 0, s2 = 0, s3 = 0;
 
 	for_each_possible_cpu(cpu) {
 		rdp = per_cpu_ptr(rsp->rda, cpu);
-		s0 += atomic_long_read(&rdp->expedited_workdone0);
-		s1 += atomic_long_read(&rdp->expedited_workdone1);
-		s2 += atomic_long_read(&rdp->expedited_workdone2);
-		s3 += atomic_long_read(&rdp->expedited_workdone3);
+		s1 += atomic_long_read(&rdp->exp_workdone1);
+		s2 += atomic_long_read(&rdp->exp_workdone2);
+		s3 += atomic_long_read(&rdp->exp_workdone3);
 	}
-	seq_printf(m, "s=%lu wd0=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n",
-		   rsp->expedited_sequence, s0, s1, s2, s3,
+	seq_printf(m, "s=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n",
+		   rsp->expedited_sequence, s1, s2, s3,
 		   atomic_long_read(&rsp->expedited_normal),
 		   atomic_read(&rsp->expedited_need_qs),
 		   rsp->expedited_sequence / 2);
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index ca828b4..3ccdc8e 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -67,7 +67,7 @@
 module_param(rcu_normal_after_boot, int, 0);
 #endif /* #ifndef CONFIG_TINY_RCU */
 
-#if defined(CONFIG_DEBUG_LOCK_ALLOC) && defined(CONFIG_PREEMPT_COUNT)
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
 /**
  * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section?
  *
@@ -111,7 +111,7 @@
 		return 0;
 	if (debug_locks)
 		lockdep_opinion = lock_is_held(&rcu_sched_lock_map);
-	return lockdep_opinion || preempt_count() != 0 || irqs_disabled();
+	return lockdep_opinion || !preemptible();
 }
 EXPORT_SYMBOL(rcu_read_lock_sched_held);
 #endif
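The simplification above is exact under CONFIG_PREEMPT_COUNT, where linux/preempt.h defines preemptible() roughly as:

    #define preemptible()	(preempt_count() == 0 && !irqs_disabled())

so by De Morgan, !preemptible() is precisely the removed preempt_count() != 0 || irqs_disabled(). Dropping the CONFIG_PREEMPT_COUNT guard from the #ifdef also changes behavior when preemption counting is compiled out: there preemptible() is constant-false, so rcu_read_lock_sched_held() errs on the side of claiming the lock is held.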
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index 414d9c1..5e59b83 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -24,3 +24,4 @@
 obj-$(CONFIG_SCHED_DEBUG) += debug.o
 obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o
 obj-$(CONFIG_CPU_FREQ) += cpufreq.o
+obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index fedb967..e85a725 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -318,6 +318,7 @@
 
 	return clock;
 }
+EXPORT_SYMBOL_GPL(sched_clock_cpu);
 
 void sched_clock_tick(void)
 {
@@ -363,39 +364,6 @@
 }
 EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
 
-/*
- * As outlined at the top, provides a fast, high resolution, nanosecond
- * time source that is monotonic per cpu argument and has bounded drift
- * between cpus.
- *
- * ######################### BIG FAT WARNING ##########################
- * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can #
- * # go backwards !!                                                  #
- * ####################################################################
- */
-u64 cpu_clock(int cpu)
-{
-	if (!sched_clock_stable())
-		return sched_clock_cpu(cpu);
-
-	return sched_clock();
-}
-
-/*
- * Similar to cpu_clock() for the current cpu. Time will only be observed
- * to be monotonic if care is taken to only compare timestampt taken on the
- * same CPU.
- *
- * See cpu_clock().
- */
-u64 local_clock(void)
-{
-	if (!sched_clock_stable())
-		return sched_clock_cpu(raw_smp_processor_id());
-
-	return sched_clock();
-}
-
 #else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
 
 void sched_clock_init(void)
@@ -410,22 +378,8 @@
 
 	return sched_clock();
 }
-
-u64 cpu_clock(int cpu)
-{
-	return sched_clock();
-}
-
-u64 local_clock(void)
-{
-	return sched_clock();
-}
-
 #endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
 
-EXPORT_SYMBOL_GPL(cpu_clock);
-EXPORT_SYMBOL_GPL(local_clock);
-
 /*
  * Running clock - returns the time that has elapsed while a guest has been
  * running.
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d1f7149..404c078 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -33,7 +33,7 @@
 #include <linux/init.h>
 #include <linux/uaccess.h>
 #include <linux/highmem.h>
-#include <asm/mmu_context.h>
+#include <linux/mmu_context.h>
 #include <linux/interrupt.h>
 #include <linux/capability.h>
 #include <linux/completion.h>
@@ -170,6 +170,71 @@
 	return rq;
 }
 
+/*
+ * __task_rq_lock - lock the rq @p resides on.
+ */
+struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf)
+	__acquires(rq->lock)
+{
+	struct rq *rq;
+
+	lockdep_assert_held(&p->pi_lock);
+
+	for (;;) {
+		rq = task_rq(p);
+		raw_spin_lock(&rq->lock);
+		if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) {
+			rf->cookie = lockdep_pin_lock(&rq->lock);
+			return rq;
+		}
+		raw_spin_unlock(&rq->lock);
+
+		while (unlikely(task_on_rq_migrating(p)))
+			cpu_relax();
+	}
+}
+
+/*
+ * task_rq_lock - lock p->pi_lock and lock the rq @p resides on.
+ */
+struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf)
+	__acquires(p->pi_lock)
+	__acquires(rq->lock)
+{
+	struct rq *rq;
+
+	for (;;) {
+		raw_spin_lock_irqsave(&p->pi_lock, rf->flags);
+		rq = task_rq(p);
+		raw_spin_lock(&rq->lock);
+		/*
+		 *	move_queued_task()		task_rq_lock()
+		 *
+		 *	ACQUIRE (rq->lock)
+		 *	[S] ->on_rq = MIGRATING		[L] rq = task_rq()
+		 *	WMB (__set_task_cpu())		ACQUIRE (rq->lock);
+		 *	[S] ->cpu = new_cpu		[L] task_rq()
+		 *					[L] ->on_rq
+		 *	RELEASE (rq->lock)
+		 *
+		 * If we observe the old cpu in task_rq_lock, the acquire of
+		 * the old rq->lock will fully serialize against the stores.
+		 *
+		 * If we observe the new cpu in task_rq_lock, the acquire will
+		 * pair with the WMB to ensure we must then also see migrating.
+		 */
+		if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) {
+			rf->cookie = lockdep_pin_lock(&rq->lock);
+			return rq;
+		}
+		raw_spin_unlock(&rq->lock);
+		raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags);
+
+		while (unlikely(task_on_rq_migrating(p)))
+			cpu_relax();
+	}
+}
+
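The new struct rq_flags bundles the saved IRQ flags with the lockdep pin cookie, so lock state travels through a single argument instead of two. A hypothetical caller (example_inspect() is illustrative; the calls are the API introduced above):

    static void example_inspect(struct task_struct *p)
    {
    	struct rq_flags rf;
    	struct rq *rq;

    	rq = task_rq_lock(p, &rf);	/* takes p->pi_lock and rq->lock, pins rq->lock */
    	/* ... p can neither migrate nor be dequeued concurrently here ... */
    	task_rq_unlock(rq, p, &rf);	/* unpins and releases both locks */
    }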
 #ifdef CONFIG_SCHED_HRTICK
 /*
  * Use HR-timers to deliver accurate preemption points.
@@ -249,29 +314,6 @@
 	}
 }
 
-static int
-hotplug_hrtick(struct notifier_block *nfb, unsigned long action, void *hcpu)
-{
-	int cpu = (int)(long)hcpu;
-
-	switch (action) {
-	case CPU_UP_CANCELED:
-	case CPU_UP_CANCELED_FROZEN:
-	case CPU_DOWN_PREPARE:
-	case CPU_DOWN_PREPARE_FROZEN:
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-		hrtick_clear(cpu_rq(cpu));
-		return NOTIFY_OK;
-	}
-
-	return NOTIFY_DONE;
-}
-
-static __init void init_hrtick(void)
-{
-	hotcpu_notifier(hotplug_hrtick, 0);
-}
 #else
 /*
  * Called to set the hrtick timer state.
@@ -288,10 +330,6 @@
 	hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay),
 		      HRTIMER_MODE_REL_PINNED);
 }
-
-static inline void init_hrtick(void)
-{
-}
 #endif /* CONFIG_SMP */
 
 static void init_rq_hrtick(struct rq *rq)
@@ -315,10 +353,6 @@
 static inline void init_rq_hrtick(struct rq *rq)
 {
 }
-
-static inline void init_hrtick(void)
-{
-}
 #endif	/* CONFIG_SCHED_HRTICK */
 
 /*
@@ -400,7 +434,7 @@
 	 * wakeup due to that.
 	 *
 	 * This cmpxchg() implies a full barrier, which pairs with the write
-	 * barrier implied by the wakeup in wake_up_list().
+	 * barrier implied by the wakeup in wake_up_q().
 	 */
 	if (cmpxchg(&node->next, NULL, WAKE_Q_TAIL))
 		return;
@@ -499,7 +533,10 @@
 	rcu_read_lock();
 	for_each_domain(cpu, sd) {
 		for_each_cpu(i, sched_domain_span(sd)) {
-			if (!idle_cpu(i) && is_housekeeping_cpu(cpu)) {
+			if (cpu == i)
+				continue;
+
+			if (!idle_cpu(i) && is_housekeeping_cpu(i)) {
 				cpu = i;
 				goto unlock;
 			}
@@ -1085,12 +1122,20 @@
 static int __set_cpus_allowed_ptr(struct task_struct *p,
 				  const struct cpumask *new_mask, bool check)
 {
-	unsigned long flags;
-	struct rq *rq;
+	const struct cpumask *cpu_valid_mask = cpu_active_mask;
 	unsigned int dest_cpu;
+	struct rq_flags rf;
+	struct rq *rq;
 	int ret = 0;
 
-	rq = task_rq_lock(p, &flags);
+	rq = task_rq_lock(p, &rf);
+
+	if (p->flags & PF_KTHREAD) {
+		/*
+		 * Kernel threads are allowed on online && !active CPUs
+		 */
+		cpu_valid_mask = cpu_online_mask;
+	}
 
 	/*
 	 * Must re-check here, to close a race against __kthread_bind(),
@@ -1104,22 +1149,32 @@
 	if (cpumask_equal(&p->cpus_allowed, new_mask))
 		goto out;
 
-	if (!cpumask_intersects(new_mask, cpu_active_mask)) {
+	if (!cpumask_intersects(new_mask, cpu_valid_mask)) {
 		ret = -EINVAL;
 		goto out;
 	}
 
 	do_set_cpus_allowed(p, new_mask);
 
+	if (p->flags & PF_KTHREAD) {
+		/*
+		 * For kernel threads that do indeed end up on online &&
+		 * !active we want to ensure they are strict per-cpu threads.
+		 */
+		WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) &&
+			!cpumask_intersects(new_mask, cpu_active_mask) &&
+			p->nr_cpus_allowed != 1);
+	}
+
 	/* Can the task run on the task's current CPU? If so, we're done */
 	if (cpumask_test_cpu(task_cpu(p), new_mask))
 		goto out;
 
-	dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
+	dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask);
 	if (task_running(rq, p) || p->state == TASK_WAKING) {
 		struct migration_arg arg = { p, dest_cpu };
 		/* Need help from migration thread: drop lock and wait. */
-		task_rq_unlock(rq, p, &flags);
+		task_rq_unlock(rq, p, &rf);
 		stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
 		tlb_migrate_finish(p->mm);
 		return 0;
@@ -1128,12 +1183,12 @@
 		 * OK, since we're going to drop the lock immediately
 		 * afterwards anyway.
 		 */
-		lockdep_unpin_lock(&rq->lock);
+		lockdep_unpin_lock(&rq->lock, rf.cookie);
 		rq = move_queued_task(rq, p, dest_cpu);
-		lockdep_pin_lock(&rq->lock);
+		lockdep_repin_lock(&rq->lock, rf.cookie);
 	}
 out:
-	task_rq_unlock(rq, p, &flags);
+	task_rq_unlock(rq, p, &rf);
 
 	return ret;
 }
@@ -1317,8 +1372,8 @@
  */
 unsigned long wait_task_inactive(struct task_struct *p, long match_state)
 {
-	unsigned long flags;
 	int running, queued;
+	struct rq_flags rf;
 	unsigned long ncsw;
 	struct rq *rq;
 
@@ -1353,14 +1408,14 @@
 		 * lock now, to be *sure*. If we're wrong, we'll
 		 * just go back and repeat.
 		 */
-		rq = task_rq_lock(p, &flags);
+		rq = task_rq_lock(p, &rf);
 		trace_sched_wait_task(p);
 		running = task_running(rq, p);
 		queued = task_on_rq_queued(p);
 		ncsw = 0;
 		if (!match_state || p->state == match_state)
 			ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
-		task_rq_unlock(rq, p, &flags);
+		task_rq_unlock(rq, p, &rf);
 
 		/*
 		 * If it changed from the expected state, bail out now.
@@ -1434,6 +1489,25 @@
 
 /*
  * ->cpus_allowed is protected by both rq->lock and p->pi_lock
+ *
+ * A few notes on cpu_active vs cpu_online:
+ *
+ *  - cpu_active must be a subset of cpu_online
+ *
+ *  - on cpu-up we allow per-cpu kthreads on the online && !active cpu,
+ *    see __set_cpus_allowed_ptr(). At this point the newly online
+ *    cpu isn't yet part of the sched domains, and balancing will not
+ *    see it.
+ *
+ *  - on cpu-down we clear cpu_active() to mask the sched domains and
+ *    prevent the load balancer from placing new tasks on the CPU that
+ *    is being removed. Existing tasks will remain running there and
+ *    will be taken off.
+ *
+ * This means that fallback selection must not select !active CPUs,
+ * and may assume that any active CPU is online.  Conversely,
+ * select_task_rq() below may allow selection of !active CPUs in order
+ * to satisfy the above rules.
  */
 static int select_fallback_rq(int cpu, struct task_struct *p)
 {
@@ -1452,8 +1526,6 @@
 
 		/* Look for allowed, online CPU in same node. */
 		for_each_cpu(dest_cpu, nodemask) {
-			if (!cpu_online(dest_cpu))
-				continue;
 			if (!cpu_active(dest_cpu))
 				continue;
 			if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
@@ -1464,8 +1536,6 @@
 	for (;;) {
 		/* Any allowed, online CPU? */
 		for_each_cpu(dest_cpu, tsk_cpus_allowed(p)) {
-			if (!cpu_online(dest_cpu))
-				continue;
 			if (!cpu_active(dest_cpu))
 				continue;
 			goto out;
@@ -1515,8 +1585,10 @@
 {
 	lockdep_assert_held(&p->pi_lock);
 
-	if (p->nr_cpus_allowed > 1)
+	if (tsk_nr_cpus_allowed(p) > 1)
 		cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
+	else
+		cpu = cpumask_any(tsk_cpus_allowed(p));
 
 	/*
 	 * In order not to call set_task_cpu() on a blocking task we need
@@ -1604,8 +1676,8 @@
 /*
  * Mark the task runnable and perform wakeup-preemption.
  */
-static void
-ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
+static void ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags,
+			   struct pin_cookie cookie)
 {
 	check_preempt_curr(rq, p, wake_flags);
 	p->state = TASK_RUNNING;
@@ -1617,9 +1689,9 @@
 		 * Our task @p is fully woken up and running; so it's safe to
 		 * drop the rq->lock, hereafter rq is only used for statistics.
 		 */
-		lockdep_unpin_lock(&rq->lock);
+		lockdep_unpin_lock(&rq->lock, cookie);
 		p->sched_class->task_woken(rq, p);
-		lockdep_pin_lock(&rq->lock);
+		lockdep_repin_lock(&rq->lock, cookie);
 	}
 
 	if (rq->idle_stamp) {
@@ -1637,17 +1709,23 @@
 }
 
 static void
-ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags)
+ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
+		 struct pin_cookie cookie)
 {
+	int en_flags = ENQUEUE_WAKEUP;
+
 	lockdep_assert_held(&rq->lock);
 
 #ifdef CONFIG_SMP
 	if (p->sched_contributes_to_load)
 		rq->nr_uninterruptible--;
+
+	if (wake_flags & WF_MIGRATED)
+		en_flags |= ENQUEUE_MIGRATED;
 #endif
 
-	ttwu_activate(rq, p, ENQUEUE_WAKEUP | ENQUEUE_WAKING);
-	ttwu_do_wakeup(rq, p, wake_flags);
+	ttwu_activate(rq, p, en_flags);
+	ttwu_do_wakeup(rq, p, wake_flags, cookie);
 }
 
 /*
@@ -1658,17 +1736,18 @@
  */
 static int ttwu_remote(struct task_struct *p, int wake_flags)
 {
+	struct rq_flags rf;
 	struct rq *rq;
 	int ret = 0;
 
-	rq = __task_rq_lock(p);
+	rq = __task_rq_lock(p, &rf);
 	if (task_on_rq_queued(p)) {
 		/* check_preempt_curr() may use rq clock */
 		update_rq_clock(rq);
-		ttwu_do_wakeup(rq, p, wake_flags);
+		ttwu_do_wakeup(rq, p, wake_flags, rf.cookie);
 		ret = 1;
 	}
-	__task_rq_unlock(rq);
+	__task_rq_unlock(rq, &rf);
 
 	return ret;
 }
@@ -1678,6 +1757,7 @@
 {
 	struct rq *rq = this_rq();
 	struct llist_node *llist = llist_del_all(&rq->wake_list);
+	struct pin_cookie cookie;
 	struct task_struct *p;
 	unsigned long flags;
 
@@ -1685,15 +1765,19 @@
 		return;
 
 	raw_spin_lock_irqsave(&rq->lock, flags);
-	lockdep_pin_lock(&rq->lock);
+	cookie = lockdep_pin_lock(&rq->lock);
 
 	while (llist) {
 		p = llist_entry(llist, struct task_struct, wake_entry);
 		llist = llist_next(llist);
-		ttwu_do_activate(rq, p, 0);
+		/*
+		 * See ttwu_queue(); we only call ttwu_queue_remote() when
+		 * it's a cross-CPU wakeup.
+		 */
+		ttwu_do_activate(rq, p, WF_MIGRATED, cookie);
 	}
 
-	lockdep_unpin_lock(&rq->lock);
+	lockdep_unpin_lock(&rq->lock, cookie);
 	raw_spin_unlock_irqrestore(&rq->lock, flags);
 }
 
@@ -1777,9 +1861,10 @@
 }
 #endif /* CONFIG_SMP */
 
-static void ttwu_queue(struct task_struct *p, int cpu)
+static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
 {
 	struct rq *rq = cpu_rq(cpu);
+	struct pin_cookie cookie;
 
 #if defined(CONFIG_SMP)
 	if (sched_feat(TTWU_QUEUE) && !cpus_share_cache(smp_processor_id(), cpu)) {
@@ -1790,9 +1875,9 @@
 #endif
 
 	raw_spin_lock(&rq->lock);
-	lockdep_pin_lock(&rq->lock);
-	ttwu_do_activate(rq, p, 0);
-	lockdep_unpin_lock(&rq->lock);
+	cookie = lockdep_pin_lock(&rq->lock);
+	ttwu_do_activate(rq, p, wake_flags, cookie);
+	lockdep_unpin_lock(&rq->lock, cookie);
 	raw_spin_unlock(&rq->lock);
 }
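ttwu_queue() shows the cookie discipline this patch converts every rq->lock pin site to: lockdep_pin_lock() now returns a struct pin_cookie that must be handed back to lockdep_unpin_lock(), or to lockdep_repin_lock() after a deliberate temporary drop, so an unpin can no longer silently mismatch its pin. The pattern, condensed from the conversions above:

    struct pin_cookie cookie;

    raw_spin_lock(&rq->lock);
    cookie = lockdep_pin_lock(&rq->lock);	/* rq->lock must stay held while pinned */
    /* ... work that relies on rq->lock not being dropped ... */
    lockdep_unpin_lock(&rq->lock, cookie);	/* cookie ties the unpin to its pin */
    raw_spin_unlock(&rq->lock);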
 
@@ -1961,9 +2046,6 @@
 	p->sched_contributes_to_load = !!task_contributes_to_load(p);
 	p->state = TASK_WAKING;
 
-	if (p->sched_class->task_waking)
-		p->sched_class->task_waking(p);
-
 	cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags);
 	if (task_cpu(p) != cpu) {
 		wake_flags |= WF_MIGRATED;
@@ -1971,7 +2053,7 @@
 	}
 #endif /* CONFIG_SMP */
 
-	ttwu_queue(p, cpu);
+	ttwu_queue(p, cpu, wake_flags);
 stat:
 	if (schedstat_enabled())
 		ttwu_stat(p, cpu, wake_flags);
@@ -1989,7 +2071,7 @@
  * ensure that this_rq() is locked, @p is bound to this_rq() and not
  * the current task.
  */
-static void try_to_wake_up_local(struct task_struct *p)
+static void try_to_wake_up_local(struct task_struct *p, struct pin_cookie cookie)
 {
 	struct rq *rq = task_rq(p);
 
@@ -2006,11 +2088,11 @@
 		 * disabled avoiding further scheduler activity on it and we've
 		 * not yet picked a replacement task.
 		 */
-		lockdep_unpin_lock(&rq->lock);
+		lockdep_unpin_lock(&rq->lock, cookie);
 		raw_spin_unlock(&rq->lock);
 		raw_spin_lock(&p->pi_lock);
 		raw_spin_lock(&rq->lock);
-		lockdep_pin_lock(&rq->lock);
+		lockdep_repin_lock(&rq->lock, cookie);
 	}
 
 	if (!(p->state & TASK_NORMAL))
@@ -2021,7 +2103,7 @@
 	if (!task_on_rq_queued(p))
 		ttwu_activate(rq, p, ENQUEUE_WAKEUP);
 
-	ttwu_do_wakeup(rq, p, 0);
+	ttwu_do_wakeup(rq, p, 0, cookie);
 	if (schedstat_enabled())
 		ttwu_stat(p, smp_processor_id(), 0);
 out:
@@ -2381,7 +2463,8 @@
 	u64 new_bw = dl_policy(policy) ? to_ratio(period, runtime) : 0;
 	int cpus, err = -1;
 
-	if (new_bw == p->dl.dl_bw)
+	/* !deadline task may carry old deadline bandwidth */
+	if (new_bw == p->dl.dl_bw && task_has_dl_policy(p))
 		return 0;
 
 	/*
@@ -2420,12 +2503,12 @@
  */
 void wake_up_new_task(struct task_struct *p)
 {
-	unsigned long flags;
+	struct rq_flags rf;
 	struct rq *rq;
 
-	raw_spin_lock_irqsave(&p->pi_lock, flags);
 	/* Initialize new task's runnable average */
 	init_entity_runnable_average(&p->se);
+	raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
 #ifdef CONFIG_SMP
 	/*
 	 * Fork balancing, do it here and not earlier because:
@@ -2434,8 +2517,10 @@
 	 */
 	set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0));
 #endif
+	/* Post-initialize the new task's util average once its cfs_rq is set */
+	post_init_entity_util_avg(&p->se);
 
-	rq = __task_rq_lock(p);
+	rq = __task_rq_lock(p, &rf);
 	activate_task(rq, p, 0);
 	p->on_rq = TASK_ON_RQ_QUEUED;
 	trace_sched_wakeup_new(p);
@@ -2446,12 +2531,12 @@
 		 * Nothing relies on rq->lock after this, so it's fine to
 		 * drop it.
 		 */
-		lockdep_unpin_lock(&rq->lock);
+		lockdep_unpin_lock(&rq->lock, rf.cookie);
 		p->sched_class->task_woken(rq, p);
-		lockdep_pin_lock(&rq->lock);
+		lockdep_repin_lock(&rq->lock, rf.cookie);
 	}
 #endif
-	task_rq_unlock(rq, p, &flags);
+	task_rq_unlock(rq, p, &rf);
 }
 
 #ifdef CONFIG_PREEMPT_NOTIFIERS
@@ -2713,7 +2798,7 @@
  */
 static __always_inline struct rq *
 context_switch(struct rq *rq, struct task_struct *prev,
-	       struct task_struct *next)
+	       struct task_struct *next, struct pin_cookie cookie)
 {
 	struct mm_struct *mm, *oldmm;
 
@@ -2733,7 +2818,7 @@
 		atomic_inc(&oldmm->mm_count);
 		enter_lazy_tlb(oldmm, next);
 	} else
-		switch_mm(oldmm, mm, next);
+		switch_mm_irqs_off(oldmm, mm, next);
 
 	if (!prev->mm) {
 		prev->active_mm = NULL;
@@ -2745,7 +2830,7 @@
 	 * of the scheduler it's an obvious special-case), so we
 	 * do an early lockdep release here:
 	 */
-	lockdep_unpin_lock(&rq->lock);
+	lockdep_unpin_lock(&rq->lock, cookie);
 	spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
 
 	/* Here we just switch the register state and the stack. */
@@ -2867,7 +2952,7 @@
  */
 unsigned long long task_sched_runtime(struct task_struct *p)
 {
-	unsigned long flags;
+	struct rq_flags rf;
 	struct rq *rq;
 	u64 ns;
 
@@ -2887,7 +2972,7 @@
 		return p->se.sum_exec_runtime;
 #endif
 
-	rq = task_rq_lock(p, &flags);
+	rq = task_rq_lock(p, &rf);
 	/*
 	 * Must be ->curr _and_ ->on_rq.  If dequeued, we would
 	 * project cycles that may never be accounted to this
@@ -2898,7 +2983,7 @@
 		p->sched_class->update_curr(rq);
 	}
 	ns = p->se.sum_exec_runtime;
-	task_rq_unlock(rq, p, &flags);
+	task_rq_unlock(rq, p, &rf);
 
 	return ns;
 }
@@ -2918,7 +3003,7 @@
 	raw_spin_lock(&rq->lock);
 	update_rq_clock(rq);
 	curr->sched_class->task_tick(rq, curr, 0);
-	update_cpu_load_active(rq);
+	cpu_load_update_active(rq);
 	calc_global_load_tick(rq);
 	raw_spin_unlock(&rq->lock);
 
@@ -2961,6 +3046,20 @@
 
 #if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
 				defined(CONFIG_PREEMPT_TRACER))
+/*
+ * If the value passed in is equal to the current preempt count
+ * then we just disabled preemption. Start timing the latency.
+ */
+static inline void preempt_latency_start(int val)
+{
+	if (preempt_count() == val) {
+		unsigned long ip = get_lock_parent_ip();
+#ifdef CONFIG_DEBUG_PREEMPT
+		current->preempt_disable_ip = ip;
+#endif
+		trace_preempt_off(CALLER_ADDR0, ip);
+	}
+}
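The test reads naturally once you note that __preempt_count_add(val) has already run by this point: preempt_count() == val exactly when the count was zero beforehand, i.e. this call is the outermost disable. A userspace model of the start/stop pairing (purely illustrative):

    #include <stdio.h>

    static int count;

    static void model_add(int val)
    {
    	count += val;
    	if (count == val)		/* was zero before: outermost disable */
    		puts("start latency timing");
    }

    static void model_sub(int val)
    {
    	if (count == val)		/* will be zero after: outermost enable */
    		puts("stop latency timing");
    	count -= val;
    }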
 
 void preempt_count_add(int val)
 {
@@ -2979,17 +3078,21 @@
 	DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >=
 				PREEMPT_MASK - 10);
 #endif
-	if (preempt_count() == val) {
-		unsigned long ip = get_lock_parent_ip();
-#ifdef CONFIG_DEBUG_PREEMPT
-		current->preempt_disable_ip = ip;
-#endif
-		trace_preempt_off(CALLER_ADDR0, ip);
-	}
+	preempt_latency_start(val);
 }
 EXPORT_SYMBOL(preempt_count_add);
 NOKPROBE_SYMBOL(preempt_count_add);
 
+/*
+ * If the value passed in is equal to the current preempt count
+ * then we just enabled preemption. Stop timing the latency.
+ */
+static inline void preempt_latency_stop(int val)
+{
+	if (preempt_count() == val)
+		trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip());
+}
+
 void preempt_count_sub(int val)
 {
 #ifdef CONFIG_DEBUG_PREEMPT
@@ -3006,13 +3109,15 @@
 		return;
 #endif
 
-	if (preempt_count() == val)
-		trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip());
+	preempt_latency_stop(val);
 	__preempt_count_sub(val);
 }
 EXPORT_SYMBOL(preempt_count_sub);
 NOKPROBE_SYMBOL(preempt_count_sub);
 
+#else
+static inline void preempt_latency_start(int val) { }
+static inline void preempt_latency_stop(int val) { }
 #endif
 
 /*
@@ -3065,7 +3170,7 @@
  * Pick up the highest-prio task:
  */
 static inline struct task_struct *
-pick_next_task(struct rq *rq, struct task_struct *prev)
+pick_next_task(struct rq *rq, struct task_struct *prev, struct pin_cookie cookie)
 {
 	const struct sched_class *class = &fair_sched_class;
 	struct task_struct *p;
@@ -3076,20 +3181,20 @@
 	 */
 	if (likely(prev->sched_class == class &&
 		   rq->nr_running == rq->cfs.h_nr_running)) {
-		p = fair_sched_class.pick_next_task(rq, prev);
+		p = fair_sched_class.pick_next_task(rq, prev, cookie);
 		if (unlikely(p == RETRY_TASK))
 			goto again;
 
 		/* assumes fair_sched_class->next == idle_sched_class */
 		if (unlikely(!p))
-			p = idle_sched_class.pick_next_task(rq, prev);
+			p = idle_sched_class.pick_next_task(rq, prev, cookie);
 
 		return p;
 	}
 
 again:
 	for_each_class(class) {
-		p = class->pick_next_task(rq, prev);
+		p = class->pick_next_task(rq, prev, cookie);
 		if (p) {
 			if (unlikely(p == RETRY_TASK))
 				goto again;
@@ -3143,6 +3248,7 @@
 {
 	struct task_struct *prev, *next;
 	unsigned long *switch_count;
+	struct pin_cookie cookie;
 	struct rq *rq;
 	int cpu;
 
@@ -3176,7 +3282,7 @@
 	 */
 	smp_mb__before_spinlock();
 	raw_spin_lock(&rq->lock);
-	lockdep_pin_lock(&rq->lock);
+	cookie = lockdep_pin_lock(&rq->lock);
 
 	rq->clock_skip_update <<= 1; /* promote REQ to ACT */
 
@@ -3198,7 +3304,7 @@
 
 				to_wakeup = wq_worker_sleeping(prev);
 				if (to_wakeup)
-					try_to_wake_up_local(to_wakeup);
+					try_to_wake_up_local(to_wakeup, cookie);
 			}
 		}
 		switch_count = &prev->nvcsw;
@@ -3207,7 +3313,7 @@
 	if (task_on_rq_queued(prev))
 		update_rq_clock(rq);
 
-	next = pick_next_task(rq, prev);
+	next = pick_next_task(rq, prev, cookie);
 	clear_tsk_need_resched(prev);
 	clear_preempt_need_resched();
 	rq->clock_skip_update = 0;
@@ -3218,9 +3324,9 @@
 		++*switch_count;
 
 		trace_sched_switch(preempt, prev, next);
-		rq = context_switch(rq, prev, next); /* unlocks the rq */
+		rq = context_switch(rq, prev, next, cookie); /* unlocks the rq */
 	} else {
-		lockdep_unpin_lock(&rq->lock);
+		lockdep_unpin_lock(&rq->lock, cookie);
 		raw_spin_unlock_irq(&rq->lock);
 	}
 
@@ -3287,8 +3393,23 @@
 static void __sched notrace preempt_schedule_common(void)
 {
 	do {
+		/*
+		 * Because the function tracer can trace preempt_count_sub()
+		 * and it also uses preempt_enable/disable_notrace(), if
+		 * NEED_RESCHED is set, the preempt_enable_notrace() called
+		 * by the function tracer will call this function again and
+		 * cause infinite recursion.
+		 *
+		 * Preemption must be disabled here before the function
+		 * tracer can trace. Break up preempt_disable() into two
+		 * calls. One to disable preemption without fear of being
+		 * traced. The other to still record the preemption latency,
+		 * which can also be traced by the function tracer.
+		 */
 		preempt_disable_notrace();
+		preempt_latency_start(1);
 		__schedule(true);
+		preempt_latency_stop(1);
 		preempt_enable_no_resched_notrace();
 
 		/*
@@ -3340,7 +3461,21 @@
 		return;
 
 	do {
+		/*
+		 * Because the function tracer can trace preempt_count_sub()
+		 * and it also uses preempt_enable/disable_notrace(), if
+		 * NEED_RESCHED is set, the preempt_enable_notrace() called
+		 * by the function tracer will call this function again and
+		 * cause infinite recursion.
+		 *
+		 * Preemption must be disabled here before the function
+		 * tracer can trace. Break up preempt_disable() into two
+		 * calls. One to disable preemption without fear of being
+		 * traced. The other to still record the preemption latency,
+		 * which can also be traced by the function tracer.
+		 */
 		preempt_disable_notrace();
+		preempt_latency_start(1);
 		/*
 		 * Needs preempt disabled in case user_exit() is traced
 		 * and the tracer calls preempt_enable_notrace() causing
@@ -3350,6 +3485,7 @@
 		__schedule(true);
 		exception_exit(prev_ctx);
 
+		preempt_latency_stop(1);
 		preempt_enable_no_resched_notrace();
 	} while (need_resched());
 }
@@ -3406,12 +3542,13 @@
 void rt_mutex_setprio(struct task_struct *p, int prio)
 {
 	int oldprio, queued, running, queue_flag = DEQUEUE_SAVE | DEQUEUE_MOVE;
-	struct rq *rq;
 	const struct sched_class *prev_class;
+	struct rq_flags rf;
+	struct rq *rq;
 
 	BUG_ON(prio > MAX_PRIO);
 
-	rq = __task_rq_lock(p);
+	rq = __task_rq_lock(p, &rf);
 
 	/*
 	 * Idle task boosting is a nono in general. There is one
@@ -3487,7 +3624,7 @@
 	check_class_changed(rq, p, prev_class, oldprio);
 out_unlock:
 	preempt_disable(); /* avoid rq from going away on us */
-	__task_rq_unlock(rq);
+	__task_rq_unlock(rq, &rf);
 
 	balance_callback(rq);
 	preempt_enable();
@@ -3497,7 +3634,7 @@
 void set_user_nice(struct task_struct *p, long nice)
 {
 	int old_prio, delta, queued;
-	unsigned long flags;
+	struct rq_flags rf;
 	struct rq *rq;
 
 	if (task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE)
@@ -3506,7 +3643,7 @@
 	 * We have to be careful, if called from sys_setpriority(),
 	 * the task might be in the middle of scheduling on another CPU.
 	 */
-	rq = task_rq_lock(p, &flags);
+	rq = task_rq_lock(p, &rf);
 	/*
 	 * The RT priorities are set via sched_setscheduler(), but we still
 	 * allow the 'normal' nice value to be set - but as expected
@@ -3537,7 +3674,7 @@
 			resched_curr(rq);
 	}
 out_unlock:
-	task_rq_unlock(rq, p, &flags);
+	task_rq_unlock(rq, p, &rf);
 }
 EXPORT_SYMBOL(set_user_nice);
 
@@ -3834,11 +3971,11 @@
 		      MAX_RT_PRIO - 1 - attr->sched_priority;
 	int retval, oldprio, oldpolicy = -1, queued, running;
 	int new_effective_prio, policy = attr->sched_policy;
-	unsigned long flags;
 	const struct sched_class *prev_class;
-	struct rq *rq;
+	struct rq_flags rf;
 	int reset_on_fork;
 	int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE;
+	struct rq *rq;
 
 	/* may grab non-irq protected spin_locks */
 	BUG_ON(in_interrupt());
@@ -3933,13 +4070,13 @@
 	 * To be able to change p->policy safely, the appropriate
 	 * runqueue lock must be held.
 	 */
-	rq = task_rq_lock(p, &flags);
+	rq = task_rq_lock(p, &rf);
 
 	/*
 	 * Changing the policy of the stop threads its a very bad idea
 	 */
 	if (p == rq->stop) {
-		task_rq_unlock(rq, p, &flags);
+		task_rq_unlock(rq, p, &rf);
 		return -EINVAL;
 	}
 
@@ -3956,7 +4093,7 @@
 			goto change;
 
 		p->sched_reset_on_fork = reset_on_fork;
-		task_rq_unlock(rq, p, &flags);
+		task_rq_unlock(rq, p, &rf);
 		return 0;
 	}
 change:
@@ -3970,7 +4107,7 @@
 		if (rt_bandwidth_enabled() && rt_policy(policy) &&
 				task_group(p)->rt_bandwidth.rt_runtime == 0 &&
 				!task_group_is_autogroup(task_group(p))) {
-			task_rq_unlock(rq, p, &flags);
+			task_rq_unlock(rq, p, &rf);
 			return -EPERM;
 		}
 #endif
@@ -3985,7 +4122,7 @@
 			 */
 			if (!cpumask_subset(span, &p->cpus_allowed) ||
 			    rq->rd->dl_bw.bw == 0) {
-				task_rq_unlock(rq, p, &flags);
+				task_rq_unlock(rq, p, &rf);
 				return -EPERM;
 			}
 		}
@@ -3995,7 +4132,7 @@
 	/* recheck policy now with rq lock held */
 	if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
 		policy = oldpolicy = -1;
-		task_rq_unlock(rq, p, &flags);
+		task_rq_unlock(rq, p, &rf);
 		goto recheck;
 	}
 
@@ -4005,7 +4142,7 @@
 	 * is available.
 	 */
 	if ((dl_policy(policy) || dl_task(p)) && dl_overflow(p, policy, attr)) {
-		task_rq_unlock(rq, p, &flags);
+		task_rq_unlock(rq, p, &rf);
 		return -EBUSY;
 	}
 
@@ -4050,7 +4187,7 @@
 
 	check_class_changed(rq, p, prev_class, oldprio);
 	preempt_disable(); /* avoid rq from going away on us */
-	task_rq_unlock(rq, p, &flags);
+	task_rq_unlock(rq, p, &rf);
 
 	if (pi)
 		rt_mutex_adjust_pi(p);
@@ -4903,10 +5040,10 @@
 {
 	struct task_struct *p;
 	unsigned int time_slice;
-	unsigned long flags;
+	struct rq_flags rf;
+	struct timespec t;
 	struct rq *rq;
 	int retval;
-	struct timespec t;
 
 	if (pid < 0)
 		return -EINVAL;
@@ -4921,11 +5058,11 @@
 	if (retval)
 		goto out_unlock;
 
-	rq = task_rq_lock(p, &flags);
+	rq = task_rq_lock(p, &rf);
 	time_slice = 0;
 	if (p->sched_class->get_rr_interval)
 		time_slice = p->sched_class->get_rr_interval(rq, p);
-	task_rq_unlock(rq, p, &flags);
+	task_rq_unlock(rq, p, &rf);
 
 	rcu_read_unlock();
 	jiffies_to_timespec(time_slice, &t);
@@ -5001,7 +5138,8 @@
 	touch_all_softlockup_watchdogs();
 
 #ifdef CONFIG_SCHED_DEBUG
-	sysrq_sched_debug_show();
+	if (!state_filter)
+		sysrq_sched_debug_show();
 #endif
 	rcu_read_unlock();
 	/*
@@ -5163,6 +5301,8 @@
 
 #ifdef CONFIG_SMP
 
+static bool sched_smp_initialized __read_mostly;
+
 #ifdef CONFIG_NUMA_BALANCING
 /* Migrate current task p to target_cpu */
 int migrate_task_to(struct task_struct *p, int target_cpu)
@@ -5188,11 +5328,11 @@
  */
 void sched_setnuma(struct task_struct *p, int nid)
 {
-	struct rq *rq;
-	unsigned long flags;
 	bool queued, running;
+	struct rq_flags rf;
+	struct rq *rq;
 
-	rq = task_rq_lock(p, &flags);
+	rq = task_rq_lock(p, &rf);
 	queued = task_on_rq_queued(p);
 	running = task_current(rq, p);
 
@@ -5207,7 +5347,7 @@
 		p->sched_class->set_curr_task(rq);
 	if (queued)
 		enqueue_task(rq, p, ENQUEUE_RESTORE);
-	task_rq_unlock(rq, p, &flags);
+	task_rq_unlock(rq, p, &rf);
 }
 #endif /* CONFIG_NUMA_BALANCING */
 
@@ -5223,7 +5363,7 @@
 	BUG_ON(cpu_online(smp_processor_id()));
 
 	if (mm != &init_mm) {
-		switch_mm(mm, &init_mm, current);
+		switch_mm_irqs_off(mm, &init_mm, current);
 		finish_arch_post_lock_switch();
 	}
 	mmdrop(mm);
@@ -5271,6 +5411,7 @@
 {
 	struct rq *rq = dead_rq;
 	struct task_struct *next, *stop = rq->stop;
+	struct pin_cookie cookie;
 	int dest_cpu;
 
 	/*
@@ -5302,8 +5443,8 @@
 		/*
 		 * pick_next_task assumes pinned rq->lock.
 		 */
-		lockdep_pin_lock(&rq->lock);
-		next = pick_next_task(rq, &fake_task);
+		cookie = lockdep_pin_lock(&rq->lock);
+		next = pick_next_task(rq, &fake_task, cookie);
 		BUG_ON(!next);
 		next->sched_class->put_prev_task(rq, next);
 
@@ -5316,7 +5457,7 @@
 		 * because !cpu_active at this point, which means load-balance
 		 * will not interfere. Also, stop-machine.
 		 */
-		lockdep_unpin_lock(&rq->lock);
+		lockdep_unpin_lock(&rq->lock, cookie);
 		raw_spin_unlock(&rq->lock);
 		raw_spin_lock(&next->pi_lock);
 		raw_spin_lock(&rq->lock);
@@ -5377,127 +5518,13 @@
 	}
 }
 
-/*
- * migration_call - callback that gets triggered when a CPU is added.
- * Here we can start up the necessary migration thread for the new CPU.
- */
-static int
-migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
+static void set_cpu_rq_start_time(unsigned int cpu)
 {
-	int cpu = (long)hcpu;
-	unsigned long flags;
 	struct rq *rq = cpu_rq(cpu);
 
-	switch (action & ~CPU_TASKS_FROZEN) {
-
-	case CPU_UP_PREPARE:
-		rq->calc_load_update = calc_load_update;
-		account_reset_rq(rq);
-		break;
-
-	case CPU_ONLINE:
-		/* Update our root-domain */
-		raw_spin_lock_irqsave(&rq->lock, flags);
-		if (rq->rd) {
-			BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
-
-			set_rq_online(rq);
-		}
-		raw_spin_unlock_irqrestore(&rq->lock, flags);
-		break;
-
-#ifdef CONFIG_HOTPLUG_CPU
-	case CPU_DYING:
-		sched_ttwu_pending();
-		/* Update our root-domain */
-		raw_spin_lock_irqsave(&rq->lock, flags);
-		if (rq->rd) {
-			BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
-			set_rq_offline(rq);
-		}
-		migrate_tasks(rq);
-		BUG_ON(rq->nr_running != 1); /* the migration thread */
-		raw_spin_unlock_irqrestore(&rq->lock, flags);
-		break;
-
-	case CPU_DEAD:
-		calc_load_migrate(rq);
-		break;
-#endif
-	}
-
-	update_max_interval();
-
-	return NOTIFY_OK;
-}
-
-/*
- * Register at high priority so that task migration (migrate_all_tasks)
- * happens before everything else.  This has to be lower priority than
- * the notifier in the perf_event subsystem, though.
- */
-static struct notifier_block migration_notifier = {
-	.notifier_call = migration_call,
-	.priority = CPU_PRI_MIGRATION,
-};
-
-static void set_cpu_rq_start_time(void)
-{
-	int cpu = smp_processor_id();
-	struct rq *rq = cpu_rq(cpu);
 	rq->age_stamp = sched_clock_cpu(cpu);
 }
 
-static int sched_cpu_active(struct notifier_block *nfb,
-				      unsigned long action, void *hcpu)
-{
-	int cpu = (long)hcpu;
-
-	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_STARTING:
-		set_cpu_rq_start_time();
-		return NOTIFY_OK;
-
-	case CPU_DOWN_FAILED:
-		set_cpu_active(cpu, true);
-		return NOTIFY_OK;
-
-	default:
-		return NOTIFY_DONE;
-	}
-}
-
-static int sched_cpu_inactive(struct notifier_block *nfb,
-					unsigned long action, void *hcpu)
-{
-	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_DOWN_PREPARE:
-		set_cpu_active((long)hcpu, false);
-		return NOTIFY_OK;
-	default:
-		return NOTIFY_DONE;
-	}
-}
-
-static int __init migration_init(void)
-{
-	void *cpu = (void *)(long)smp_processor_id();
-	int err;
-
-	/* Initialize migration for the boot CPU */
-	err = migration_call(&migration_notifier, CPU_UP_PREPARE, cpu);
-	BUG_ON(err == NOTIFY_BAD);
-	migration_call(&migration_notifier, CPU_ONLINE, cpu);
-	register_cpu_notifier(&migration_notifier);
-
-	/* Register cpu active notifiers */
-	cpu_notifier(sched_cpu_active, CPU_PRI_SCHED_ACTIVE);
-	cpu_notifier(sched_cpu_inactive, CPU_PRI_SCHED_INACTIVE);
-
-	return 0;
-}
-early_initcall(migration_init);
-
 static cpumask_var_t sched_domains_tmpmask; /* sched_domains_mutex */
 
 #ifdef CONFIG_SCHED_DEBUG
@@ -6645,10 +6672,10 @@
 	init_numa_topology_type();
 }
 
-static void sched_domains_numa_masks_set(int cpu)
+static void sched_domains_numa_masks_set(unsigned int cpu)
 {
-	int i, j;
 	int node = cpu_to_node(cpu);
+	int i, j;
 
 	for (i = 0; i < sched_domains_numa_levels; i++) {
 		for (j = 0; j < nr_node_ids; j++) {
@@ -6658,51 +6685,20 @@
 	}
 }
 
-static void sched_domains_numa_masks_clear(int cpu)
+static void sched_domains_numa_masks_clear(unsigned int cpu)
 {
 	int i, j;
+
 	for (i = 0; i < sched_domains_numa_levels; i++) {
 		for (j = 0; j < nr_node_ids; j++)
 			cpumask_clear_cpu(cpu, sched_domains_numa_masks[i][j]);
 	}
 }
 
-/*
- * Update sched_domains_numa_masks[level][node] array when new cpus
- * are onlined.
- */
-static int sched_domains_numa_masks_update(struct notifier_block *nfb,
-					   unsigned long action,
-					   void *hcpu)
-{
-	int cpu = (long)hcpu;
-
-	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_ONLINE:
-		sched_domains_numa_masks_set(cpu);
-		break;
-
-	case CPU_DEAD:
-		sched_domains_numa_masks_clear(cpu);
-		break;
-
-	default:
-		return NOTIFY_DONE;
-	}
-
-	return NOTIFY_OK;
-}
 #else
-static inline void sched_init_numa(void)
-{
-}
-
-static int sched_domains_numa_masks_update(struct notifier_block *nfb,
-					   unsigned long action,
-					   void *hcpu)
-{
-	return 0;
-}
+static inline void sched_init_numa(void) { }
+static void sched_domains_numa_masks_set(unsigned int cpu) { }
+static void sched_domains_numa_masks_clear(unsigned int cpu) { }
 #endif /* CONFIG_NUMA */
 
 static int __sdt_alloc(const struct cpumask *cpu_map)
@@ -7092,13 +7088,9 @@
  * If we come here as part of a suspend/resume, don't touch cpusets because we
  * want to restore it back to its original state upon resume anyway.
  */
-static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action,
-			     void *hcpu)
+static void cpuset_cpu_active(void)
 {
-	switch (action) {
-	case CPU_ONLINE_FROZEN:
-	case CPU_DOWN_FAILED_FROZEN:
-
+	if (cpuhp_tasks_frozen) {
 		/*
 		 * num_cpus_frozen tracks how many CPUs are involved in the
 		 * suspend/resume sequence. As long as this is not the last online
@@ -7108,35 +7100,25 @@
 		num_cpus_frozen--;
 		if (likely(num_cpus_frozen)) {
 			partition_sched_domains(1, NULL, NULL);
-			break;
+			return;
 		}
-
 		/*
 		 * This is the last CPU online operation. So fall through and
 		 * restore the original sched domains by considering the
 		 * cpuset configurations.
 		 */
-
-	case CPU_ONLINE:
-		cpuset_update_active_cpus(true);
-		break;
-	default:
-		return NOTIFY_DONE;
 	}
-	return NOTIFY_OK;
+	cpuset_update_active_cpus(true);
 }
 
-static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action,
-			       void *hcpu)
+static int cpuset_cpu_inactive(unsigned int cpu)
 {
 	unsigned long flags;
-	long cpu = (long)hcpu;
 	struct dl_bw *dl_b;
 	bool overflow;
 	int cpus;
 
-	switch (action) {
-	case CPU_DOWN_PREPARE:
+	if (!cpuhp_tasks_frozen) {
 		rcu_read_lock_sched();
 		dl_b = dl_bw_of(cpu);
 
@@ -7148,19 +7130,120 @@
 		rcu_read_unlock_sched();
 
 		if (overflow)
-			return notifier_from_errno(-EBUSY);
+			return -EBUSY;
 		cpuset_update_active_cpus(false);
-		break;
-	case CPU_DOWN_PREPARE_FROZEN:
+	} else {
 		num_cpus_frozen++;
 		partition_sched_domains(1, NULL, NULL);
-		break;
-	default:
-		return NOTIFY_DONE;
 	}
-	return NOTIFY_OK;
+	return 0;
 }
 
+int sched_cpu_activate(unsigned int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+	unsigned long flags;
+
+	set_cpu_active(cpu, true);
+
+	if (sched_smp_initialized) {
+		sched_domains_numa_masks_set(cpu);
+		cpuset_cpu_active();
+	}
+
+	/*
+	 * Put the rq online, if not already. This happens:
+	 *
+	 * 1) In the early boot process, because we build the real domains
+	 *    after all cpus have been brought up.
+	 *
+	 * 2) At runtime, if cpuset_cpu_active() fails to rebuild the
+	 *    domains.
+	 */
+	raw_spin_lock_irqsave(&rq->lock, flags);
+	if (rq->rd) {
+		BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
+		set_rq_online(rq);
+	}
+	raw_spin_unlock_irqrestore(&rq->lock, flags);
+
+	update_max_interval();
+
+	return 0;
+}
+
+int sched_cpu_deactivate(unsigned int cpu)
+{
+	int ret;
+
+	set_cpu_active(cpu, false);
+	/*
+	 * We've cleared cpu_active_mask, wait for all preempt-disabled and RCU
+	 * users of this state to go away such that all new such users will
+	 * observe it.
+	 *
+	 * For CONFIG_PREEMPT we have preemptible RCU and its sync_rcu() might
+	 * not imply sync_sched(), so wait for both.
+	 *
+	 * Do the sync before parking the smpboot threads to take care of the
+	 * RCU boost case.
+	 */
+	if (IS_ENABLED(CONFIG_PREEMPT))
+		synchronize_rcu_mult(call_rcu, call_rcu_sched);
+	else
+		synchronize_rcu();
+
+	if (!sched_smp_initialized)
+		return 0;
+
+	ret = cpuset_cpu_inactive(cpu);
+	if (ret) {
+		set_cpu_active(cpu, true);
+		return ret;
+	}
+	sched_domains_numa_masks_clear(cpu);
+	return 0;
+}
+
+static void sched_rq_cpu_starting(unsigned int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+
+	rq->calc_load_update = calc_load_update;
+	account_reset_rq(rq);
+	update_max_interval();
+}
+
+int sched_cpu_starting(unsigned int cpu)
+{
+	set_cpu_rq_start_time(cpu);
+	sched_rq_cpu_starting(cpu);
+	return 0;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+int sched_cpu_dying(unsigned int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+	unsigned long flags;
+
+	/* Handle pending wakeups and then migrate everything off */
+	sched_ttwu_pending();
+	raw_spin_lock_irqsave(&rq->lock, flags);
+	if (rq->rd) {
+		BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
+		set_rq_offline(rq);
+	}
+	migrate_tasks(rq);
+	BUG_ON(rq->nr_running != 1);
+	raw_spin_unlock_irqrestore(&rq->lock, flags);
+	calc_load_migrate(rq);
+	update_max_interval();
+	nohz_balance_exit_idle(cpu);
+	hrtick_clear(rq);
+	return 0;
+}
+#endif
+
 void __init sched_init_smp(void)
 {
 	cpumask_var_t non_isolated_cpus;
@@ -7182,12 +7265,6 @@
 		cpumask_set_cpu(smp_processor_id(), non_isolated_cpus);
 	mutex_unlock(&sched_domains_mutex);
 
-	hotcpu_notifier(sched_domains_numa_masks_update, CPU_PRI_SCHED_ACTIVE);
-	hotcpu_notifier(cpuset_cpu_active, CPU_PRI_CPUSET_ACTIVE);
-	hotcpu_notifier(cpuset_cpu_inactive, CPU_PRI_CPUSET_INACTIVE);
-
-	init_hrtick();
-
 	/* Move init over to a non-isolated CPU */
 	if (set_cpus_allowed_ptr(current, non_isolated_cpus) < 0)
 		BUG();
@@ -7196,7 +7273,16 @@
 
 	init_sched_rt_class();
 	init_sched_dl_class();
+	sched_smp_initialized = true;
 }
+
+static int __init migration_init(void)
+{
+	sched_rq_cpu_starting(smp_processor_id());
+	return 0;
+}
+early_initcall(migration_init);
+
 #else
 void __init sched_init_smp(void)
 {
@@ -7331,8 +7417,6 @@
 		for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
 			rq->cpu_load[j] = 0;
 
-		rq->last_load_update_tick = jiffies;
-
 #ifdef CONFIG_SMP
 		rq->sd = NULL;
 		rq->rd = NULL;
@@ -7351,12 +7435,13 @@
 
 		rq_attach_root(rq, &def_root_domain);
 #ifdef CONFIG_NO_HZ_COMMON
+		rq->last_load_update_tick = jiffies;
 		rq->nohz_flags = 0;
 #endif
 #ifdef CONFIG_NO_HZ_FULL
 		rq->last_sched_tick = 0;
 #endif
-#endif
+#endif /* CONFIG_SMP */
 		init_rq_hrtick(rq);
 		atomic_set(&rq->nr_iowait, 0);
 	}
@@ -7394,7 +7479,7 @@
 	if (cpu_isolated_map == NULL)
 		zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
 	idle_thread_set_boot_cpu();
-	set_cpu_rq_start_time();
+	set_cpu_rq_start_time(smp_processor_id());
 #endif
 	init_sched_fair_class();
 
@@ -7639,10 +7724,10 @@
 {
 	struct task_group *tg;
 	int queued, running;
-	unsigned long flags;
+	struct rq_flags rf;
 	struct rq *rq;
 
-	rq = task_rq_lock(tsk, &flags);
+	rq = task_rq_lock(tsk, &rf);
 
 	running = task_current(rq, tsk);
 	queued = task_on_rq_queued(tsk);
@@ -7674,7 +7759,7 @@
 	if (queued)
 		enqueue_task(rq, tsk, ENQUEUE_RESTORE | ENQUEUE_MOVE);
 
-	task_rq_unlock(rq, tsk, &flags);
+	task_rq_unlock(rq, tsk, &rf);
 }
 #endif /* CONFIG_CGROUP_SCHED */
 
@@ -7894,7 +7979,7 @@
 static int sched_rt_global_constraints(void)
 {
 	unsigned long flags;
-	int i, ret = 0;
+	int i;
 
 	raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
 	for_each_possible_cpu(i) {
@@ -7906,7 +7991,7 @@
 	}
 	raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags);
 
-	return ret;
+	return 0;
 }
 #endif /* CONFIG_RT_GROUP_SCHED */
 
diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
index 4a81120..41f85c4 100644
--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c
@@ -25,11 +25,22 @@
 	CPUACCT_STAT_NSTATS,
 };
 
+enum cpuacct_usage_index {
+	CPUACCT_USAGE_USER,	/* ... user mode */
+	CPUACCT_USAGE_SYSTEM,	/* ... kernel mode */
+
+	CPUACCT_USAGE_NRUSAGE,
+};
+
+struct cpuacct_usage {
+	u64	usages[CPUACCT_USAGE_NRUSAGE];
+};
+
 /* track cpu usage of a group of tasks and its child groups */
 struct cpuacct {
 	struct cgroup_subsys_state css;
 	/* cpuusage holds pointer to a struct cpuacct_usage object on every cpu */
-	u64 __percpu *cpuusage;
+	struct cpuacct_usage __percpu *cpuusage;
 	struct kernel_cpustat __percpu *cpustat;
 };
 
@@ -49,7 +60,7 @@
 	return css_ca(ca->css.parent);
 }
 
-static DEFINE_PER_CPU(u64, root_cpuacct_cpuusage);
+static DEFINE_PER_CPU(struct cpuacct_usage, root_cpuacct_cpuusage);
 static struct cpuacct root_cpuacct = {
 	.cpustat	= &kernel_cpustat,
 	.cpuusage	= &root_cpuacct_cpuusage,
@@ -68,7 +79,7 @@
 	if (!ca)
 		goto out;
 
-	ca->cpuusage = alloc_percpu(u64);
+	ca->cpuusage = alloc_percpu(struct cpuacct_usage);
 	if (!ca->cpuusage)
 		goto out_free_ca;
 
@@ -96,20 +107,37 @@
 	kfree(ca);
 }
 
-static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu)
+static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu,
+				 enum cpuacct_usage_index index)
 {
-	u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
+	struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
 	u64 data;
 
+	/*
+	 * We allow index == CPUACCT_USAGE_NRUSAGE here to read
+	 * the sum of all usages.
+	 */
+	BUG_ON(index > CPUACCT_USAGE_NRUSAGE);
+
 #ifndef CONFIG_64BIT
 	/*
 	 * Take rq->lock to make 64-bit read safe on 32-bit platforms.
 	 */
 	raw_spin_lock_irq(&cpu_rq(cpu)->lock);
-	data = *cpuusage;
+#endif
+
+	if (index == CPUACCT_USAGE_NRUSAGE) {
+		int i = 0;
+
+		data = 0;
+		for (i = 0; i < CPUACCT_USAGE_NRUSAGE; i++)
+			data += cpuusage->usages[i];
+	} else {
+		data = cpuusage->usages[index];
+	}
+
+#ifndef CONFIG_64BIT
 	raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
-#else
-	data = *cpuusage;
 #endif
 
 	return data;
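
For clarity, a tiny standalone C sketch of the "index == NRUSAGE reads the sum" convention used above (not part of the patch; the array contents are invented):

#include <stdio.h>

enum { USAGE_USER, USAGE_SYSTEM, NR_USAGE };

static unsigned long long read_usage(const unsigned long long *usages, int index)
{
	unsigned long long data = 0;

	if (index == NR_USAGE) {	/* "one past the end" means: sum them all */
		for (int i = 0; i < NR_USAGE; i++)
			data += usages[i];
	} else {
		data = usages[index];
	}
	return data;
}

int main(void)
{
	unsigned long long usages[NR_USAGE] = { 100, 40 };

	printf("user=%llu total=%llu\n",
	       read_usage(usages, USAGE_USER), read_usage(usages, NR_USAGE));
	return 0;	/* prints user=100 total=140 */
}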
@@ -117,69 +145,103 @@
 
 static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
 {
-	u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
+	struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
+	int i;
 
 #ifndef CONFIG_64BIT
 	/*
 	 * Take rq->lock to make 64-bit write safe on 32-bit platforms.
 	 */
 	raw_spin_lock_irq(&cpu_rq(cpu)->lock);
-	*cpuusage = val;
+#endif
+
+	for (i = 0; i < CPUACCT_USAGE_NRUSAGE; i++)
+		cpuusage->usages[i] = val;
+
+#ifndef CONFIG_64BIT
 	raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
-#else
-	*cpuusage = val;
 #endif
 }
 
 /* return total cpu usage (in nanoseconds) of a group */
-static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft)
+static u64 __cpuusage_read(struct cgroup_subsys_state *css,
+			   enum cpuacct_usage_index index)
 {
 	struct cpuacct *ca = css_ca(css);
 	u64 totalcpuusage = 0;
 	int i;
 
-	for_each_present_cpu(i)
-		totalcpuusage += cpuacct_cpuusage_read(ca, i);
+	for_each_possible_cpu(i)
+		totalcpuusage += cpuacct_cpuusage_read(ca, i, index);
 
 	return totalcpuusage;
 }
 
+static u64 cpuusage_user_read(struct cgroup_subsys_state *css,
+			      struct cftype *cft)
+{
+	return __cpuusage_read(css, CPUACCT_USAGE_USER);
+}
+
+static u64 cpuusage_sys_read(struct cgroup_subsys_state *css,
+			     struct cftype *cft)
+{
+	return __cpuusage_read(css, CPUACCT_USAGE_SYSTEM);
+}
+
+static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+	return __cpuusage_read(css, CPUACCT_USAGE_NRUSAGE);
+}
+
 static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft,
 			  u64 val)
 {
 	struct cpuacct *ca = css_ca(css);
-	int err = 0;
-	int i;
+	int cpu;
 
 	/*
 	 * Only allow '0' here to do a reset.
 	 */
-	if (val) {
-		err = -EINVAL;
-		goto out;
-	}
+	if (val)
+		return -EINVAL;
 
-	for_each_present_cpu(i)
-		cpuacct_cpuusage_write(ca, i, 0);
+	for_each_possible_cpu(cpu)
+		cpuacct_cpuusage_write(ca, cpu, 0);
 
-out:
-	return err;
+	return 0;
 }
 
-static int cpuacct_percpu_seq_show(struct seq_file *m, void *V)
+static int __cpuacct_percpu_seq_show(struct seq_file *m,
+				     enum cpuacct_usage_index index)
 {
 	struct cpuacct *ca = css_ca(seq_css(m));
 	u64 percpu;
 	int i;
 
-	for_each_present_cpu(i) {
-		percpu = cpuacct_cpuusage_read(ca, i);
+	for_each_possible_cpu(i) {
+		percpu = cpuacct_cpuusage_read(ca, i, index);
 		seq_printf(m, "%llu ", (unsigned long long) percpu);
 	}
 	seq_printf(m, "\n");
 	return 0;
 }
 
+static int cpuacct_percpu_user_seq_show(struct seq_file *m, void *V)
+{
+	return __cpuacct_percpu_seq_show(m, CPUACCT_USAGE_USER);
+}
+
+static int cpuacct_percpu_sys_seq_show(struct seq_file *m, void *V)
+{
+	return __cpuacct_percpu_seq_show(m, CPUACCT_USAGE_SYSTEM);
+}
+
+static int cpuacct_percpu_seq_show(struct seq_file *m, void *V)
+{
+	return __cpuacct_percpu_seq_show(m, CPUACCT_USAGE_NRUSAGE);
+}
+
 static const char * const cpuacct_stat_desc[] = {
 	[CPUACCT_STAT_USER] = "user",
 	[CPUACCT_STAT_SYSTEM] = "system",
@@ -191,7 +253,7 @@
 	int cpu;
 	s64 val = 0;
 
-	for_each_online_cpu(cpu) {
+	for_each_possible_cpu(cpu) {
 		struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
 		val += kcpustat->cpustat[CPUTIME_USER];
 		val += kcpustat->cpustat[CPUTIME_NICE];
@@ -200,7 +262,7 @@
 	seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_USER], val);
 
 	val = 0;
-	for_each_online_cpu(cpu) {
+	for_each_possible_cpu(cpu) {
 		struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
 		val += kcpustat->cpustat[CPUTIME_SYSTEM];
 		val += kcpustat->cpustat[CPUTIME_IRQ];
@@ -220,10 +282,26 @@
 		.write_u64 = cpuusage_write,
 	},
 	{
+		.name = "usage_user",
+		.read_u64 = cpuusage_user_read,
+	},
+	{
+		.name = "usage_sys",
+		.read_u64 = cpuusage_sys_read,
+	},
+	{
 		.name = "usage_percpu",
 		.seq_show = cpuacct_percpu_seq_show,
 	},
 	{
+		.name = "usage_percpu_user",
+		.seq_show = cpuacct_percpu_user_seq_show,
+	},
+	{
+		.name = "usage_percpu_sys",
+		.seq_show = cpuacct_percpu_sys_seq_show,
+	},
+	{
 		.name = "stat",
 		.seq_show = cpuacct_stats_show,
 	},
@@ -238,10 +316,17 @@
 void cpuacct_charge(struct task_struct *tsk, u64 cputime)
 {
 	struct cpuacct *ca;
+	int index = CPUACCT_USAGE_SYSTEM;
+	struct pt_regs *regs = task_pt_regs(tsk);
+
+	if (regs && user_mode(regs))
+		index = CPUACCT_USAGE_USER;
 
 	rcu_read_lock();
+
 	for (ca = task_ca(tsk); ca; ca = parent_ca(ca))
-		*this_cpu_ptr(ca->cpuusage) += cputime;
+		this_cpu_ptr(ca->cpuusage)->usages[index] += cputime;
+
 	rcu_read_unlock();
 }
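
A userspace sketch of this hierarchical charging, with the user_mode(regs) test replaced by an explicit index (the group layout here is invented for illustration):

#include <stdio.h>

enum { USAGE_USER, USAGE_SYSTEM, NR_USAGE };

struct group {
	unsigned long long usages[NR_USAGE];
	struct group *parent;
};

/* Walk from the task's group up to the root, as the loop above does. */
static void charge(struct group *g, int index, unsigned long long cputime)
{
	for (; g; g = g->parent)
		g->usages[index] += cputime;
}

int main(void)
{
	struct group root = { { 0, 0 }, NULL };
	struct group child = { { 0, 0 }, &root };

	charge(&child, USAGE_USER, 100);	/* tick hit user mode */
	charge(&child, USAGE_SYSTEM, 40);	/* tick hit kernel mode */
	printf("root: user=%llu sys=%llu\n",
	       root.usages[USAGE_USER], root.usages[USAGE_SYSTEM]);
	return 0;	/* prints user=100 sys=40 */
}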
 
diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
index 5a75b08..5be5882 100644
--- a/kernel/sched/cpudeadline.c
+++ b/kernel/sched/cpudeadline.c
@@ -103,10 +103,10 @@
 	const struct sched_dl_entity *dl_se = &p->dl;
 
 	if (later_mask &&
-	    cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed)) {
+	    cpumask_and(later_mask, cp->free_cpus, tsk_cpus_allowed(p))) {
 		best_cpu = cpumask_any(later_mask);
 		goto out;
-	} else if (cpumask_test_cpu(cpudl_maximum(cp), &p->cpus_allowed) &&
+	} else if (cpumask_test_cpu(cpudl_maximum(cp), tsk_cpus_allowed(p)) &&
 			dl_time_before(dl_se->deadline, cp->elements[0].dl)) {
 		best_cpu = cpudl_maximum(cp);
 		if (later_mask)
diff --git a/kernel/sched/cpufreq.c b/kernel/sched/cpufreq.c
index 928c4ba..1141954 100644
--- a/kernel/sched/cpufreq.c
+++ b/kernel/sched/cpufreq.c
@@ -14,24 +14,50 @@
 DEFINE_PER_CPU(struct update_util_data *, cpufreq_update_util_data);
 
 /**
- * cpufreq_set_update_util_data - Populate the CPU's update_util_data pointer.
+ * cpufreq_add_update_util_hook - Populate the CPU's update_util_data pointer.
  * @cpu: The CPU to set the pointer for.
  * @data: New pointer value.
+ * @func: Callback function to set for the CPU.
  *
- * Set and publish the update_util_data pointer for the given CPU.  That pointer
- * points to a struct update_util_data object containing a callback function
- * to call from cpufreq_update_util().  That function will be called from an RCU
- * read-side critical section, so it must not sleep.
+ * Set and publish the update_util_data pointer for the given CPU.
+ *
+ * The update_util_data pointer of @cpu is set to @data and the callback
+ * function pointer in the target struct update_util_data is set to @func.
+ * That function will be called by cpufreq_update_util() from RCU-sched
+ * read-side critical sections, so it must not sleep.  @data will always be
+ * passed to it as the first argument which allows the function to get to the
+ * target update_util_data structure and its container.
+ *
+ * The update_util_data pointer of @cpu must be NULL when this function is
+ * called or it will WARN() and return with no effect.
+ */
+void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
+			void (*func)(struct update_util_data *data, u64 time,
+				     unsigned long util, unsigned long max))
+{
+	if (WARN_ON(!data || !func))
+		return;
+
+	if (WARN_ON(per_cpu(cpufreq_update_util_data, cpu)))
+		return;
+
+	data->func = func;
+	rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), data);
+}
+EXPORT_SYMBOL_GPL(cpufreq_add_update_util_hook);
+
+/**
+ * cpufreq_remove_update_util_hook - Clear the CPU's update_util_data pointer.
+ * @cpu: The CPU to clear the pointer for.
+ *
+ * Clear the update_util_data pointer for the given CPU.
  *
  * Callers must use RCU-sched callbacks to free any memory that might be
  * accessed via the old update_util_data pointer, or invoke synchronize_sched()
  * right after this function to avoid use-after-free.
  */
-void cpufreq_set_update_util_data(int cpu, struct update_util_data *data)
+void cpufreq_remove_update_util_hook(int cpu)
 {
-	if (WARN_ON(data && !data->func))
-		return;
-
-	rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), data);
+	rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), NULL);
 }
-EXPORT_SYMBOL_GPL(cpufreq_set_update_util_data);
+EXPORT_SYMBOL_GPL(cpufreq_remove_update_util_hook);
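
A hypothetical consumer of this hook pair might look like the sketch below. It is not buildable on its own: my_update_fn, my_update_data and the my_governor_* wrappers are invented, and only the two hook functions and synchronize_sched() come from the code above.

static void my_update_fn(struct update_util_data *data, u64 time,
			 unsigned long util, unsigned long max)
{
	/* Runs from scheduler paths in an RCU-sched read-side critical
	 * section: must not sleep and should do minimal work. */
}

static struct update_util_data my_update_data;

static void my_governor_start(int cpu)
{
	cpufreq_add_update_util_hook(cpu, &my_update_data, my_update_fn);
}

static void my_governor_stop(int cpu)
{
	cpufreq_remove_update_util_hook(cpu);
	synchronize_sched();	/* wait out callbacks still in flight */
}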
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
new file mode 100644
index 0000000..154ae3a
--- /dev/null
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -0,0 +1,530 @@
+/*
+ * CPUFreq governor based on scheduler-provided CPU utilization data.
+ *
+ * Copyright (C) 2016, Intel Corporation
+ * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/cpufreq.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <trace/events/power.h>
+
+#include "sched.h"
+
+struct sugov_tunables {
+	struct gov_attr_set attr_set;
+	unsigned int rate_limit_us;
+};
+
+struct sugov_policy {
+	struct cpufreq_policy *policy;
+
+	struct sugov_tunables *tunables;
+	struct list_head tunables_hook;
+
+	raw_spinlock_t update_lock;  /* For shared policies */
+	u64 last_freq_update_time;
+	s64 freq_update_delay_ns;
+	unsigned int next_freq;
+
+	/* The next fields are only needed if fast switch cannot be used. */
+	struct irq_work irq_work;
+	struct work_struct work;
+	struct mutex work_lock;
+	bool work_in_progress;
+
+	bool need_freq_update;
+};
+
+struct sugov_cpu {
+	struct update_util_data update_util;
+	struct sugov_policy *sg_policy;
+
+	/* The fields below are only needed when sharing a policy. */
+	unsigned long util;
+	unsigned long max;
+	u64 last_update;
+};
+
+static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu);
+
+/************************ Governor internals ***********************/
+
+static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
+{
+	s64 delta_ns;
+
+	if (sg_policy->work_in_progress)
+		return false;
+
+	if (unlikely(sg_policy->need_freq_update)) {
+		sg_policy->need_freq_update = false;
+		/*
+		 * This happens when limits change, so forget the previous
+		 * next_freq value and force an update.
+		 */
+		sg_policy->next_freq = UINT_MAX;
+		return true;
+	}
+
+	delta_ns = time - sg_policy->last_freq_update_time;
+	return delta_ns >= sg_policy->freq_update_delay_ns;
+}
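
The rate limit above can be illustrated with a minimal standalone sketch (not part of the patch), assuming a hypothetical 10 ms rate_limit_us and made-up timestamps:

#include <stdbool.h>
#include <stdio.h>

static unsigned long long last_update;
static const unsigned long long delay_ns = 10000000;	/* 10 ms, hypothetical */

static bool should_update(unsigned long long time)
{
	if (time - last_update < delay_ns)
		return false;
	last_update = time;
	return true;
}

int main(void)
{
	const unsigned long long t[] = { 10000000, 14000000, 22000000 };

	for (int i = 0; i < 3; i++)
		printf("t=%lluns: %s\n", t[i],
		       should_update(t[i]) ? "update" : "skip");
	return 0;	/* prints update, skip, update */
}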
+
+static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
+				unsigned int next_freq)
+{
+	struct cpufreq_policy *policy = sg_policy->policy;
+
+	sg_policy->last_freq_update_time = time;
+
+	if (policy->fast_switch_enabled) {
+		if (sg_policy->next_freq == next_freq) {
+			trace_cpu_frequency(policy->cur, smp_processor_id());
+			return;
+		}
+		sg_policy->next_freq = next_freq;
+		next_freq = cpufreq_driver_fast_switch(policy, next_freq);
+		if (next_freq == CPUFREQ_ENTRY_INVALID)
+			return;
+
+		policy->cur = next_freq;
+		trace_cpu_frequency(next_freq, smp_processor_id());
+	} else if (sg_policy->next_freq != next_freq) {
+		sg_policy->next_freq = next_freq;
+		sg_policy->work_in_progress = true;
+		irq_work_queue(&sg_policy->irq_work);
+	}
+}
+
+/**
+ * get_next_freq - Compute a new frequency for a given cpufreq policy.
+ * @policy: cpufreq policy object to compute the new frequency for.
+ * @util: Current CPU utilization.
+ * @max: CPU capacity.
+ *
+ * If the utilization is frequency-invariant, choose the new frequency to be
+ * proportional to it, that is
+ *
+ * next_freq = C * max_freq * util / max
+ *
+ * Otherwise, approximate the would-be frequency-invariant utilization by
+ * util_raw * (curr_freq / max_freq) which leads to
+ *
+ * next_freq = C * curr_freq * util_raw / max
+ *
+ * Take C = 1.25 for the frequency tipping point at (util / max) = 0.8.
+ */
+static unsigned int get_next_freq(struct cpufreq_policy *policy,
+				  unsigned long util, unsigned long max)
+{
+	unsigned int freq = arch_scale_freq_invariant() ?
+				policy->cpuinfo.max_freq : policy->cur;
+
+	return (freq + (freq >> 2)) * util / max;
+}
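
As a rough standalone illustration of this formula (the 2 GHz policy and the utilization values are made up):

#include <stdio.h>

/* next_freq = C * freq * util / max with C = 1.25, i.e. freq + freq/4,
 * mirroring the (freq + (freq >> 2)) expression above */
static unsigned int next_freq(unsigned int freq, unsigned long util,
			      unsigned long max)
{
	return (freq + (freq >> 2)) * util / max;
}

int main(void)
{
	/* hypothetical 2 GHz policy (in kHz), utilization out of 1024 */
	printf("%u kHz\n", next_freq(2000000, 819, 1024));	/* ~2 GHz: 80% is the tipping point */
	printf("%u kHz\n", next_freq(2000000, 512, 1024));	/* 1250000 kHz at 50% utilization */
	return 0;
}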
+
+static void sugov_update_single(struct update_util_data *hook, u64 time,
+				unsigned long util, unsigned long max)
+{
+	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
+	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
+	struct cpufreq_policy *policy = sg_policy->policy;
+	unsigned int next_f;
+
+	if (!sugov_should_update_freq(sg_policy, time))
+		return;
+
+	next_f = util == ULONG_MAX ? policy->cpuinfo.max_freq :
+			get_next_freq(policy, util, max);
+	sugov_update_commit(sg_policy, time, next_f);
+}
+
+static unsigned int sugov_next_freq_shared(struct sugov_policy *sg_policy,
+					   unsigned long util, unsigned long max)
+{
+	struct cpufreq_policy *policy = sg_policy->policy;
+	unsigned int max_f = policy->cpuinfo.max_freq;
+	u64 last_freq_update_time = sg_policy->last_freq_update_time;
+	unsigned int j;
+
+	if (util == ULONG_MAX)
+		return max_f;
+
+	for_each_cpu(j, policy->cpus) {
+		struct sugov_cpu *j_sg_cpu;
+		unsigned long j_util, j_max;
+		s64 delta_ns;
+
+		if (j == smp_processor_id())
+			continue;
+
+		j_sg_cpu = &per_cpu(sugov_cpu, j);
+		/*
+		 * If the CPU utilization was last updated before the previous
+		 * frequency update and the time elapsed between the last update
+		 * of the CPU utilization and the last frequency update is long
+		 * enough, don't take the CPU into account as it probably is
+		 * idle now.
+		 */
+		delta_ns = last_freq_update_time - j_sg_cpu->last_update;
+		if (delta_ns > TICK_NSEC)
+			continue;
+
+		j_util = j_sg_cpu->util;
+		if (j_util == ULONG_MAX)
+			return max_f;
+
+		j_max = j_sg_cpu->max;
+		if (j_util * max > j_max * util) {
+			util = j_util;
+			max = j_max;
+		}
+	}
+
+	return get_next_freq(policy, util, max);
+}
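
A self-contained sketch of the aggregation loop above: take the CPU with the largest util/max ratio, treating CPUs whose last sample is older than a tick as idle. All numbers are invented:

#include <stdio.h>

struct cpu_sample {
	unsigned long util, max;
	unsigned long long last_update;
};

int main(void)
{
	const unsigned long long now = 1000000000ULL;	/* ns, made up */
	const unsigned long long tick_ns = 4000000;	/* 4 ms tick (HZ=250) */
	struct cpu_sample cpus[] = {
		{ 300, 1024, 999000000 },	/* fresh, ~29% */
		{ 900, 1024, 100000000 },	/* stale: presumed idle, skipped */
		{ 700, 1024, 999500000 },	/* fresh, ~68%: wins */
	};
	unsigned long util = 0, max = 1;

	for (int j = 0; j < 3; j++) {
		if (now - cpus[j].last_update > tick_ns)
			continue;
		/* j_util/j_max > util/max, compared without division */
		if (cpus[j].util * max > cpus[j].max * util) {
			util = cpus[j].util;
			max = cpus[j].max;
		}
	}
	printf("util/max = %lu/%lu\n", util, max);	/* prints 700/1024 */
	return 0;
}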
+
+static void sugov_update_shared(struct update_util_data *hook, u64 time,
+				unsigned long util, unsigned long max)
+{
+	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
+	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
+	unsigned int next_f;
+
+	raw_spin_lock(&sg_policy->update_lock);
+
+	sg_cpu->util = util;
+	sg_cpu->max = max;
+	sg_cpu->last_update = time;
+
+	if (sugov_should_update_freq(sg_policy, time)) {
+		next_f = sugov_next_freq_shared(sg_policy, util, max);
+		sugov_update_commit(sg_policy, time, next_f);
+	}
+
+	raw_spin_unlock(&sg_policy->update_lock);
+}
+
+static void sugov_work(struct work_struct *work)
+{
+	struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work);
+
+	mutex_lock(&sg_policy->work_lock);
+	__cpufreq_driver_target(sg_policy->policy, sg_policy->next_freq,
+				CPUFREQ_RELATION_L);
+	mutex_unlock(&sg_policy->work_lock);
+
+	sg_policy->work_in_progress = false;
+}
+
+static void sugov_irq_work(struct irq_work *irq_work)
+{
+	struct sugov_policy *sg_policy;
+
+	sg_policy = container_of(irq_work, struct sugov_policy, irq_work);
+	schedule_work_on(smp_processor_id(), &sg_policy->work);
+}
+
+/************************** sysfs interface ************************/
+
+static struct sugov_tunables *global_tunables;
+static DEFINE_MUTEX(global_tunables_lock);
+
+static inline struct sugov_tunables *to_sugov_tunables(struct gov_attr_set *attr_set)
+{
+	return container_of(attr_set, struct sugov_tunables, attr_set);
+}
+
+static ssize_t rate_limit_us_show(struct gov_attr_set *attr_set, char *buf)
+{
+	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
+
+	return sprintf(buf, "%u\n", tunables->rate_limit_us);
+}
+
+static ssize_t rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf,
+				   size_t count)
+{
+	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
+	struct sugov_policy *sg_policy;
+	unsigned int rate_limit_us;
+
+	if (kstrtouint(buf, 10, &rate_limit_us))
+		return -EINVAL;
+
+	tunables->rate_limit_us = rate_limit_us;
+
+	list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook)
+		sg_policy->freq_update_delay_ns = rate_limit_us * NSEC_PER_USEC;
+
+	return count;
+}
+
+static struct governor_attr rate_limit_us = __ATTR_RW(rate_limit_us);
+
+static struct attribute *sugov_attributes[] = {
+	&rate_limit_us.attr,
+	NULL
+};
+
+static struct kobj_type sugov_tunables_ktype = {
+	.default_attrs = sugov_attributes,
+	.sysfs_ops = &governor_sysfs_ops,
+};
+
+/********************** cpufreq governor interface *********************/
+
+static struct cpufreq_governor schedutil_gov;
+
+static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy)
+{
+	struct sugov_policy *sg_policy;
+
+	sg_policy = kzalloc(sizeof(*sg_policy), GFP_KERNEL);
+	if (!sg_policy)
+		return NULL;
+
+	sg_policy->policy = policy;
+	init_irq_work(&sg_policy->irq_work, sugov_irq_work);
+	INIT_WORK(&sg_policy->work, sugov_work);
+	mutex_init(&sg_policy->work_lock);
+	raw_spin_lock_init(&sg_policy->update_lock);
+	return sg_policy;
+}
+
+static void sugov_policy_free(struct sugov_policy *sg_policy)
+{
+	mutex_destroy(&sg_policy->work_lock);
+	kfree(sg_policy);
+}
+
+static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_policy)
+{
+	struct sugov_tunables *tunables;
+
+	tunables = kzalloc(sizeof(*tunables), GFP_KERNEL);
+	if (tunables) {
+		gov_attr_set_init(&tunables->attr_set, &sg_policy->tunables_hook);
+		if (!have_governor_per_policy())
+			global_tunables = tunables;
+	}
+	return tunables;
+}
+
+static void sugov_tunables_free(struct sugov_tunables *tunables)
+{
+	if (!have_governor_per_policy())
+		global_tunables = NULL;
+
+	kfree(tunables);
+}
+
+static int sugov_init(struct cpufreq_policy *policy)
+{
+	struct sugov_policy *sg_policy;
+	struct sugov_tunables *tunables;
+	unsigned int lat;
+	int ret = 0;
+
+	/* State should be equivalent to EXIT */
+	if (policy->governor_data)
+		return -EBUSY;
+
+	sg_policy = sugov_policy_alloc(policy);
+	if (!sg_policy)
+		return -ENOMEM;
+
+	mutex_lock(&global_tunables_lock);
+
+	if (global_tunables) {
+		if (WARN_ON(have_governor_per_policy())) {
+			ret = -EINVAL;
+			goto free_sg_policy;
+		}
+		policy->governor_data = sg_policy;
+		sg_policy->tunables = global_tunables;
+
+		gov_attr_set_get(&global_tunables->attr_set, &sg_policy->tunables_hook);
+		goto out;
+	}
+
+	tunables = sugov_tunables_alloc(sg_policy);
+	if (!tunables) {
+		ret = -ENOMEM;
+		goto free_sg_policy;
+	}
+
+	tunables->rate_limit_us = LATENCY_MULTIPLIER;
+	lat = policy->cpuinfo.transition_latency / NSEC_PER_USEC;
+	if (lat)
+		tunables->rate_limit_us *= lat;
+
+	policy->governor_data = sg_policy;
+	sg_policy->tunables = tunables;
+
+	ret = kobject_init_and_add(&tunables->attr_set.kobj, &sugov_tunables_ktype,
+				   get_governor_parent_kobj(policy), "%s",
+				   schedutil_gov.name);
+	if (ret)
+		goto fail;
+
+ out:
+	mutex_unlock(&global_tunables_lock);
+
+	cpufreq_enable_fast_switch(policy);
+	return 0;
+
+ fail:
+	policy->governor_data = NULL;
+	sugov_tunables_free(tunables);
+
+ free_sg_policy:
+	mutex_unlock(&global_tunables_lock);
+
+	sugov_policy_free(sg_policy);
+	pr_err("cpufreq: schedutil governor initialization failed (error %d)\n", ret);
+	return ret;
+}
+
+static int sugov_exit(struct cpufreq_policy *policy)
+{
+	struct sugov_policy *sg_policy = policy->governor_data;
+	struct sugov_tunables *tunables = sg_policy->tunables;
+	unsigned int count;
+
+	cpufreq_disable_fast_switch(policy);
+
+	mutex_lock(&global_tunables_lock);
+
+	count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook);
+	policy->governor_data = NULL;
+	if (!count)
+		sugov_tunables_free(tunables);
+
+	mutex_unlock(&global_tunables_lock);
+
+	sugov_policy_free(sg_policy);
+	return 0;
+}
+
+static int sugov_start(struct cpufreq_policy *policy)
+{
+	struct sugov_policy *sg_policy = policy->governor_data;
+	unsigned int cpu;
+
+	sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC;
+	sg_policy->last_freq_update_time = 0;
+	sg_policy->next_freq = UINT_MAX;
+	sg_policy->work_in_progress = false;
+	sg_policy->need_freq_update = false;
+
+	for_each_cpu(cpu, policy->cpus) {
+		struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);
+
+		sg_cpu->sg_policy = sg_policy;
+		if (policy_is_shared(policy)) {
+			sg_cpu->util = ULONG_MAX;
+			sg_cpu->max = 0;
+			sg_cpu->last_update = 0;
+			cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
+						     sugov_update_shared);
+		} else {
+			cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
+						     sugov_update_single);
+		}
+	}
+	return 0;
+}
+
+static int sugov_stop(struct cpufreq_policy *policy)
+{
+	struct sugov_policy *sg_policy = policy->governor_data;
+	unsigned int cpu;
+
+	for_each_cpu(cpu, policy->cpus)
+		cpufreq_remove_update_util_hook(cpu);
+
+	synchronize_sched();
+
+	irq_work_sync(&sg_policy->irq_work);
+	cancel_work_sync(&sg_policy->work);
+	return 0;
+}
+
+static int sugov_limits(struct cpufreq_policy *policy)
+{
+	struct sugov_policy *sg_policy = policy->governor_data;
+
+	if (!policy->fast_switch_enabled) {
+		mutex_lock(&sg_policy->work_lock);
+
+		if (policy->max < policy->cur)
+			__cpufreq_driver_target(policy, policy->max,
+						CPUFREQ_RELATION_H);
+		else if (policy->min > policy->cur)
+			__cpufreq_driver_target(policy, policy->min,
+						CPUFREQ_RELATION_L);
+
+		mutex_unlock(&sg_policy->work_lock);
+	}
+
+	sg_policy->need_freq_update = true;
+	return 0;
+}
+
+int sugov_governor(struct cpufreq_policy *policy, unsigned int event)
+{
+	if (event == CPUFREQ_GOV_POLICY_INIT) {
+		return sugov_init(policy);
+	} else if (policy->governor_data) {
+		switch (event) {
+		case CPUFREQ_GOV_POLICY_EXIT:
+			return sugov_exit(policy);
+		case CPUFREQ_GOV_START:
+			return sugov_start(policy);
+		case CPUFREQ_GOV_STOP:
+			return sugov_stop(policy);
+		case CPUFREQ_GOV_LIMITS:
+			return sugov_limits(policy);
+		}
+	}
+	return -EINVAL;
+}
+
+static struct cpufreq_governor schedutil_gov = {
+	.name = "schedutil",
+	.governor = sugov_governor,
+	.owner = THIS_MODULE,
+};
+
+static int __init sugov_module_init(void)
+{
+	return cpufreq_register_governor(&schedutil_gov);
+}
+
+static void __exit sugov_module_exit(void)
+{
+	cpufreq_unregister_governor(&schedutil_gov);
+}
+
+MODULE_AUTHOR("Rafael J. Wysocki <rafael.j.wysocki@intel.com>");
+MODULE_DESCRIPTION("Utilization-based CPU frequency selection");
+MODULE_LICENSE("GPL");
+
+#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
+struct cpufreq_governor *cpufreq_default_governor(void)
+{
+	return &schedutil_gov;
+}
+
+fs_initcall(sugov_module_init);
+#else
+module_init(sugov_module_init);
+#endif
+module_exit(sugov_module_exit);
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
index 981fcd7..11e9705 100644
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -103,11 +103,11 @@
 		if (skip)
 			continue;
 
-		if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
+		if (cpumask_any_and(tsk_cpus_allowed(p), vec->mask) >= nr_cpu_ids)
 			continue;
 
 		if (lowest_mask) {
-			cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
+			cpumask_and(lowest_mask, tsk_cpus_allowed(p), vec->mask);
 
 			/*
 			 * We have to ensure that we have at least one bit
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 686ec8a..fcb7f02 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -134,7 +134,7 @@
 {
 	struct task_struct *p = dl_task_of(dl_se);
 
-	if (p->nr_cpus_allowed > 1)
+	if (tsk_nr_cpus_allowed(p) > 1)
 		dl_rq->dl_nr_migratory++;
 
 	update_dl_migration(dl_rq);
@@ -144,7 +144,7 @@
 {
 	struct task_struct *p = dl_task_of(dl_se);
 
-	if (p->nr_cpus_allowed > 1)
+	if (tsk_nr_cpus_allowed(p) > 1)
 		dl_rq->dl_nr_migratory--;
 
 	update_dl_migration(dl_rq);
@@ -591,10 +591,10 @@
 						     struct sched_dl_entity,
 						     dl_timer);
 	struct task_struct *p = dl_task_of(dl_se);
-	unsigned long flags;
+	struct rq_flags rf;
 	struct rq *rq;
 
-	rq = task_rq_lock(p, &flags);
+	rq = task_rq_lock(p, &rf);
 
 	/*
 	 * The task might have changed its scheduling policy to something
@@ -670,14 +670,14 @@
 	 * Nothing relies on rq->lock after this, so it's safe to drop
 		 * rq->lock.
 		 */
-		lockdep_unpin_lock(&rq->lock);
+		lockdep_unpin_lock(&rq->lock, rf.cookie);
 		push_dl_task(rq);
-		lockdep_pin_lock(&rq->lock);
+		lockdep_repin_lock(&rq->lock, rf.cookie);
 	}
 #endif
 
 unlock:
-	task_rq_unlock(rq, p, &flags);
+	task_rq_unlock(rq, p, &rf);
 
 	/*
 	 * This can free the task_struct, including this hrtimer, do not touch
@@ -717,10 +717,6 @@
 	if (!dl_task(curr) || !on_dl_rq(dl_se))
 		return;
 
-	/* Kick cpufreq (see the comment in linux/cpufreq.h). */
-	if (cpu_of(rq) == smp_processor_id())
-		cpufreq_trigger_update(rq_clock(rq));
-
 	/*
 	 * Consumed budget is computed considering the time as
 	 * observed by schedulable tasks (excluding time spent
@@ -736,6 +732,10 @@
 		return;
 	}
 
+	/* kick cpufreq (see the comment in linux/cpufreq.h). */
+	if (cpu_of(rq) == smp_processor_id())
+		cpufreq_trigger_update(rq_clock(rq));
+
 	schedstat_set(curr->se.statistics.exec_max,
 		      max(curr->se.statistics.exec_max, delta_exec));
 
@@ -966,7 +966,7 @@
 
 	enqueue_dl_entity(&p->dl, pi_se, flags);
 
-	if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
+	if (!task_current(rq, p) && tsk_nr_cpus_allowed(p) > 1)
 		enqueue_pushable_dl_task(rq, p);
 }
 
@@ -1040,9 +1040,9 @@
 	 * try to make it stay here, it might be important.
 	 */
 	if (unlikely(dl_task(curr)) &&
-	    (curr->nr_cpus_allowed < 2 ||
+	    (tsk_nr_cpus_allowed(curr) < 2 ||
 	     !dl_entity_preempt(&p->dl, &curr->dl)) &&
-	    (p->nr_cpus_allowed > 1)) {
+	    (tsk_nr_cpus_allowed(p) > 1)) {
 		int target = find_later_rq(p);
 
 		if (target != -1 &&
@@ -1063,7 +1063,7 @@
 	 * Current can't be migrated, useless to reschedule,
 	 * let's hope p can move out.
 	 */
-	if (rq->curr->nr_cpus_allowed == 1 ||
+	if (tsk_nr_cpus_allowed(rq->curr) == 1 ||
 	    cpudl_find(&rq->rd->cpudl, rq->curr, NULL) == -1)
 		return;
 
@@ -1071,7 +1071,7 @@
 	 * p is migratable, so let's not schedule it and
 	 * see if it is pushed or pulled somewhere else.
 	 */
-	if (p->nr_cpus_allowed != 1 &&
+	if (tsk_nr_cpus_allowed(p) != 1 &&
 	    cpudl_find(&rq->rd->cpudl, p, NULL) != -1)
 		return;
 
@@ -1125,7 +1125,8 @@
 	return rb_entry(left, struct sched_dl_entity, rb_node);
 }
 
-struct task_struct *pick_next_task_dl(struct rq *rq, struct task_struct *prev)
+struct task_struct *
+pick_next_task_dl(struct rq *rq, struct task_struct *prev, struct pin_cookie cookie)
 {
 	struct sched_dl_entity *dl_se;
 	struct task_struct *p;
@@ -1140,9 +1141,9 @@
 		 * disabled avoiding further scheduler activity on it and we're
 		 * being very careful to re-start the picking loop.
 		 */
-		lockdep_unpin_lock(&rq->lock);
+		lockdep_unpin_lock(&rq->lock, cookie);
 		pull_dl_task(rq);
-		lockdep_pin_lock(&rq->lock);
+		lockdep_repin_lock(&rq->lock, cookie);
 		/*
 	 * pull_dl_task() can drop (and re-acquire) rq->lock; this
 		 * means a stop task can slip in, in which case we need to
@@ -1185,7 +1186,7 @@
 {
 	update_curr_dl(rq);
 
-	if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1)
+	if (on_dl_rq(&p->dl) && tsk_nr_cpus_allowed(p) > 1)
 		enqueue_pushable_dl_task(rq, p);
 }
 
@@ -1286,7 +1287,7 @@
 	if (unlikely(!later_mask))
 		return -1;
 
-	if (task->nr_cpus_allowed == 1)
+	if (tsk_nr_cpus_allowed(task) == 1)
 		return -1;
 
 	/*
@@ -1392,7 +1393,7 @@
 		if (double_lock_balance(rq, later_rq)) {
 			if (unlikely(task_rq(task) != rq ||
 				     !cpumask_test_cpu(later_rq->cpu,
-				                       &task->cpus_allowed) ||
+						       tsk_cpus_allowed(task)) ||
 				     task_running(rq, task) ||
 				     !dl_task(task) ||
 				     !task_on_rq_queued(task))) {
@@ -1432,7 +1433,7 @@
 
 	BUG_ON(rq->cpu != task_cpu(p));
 	BUG_ON(task_current(rq, p));
-	BUG_ON(p->nr_cpus_allowed <= 1);
+	BUG_ON(tsk_nr_cpus_allowed(p) <= 1);
 
 	BUG_ON(!task_on_rq_queued(p));
 	BUG_ON(!dl_task(p));
@@ -1471,7 +1472,7 @@
 	 */
 	if (dl_task(rq->curr) &&
 	    dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) &&
-	    rq->curr->nr_cpus_allowed > 1) {
+	    tsk_nr_cpus_allowed(rq->curr) > 1) {
 		resched_curr(rq);
 		return 0;
 	}
@@ -1618,9 +1619,9 @@
 {
 	if (!task_running(rq, p) &&
 	    !test_tsk_need_resched(rq->curr) &&
-	    p->nr_cpus_allowed > 1 &&
+	    tsk_nr_cpus_allowed(p) > 1 &&
 	    dl_task(rq->curr) &&
-	    (rq->curr->nr_cpus_allowed < 2 ||
+	    (tsk_nr_cpus_allowed(rq->curr) < 2 ||
 	     !dl_entity_preempt(&p->dl, &rq->curr->dl))) {
 		push_dl_tasks(rq);
 	}
@@ -1724,7 +1725,7 @@
 
 	if (task_on_rq_queued(p) && rq->curr != p) {
 #ifdef CONFIG_SMP
-		if (p->nr_cpus_allowed > 1 && rq->dl.overloaded)
+		if (tsk_nr_cpus_allowed(p) > 1 && rq->dl.overloaded)
 			queue_push_tasks(rq);
 #else
 		if (dl_task(rq->curr))
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 4fbc3bd..cf905f6 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -626,15 +626,16 @@
 #undef P
 #undef PN
 
-#ifdef CONFIG_SCHEDSTATS
-#define P(n) SEQ_printf(m, "  .%-30s: %d\n", #n, rq->n);
-#define P64(n) SEQ_printf(m, "  .%-30s: %Ld\n", #n, rq->n);
-
 #ifdef CONFIG_SMP
+#define P64(n) SEQ_printf(m, "  .%-30s: %Ld\n", #n, rq->n);
 	P64(avg_idle);
 	P64(max_idle_balance_cost);
+#undef P64
 #endif
 
+#ifdef CONFIG_SCHEDSTATS
+#define P(n) SEQ_printf(m, "  .%-30s: %d\n", #n, rq->n);
+
 	if (schedstat_enabled()) {
 		P(yld_count);
 		P(sched_count);
@@ -644,7 +645,6 @@
 	}
 
 #undef P
-#undef P64
 #endif
 	spin_lock_irqsave(&sched_debug_lock, flags);
 	print_cfs_stats(m, cpu);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e7dd0ec..218f8e8 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -204,7 +204,7 @@
  *   OR
  * (delta_exec * (weight * lw->inv_weight)) >> WMULT_SHIFT
  *
- * Either weight := NICE_0_LOAD and lw \e prio_to_wmult[], in which case
+ * Either weight := NICE_0_LOAD and lw \e sched_prio_to_wmult[], in which case
  * we're guaranteed shift stays positive because inv_weight is guaranteed to
  * fit 32 bits, and NICE_0_LOAD gives another 10 bits; therefore shift >= 22.
  *
@@ -682,17 +682,68 @@
 	sa->period_contrib = 1023;
 	sa->load_avg = scale_load_down(se->load.weight);
 	sa->load_sum = sa->load_avg * LOAD_AVG_MAX;
-	sa->util_avg = scale_load_down(SCHED_LOAD_SCALE);
-	sa->util_sum = sa->util_avg * LOAD_AVG_MAX;
+	/*
+	 * At this point, util_avg won't be used in select_task_rq_fair anyway
+	 */
+	sa->util_avg = 0;
+	sa->util_sum = 0;
 	/* when this task is enqueued, it will contribute to its cfs_rq's load_avg */
 }
 
+/*
+ * With new tasks being created, their initial util_avgs are extrapolated
+ * based on the cfs_rq's current util_avg:
+ *
+ *   util_avg = cfs_rq->util_avg / (cfs_rq->load_avg + 1) * se.load.weight
+ *
+ * However, in many cases, the above util_avg does not give the desired
+ * value. Moreover, the sum of the util_avgs may be divergent, such
+ * as when the series is a harmonic series.
+ *
+ * To solve this problem, we also cap the util_avg of successive tasks to
+ * only 1/2 of the remaining utilization budget:
+ *
+ *   util_avg_cap = (1024 - cfs_rq->avg.util_avg) / 2^n
+ *
+ * where n denotes the nth task.
+ *
+ * For example, the simplest series from the beginning would look like:
+ *
+ *  task  util_avg: 512, 256, 128,  64,  32,   16,    8, ...
+ * cfs_rq util_avg: 512, 768, 896, 960, 992, 1008, 1016, ...
+ *
+ * Finally, that extrapolated util_avg is clamped to the cap (util_avg_cap)
+ * if util_avg > util_avg_cap.
+ */
+void post_init_entity_util_avg(struct sched_entity *se)
+{
+	struct cfs_rq *cfs_rq = cfs_rq_of(se);
+	struct sched_avg *sa = &se->avg;
+	long cap = (long)(SCHED_CAPACITY_SCALE - cfs_rq->avg.util_avg) / 2;
+
+	if (cap > 0) {
+		if (cfs_rq->avg.util_avg != 0) {
+			sa->util_avg  = cfs_rq->avg.util_avg * se->load.weight;
+			sa->util_avg /= (cfs_rq->avg.load_avg + 1);
+
+			if (sa->util_avg > cap)
+				sa->util_avg = cap;
+		} else {
+			sa->util_avg = cap;
+		}
+		sa->util_sum = sa->util_avg * LOAD_AVG_MAX;
+	}
+}
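
Assuming every new task actually hits the cap (i.e. the extrapolated value exceeds it), the series in the comment can be reproduced with a few lines of standalone C:

#include <stdio.h>

#define SCHED_CAPACITY_SCALE 1024

int main(void)
{
	long cfs_util = 0;	/* cfs_rq->avg.util_avg, initially idle */

	for (int n = 1; n <= 7; n++) {
		/* each new task gets at most half the remaining budget */
		long cap = (SCHED_CAPACITY_SCALE - cfs_util) / 2;

		printf("task %d util_avg: %4ld, cfs_rq util_avg: %4ld\n",
		       n, cap, cfs_util + cap);
		cfs_util += cap;
	}
	return 0;	/* 512, 256, 128, 64, 32, 16, 8, as in the comment */
}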
+
 static inline unsigned long cfs_rq_runnable_load_avg(struct cfs_rq *cfs_rq);
 static inline unsigned long cfs_rq_load_avg(struct cfs_rq *cfs_rq);
 #else
 void init_entity_runnable_average(struct sched_entity *se)
 {
 }
+void post_init_entity_util_avg(struct sched_entity *se)
+{
+}
 #endif
 
 /*
@@ -2437,10 +2488,12 @@
 	update_load_sub(&cfs_rq->load, se->load.weight);
 	if (!parent_entity(se))
 		update_load_sub(&rq_of(cfs_rq)->load, se->load.weight);
+#ifdef CONFIG_SMP
 	if (entity_is_task(se)) {
 		account_numa_dequeue(rq_of(cfs_rq), task_of(se));
 		list_del_init(&se->group_node);
 	}
+#endif
 	cfs_rq->nr_running--;
 }
 
@@ -2550,6 +2603,16 @@
 };
 
 /*
+ * Precomputed \Sum y^k { 1<=k<=n, where n%32==0 }. Values are rounded down
+ * to the nearest integer. See Documentation/scheduler/sched-avg.txt for how
+ * these were generated:
+ */
+static const u32 __accumulated_sum_N32[] = {
+	    0, 23371, 35056, 40899, 43820, 45281,
+	46011, 46376, 46559, 46650, 46696, 46719,
+};
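
A naive standalone generator for such a table might look as follows (link with -lm; the rounding here is simplistic, so the low digits can differ slightly from the table above, and the authoritative recipe is the one in Documentation/scheduler/sched-avg.txt):

#include <math.h>
#include <stdio.h>

int main(void)
{
	const double y = pow(0.5, 1.0 / 32.0);	/* y^32 = 0.5 */
	double sum = 0.0;

	printf("    0, ");
	for (int n = 1; n <= 11 * 32; n++) {
		sum += pow(y, n) * 1024.0;	/* each term scaled by 2^10 */
		if (n % 32 == 0)		/* report at n = 32, 64, ... */
			printf("%.0f, ", sum);
	}
	printf("\n");
	return 0;
}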
+
+/*
  * Approximate:
  *   val * y^n,    where y^32 ~= 0.5 (~1 scheduling period)
  */
@@ -2597,22 +2660,13 @@
 	else if (unlikely(n >= LOAD_AVG_MAX_N))
 		return LOAD_AVG_MAX;
 
-	/* Compute \Sum k^n combining precomputed values for k^i, \Sum k^j */
-	do {
-		contrib /= 2; /* y^LOAD_AVG_PERIOD = 1/2 */
-		contrib += runnable_avg_yN_sum[LOAD_AVG_PERIOD];
-
-		n -= LOAD_AVG_PERIOD;
-	} while (n > LOAD_AVG_PERIOD);
-
+	/* Since n < LOAD_AVG_MAX_N, n/LOAD_AVG_PERIOD < 11 */
+	contrib = __accumulated_sum_N32[n/LOAD_AVG_PERIOD];
+	n %= LOAD_AVG_PERIOD;
 	contrib = decay_load(contrib, n);
 	return contrib + runnable_avg_yN_sum[n];
 }
 
-#if (SCHED_LOAD_SHIFT - SCHED_LOAD_RESOLUTION) != 10 || SCHED_CAPACITY_SHIFT != 10
-#error "load tracking assumes 2^10 as unit"
-#endif
-
 #define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT)
 
 /*
@@ -2821,55 +2875,11 @@
 
 static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq);
 
-/* Group cfs_rq's load_avg is used for task_h_load and update_cfs_share */
-static inline int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
+static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
 {
-	struct sched_avg *sa = &cfs_rq->avg;
-	int decayed, removed = 0;
-
-	if (atomic_long_read(&cfs_rq->removed_load_avg)) {
-		s64 r = atomic_long_xchg(&cfs_rq->removed_load_avg, 0);
-		sa->load_avg = max_t(long, sa->load_avg - r, 0);
-		sa->load_sum = max_t(s64, sa->load_sum - r * LOAD_AVG_MAX, 0);
-		removed = 1;
-	}
-
-	if (atomic_long_read(&cfs_rq->removed_util_avg)) {
-		long r = atomic_long_xchg(&cfs_rq->removed_util_avg, 0);
-		sa->util_avg = max_t(long, sa->util_avg - r, 0);
-		sa->util_sum = max_t(s32, sa->util_sum - r * LOAD_AVG_MAX, 0);
-	}
-
-	decayed = __update_load_avg(now, cpu_of(rq_of(cfs_rq)), sa,
-		scale_load_down(cfs_rq->load.weight), cfs_rq->curr != NULL, cfs_rq);
-
-#ifndef CONFIG_64BIT
-	smp_wmb();
-	cfs_rq->load_last_update_time_copy = sa->last_update_time;
-#endif
-
-	return decayed || removed;
-}
-
-/* Update task and its cfs_rq load average */
-static inline void update_load_avg(struct sched_entity *se, int update_tg)
-{
-	struct cfs_rq *cfs_rq = cfs_rq_of(se);
-	u64 now = cfs_rq_clock_task(cfs_rq);
 	struct rq *rq = rq_of(cfs_rq);
 	int cpu = cpu_of(rq);
 
-	/*
-	 * Track task load average for carrying it to new CPU after migrated, and
-	 * track group sched_entity load average for task_h_load calc in migration
-	 */
-	__update_load_avg(now, cpu, &se->avg,
-			  se->on_rq * scale_load_down(se->load.weight),
-			  cfs_rq->curr == se, NULL);
-
-	if (update_cfs_rq_load_avg(now, cfs_rq) && update_tg)
-		update_tg_load_avg(cfs_rq, 0);
-
 	if (cpu == smp_processor_id() && &rq->cfs == cfs_rq) {
 		unsigned long max = rq->cpu_capacity_orig;
 
@@ -2894,6 +2904,61 @@
 	}
 }
 
+/* Group cfs_rq's load_avg is used for task_h_load and update_cfs_share */
+static inline int
+update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
+{
+	struct sched_avg *sa = &cfs_rq->avg;
+	int decayed, removed_load = 0, removed_util = 0;
+
+	if (atomic_long_read(&cfs_rq->removed_load_avg)) {
+		s64 r = atomic_long_xchg(&cfs_rq->removed_load_avg, 0);
+		sa->load_avg = max_t(long, sa->load_avg - r, 0);
+		sa->load_sum = max_t(s64, sa->load_sum - r * LOAD_AVG_MAX, 0);
+		removed_load = 1;
+	}
+
+	if (atomic_long_read(&cfs_rq->removed_util_avg)) {
+		long r = atomic_long_xchg(&cfs_rq->removed_util_avg, 0);
+		sa->util_avg = max_t(long, sa->util_avg - r, 0);
+		sa->util_sum = max_t(s32, sa->util_sum - r * LOAD_AVG_MAX, 0);
+		removed_util = 1;
+	}
+
+	decayed = __update_load_avg(now, cpu_of(rq_of(cfs_rq)), sa,
+		scale_load_down(cfs_rq->load.weight), cfs_rq->curr != NULL, cfs_rq);
+
+#ifndef CONFIG_64BIT
+	smp_wmb();
+	cfs_rq->load_last_update_time_copy = sa->last_update_time;
+#endif
+
+	if (update_freq && (decayed || removed_util))
+		cfs_rq_util_change(cfs_rq);
+
+	return decayed || removed_load;
+}
+
+/* Update task and its cfs_rq load average */
+static inline void update_load_avg(struct sched_entity *se, int update_tg)
+{
+	struct cfs_rq *cfs_rq = cfs_rq_of(se);
+	u64 now = cfs_rq_clock_task(cfs_rq);
+	struct rq *rq = rq_of(cfs_rq);
+	int cpu = cpu_of(rq);
+
+	/*
+	 * Track task load average for carrying it to new CPU after migrated, and
+	 * track group sched_entity load average for task_h_load calc in migration
+	 */
+	__update_load_avg(now, cpu, &se->avg,
+			  se->on_rq * scale_load_down(se->load.weight),
+			  cfs_rq->curr == se, NULL);
+
+	if (update_cfs_rq_load_avg(now, cfs_rq, true) && update_tg)
+		update_tg_load_avg(cfs_rq, 0);
+}
+
 static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	if (!sched_feat(ATTACH_AGE_LOAD))
@@ -2919,6 +2984,8 @@
 	cfs_rq->avg.load_sum += se->avg.load_sum;
 	cfs_rq->avg.util_avg += se->avg.util_avg;
 	cfs_rq->avg.util_sum += se->avg.util_sum;
+
+	cfs_rq_util_change(cfs_rq);
 }
 
 static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -2931,6 +2998,8 @@
 	cfs_rq->avg.load_sum = max_t(s64,  cfs_rq->avg.load_sum - se->avg.load_sum, 0);
 	cfs_rq->avg.util_avg = max_t(long, cfs_rq->avg.util_avg - se->avg.util_avg, 0);
 	cfs_rq->avg.util_sum = max_t(s32,  cfs_rq->avg.util_sum - se->avg.util_sum, 0);
+
+	cfs_rq_util_change(cfs_rq);
 }
 
 /* Add the load generated by se into cfs_rq's load average */
@@ -2948,7 +3017,7 @@
 			cfs_rq->curr == se, NULL);
 	}
 
-	decayed = update_cfs_rq_load_avg(now, cfs_rq);
+	decayed = update_cfs_rq_load_avg(now, cfs_rq, !migrated);
 
 	cfs_rq->runnable_load_avg += sa->load_avg;
 	cfs_rq->runnable_load_sum += sa->load_sum;
@@ -3185,20 +3254,61 @@
 #endif
 }
 
+
+/*
+ * MIGRATION
+ *
+ *	dequeue
+ *	  update_curr()
+ *	    update_min_vruntime()
+ *	  vruntime -= min_vruntime
+ *
+ *	enqueue
+ *	  update_curr()
+ *	    update_min_vruntime()
+ *	  vruntime += min_vruntime
+ *
+ * this way the vruntime transition between RQs is done when both
+ * min_vruntime are up-to-date.
+ *
+ * WAKEUP (remote)
+ *
+ *	->migrate_task_rq_fair() (p->state == TASK_WAKING)
+ *	  vruntime -= min_vruntime
+ *
+ *	enqueue
+ *	  update_curr()
+ *	    update_min_vruntime()
+ *	  vruntime += min_vruntime
+ *
+ * this way we use a possibly stale min_vruntime on the originating
+ * CPU but an up-to-date min_vruntime on the destination CPU.
+ */
+
 static void
 enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
-	/*
-	 * Update the normalized vruntime before updating min_vruntime
-	 * through calling update_curr().
-	 */
-	if (!(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_WAKING))
-		se->vruntime += cfs_rq->min_vruntime;
+	bool renorm = !(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATED);
+	bool curr = cfs_rq->curr == se;
 
 	/*
-	 * Update run-time statistics of the 'current'.
+	 * If we're the current task, we must renormalise before calling
+	 * update_curr().
 	 */
+	if (renorm && curr)
+		se->vruntime += cfs_rq->min_vruntime;
+
 	update_curr(cfs_rq);
+
+	/*
+	 * Otherwise, renormalise after, such that we're placed at the current
+	 * moment in time, instead of some random moment in the past. Being
+	 * placed in the past could significantly boost this task to the
+	 * fairness detriment of existing tasks.
+	 */
+	if (renorm && !curr)
+		se->vruntime += cfs_rq->min_vruntime;
+
 	enqueue_entity_load_avg(cfs_rq, se);
 	account_entity_enqueue(cfs_rq, se);
 	update_cfs_shares(cfs_rq);
@@ -3214,7 +3324,7 @@
 		update_stats_enqueue(cfs_rq, se);
 		check_spread(cfs_rq, se);
 	}
-	if (se != cfs_rq->curr)
+	if (!curr)
 		__enqueue_entity(cfs_rq, se);
 	se->on_rq = 1;
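
The relative-vruntime trick described in the MIGRATION comment above, shown with made-up numbers:

#include <stdio.h>

int main(void)
{
	unsigned long long src_min = 1000, dst_min = 5000;
	unsigned long long vruntime = 1200;	/* 200 ahead of the source min_vruntime */

	vruntime -= src_min;	/* dequeue (or migrate_task_rq_fair()) */
	vruntime += dst_min;	/* enqueue on the destination cfs_rq */
	printf("%llu\n", vruntime);	/* 5200: the relative lag of 200 is preserved */
	return 0;
}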
 
@@ -4422,7 +4532,7 @@
 }
 
 #ifdef CONFIG_SMP
-
+#ifdef CONFIG_NO_HZ_COMMON
 /*
  * per rq 'load' array crap; XXX kill this.
  */
@@ -4488,13 +4598,13 @@
 	}
 	return load;
 }
+#endif /* CONFIG_NO_HZ_COMMON */
 
 /**
- * __update_cpu_load - update the rq->cpu_load[] statistics
+ * cpu_load_update - update the rq->cpu_load[] statistics
  * @this_rq: The rq to update statistics for
  * @this_load: The current load
  * @pending_updates: The number of missed updates
- * @active: !0 for NOHZ_FULL
  *
  * Update rq->cpu_load[] statistics. This function is usually called every
  * scheduler tick (TICK_NSEC).
@@ -4523,12 +4633,12 @@
  *   load[i]_n = (1 - 1/2^i)^n * load[i]_0
  *
  * see decay_load_missed(). For NOHZ_FULL we get to subtract and add the extra
- * term. See the @active paramter.
+ * term.
  */
-static void __update_cpu_load(struct rq *this_rq, unsigned long this_load,
-			      unsigned long pending_updates, int active)
+static void cpu_load_update(struct rq *this_rq, unsigned long this_load,
+			    unsigned long pending_updates)
 {
-	unsigned long tickless_load = active ? this_rq->cpu_load[0] : 0;
+	unsigned long __maybe_unused tickless_load = this_rq->cpu_load[0];
 	int i, scale;
 
 	this_rq->nr_load_updates++;
@@ -4541,6 +4651,7 @@
 		/* scale is effectively 1 << i now, and >> i divides by scale */
 
 		old_load = this_rq->cpu_load[i];
+#ifdef CONFIG_NO_HZ_COMMON
 		old_load = decay_load_missed(old_load, pending_updates - 1, i);
 		if (tickless_load) {
 			old_load -= decay_load_missed(tickless_load, pending_updates - 1, i);
@@ -4551,6 +4662,7 @@
 			 */
 			old_load += tickless_load;
 		}
+#endif
 		new_load = this_load;
 		/*
 		 * Round up the averaging division if load is increasing. This
@@ -4573,10 +4685,23 @@
 }
 
 #ifdef CONFIG_NO_HZ_COMMON
-static void __update_cpu_load_nohz(struct rq *this_rq,
-				   unsigned long curr_jiffies,
-				   unsigned long load,
-				   int active)
+/*
+ * There is no sane way to deal with nohz on smp when using jiffies because the
+ * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading,
+ * causing off-by-one errors in observed deltas; {0,2} instead of {1,1}.
+ *
+ * Therefore we need to avoid the delta approach from the regular tick when
+ * possible, since it would seriously skew the load calculation. This is why we
+ * use cpu_load_update_periodic() for CPUs that are not in nohz mode. However,
+ * we'll rely on jiffies deltas for updates happening while in nohz mode (idle
+ * ticks, idle loop exit, nohz_idle_balance, nohz full exit...)
+ *
+ * This means we might still be one tick off for nohz periods.
+ */
+
+static void cpu_load_update_nohz(struct rq *this_rq,
+				 unsigned long curr_jiffies,
+				 unsigned long load)
 {
 	unsigned long pending_updates;
 
@@ -4588,28 +4713,15 @@
 		 * In the NOHZ_FULL case, we were non-idle, we should consider
 		 * its weighted load.
 		 */
-		__update_cpu_load(this_rq, load, pending_updates, active);
+		cpu_load_update(this_rq, load, pending_updates);
 	}
 }
 
 /*
- * There is no sane way to deal with nohz on smp when using jiffies because the
- * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading
- * causing off-by-one errors in observed deltas; {0,2} instead of {1,1}.
- *
- * Therefore we cannot use the delta approach from the regular tick since that
- * would seriously skew the load calculation. However we'll make do for those
- * updates happening while idle (nohz_idle_balance) or coming out of idle
- * (tick_nohz_idle_exit).
- *
- * This means we might still be one tick off for nohz periods.
- */
-
-/*
  * Called from nohz_idle_balance() to update the load ratings before doing the
  * idle balance.
  */
-static void update_cpu_load_idle(struct rq *this_rq)
+static void cpu_load_update_idle(struct rq *this_rq)
 {
 	/*
 	 * bail if there's load or we're actually up-to-date.
@@ -4617,38 +4729,71 @@
 	if (weighted_cpuload(cpu_of(this_rq)))
 		return;
 
-	__update_cpu_load_nohz(this_rq, READ_ONCE(jiffies), 0, 0);
+	cpu_load_update_nohz(this_rq, READ_ONCE(jiffies), 0);
 }
 
 /*
- * Called from tick_nohz_idle_exit() -- try and fix up the ticks we missed.
+ * Record CPU load on nohz entry so we know the tickless load to account
+ * for on nohz exit. cpu_load[0] then happens to be updated more frequently
+ * than the other cpu_load[idx] entries, but that should be fine because
+ * cpu_load readers shouldn't rely on synchronized cpu_load[*] updates.
  */
-void update_cpu_load_nohz(int active)
+void cpu_load_update_nohz_start(void)
 {
 	struct rq *this_rq = this_rq();
+
+	/*
+	 * This is all lockless but should be fine. If weighted_cpuload changes
+	 * concurrently we'll exit nohz. And the cpu_load write can race with
+	 * cpu_load_update_idle(), but both updaters would be writing the same
+	 * value.
+	 */
+	this_rq->cpu_load[0] = weighted_cpuload(cpu_of(this_rq));
+}
+
+/*
+ * Account the tickless load in the end of a nohz frame.
+ */
+void cpu_load_update_nohz_stop(void)
+{
 	unsigned long curr_jiffies = READ_ONCE(jiffies);
-	unsigned long load = active ? weighted_cpuload(cpu_of(this_rq)) : 0;
+	struct rq *this_rq = this_rq();
+	unsigned long load;
 
 	if (curr_jiffies == this_rq->last_load_update_tick)
 		return;
 
+	load = weighted_cpuload(cpu_of(this_rq));
 	raw_spin_lock(&this_rq->lock);
-	__update_cpu_load_nohz(this_rq, curr_jiffies, load, active);
+	update_rq_clock(this_rq);
+	cpu_load_update_nohz(this_rq, curr_jiffies, load);
 	raw_spin_unlock(&this_rq->lock);
 }
-#endif /* CONFIG_NO_HZ */
+#else /* !CONFIG_NO_HZ_COMMON */
+static inline void cpu_load_update_nohz(struct rq *this_rq,
+					unsigned long curr_jiffies,
+					unsigned long load) { }
+#endif /* CONFIG_NO_HZ_COMMON */
+
+static void cpu_load_update_periodic(struct rq *this_rq, unsigned long load)
+{
+#ifdef CONFIG_NO_HZ_COMMON
+	/* See the mess around cpu_load_update_nohz(). */
+	this_rq->last_load_update_tick = READ_ONCE(jiffies);
+#endif
+	cpu_load_update(this_rq, load, 1);
+}
 
 /*
  * Called from scheduler_tick()
  */
-void update_cpu_load_active(struct rq *this_rq)
+void cpu_load_update_active(struct rq *this_rq)
 {
 	unsigned long load = weighted_cpuload(cpu_of(this_rq));
-	/*
-	 * See the mess around update_cpu_load_idle() / update_cpu_load_nohz().
-	 */
-	this_rq->last_load_update_tick = jiffies;
-	__update_cpu_load(this_rq, load, 1, 1);
+
+	if (tick_nohz_tick_stopped())
+		cpu_load_update_nohz(this_rq, READ_ONCE(jiffies), load);
+	else
+		cpu_load_update_periodic(this_rq, load);
 }
 
 /*
@@ -4706,46 +4851,6 @@
 	return 0;
 }
 
-static void record_wakee(struct task_struct *p)
-{
-	/*
-	 * Rough decay (wiping) for cost saving, don't worry
-	 * about the boundary, really active task won't care
-	 * about the loss.
-	 */
-	if (time_after(jiffies, current->wakee_flip_decay_ts + HZ)) {
-		current->wakee_flips >>= 1;
-		current->wakee_flip_decay_ts = jiffies;
-	}
-
-	if (current->last_wakee != p) {
-		current->last_wakee = p;
-		current->wakee_flips++;
-	}
-}
-
-static void task_waking_fair(struct task_struct *p)
-{
-	struct sched_entity *se = &p->se;
-	struct cfs_rq *cfs_rq = cfs_rq_of(se);
-	u64 min_vruntime;
-
-#ifndef CONFIG_64BIT
-	u64 min_vruntime_copy;
-
-	do {
-		min_vruntime_copy = cfs_rq->min_vruntime_copy;
-		smp_rmb();
-		min_vruntime = cfs_rq->min_vruntime;
-	} while (min_vruntime != min_vruntime_copy);
-#else
-	min_vruntime = cfs_rq->min_vruntime;
-#endif
-
-	se->vruntime -= min_vruntime;
-	record_wakee(p);
-}
-
 #ifdef CONFIG_FAIR_GROUP_SCHED
 /*
  * effective_load() calculates the load change as seen from the root_task_group
@@ -4861,17 +4966,39 @@
 
 #endif
 
+static void record_wakee(struct task_struct *p)
+{
+	/*
+	 * Only decay a single time; tasks that have less than 1 wakeup per
+	 * jiffy will not have built up many flips.
+	 */
+	if (time_after(jiffies, current->wakee_flip_decay_ts + HZ)) {
+		current->wakee_flips >>= 1;
+		current->wakee_flip_decay_ts = jiffies;
+	}
+
+	if (current->last_wakee != p) {
+		current->last_wakee = p;
+		current->wakee_flips++;
+	}
+}
+
 /*
  * Detect M:N waker/wakee relationships via a switching-frequency heuristic.
+ *
  * A waker of many should wake a different task than the one last awakened
- * at a frequency roughly N times higher than one of its wakees.  In order
- * to determine whether we should let the load spread vs consolodating to
- * shared cache, we look for a minimum 'flip' frequency of llc_size in one
- * partner, and a factor of lls_size higher frequency in the other.  With
- * both conditions met, we can be relatively sure that the relationship is
- * non-monogamous, with partner count exceeding socket size.  Waker/wakee
- * being client/server, worker/dispatcher, interrupt source or whatever is
- * irrelevant, spread criteria is apparent partner count exceeds socket size.
+ * at a frequency roughly N times higher than one of its wakees.
+ *
+ * In order to determine whether we should let the load spread vs consolidating
+ * to shared cache, we look for a minimum 'flip' frequency of llc_size in one
+ * partner, and a factor of llc_size higher frequency in the other.
+ *
+ * With both conditions met, we can be relatively sure that the relationship is
+ * non-monogamous, with partner count exceeding socket size.
+ *
+ * Waker/wakee being client/server, worker/dispatcher, interrupt source or
+ * whatever is irrelevant; the spread criterion is simply that the apparent
+ * partner count exceeds the socket size.
  */
 static int wake_wide(struct task_struct *p)
 {
@@ -5176,8 +5303,10 @@
 	int want_affine = 0;
 	int sync = wake_flags & WF_SYNC;
 
-	if (sd_flag & SD_BALANCE_WAKE)
+	if (sd_flag & SD_BALANCE_WAKE) {
+		record_wakee(p);
 		want_affine = !wake_wide(p) && cpumask_test_cpu(cpu, tsk_cpus_allowed(p));
+	}
 
 	rcu_read_lock();
 	for_each_domain(cpu, tmp) {
@@ -5257,6 +5386,32 @@
 static void migrate_task_rq_fair(struct task_struct *p)
 {
 	/*
+	 * As blocked tasks retain absolute vruntime the migration needs to
+	 * deal with this by subtracting the old and adding the new
+	 * min_vruntime -- the latter is done by enqueue_entity() when placing
+	 * the task on the new runqueue.
+	 */
+	if (p->state == TASK_WAKING) {
+		struct sched_entity *se = &p->se;
+		struct cfs_rq *cfs_rq = cfs_rq_of(se);
+		u64 min_vruntime;
+
+#ifndef CONFIG_64BIT
+		u64 min_vruntime_copy;
+
+		do {
+			min_vruntime_copy = cfs_rq->min_vruntime_copy;
+			smp_rmb();
+			min_vruntime = cfs_rq->min_vruntime;
+		} while (min_vruntime != min_vruntime_copy);
+#else
+		min_vruntime = cfs_rq->min_vruntime;
+#endif
+
+		se->vruntime -= min_vruntime;
+	}
+
+	/*
	 * We are supposed to update the task to "current" time, then it's up to date
 	 * and ready to go to new CPU/cfs_rq. But we have difficulty in getting
 	 * what current time is, so simply throw away the out-of-date time. This
@@ -5439,7 +5594,7 @@
 }
 
 static struct task_struct *
-pick_next_task_fair(struct rq *rq, struct task_struct *prev)
+pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct pin_cookie cookie)
 {
 	struct cfs_rq *cfs_rq = &rq->cfs;
 	struct sched_entity *se;
@@ -5552,9 +5707,9 @@
 	 * further scheduler activity on it and we're being very careful to
 	 * re-start the picking loop.
 	 */
-	lockdep_unpin_lock(&rq->lock);
+	lockdep_unpin_lock(&rq->lock, cookie);
 	new_tasks = idle_balance(rq);
-	lockdep_pin_lock(&rq->lock);
+	lockdep_repin_lock(&rq->lock, cookie);
 	/*
 	 * Because idle_balance() releases (and re-acquires) rq->lock, it is
 	 * possible for any higher priority task to appear. In that case we
@@ -5653,7 +5808,7 @@
  *   W_i,0 = \Sum_j w_i,j                                             (2)
  *
  * Where w_i,j is the weight of the j-th runnable task on cpu i. This weight
- * is derived from the nice value as per prio_to_weight[].
+ * is derived from the nice value as per sched_prio_to_weight[].
  *
  * The weight average is an exponential decay average of the instantaneous
  * weight:
@@ -6155,7 +6310,7 @@
 		if (throttled_hierarchy(cfs_rq))
 			continue;
 
-		if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq))
+		if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq, true))
 			update_tg_load_avg(cfs_rq, 0);
 	}
 	raw_spin_unlock_irqrestore(&rq->lock, flags);
@@ -6216,7 +6371,7 @@
 
 	raw_spin_lock_irqsave(&rq->lock, flags);
 	update_rq_clock(rq);
-	update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq);
+	update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq, true);
 	raw_spin_unlock_irqrestore(&rq->lock, flags);
 }
 
@@ -6625,6 +6780,9 @@
 	if (!(env->sd->flags & SD_ASYM_PACKING))
 		return true;
 
+	/* No ASYM_PACKING if target cpu is already busy */
+	if (env->idle == CPU_NOT_IDLE)
+		return true;
 	/*
 	 * ASYM_PACKING needs to move all the work to the lowest
 	 * numbered CPUs in the group, therefore mark all groups
@@ -6634,7 +6792,8 @@
 		if (!sds->busiest)
 			return true;
 
-		if (group_first_cpu(sds->busiest) > group_first_cpu(sg))
+		/* Prefer to pull work from the highest-numbered CPU possible */
+		if (group_first_cpu(sds->busiest) < group_first_cpu(sg))
 			return true;
 	}
 
@@ -6780,6 +6939,9 @@
 	if (!(env->sd->flags & SD_ASYM_PACKING))
 		return 0;
 
+	if (env->idle == CPU_NOT_IDLE)
+		return 0;
+
 	if (!sds->busiest)
 		return 0;
 
@@ -6888,9 +7050,10 @@
 	}
 
 	/*
-	 * In the presence of smp nice balancing, certain scenarios can have
-	 * max load less than avg load(as we skip the groups at or below
-	 * its cpu_capacity, while calculating max_load..)
+	 * The avg load of the busiest sg can be less, and the avg load of the
+	 * local sg can be greater, than the avg load across all sgs of the sd,
+	 * because avg load factors in sg capacity and sgs with a smaller
+	 * group_type are skipped when updating the busiest sg:
 	 */
 	if (busiest->avg_load <= sds->avg_load ||
 	    local->avg_load >= sds->avg_load) {
@@ -6903,11 +7066,12 @@
 	 */
 	if (busiest->group_type == group_overloaded &&
 	    local->group_type   == group_overloaded) {
-		load_above_capacity = busiest->sum_nr_running *
-					SCHED_LOAD_SCALE;
-		if (load_above_capacity > busiest->group_capacity)
+		load_above_capacity = busiest->sum_nr_running * SCHED_CAPACITY_SCALE;
+		if (load_above_capacity > busiest->group_capacity) {
 			load_above_capacity -= busiest->group_capacity;
-		else
+			load_above_capacity *= NICE_0_LOAD;
+			load_above_capacity /= busiest->group_capacity;
+		} else
 			load_above_capacity = ~0UL;
 	}
 
@@ -6915,9 +7079,8 @@
 	 * We're trying to get all the cpus to the average_load, so we don't
 	 * want to push ourselves above the average load, nor do we wish to
 	 * reduce the max loaded cpu below the average load. At the same time,
-	 * we also don't want to reduce the group load below the group capacity
-	 * (so that we can implement power-savings policies etc). Thus we look
-	 * for the minimum possible imbalance.
+	 * we also don't want to reduce the group load below the group
+	 * capacity. Thus we look for the minimum possible imbalance.
 	 */
 	max_pull = min(busiest->avg_load - sds->avg_load, load_above_capacity);
 
@@ -6941,10 +7104,7 @@
 
 /**
  * find_busiest_group - Returns the busiest group within the sched_domain
- * if there is an imbalance. If there isn't an imbalance, and
- * the user has opted for power-savings, it returns a group whose
- * CPUs can be put to idle by rebalancing those tasks elsewhere, if
- * such a group exists.
+ * if there is an imbalance.
  *
  * Also calculates the amount of weighted load which should be moved
  * to restore balance.
@@ -6952,9 +7112,6 @@
  * @env: The load balancing environment.
  *
  * Return:	- The busiest group if imbalance exists.
- *		- If no imbalance and user has opted for power-savings balance,
- *		   return the least loaded group whose CPUs can be
- *		   put to idle by rebalancing its tasks onto our group.
  */
 static struct sched_group *find_busiest_group(struct lb_env *env)
 {
@@ -6972,8 +7129,7 @@
 	busiest = &sds.busiest_stat;
 
 	/* ASYM feature bypasses nice load balance check */
-	if ((env->idle == CPU_IDLE || env->idle == CPU_NEWLY_IDLE) &&
-	    check_asym_packing(env, &sds))
+	if (check_asym_packing(env, &sds))
 		return sds.busiest;
 
 	/* There is no busy sibling group to pull tasks from */
@@ -7398,10 +7554,7 @@
 					&busiest->active_balance_work);
 			}
 
-			/*
-			 * We've kicked active balancing, reset the failure
-			 * counter.
-			 */
+			/* We've kicked active balancing, force task migration. */
 			sd->nr_balance_failed = sd->cache_nice_tries+1;
 		}
 	} else
@@ -7636,10 +7789,13 @@
 		schedstat_inc(sd, alb_count);
 
 		p = detach_one_task(&env);
-		if (p)
+		if (p) {
 			schedstat_inc(sd, alb_pushed);
-		else
+			/* Active balancing done, reset the failure counter. */
+			sd->nr_balance_failed = 0;
+		} else {
 			schedstat_inc(sd, alb_failed);
+		}
 	}
 	rcu_read_unlock();
 out_unlock:
@@ -7710,7 +7866,7 @@
 	return;
 }
 
-static inline void nohz_balance_exit_idle(int cpu)
+void nohz_balance_exit_idle(unsigned int cpu)
 {
 	if (unlikely(test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))) {
 		/*
@@ -7783,18 +7939,6 @@
 	atomic_inc(&nohz.nr_cpus);
 	set_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
 }
-
-static int sched_ilb_notifier(struct notifier_block *nfb,
-					unsigned long action, void *hcpu)
-{
-	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_DYING:
-		nohz_balance_exit_idle(smp_processor_id());
-		return NOTIFY_OK;
-	default:
-		return NOTIFY_DONE;
-	}
-}
 #endif
 
 static DEFINE_SPINLOCK(balancing);
@@ -7956,7 +8100,7 @@
 		if (time_after_eq(jiffies, rq->next_balance)) {
 			raw_spin_lock_irq(&rq->lock);
 			update_rq_clock(rq);
-			update_cpu_load_idle(rq);
+			cpu_load_update_idle(rq);
 			raw_spin_unlock_irq(&rq->lock);
 			rebalance_domains(rq, CPU_IDLE);
 		}
@@ -8381,6 +8525,7 @@
 		init_cfs_rq(cfs_rq);
 		init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]);
 		init_entity_runnable_average(se);
+		post_init_entity_util_avg(se);
 	}
 
 	return 1;
@@ -8537,7 +8682,6 @@
 	.rq_online		= rq_online_fair,
 	.rq_offline		= rq_offline_fair,
 
-	.task_waking		= task_waking_fair,
 	.task_dead		= task_dead_fair,
 	.set_cpus_allowed	= set_cpus_allowed_common,
 #endif
@@ -8599,7 +8743,6 @@
 #ifdef CONFIG_NO_HZ_COMMON
 	nohz.next_balance = jiffies;
 	zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT);
-	cpu_notifier(sched_ilb_notifier, 0);
 #endif
 #endif /* SMP */
 
diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c
index 47ce949..2ce5458 100644
--- a/kernel/sched/idle_task.c
+++ b/kernel/sched/idle_task.c
@@ -24,7 +24,7 @@
 }
 
 static struct task_struct *
-pick_next_task_idle(struct rq *rq, struct task_struct *prev)
+pick_next_task_idle(struct rq *rq, struct task_struct *prev, struct pin_cookie cookie)
 {
 	put_prev_task(rq, prev);
 
diff --git a/kernel/sched/loadavg.c b/kernel/sched/loadavg.c
index ef71590..b0b93fd 100644
--- a/kernel/sched/loadavg.c
+++ b/kernel/sched/loadavg.c
@@ -99,10 +99,13 @@
 static unsigned long
 calc_load(unsigned long load, unsigned long exp, unsigned long active)
 {
-	load *= exp;
-	load += active * (FIXED_1 - exp);
-	load += 1UL << (FSHIFT - 1);
-	return load >> FSHIFT;
+	unsigned long newload;
+
+	newload = load * exp + active * (FIXED_1 - exp);
+	if (active >= load)
+		newload += FIXED_1-1;
+
+	return newload / FIXED_1;
 }
 
 #ifdef CONFIG_NO_HZ_COMMON
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index ec4f538d..d5690b7 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -334,7 +334,7 @@
 	rt_rq = &rq_of_rt_rq(rt_rq)->rt;
 
 	rt_rq->rt_nr_total++;
-	if (p->nr_cpus_allowed > 1)
+	if (tsk_nr_cpus_allowed(p) > 1)
 		rt_rq->rt_nr_migratory++;
 
 	update_rt_migration(rt_rq);
@@ -351,7 +351,7 @@
 	rt_rq = &rq_of_rt_rq(rt_rq)->rt;
 
 	rt_rq->rt_nr_total--;
-	if (p->nr_cpus_allowed > 1)
+	if (tsk_nr_cpus_allowed(p) > 1)
 		rt_rq->rt_nr_migratory--;
 
 	update_rt_migration(rt_rq);
@@ -953,14 +953,14 @@
 	if (curr->sched_class != &rt_sched_class)
 		return;
 
-	/* Kick cpufreq (see the comment in linux/cpufreq.h). */
-	if (cpu_of(rq) == smp_processor_id())
-		cpufreq_trigger_update(rq_clock(rq));
-
 	delta_exec = rq_clock_task(rq) - curr->se.exec_start;
 	if (unlikely((s64)delta_exec <= 0))
 		return;
 
+	/* Kick cpufreq (see the comment in linux/cpufreq.h). */
+	if (cpu_of(rq) == smp_processor_id())
+		cpufreq_trigger_update(rq_clock(rq));
+
 	schedstat_set(curr->se.statistics.exec_max,
 		      max(curr->se.statistics.exec_max, delta_exec));
 
@@ -1324,7 +1324,7 @@
 
 	enqueue_rt_entity(rt_se, flags);
 
-	if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
+	if (!task_current(rq, p) && tsk_nr_cpus_allowed(p) > 1)
 		enqueue_pushable_task(rq, p);
 }
 
@@ -1413,7 +1413,7 @@
 	 * will have to sort it out.
 	 */
 	if (curr && unlikely(rt_task(curr)) &&
-	    (curr->nr_cpus_allowed < 2 ||
+	    (tsk_nr_cpus_allowed(curr) < 2 ||
 	     curr->prio <= p->prio)) {
 		int target = find_lowest_rq(p);
 
@@ -1437,7 +1437,7 @@
 	 * Current can't be migrated, useless to reschedule,
 	 * let's hope p can move out.
 	 */
-	if (rq->curr->nr_cpus_allowed == 1 ||
+	if (tsk_nr_cpus_allowed(rq->curr) == 1 ||
 	    !cpupri_find(&rq->rd->cpupri, rq->curr, NULL))
 		return;
 
@@ -1445,7 +1445,7 @@
 	 * p is migratable, so let's not schedule it and
 	 * see if it is pushed or pulled somewhere else.
 	 */
-	if (p->nr_cpus_allowed != 1
+	if (tsk_nr_cpus_allowed(p) != 1
 	    && cpupri_find(&rq->rd->cpupri, p, NULL))
 		return;
 
@@ -1524,7 +1524,7 @@
 }
 
 static struct task_struct *
-pick_next_task_rt(struct rq *rq, struct task_struct *prev)
+pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct pin_cookie cookie)
 {
 	struct task_struct *p;
 	struct rt_rq *rt_rq = &rq->rt;
@@ -1536,9 +1536,9 @@
 		 * disabled avoiding further scheduler activity on it and we're
 		 * being very careful to re-start the picking loop.
 		 */
-		lockdep_unpin_lock(&rq->lock);
+		lockdep_unpin_lock(&rq->lock, cookie);
 		pull_rt_task(rq);
-		lockdep_pin_lock(&rq->lock);
+		lockdep_repin_lock(&rq->lock, cookie);
 		/*
 		 * pull_rt_task() can drop (and re-acquire) rq->lock; this
 		 * means a dl or stop task can slip in, in which case we need
@@ -1579,7 +1579,7 @@
 	 * The previous task needs to be made eligible for pushing
 	 * if it is still active
 	 */
-	if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1)
+	if (on_rt_rq(&p->rt) && tsk_nr_cpus_allowed(p) > 1)
 		enqueue_pushable_task(rq, p);
 }
 
@@ -1629,7 +1629,7 @@
 	if (unlikely(!lowest_mask))
 		return -1;
 
-	if (task->nr_cpus_allowed == 1)
+	if (tsk_nr_cpus_allowed(task) == 1)
 		return -1; /* No other targets possible */
 
 	if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
@@ -1762,7 +1762,7 @@
 
 	BUG_ON(rq->cpu != task_cpu(p));
 	BUG_ON(task_current(rq, p));
-	BUG_ON(p->nr_cpus_allowed <= 1);
+	BUG_ON(tsk_nr_cpus_allowed(p) <= 1);
 
 	BUG_ON(!task_on_rq_queued(p));
 	BUG_ON(!rt_task(p));
@@ -2122,9 +2122,9 @@
 {
 	if (!task_running(rq, p) &&
 	    !test_tsk_need_resched(rq->curr) &&
-	    p->nr_cpus_allowed > 1 &&
+	    tsk_nr_cpus_allowed(p) > 1 &&
 	    (dl_task(rq->curr) || rt_task(rq->curr)) &&
-	    (rq->curr->nr_cpus_allowed < 2 ||
+	    (tsk_nr_cpus_allowed(rq->curr) < 2 ||
 	     rq->curr->prio <= p->prio))
 		push_rt_tasks(rq);
 }
@@ -2197,7 +2197,7 @@
 	 */
 	if (task_on_rq_queued(p) && rq->curr != p) {
 #ifdef CONFIG_SMP
-		if (p->nr_cpus_allowed > 1 && rq->rt.overloaded)
+		if (tsk_nr_cpus_allowed(p) > 1 && rq->rt.overloaded)
 			queue_push_tasks(rq);
 #else
 		if (p->prio < rq->curr->prio)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ec2e8d2..72f1f30 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -31,9 +31,9 @@
 extern long calc_load_fold_active(struct rq *this_rq);
 
 #ifdef CONFIG_SMP
-extern void update_cpu_load_active(struct rq *this_rq);
+extern void cpu_load_update_active(struct rq *this_rq);
 #else
-static inline void update_cpu_load_active(struct rq *this_rq) { }
+static inline void cpu_load_update_active(struct rq *this_rq) { }
 #endif
 
 /*
@@ -49,25 +49,32 @@
  * and does not change the user-interface for setting shares/weights.
  *
  * We increase resolution only if we have enough bits to allow this increased
- * resolution (i.e. BITS_PER_LONG > 32). The costs for increasing resolution
- * when BITS_PER_LONG <= 32 are pretty high and the returns do not justify the
- * increased costs.
+ * resolution (i.e. 64bit). The costs for increasing resolution when 32bit are
+ * pretty high and the returns do not justify the increased costs.
+ *
+ * Really only required when CONFIG_FAIR_GROUP_SCHED is also set, but to
+ * increase coverage and consistency always enable it on 64bit platforms.
  */
-#if 0 /* BITS_PER_LONG > 32 -- currently broken: it increases power usage under light load  */
-# define SCHED_LOAD_RESOLUTION	10
-# define scale_load(w)		((w) << SCHED_LOAD_RESOLUTION)
-# define scale_load_down(w)	((w) >> SCHED_LOAD_RESOLUTION)
+#ifdef CONFIG_64BIT
+# define NICE_0_LOAD_SHIFT	(SCHED_FIXEDPOINT_SHIFT + SCHED_FIXEDPOINT_SHIFT)
+# define scale_load(w)		((w) << SCHED_FIXEDPOINT_SHIFT)
+# define scale_load_down(w)	((w) >> SCHED_FIXEDPOINT_SHIFT)
 #else
-# define SCHED_LOAD_RESOLUTION	0
+# define NICE_0_LOAD_SHIFT	(SCHED_FIXEDPOINT_SHIFT)
 # define scale_load(w)		(w)
 # define scale_load_down(w)	(w)
 #endif
 
-#define SCHED_LOAD_SHIFT	(10 + SCHED_LOAD_RESOLUTION)
-#define SCHED_LOAD_SCALE	(1L << SCHED_LOAD_SHIFT)
-
-#define NICE_0_LOAD		SCHED_LOAD_SCALE
-#define NICE_0_SHIFT		SCHED_LOAD_SHIFT
+/*
+ * Task weight (visible to users) and its load (invisible to users) have
+ * independent resolution, but they should be well calibrated. We use
+ * scale_load() and scale_load_down(w) to convert between them. The
+ * following must be true:
+ *
+ *  scale_load(sched_prio_to_weight[USER_PRIO(NICE_TO_PRIO(0))]) == NICE_0_LOAD
+ *
+ */
+#define NICE_0_LOAD		(1L << NICE_0_LOAD_SHIFT)
 
 /*
  * Single value that decides SCHED_DEADLINE internal math precision.
@@ -585,11 +592,13 @@
 #endif
 	#define CPU_LOAD_IDX_MAX 5
 	unsigned long cpu_load[CPU_LOAD_IDX_MAX];
-	unsigned long last_load_update_tick;
 #ifdef CONFIG_NO_HZ_COMMON
+#ifdef CONFIG_SMP
+	unsigned long last_load_update_tick;
+#endif /* CONFIG_SMP */
 	u64 nohz_stamp;
 	unsigned long nohz_flags;
-#endif
+#endif /* CONFIG_NO_HZ_COMMON */
 #ifdef CONFIG_NO_HZ_FULL
 	unsigned long last_sched_tick;
 #endif
@@ -854,7 +863,7 @@
 struct sched_group_capacity {
 	atomic_t ref;
 	/*
-	 * CPU capacity of this group, SCHED_LOAD_SCALE being max capacity
+	 * CPU capacity of this group, SCHED_CAPACITY_SCALE being max capacity
 	 * for a single CPU.
 	 */
 	unsigned int capacity;
@@ -1159,7 +1168,7 @@
  *
  * ENQUEUE_HEAD      - place at front of runqueue (tail if not specified)
  * ENQUEUE_REPLENISH - CBS (replenish runtime and postpone deadline)
- * ENQUEUE_WAKING    - sched_class::task_waking was called
+ * ENQUEUE_MIGRATED  - the task was migrated during wakeup
  *
  */
 
@@ -1174,9 +1183,9 @@
 #define ENQUEUE_HEAD		0x08
 #define ENQUEUE_REPLENISH	0x10
 #ifdef CONFIG_SMP
-#define ENQUEUE_WAKING		0x20
+#define ENQUEUE_MIGRATED	0x20
 #else
-#define ENQUEUE_WAKING		0x00
+#define ENQUEUE_MIGRATED	0x00
 #endif
 
 #define RETRY_TASK		((void *)-1UL)
@@ -1200,14 +1209,14 @@
 	 * tasks.
 	 */
 	struct task_struct * (*pick_next_task) (struct rq *rq,
-						struct task_struct *prev);
+						struct task_struct *prev,
+						struct pin_cookie cookie);
 	void (*put_prev_task) (struct rq *rq, struct task_struct *p);
 
 #ifdef CONFIG_SMP
 	int  (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags);
 	void (*migrate_task_rq)(struct task_struct *p);
 
-	void (*task_waking) (struct task_struct *task);
 	void (*task_woken) (struct rq *this_rq, struct task_struct *task);
 
 	void (*set_cpus_allowed)(struct task_struct *p,
@@ -1313,6 +1322,7 @@
 unsigned long to_ratio(u64 period, u64 runtime);
 
 extern void init_entity_runnable_average(struct sched_entity *se);
+extern void post_init_entity_util_avg(struct sched_entity *se);
 
 #ifdef CONFIG_NO_HZ_FULL
 extern bool sched_can_stop_tick(struct rq *rq);
@@ -1448,86 +1458,32 @@
 static inline void sched_avg_update(struct rq *rq) { }
 #endif
 
-/*
- * __task_rq_lock - lock the rq @p resides on.
- */
-static inline struct rq *__task_rq_lock(struct task_struct *p)
-	__acquires(rq->lock)
-{
-	struct rq *rq;
+struct rq_flags {
+	unsigned long flags;
+	struct pin_cookie cookie;
+};
 
-	lockdep_assert_held(&p->pi_lock);
-
-	for (;;) {
-		rq = task_rq(p);
-		raw_spin_lock(&rq->lock);
-		if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) {
-			lockdep_pin_lock(&rq->lock);
-			return rq;
-		}
-		raw_spin_unlock(&rq->lock);
-
-		while (unlikely(task_on_rq_migrating(p)))
-			cpu_relax();
-	}
-}
-
-/*
- * task_rq_lock - lock p->pi_lock and lock the rq @p resides on.
- */
-static inline struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
+struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf)
+	__acquires(rq->lock);
+struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf)
 	__acquires(p->pi_lock)
-	__acquires(rq->lock)
-{
-	struct rq *rq;
+	__acquires(rq->lock);
 
-	for (;;) {
-		raw_spin_lock_irqsave(&p->pi_lock, *flags);
-		rq = task_rq(p);
-		raw_spin_lock(&rq->lock);
-		/*
-		 *	move_queued_task()		task_rq_lock()
-		 *
-		 *	ACQUIRE (rq->lock)
-		 *	[S] ->on_rq = MIGRATING		[L] rq = task_rq()
-		 *	WMB (__set_task_cpu())		ACQUIRE (rq->lock);
-		 *	[S] ->cpu = new_cpu		[L] task_rq()
-		 *					[L] ->on_rq
-		 *	RELEASE (rq->lock)
-		 *
-		 * If we observe the old cpu in task_rq_lock, the acquire of
-		 * the old rq->lock will fully serialize against the stores.
-		 *
-		 * If we observe the new cpu in task_rq_lock, the acquire will
-		 * pair with the WMB to ensure we must then also see migrating.
-		 */
-		if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) {
-			lockdep_pin_lock(&rq->lock);
-			return rq;
-		}
-		raw_spin_unlock(&rq->lock);
-		raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
-
-		while (unlikely(task_on_rq_migrating(p)))
-			cpu_relax();
-	}
-}
-
-static inline void __task_rq_unlock(struct rq *rq)
+static inline void __task_rq_unlock(struct rq *rq, struct rq_flags *rf)
 	__releases(rq->lock)
 {
-	lockdep_unpin_lock(&rq->lock);
+	lockdep_unpin_lock(&rq->lock, rf->cookie);
 	raw_spin_unlock(&rq->lock);
 }
 
 static inline void
-task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
+task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
 	__releases(rq->lock)
 	__releases(p->pi_lock)
 {
-	lockdep_unpin_lock(&rq->lock);
+	lockdep_unpin_lock(&rq->lock, rf->cookie);
 	raw_spin_unlock(&rq->lock);
-	raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
+	raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags);
 }
 
 #ifdef CONFIG_SMP
@@ -1743,6 +1699,10 @@
 };
 
 #define nohz_flags(cpu)	(&cpu_rq(cpu)->nohz_flags)
+
+extern void nohz_balance_exit_idle(unsigned int cpu);
+#else
+static inline void nohz_balance_exit_idle(unsigned int cpu) { }
 #endif
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
@@ -1842,6 +1802,14 @@
 static inline void cpufreq_trigger_update(u64 time) {}
 #endif /* CONFIG_CPU_FREQ */
 
+#ifdef arch_scale_freq_capacity
+#ifndef arch_scale_freq_invariant
+#define arch_scale_freq_invariant()	(true)
+#endif
+#else /* arch_scale_freq_capacity */
+#define arch_scale_freq_invariant()	(false)
+#endif
+
 static inline void account_reset_rq(struct rq *rq)
 {
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
index cbc67da..604297a 100644
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -24,7 +24,7 @@
 }
 
 static struct task_struct *
-pick_next_task_stop(struct rq *rq, struct task_struct *prev)
+pick_next_task_stop(struct rq *rq, struct task_struct *prev, struct pin_cookie cookie)
 {
 	struct task_struct *stop = rq->stop;
 
diff --git a/kernel/signal.c b/kernel/signal.c
index aa9bf00..ab122a2 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3099,12 +3099,14 @@
 
 	oss.ss_sp = (void __user *) current->sas_ss_sp;
 	oss.ss_size = current->sas_ss_size;
-	oss.ss_flags = sas_ss_flags(sp);
+	oss.ss_flags = sas_ss_flags(sp) |
+		(current->sas_ss_flags & SS_FLAG_BITS);
 
 	if (uss) {
 		void __user *ss_sp;
 		size_t ss_size;
-		int ss_flags;
+		unsigned ss_flags;
+		int ss_mode;
 
 		error = -EFAULT;
 		if (!access_ok(VERIFY_READ, uss, sizeof(*uss)))
@@ -3119,18 +3121,13 @@
 		if (on_sig_stack(sp))
 			goto out;
 
+		ss_mode = ss_flags & ~SS_FLAG_BITS;
 		error = -EINVAL;
-		/*
-		 * Note - this code used to test ss_flags incorrectly:
-		 *  	  old code may have been written using ss_flags==0
-		 *	  to mean ss_flags==SS_ONSTACK (as this was the only
-		 *	  way that worked) - this fix preserves that older
-		 *	  mechanism.
-		 */
-		if (ss_flags != SS_DISABLE && ss_flags != SS_ONSTACK && ss_flags != 0)
+		if (ss_mode != SS_DISABLE && ss_mode != SS_ONSTACK &&
+				ss_mode != 0)
 			goto out;
 
-		if (ss_flags == SS_DISABLE) {
+		if (ss_mode == SS_DISABLE) {
 			ss_size = 0;
 			ss_sp = NULL;
 		} else {
@@ -3141,6 +3138,7 @@
 
 		current->sas_ss_sp = (unsigned long) ss_sp;
 		current->sas_ss_size = ss_size;
+		current->sas_ss_flags = ss_flags;
 	}
 
 	error = 0;
@@ -3171,9 +3169,14 @@
 int __save_altstack(stack_t __user *uss, unsigned long sp)
 {
 	struct task_struct *t = current;
-	return  __put_user((void __user *)t->sas_ss_sp, &uss->ss_sp) |
-		__put_user(sas_ss_flags(sp), &uss->ss_flags) |
+	int err = __put_user((void __user *)t->sas_ss_sp, &uss->ss_sp) |
+		__put_user(t->sas_ss_flags, &uss->ss_flags) |
 		__put_user(t->sas_ss_size, &uss->ss_size);
+	if (err)
+		return err;
+	if (t->sas_ss_flags & SS_AUTODISARM)
+		sas_ss_reset(t);
+	return 0;
 }
 
 #ifdef CONFIG_COMPAT
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 725587f..c8b3186 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -130,6 +130,9 @@
 #ifdef CONFIG_PRINTK
 static int ten_thousand = 10000;
 #endif
+#ifdef CONFIG_PERF_EVENTS
+static int six_hundred_forty_kb = 640 * 1024;
+#endif
 
 /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
 static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
@@ -1144,6 +1147,15 @@
 		.extra1		= &zero,
 		.extra2		= &one_hundred,
 	},
+	{
+		.procname	= "perf_event_max_stack",
+		.data		= NULL, /* filled in by handler */
+		.maxlen		= sizeof(sysctl_perf_event_max_stack),
+		.mode		= 0644,
+		.proc_handler	= perf_event_max_stack_handler,
+		.extra1		= &zero,
+		.extra2		= &six_hundred_forty_kb,
+	},
 #endif
 #ifdef CONFIG_KMEMCHECK
 	{
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 58e3310..536ada8 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -262,7 +262,7 @@
 {
 	int prev;
 
-	prev = atomic_fetch_or(dep, BIT(bit));
+	prev = atomic_fetch_or(BIT(bit), dep);
 	if (!prev)
 		tick_nohz_full_kick_all();
 }
@@ -292,7 +292,7 @@
 
 	ts = per_cpu_ptr(&tick_cpu_sched, cpu);
 
-	prev = atomic_fetch_or(&ts->tick_dep_mask, BIT(bit));
+	prev = atomic_fetch_or(BIT(bit), &ts->tick_dep_mask);
 	if (!prev) {
 		preempt_disable();
 		/* Perf needs local kick that is NMI safe */
@@ -776,6 +776,7 @@
 	if (!ts->tick_stopped) {
 		nohz_balance_enter_idle(cpu);
 		calc_load_enter_idle();
+		cpu_load_update_nohz_start();
 
 		ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
 		ts->tick_stopped = 1;
@@ -802,11 +803,11 @@
 	return tick;
 }
 
-static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now, int active)
+static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
 {
 	/* Update jiffies first */
 	tick_do_update_jiffies64(now);
-	update_cpu_load_nohz(active);
+	cpu_load_update_nohz_stop();
 
 	calc_load_exit_idle();
 	touch_softlockup_watchdog_sched();
@@ -833,7 +834,7 @@
 	if (can_stop_full_tick(ts))
 		tick_nohz_stop_sched_tick(ts, ktime_get(), cpu);
 	else if (ts->tick_stopped)
-		tick_nohz_restart_sched_tick(ts, ktime_get(), 1);
+		tick_nohz_restart_sched_tick(ts, ktime_get());
 #endif
 }
 
@@ -1024,7 +1025,7 @@
 		tick_nohz_stop_idle(ts, now);
 
 	if (ts->tick_stopped) {
-		tick_nohz_restart_sched_tick(ts, now, 0);
+		tick_nohz_restart_sched_tick(ts, now);
 		tick_nohz_account_idle_ticks(ts);
 	}
 
diff --git a/kernel/torture.c b/kernel/torture.c
index 44aa462..fa0bdee 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -451,6 +451,7 @@
 		torture_shutdown_hook();
 	else
 		VERBOSE_TOROUT_STRING("No torture_shutdown_hook(), skipping.");
+	ftrace_dump(DUMP_ALL);
 	kernel_power_off();	/* Shut down the system. */
 	return 0;
 }
@@ -602,8 +603,9 @@
 {
 	mutex_lock(&fullstop_mutex);
 	if (torture_type != NULL) {
-		pr_alert("torture_init_begin: refusing %s init: %s running",
+		pr_alert("torture_init_begin: Refusing %s init: %s running.\n",
 			 ttype, torture_type);
+		pr_alert("torture_init_begin: One torture test at a time!\n");
 		mutex_unlock(&fullstop_mutex);
 		return false;
 	}
diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c
index 81b8745..0c7dee2 100644
--- a/kernel/trace/power-traces.c
+++ b/kernel/trace/power-traces.c
@@ -15,5 +15,6 @@
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(suspend_resume);
 EXPORT_TRACEPOINT_SYMBOL_GPL(cpu_idle);
+EXPORT_TRACEPOINT_SYMBOL_GPL(cpu_frequency);
 EXPORT_TRACEPOINT_SYMBOL_GPL(powernv_throttle);
 
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 00df25f..e11108f 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -47,6 +47,9 @@
 		if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
 			return -EPERM;
 
+		if (!is_sampling_event(p_event))
+			return 0;
+
 		/*
 		 * We don't allow user space callchains for  function trace
 		 * event, due to issues with page faults while tracing page
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 1e9a607..f4b797a 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1289,6 +1289,39 @@
 	tristate
 	default n
 
+config RCU_PERF_TEST
+	tristate "performance tests for RCU"
+	depends on DEBUG_KERNEL
+	select TORTURE_TEST
+	select SRCU
+	select TASKS_RCU
+	default n
+	help
+	  This option provides a kernel module that runs performance
+	  tests on the RCU infrastructure.  The kernel module may be built
+	  after the fact on the running kernel to be tested, if desired.
+
+	  Say Y here if you want RCU performance tests to be built into
+	  the kernel.
+	  Say M if you want the RCU performance tests to build as a module.
+	  Say N if you are unsure.
+
+config RCU_PERF_TEST_RUNNABLE
+	bool "performance tests for RCU runnable by default"
+	depends on RCU_PERF_TEST = y
+	default n
+	help
+	  This option provides a way to build the RCU performance tests
+	  directly into the kernel without them starting up at boot time.
+	  You can use /sys/module to manually override this setting.
+	  This /sys/module parameter is available only when the RCU
+	  performance tests have been built into the kernel.
+
+	  Say Y here if you want the RCU performance tests to start during
+	  boot (you probably don't).
+	  Say N here if you want the RCU performance tests to start only
+	  after being manually enabled via /sys/module.
+
 config RCU_TORTURE_TEST
 	tristate "torture tests for RCU"
 	depends on DEBUG_KERNEL
diff --git a/lib/Makefile b/lib/Makefile
index 7bd6fd4..a65e9a8 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -23,7 +23,7 @@
 	 rbtree.o radix-tree.o dump_stack.o timerqueue.o\
 	 idr.o int_sqrt.o extable.o \
 	 sha1.o md5.o irq_regs.o argv_split.o \
-	 proportions.o flex_proportions.o ratelimit.o show_mem.o \
+	 flex_proportions.o ratelimit.o show_mem.o \
 	 is_single_threaded.o plist.o decompress.o kobject_uevent.o \
 	 earlycpio.o seq_buf.o nmi_backtrace.o
 
diff --git a/lib/proportions.c b/lib/proportions.c
deleted file mode 100644
index efa54f25..0000000
--- a/lib/proportions.c
+++ /dev/null
@@ -1,407 +0,0 @@
-/*
- * Floating proportions
- *
- *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
- *
- * Description:
- *
- * The floating proportion is a time derivative with an exponentially decaying
- * history:
- *
- *   p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i)
- *
- * Where j is an element from {prop_local}, x_{j} is j's number of events,
- * and i the time period over which the differential is taken. So d/dt_{-i} is
- * the differential over the i-th last period.
- *
- * The decaying history gives smooth transitions. The time differential carries
- * the notion of speed.
- *
- * The denominator is 2^(1+i) because we want the series to be normalised, ie.
- *
- *   \Sum_{i=0} 1/2^(1+i) = 1
- *
- * Further more, if we measure time (t) in the same events as x; so that:
- *
- *   t = \Sum_{j} x_{j}
- *
- * we get that:
- *
- *   \Sum_{j} p_{j} = 1
- *
- * Writing this in an iterative fashion we get (dropping the 'd's):
- *
- *   if (++x_{j}, ++t > period)
- *     t /= 2;
- *     for_each (j)
- *       x_{j} /= 2;
- *
- * so that:
- *
- *   p_{j} = x_{j} / t;
- *
- * We optimize away the '/= 2' for the global time delta by noting that:
- *
- *   if (++t > period) t /= 2:
- *
- * Can be approximated by:
- *
- *   period/2 + (++t % period/2)
- *
- * [ Furthermore, when we choose period to be 2^n it can be written in terms of
- *   binary operations and wraparound artefacts disappear. ]
- *
- * Also note that this yields a natural counter of the elapsed periods:
- *
- *   c = t / (period/2)
- *
- * [ Its monotonic increasing property can be applied to mitigate the wrap-
- *   around issue. ]
- *
- * This allows us to do away with the loop over all prop_locals on each period
- * expiration. By remembering the period count under which it was last accessed
- * as c_{j}, we can obtain the number of 'missed' cycles from:
- *
- *   c - c_{j}
- *
- * We can then lazily catch up to the global period count every time we are
- * going to use x_{j}, by doing:
- *
- *   x_{j} /= 2^(c - c_{j}), c_{j} = c
- */
-
-#include <linux/proportions.h>
-#include <linux/rcupdate.h>
-
-int prop_descriptor_init(struct prop_descriptor *pd, int shift, gfp_t gfp)
-{
-	int err;
-
-	if (shift > PROP_MAX_SHIFT)
-		shift = PROP_MAX_SHIFT;
-
-	pd->index = 0;
-	pd->pg[0].shift = shift;
-	mutex_init(&pd->mutex);
-	err = percpu_counter_init(&pd->pg[0].events, 0, gfp);
-	if (err)
-		goto out;
-
-	err = percpu_counter_init(&pd->pg[1].events, 0, gfp);
-	if (err)
-		percpu_counter_destroy(&pd->pg[0].events);
-
-out:
-	return err;
-}
-
-/*
- * We have two copies, and flip between them to make it seem like an atomic
- * update. The update is not really atomic wrt the events counter, but
- * it is internally consistent with the bit layout depending on shift.
- *
- * We copy the events count, move the bits around and flip the index.
- */
-void prop_change_shift(struct prop_descriptor *pd, int shift)
-{
-	int index;
-	int offset;
-	u64 events;
-	unsigned long flags;
-
-	if (shift > PROP_MAX_SHIFT)
-		shift = PROP_MAX_SHIFT;
-
-	mutex_lock(&pd->mutex);
-
-	index = pd->index ^ 1;
-	offset = pd->pg[pd->index].shift - shift;
-	if (!offset)
-		goto out;
-
-	pd->pg[index].shift = shift;
-
-	local_irq_save(flags);
-	events = percpu_counter_sum(&pd->pg[pd->index].events);
-	if (offset < 0)
-		events <<= -offset;
-	else
-		events >>= offset;
-	percpu_counter_set(&pd->pg[index].events, events);
-
-	/*
-	 * ensure the new pg is fully written before the switch
-	 */
-	smp_wmb();
-	pd->index = index;
-	local_irq_restore(flags);
-
-	synchronize_rcu();
-
-out:
-	mutex_unlock(&pd->mutex);
-}
-
-/*
- * wrap the access to the data in an rcu_read_lock() section;
- * this is used to track the active references.
- */
-static struct prop_global *prop_get_global(struct prop_descriptor *pd)
-__acquires(RCU)
-{
-	int index;
-
-	rcu_read_lock();
-	index = pd->index;
-	/*
-	 * match the wmb from vcd_flip()
-	 */
-	smp_rmb();
-	return &pd->pg[index];
-}
-
-static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg)
-__releases(RCU)
-{
-	rcu_read_unlock();
-}
-
-static void
-prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
-{
-	int offset = *pl_shift - new_shift;
-
-	if (!offset)
-		return;
-
-	if (offset < 0)
-		*pl_period <<= -offset;
-	else
-		*pl_period >>= offset;
-
-	*pl_shift = new_shift;
-}
-
-/*
- * PERCPU
- */
-
-#define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))
-
-int prop_local_init_percpu(struct prop_local_percpu *pl, gfp_t gfp)
-{
-	raw_spin_lock_init(&pl->lock);
-	pl->shift = 0;
-	pl->period = 0;
-	return percpu_counter_init(&pl->events, 0, gfp);
-}
-
-void prop_local_destroy_percpu(struct prop_local_percpu *pl)
-{
-	percpu_counter_destroy(&pl->events);
-}
-
-/*
- * Catch up with missed period expirations.
- *
- *   until (c_{j} == c)
- *     x_{j} -= x_{j}/2;
- *     c_{j}++;
- */
-static
-void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl)
-{
-	unsigned long period = 1UL << (pg->shift - 1);
-	unsigned long period_mask = ~(period - 1);
-	unsigned long global_period;
-	unsigned long flags;
-
-	global_period = percpu_counter_read(&pg->events);
-	global_period &= period_mask;
-
-	/*
-	 * Fast path - check if the local and global period count still match
-	 * outside of the lock.
-	 */
-	if (pl->period == global_period)
-		return;
-
-	raw_spin_lock_irqsave(&pl->lock, flags);
-	prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
-
-	/*
-	 * For each missed period, we half the local counter.
-	 * basically:
-	 *   pl->events >> (global_period - pl->period);
-	 */
-	period = (global_period - pl->period) >> (pg->shift - 1);
-	if (period < BITS_PER_LONG) {
-		s64 val = percpu_counter_read(&pl->events);
-
-		if (val < (nr_cpu_ids * PROP_BATCH))
-			val = percpu_counter_sum(&pl->events);
-
-		__percpu_counter_add(&pl->events, -val + (val >> period),
-					PROP_BATCH);
-	} else
-		percpu_counter_set(&pl->events, 0);
-
-	pl->period = global_period;
-	raw_spin_unlock_irqrestore(&pl->lock, flags);
-}
-
-/*
- *   ++x_{j}, ++t
- */
-void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
-{
-	struct prop_global *pg = prop_get_global(pd);
-
-	prop_norm_percpu(pg, pl);
-	__percpu_counter_add(&pl->events, 1, PROP_BATCH);
-	percpu_counter_add(&pg->events, 1);
-	prop_put_global(pd, pg);
-}
-
-/*
- * identical to __prop_inc_percpu, except that it limits this pl's fraction to
- * @frac/PROP_FRAC_BASE by ignoring events when this limit has been exceeded.
- */
-void __prop_inc_percpu_max(struct prop_descriptor *pd,
-			   struct prop_local_percpu *pl, long frac)
-{
-	struct prop_global *pg = prop_get_global(pd);
-
-	prop_norm_percpu(pg, pl);
-
-	if (unlikely(frac != PROP_FRAC_BASE)) {
-		unsigned long period_2 = 1UL << (pg->shift - 1);
-		unsigned long counter_mask = period_2 - 1;
-		unsigned long global_count;
-		long numerator, denominator;
-
-		numerator = percpu_counter_read_positive(&pl->events);
-		global_count = percpu_counter_read(&pg->events);
-		denominator = period_2 + (global_count & counter_mask);
-
-		if (numerator > ((denominator * frac) >> PROP_FRAC_SHIFT))
-			goto out_put;
-	}
-
-	percpu_counter_add(&pl->events, 1);
-	percpu_counter_add(&pg->events, 1);
-
-out_put:
-	prop_put_global(pd, pg);
-}
-
-/*
- * Obtain a fraction of this proportion
- *
- *   p_{j} = x_{j} / (period/2 + t % period/2)
- */
-void prop_fraction_percpu(struct prop_descriptor *pd,
-		struct prop_local_percpu *pl,
-		long *numerator, long *denominator)
-{
-	struct prop_global *pg = prop_get_global(pd);
-	unsigned long period_2 = 1UL << (pg->shift - 1);
-	unsigned long counter_mask = period_2 - 1;
-	unsigned long global_count;
-
-	prop_norm_percpu(pg, pl);
-	*numerator = percpu_counter_read_positive(&pl->events);
-
-	global_count = percpu_counter_read(&pg->events);
-	*denominator = period_2 + (global_count & counter_mask);
-
-	prop_put_global(pd, pg);
-}
-
-/*
- * SINGLE
- */
-
-int prop_local_init_single(struct prop_local_single *pl)
-{
-	raw_spin_lock_init(&pl->lock);
-	pl->shift = 0;
-	pl->period = 0;
-	pl->events = 0;
-	return 0;
-}
-
-void prop_local_destroy_single(struct prop_local_single *pl)
-{
-}
-
-/*
- * Catch up with missed period expirations.
- */
-static
-void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl)
-{
-	unsigned long period = 1UL << (pg->shift - 1);
-	unsigned long period_mask = ~(period - 1);
-	unsigned long global_period;
-	unsigned long flags;
-
-	global_period = percpu_counter_read(&pg->events);
-	global_period &= period_mask;
-
-	/*
-	 * Fast path - check if the local and global period count still match
-	 * outside of the lock.
-	 */
-	if (pl->period == global_period)
-		return;
-
-	raw_spin_lock_irqsave(&pl->lock, flags);
-	prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
-	/*
-	 * For each missed period, we half the local counter.
-	 */
-	period = (global_period - pl->period) >> (pg->shift - 1);
-	if (likely(period < BITS_PER_LONG))
-		pl->events >>= period;
-	else
-		pl->events = 0;
-	pl->period = global_period;
-	raw_spin_unlock_irqrestore(&pl->lock, flags);
-}
-
-/*
- *   ++x_{j}, ++t
- */
-void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl)
-{
-	struct prop_global *pg = prop_get_global(pd);
-
-	prop_norm_single(pg, pl);
-	pl->events++;
-	percpu_counter_add(&pg->events, 1);
-	prop_put_global(pd, pg);
-}
-
-/*
- * Obtain a fraction of this proportion
- *
- *   p_{j} = x_{j} / (period/2 + t % period/2)
- */
-void prop_fraction_single(struct prop_descriptor *pd,
-	       	struct prop_local_single *pl,
-		long *numerator, long *denominator)
-{
-	struct prop_global *pg = prop_get_global(pd);
-	unsigned long period_2 = 1UL << (pg->shift - 1);
-	unsigned long counter_mask = period_2 - 1;
-	unsigned long global_count;
-
-	prop_norm_single(pg, pl);
-	*numerator = pl->events;
-
-	global_count = percpu_counter_read(&pg->events);
-	*denominator = period_2 + (global_count & counter_mask);
-
-	prop_put_global(pd, pg);
-}
diff --git a/mm/mmu_context.c b/mm/mmu_context.c
index f802c2d..6f4d27c 100644
--- a/mm/mmu_context.c
+++ b/mm/mmu_context.c
@@ -4,9 +4,9 @@
  */
 
 #include <linux/mm.h>
+#include <linux/sched.h>
 #include <linux/mmu_context.h>
 #include <linux/export.h>
-#include <linux/sched.h>
 
 #include <asm/mmu_context.h>
 
diff --git a/sound/pci/hda/thinkpad_helper.c b/sound/pci/hda/thinkpad_helper.c
index 59ab6ce..f0955fd 100644
--- a/sound/pci/hda/thinkpad_helper.c
+++ b/sound/pci/hda/thinkpad_helper.c
@@ -13,7 +13,7 @@
 static bool is_thinkpad(struct hda_codec *codec)
 {
 	return (codec->core.subsystem_id >> 16 == 0x17aa) &&
-	       (acpi_dev_present("LEN0068") || acpi_dev_present("IBM0068"));
+	       (acpi_dev_found("LEN0068") || acpi_dev_found("IBM0068"));
 }
 
 static void update_tpacpi_mute_led(void *private_data, int enabled)
diff --git a/sound/soc/intel/boards/cht_bsw_max98090_ti.c b/sound/soc/intel/boards/cht_bsw_max98090_ti.c
index e609f08..ac60b04 100644
--- a/sound/soc/intel/boards/cht_bsw_max98090_ti.c
+++ b/sound/soc/intel/boards/cht_bsw_max98090_ti.c
@@ -296,7 +296,7 @@
 	if (!drv)
 		return -ENOMEM;
 
-	drv->ts3a227e_present = acpi_dev_present("104C227E");
+	drv->ts3a227e_present = acpi_dev_found("104C227E");
 	if (!drv->ts3a227e_present) {
 		/* no need probe TI jack detection chip */
 		snd_soc_card_cht.aux_dev = NULL;
diff --git a/sound/soc/intel/boards/cht_bsw_rt5645.c b/sound/soc/intel/boards/cht_bsw_rt5645.c
index 2a6f808..3f2c1ea 100644
--- a/sound/soc/intel/boards/cht_bsw_rt5645.c
+++ b/sound/soc/intel/boards/cht_bsw_rt5645.c
@@ -357,7 +357,7 @@
 		return -ENOMEM;
 
 	for (i = 0; i < ARRAY_SIZE(snd_soc_cards); i++) {
-		if (acpi_dev_present(snd_soc_cards[i].codec_id)) {
+		if (acpi_dev_found(snd_soc_cards[i].codec_id)) {
 			dev_dbg(&pdev->dev,
 				"found codec %s\n", snd_soc_cards[i].codec_id);
 			card = snd_soc_cards[i].soc_card;
diff --git a/tools/Makefile b/tools/Makefile
index 60c7e6c..6bf68fe 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -137,7 +137,8 @@
 	$(call descend,lib/subcmd,clean)
 
 perf_clean:
-	$(call descend,$(@:_clean=),clean)
+	$(Q)mkdir -p $(PERF_O) .
+	$(Q)$(MAKE) --no-print-directory -C perf O=$(PERF_O) subdir= clean
 
 selftests_clean:
 	$(call descend,testing/$(@:_clean=),clean)
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index 9f87861..57c8f98 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -49,6 +49,10 @@
 	libslang			\
 	libcrypto			\
 	libunwind			\
+	libunwind-x86			\
+	libunwind-x86_64		\
+	libunwind-arm			\
+	libunwind-aarch64		\
 	pthread-attr-setaffinity-np	\
 	stackprotector-all		\
 	timerfd				\
@@ -69,7 +73,9 @@
 	libbabeltrace			\
 	liberty				\
 	liberty-z			\
-	libunwind-debug-frame
+	libunwind-debug-frame		\
+	libunwind-debug-frame-arm	\
+	libunwind-debug-frame-aarch64
 
 FEATURE_TESTS ?= $(FEATURE_TESTS_BASIC)
 
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index 4ae94db..3d88f09 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -27,6 +27,12 @@
 	test-libcrypto.bin		\
 	test-libunwind.bin		\
 	test-libunwind-debug-frame.bin	\
+	test-libunwind-x86.bin		\
+	test-libunwind-x86_64.bin	\
+	test-libunwind-arm.bin		\
+	test-libunwind-aarch64.bin	\
+	test-libunwind-debug-frame-arm.bin	\
+	test-libunwind-debug-frame-aarch64.bin	\
 	test-pthread-attr-setaffinity-np.bin	\
 	test-stackprotector-all.bin	\
 	test-timerfd.bin		\
@@ -103,6 +109,23 @@
 
 $(OUTPUT)test-libunwind-debug-frame.bin:
 	$(BUILD) -lelf
+$(OUTPUT)test-libunwind-x86.bin:
+	$(BUILD) -lelf -lunwind-x86
+
+$(OUTPUT)test-libunwind-x86_64.bin:
+	$(BUILD) -lelf -lunwind-x86_64
+
+$(OUTPUT)test-libunwind-arm.bin:
+	$(BUILD) -lelf -lunwind-arm
+
+$(OUTPUT)test-libunwind-aarch64.bin:
+	$(BUILD) -lelf -lunwind-aarch64
+
+$(OUTPUT)test-libunwind-debug-frame-arm.bin:
+	$(BUILD) -lelf -lunwind-arm
+
+$(OUTPUT)test-libunwind-debug-frame-aarch64.bin:
+	$(BUILD) -lelf -lunwind-aarch64
 
 $(OUTPUT)test-libaudit.bin:
 	$(BUILD) -laudit
diff --git a/tools/build/feature/test-bpf.c b/tools/build/feature/test-bpf.c
index b389026..e04ab89 100644
--- a/tools/build/feature/test-bpf.c
+++ b/tools/build/feature/test-bpf.c
@@ -27,10 +27,9 @@
 	attr.log_level = 0;
 	attr.kern_version = 0;
 
-	attr = attr;
 	/*
 	 * Test existence of __NR_bpf and BPF_PROG_LOAD.
 	 * This call should fail if we run the testcase.
 	 */
-	return syscall(__NR_bpf, BPF_PROG_LOAD, attr, sizeof(attr));
+	return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
 }
diff --git a/tools/build/feature/test-libunwind-aarch64.c b/tools/build/feature/test-libunwind-aarch64.c
new file mode 100644
index 0000000..fc03fb6
--- /dev/null
+++ b/tools/build/feature/test-libunwind-aarch64.c
@@ -0,0 +1,26 @@
+#include <libunwind-aarch64.h>
+#include <stdlib.h>
+
+extern int UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as,
+					       unw_word_t ip,
+					       unw_dyn_info_t *di,
+					       unw_proc_info_t *pi,
+					       int need_unwind_info, void *arg);
+
+#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table)
+
+static unw_accessors_t accessors;
+
+int main(void)
+{
+	unw_addr_space_t addr_space;
+
+	addr_space = unw_create_addr_space(&accessors, 0);
+	if (addr_space)
+		return 0;
+
+	unw_init_remote(NULL, addr_space, NULL);
+	dwarf_search_unwind_table(addr_space, 0, NULL, NULL, 0, NULL);
+
+	return 0;
+}
diff --git a/tools/build/feature/test-libunwind-arm.c b/tools/build/feature/test-libunwind-arm.c
new file mode 100644
index 0000000..632d95e
--- /dev/null
+++ b/tools/build/feature/test-libunwind-arm.c
@@ -0,0 +1,27 @@
+#include <libunwind-arm.h>
+#include <stdlib.h>
+
+extern int UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as,
+					       unw_word_t ip,
+					       unw_dyn_info_t *di,
+					       unw_proc_info_t *pi,
+					       int need_unwind_info, void *arg);
+
+
+#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table)
+
+static unw_accessors_t accessors;
+
+int main(void)
+{
+	unw_addr_space_t addr_space;
+
+	addr_space = unw_create_addr_space(&accessors, 0);
+	if (addr_space)
+		return 0;
+
+	unw_init_remote(NULL, addr_space, NULL);
+	dwarf_search_unwind_table(addr_space, 0, NULL, NULL, 0, NULL);
+
+	return 0;
+}
diff --git a/tools/build/feature/test-libunwind-debug-frame-aarch64.c b/tools/build/feature/test-libunwind-debug-frame-aarch64.c
new file mode 100644
index 0000000..2284467
--- /dev/null
+++ b/tools/build/feature/test-libunwind-debug-frame-aarch64.c
@@ -0,0 +1,16 @@
+#include <libunwind-aarch64.h>
+#include <stdlib.h>
+
+extern int
+UNW_OBJ(dwarf_find_debug_frame) (int found, unw_dyn_info_t *di_debug,
+				 unw_word_t ip, unw_word_t segbase,
+				 const char *obj_name, unw_word_t start,
+				 unw_word_t end);
+
+#define dwarf_find_debug_frame UNW_OBJ(dwarf_find_debug_frame)
+
+int main(void)
+{
+	dwarf_find_debug_frame(0, NULL, 0, 0, NULL, 0, 0);
+	return 0;
+}
diff --git a/tools/build/feature/test-libunwind-debug-frame-arm.c b/tools/build/feature/test-libunwind-debug-frame-arm.c
new file mode 100644
index 0000000..f988596
--- /dev/null
+++ b/tools/build/feature/test-libunwind-debug-frame-arm.c
@@ -0,0 +1,16 @@
+#include <libunwind-arm.h>
+#include <stdlib.h>
+
+extern int
+UNW_OBJ(dwarf_find_debug_frame) (int found, unw_dyn_info_t *di_debug,
+				 unw_word_t ip, unw_word_t segbase,
+				 const char *obj_name, unw_word_t start,
+				 unw_word_t end);
+
+#define dwarf_find_debug_frame UNW_OBJ(dwarf_find_debug_frame)
+
+int main(void)
+{
+	dwarf_find_debug_frame(0, NULL, 0, 0, NULL, 0, 0);
+	return 0;
+}
diff --git a/tools/build/feature/test-libunwind-x86.c b/tools/build/feature/test-libunwind-x86.c
new file mode 100644
index 0000000..3561edc
--- /dev/null
+++ b/tools/build/feature/test-libunwind-x86.c
@@ -0,0 +1,27 @@
+#include <libunwind-x86.h>
+#include <stdlib.h>
+
+extern int UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as,
+					       unw_word_t ip,
+					       unw_dyn_info_t *di,
+					       unw_proc_info_t *pi,
+					       int need_unwind_info, void *arg);
+
+
+#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table)
+
+static unw_accessors_t accessors;
+
+int main(void)
+{
+	unw_addr_space_t addr_space;
+
+	addr_space = unw_create_addr_space(&accessors, 0);
+	if (addr_space)
+		return 0;
+
+	unw_init_remote(NULL, addr_space, NULL);
+	dwarf_search_unwind_table(addr_space, 0, NULL, NULL, 0, NULL);
+
+	return 0;
+}
diff --git a/tools/build/feature/test-libunwind-x86_64.c b/tools/build/feature/test-libunwind-x86_64.c
new file mode 100644
index 0000000..5add251
--- /dev/null
+++ b/tools/build/feature/test-libunwind-x86_64.c
@@ -0,0 +1,27 @@
+#include <libunwind-x86_64.h>
+#include <stdlib.h>
+
+extern int UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as,
+					       unw_word_t ip,
+					       unw_dyn_info_t *di,
+					       unw_proc_info_t *pi,
+					       int need_unwind_info, void *arg);
+
+
+#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table)
+
+static unw_accessors_t accessors;
+
+int main(void)
+{
+	unw_addr_space_t addr_space;
+
+	addr_space = unw_create_addr_space(&accessors, 0);
+	if (addr_space)
+		return 0;
+
+	unw_init_remote(NULL, addr_space, NULL);
+	dwarf_search_unwind_table(addr_space, 0, NULL, NULL, 0, NULL);
+
+	return 0;
+}
diff --git a/tools/lguest/lguest.c b/tools/lguest/lguest.c
index 80159e6..d9836c5 100644
--- a/tools/lguest/lguest.c
+++ b/tools/lguest/lguest.c
@@ -3351,12 +3351,18 @@
 	/* Boot protocol version: 2.07 supports the fields for lguest. */
 	boot->hdr.version = 0x207;
 
-	/* The hardware_subarch value of "1" tells the Guest it's an lguest. */
-	boot->hdr.hardware_subarch = 1;
+	/* X86_SUBARCH_LGUEST tells the Guest it's an lguest. */
+	boot->hdr.hardware_subarch = X86_SUBARCH_LGUEST;
 
 	/* Tell the entry path not to try to reload segment registers. */
 	boot->hdr.loadflags |= KEEP_SEGMENTS;
 
+	/* We don't support tboot: */
+	boot->tboot_addr = 0;
+
+	/* Ensure this is 0 to prevent APM from loading: */
+	boot->apm_bios_info.version = 0;
+
 	/* We tell the kernel to initialize the Guest. */
 	tell_kernel(start);
 
diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c
index ef78c22..08556cf 100644
--- a/tools/lib/api/fs/fs.c
+++ b/tools/lib/api/fs/fs.c
@@ -351,6 +351,19 @@
 	return err;
 }
 
+int procfs__read_str(const char *entry, char **buf, size_t *sizep)
+{
+	char path[PATH_MAX];
+	const char *procfs = procfs__mountpoint();
+
+	if (!procfs)
+		return -1;
+
+	snprintf(path, sizeof(path), "%s/%s", procfs, entry);
+
+	return filename__read_str(path, buf, sizep);
+}
+
 int sysfs__read_ull(const char *entry, unsigned long long *value)
 {
 	char path[PATH_MAX];
diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h
index 9f65980..16c9c2e 100644
--- a/tools/lib/api/fs/fs.h
+++ b/tools/lib/api/fs/fs.h
@@ -29,6 +29,8 @@
 int filename__read_ull(const char *filename, unsigned long long *value);
 int filename__read_str(const char *filename, char **buf, size_t *sizep);
 
+int procfs__read_str(const char *entry, char **buf, size_t *sizep);
+
 int sysctl__read_int(const char *sysctl, int *value);
 int sysfs__read_int(const char *entry, int *value);
 int sysfs__read_ull(const char *entry, unsigned long long *value);
diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt
index be764f9..c6c8318 100644
--- a/tools/perf/Documentation/intel-pt.txt
+++ b/tools/perf/Documentation/intel-pt.txt
@@ -672,6 +672,7 @@
 	d	create a debug log
 	g	synthesize a call chain (use with i or x)
 	l	synthesize last branch entries (use with i or x)
+	s	skip a number of initial events
 
 "Instructions" events look like they were recorded by "perf record -e
 instructions".
@@ -730,6 +731,12 @@
 
 To disable trace decoding entirely, use the option --no-itrace.
 
+It is also possible to skip events (instructions, branches, transactions)
+generated at the beginning. This is useful to ignore initialization code.
+
+	--itrace=i0nss1000000
+
+skips the first million instructions.
 
 dump option
 -----------
diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt
index 65453f4..e2a4c5e 100644
--- a/tools/perf/Documentation/itrace.txt
+++ b/tools/perf/Documentation/itrace.txt
@@ -7,6 +7,7 @@
 		d	create a debug log
 		g	synthesize a call chain (use with i or x)
 		l	synthesize last branch entries (use with i or x)
+		s       skip a number of initial events
 
 	The default is all events i.e. the same as --itrace=ibxe
 
@@ -24,3 +25,10 @@
 
 	Also the number of last branch entries (default 64, max. 1024) for
 	instructions or transactions events can be specified.
+
+	It is also possible to skip events (instructions, branches, transactions)
+	generated at the beginning. This is useful to ignore initialization code.
+
+	--itrace=i0nss1000000
+
+	skips the first million instructions.
diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt
index e9cd39a..778f54d 100644
--- a/tools/perf/Documentation/perf-annotate.txt
+++ b/tools/perf/Documentation/perf-annotate.txt
@@ -33,7 +33,7 @@
 
 -f::
 --force::
-        Don't complain, do it.
+        Don't do ownership validation.
 
 -v::
 --verbose::
diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt
index d1deb57..3e9490b 100644
--- a/tools/perf/Documentation/perf-diff.txt
+++ b/tools/perf/Documentation/perf-diff.txt
@@ -75,7 +75,7 @@
 
 -f::
 --force::
-       Don't complain, do it.
+        Don't do ownership validation.
 
 --symfs=<directory>::
         Look for files with symbols relative to this directory.
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index ec723d0..a126e97 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -93,6 +93,67 @@
 You should refer to the processor specific documentation for getting these
 details. Some of them are referenced in the SEE ALSO section below.
 
+ARBITRARY PMUS
+--------------
+
+perf also supports an extended syntax for specifying raw parameters
+to PMUs. Using this typically requires looking up the specific event
+in the CPU vendor specific documentation.
+
+The available PMUs and their raw parameters can be listed with
+
+  ls /sys/devices/*/format
+
+For example, the "LSD.UOPS" core PMU event mentioned above could
+be specified as
+
+  perf stat -e cpu/event=0xa8,umask=0x1,name=LSD.UOPS_CYCLES,cmask=1/ ...
+
+PER SOCKET PMUS
+---------------
+
+Some PMUs are not associated with a core, but with a whole CPU socket.
+Events on these PMUs generally cannot be sampled, but only counted globally
+with perf stat -a. They can be bound to one logical CPU, but will measure
+all the CPUs in the same socket.
+
+This example measures memory bandwidth every second
+on the first memory controller on socket 0 of an Intel Xeon system
+
+  perf stat -C 0 -a uncore_imc_0/cas_count_read/,uncore_imc_0/cas_count_write/ -I 1000 ...
+
+Each memory controller has its own PMU.  Measuring the complete system
+bandwidth would require specifying all imc PMUs (see perf list output),
+and adding the values together.
+
+This example measures the combined core power every second
+
+  perf stat -I 1000 -e power/energy-cores/  -a
+
+ACCESS RESTRICTIONS
+-------------------
+
+For non-root users, generally only context-switched PMU events are available.
+These are normally only the events in the cpu PMU, the predefined events
+like cycles and instructions, and some software events.
+
+Other PMUs and global measurements are normally root only.
+Some event qualifiers, such as "any", are also root only.
+
+This can be overridden by setting the kernel.perf_event_paranoid
+sysctl to -1, which allows non-root users to use these events.
+
+For accessing tracepoint events, perf needs to have read access to
+/sys/kernel/debug/tracing, even when perf_event_paranoid is in a relaxed
+setting.
+
+TRACING
+-------
+
+Some PMUs control advanced hardware tracing capabilities, such as Intel PT,
+that allow low-overhead execution tracing.  These are described in a separate
+intel-pt.txt document.
+
 PARAMETERIZED EVENTS
 --------------------
 
@@ -106,6 +167,50 @@
 
   perf stat -C 0 -e 'hv_gpci/dtbp_ptitc,phys_processor_idx=0x2/' ...
 
+EVENT GROUPS
+------------
+
+Perf supports time-based multiplexing of events when the number of active
+events exceeds the number of hardware performance counters. Multiplexing
+can cause measurement errors when the workload changes its execution
+profile.
+
+When metrics are computed using formulas from event counts, it is useful to
+ensure some events are always measured together as a group to minimize multiplexing
+errors. Event groups can be specified using { }.
+
+  perf stat -e '{instructions,cycles}' ...
+
+The number of available performance counters depends on the CPU. A group
+cannot contain more events than available counters.
+For example Intel Core CPUs typically have four generic performance counters
+for the core, plus three fixed counters for instructions, cycles and
+ref-cycles. Some special events have restrictions on which counter they
+can be scheduled on, and may not support multiple instances in a single group.
+When too many events are specified in the group, none of them will
+be measured.
+
+Globally pinned events can limit the number of counters available for
+other groups. On x86 systems, the NMI watchdog pins a counter by default.
+The NMI watchdog can be disabled as root with
+
+	echo 0 > /proc/sys/kernel/nmi_watchdog
+
+Events from multiple different PMUs cannot be mixed in a group, with
+some exceptions for software events.
+
+LEADER SAMPLING
+---------------
+
+perf also supports group leader sampling using the :S specifier.
+
+  perf record -e '{cycles,instructions}:S' ...
+  perf report --group
+
+Normally all events in an event group sample, but with :S only
+the first event (the leader) samples, and it only reads the values of the
+other events in the group.
+
 OPTIONS
 -------
 
@@ -143,5 +248,5 @@
 --------
 linkperf:perf-stat[1], linkperf:perf-top[1],
 linkperf:perf-record[1],
-http://www.intel.com/Assets/PDF/manual/253669.pdf[Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide],
+http://www.intel.com/sdm/[Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide],
 http://support.amd.com/us/Processor_TechDocs/24593_APM_v2.pdf[AMD64 Architecture Programmer’s Manual Volume 2: System Programming]
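
The EVENT GROUPS section above maps directly onto the group_fd argument of
perf_event_open(2). A minimal sketch of what '{instructions,cycles}' amounts
to at the syscall level (counting mode on the current task, no sampling;
this helper is illustrative and not code from the patch):

  #include <linux/perf_event.h>
  #include <sys/syscall.h>
  #include <string.h>
  #include <stdio.h>
  #include <unistd.h>

  static int open_counter(__u64 config, int group_fd)
  {
  	struct perf_event_attr attr;

  	memset(&attr, 0, sizeof(attr));
  	attr.type = PERF_TYPE_HARDWARE;
  	attr.size = sizeof(attr);
  	attr.config = config;
  	/* Only the group leader starts disabled; members follow it. */
  	attr.disabled = group_fd == -1;

  	return syscall(__NR_perf_event_open, &attr, 0, -1, group_fd, 0);
  }

  int main(void)
  {
  	int leader = open_counter(PERF_COUNT_HW_INSTRUCTIONS, -1);
  	int member = open_counter(PERF_COUNT_HW_CPU_CYCLES, leader);

  	if (leader < 0 || member < 0) {
  		perror("perf_event_open");
  		return 1;
  	}
  	/* The two counters are now scheduled on and off the PMU together,
  	 * which is what minimizes multiplexing error between them. A real
  	 * user would enable the leader with
  	 * ioctl(leader, PERF_EVENT_IOC_ENABLE, 0) and then read both. */
  	close(member);
  	close(leader);
  	return 0;
  }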
diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt
index 43310d8..1d6092c 100644
--- a/tools/perf/Documentation/perf-mem.txt
+++ b/tools/perf/Documentation/perf-mem.txt
@@ -48,6 +48,14 @@
 	option can be passed in record mode. It will be interpreted the same way as perf
 	record.
 
+-K::
+--all-kernel::
+	Configure all used events to run in kernel space.
+
+-U::
+--all-user::
+	Configure all used events to run in user space.
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-report[1]
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 19aa175..8dbee83 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -347,6 +347,19 @@
 --all-user::
 Configure all used events to run in user space.
 
+--timestamp-filename::
+Append timestamp to output file name.
+
+--switch-output::
+Generate multiple perf.data files, timestamp prefixed, switching to a new one
+when receiving a SIGUSR2.
+
+A possible use case is, given an external event, to slice the perf.data file
+that then gets processed, possibly via a perf script, to decide whether that
+particular perf.data snapshot should be kept or not.
+
+Implies --timestamp-filename, --no-buildid and --no-buildid-cache.
+
 SEE ALSO
 --------
 linkperf:perf-stat[1], linkperf:perf-list[1]
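
Since --switch-output is driven entirely by SIGUSR2, any external tool can
trigger the file switch. A hedged sketch of such a trigger (the PID lookup is
left to the caller; this program is illustrative, not part of the patch):

  #include <signal.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <sys/types.h>

  int main(int argc, char **argv)
  {
  	pid_t perf_pid;

  	if (argc < 2) {
  		fprintf(stderr, "usage: %s <pid of perf record>\n", argv[0]);
  		return 1;
  	}
  	perf_pid = (pid_t)atoi(argv[1]);

  	/* Ask 'perf record --switch-output' to finalize the current
  	 * perf.data.<timestamp> file and open a fresh one. */
  	if (kill(perf_pid, SIGUSR2)) {
  		perror("kill");
  		return 1;
  	}
  	return 0;
  }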
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 1211399..ebaf849 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -248,7 +248,7 @@
 	Note that when using the --itrace option the synthesized callchain size
 	will override this value if the synthesized callchain size is bigger.
 
-	Default: 127
+	Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise.
 
 -G::
 --inverted::
@@ -285,7 +285,7 @@
 
 -f::
 --force::
-        Don't complain, do it.
+        Don't do ownership validation.
 
 --symfs=<directory>::
         Look for files with symbols relative to this directory.
diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt
index 8ff4df9..1cc08cc 100644
--- a/tools/perf/Documentation/perf-sched.txt
+++ b/tools/perf/Documentation/perf-sched.txt
@@ -50,6 +50,22 @@
 --dump-raw-trace=::
         Display verbose dump of the sched data.
 
+OPTIONS for 'perf sched map'
+----------------------------
+
+--compact::
+	Show only CPUs with activity. This helps when visualizing
+	high core count systems.
+
+--cpus::
+	Show only entries with activity on the given CPUs.
+
+--color-cpus::
+	Highlight the given CPUs.
+
+--color-pids::
+	Highlight the given PIDs.
+
 SEE ALSO
 --------
 linkperf:perf-record[1]
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 382ddfb..a856a10 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -259,9 +259,23 @@
 --full-source-path::
 	Show the full path for source files for srcline output.
 
+--max-stack::
+        Set the stack depth limit when parsing the callchain, anything
+        beyond the specified depth will be ignored. This is a trade-off
+        between information loss and faster processing, especially for
+        workloads that can have a very long callchain stack.
+        Note that when using the --itrace option the synthesized callchain size
+        will override this value if the synthesized callchain size is bigger.
+
+        Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise.
+
 --ns::
 	Use 9 decimal places when displaying time (i.e. show the nanoseconds)
 
+-f::
+--force::
+	Don't do ownership validation.
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-script-perl[1],
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 19f046f..91d638d 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -177,7 +177,7 @@
 	between information loss and faster processing especially for
 	workloads that can have a very long callchain stack.
 
-	Default: 127
+	Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise.
 
 --ignore-callees=<regex>::
         Ignore callees of the function(s) matching the given regex.
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index 13293de..6afe201 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -117,9 +117,41 @@
 --syscalls::
 	Trace system calls. This option is enabled by default.
 
+--call-graph [mode,type,min[,limit],order[,key][,branch]]::
+        Set up and enable call-graph (stack chain/backtrace) recording.
+        See the `--call-graph` section in the perf-record and perf-report
+        man pages for details. The modes most useful in 'perf trace'
+        are 'dwarf' and 'lbr', where available; try: 'perf trace --call-graph dwarf'.
+
+        Using this will, for the root user, bump the value of --mmap-pages to 4
+        times the maximum for non-root users, based on the kernel.perf_event_mlock_kb
+        sysctl. This is done only if the user doesn't specify a --mmap-pages value.
+
+--kernel-syscall-graph::
+	 Show the kernel callchains on the syscall exit path.
+
 --event::
 	Trace other events, see 'perf list' for a complete list.
 
+--max-stack::
+        Set the stack depth limit when parsing the callchain, anything
+        beyond the specified depth will be ignored. Note that at this point
+        this only affects the presentation, i.e. the kernel still does not
+        limit collection; the overhead of callchains needs to be set via the
+        knobs in --call-graph dwarf.
+
+        Implies '--call-graph dwarf' when --call-graph not present on the
+        command line, on systems where DWARF unwinding was built in.
+
+        Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise.
+
+--min-stack::
+        Set the stack depth limit when parsing the callchain, anything
+        below the specified depth will be ignored. Disabled by default.
+
+        Implies '--call-graph dwarf' when --call-graph not present on the
+        command line, on systems where DWARF unwinding was built in.
+
 --proc-map-timeout::
 	When processing pre-existing threads' /proc/XXX/mmap, it may take a long time,
 	because the file may be huge. A timeout is needed in such cases.
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 000ea21..bde8cba 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -183,6 +183,11 @@
 include config/Makefile
 endif
 
+ifeq ($(config),0)
+include $(srctree)/tools/scripts/Makefile.arch
+-include arch/$(ARCH)/Makefile
+endif
+
 # The FEATURE_DUMP_EXPORT holds location of the actual
 # FEATURE_DUMP file to be used to bypass feature detection
 # (for bpf or any other subproject)
@@ -297,8 +302,6 @@
 # because maintaining the nesting to match is a pain.  If
 # we had "elif" things would have been much nicer...
 
--include arch/$(ARCH)/Makefile
-
 ifneq ($(OUTPUT),)
   CFLAGS += -I$(OUTPUT)
 endif
@@ -390,7 +393,7 @@
 __build-dir = $(subst $(OUTPUT),,$(dir $@))
 build-dir   = $(if $(__build-dir),$(__build-dir),.)
 
-prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h fixdep
+prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h fixdep archheaders
 
 $(OUTPUT)%.o: %.c prepare FORCE
 	$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
@@ -430,7 +433,7 @@
 
 LIBPERF_IN := $(OUTPUT)libperf-in.o
 
-$(LIBPERF_IN): fixdep FORCE
+$(LIBPERF_IN): prepare fixdep FORCE
 	$(Q)$(MAKE) $(build)=libperf
 
 $(LIB_FILE): $(LIBPERF_IN)
@@ -625,7 +628,7 @@
 	$(call QUIET_CLEAN, config)
 	$(Q)$(MAKE) -C $(srctree)/tools/build/feature/ $(if $(OUTPUT),OUTPUT=$(OUTPUT)feature/,) clean >/dev/null
 
-clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean
+clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean
 	$(call QUIET_CLEAN, core-objs)  $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS)
 	$(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
 	$(Q)$(RM) $(OUTPUT).config-detected
@@ -662,5 +665,5 @@
 .PHONY: all install clean config-clean strip install-gtk
 .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
 .PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope FORCE prepare
-.PHONY: libtraceevent_plugins
+.PHONY: libtraceevent_plugins archheaders
 
diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile
index 56e05f1..cc39309 100644
--- a/tools/perf/arch/powerpc/Makefile
+++ b/tools/perf/arch/powerpc/Makefile
@@ -3,4 +3,5 @@
 endif
 
 HAVE_KVM_STAT_SUPPORT := 1
+PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
 PERF_HAVE_JITDUMP := 1
diff --git a/tools/perf/arch/powerpc/util/dwarf-regs.c b/tools/perf/arch/powerpc/util/dwarf-regs.c
index 733151c..41bdf95 100644
--- a/tools/perf/arch/powerpc/util/dwarf-regs.c
+++ b/tools/perf/arch/powerpc/util/dwarf-regs.c
@@ -10,19 +10,26 @@
  */
 
 #include <stddef.h>
+#include <errno.h>
+#include <string.h>
 #include <dwarf-regs.h>
-
+#include <linux/ptrace.h>
+#include <linux/kernel.h>
+#include "util.h"
 
 struct pt_regs_dwarfnum {
 	const char *name;
 	unsigned int dwarfnum;
+	unsigned int ptregs_offset;
 };
 
-#define STR(s) #s
-#define REG_DWARFNUM_NAME(r, num) {.name = r, .dwarfnum = num}
-#define GPR_DWARFNUM_NAME(num)	\
-	{.name = STR(%gpr##num), .dwarfnum = num}
-#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0}
+#define REG_DWARFNUM_NAME(r, num)					\
+		{.name = STR(%)STR(r), .dwarfnum = num,			\
+		.ptregs_offset = offsetof(struct pt_regs, r)}
+#define GPR_DWARFNUM_NAME(num)						\
+		{.name = STR(%gpr##num), .dwarfnum = num,		\
+		.ptregs_offset = offsetof(struct pt_regs, gpr[num])}
+#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0, .ptregs_offset = 0}
 
 /*
  * Reference:
@@ -61,12 +68,12 @@
 	GPR_DWARFNUM_NAME(29),
 	GPR_DWARFNUM_NAME(30),
 	GPR_DWARFNUM_NAME(31),
-	REG_DWARFNUM_NAME("%msr",   66),
-	REG_DWARFNUM_NAME("%ctr",   109),
-	REG_DWARFNUM_NAME("%link",  108),
-	REG_DWARFNUM_NAME("%xer",   101),
-	REG_DWARFNUM_NAME("%dar",   119),
-	REG_DWARFNUM_NAME("%dsisr", 118),
+	REG_DWARFNUM_NAME(msr,   66),
+	REG_DWARFNUM_NAME(ctr,   109),
+	REG_DWARFNUM_NAME(link,  108),
+	REG_DWARFNUM_NAME(xer,   101),
+	REG_DWARFNUM_NAME(dar,   119),
+	REG_DWARFNUM_NAME(dsisr, 118),
 	REG_DWARFNUM_END,
 };
 
@@ -86,3 +93,12 @@
 			return roff->name;
 	return NULL;
 }
+
+int regs_query_register_offset(const char *name)
+{
+	const struct pt_regs_dwarfnum *roff;
+	for (roff = regdwarfnum_table; roff->name != NULL; roff++)
+		if (!strcmp(roff->name, name))
+			return roff->ptregs_offset;
+	return -EINVAL;
+}
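
For context, regs_query_register_offset() is what lets the powerpc probe code
turn a register name from a probe argument into a pt_regs offset. A
hypothetical caller, assuming the perf-internal dwarf-regs.h declaration is on
the include path (the program itself is illustrative, not from the patch):

  #include <stdio.h>
  #include <dwarf-regs.h>	/* perf-internal header, assumed visible */

  int main(void)
  {
  	/* "%gpr3" follows the naming generated by GPR_DWARFNUM_NAME(). */
  	int off = regs_query_register_offset("%gpr3");

  	if (off < 0) {
  		fprintf(stderr, "unknown register name\n");
  		return 1;
  	}
  	printf("%%gpr3 sits at pt_regs offset %d\n", off);
  	return 0;
  }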
diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c
index bbc1a50..c6d0f91 100644
--- a/tools/perf/arch/powerpc/util/sym-handling.c
+++ b/tools/perf/arch/powerpc/util/sym-handling.c
@@ -19,12 +19,6 @@
 	       ehdr.e_type == ET_DYN;
 }
 
-#if defined(_CALL_ELF) && _CALL_ELF == 2
-void arch__elf_sym_adjust(GElf_Sym *sym)
-{
-	sym->st_value += PPC64_LOCAL_ENTRY_OFFSET(sym->st_other);
-}
-#endif
 #endif
 
 #if !defined(_CALL_ELF) || _CALL_ELF != 2
@@ -65,18 +59,45 @@
 	return true;
 }
 
+#ifdef HAVE_LIBELF_SUPPORT
+void arch__sym_update(struct symbol *s, GElf_Sym *sym)
+{
+	s->arch_sym = sym->st_other;
+}
+#endif
+
 #define PPC64LE_LEP_OFFSET	8
 
 void arch__fix_tev_from_maps(struct perf_probe_event *pev,
-			     struct probe_trace_event *tev, struct map *map)
+			     struct probe_trace_event *tev, struct map *map,
+			     struct symbol *sym)
 {
+	int lep_offset;
+
 	/*
-	 * ppc64 ABIv2 local entry point is currently always 2 instructions
-	 * (8 bytes) after the global entry point.
+	 * When probing at a function entry point, we normally always want the
+	 * LEP since that catches calls to the function through both the GEP and
+	 * the LEP. Hence, we would like to probe at an offset of 8 bytes if
+	 * the user only specified the function entry.
+	 *
+	 * However, if the user specifies an offset, we fall back to using the
+	 * GEP since all userspace applications (objdump/readelf) show function
+	 * disassembly with offsets from the GEP.
+	 *
+	 * In addition, we shouldn't specify an offset for kretprobes.
 	 */
-	if (!pev->uprobes && map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS) {
-		tev->point.address += PPC64LE_LEP_OFFSET;
+	if (pev->point.offset || pev->point.retprobe || !map || !sym)
+		return;
+
+	lep_offset = PPC64_LOCAL_ENTRY_OFFSET(sym->arch_sym);
+
+	if (map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS)
 		tev->point.offset += PPC64LE_LEP_OFFSET;
+	else if (lep_offset) {
+		if (pev->uprobes)
+			tev->point.address += lep_offset;
+		else
+			tev->point.offset += lep_offset;
 	}
 }
 #endif
diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile
index 269af21..6c9211b 100644
--- a/tools/perf/arch/x86/Makefile
+++ b/tools/perf/arch/x86/Makefile
@@ -4,3 +4,26 @@
 HAVE_KVM_STAT_SUPPORT := 1
 PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
 PERF_HAVE_JITDUMP := 1
+
+###
+# Syscall table generation
+#
+
+out    := $(OUTPUT)arch/x86/include/generated/asm
+header := $(out)/syscalls_64.c
+sys    := $(srctree)/tools/perf/arch/x86/entry/syscalls
+systbl := $(sys)/syscalltbl.sh
+
+# Create output directory if not already present
+_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
+
+$(header): $(sys)/syscall_64.tbl $(systbl)
+	@(test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \
+        (diff -B arch/x86/entry/syscalls/syscall_64.tbl ../../arch/x86/entry/syscalls/syscall_64.tbl >/dev/null) \
+        || echo "Warning: x86_64's syscall_64.tbl differs from kernel" >&2 )) || true
+	$(Q)$(SHELL) '$(systbl)' $(sys)/syscall_64.tbl 'x86_64' > $@
+
+clean::
+	$(call QUIET_CLEAN, x86) $(RM) $(header)
+
+archheaders: $(header)
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
new file mode 100644
index 0000000..cac6d17
--- /dev/null
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -0,0 +1,376 @@
+#
+# 64-bit system call numbers and entry vectors
+#
+# The format is:
+# <number> <abi> <name> <entry point>
+#
+# The abi is "common", "64" or "x32" for this file.
+#
+0	common	read			sys_read
+1	common	write			sys_write
+2	common	open			sys_open
+3	common	close			sys_close
+4	common	stat			sys_newstat
+5	common	fstat			sys_newfstat
+6	common	lstat			sys_newlstat
+7	common	poll			sys_poll
+8	common	lseek			sys_lseek
+9	common	mmap			sys_mmap
+10	common	mprotect		sys_mprotect
+11	common	munmap			sys_munmap
+12	common	brk			sys_brk
+13	64	rt_sigaction		sys_rt_sigaction
+14	common	rt_sigprocmask		sys_rt_sigprocmask
+15	64	rt_sigreturn		sys_rt_sigreturn/ptregs
+16	64	ioctl			sys_ioctl
+17	common	pread64			sys_pread64
+18	common	pwrite64		sys_pwrite64
+19	64	readv			sys_readv
+20	64	writev			sys_writev
+21	common	access			sys_access
+22	common	pipe			sys_pipe
+23	common	select			sys_select
+24	common	sched_yield		sys_sched_yield
+25	common	mremap			sys_mremap
+26	common	msync			sys_msync
+27	common	mincore			sys_mincore
+28	common	madvise			sys_madvise
+29	common	shmget			sys_shmget
+30	common	shmat			sys_shmat
+31	common	shmctl			sys_shmctl
+32	common	dup			sys_dup
+33	common	dup2			sys_dup2
+34	common	pause			sys_pause
+35	common	nanosleep		sys_nanosleep
+36	common	getitimer		sys_getitimer
+37	common	alarm			sys_alarm
+38	common	setitimer		sys_setitimer
+39	common	getpid			sys_getpid
+40	common	sendfile		sys_sendfile64
+41	common	socket			sys_socket
+42	common	connect			sys_connect
+43	common	accept			sys_accept
+44	common	sendto			sys_sendto
+45	64	recvfrom		sys_recvfrom
+46	64	sendmsg			sys_sendmsg
+47	64	recvmsg			sys_recvmsg
+48	common	shutdown		sys_shutdown
+49	common	bind			sys_bind
+50	common	listen			sys_listen
+51	common	getsockname		sys_getsockname
+52	common	getpeername		sys_getpeername
+53	common	socketpair		sys_socketpair
+54	64	setsockopt		sys_setsockopt
+55	64	getsockopt		sys_getsockopt
+56	common	clone			sys_clone/ptregs
+57	common	fork			sys_fork/ptregs
+58	common	vfork			sys_vfork/ptregs
+59	64	execve			sys_execve/ptregs
+60	common	exit			sys_exit
+61	common	wait4			sys_wait4
+62	common	kill			sys_kill
+63	common	uname			sys_newuname
+64	common	semget			sys_semget
+65	common	semop			sys_semop
+66	common	semctl			sys_semctl
+67	common	shmdt			sys_shmdt
+68	common	msgget			sys_msgget
+69	common	msgsnd			sys_msgsnd
+70	common	msgrcv			sys_msgrcv
+71	common	msgctl			sys_msgctl
+72	common	fcntl			sys_fcntl
+73	common	flock			sys_flock
+74	common	fsync			sys_fsync
+75	common	fdatasync		sys_fdatasync
+76	common	truncate		sys_truncate
+77	common	ftruncate		sys_ftruncate
+78	common	getdents		sys_getdents
+79	common	getcwd			sys_getcwd
+80	common	chdir			sys_chdir
+81	common	fchdir			sys_fchdir
+82	common	rename			sys_rename
+83	common	mkdir			sys_mkdir
+84	common	rmdir			sys_rmdir
+85	common	creat			sys_creat
+86	common	link			sys_link
+87	common	unlink			sys_unlink
+88	common	symlink			sys_symlink
+89	common	readlink		sys_readlink
+90	common	chmod			sys_chmod
+91	common	fchmod			sys_fchmod
+92	common	chown			sys_chown
+93	common	fchown			sys_fchown
+94	common	lchown			sys_lchown
+95	common	umask			sys_umask
+96	common	gettimeofday		sys_gettimeofday
+97	common	getrlimit		sys_getrlimit
+98	common	getrusage		sys_getrusage
+99	common	sysinfo			sys_sysinfo
+100	common	times			sys_times
+101	64	ptrace			sys_ptrace
+102	common	getuid			sys_getuid
+103	common	syslog			sys_syslog
+104	common	getgid			sys_getgid
+105	common	setuid			sys_setuid
+106	common	setgid			sys_setgid
+107	common	geteuid			sys_geteuid
+108	common	getegid			sys_getegid
+109	common	setpgid			sys_setpgid
+110	common	getppid			sys_getppid
+111	common	getpgrp			sys_getpgrp
+112	common	setsid			sys_setsid
+113	common	setreuid		sys_setreuid
+114	common	setregid		sys_setregid
+115	common	getgroups		sys_getgroups
+116	common	setgroups		sys_setgroups
+117	common	setresuid		sys_setresuid
+118	common	getresuid		sys_getresuid
+119	common	setresgid		sys_setresgid
+120	common	getresgid		sys_getresgid
+121	common	getpgid			sys_getpgid
+122	common	setfsuid		sys_setfsuid
+123	common	setfsgid		sys_setfsgid
+124	common	getsid			sys_getsid
+125	common	capget			sys_capget
+126	common	capset			sys_capset
+127	64	rt_sigpending		sys_rt_sigpending
+128	64	rt_sigtimedwait		sys_rt_sigtimedwait
+129	64	rt_sigqueueinfo		sys_rt_sigqueueinfo
+130	common	rt_sigsuspend		sys_rt_sigsuspend
+131	64	sigaltstack		sys_sigaltstack
+132	common	utime			sys_utime
+133	common	mknod			sys_mknod
+134	64	uselib
+135	common	personality		sys_personality
+136	common	ustat			sys_ustat
+137	common	statfs			sys_statfs
+138	common	fstatfs			sys_fstatfs
+139	common	sysfs			sys_sysfs
+140	common	getpriority		sys_getpriority
+141	common	setpriority		sys_setpriority
+142	common	sched_setparam		sys_sched_setparam
+143	common	sched_getparam		sys_sched_getparam
+144	common	sched_setscheduler	sys_sched_setscheduler
+145	common	sched_getscheduler	sys_sched_getscheduler
+146	common	sched_get_priority_max	sys_sched_get_priority_max
+147	common	sched_get_priority_min	sys_sched_get_priority_min
+148	common	sched_rr_get_interval	sys_sched_rr_get_interval
+149	common	mlock			sys_mlock
+150	common	munlock			sys_munlock
+151	common	mlockall		sys_mlockall
+152	common	munlockall		sys_munlockall
+153	common	vhangup			sys_vhangup
+154	common	modify_ldt		sys_modify_ldt
+155	common	pivot_root		sys_pivot_root
+156	64	_sysctl			sys_sysctl
+157	common	prctl			sys_prctl
+158	common	arch_prctl		sys_arch_prctl
+159	common	adjtimex		sys_adjtimex
+160	common	setrlimit		sys_setrlimit
+161	common	chroot			sys_chroot
+162	common	sync			sys_sync
+163	common	acct			sys_acct
+164	common	settimeofday		sys_settimeofday
+165	common	mount			sys_mount
+166	common	umount2			sys_umount
+167	common	swapon			sys_swapon
+168	common	swapoff			sys_swapoff
+169	common	reboot			sys_reboot
+170	common	sethostname		sys_sethostname
+171	common	setdomainname		sys_setdomainname
+172	common	iopl			sys_iopl/ptregs
+173	common	ioperm			sys_ioperm
+174	64	create_module
+175	common	init_module		sys_init_module
+176	common	delete_module		sys_delete_module
+177	64	get_kernel_syms
+178	64	query_module
+179	common	quotactl		sys_quotactl
+180	64	nfsservctl
+181	common	getpmsg
+182	common	putpmsg
+183	common	afs_syscall
+184	common	tuxcall
+185	common	security
+186	common	gettid			sys_gettid
+187	common	readahead		sys_readahead
+188	common	setxattr		sys_setxattr
+189	common	lsetxattr		sys_lsetxattr
+190	common	fsetxattr		sys_fsetxattr
+191	common	getxattr		sys_getxattr
+192	common	lgetxattr		sys_lgetxattr
+193	common	fgetxattr		sys_fgetxattr
+194	common	listxattr		sys_listxattr
+195	common	llistxattr		sys_llistxattr
+196	common	flistxattr		sys_flistxattr
+197	common	removexattr		sys_removexattr
+198	common	lremovexattr		sys_lremovexattr
+199	common	fremovexattr		sys_fremovexattr
+200	common	tkill			sys_tkill
+201	common	time			sys_time
+202	common	futex			sys_futex
+203	common	sched_setaffinity	sys_sched_setaffinity
+204	common	sched_getaffinity	sys_sched_getaffinity
+205	64	set_thread_area
+206	64	io_setup		sys_io_setup
+207	common	io_destroy		sys_io_destroy
+208	common	io_getevents		sys_io_getevents
+209	64	io_submit		sys_io_submit
+210	common	io_cancel		sys_io_cancel
+211	64	get_thread_area
+212	common	lookup_dcookie		sys_lookup_dcookie
+213	common	epoll_create		sys_epoll_create
+214	64	epoll_ctl_old
+215	64	epoll_wait_old
+216	common	remap_file_pages	sys_remap_file_pages
+217	common	getdents64		sys_getdents64
+218	common	set_tid_address		sys_set_tid_address
+219	common	restart_syscall		sys_restart_syscall
+220	common	semtimedop		sys_semtimedop
+221	common	fadvise64		sys_fadvise64
+222	64	timer_create		sys_timer_create
+223	common	timer_settime		sys_timer_settime
+224	common	timer_gettime		sys_timer_gettime
+225	common	timer_getoverrun	sys_timer_getoverrun
+226	common	timer_delete		sys_timer_delete
+227	common	clock_settime		sys_clock_settime
+228	common	clock_gettime		sys_clock_gettime
+229	common	clock_getres		sys_clock_getres
+230	common	clock_nanosleep		sys_clock_nanosleep
+231	common	exit_group		sys_exit_group
+232	common	epoll_wait		sys_epoll_wait
+233	common	epoll_ctl		sys_epoll_ctl
+234	common	tgkill			sys_tgkill
+235	common	utimes			sys_utimes
+236	64	vserver
+237	common	mbind			sys_mbind
+238	common	set_mempolicy		sys_set_mempolicy
+239	common	get_mempolicy		sys_get_mempolicy
+240	common	mq_open			sys_mq_open
+241	common	mq_unlink		sys_mq_unlink
+242	common	mq_timedsend		sys_mq_timedsend
+243	common	mq_timedreceive		sys_mq_timedreceive
+244	64	mq_notify		sys_mq_notify
+245	common	mq_getsetattr		sys_mq_getsetattr
+246	64	kexec_load		sys_kexec_load
+247	64	waitid			sys_waitid
+248	common	add_key			sys_add_key
+249	common	request_key		sys_request_key
+250	common	keyctl			sys_keyctl
+251	common	ioprio_set		sys_ioprio_set
+252	common	ioprio_get		sys_ioprio_get
+253	common	inotify_init		sys_inotify_init
+254	common	inotify_add_watch	sys_inotify_add_watch
+255	common	inotify_rm_watch	sys_inotify_rm_watch
+256	common	migrate_pages		sys_migrate_pages
+257	common	openat			sys_openat
+258	common	mkdirat			sys_mkdirat
+259	common	mknodat			sys_mknodat
+260	common	fchownat		sys_fchownat
+261	common	futimesat		sys_futimesat
+262	common	newfstatat		sys_newfstatat
+263	common	unlinkat		sys_unlinkat
+264	common	renameat		sys_renameat
+265	common	linkat			sys_linkat
+266	common	symlinkat		sys_symlinkat
+267	common	readlinkat		sys_readlinkat
+268	common	fchmodat		sys_fchmodat
+269	common	faccessat		sys_faccessat
+270	common	pselect6		sys_pselect6
+271	common	ppoll			sys_ppoll
+272	common	unshare			sys_unshare
+273	64	set_robust_list		sys_set_robust_list
+274	64	get_robust_list		sys_get_robust_list
+275	common	splice			sys_splice
+276	common	tee			sys_tee
+277	common	sync_file_range		sys_sync_file_range
+278	64	vmsplice		sys_vmsplice
+279	64	move_pages		sys_move_pages
+280	common	utimensat		sys_utimensat
+281	common	epoll_pwait		sys_epoll_pwait
+282	common	signalfd		sys_signalfd
+283	common	timerfd_create		sys_timerfd_create
+284	common	eventfd			sys_eventfd
+285	common	fallocate		sys_fallocate
+286	common	timerfd_settime		sys_timerfd_settime
+287	common	timerfd_gettime		sys_timerfd_gettime
+288	common	accept4			sys_accept4
+289	common	signalfd4		sys_signalfd4
+290	common	eventfd2		sys_eventfd2
+291	common	epoll_create1		sys_epoll_create1
+292	common	dup3			sys_dup3
+293	common	pipe2			sys_pipe2
+294	common	inotify_init1		sys_inotify_init1
+295	64	preadv			sys_preadv
+296	64	pwritev			sys_pwritev
+297	64	rt_tgsigqueueinfo	sys_rt_tgsigqueueinfo
+298	common	perf_event_open		sys_perf_event_open
+299	64	recvmmsg		sys_recvmmsg
+300	common	fanotify_init		sys_fanotify_init
+301	common	fanotify_mark		sys_fanotify_mark
+302	common	prlimit64		sys_prlimit64
+303	common	name_to_handle_at	sys_name_to_handle_at
+304	common	open_by_handle_at	sys_open_by_handle_at
+305	common	clock_adjtime		sys_clock_adjtime
+306	common	syncfs			sys_syncfs
+307	64	sendmmsg		sys_sendmmsg
+308	common	setns			sys_setns
+309	common	getcpu			sys_getcpu
+310	64	process_vm_readv	sys_process_vm_readv
+311	64	process_vm_writev	sys_process_vm_writev
+312	common	kcmp			sys_kcmp
+313	common	finit_module		sys_finit_module
+314	common	sched_setattr		sys_sched_setattr
+315	common	sched_getattr		sys_sched_getattr
+316	common	renameat2		sys_renameat2
+317	common	seccomp			sys_seccomp
+318	common	getrandom		sys_getrandom
+319	common	memfd_create		sys_memfd_create
+320	common	kexec_file_load		sys_kexec_file_load
+321	common	bpf			sys_bpf
+322	64	execveat		sys_execveat/ptregs
+323	common	userfaultfd		sys_userfaultfd
+324	common	membarrier		sys_membarrier
+325	common	mlock2			sys_mlock2
+326	common	copy_file_range		sys_copy_file_range
+327	64	preadv2			sys_preadv2
+328	64	pwritev2		sys_pwritev2
+
+#
+# x32-specific system call numbers start at 512 to avoid cache impact
+# for native 64-bit operation.
+#
+512	x32	rt_sigaction		compat_sys_rt_sigaction
+513	x32	rt_sigreturn		sys32_x32_rt_sigreturn
+514	x32	ioctl			compat_sys_ioctl
+515	x32	readv			compat_sys_readv
+516	x32	writev			compat_sys_writev
+517	x32	recvfrom		compat_sys_recvfrom
+518	x32	sendmsg			compat_sys_sendmsg
+519	x32	recvmsg			compat_sys_recvmsg
+520	x32	execve			compat_sys_execve/ptregs
+521	x32	ptrace			compat_sys_ptrace
+522	x32	rt_sigpending		compat_sys_rt_sigpending
+523	x32	rt_sigtimedwait		compat_sys_rt_sigtimedwait
+524	x32	rt_sigqueueinfo		compat_sys_rt_sigqueueinfo
+525	x32	sigaltstack		compat_sys_sigaltstack
+526	x32	timer_create		compat_sys_timer_create
+527	x32	mq_notify		compat_sys_mq_notify
+528	x32	kexec_load		compat_sys_kexec_load
+529	x32	waitid			compat_sys_waitid
+530	x32	set_robust_list		compat_sys_set_robust_list
+531	x32	get_robust_list		compat_sys_get_robust_list
+532	x32	vmsplice		compat_sys_vmsplice
+533	x32	move_pages		compat_sys_move_pages
+534	x32	preadv			compat_sys_preadv64
+535	x32	pwritev			compat_sys_pwritev64
+536	x32	rt_tgsigqueueinfo	compat_sys_rt_tgsigqueueinfo
+537	x32	recvmmsg		compat_sys_recvmmsg
+538	x32	sendmmsg		compat_sys_sendmmsg
+539	x32	process_vm_readv	compat_sys_process_vm_readv
+540	x32	process_vm_writev	compat_sys_process_vm_writev
+541	x32	setsockopt		compat_sys_setsockopt
+542	x32	getsockopt		compat_sys_getsockopt
+543	x32	io_setup		compat_sys_io_setup
+544	x32	io_submit		compat_sys_io_submit
+545	x32	execveat		compat_sys_execveat/ptregs
diff --git a/tools/perf/arch/x86/entry/syscalls/syscalltbl.sh b/tools/perf/arch/x86/entry/syscalls/syscalltbl.sh
new file mode 100755
index 0000000..49a18b9
--- /dev/null
+++ b/tools/perf/arch/x86/entry/syscalls/syscalltbl.sh
@@ -0,0 +1,39 @@
+#!/bin/sh
+
+in="$1"
+arch="$2"
+
+syscall_macro() {
+    nr="$1"
+    name="$2"
+
+    echo "	[$nr] = \"$name\","
+}
+
+emit() {
+    nr="$1"
+    entry="$2"
+
+    syscall_macro "$nr" "$entry"
+}
+
+echo "static const char *syscalltbl_${arch}[] = {"
+
+sorted_table=$(mktemp /tmp/syscalltbl.XXXXXX)
+grep '^[0-9]' "$in" | sort -n > $sorted_table
+
+max_nr=0
+while read nr abi name entry compat; do
+    if [ $nr -ge 512 ] ; then # discard compat syscalls
+        break
+    fi
+
+    emit "$nr" "$name"
+    max_nr=$nr
+done < $sorted_table
+
+rm -f $sorted_table
+
+echo "};"
+
+echo "#define SYSCALLTBL_${arch}_MAX_ID ${max_nr}"
diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c
index 9d29ee2..d4aa567 100644
--- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c
+++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c
@@ -71,7 +71,7 @@
 
 	CHECK__(parse_events(evlist, "cycles:u", NULL));
 
-	perf_evlist__config(evlist, &opts);
+	perf_evlist__config(evlist, &opts, NULL);
 
 	evsel = perf_evlist__first(evlist);
 
diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c
index d66f9ad..7dc3063 100644
--- a/tools/perf/arch/x86/util/intel-bts.c
+++ b/tools/perf/arch/x86/util/intel-bts.c
@@ -438,6 +438,11 @@
 	if (!intel_bts_pmu)
 		return NULL;
 
+	if (setenv("JITDUMP_USE_ARCH_TIMESTAMP", "1", 1)) {
+		*err = -errno;
+		return NULL;
+	}
+
 	btsr = zalloc(sizeof(struct intel_bts_recording));
 	if (!btsr) {
 		*err = -ENOMEM;
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index a339517..a07b960 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -1027,6 +1027,11 @@
 	if (!intel_pt_pmu)
 		return NULL;
 
+	if (setenv("JITDUMP_USE_ARCH_TIMESTAMP", "1", 1)) {
+		*err = -errno;
+		return NULL;
+	}
+
 	ptr = zalloc(sizeof(struct intel_pt_recording));
 	if (!ptr) {
 		*err = -ENOMEM;
diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c
index fd28684..357f1b1 100644
--- a/tools/perf/arch/x86/util/tsc.c
+++ b/tools/perf/arch/x86/util/tsc.c
@@ -7,7 +7,6 @@
 #include <linux/types.h>
 #include "../../util/debug.h"
 #include "../../util/tsc.h"
-#include "tsc.h"
 
 int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
 			     struct perf_tsc_conversion *tc)
@@ -46,3 +45,34 @@
 
 	return low | ((u64)high) << 32;
 }
+
+int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc,
+				struct perf_tool *tool,
+				perf_event__handler_t process,
+				struct machine *machine)
+{
+	union perf_event event = {
+		.time_conv = {
+			.header = {
+				.type = PERF_RECORD_TIME_CONV,
+				.size = sizeof(struct time_conv_event),
+			},
+		},
+	};
+	struct perf_tsc_conversion tc;
+	int err;
+
+	err = perf_read_tsc_conversion(pc, &tc);
+	if (err == -EOPNOTSUPP)
+		return 0;
+	if (err)
+		return err;
+
+	pr_debug2("Synthesizing TSC conversion information\n");
+
+	event.time_conv.time_mult  = tc.time_mult;
+	event.time_conv.time_shift = tc.time_shift;
+	event.time_conv.time_zero  = tc.time_zero;
+
+	return process(tool, &event, NULL, machine);
+}
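
A consumer of PERF_RECORD_TIME_CONV turns raw TSC values into perf time with
the usual mult/shift scheme. A sketch of that inverse conversion (compare
tsc_to_perf_time() in tools/perf/util/tsc.c; the helper name here is
illustrative):

  #include <linux/types.h>

  static __u64 tsc_to_time(__u64 cyc, __u32 time_mult, __u16 time_shift,
  			 __u64 time_zero)
  {
  	__u64 quot = cyc >> time_shift;
  	__u64 rem  = cyc & (((__u64)1 << time_shift) - 1);

  	/* Splitting quotient and remainder keeps the 64-bit
  	 * intermediate product from overflowing. */
  	return time_zero + quot * time_mult +
  	       ((rem * time_mult) >> time_shift);
  }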
diff --git a/tools/perf/arch/x86/util/tsc.h b/tools/perf/arch/x86/util/tsc.h
deleted file mode 100644
index 2edc4d3..0000000
--- a/tools/perf/arch/x86/util/tsc.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#ifndef TOOLS_PERF_ARCH_X86_UTIL_TSC_H__
-#define TOOLS_PERF_ARCH_X86_UTIL_TSC_H__
-
-#include <linux/types.h>
-
-struct perf_tsc_conversion {
-	u16 time_shift;
-	u32 time_mult;
-	u64 time_zero;
-};
-
-struct perf_event_mmap_page;
-
-int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
-			     struct perf_tsc_conversion *tc);
-
-#endif /* TOOLS_PERF_ARCH_X86_UTIL_TSC_H__ */
diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
index 6a18ce2..6952db6 100644
--- a/tools/perf/bench/futex-lock-pi.c
+++ b/tools/perf/bench/futex-lock-pi.c
@@ -83,7 +83,7 @@
 	do {
 		int ret;
 	again:
-		ret = futex_lock_pi(w->futex, NULL, 0, futex_flag);
+		ret = futex_lock_pi(w->futex, NULL, futex_flag);
 
 		if (ret) { /* handle lock acquisition */
 			if (!silent)
diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h
index d44de9f..b2e06d1 100644
--- a/tools/perf/bench/futex.h
+++ b/tools/perf/bench/futex.h
@@ -57,13 +57,11 @@
 
 /**
  * futex_lock_pi() - block on uaddr as a PI mutex
- * @detect:	whether (1) or not (0) to perform deadlock detection
  */
 static inline int
-futex_lock_pi(u_int32_t *uaddr, struct timespec *timeout, int detect,
-	      int opflags)
+futex_lock_pi(u_int32_t *uaddr, struct timespec *timeout, int opflags)
 {
-	return futex(uaddr, FUTEX_LOCK_PI, detect, timeout, NULL, 0, opflags);
+	return futex(uaddr, FUTEX_LOCK_PI, 0, timeout, NULL, 0, opflags);
 }
 
 /**
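
With the 'detect' argument gone, callers pass only the futex word, an optional
timeout and the op flags. An illustrative pairing with futex_unlock_pi() from
the same header, with the retry loop modeled on the benchmark above (this
snippet is a sketch, not part of the patch):

  #include <sys/types.h>
  #include "futex.h"	/* the bench header shown above */

  static u_int32_t lock_word;

  static void lock_unlock_once(int flags)
  {
  	/* The benchmark retries failed lock attempts; do the same here. */
  	while (futex_lock_pi(&lock_word, NULL, flags))
  		;
  	futex_unlock_pi(&lock_word, flags);
  }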
diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c
index a91aa85..2b54d0f 100644
--- a/tools/perf/bench/mem-functions.c
+++ b/tools/perf/bench/mem-functions.c
@@ -6,6 +6,7 @@
  * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
  */
 
+#include "debug.h"
 #include "../perf.h"
 #include "../util/util.h"
 #include <subcmd/parse-options.h>
@@ -63,14 +64,16 @@
 	.config		= PERF_COUNT_HW_CPU_CYCLES
 };
 
-static void init_cycles(void)
+static int init_cycles(void)
 {
 	cycles_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag());
 
-	if (cycles_fd < 0 && errno == ENOSYS)
-		die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
-	else
-		BUG_ON(cycles_fd < 0);
+	if (cycles_fd < 0 && errno == ENOSYS) {
+		pr_debug("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
+		return -1;
+	}
+
+	return cycles_fd;
 }
 
 static u64 get_cycles(void)
@@ -155,8 +158,13 @@
 
 	argc = parse_options(argc, argv, options, info->usage, 0);
 
-	if (use_cycles)
-		init_cycles();
+	if (use_cycles) {
+		i = init_cycles();
+		if (i < 0) {
+			fprintf(stderr, "Failed to open cycles counter\n");
+			return i;
+		}
+	}
 
 	size = (size_t)perf_atoll((char *)size_str);
 	size_total = (double)size * nr_loops;
diff --git a/tools/perf/builtin-config.c b/tools/perf/builtin-config.c
index c42448e..fe1b77f 100644
--- a/tools/perf/builtin-config.c
+++ b/tools/perf/builtin-config.c
@@ -12,6 +12,7 @@
 #include <subcmd/parse-options.h>
 #include "util/util.h"
 #include "util/debug.h"
+#include "util/config.h"
 
 static bool use_system_config, use_user_config;
 
@@ -32,13 +33,28 @@
 	OPT_END()
 };
 
-static int show_config(const char *key, const char *value,
-		       void *cb __maybe_unused)
+static int show_config(struct perf_config_set *set)
 {
-	if (value)
-		printf("%s=%s\n", key, value);
-	else
-		printf("%s\n", key);
+	struct perf_config_section *section;
+	struct perf_config_item *item;
+	struct list_head *sections;
+
+	if (set == NULL)
+		return -1;
+
+	sections = &set->sections;
+	if (list_empty(sections))
+		return -1;
+
+	list_for_each_entry(section, sections, node) {
+		list_for_each_entry(item, &section->items, node) {
+			char *value = item->value;
+
+			if (value)
+				printf("%s.%s=%s\n", section->name,
+				       item->name, value);
+		}
+	}
 
 	return 0;
 }
@@ -46,6 +62,7 @@
 int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused)
 {
 	int ret = 0;
+	struct perf_config_set *set;
 	char *user_config = mkpath("%s/.perfconfig", getenv("HOME"));
 
 	argc = parse_options(argc, argv, config_options, config_usage,
@@ -63,13 +80,19 @@
 	else if (use_user_config)
 		config_exclusive_filename = user_config;
 
+	set = perf_config_set__new();
+	if (!set) {
+		ret = -1;
+		goto out_err;
+	}
+
 	switch (actions) {
 	case ACTION_LIST:
 		if (argc) {
 			pr_err("Error: takes no arguments\n");
 			parse_options_usage(config_usage, config_options, "l", 1);
 		} else {
-			ret = perf_config(show_config, NULL);
+			ret = show_config(set);
 			if (ret < 0) {
 				const char * config_filename = config_exclusive_filename;
 				if (!config_exclusive_filename)
@@ -83,5 +106,7 @@
 		usage_with_options(config_usage, config_options);
 	}
 
+	perf_config_set__delete(set);
+out_err:
 	return ret;
 }
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 8053a8c..9ce354f 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -428,7 +428,7 @@
 	struct rb_root *root;
 	struct rb_node *next;
 
-	if (sort__need_collapse)
+	if (hists__has(hists, need_collapse))
 		root = &hists->entries_collapsed;
 	else
 		root = hists->entries_in;
@@ -450,7 +450,7 @@
 	struct rb_root *root;
 	struct rb_node *next;
 
-	if (sort__need_collapse)
+	if (hists__has(hists, need_collapse))
 		root = &hists->entries_collapsed;
 	else
 		root = hists->entries_in;
diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c
index bc1de9b..f9830c9 100644
--- a/tools/perf/builtin-help.c
+++ b/tools/perf/builtin-help.c
@@ -61,6 +61,7 @@
 	struct child_process ec_process;
 	const char *argv_ec[] = { "emacsclient", "--version", NULL };
 	int version;
+	int ret = -1;
 
 	/* emacsclient prints its version number on stderr */
 	memset(&ec_process, 0, sizeof(ec_process));
@@ -71,7 +72,10 @@
 		fprintf(stderr, "Failed to start emacsclient.\n");
 		return -1;
 	}
-	strbuf_read(&buffer, ec_process.err, 20);
+	if (strbuf_read(&buffer, ec_process.err, 20) < 0) {
+		fprintf(stderr, "Failed to read emacsclient version\n");
+		goto out;
+	}
 	close(ec_process.err);
 
 	/*
@@ -82,8 +86,7 @@
 
 	if (prefixcmp(buffer.buf, "emacsclient")) {
 		fprintf(stderr, "Failed to parse emacsclient version.\n");
-		strbuf_release(&buffer);
-		return -1;
+		goto out;
 	}
 
 	version = atoi(buffer.buf + strlen("emacsclient"));
@@ -92,12 +95,11 @@
 		fprintf(stderr,
 			"emacsclient version '%d' too old (< 22).\n",
 			version);
-		strbuf_release(&buffer);
-		return -1;
-	}
-
+	} else
+		ret = 0;
+out:
 	strbuf_release(&buffer);
-	return 0;
+	return ret;
 }
 
 static void exec_woman_emacs(const char *path, const char *page)
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index d1a2d10..e5afa8f 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -748,6 +748,7 @@
 			.auxtrace_info	= perf_event__repipe_op2_synth,
 			.auxtrace	= perf_event__repipe_auxtrace,
 			.auxtrace_error	= perf_event__repipe_op2_synth,
+			.time_conv	= perf_event__repipe_op2_synth,
 			.finished_round	= perf_event__repipe_oe_synth,
 			.build_id	= perf_event__repipe_op2_synth,
 			.id_index	= perf_event__repipe_op2_synth,
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index c9cb3be..58adfee 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -375,7 +375,7 @@
 	}
 
 	al.thread = machine__findnew_thread(machine, sample->pid, sample->tid);
-	sample__resolve_callchain(sample, NULL, evsel, &al, 16);
+	sample__resolve_callchain(sample, &callchain_cursor, NULL, evsel, &al, 16);
 
 	callchain_cursor_commit(&callchain_cursor);
 	while (true) {
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index bff6664..6487c06 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -982,7 +982,7 @@
 	struct perf_evlist *evlist = kvm->evlist;
 	char sbuf[STRERR_BUFSIZE];
 
-	perf_evlist__config(evlist, &kvm->opts);
+	perf_evlist__config(evlist, &kvm->opts, NULL);
 
 	/*
 	 * Note: exclude_{guest,host} do not apply here.
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index 85db3be..1dc140c 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -62,19 +62,22 @@
 	int rec_argc, i = 0, j;
 	const char **rec_argv;
 	int ret;
+	bool all_user = false, all_kernel = false;
 	struct option options[] = {
 	OPT_CALLBACK('e', "event", &mem, "event",
 		     "event selector. use 'perf mem record -e list' to list available events",
 		     parse_record_events),
 	OPT_INCR('v', "verbose", &verbose,
 		 "be more verbose (show counter open errors, etc)"),
+	OPT_BOOLEAN('U', "--all-user", &all_user, "collect only user level data"),
+	OPT_BOOLEAN('K', "--all-kernel", &all_kernel, "collect only kernel level data"),
 	OPT_END()
 	};
 
 	argc = parse_options(argc, argv, options, record_mem_usage,
 			     PARSE_OPT_STOP_AT_NON_OPTION);
 
-	rec_argc = argc + 7; /* max number of arguments */
+	rec_argc = argc + 9; /* max number of arguments */
 	rec_argv = calloc(rec_argc + 1, sizeof(char *));
 	if (!rec_argv)
 		return -1;
@@ -103,6 +106,12 @@
 		rec_argv[i++] = perf_mem_events__name(j);
 	};
 
+	if (all_user)
+		rec_argv[i++] = "--all-user";
+
+	if (all_kernel)
+		rec_argv[i++] = "--all-kernel";
+
 	for (j = 0; j < argc; j++, i++)
 		rec_argv[i] = argv[j];
 
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 515510e..f3679c4 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -29,10 +29,12 @@
 #include "util/data.h"
 #include "util/perf_regs.h"
 #include "util/auxtrace.h"
+#include "util/tsc.h"
 #include "util/parse-branch-options.h"
 #include "util/parse-regs-options.h"
 #include "util/llvm-utils.h"
 #include "util/bpf-loader.h"
+#include "util/trigger.h"
 #include "asm/bug.h"
 
 #include <unistd.h>
@@ -55,6 +57,8 @@
 	bool			no_buildid_cache;
 	bool			no_buildid_cache_set;
 	bool			buildid_all;
+	bool			timestamp_filename;
+	bool			switch_output;
 	unsigned long long	samples;
 };
 
@@ -124,9 +128,10 @@
 static volatile int done;
 static volatile int signr = -1;
 static volatile int child_finished;
-static volatile int auxtrace_snapshot_enabled;
-static volatile int auxtrace_snapshot_err;
+
 static volatile int auxtrace_record__snapshot_started;
+static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
+static DEFINE_TRIGGER(switch_output_trigger);
 
 static void sig_handler(int sig)
 {
@@ -244,11 +249,12 @@
 {
 	pr_debug("Recording AUX area tracing snapshot\n");
 	if (record__auxtrace_read_snapshot_all(rec) < 0) {
-		auxtrace_snapshot_err = -1;
+		trigger_error(&auxtrace_snapshot_trigger);
 	} else {
-		auxtrace_snapshot_err = auxtrace_record__snapshot_finish(rec->itr);
-		if (!auxtrace_snapshot_err)
-			auxtrace_snapshot_enabled = 1;
+		if (auxtrace_record__snapshot_finish(rec->itr))
+			trigger_error(&auxtrace_snapshot_trigger);
+		else
+			trigger_ready(&auxtrace_snapshot_trigger);
 	}
 }
 
@@ -283,7 +289,7 @@
 	struct record_opts *opts = &rec->opts;
 	int rc = 0;
 
-	perf_evlist__config(evlist, opts);
+	perf_evlist__config(evlist, opts, &callchain_param);
 
 	evlist__for_each(evlist, pos) {
 try_again:
@@ -494,6 +500,73 @@
 	return;
 }
 
+static int record__synthesize_workload(struct record *rec)
+{
+	struct {
+		struct thread_map map;
+		struct thread_map_data map_data;
+	} thread_map;
+
+	thread_map.map.nr = 1;
+	thread_map.map.map[0].pid = rec->evlist->workload.pid;
+	thread_map.map.map[0].comm = NULL;
+	return perf_event__synthesize_thread_map(&rec->tool, &thread_map.map,
+						 process_synthesized_event,
+						 &rec->session->machines.host,
+						 rec->opts.sample_address,
+						 rec->opts.proc_map_timeout);
+}
+
+static int record__synthesize(struct record *rec);
+
+static int
+record__switch_output(struct record *rec, bool at_exit)
+{
+	struct perf_data_file *file = &rec->file;
+	int fd, err;
+
+	/* Same size as a timestamp like "2015122520103046" */
+	char timestamp[] = "InvalidTimestamp";
+
+	rec->samples = 0;
+	record__finish_output(rec);
+	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
+	if (err) {
+		pr_err("Failed to get current timestamp\n");
+		return -EINVAL;
+	}
+
+	fd = perf_data_file__switch(file, timestamp,
+				    rec->session->header.data_offset,
+				    at_exit);
+	if (fd >= 0 && !at_exit) {
+		rec->bytes_written = 0;
+		rec->session->header.data_size = 0;
+	}
+
+	if (!quiet)
+		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
+			file->path, timestamp);
+
+	/* Output tracking events */
+	if (!at_exit) {
+		record__synthesize(rec);
+
+		/*
+		 * In 'perf record --switch-output' without -a,
+		 * record__synthesize() in record__switch_output() won't
+		 * generate tracking events because there's no thread_map
+		 * in evlist, which causes the newly created perf.data to
+		 * lack map and comm information.
+		 * Create a fake thread_map and directly call
+		 * perf_event__synthesize_thread_map() for those events.
+		 */
+		if (target__none(&rec->opts.target))
+			record__synthesize_workload(rec);
+	}
+	return fd;
+}
+
 static volatile int workload_exec_errno;
 
 /*
@@ -512,6 +585,15 @@
 
 static void snapshot_sig_handler(int sig);
 
+int __weak
+perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
+			    struct perf_tool *tool __maybe_unused,
+			    perf_event__handler_t process __maybe_unused,
+			    struct machine *machine __maybe_unused)
+{
+	return 0;
+}
+
 static int record__synthesize(struct record *rec)
 {
 	struct perf_session *session = rec->session;
@@ -549,6 +631,11 @@
 		}
 	}
 
+	err = perf_event__synth_time_conv(rec->evlist->mmap[0].base, tool,
+					  process_synthesized_event, machine);
+	if (err)
+		goto out;
+
 	if (rec->opts.full_auxtrace) {
 		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
 					session, process_synthesized_event);
@@ -600,10 +687,16 @@
 	signal(SIGCHLD, sig_handler);
 	signal(SIGINT, sig_handler);
 	signal(SIGTERM, sig_handler);
-	if (rec->opts.auxtrace_snapshot_mode)
+
+	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output) {
 		signal(SIGUSR2, snapshot_sig_handler);
-	else
+		if (rec->opts.auxtrace_snapshot_mode)
+			trigger_on(&auxtrace_snapshot_trigger);
+		if (rec->switch_output)
+			trigger_on(&switch_output_trigger);
+	} else {
 		signal(SIGUSR2, SIG_IGN);
+	}
 
 	session = perf_session__new(file, false, tool);
 	if (session == NULL) {
@@ -729,27 +822,45 @@
 		perf_evlist__enable(rec->evlist);
 	}
 
-	auxtrace_snapshot_enabled = 1;
+	trigger_ready(&auxtrace_snapshot_trigger);
+	trigger_ready(&switch_output_trigger);
 	for (;;) {
 		unsigned long long hits = rec->samples;
 
 		if (record__mmap_read_all(rec) < 0) {
-			auxtrace_snapshot_enabled = 0;
+			trigger_error(&auxtrace_snapshot_trigger);
+			trigger_error(&switch_output_trigger);
 			err = -1;
 			goto out_child;
 		}
 
 		if (auxtrace_record__snapshot_started) {
 			auxtrace_record__snapshot_started = 0;
-			if (!auxtrace_snapshot_err)
+			if (!trigger_is_error(&auxtrace_snapshot_trigger))
 				record__read_auxtrace_snapshot(rec);
-			if (auxtrace_snapshot_err) {
+			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
 				pr_err("AUX area tracing snapshot failed\n");
 				err = -1;
 				goto out_child;
 			}
 		}
 
+		if (trigger_is_hit(&switch_output_trigger)) {
+			trigger_ready(&switch_output_trigger);
+
+			if (!quiet)
+				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
+					waking);
+			waking = 0;
+			fd = record__switch_output(rec, false);
+			if (fd < 0) {
+				pr_err("Failed to switch to new file\n");
+				trigger_error(&switch_output_trigger);
+				err = fd;
+				goto out_child;
+			}
+		}
+
 		if (hits == rec->samples) {
 			if (done || draining)
 				break;
@@ -772,12 +883,13 @@
 		 * disable events in this case.
 		 */
 		if (done && !disabled && !target__none(&opts->target)) {
-			auxtrace_snapshot_enabled = 0;
+			trigger_off(&auxtrace_snapshot_trigger);
 			perf_evlist__disable(rec->evlist);
 			disabled = true;
 		}
 	}
-	auxtrace_snapshot_enabled = 0;
+	trigger_off(&auxtrace_snapshot_trigger);
+	trigger_off(&switch_output_trigger);
 
 	if (forks && workload_exec_errno) {
 		char msg[STRERR_BUFSIZE];
@@ -811,11 +923,22 @@
 	/* this will be recalculated during process_buildids() */
 	rec->samples = 0;
 
-	if (!err)
-		record__finish_output(rec);
+	if (!err) {
+		if (!rec->timestamp_filename) {
+			record__finish_output(rec);
+		} else {
+			fd = record__switch_output(rec, true);
+			if (fd < 0) {
+				status = fd;
+				goto out_delete_session;
+			}
+		}
+	}
 
 	if (!err && !quiet) {
 		char samples[128];
+		const char *postfix = rec->timestamp_filename ?
+					".<timestamp>" : "";
 
 		if (rec->samples && !rec->opts.full_auxtrace)
 			scnprintf(samples, sizeof(samples),
@@ -823,9 +946,9 @@
 		else
 			samples[0] = '\0';
 
-		fprintf(stderr,	"[ perf record: Captured and wrote %.3f MB %s%s ]\n",
+		fprintf(stderr,	"[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
 			perf_data_file__size(file) / 1024.0 / 1024.0,
-			file->path, samples);
+			file->path, postfix, samples);
 	}
 
 out_delete_session:
@@ -833,58 +956,61 @@
 	return status;
 }
 
-static void callchain_debug(void)
+static void callchain_debug(struct callchain_param *callchain)
 {
 	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
 
-	pr_debug("callchain: type %s\n", str[callchain_param.record_mode]);
+	pr_debug("callchain: type %s\n", str[callchain->record_mode]);
 
-	if (callchain_param.record_mode == CALLCHAIN_DWARF)
+	if (callchain->record_mode == CALLCHAIN_DWARF)
 		pr_debug("callchain: stack dump size %d\n",
-			 callchain_param.dump_size);
+			 callchain->dump_size);
+}
+
+int record_opts__parse_callchain(struct record_opts *record,
+				 struct callchain_param *callchain,
+				 const char *arg, bool unset)
+{
+	int ret;
+
+	callchain->enabled = !unset;
+
+	/* --no-call-graph */
+	if (unset) {
+		callchain->record_mode = CALLCHAIN_NONE;
+		pr_debug("callchain: disabled\n");
+		return 0;
+	}
+
+	ret = parse_callchain_record_opt(arg, callchain);
+	if (!ret) {
+		/* Enable data address sampling for DWARF unwind. */
+		if (callchain->record_mode == CALLCHAIN_DWARF)
+			record->sample_address = true;
+		callchain_debug(callchain);
+	}
+
+	return ret;
 }
 
 int record_parse_callchain_opt(const struct option *opt,
 			       const char *arg,
 			       int unset)
 {
-	int ret;
-	struct record_opts *record = (struct record_opts *)opt->value;
-
-	record->callgraph_set = true;
-	callchain_param.enabled = !unset;
-
-	/* --no-call-graph */
-	if (unset) {
-		callchain_param.record_mode = CALLCHAIN_NONE;
-		pr_debug("callchain: disabled\n");
-		return 0;
-	}
-
-	ret = parse_callchain_record_opt(arg, &callchain_param);
-	if (!ret) {
-		/* Enable data address sampling for DWARF unwind. */
-		if (callchain_param.record_mode == CALLCHAIN_DWARF)
-			record->sample_address = true;
-		callchain_debug();
-	}
-
-	return ret;
+	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
 }
 
 int record_callchain_opt(const struct option *opt,
 			 const char *arg __maybe_unused,
 			 int unset __maybe_unused)
 {
-	struct record_opts *record = (struct record_opts *)opt->value;
+	struct callchain_param *callchain = opt->value;
 
-	record->callgraph_set = true;
-	callchain_param.enabled = true;
+	callchain->enabled = true;
 
-	if (callchain_param.record_mode == CALLCHAIN_NONE)
-		callchain_param.record_mode = CALLCHAIN_FP;
+	if (callchain->record_mode == CALLCHAIN_NONE)
+		callchain->record_mode = CALLCHAIN_FP;
 
-	callchain_debug();
+	callchain_debug(callchain);
 	return 0;
 }
 
@@ -1122,7 +1248,7 @@
 		     record__parse_mmap_pages),
 	OPT_BOOLEAN(0, "group", &record.opts.group,
 		    "put the counters into a counter group"),
-	OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
+	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
 			   NULL, "enables call-graph recording" ,
 			   &record_callchain_opt),
 	OPT_CALLBACK(0, "call-graph", &record.opts,
@@ -1195,6 +1321,10 @@
 		   "file", "vmlinux pathname"),
 	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
 		    "Record build-id of all DSOs regardless of hits"),
+	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
+		    "append timestamp to output filename"),
+	OPT_BOOLEAN(0, "switch-output", &record.switch_output,
+		    "Switch output when receiving SIGUSR2"),
 	OPT_END()
 };
 
@@ -1250,6 +1380,9 @@
 		return -EINVAL;
 	}
 
+	if (rec->switch_output)
+		rec->timestamp_filename = true;
+
 	if (!rec->itr) {
 		rec->itr = auxtrace_record__init(rec->evlist, &err);
 		if (err)
@@ -1261,6 +1394,14 @@
 	if (err)
 		return err;
 
+	err = bpf__setup_stdout(rec->evlist);
+	if (err) {
+		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
+		pr_err("ERROR: Setup BPF stdout failed: %s\n", errbuf);
+		return err;
+	}
+
 	err = -ENOMEM;
 
 	symbol__init(NULL);
@@ -1275,8 +1416,36 @@
 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
 "even with a suitable vmlinux or kallsyms file.\n\n");
 
-	if (rec->no_buildid_cache || rec->no_buildid)
+	if (rec->no_buildid_cache || rec->no_buildid) {
 		disable_buildid_cache();
+	} else if (rec->switch_output) {
+		/*
+		 * In 'perf record --switch-output', disable buildid
+		 * generation by default to reduce data file switching
+		 * overhead. Still generate buildids if they are explicitly
+		 * required using
+		 *
+		 *  perf record --switch-output --no-no-buildid \
+		 *              --no-no-buildid-cache
+		 *
+		 * The following code is equivalent to:
+		 *
+		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
+		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
+		 *         disable_buildid_cache();
+		 */
+		bool disable = true;
+
+		if (rec->no_buildid_set && !rec->no_buildid)
+			disable = false;
+		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
+			disable = false;
+		if (disable) {
+			rec->no_buildid = true;
+			rec->no_buildid_cache = true;
+			disable_buildid_cache();
+		}
+	}
 
 	if (rec->evlist->nr_entries == 0 &&
 	    perf_evlist__add_default(rec->evlist) < 0) {
@@ -1335,9 +1504,13 @@
 
 static void snapshot_sig_handler(int sig __maybe_unused)
 {
-	if (!auxtrace_snapshot_enabled)
-		return;
-	auxtrace_snapshot_enabled = 0;
-	auxtrace_snapshot_err = auxtrace_record__snapshot_start(record.itr);
-	auxtrace_record__snapshot_started = 1;
+	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
+		trigger_hit(&auxtrace_snapshot_trigger);
+		auxtrace_record__snapshot_started = 1;
+		if (auxtrace_record__snapshot_start(record.itr))
+			trigger_error(&auxtrace_snapshot_trigger);
+	}
+
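+	/* a single SIGUSR2 services both snapshotting and output switching */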
+	if (trigger_is_ready(&switch_output_trigger))
+		trigger_hit(&switch_output_trigger);
 }
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 160ea23..87d40e3 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -47,7 +47,6 @@
 	struct perf_tool	tool;
 	struct perf_session	*session;
 	bool			use_tui, use_gtk, use_stdio;
-	bool			dont_use_callchains;
 	bool			show_full_info;
 	bool			show_threads;
 	bool			inverted_callchain;
@@ -235,7 +234,7 @@
 		sample_type |= PERF_SAMPLE_BRANCH_STACK;
 
 	if (!is_pipe && !(sample_type & PERF_SAMPLE_CALLCHAIN)) {
-		if (sort__has_parent) {
+		if (perf_hpp_list.parent) {
 			ui__error("Selected --sort parent, but no "
 				    "callchain data. Did you call "
 				    "'perf record' without -g?\n");
@@ -247,7 +246,7 @@
 				  "you call 'perf record' without -g?\n");
 			return -1;
 		}
-	} else if (!rep->dont_use_callchains &&
+	} else if (!callchain_param.enabled &&
 		   callchain_param.mode != CHAIN_NONE &&
 		   !symbol_conf.use_callchain) {
 			symbol_conf.use_callchain = true;
@@ -599,13 +598,15 @@
 static int
 report_parse_callchain_opt(const struct option *opt, const char *arg, int unset)
 {
-	struct report *rep = (struct report *)opt->value;
+	struct callchain_param *callchain = opt->value;
 
+	callchain->enabled = !unset;
 	/*
 	 * --no-call-graph
 	 */
 	if (unset) {
-		rep->dont_use_callchains = true;
+		symbol_conf.use_callchain = false;
+		callchain->mode = CHAIN_NONE;
 		return 0;
 	}
 
@@ -690,7 +691,7 @@
 			.ordered_events	 = true,
 			.ordering_requires_timestamps = true,
 		},
-		.max_stack		 = PERF_MAX_STACK_DEPTH,
+		.max_stack		 = sysctl_perf_event_max_stack,
 		.pretty_printing_style	 = "normal",
 		.socket_filter		 = -1,
 	};
@@ -734,7 +735,7 @@
 		   "regex filter to identify parent, see: '--sort parent'"),
 	OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other,
 		    "Only display entries with parent-match"),
-	OPT_CALLBACK_DEFAULT('g', "call-graph", &report,
+	OPT_CALLBACK_DEFAULT('g', "call-graph", &callchain_param,
 			     "print_type,threshold[,print_limit],order,sort_key[,branch],value",
 			     report_callchain_help, &report_parse_callchain_opt,
 			     callchain_default_opt),
@@ -743,7 +744,7 @@
 	OPT_INTEGER(0, "max-stack", &report.max_stack,
 		    "Set the maximum stack depth when parsing the callchain, "
 		    "anything beyond the specified depth will be ignored. "
-		    "Default: " __stringify(PERF_MAX_STACK_DEPTH)),
+		    "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
 	OPT_BOOLEAN('G', "inverted", &report.inverted_callchain,
 		    "alias for inverted call graph"),
 	OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
@@ -935,7 +936,7 @@
 			goto error;
 		}
 
-		sort__need_collapse = true;
+		perf_hpp_list.need_collapse = true;
 	}
 
 	/* Force tty output for header output and per-thread stat. */
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 871b55ae..afa0576 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -11,6 +11,8 @@
 #include "util/session.h"
 #include "util/tool.h"
 #include "util/cloexec.h"
+#include "util/thread_map.h"
+#include "util/color.h"
 
 #include <subcmd/parse-options.h>
 #include "util/trace-event.h"
@@ -122,6 +124,21 @@
 				  struct machine *machine);
 };
 
+#define COLOR_PIDS PERF_COLOR_BLUE
+#define COLOR_CPUS PERF_COLOR_BG_RED
+
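+/* state for 'perf sched map': CPU-column compaction and color filters */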
+struct perf_sched_map {
+	DECLARE_BITMAP(comp_cpus_mask, MAX_CPUS);
+	int			*comp_cpus;
+	bool			 comp;
+	struct thread_map	*color_pids;
+	const char		*color_pids_str;
+	struct cpu_map		*color_cpus;
+	const char		*color_cpus_str;
+	struct cpu_map		*cpus;
+	const char		*cpus_str;
+};
+
 struct perf_sched {
 	struct perf_tool tool;
 	const char	 *sort_order;
@@ -173,6 +190,7 @@
 	struct list_head sort_list, cmp_pid;
 	bool force;
 	bool skip_merge;
+	struct perf_sched_map map;
 };
 
 static u64 get_nsecs(void)
@@ -1339,6 +1357,38 @@
 	return 0;
 }
 
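+/*
+ * thread__priv() stores an opaque pointer; this union lets a single
+ * "should this thread be colored" bool travel through it.
+ */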
+union map_priv {
+	void	*ptr;
+	bool	 color;
+};
+
+static bool thread__has_color(struct thread *thread)
+{
+	union map_priv priv = {
+		.ptr = thread__priv(thread),
+	};
+
+	return priv.color;
+}
+
+static struct thread *
+map__findnew_thread(struct perf_sched *sched, struct machine *machine, pid_t pid, pid_t tid)
+{
+	struct thread *thread = machine__findnew_thread(machine, pid, tid);
+	union map_priv priv = {
+		.color = false,
+	};
+
+	if (!sched->map.color_pids || !thread || thread__priv(thread))
+		return thread;
+
+	if (thread_map__has(sched->map.color_pids, tid))
+		priv.color = true;
+
+	thread__set_priv(thread, priv.ptr);
+	return thread;
+}
+
 static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
 			    struct perf_sample *sample, struct machine *machine)
 {
@@ -1347,13 +1397,25 @@
 	int new_shortname;
 	u64 timestamp0, timestamp = sample->time;
 	s64 delta;
-	int cpu, this_cpu = sample->cpu;
+	int i, this_cpu = sample->cpu;
+	int cpus_nr;
+	bool new_cpu = false;
+	const char *color = PERF_COLOR_NORMAL;
 
 	BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0);
 
 	if (this_cpu > sched->max_cpu)
 		sched->max_cpu = this_cpu;
 
+	if (sched->map.comp) {
+		cpus_nr = bitmap_weight(sched->map.comp_cpus_mask, MAX_CPUS);
+		if (!test_and_set_bit(this_cpu, sched->map.comp_cpus_mask)) {
+			sched->map.comp_cpus[cpus_nr++] = this_cpu;
+			new_cpu = true;
+		}
+	} else {
+		cpus_nr = sched->max_cpu;
+	}
+
 	timestamp0 = sched->cpu_last_switched[this_cpu];
 	sched->cpu_last_switched[this_cpu] = timestamp;
 	if (timestamp0)
@@ -1366,7 +1428,7 @@
 		return -1;
 	}
 
-	sched_in = machine__findnew_thread(machine, -1, next_pid);
+	sched_in = map__findnew_thread(sched, machine, -1, next_pid);
 	if (sched_in == NULL)
 		return -1;
 
@@ -1400,26 +1462,52 @@
 		new_shortname = 1;
 	}
 
-	for (cpu = 0; cpu <= sched->max_cpu; cpu++) {
+	for (i = 0; i < cpus_nr; i++) {
+		int cpu = sched->map.comp ? sched->map.comp_cpus[i] : i;
+		struct thread *curr_thread = sched->curr_thread[cpu];
+		const char *pid_color = color;
+		const char *cpu_color = color;
+
+		if (curr_thread && thread__has_color(curr_thread))
+			pid_color = COLOR_PIDS;
+
+		if (sched->map.cpus && !cpu_map__has(sched->map.cpus, cpu))
+			continue;
+
+		if (sched->map.color_cpus && cpu_map__has(sched->map.color_cpus, cpu))
+			cpu_color = COLOR_CPUS;
+
 		if (cpu != this_cpu)
-			printf(" ");
+			color_fprintf(stdout, cpu_color, " ");
 		else
-			printf("*");
+			color_fprintf(stdout, cpu_color, "*");
 
 		if (sched->curr_thread[cpu])
-			printf("%2s ", sched->curr_thread[cpu]->shortname);
+			color_fprintf(stdout, pid_color, "%2s ", sched->curr_thread[cpu]->shortname);
 		else
-			printf("   ");
+			color_fprintf(stdout, color, "   ");
 	}
 
-	printf("  %12.6f secs ", (double)timestamp/1e9);
+	if (sched->map.cpus && !cpu_map__has(sched->map.cpus, this_cpu))
+		goto out;
+
+	color_fprintf(stdout, color, "  %12.6f secs ", (double)timestamp/1e9);
 	if (new_shortname) {
-		printf("%s => %s:%d\n",
+		const char *pid_color = color;
+
+		if (thread__has_color(sched_in))
+			pid_color = COLOR_PIDS;
+
+		color_fprintf(stdout, pid_color, "%s => %s:%d",
 		       sched_in->shortname, thread__comm_str(sched_in), sched_in->tid);
-	} else {
-		printf("\n");
 	}
 
+	if (sched->map.comp && new_cpu)
+		color_fprintf(stdout, color, " (CPU %d)", this_cpu);
+
+out:
+	color_fprintf(stdout, color, "\n");
+
 	thread__put(sched_in);
 
 	return 0;
@@ -1675,9 +1763,75 @@
 	return 0;
 }
 
+static int setup_map_cpus(struct perf_sched *sched)
+{
+	struct cpu_map *map;
+
+	sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF);
+
+	if (sched->map.comp) {
+		sched->map.comp_cpus = zalloc(sched->max_cpu * sizeof(int));
+		if (!sched->map.comp_cpus)
+			return -1;
+	}
+
+	if (!sched->map.cpus_str)
+		return 0;
+
+	map = cpu_map__new(sched->map.cpus_str);
+	if (!map) {
+		pr_err("failed to get cpus map from %s\n", sched->map.cpus_str);
+		return -1;
+	}
+
+	sched->map.cpus = map;
+	return 0;
+}
+
+static int setup_color_pids(struct perf_sched *sched)
+{
+	struct thread_map *map;
+
+	if (!sched->map.color_pids_str)
+		return 0;
+
+	map = thread_map__new_by_tid_str(sched->map.color_pids_str);
+	if (!map) {
+		pr_err("failed to get thread map from %s\n", sched->map.color_pids_str);
+		return -1;
+	}
+
+	sched->map.color_pids = map;
+	return 0;
+}
+
+static int setup_color_cpus(struct perf_sched *sched)
+{
+	struct cpu_map *map;
+
+	if (!sched->map.color_cpus_str)
+		return 0;
+
+	map = cpu_map__new(sched->map.color_cpus_str);
+	if (!map) {
+		pr_err("failed to get cpus map from %s\n", sched->map.color_cpus_str);
+		return -1;
+	}
+
+	sched->map.color_cpus = map;
+	return 0;
+}
+
 static int perf_sched__map(struct perf_sched *sched)
 {
-	sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF);
+	if (setup_map_cpus(sched))
+		return -1;
+
+	if (setup_color_pids(sched))
+		return -1;
+
+	if (setup_color_cpus(sched))
+		return -1;
 
 	setup_pager();
 	if (perf_sched__read_events(sched))
@@ -1831,6 +1985,17 @@
 		    "dump raw trace in ASCII"),
 	OPT_END()
 	};
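+	/* e.g.: perf sched map --compact --color-pids 1234,1235 --cpus 0-3 */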
+	const struct option map_options[] = {
+	OPT_BOOLEAN(0, "compact", &sched.map.comp,
+		    "map output in compact mode"),
+	OPT_STRING(0, "color-pids", &sched.map.color_pids_str, "pids",
+		   "highlight given pids in map"),
+	OPT_STRING(0, "color-cpus", &sched.map.color_cpus_str, "cpus",
+		   "highlight given CPUs in map"),
+	OPT_STRING(0, "cpus", &sched.map.cpus_str, "cpus",
+		   "display given CPUs in map"),
+	OPT_END()
+	};
 	const char * const latency_usage[] = {
 		"perf sched latency [<options>]",
 		NULL
@@ -1839,6 +2004,10 @@
 		"perf sched replay [<options>]",
 		NULL
 	};
+	const char * const map_usage[] = {
+		"perf sched map [<options>]",
+		NULL
+	};
 	const char *const sched_subcommands[] = { "record", "latency", "map",
 						  "replay", "script", NULL };
 	const char *sched_usage[] = {
@@ -1887,6 +2056,11 @@
 		setup_sorting(&sched, latency_options, latency_usage);
 		return perf_sched__lat(&sched);
 	} else if (!strcmp(argv[0], "map")) {
+		if (argc) {
+			argc = parse_options(argc, argv, map_options, map_usage, 0);
+			if (argc)
+				usage_with_options(map_usage, map_options);
+		}
 		sched.tp_handler = &map_ops;
 		setup_sorting(&sched, latency_options, latency_usage);
 		return perf_sched__map(&sched);
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 5282669..efca816 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -22,6 +22,7 @@
 #include "util/thread_map.h"
 #include "util/stat.h"
 #include <linux/bitmap.h>
+#include <linux/stringify.h>
 #include "asm/bug.h"
 #include "util/mem-events.h"
 
@@ -317,19 +318,19 @@
 
 	output[type].print_ip_opts = 0;
 	if (PRINT_FIELD(IP))
-		output[type].print_ip_opts |= PRINT_IP_OPT_IP;
+		output[type].print_ip_opts |= EVSEL__PRINT_IP;
 
 	if (PRINT_FIELD(SYM))
-		output[type].print_ip_opts |= PRINT_IP_OPT_SYM;
+		output[type].print_ip_opts |= EVSEL__PRINT_SYM;
 
 	if (PRINT_FIELD(DSO))
-		output[type].print_ip_opts |= PRINT_IP_OPT_DSO;
+		output[type].print_ip_opts |= EVSEL__PRINT_DSO;
 
 	if (PRINT_FIELD(SYMOFFSET))
-		output[type].print_ip_opts |= PRINT_IP_OPT_SYMOFFSET;
+		output[type].print_ip_opts |= EVSEL__PRINT_SYMOFFSET;
 
 	if (PRINT_FIELD(SRCLINE))
-		output[type].print_ip_opts |= PRINT_IP_OPT_SRCLINE;
+		output[type].print_ip_opts |= EVSEL__PRINT_SRCLINE;
 }
 
 /*
@@ -569,18 +570,23 @@
 	/* print branch_from information */
 	if (PRINT_FIELD(IP)) {
 		unsigned int print_opts = output[attr->type].print_ip_opts;
+		struct callchain_cursor *cursor = NULL;
 
-		if (symbol_conf.use_callchain && sample->callchain) {
-			printf("\n");
-		} else {
-			printf(" ");
-			if (print_opts & PRINT_IP_OPT_SRCLINE) {
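+		/* resolve the chain once; a non-NULL cursor selects multi-line output */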
+		if (symbol_conf.use_callchain && sample->callchain &&
+		    thread__resolve_callchain(al->thread, &callchain_cursor, evsel,
+					      sample, NULL, NULL, scripting_max_stack) == 0)
+			cursor = &callchain_cursor;
+
+		if (cursor == NULL) {
+			putchar(' ');
+			if (print_opts & EVSEL__PRINT_SRCLINE) {
 				print_srcline_last = true;
-				print_opts &= ~PRINT_IP_OPT_SRCLINE;
+				print_opts &= ~EVSEL__PRINT_SRCLINE;
 			}
-		}
-		perf_evsel__print_ip(evsel, sample, al, print_opts,
-				     scripting_max_stack);
+		} else {
+			putchar('\n');
+		}
+
+		sample__fprintf_sym(sample, al, 0, print_opts, cursor, stdout);
 	}
 
 	/* print branch_to information */
@@ -783,14 +789,15 @@
 		printf("%16" PRIu64, sample->weight);
 
 	if (PRINT_FIELD(IP)) {
-		if (!symbol_conf.use_callchain)
-			printf(" ");
-		else
-			printf("\n");
+		struct callchain_cursor *cursor = NULL;
 
-		perf_evsel__print_ip(evsel, sample, al,
-				     output[attr->type].print_ip_opts,
-				     scripting_max_stack);
+		if (symbol_conf.use_callchain && sample->callchain &&
+		    thread__resolve_callchain(al->thread, &callchain_cursor, evsel,
+					      sample, NULL, NULL, scripting_max_stack) == 0)
+			cursor = &callchain_cursor;
+
+		putchar(cursor ? '\n' : ' ');
+		sample__fprintf_sym(sample, al, 0, output[attr->type].print_ip_opts, cursor, stdout);
 	}
 
 	if (PRINT_FIELD(IREGS))
@@ -1959,6 +1966,7 @@
 			.exit		 = perf_event__process_exit,
 			.fork		 = perf_event__process_fork,
 			.attr		 = process_attr,
+			.event_update   = perf_event__process_event_update,
 			.tracing_data	 = perf_event__process_tracing_data,
 			.build_id	 = perf_event__process_build_id,
 			.id_index	 = perf_event__process_id_index,
@@ -2020,6 +2028,10 @@
 		   "only consider symbols in these pids"),
 	OPT_STRING(0, "tid", &symbol_conf.tid_list_str, "tid[,tid...]",
 		   "only consider symbols in these tids"),
+	OPT_UINTEGER(0, "max-stack", &scripting_max_stack,
+		     "Set the maximum stack depth when parsing the callchain, "
+		     "anything beyond the specified depth will be ignored. "
+		     "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
 	OPT_BOOLEAN('I', "show-info", &show_full_info,
 		    "display extended information from perf.data file"),
 	OPT_BOOLEAN('\0', "show-kernel-path", &symbol_conf.show_kernel_path,
@@ -2055,6 +2067,8 @@
 		NULL
 	};
 
+	scripting_max_stack = sysctl_perf_event_max_stack;
+
 	setup_scripting();
 
 	argc = parse_options_subcommand(argc, argv, options, script_subcommands, script_usage,
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 307e8a1..e459b68 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -298,6 +298,14 @@
 					return -1;
 				}
 			}
+
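+			/* with -v -v also dump raw count, enabled and running times */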
+			if (verbose > 1) {
+				fprintf(stat_config.output,
+					"%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
+						perf_evsel__name(counter),
+						cpu,
+						count->val, count->ena, count->run);
+			}
 		}
 	}
 
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 8332149..1793da5 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -688,7 +688,7 @@
 	struct hist_entry *he = iter->he;
 	struct perf_evsel *evsel = iter->evsel;
 
-	if (sort__has_sym && single)
+	if (perf_hpp_list.sym && single)
 		perf_top__record_precise_ip(top, he, evsel->idx, al->addr);
 
 	hist__account_cycles(iter->sample->branch_stack, al, iter->sample,
@@ -886,7 +886,7 @@
 	struct perf_evlist *evlist = top->evlist;
 	struct record_opts *opts = &top->record_opts;
 
-	perf_evlist__config(evlist, opts);
+	perf_evlist__config(evlist, opts, &callchain_param);
 
 	evlist__for_each(evlist, counter) {
 try_again:
@@ -917,15 +917,15 @@
 	return -1;
 }
 
-static int perf_top__setup_sample_type(struct perf_top *top __maybe_unused)
+static int callchain_param__setup_sample_type(struct callchain_param *callchain)
 {
-	if (!sort__has_sym) {
-		if (symbol_conf.use_callchain) {
+	if (!perf_hpp_list.sym) {
+		if (callchain->enabled) {
 			ui__error("Selected -g but \"sym\" not present in --sort/-s.");
 			return -EINVAL;
 		}
-	} else if (callchain_param.mode != CHAIN_NONE) {
-		if (callchain_register_param(&callchain_param) < 0) {
+	} else if (callchain->mode != CHAIN_NONE) {
+		if (callchain_register_param(callchain) < 0) {
 			ui__error("Can't register callchain params.\n");
 			return -EINVAL;
 		}
@@ -952,7 +952,7 @@
 			goto out_delete;
 	}
 
-	ret = perf_top__setup_sample_type(top);
+	ret = callchain_param__setup_sample_type(&callchain_param);
 	if (ret)
 		goto out_delete;
 
@@ -962,7 +962,7 @@
 	machine__synthesize_threads(&top->session->machines.host, &opts->target,
 				    top->evlist->threads, false, opts->proc_map_timeout);
 
-	if (sort__has_socket) {
+	if (perf_hpp_list.socket) {
 		ret = perf_env__read_cpu_topology_map(&perf_env);
 		if (ret < 0)
 			goto out_err_cpu_topo;
@@ -1045,18 +1045,17 @@
 static int
 parse_callchain_opt(const struct option *opt, const char *arg, int unset)
 {
-	struct record_opts *record = (struct record_opts *)opt->value;
+	struct callchain_param *callchain = opt->value;
 
-	record->callgraph_set = true;
-	callchain_param.enabled = !unset;
-	callchain_param.record_mode = CALLCHAIN_FP;
+	callchain->enabled = !unset;
+	callchain->record_mode = CALLCHAIN_FP;
 
 	/*
 	 * --no-call-graph
 	 */
 	if (unset) {
 		symbol_conf.use_callchain = false;
-		callchain_param.record_mode = CALLCHAIN_NONE;
+		callchain->record_mode = CALLCHAIN_NONE;
 		return 0;
 	}
 
@@ -1104,7 +1103,7 @@
 			},
 			.proc_map_timeout    = 500,
 		},
-		.max_stack	     = PERF_MAX_STACK_DEPTH,
+		.max_stack	     = sysctl_perf_event_max_stack,
 		.sym_pcnt_filter     = 5,
 	};
 	struct record_opts *opts = &top.record_opts;
@@ -1162,17 +1161,17 @@
 		   "output field(s): overhead, period, sample plus all of sort keys"),
 	OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
 		    "Show a column with the number of samples"),
-	OPT_CALLBACK_NOOPT('g', NULL, &top.record_opts,
+	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
 			   NULL, "enables call-graph recording and display",
 			   &callchain_opt),
-	OPT_CALLBACK(0, "call-graph", &top.record_opts,
+	OPT_CALLBACK(0, "call-graph", &callchain_param,
 		     "record_mode[,record_size],print_type,threshold[,print_limit],order,sort_key[,branch]",
 		     top_callchain_help, &parse_callchain_opt),
 	OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain,
 		    "Accumulate callchains of children and show total overhead as well"),
 	OPT_INTEGER(0, "max-stack", &top.max_stack,
 		    "Set the maximum stack depth when parsing the callchain. "
-		    "Default: " __stringify(PERF_MAX_STACK_DEPTH)),
+		    "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
 	OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
 		   "ignore callees of these functions in call graphs",
 		   report_parse_ignore_callees_opt),
@@ -1256,7 +1255,7 @@
 
 	sort__mode = SORT_MODE__TOP;
 	/* display thread wants entries to be collapsed in a different tree */
-	sort__need_collapse = 1;
+	perf_hpp_list.need_collapse = 1;
 
 	if (top.use_stdio)
 		use_browser = 0;
@@ -1312,7 +1311,7 @@
 
 	top.sym_evsel = perf_evlist__first(top.evlist);
 
-	if (!symbol_conf.use_callchain) {
+	if (!callchain_param.enabled) {
 		symbol_conf.cumulate_callchain = false;
 		perf_hpp__cancel_cumulate();
 	}
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 93ac724..6e5c325 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -34,79 +34,76 @@
 #include "trace-event.h"
 #include "util/parse-events.h"
 #include "util/bpf-loader.h"
+#include "callchain.h"
+#include "syscalltbl.h"
+#include "rb_resort.h"
 
-#include <libaudit.h>
+#include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
 #include <stdlib.h>
-#include <sys/mman.h>
-#include <linux/futex.h>
 #include <linux/err.h>
-
-/* For older distros: */
-#ifndef MAP_STACK
-# define MAP_STACK		0x20000
-#endif
-
-#ifndef MADV_HWPOISON
-# define MADV_HWPOISON		100
-
-#endif
-
-#ifndef MADV_MERGEABLE
-# define MADV_MERGEABLE		12
-#endif
-
-#ifndef MADV_UNMERGEABLE
-# define MADV_UNMERGEABLE	13
-#endif
-
-#ifndef EFD_SEMAPHORE
-# define EFD_SEMAPHORE		1
-#endif
-
-#ifndef EFD_NONBLOCK
-# define EFD_NONBLOCK		00004000
-#endif
-
-#ifndef EFD_CLOEXEC
-# define EFD_CLOEXEC		02000000
-#endif
+#include <linux/filter.h>
+#include <linux/audit.h>
+#include <sys/ptrace.h>
+#include <linux/random.h>
+#include <linux/stringify.h>
 
 #ifndef O_CLOEXEC
 # define O_CLOEXEC		02000000
 #endif
 
-#ifndef SOCK_DCCP
-# define SOCK_DCCP		6
-#endif
-
-#ifndef SOCK_CLOEXEC
-# define SOCK_CLOEXEC		02000000
-#endif
-
-#ifndef SOCK_NONBLOCK
-# define SOCK_NONBLOCK		00004000
-#endif
-
-#ifndef MSG_CMSG_CLOEXEC
-# define MSG_CMSG_CLOEXEC	0x40000000
-#endif
-
-#ifndef PERF_FLAG_FD_NO_GROUP
-# define PERF_FLAG_FD_NO_GROUP		(1UL << 0)
-#endif
-
-#ifndef PERF_FLAG_FD_OUTPUT
-# define PERF_FLAG_FD_OUTPUT		(1UL << 1)
-#endif
-
-#ifndef PERF_FLAG_PID_CGROUP
-# define PERF_FLAG_PID_CGROUP		(1UL << 2) /* pid=cgroup id, per-cpu mode only */
-#endif
-
-#ifndef PERF_FLAG_FD_CLOEXEC
-# define PERF_FLAG_FD_CLOEXEC		(1UL << 3) /* O_CLOEXEC */
-#endif
-
+struct trace {
+	struct perf_tool	tool;
+	struct syscalltbl	*sctbl;
+	struct {
+		int		max;
+		struct syscall  *table;
+		struct {
+			struct perf_evsel *sys_enter,
+					  *sys_exit;
+		}		events;
+	} syscalls;
+	struct record_opts	opts;
+	struct perf_evlist	*evlist;
+	struct machine		*host;
+	struct thread		*current;
+	u64			base_time;
+	FILE			*output;
+	unsigned long		nr_events;
+	struct strlist		*ev_qualifier;
+	struct {
+		size_t		nr;
+		int		*entries;
+	}			ev_qualifier_ids;
+	struct intlist		*tid_list;
+	struct intlist		*pid_list;
+	struct {
+		size_t		nr;
+		pid_t		*entries;
+	}			filter_pids;
+	double			duration_filter;
+	double			runtime_ms;
+	struct {
+		u64		vfs_getname,
+				proc_getname;
+	} stats;
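+	/* callchain depth window: chains shallower than min_stack are skipped */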
+	unsigned int		max_stack;
+	unsigned int		min_stack;
+	bool			not_ev_qualifier;
+	bool			live;
+	bool			full_time;
+	bool			sched;
+	bool			multiple_threads;
+	bool			summary;
+	bool			summary_only;
+	bool			show_comm;
+	bool			show_tool_stats;
+	bool			trace_syscalls;
+	bool			kernel_syscallchains;
+	bool			force;
+	bool			vfs_getname;
+	int			trace_pgfaults;
+	int			open_id;
+};
 
 struct tp_field {
 	int offset;
@@ -371,221 +368,6 @@
 
 #define SCA_INT syscall_arg__scnprintf_int
 
-static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
-					       struct syscall_arg *arg)
-{
-	int printed = 0, prot = arg->val;
-
-	if (prot == PROT_NONE)
-		return scnprintf(bf, size, "NONE");
-#define	P_MMAP_PROT(n) \
-	if (prot & PROT_##n) { \
-		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
-		prot &= ~PROT_##n; \
-	}
-
-	P_MMAP_PROT(EXEC);
-	P_MMAP_PROT(READ);
-	P_MMAP_PROT(WRITE);
-#ifdef PROT_SEM
-	P_MMAP_PROT(SEM);
-#endif
-	P_MMAP_PROT(GROWSDOWN);
-	P_MMAP_PROT(GROWSUP);
-#undef P_MMAP_PROT
-
-	if (prot)
-		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
-
-	return printed;
-}
-
-#define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
-
-static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
-						struct syscall_arg *arg)
-{
-	int printed = 0, flags = arg->val;
-
-#define	P_MMAP_FLAG(n) \
-	if (flags & MAP_##n) { \
-		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
-		flags &= ~MAP_##n; \
-	}
-
-	P_MMAP_FLAG(SHARED);
-	P_MMAP_FLAG(PRIVATE);
-#ifdef MAP_32BIT
-	P_MMAP_FLAG(32BIT);
-#endif
-	P_MMAP_FLAG(ANONYMOUS);
-	P_MMAP_FLAG(DENYWRITE);
-	P_MMAP_FLAG(EXECUTABLE);
-	P_MMAP_FLAG(FILE);
-	P_MMAP_FLAG(FIXED);
-	P_MMAP_FLAG(GROWSDOWN);
-#ifdef MAP_HUGETLB
-	P_MMAP_FLAG(HUGETLB);
-#endif
-	P_MMAP_FLAG(LOCKED);
-	P_MMAP_FLAG(NONBLOCK);
-	P_MMAP_FLAG(NORESERVE);
-	P_MMAP_FLAG(POPULATE);
-	P_MMAP_FLAG(STACK);
-#ifdef MAP_UNINITIALIZED
-	P_MMAP_FLAG(UNINITIALIZED);
-#endif
-#undef P_MMAP_FLAG
-
-	if (flags)
-		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
-
-	return printed;
-}
-
-#define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
-
-static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
-						  struct syscall_arg *arg)
-{
-	int printed = 0, flags = arg->val;
-
-#define P_MREMAP_FLAG(n) \
-	if (flags & MREMAP_##n) { \
-		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
-		flags &= ~MREMAP_##n; \
-	}
-
-	P_MREMAP_FLAG(MAYMOVE);
-#ifdef MREMAP_FIXED
-	P_MREMAP_FLAG(FIXED);
-#endif
-#undef P_MREMAP_FLAG
-
-	if (flags)
-		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
-
-	return printed;
-}
-
-#define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
-
-static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
-						      struct syscall_arg *arg)
-{
-	int behavior = arg->val;
-
-	switch (behavior) {
-#define	P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
-	P_MADV_BHV(NORMAL);
-	P_MADV_BHV(RANDOM);
-	P_MADV_BHV(SEQUENTIAL);
-	P_MADV_BHV(WILLNEED);
-	P_MADV_BHV(DONTNEED);
-	P_MADV_BHV(REMOVE);
-	P_MADV_BHV(DONTFORK);
-	P_MADV_BHV(DOFORK);
-	P_MADV_BHV(HWPOISON);
-#ifdef MADV_SOFT_OFFLINE
-	P_MADV_BHV(SOFT_OFFLINE);
-#endif
-	P_MADV_BHV(MERGEABLE);
-	P_MADV_BHV(UNMERGEABLE);
-#ifdef MADV_HUGEPAGE
-	P_MADV_BHV(HUGEPAGE);
-#endif
-#ifdef MADV_NOHUGEPAGE
-	P_MADV_BHV(NOHUGEPAGE);
-#endif
-#ifdef MADV_DONTDUMP
-	P_MADV_BHV(DONTDUMP);
-#endif
-#ifdef MADV_DODUMP
-	P_MADV_BHV(DODUMP);
-#endif
-#undef P_MADV_PHV
-	default: break;
-	}
-
-	return scnprintf(bf, size, "%#x", behavior);
-}
-
-#define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
-
-static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
-					   struct syscall_arg *arg)
-{
-	int printed = 0, op = arg->val;
-
-	if (op == 0)
-		return scnprintf(bf, size, "NONE");
-#define	P_CMD(cmd) \
-	if ((op & LOCK_##cmd) == LOCK_##cmd) { \
-		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
-		op &= ~LOCK_##cmd; \
-	}
-
-	P_CMD(SH);
-	P_CMD(EX);
-	P_CMD(NB);
-	P_CMD(UN);
-	P_CMD(MAND);
-	P_CMD(RW);
-	P_CMD(READ);
-	P_CMD(WRITE);
-#undef P_OP
-
-	if (op)
-		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
-
-	return printed;
-}
-
-#define SCA_FLOCK syscall_arg__scnprintf_flock
-
-static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
-{
-	enum syscall_futex_args {
-		SCF_UADDR   = (1 << 0),
-		SCF_OP	    = (1 << 1),
-		SCF_VAL	    = (1 << 2),
-		SCF_TIMEOUT = (1 << 3),
-		SCF_UADDR2  = (1 << 4),
-		SCF_VAL3    = (1 << 5),
-	};
-	int op = arg->val;
-	int cmd = op & FUTEX_CMD_MASK;
-	size_t printed = 0;
-
-	switch (cmd) {
-#define	P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
-	P_FUTEX_OP(WAIT);	    arg->mask |= SCF_VAL3|SCF_UADDR2;		  break;
-	P_FUTEX_OP(WAKE);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
-	P_FUTEX_OP(FD);		    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
-	P_FUTEX_OP(REQUEUE);	    arg->mask |= SCF_VAL3|SCF_TIMEOUT;	          break;
-	P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;			  break;
-	P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;			  break;
-	P_FUTEX_OP(WAKE_OP);							  break;
-	P_FUTEX_OP(LOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
-	P_FUTEX_OP(UNLOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
-	P_FUTEX_OP(TRYLOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2;		  break;
-	P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;			  break;
-	P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;			  break;
-	P_FUTEX_OP(WAIT_REQUEUE_PI);						  break;
-	default: printed = scnprintf(bf, size, "%#x", cmd);			  break;
-	}
-
-	if (op & FUTEX_PRIVATE_FLAG)
-		printed += scnprintf(bf + printed, size - printed, "|PRIV");
-
-	if (op & FUTEX_CLOCK_REALTIME)
-		printed += scnprintf(bf + printed, size - printed, "|CLKRT");
-
-	return printed;
-}
-
-#define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
-
 static const char *bpf_cmd[] = {
 	"MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
 	"MAP_GET_NEXT_KEY", "PROG_LOAD",
@@ -652,110 +434,6 @@
 };
 static DEFINE_STRARRAY(socket_families);
 
-#ifndef SOCK_TYPE_MASK
-#define SOCK_TYPE_MASK 0xf
-#endif
-
-static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
-						      struct syscall_arg *arg)
-{
-	size_t printed;
-	int type = arg->val,
-	    flags = type & ~SOCK_TYPE_MASK;
-
-	type &= SOCK_TYPE_MASK;
-	/*
- 	 * Can't use a strarray, MIPS may override for ABI reasons.
- 	 */
-	switch (type) {
-#define	P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
-	P_SK_TYPE(STREAM);
-	P_SK_TYPE(DGRAM);
-	P_SK_TYPE(RAW);
-	P_SK_TYPE(RDM);
-	P_SK_TYPE(SEQPACKET);
-	P_SK_TYPE(DCCP);
-	P_SK_TYPE(PACKET);
-#undef P_SK_TYPE
-	default:
-		printed = scnprintf(bf, size, "%#x", type);
-	}
-
-#define	P_SK_FLAG(n) \
-	if (flags & SOCK_##n) { \
-		printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
-		flags &= ~SOCK_##n; \
-	}
-
-	P_SK_FLAG(CLOEXEC);
-	P_SK_FLAG(NONBLOCK);
-#undef P_SK_FLAG
-
-	if (flags)
-		printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
-
-	return printed;
-}
-
-#define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
-
-#ifndef MSG_PROBE
-#define MSG_PROBE	     0x10
-#endif
-#ifndef MSG_WAITFORONE
-#define MSG_WAITFORONE	0x10000
-#endif
-#ifndef MSG_SENDPAGE_NOTLAST
-#define MSG_SENDPAGE_NOTLAST 0x20000
-#endif
-#ifndef MSG_FASTOPEN
-#define MSG_FASTOPEN	     0x20000000
-#endif
-
-static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
-					       struct syscall_arg *arg)
-{
-	int printed = 0, flags = arg->val;
-
-	if (flags == 0)
-		return scnprintf(bf, size, "NONE");
-#define	P_MSG_FLAG(n) \
-	if (flags & MSG_##n) { \
-		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
-		flags &= ~MSG_##n; \
-	}
-
-	P_MSG_FLAG(OOB);
-	P_MSG_FLAG(PEEK);
-	P_MSG_FLAG(DONTROUTE);
-	P_MSG_FLAG(TRYHARD);
-	P_MSG_FLAG(CTRUNC);
-	P_MSG_FLAG(PROBE);
-	P_MSG_FLAG(TRUNC);
-	P_MSG_FLAG(DONTWAIT);
-	P_MSG_FLAG(EOR);
-	P_MSG_FLAG(WAITALL);
-	P_MSG_FLAG(FIN);
-	P_MSG_FLAG(SYN);
-	P_MSG_FLAG(CONFIRM);
-	P_MSG_FLAG(RST);
-	P_MSG_FLAG(ERRQUEUE);
-	P_MSG_FLAG(NOSIGNAL);
-	P_MSG_FLAG(MORE);
-	P_MSG_FLAG(WAITFORONE);
-	P_MSG_FLAG(SENDPAGE_NOTLAST);
-	P_MSG_FLAG(FASTOPEN);
-	P_MSG_FLAG(CMSG_CLOEXEC);
-#undef P_MSG_FLAG
-
-	if (flags)
-		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
-
-	return printed;
-}
-
-#define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
-
 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
 						 struct syscall_arg *arg)
 {
@@ -788,116 +466,6 @@
 
 #define SCA_FILENAME syscall_arg__scnprintf_filename
 
-static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
-					       struct syscall_arg *arg)
-{
-	int printed = 0, flags = arg->val;
-
-	if (!(flags & O_CREAT))
-		arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
-
-	if (flags == 0)
-		return scnprintf(bf, size, "RDONLY");
-#define	P_FLAG(n) \
-	if (flags & O_##n) { \
-		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
-		flags &= ~O_##n; \
-	}
-
-	P_FLAG(APPEND);
-	P_FLAG(ASYNC);
-	P_FLAG(CLOEXEC);
-	P_FLAG(CREAT);
-	P_FLAG(DIRECT);
-	P_FLAG(DIRECTORY);
-	P_FLAG(EXCL);
-	P_FLAG(LARGEFILE);
-	P_FLAG(NOATIME);
-	P_FLAG(NOCTTY);
-#ifdef O_NONBLOCK
-	P_FLAG(NONBLOCK);
-#elif O_NDELAY
-	P_FLAG(NDELAY);
-#endif
-#ifdef O_PATH
-	P_FLAG(PATH);
-#endif
-	P_FLAG(RDWR);
-#ifdef O_DSYNC
-	if ((flags & O_SYNC) == O_SYNC)
-		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
-	else {
-		P_FLAG(DSYNC);
-	}
-#else
-	P_FLAG(SYNC);
-#endif
-	P_FLAG(TRUNC);
-	P_FLAG(WRONLY);
-#undef P_FLAG
-
-	if (flags)
-		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
-
-	return printed;
-}
-
-#define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
-
-static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
-						struct syscall_arg *arg)
-{
-	int printed = 0, flags = arg->val;
-
-	if (flags == 0)
-		return 0;
-
-#define	P_FLAG(n) \
-	if (flags & PERF_FLAG_##n) { \
-		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
-		flags &= ~PERF_FLAG_##n; \
-	}
-
-	P_FLAG(FD_NO_GROUP);
-	P_FLAG(FD_OUTPUT);
-	P_FLAG(PID_CGROUP);
-	P_FLAG(FD_CLOEXEC);
-#undef P_FLAG
-
-	if (flags)
-		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
-
-	return printed;
-}
-
-#define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
-
-static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
-						   struct syscall_arg *arg)
-{
-	int printed = 0, flags = arg->val;
-
-	if (flags == 0)
-		return scnprintf(bf, size, "NONE");
-#define	P_FLAG(n) \
-	if (flags & EFD_##n) { \
-		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
-		flags &= ~EFD_##n; \
-	}
-
-	P_FLAG(SEMAPHORE);
-	P_FLAG(CLOEXEC);
-	P_FLAG(NONBLOCK);
-#undef P_FLAG
-
-	if (flags)
-		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
-
-	return printed;
-}
-
-#define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
-
 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
 						struct syscall_arg *arg)
 {
@@ -921,59 +489,6 @@
 
 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
 
-static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
-{
-	int sig = arg->val;
-
-	switch (sig) {
-#define	P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
-	P_SIGNUM(HUP);
-	P_SIGNUM(INT);
-	P_SIGNUM(QUIT);
-	P_SIGNUM(ILL);
-	P_SIGNUM(TRAP);
-	P_SIGNUM(ABRT);
-	P_SIGNUM(BUS);
-	P_SIGNUM(FPE);
-	P_SIGNUM(KILL);
-	P_SIGNUM(USR1);
-	P_SIGNUM(SEGV);
-	P_SIGNUM(USR2);
-	P_SIGNUM(PIPE);
-	P_SIGNUM(ALRM);
-	P_SIGNUM(TERM);
-	P_SIGNUM(CHLD);
-	P_SIGNUM(CONT);
-	P_SIGNUM(STOP);
-	P_SIGNUM(TSTP);
-	P_SIGNUM(TTIN);
-	P_SIGNUM(TTOU);
-	P_SIGNUM(URG);
-	P_SIGNUM(XCPU);
-	P_SIGNUM(XFSZ);
-	P_SIGNUM(VTALRM);
-	P_SIGNUM(PROF);
-	P_SIGNUM(WINCH);
-	P_SIGNUM(IO);
-	P_SIGNUM(PWR);
-	P_SIGNUM(SYS);
-#ifdef SIGEMT
-	P_SIGNUM(EMT);
-#endif
-#ifdef SIGSTKFLT
-	P_SIGNUM(STKFLT);
-#endif
-#ifdef SIGSWI
-	P_SIGNUM(SWI);
-#endif
-	default: break;
-	}
-
-	return scnprintf(bf, size, "%#x", sig);
-}
-
-#define SCA_SIGNUM syscall_arg__scnprintf_signum
-
 #if defined(__i386__) || defined(__x86_64__)
 /*
  * FIXME: Make this available to all arches.
@@ -1001,16 +516,62 @@
 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
 #endif /* defined(__i386__) || defined(__x86_64__) */
 
+#ifndef GRND_NONBLOCK
+#define GRND_NONBLOCK	0x0001
+#endif
+#ifndef GRND_RANDOM
+#define GRND_RANDOM	0x0002
+#endif
+
+static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
+						   struct syscall_arg *arg)
+{
+	int printed = 0, flags = arg->val;
+
+#define	P_FLAG(n) \
+	if (flags & GRND_##n) { \
+		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+		flags &= ~GRND_##n; \
+	}
+
+	P_FLAG(RANDOM);
+	P_FLAG(NONBLOCK);
+#undef P_FLAG
+
+	if (flags)
+		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+	return printed;
+}
+
+#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
+
 #define STRARRAY(arg, name, array) \
 	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
 	  .arg_parm	 = { [arg] = &strarray__##array, }
 
+#include "trace/beauty/eventfd.c"
+#include "trace/beauty/flock.c"
+#include "trace/beauty/futex_op.c"
+#include "trace/beauty/mmap.c"
+#include "trace/beauty/mode_t.c"
+#include "trace/beauty/msg_flags.c"
+#include "trace/beauty/open_flags.c"
+#include "trace/beauty/perf_event_open.c"
+#include "trace/beauty/pid.c"
+#include "trace/beauty/sched_policy.c"
+#include "trace/beauty/seccomp.c"
+#include "trace/beauty/signum.c"
+#include "trace/beauty/socket_type.c"
+#include "trace/beauty/waitid_options.c"
+
 static struct syscall_fmt {
 	const char *name;
 	const char *alias;
 	size_t	   (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
 	void	   *arg_parm[6];
 	bool	   errmsg;
+	bool	   errpid;
 	bool	   timeout;
 	bool	   hexret;
 } syscall_fmts[] = {
@@ -1028,6 +589,7 @@
 	{ .name	    = "chroot",	    .errmsg = true,
 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
 	{ .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
+	{ .name	    = "clone",	    .errpid = true, },
 	{ .name	    = "close",	    .errmsg = true,
 	  .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
 	{ .name	    = "connect",    .errmsg = true, },
@@ -1093,6 +655,11 @@
 	{ .name	    = "getdents64", .errmsg = true,
 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
 	{ .name	    = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
+	{ .name	    = "getpid",	    .errpid = true, },
+	{ .name	    = "getpgid",    .errpid = true, },
+	{ .name	    = "getppid",    .errpid = true, },
+	{ .name	    = "getrandom",  .errmsg = true,
+	  .arg_scnprintf = { [2] = SCA_GETRANDOM_FLAGS, /* flags */ }, },
 	{ .name	    = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
 	{ .name	    = "getxattr",    .errmsg = true,
 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
@@ -1186,8 +753,7 @@
 			     [1] = SCA_FILENAME, /* filename */
 			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
 	{ .name	    = "perf_event_open", .errmsg = true,
-	  .arg_scnprintf = { [1] = SCA_INT, /* pid */
-			     [2] = SCA_INT, /* cpu */
+	  .arg_scnprintf = { [2] = SCA_INT, /* cpu */
 			     [3] = SCA_FD,  /* group_fd */
 			     [4] = SCA_PERF_FLAGS,  /* flags */ }, },
 	{ .name	    = "pipe2",	    .errmsg = true,
@@ -1234,6 +800,11 @@
 	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
 	{ .name	    = "rt_tgsigqueueinfo", .errmsg = true,
 	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
+	{ .name	    = "sched_setscheduler",   .errmsg = true,
+	  .arg_scnprintf = { [1] = SCA_SCHED_POLICY, /* policy */ }, },
+	{ .name	    = "seccomp", .errmsg = true,
+	  .arg_scnprintf = { [0] = SCA_SECCOMP_OP, /* op */
+			     [1] = SCA_SECCOMP_FLAGS, /* flags */ }, },
 	{ .name	    = "select",	    .errmsg = true, .timeout = true, },
 	{ .name	    = "sendmmsg",    .errmsg = true,
 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
@@ -1244,7 +815,9 @@
 	{ .name	    = "sendto",	    .errmsg = true,
 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
 			     [3] = SCA_MSG_FLAGS, /* flags */ }, },
+	{ .name	    = "set_tid_address", .errpid = true, },
 	{ .name	    = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
+	{ .name	    = "setpgid",    .errmsg = true, },
 	{ .name	    = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
 	{ .name	    = "setxattr",   .errmsg = true,
 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
@@ -1287,6 +860,10 @@
 	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
 	{ .name	    = "vmsplice",  .errmsg = true,
 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
+	{ .name	    = "wait4",	    .errpid = true,
+	  .arg_scnprintf = { [2] = SCA_WAITID_OPTIONS, /* options */ }, },
+	{ .name	    = "waitid",	    .errpid = true,
+	  .arg_scnprintf = { [3] = SCA_WAITID_OPTIONS, /* options */ }, },
 	{ .name	    = "write",	    .errmsg = true,
 	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
 	{ .name	    = "writev",	    .errmsg = true,
@@ -1398,59 +975,6 @@
 
 static const size_t trace__entry_str_size = 2048;
 
-struct trace {
-	struct perf_tool	tool;
-	struct {
-		int		machine;
-		int		open_id;
-	}			audit;
-	struct {
-		int		max;
-		struct syscall  *table;
-		struct {
-			struct perf_evsel *sys_enter,
-					  *sys_exit;
-		}		events;
-	} syscalls;
-	struct record_opts	opts;
-	struct perf_evlist	*evlist;
-	struct machine		*host;
-	struct thread		*current;
-	u64			base_time;
-	FILE			*output;
-	unsigned long		nr_events;
-	struct strlist		*ev_qualifier;
-	struct {
-		size_t		nr;
-		int		*entries;
-	}			ev_qualifier_ids;
-	struct intlist		*tid_list;
-	struct intlist		*pid_list;
-	struct {
-		size_t		nr;
-		pid_t		*entries;
-	}			filter_pids;
-	double			duration_filter;
-	double			runtime_ms;
-	struct {
-		u64		vfs_getname,
-				proc_getname;
-	} stats;
-	bool			not_ev_qualifier;
-	bool			live;
-	bool			full_time;
-	bool			sched;
-	bool			multiple_threads;
-	bool			summary;
-	bool			summary_only;
-	bool			show_comm;
-	bool			show_tool_stats;
-	bool			trace_syscalls;
-	bool			force;
-	bool			vfs_getname;
-	int			trace_pgfaults;
-};
-
 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
 {
 	struct thread_trace *ttrace = thread__priv(thread);
@@ -1618,6 +1142,7 @@
 		color_fprintf(trace->output, PERF_COLOR_RED,
 			      "LOST %" PRIu64 " events!\n", event->lost.lost);
 		ret = machine__process_lost_event(machine, event, sample);
+		break;
 	default:
 		ret = machine__process_event(machine, event, sample);
 		break;
@@ -1675,6 +1200,10 @@
 			sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
 		else if (field->flags & FIELD_IS_POINTER)
 			sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
+		else if (strcmp(field->type, "pid_t") == 0)
+			sc->arg_scnprintf[idx] = SCA_PID;
+		else if (strcmp(field->type, "umode_t") == 0)
+			sc->arg_scnprintf[idx] = SCA_MODE_T;
 		++idx;
 	}
 
@@ -1685,7 +1214,7 @@
 {
 	char tp_name[128];
 	struct syscall *sc;
-	const char *name = audit_syscall_to_name(id, trace->audit.machine);
+	const char *name = syscalltbl__name(trace->sctbl, id);
 
 	if (name == NULL)
 		return -1;
@@ -1760,7 +1289,7 @@
 
 	strlist__for_each(pos, trace->ev_qualifier) {
 		const char *sc = pos->s;
-		int id = audit_name_to_syscall(sc, trace->audit.machine);
+		int id = syscalltbl__id(trace->sctbl, sc);
 
 		if (id < 0) {
 			if (err == 0) {
@@ -1846,7 +1375,12 @@
 						     "%ld", val);
 			}
 		}
-	} else {
+	} else if (IS_ERR(sc->tp_format)) {
+		/*
+		 * A tracepoint whose /format file was read successfully may
+		 * legitimately have no args, like gettid(), so only print
+		 * the raw args when we failed to read that file.
+		 */
 		int i = 0;
 
 		while (i < 6) {
@@ -1987,7 +1521,7 @@
 			goto out_put;
 	}
 
-	if (!trace->summary_only)
+	if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
 		trace__printf_interrupted_entry(trace, sample);
 
 	ttrace->entry_time = sample->time;
@@ -1998,7 +1532,7 @@
 					   args, trace, thread);
 
 	if (sc->is_exit) {
-		if (!trace->duration_filter && !trace->summary_only) {
+		if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
 			trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
 			fprintf(trace->output, "%-70s\n", ttrace->entry_str);
 		}
@@ -2018,6 +1552,29 @@
 	return err;
 }
 
+static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
+				    struct perf_sample *sample,
+				    struct callchain_cursor *cursor)
+{
+	struct addr_location al;
+
+	if (machine__resolve(trace->host, &al, sample) < 0 ||
+	    thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack))
+		return -1;
+
+	return 0;
+}
+
+static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
+{
+	/* TODO: user-configurable print_opts */
+	const unsigned int print_opts = EVSEL__PRINT_SYM |
+				        EVSEL__PRINT_DSO |
+				        EVSEL__PRINT_UNKNOWN_AS_ADDR;
+
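+	/* 38 columns of left padding line the chain up under the trace output */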
+	return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
+}
+
 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
 			   union perf_event *event __maybe_unused,
 			   struct perf_sample *sample)
@@ -2025,7 +1582,7 @@
 	long ret;
 	u64 duration = 0;
 	struct thread *thread;
-	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
+	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
 	struct syscall *sc = trace__syscall_info(trace, evsel, id);
 	struct thread_trace *ttrace;
 
@@ -2042,7 +1599,7 @@
 
 	ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
 
-	if (id == trace->audit.open_id && ret >= 0 && ttrace->filename.pending_open) {
+	if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
 		trace__set_fd_pathname(thread, ret, ttrace->filename.name);
 		ttrace->filename.pending_open = false;
 		++trace->stats.vfs_getname;
@@ -2057,6 +1614,15 @@
 	} else if (trace->duration_filter)
 		goto out;
 
+	if (sample->callchain) {
+		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
+		if (callchain_ret == 0) {
+			if (callchain_cursor.nr < trace->min_stack)
+				goto out;
+			callchain_ret = 1;
+		}
+	}
+
 	if (trace->summary_only)
 		goto out;
 
@@ -2073,7 +1639,7 @@
 	if (sc->fmt == NULL) {
 signed_print:
 		fprintf(trace->output, ") = %ld", ret);
-	} else if (ret < 0 && sc->fmt->errmsg) {
+	} else if (ret < 0 && (sc->fmt->errmsg || sc->fmt->errpid)) {
 		char bf[STRERR_BUFSIZE];
 		const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
 			   *e = audit_errno_to_name(-ret);
@@ -2083,10 +1649,24 @@
 		fprintf(trace->output, ") = 0 Timeout");
 	else if (sc->fmt->hexret)
 		fprintf(trace->output, ") = %#lx", ret);
-	else
+	else if (sc->fmt->errpid) {
+		struct thread *child = machine__find_thread(trace->host, ret, ret);
+
+		if (child != NULL) {
+			fprintf(trace->output, ") = %ld", ret);
+			if (child->comm_set)
+				fprintf(trace->output, " (%s)", thread__comm_str(child));
+			thread__put(child);
+		}
+	} else
 		goto signed_print;
 
 	fputc('\n', trace->output);
+
+	if (callchain_ret > 0)
+		trace__fprintf_callchain(trace, sample);
+	else if (callchain_ret < 0)
+		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
 out:
 	ttrace->entry_pending = false;
 	err = 0;
@@ -2217,6 +1797,17 @@
 				union perf_event *event __maybe_unused,
 				struct perf_sample *sample)
 {
+	int callchain_ret = 0;
+
+	if (sample->callchain) {
+		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
+		if (callchain_ret == 0) {
+			if (callchain_cursor.nr < trace->min_stack)
+				goto out;
+			callchain_ret = 1;
+		}
+	}
+
 	trace__printf_interrupted_entry(trace, sample);
 	trace__fprintf_tstamp(trace, sample->time, trace->output);
 
@@ -2234,6 +1825,12 @@
 	}
 
 	fprintf(trace->output, ")\n");
+
+	if (callchain_ret > 0)
+		trace__fprintf_callchain(trace, sample);
+	else if (callchain_ret < 0)
+		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
+out:
 	return 0;
 }
 
@@ -2264,8 +1861,19 @@
 	char map_type = 'd';
 	struct thread_trace *ttrace;
 	int err = -1;
+	int callchain_ret = 0;
 
 	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
+
+	if (sample->callchain) {
+		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
+		if (callchain_ret == 0) {
+			if (callchain_cursor.nr < trace->min_stack)
+				goto out_put;
+			callchain_ret = 1;
+		}
+	}
+
 	ttrace = thread__trace(thread, trace->output);
 	if (ttrace == NULL)
 		goto out_put;
@@ -2307,6 +1915,11 @@
 	print_location(trace->output, sample, &al, true, false);
 
 	fprintf(trace->output, " (%c%c)\n", map_type, al.level);
+
+	if (callchain_ret > 0)
+		trace__fprintf_callchain(trace, sample);
+	else if (callchain_ret < 0)
+		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
 out:
 	err = 0;
 out_put:
@@ -2326,6 +1939,23 @@
 	return false;
 }
 
+static void trace__set_base_time(struct trace *trace,
+				 struct perf_evsel *evsel,
+				 struct perf_sample *sample)
+{
+	/*
+	 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
+	 * and don't use sample->time unconditionally, we may end up having
+	 * some other event in the future without PERF_SAMPLE_TIME for good
+	 * reason, i.e. we may not be interested in its timestamps, just in
+	 * it taking place, picking some piece of information when it
+	 * appears in our event stream (vfs_getname comes to mind).
+	 */
+	if (trace->base_time == 0 && !trace->full_time &&
+	    (evsel->attr.sample_type & PERF_SAMPLE_TIME))
+		trace->base_time = sample->time;
+}
+
 static int trace__process_sample(struct perf_tool *tool,
 				 union perf_event *event,
 				 struct perf_sample *sample,
@@ -2340,8 +1970,7 @@
 	if (skip_sample(trace, sample))
 		return 0;
 
-	if (!trace->full_time && trace->base_time == 0)
-		trace->base_time = sample->time;
+	trace__set_base_time(trace, evsel, sample);
 
 	if (handler) {
 		++trace->nr_events;
@@ -2450,8 +2079,7 @@
 	return true;
 }
 
-static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
-				    u64 config)
+static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
 {
 	struct perf_evsel *evsel;
 	struct perf_event_attr attr = {
@@ -2465,13 +2093,10 @@
 	event_attr_init(&attr);
 
 	evsel = perf_evsel__new(&attr);
-	if (!evsel)
-		return -ENOMEM;
+	if (evsel)
+		evsel->handler = trace__pgfault;
 
-	evsel->handler = trace__pgfault;
-	perf_evlist__add(evlist, evsel);
-
-	return 0;
+	return evsel;
 }
 
 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
@@ -2479,9 +2104,6 @@
 	const u32 type = event->header.type;
 	struct perf_evsel *evsel;
 
-	if (!trace->full_time && trace->base_time == 0)
-		trace->base_time = sample->time;
-
 	if (type != PERF_RECORD_SAMPLE) {
 		trace__process_event(trace, trace->host, event, sample);
 		return;
@@ -2493,6 +2115,8 @@
 		return;
 	}
 
+	trace__set_base_time(trace, evsel, sample);
+
 	if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
 	    sample->raw_data == NULL) {
 		fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
@@ -2527,6 +2151,15 @@
 	perf_evlist__add(evlist, sys_enter);
 	perf_evlist__add(evlist, sys_exit);
 
+	if (callchain_param.enabled && !trace->kernel_syscallchains) {
+		/*
+		 * We're interested only in the user space callchain
+		 * We're interested only in the user space callchain
+		 * leading to the syscall; allow overriding that for
+		 * debugging reasons using --kernel-syscall-graph.
+		sys_exit->attr.exclude_callchain_kernel = 1;
+	}
+
 	trace->syscalls.events.sys_enter = sys_enter;
 	trace->syscalls.events.sys_exit  = sys_exit;
 
@@ -2565,7 +2198,7 @@
 static int trace__run(struct trace *trace, int argc, const char **argv)
 {
 	struct perf_evlist *evlist = trace->evlist;
-	struct perf_evsel *evsel;
+	struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
 	int err = -1, i;
 	unsigned long before;
 	const bool forks = argc > 0;
@@ -2579,14 +2212,19 @@
 	if (trace->trace_syscalls)
 		trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
 
-	if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
-	    perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
-		goto out_error_mem;
+	if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
+		pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
+		if (pgfault_maj == NULL)
+			goto out_error_mem;
+		perf_evlist__add(evlist, pgfault_maj);
 	}
 
-	if ((trace->trace_pgfaults & TRACE_PFMIN) &&
-	    perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
-		goto out_error_mem;
+	if ((trace->trace_pgfaults & TRACE_PFMIN)) {
+		pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
+		if (pgfault_min == NULL)
+			goto out_error_mem;
+		perf_evlist__add(evlist, pgfault_min);
+	}
 
 	if (trace->sched &&
 	    perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
@@ -2605,7 +2243,45 @@
 		goto out_delete_evlist;
 	}
 
-	perf_evlist__config(evlist, &trace->opts);
+	perf_evlist__config(evlist, &trace->opts, NULL);
+
+	if (callchain_param.enabled) {
+		bool use_identifier = false;
+
+		if (trace->syscalls.events.sys_exit) {
+			perf_evsel__config_callchain(trace->syscalls.events.sys_exit,
+						     &trace->opts, &callchain_param);
+			use_identifier = true;
+		}
+
+		if (pgfault_maj) {
+			perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
+			use_identifier = true;
+		}
+
+		if (pgfault_min) {
+			perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
+			use_identifier = true;
+		}
+
+		if (use_identifier) {
+			/*
+			 * Now we have evsels with different sample_ids, use
+			 * PERF_SAMPLE_IDENTIFIER to map from sample to evsel
+			 * from a fixed position in each ring buffer record.
+			 *
+			 * As of the changeset introducing this comment, this
+			 * isn't strictly needed, as the fields that can come before
+			 * PERF_SAMPLE_ID are all used, but we'll probably disable
+			 * some of those for things like copying the payload of
+			 * pointer syscall arguments, and for vfs_getname we don't
+			 * need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this
+			 * here as a reminder that we need to use PERF_SAMPLE_IDENTIFIER.
+			 */
+			perf_evlist__set_sample_bit(evlist, IDENTIFIER);
+			perf_evlist__reset_sample_bit(evlist, ID);
+		}
+	}
 
 	signal(SIGCHLD, sig_handler);
 	signal(SIGINT, sig_handler);
@@ -2883,15 +2559,29 @@
 	return printed;
 }
 
+DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
+	struct stats 	*stats;
+	double		msecs;
+	int		syscall;
+)
+{
+	struct int_node *source = rb_entry(nd, struct int_node, rb_node);
+	struct stats *stats = source->priv;
+
+	entry->syscall = source->i;
+	entry->stats   = stats;
+	entry->msecs   = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
+}
+
 static size_t thread__dump_stats(struct thread_trace *ttrace,
 				 struct trace *trace, FILE *fp)
 {
-	struct stats *stats;
 	size_t printed = 0;
 	struct syscall *sc;
-	struct int_node *inode = intlist__first(ttrace->syscall_stats);
+	struct rb_node *nd;
+	DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
 
-	if (inode == NULL)
+	if (syscall_stats == NULL)
 		return 0;
 
 	printed += fprintf(fp, "\n");
@@ -2900,9 +2590,8 @@
 	printed += fprintf(fp, "                               (msec)    (msec)    (msec)    (msec)        (%%)\n");
 	printed += fprintf(fp, "   --------------- -------- --------- --------- --------- ---------     ------\n");
 
-	/* each int_node is a syscall */
-	while (inode) {
-		stats = inode->priv;
+	resort_rb__for_each(nd, syscall_stats) {
+		struct stats *stats = syscall_stats_entry->stats;
 		if (stats) {
 			double min = (double)(stats->min) / NSEC_PER_MSEC;
 			double max = (double)(stats->max) / NSEC_PER_MSEC;
@@ -2913,34 +2602,23 @@
 			pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
 			avg /= NSEC_PER_MSEC;
 
-			sc = &trace->syscalls.table[inode->i];
+			sc = &trace->syscalls.table[syscall_stats_entry->syscall];
 			printed += fprintf(fp, "   %-15s", sc->name);
 			printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
-					   n, avg * n, min, avg);
+					   n, syscall_stats_entry->msecs, min, avg);
 			printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
 		}
-
-		inode = intlist__next(inode);
 	}
 
+	resort_rb__delete(syscall_stats);
 	printed += fprintf(fp, "\n\n");
 
 	return printed;
 }
 
-/* struct used to pass data to per-thread function */
-struct summary_data {
-	FILE *fp;
-	struct trace *trace;
-	size_t printed;
-};
-
-static int trace__fprintf_one_thread(struct thread *thread, void *priv)
+static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
 {
-	struct summary_data *data = priv;
-	FILE *fp = data->fp;
-	size_t printed = data->printed;
-	struct trace *trace = data->trace;
+	size_t printed = 0;
 	struct thread_trace *ttrace = thread__priv(thread);
 	double ratio;
 
@@ -2956,25 +2634,45 @@
 		printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
 	if (ttrace->pfmin)
 		printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
-	printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
+	if (trace->sched)
+		printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
+	else if (fputc('\n', fp) != EOF)
+		++printed;
+
 	printed += thread__dump_stats(ttrace, trace, fp);
 
-	data->printed += printed;
+	return printed;
+}
 
-	return 0;
+static unsigned long thread__nr_events(struct thread_trace *ttrace)
+{
+	return ttrace ? ttrace->nr_events : 0;
+}
+
+DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
+	struct thread *thread;
+)
+{
+	entry->thread = rb_entry(nd, struct thread, rb_node);
 }
 
 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
 {
-	struct summary_data data = {
-		.fp = fp,
-		.trace = trace
-	};
-	data.printed = trace__fprintf_threads_header(fp);
+	DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host);
+	size_t printed = trace__fprintf_threads_header(fp);
+	struct rb_node *nd;
 
-	machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
+	if (threads == NULL) {
+		fprintf(fp, "%s", "Error sorting output by nr_events!\n");
+		return 0;
+	}
 
-	return data.printed;
+	resort_rb__for_each(nd, threads)
+		printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
+
+	resort_rb__delete(threads);
+
+	return printed;
 }
 
 static int trace__set_duration(const struct option *opt, const char *str,
@@ -3070,10 +2768,6 @@
 		NULL
 	};
 	struct trace trace = {
-		.audit = {
-			.machine = audit_detect_machine(),
-			.open_id = audit_name_to_syscall("open", trace.audit.machine),
-		},
 		.syscalls = {
 			. max = -1,
 		},
@@ -3091,6 +2785,8 @@
 		.output = stderr,
 		.show_comm = true,
 		.trace_syscalls = true,
+		.kernel_syscallchains = false,
+		.max_stack = UINT_MAX,
 	};
 	const char *output_name = NULL;
 	const char *ev_qualifier_str = NULL;
@@ -3136,10 +2832,24 @@
 		     "Trace pagefaults", parse_pagefaults, "maj"),
 	OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
 	OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
+	OPT_CALLBACK(0, "call-graph", &trace.opts,
+		     "record_mode[,record_size]", record_callchain_help,
+		     &record_parse_callchain_opt),
+	OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
+		    "Show the kernel callchains on the syscall exit path"),
+	OPT_UINTEGER(0, "min-stack", &trace.min_stack,
+		     "Set the minimum stack depth when parsing the callchain, "
+		     "anything below the specified depth will be ignored."),
+	OPT_UINTEGER(0, "max-stack", &trace.max_stack,
+		     "Set the maximum stack depth when parsing the callchain, "
+		     "anything beyond the specified depth will be ignored. "
+		     "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
 	OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
 			"per thread proc mmap processing timeout in ms"),
 	OPT_END()
 	};
+	bool __maybe_unused max_stack_user_set = true;
+	bool mmap_pages_user_set = true;
 	const char * const trace_subcommands[] = { "record", NULL };
 	int err;
 	char bf[BUFSIZ];
@@ -3148,8 +2858,9 @@
 	signal(SIGFPE, sighandler_dump_stack);
 
 	trace.evlist = perf_evlist__new();
+	trace.sctbl = syscalltbl__new();
 
-	if (trace.evlist == NULL) {
+	if (trace.evlist == NULL || trace.sctbl == NULL) {
 		pr_err("Not enough memory to run!\n");
 		err = -ENOMEM;
 		goto out;
@@ -3158,11 +2869,40 @@
 	argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
 				 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
 
+	err = bpf__setup_stdout(trace.evlist);
+	if (err) {
+		bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
+		pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
+		goto out;
+	}
+
+	err = -1;
+
 	if (trace.trace_pgfaults) {
 		trace.opts.sample_address = true;
 		trace.opts.sample_time = true;
 	}
 
+	if (trace.opts.mmap_pages == UINT_MAX)
+		mmap_pages_user_set = false;
+
+	if (trace.max_stack == UINT_MAX) {
+		trace.max_stack = sysctl_perf_event_max_stack;
+		max_stack_user_set = false;
+	}
+
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
+	if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled)
+		record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
+#endif
+
+	if (callchain_param.enabled) {
+		if (!mmap_pages_user_set && geteuid() == 0)
+			trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
+
+		symbol_conf.use_callchain = true;
+	}
+
 	if (trace.evlist->nr_entries > 0)
 		evlist__set_evsel_handler(trace.evlist, trace__event_handler);
 
@@ -3179,6 +2919,11 @@
 		return -1;
 	}
 
+	if (!trace.trace_syscalls && ev_qualifier_str) {
+		pr_err("The -e option can't be used with --no-syscalls.\n");
+		goto out;
+	}
+
 	if (output_name != NULL) {
 		err = trace__open_output(&trace, output_name);
 		if (err < 0) {
@@ -3187,6 +2932,8 @@
 		}
 	}
 
+	trace.open_id = syscalltbl__id(trace.sctbl, "open");
+
 	if (ev_qualifier_str != NULL) {
 		const char *s = ev_qualifier_str;
 		struct strlist_config slist_config = {
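
Note: for readers unfamiliar with the DEFINE_RESORT_RB() idiom used above, here is a minimal standalone sketch of the ordering thread__dump_stats() now produces — syscalls sorted by total time spent, descending. It uses plain C and qsort() on an array rather than perf's rb-tree machinery; the names and numbers are made up for illustration.

#include <stdio.h>
#include <stdlib.h>

struct syscall_stat {
	const char *name;
	unsigned long n;	/* number of calls */
	double msecs;		/* total time: n * avg, in msec */
};

static int cmp_msecs_desc(const void *pa, const void *pb)
{
	const struct syscall_stat *a = pa, *b = pb;

	if (a->msecs > b->msecs) return -1;
	if (a->msecs < b->msecs) return  1;
	return 0;
}

int main(void)
{
	struct syscall_stat stats[] = {
		{ "read",  120,  3.5 },
		{ "poll",    7, 90.2 },
		{ "write",  40,  1.1 },
	};
	size_t i, n = sizeof(stats) / sizeof(stats[0]);

	/* resort by total time, the key DEFINE_RESORT_RB(syscall_stats, ...) uses */
	qsort(stats, n, sizeof(stats[0]), cmp_msecs_desc);

	for (i = 0; i < n; i++)
		printf("%-8s %6lu %9.3f\n", stats[i].name, stats[i].n, stats[i].msecs);
	return 0;
}
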
diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile
index 6f8f643..1e46277 100644
--- a/tools/perf/config/Makefile
+++ b/tools/perf/config/Makefile
@@ -27,7 +27,7 @@
 ifeq ($(ARCH),x86)
   $(call detected,CONFIG_X86)
   ifeq (${IS_64_BIT}, 1)
-    CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT
+    CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT -DHAVE_SYSCALL_TABLE -I$(OUTPUT)arch/x86/include/generated
     ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S
     LIBUNWIND_LIBS = -lunwind -lunwind-x86_64
     $(call detected,CONFIG_X86_64)
@@ -295,9 +295,6 @@
     CFLAGS += -DHAVE_ELF_GETPHDRNUM_SUPPORT
   endif
 
-  # include ARCH specific config
-  -include $(src-perf)/arch/$(ARCH)/Makefile
-
   ifndef NO_DWARF
     ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined)
       msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled);
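
Note: the new -DHAVE_SYSCALL_TABLE build generates per-arch syscall tables so that name/id lookups like syscalltbl__id(trace.sctbl, "open") no longer round-trip through libaudit. A minimal sketch of that kind of lookup, assuming a hand-written sorted table (the entries below are illustrative x86_64 ids, not the generated table):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct syscall_ent {
	const char *name;
	int id;
};

/* must stay sorted by name for bsearch() */
static const struct syscall_ent table[] = {
	{ "close",	3 },
	{ "open",	2 },
	{ "read",	0 },
	{ "write",	1 },
};

static int ent_cmp(const void *key, const void *ent)
{
	return strcmp(key, ((const struct syscall_ent *)ent)->name);
}

static int syscall_name__id(const char *name)
{
	const struct syscall_ent *ent =
		bsearch(name, table, sizeof(table) / sizeof(table[0]),
			sizeof(table[0]), ent_cmp);

	return ent ? ent->id : -1;
}

int main(void)
{
	printf("open = %d\n", syscall_name__id("open"));
	return 0;
}
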
diff --git a/tools/perf/jvmti/jvmti_agent.c b/tools/perf/jvmti/jvmti_agent.c
index 6461e02..3573f31 100644
--- a/tools/perf/jvmti/jvmti_agent.c
+++ b/tools/perf/jvmti/jvmti_agent.c
@@ -92,6 +92,22 @@
 	return ret;
 }
 
+static int use_arch_timestamp;
+
+static inline uint64_t
+get_arch_timestamp(void)
+{
+#if defined(__i386__) || defined(__x86_64__)
+	unsigned int low, high;
+
+	asm volatile("rdtsc" : "=a" (low), "=d" (high));
+
+	return low | ((uint64_t)high) << 32;
+#else
+	return 0;
+#endif
+}
+
 #define NSEC_PER_SEC	1000000000
 static int perf_clk_id = CLOCK_MONOTONIC;
 
@@ -107,6 +123,9 @@
 	struct timespec ts;
 	int ret;
 
+	if (use_arch_timestamp)
+		return get_arch_timestamp();
+
 	ret = clock_gettime(perf_clk_id, &ts);
 	if (ret)
 		return 0;
@@ -203,6 +222,17 @@
 	munmap(marker_addr, pgsz);
 }
 
+static void
+init_arch_timestamp(void)
+{
+	char *str = getenv("JITDUMP_USE_ARCH_TIMESTAMP");
+
+	if (!str || !*str || !strcmp(str, "0"))
+		return;
+
+	use_arch_timestamp = 1;
+}
+
 void *jvmti_open(void)
 {
 	int pad_cnt;
@@ -211,11 +241,17 @@
 	int fd;
 	FILE *fp;
 
+	init_arch_timestamp();
+
 	/*
 	 * check if clockid is supported
 	 */
-	if (!perf_get_timestamp())
-		warnx("jvmti: kernel does not support %d clock id", perf_clk_id);
+	if (!perf_get_timestamp()) {
+		if (use_arch_timestamp)
+			warnx("jvmti: arch timestamp not supported");
+		else
+			warnx("jvmti: kernel does not support %d clock id", perf_clk_id);
+	}
 
 	memset(&header, 0, sizeof(header));
 
@@ -263,6 +299,9 @@
 
 	header.timestamp = perf_get_timestamp();
 
+	if (use_arch_timestamp)
+		header.flags |= JITDUMP_FLAGS_ARCH_TIMESTAMP;
+
 	if (!fwrite(&header, sizeof(header), 1, fp)) {
 		warn("jvmti: cannot write dumpfile header");
 		goto error;
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index aaee0a7..7970008 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -17,6 +17,7 @@
 #include <subcmd/parse-options.h>
 #include "util/bpf-loader.h"
 #include "util/debug.h"
+#include <api/fs/fs.h>
 #include <api/fs/tracing_path.h>
 #include <pthread.h>
 #include <stdlib.h>
@@ -308,9 +309,11 @@
 			if (*argcp > 1) {
 				struct strbuf buf;
 
-				strbuf_init(&buf, PATH_MAX);
-				strbuf_addstr(&buf, alias_string);
-				sq_quote_argv(&buf, (*argv) + 1, PATH_MAX);
+				if (strbuf_init(&buf, PATH_MAX) < 0 ||
+				    strbuf_addstr(&buf, alias_string) < 0 ||
+				    sq_quote_argv(&buf, (*argv) + 1,
+						  PATH_MAX) < 0)
+					die("Failed to allocate memory.");
 				free(alias_string);
 				alias_string = buf.buf;
 			}
@@ -533,6 +536,7 @@
 {
 	const char *cmd;
 	char sbuf[STRERR_BUFSIZE];
+	int value;
 
 	/* libsubcmd init */
 	exec_cmd_init("perf", PREFIX, PERF_EXEC_PATH, EXEC_PATH_ENVIRONMENT);
@@ -542,6 +546,9 @@
 	page_size = sysconf(_SC_PAGE_SIZE);
 	cacheline_size = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
 
+	if (sysctl__read_int("kernel/perf_event_max_stack", &value) == 0)
+		sysctl_perf_event_max_stack = value;
+
 	cmd = extract_argv0_path(argv[0]);
 	if (!cmd)
 		cmd = "perf-help";
@@ -549,6 +556,7 @@
 	srandom(time(NULL));
 
 	perf_config(perf_default_config, NULL);
+	set_buildid_dir(NULL);
 
 	/* get debugfs/tracefs mount point from /proc/mounts */
 	tracing_path_mount();
@@ -572,7 +580,6 @@
 	}
 	if (!prefixcmp(cmd, "trace")) {
 #ifdef HAVE_LIBAUDIT_SUPPORT
-		set_buildid_dir(NULL);
 		setup_path();
 		argv[0] = "trace";
 		return cmd_trace(argc, argv, NULL);
@@ -587,7 +594,6 @@
 	argc--;
 	handle_options(&argv, &argc, NULL);
 	commit_pager_choice();
-	set_buildid_dir(NULL);
 
 	if (argc > 0) {
 		if (!prefixcmp(argv[0], "--"))
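
Note: the hunk above seeds sysctl_perf_event_max_stack from the kernel/perf_event_max_stack sysctl. A minimal sketch of the same read done directly via procfs, without the tools/lib/api helper; error handling is kept deliberately small:

#include <stdio.h>

int main(void)
{
	FILE *fp = fopen("/proc/sys/kernel/perf_event_max_stack", "r");
	int value;

	if (fp == NULL)
		return 1;	/* sysctl not present on this kernel */

	if (fscanf(fp, "%d", &value) == 1)
		printf("perf_event_max_stack = %d\n", value);

	fclose(fp);
	return 0;
}
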
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 5381a01..cd8f1b1 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -52,7 +52,6 @@
 	bool	     sample_weight;
 	bool	     sample_time;
 	bool	     sample_time_set;
-	bool	     callgraph_set;
 	bool	     period;
 	bool	     running_time;
 	bool	     full_auxtrace;
diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py
index 1b02cdc..7656ff8 100644
--- a/tools/perf/scripts/python/export-to-postgresql.py
+++ b/tools/perf/scripts/python/export-to-postgresql.py
@@ -34,10 +34,9 @@
 #
 # ubuntu:
 #
-#	$ sudo apt-get install postgresql
+#	$ sudo apt-get install postgresql python-pyside.qtsql libqt4-sql-psql
 #	$ sudo su - postgres
-#	$ createuser <your user id here>
-#	Shall the new role be a superuser? (y/n) y
+#	$ createuser -s <your user id here>
 #
 # An example of using this script with Intel PT:
 #
@@ -224,11 +223,14 @@
 
 perf_db_export_mode = True
 perf_db_export_calls = False
+perf_db_export_callchains = False
+
 
 def usage():
-	print >> sys.stderr, "Usage is: export-to-postgresql.py <database name> [<columns>] [<calls>]"
+	print >> sys.stderr, "Usage is: export-to-postgresql.py <database name> [<columns>] [<calls>] [<callchains>]"
 	print >> sys.stderr, "where:	columns		'all' or 'branches'"
-	print >> sys.stderr, "		calls		'calls' => create calls table"
+	print >> sys.stderr, "		calls		'calls' => create calls and call_paths table"
+	print >> sys.stderr, "		callchains	'callchains' => create call_paths table"
 	raise Exception("Too few arguments")
 
 if (len(sys.argv) < 2):
@@ -246,9 +248,11 @@
 
 branches = (columns == "branches")
 
-if (len(sys.argv) >= 4):
-	if (sys.argv[3] == "calls"):
+for i in range(3,len(sys.argv)):
+	if (sys.argv[i] == "calls"):
 		perf_db_export_calls = True
+	elif (sys.argv[i] == "callchains"):
+		perf_db_export_callchains = True
 	else:
 		usage()
 
@@ -359,14 +363,16 @@
 		'transaction	bigint,'
 		'data_src	bigint,'
 		'branch_type	integer,'
-		'in_tx		boolean)')
+		'in_tx		boolean,'
+		'call_path_id	bigint)')
 
-if perf_db_export_calls:
+if perf_db_export_calls or perf_db_export_callchains:
 	do_query(query, 'CREATE TABLE call_paths ('
 		'id		bigint		NOT NULL,'
 		'parent_id	bigint,'
 		'symbol_id	bigint,'
 		'ip		bigint)')
+if perf_db_export_calls:
 	do_query(query, 'CREATE TABLE calls ('
 		'id		bigint		NOT NULL,'
 		'thread_id	bigint,'
@@ -428,7 +434,7 @@
 		'(SELECT tid FROM threads WHERE id = thread_id) AS tid'
 	' FROM comm_threads')
 
-if perf_db_export_calls:
+if perf_db_export_calls or perf_db_export_callchains:
 	do_query(query, 'CREATE VIEW call_paths_view AS '
 		'SELECT '
 			'c.id,'
@@ -444,6 +450,7 @@
 			'(SELECT dso_id FROM symbols WHERE id = p.symbol_id) AS parent_dso_id,'
 			'(SELECT dso FROM symbols_view  WHERE id = p.symbol_id) AS parent_dso_short_name'
 		' FROM call_paths c INNER JOIN call_paths p ON p.id = c.parent_id')
+if perf_db_export_calls:
 	do_query(query, 'CREATE VIEW calls_view AS '
 		'SELECT '
 			'calls.id,'
@@ -541,8 +548,9 @@
 symbol_file		= open_output_file("symbol_table.bin")
 branch_type_file	= open_output_file("branch_type_table.bin")
 sample_file		= open_output_file("sample_table.bin")
-if perf_db_export_calls:
+if perf_db_export_calls or perf_db_export_callchains:
 	call_path_file		= open_output_file("call_path_table.bin")
+if perf_db_export_calls:
 	call_file		= open_output_file("call_table.bin")
 
 def trace_begin():
@@ -554,8 +562,8 @@
 	comm_table(0, "unknown")
 	dso_table(0, 0, "unknown", "unknown", "")
 	symbol_table(0, 0, 0, 0, 0, "unknown")
-	sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
-	if perf_db_export_calls:
+	sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
+	if perf_db_export_calls or perf_db_export_callchains:
 		call_path_table(0, 0, 0, 0)
 
 unhandled_count = 0
@@ -571,8 +579,9 @@
 	copy_output_file(symbol_file,		"symbols")
 	copy_output_file(branch_type_file,	"branch_types")
 	copy_output_file(sample_file,		"samples")
-	if perf_db_export_calls:
+	if perf_db_export_calls or perf_db_export_callchains:
 		copy_output_file(call_path_file,	"call_paths")
+	if perf_db_export_calls:
 		copy_output_file(call_file,		"calls")
 
 	print datetime.datetime.today(), "Removing intermediate files..."
@@ -585,8 +594,9 @@
 	remove_output_file(symbol_file)
 	remove_output_file(branch_type_file)
 	remove_output_file(sample_file)
-	if perf_db_export_calls:
+	if perf_db_export_calls or perf_db_export_callchains:
 		remove_output_file(call_path_file)
+	if perf_db_export_calls:
 		remove_output_file(call_file)
 	os.rmdir(output_dir_name)
 	print datetime.datetime.today(), "Adding primary keys"
@@ -599,8 +609,9 @@
 	do_query(query, 'ALTER TABLE symbols         ADD PRIMARY KEY (id)')
 	do_query(query, 'ALTER TABLE branch_types    ADD PRIMARY KEY (id)')
 	do_query(query, 'ALTER TABLE samples         ADD PRIMARY KEY (id)')
-	if perf_db_export_calls:
+	if perf_db_export_calls or perf_db_export_callchains:
 		do_query(query, 'ALTER TABLE call_paths      ADD PRIMARY KEY (id)')
+	if perf_db_export_calls:
 		do_query(query, 'ALTER TABLE calls           ADD PRIMARY KEY (id)')
 
 	print datetime.datetime.today(), "Adding foreign keys"
@@ -623,10 +634,11 @@
 					'ADD CONSTRAINT symbolfk   FOREIGN KEY (symbol_id)    REFERENCES symbols    (id),'
 					'ADD CONSTRAINT todsofk    FOREIGN KEY (to_dso_id)    REFERENCES dsos       (id),'
 					'ADD CONSTRAINT tosymbolfk FOREIGN KEY (to_symbol_id) REFERENCES symbols    (id)')
-	if perf_db_export_calls:
+	if perf_db_export_calls or perf_db_export_callchains:
 		do_query(query, 'ALTER TABLE call_paths '
 					'ADD CONSTRAINT parentfk    FOREIGN KEY (parent_id)    REFERENCES call_paths (id),'
 					'ADD CONSTRAINT symbolfk    FOREIGN KEY (symbol_id)    REFERENCES symbols    (id)')
+	if perf_db_export_calls:
 		do_query(query, 'ALTER TABLE calls '
 					'ADD CONSTRAINT threadfk    FOREIGN KEY (thread_id)    REFERENCES threads    (id),'
 					'ADD CONSTRAINT commfk      FOREIGN KEY (comm_id)      REFERENCES comms      (id),'
@@ -694,11 +706,11 @@
 	value = struct.pack(fmt, 2, 4, branch_type, n, name)
 	branch_type_file.write(value)
 
-def sample_table(sample_id, evsel_id, machine_id, thread_id, comm_id, dso_id, symbol_id, sym_offset, ip, time, cpu, to_dso_id, to_symbol_id, to_sym_offset, to_ip, period, weight, transaction, data_src, branch_type, in_tx, *x):
+def sample_table(sample_id, evsel_id, machine_id, thread_id, comm_id, dso_id, symbol_id, sym_offset, ip, time, cpu, to_dso_id, to_symbol_id, to_sym_offset, to_ip, period, weight, transaction, data_src, branch_type, in_tx, call_path_id, *x):
 	if branches:
-		value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiiiB", 17, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 4, branch_type, 1, in_tx)
+		value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiiiBiq", 18, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 4, branch_type, 1, in_tx, 8, call_path_id)
 	else:
-		value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiqiqiqiqiiiB", 21, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 8, period, 8, weight, 8, transaction, 8, data_src, 4, branch_type, 1, in_tx)
+		value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiqiqiqiqiiiBiq", 22, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 8, period, 8, weight, 8, transaction, 8, data_src, 4, branch_type, 1, in_tx, 8, call_path_id)
 	sample_file.write(value)
 
 def call_path_table(cp_id, parent_id, symbol_id, ip, *x):
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index 1ba628e..66a2898 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -37,6 +37,8 @@
 perf-y += cpumap.o
 perf-y += stat.o
 perf-y += event_update.o
+perf-y += event-times.o
+perf-y += backward-ring-buffer.o
 
 $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build
 	$(call rule_mkdir)
diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c
new file mode 100644
index 0000000..d9ba991
--- /dev/null
+++ b/tools/perf/tests/backward-ring-buffer.c
@@ -0,0 +1,151 @@
+/*
+ * Test backward bit in event attribute, read ring buffer from end to
+ * beginning
+ */
+
+#include <perf.h>
+#include <evlist.h>
+#include <sys/prctl.h>
+#include "tests.h"
+#include "debug.h"
+
+#define NR_ITERS 111
+
+static void testcase(void)
+{
+	int i;
+
+	for (i = 0; i < NR_ITERS; i++) {
+		char proc_name[10];
+
+		snprintf(proc_name, sizeof(proc_name), "p:%d\n", i);
+		prctl(PR_SET_NAME, proc_name);
+	}
+}
+
+static int count_samples(struct perf_evlist *evlist, int *sample_count,
+			 int *comm_count)
+{
+	int i;
+
+	for (i = 0; i < evlist->nr_mmaps; i++) {
+		union perf_event *event;
+
+		perf_evlist__mmap_read_catchup(evlist, i);
+		while ((event = perf_evlist__mmap_read_backward(evlist, i)) != NULL) {
+			const u32 type = event->header.type;
+
+			switch (type) {
+			case PERF_RECORD_SAMPLE:
+				(*sample_count)++;
+				break;
+			case PERF_RECORD_COMM:
+				(*comm_count)++;
+				break;
+			default:
+				pr_err("Unexpected record of type %d\n", type);
+				return TEST_FAIL;
+			}
+		}
+	}
+	return TEST_OK;
+}
+
+static int do_test(struct perf_evlist *evlist, int mmap_pages,
+		   int *sample_count, int *comm_count)
+{
+	int err;
+	char sbuf[STRERR_BUFSIZE];
+
+	err = perf_evlist__mmap(evlist, mmap_pages, true);
+	if (err < 0) {
+		pr_debug("perf_evlist__mmap: %s\n",
+			 strerror_r(errno, sbuf, sizeof(sbuf)));
+		return TEST_FAIL;
+	}
+
+	perf_evlist__enable(evlist);
+	testcase();
+	perf_evlist__disable(evlist);
+
+	err = count_samples(evlist, sample_count, comm_count);
+	perf_evlist__munmap(evlist);
+	return err;
+}
+
+
+int test__backward_ring_buffer(int subtest __maybe_unused)
+{
+	int ret = TEST_SKIP, err, sample_count = 0, comm_count = 0;
+	char pid[16], sbuf[STRERR_BUFSIZE];
+	struct perf_evlist *evlist;
+	struct perf_evsel *evsel __maybe_unused;
+	struct parse_events_error parse_error;
+	struct record_opts opts = {
+		.target = {
+			.uid = UINT_MAX,
+			.uses_mmap = true,
+		},
+		.freq	      = 0,
+		.mmap_pages   = 256,
+		.default_interval = 1,
+	};
+
+	snprintf(pid, sizeof(pid), "%d", getpid());
+	pid[sizeof(pid) - 1] = '\0';
+	opts.target.tid = opts.target.pid = pid;
+
+	evlist = perf_evlist__new();
+	if (!evlist) {
+		pr_debug("Not enough memory to create evlist\n");
+		return TEST_FAIL;
+	}
+
+	err = perf_evlist__create_maps(evlist, &opts.target);
+	if (err < 0) {
+		pr_debug("Not enough memory to create thread/cpu maps\n");
+		goto out_delete_evlist;
+	}
+
+	bzero(&parse_error, sizeof(parse_error));
+	err = parse_events(evlist, "syscalls:sys_enter_prctl", &parse_error);
+	if (err) {
+		pr_debug("Failed to parse tracepoint event, try running as root\n");
+		ret = TEST_SKIP;
+		goto out_delete_evlist;
+	}
+
+	perf_evlist__config(evlist, &opts, NULL);
+
+	/* Set backward bit, ring buffer should be writing from end */
+	evlist__for_each(evlist, evsel)
+		evsel->attr.write_backward = 1;
+
+	err = perf_evlist__open(evlist);
+	if (err < 0) {
+		pr_debug("perf_evlist__open: %s\n",
+			 strerror_r(errno, sbuf, sizeof(sbuf)));
+		goto out_delete_evlist;
+	}
+
+	ret = TEST_FAIL;
+	err = do_test(evlist, opts.mmap_pages, &sample_count,
+		      &comm_count);
+	if (err != TEST_OK)
+		goto out_delete_evlist;
+
+	if ((sample_count != NR_ITERS) || (comm_count != NR_ITERS)) {
+		pr_err("Unexpected counter: sample_count=%d, comm_count=%d\n",
+		       sample_count, comm_count);
+		goto out_delete_evlist;
+	}
+
+	err = do_test(evlist, 1, &sample_count, &comm_count);
+	if (err != TEST_OK)
+		goto out_delete_evlist;
+
+	ret = TEST_OK;
+out_delete_evlist:
+	perf_evlist__delete(evlist);
+	return ret;
+}
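
Note: a sketch of the traversal idea the test exercises — deliberately not the perf mmap ABI. Every record begins with a header carrying its total size, and with write_backward the newest record starts where the head points, so walking forward from the head visits records newest-first:

#include <stdio.h>
#include <string.h>

struct rec_header {
	unsigned short size;	/* total record size, header included */
};

int main(void)
{
	unsigned char buf[64];
	size_t head, pos;

	/* Lay out two fake records back-to-front, as a backward writer would:
	 * record B (newest, 16 bytes) at offset 16, record A (oldest, 32 bytes)
	 * at offset 32. */
	struct rec_header b = { .size = 16 }, a = { .size = 32 };

	memcpy(buf + 16, &b, sizeof(b));
	memcpy(buf + 32, &a, sizeof(a));
	head = 16;	/* head points at the newest record */

	for (pos = head; pos < sizeof(buf); ) {
		struct rec_header hdr;

		memcpy(&hdr, buf + pos, sizeof(hdr));
		printf("record at %zu, size %u\n", pos, hdr.size);
		pos += hdr.size;	/* next (older) record */
	}
	return 0;
}
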
diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c
index 199501c..f31eed3 100644
--- a/tools/perf/tests/bpf.c
+++ b/tools/perf/tests/bpf.c
@@ -138,7 +138,7 @@
 	perf_evlist__splice_list_tail(evlist, &parse_evlist.list);
 	evlist->nr_groups = parse_evlist.nr_groups;
 
-	perf_evlist__config(evlist, &opts);
+	perf_evlist__config(evlist, &opts, NULL);
 
 	err = perf_evlist__open(evlist);
 	if (err < 0) {
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index f2b1dca..0e95c20 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -204,6 +204,14 @@
 		.func = test__event_update,
 	},
 	{
+		.desc = "Test event times",
+		.func = test__event_times,
+	},
+	{
+		.desc = "Test backward reading from ring buffer",
+		.func = test__backward_ring_buffer,
+	},
+	{
 		.func = NULL,
 	},
 };
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index abd3f0e..68a69a1 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -532,7 +532,7 @@
 			goto out_put;
 		}
 
-		perf_evlist__config(evlist, &opts);
+		perf_evlist__config(evlist, &opts, NULL);
 
 		evsel = perf_evlist__first(evlist);
 
diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c
index dc673ff..8cf0d9e 100644
--- a/tools/perf/tests/dso-data.c
+++ b/tools/perf/tests/dso-data.c
@@ -202,7 +202,7 @@
 {
 	int i;
 
-	dsos = malloc(sizeof(dsos) * cnt);
+	dsos = malloc(sizeof(*dsos) * cnt);
 	TEST_ASSERT_VAL("failed to alloc dsos array", dsos);
 
 	for (i = 0; i < cnt; i++) {
diff --git a/tools/perf/tests/event-times.c b/tools/perf/tests/event-times.c
new file mode 100644
index 0000000..95fb744
--- /dev/null
+++ b/tools/perf/tests/event-times.c
@@ -0,0 +1,236 @@
+#include <linux/compiler.h>
+#include <string.h>
+#include "tests.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "util.h"
+#include "debug.h"
+#include "thread_map.h"
+#include "target.h"
+
+static int attach__enable_on_exec(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__last(evlist);
+	struct target target = {
+		.uid = UINT_MAX,
+	};
+	const char *argv[] = { "true", NULL, };
+	char sbuf[STRERR_BUFSIZE];
+	int err;
+
+	pr_debug("attaching to spawned child, enable on exec\n");
+
+	err = perf_evlist__create_maps(evlist, &target);
+	if (err < 0) {
+		pr_debug("Not enough memory to create thread/cpu maps\n");
+		return err;
+	}
+
+	err = perf_evlist__prepare_workload(evlist, &target, argv, false, NULL);
+	if (err < 0) {
+		pr_debug("Couldn't run the workload!\n");
+		return err;
+	}
+
+	evsel->attr.enable_on_exec = 1;
+
+	err = perf_evlist__open(evlist);
+	if (err < 0) {
+		pr_debug("perf_evlist__open: %s\n",
+			 strerror_r(errno, sbuf, sizeof(sbuf)));
+		return err;
+	}
+
+	return perf_evlist__start_workload(evlist) == 1 ? TEST_OK : TEST_FAIL;
+}
+
+static int detach__enable_on_exec(struct perf_evlist *evlist)
+{
+	waitpid(evlist->workload.pid, NULL, 0);
+	return 0;
+}
+
+static int attach__current_disabled(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__last(evlist);
+	struct thread_map *threads;
+	int err;
+
+	pr_debug("attaching to current thread as disabled\n");
+
+	threads = thread_map__new(-1, getpid(), UINT_MAX);
+	if (threads == NULL) {
+		pr_debug("failed to call thread_map__new\n");
+		return -1;
+	}
+
+	evsel->attr.disabled = 1;
+
+	err = perf_evsel__open_per_thread(evsel, threads);
+	if (err) {
+		pr_debug("Failed to open event cpu-clock:u\n");
+		return err;
+	}
+
+	thread_map__put(threads);
+	return perf_evsel__enable(evsel) == 0 ? TEST_OK : TEST_FAIL;
+}
+
+static int attach__current_enabled(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__last(evlist);
+	struct thread_map *threads;
+	int err;
+
+	pr_debug("attaching to current thread as enabled\n");
+
+	threads = thread_map__new(-1, getpid(), UINT_MAX);
+	if (threads == NULL) {
+		pr_debug("failed to call thread_map__new\n");
+		return -1;
+	}
+
+	err = perf_evsel__open_per_thread(evsel, threads);
+
+	thread_map__put(threads);
+	return err == 0 ? TEST_OK : TEST_FAIL;
+}
+
+static int detach__disable(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__last(evlist);
+
+	return perf_evsel__disable(evsel);
+}
+
+static int attach__cpu_disabled(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__last(evlist);
+	struct cpu_map *cpus;
+	int err;
+
+	pr_debug("attaching to CPU 0 as disabled\n");
+
+	cpus = cpu_map__new("0");
+	if (cpus == NULL) {
+		pr_debug("failed to call cpu_map__new\n");
+		return -1;
+	}
+
+	evsel->attr.disabled = 1;
+
+	err = perf_evsel__open_per_cpu(evsel, cpus);
+	if (err) {
+		if (err == -EACCES)
+			return TEST_SKIP;
+
+		pr_debug("Failed to open event cpu-clock:u\n");
+		return err;
+	}
+
+	cpu_map__put(cpus);
+	return perf_evsel__enable(evsel);
+}
+
+static int attach__cpu_enabled(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__last(evlist);
+	struct cpu_map *cpus;
+	int err;
+
+	pr_debug("attaching to CPU 0 as enabled\n");
+
+	cpus = cpu_map__new("0");
+	if (cpus == NULL) {
+		pr_debug("failed to call cpu_map__new\n");
+		return -1;
+	}
+
+	err = perf_evsel__open_per_cpu(evsel, cpus);
+	if (err == -EACCES)
+		return TEST_SKIP;
+
+	cpu_map__put(cpus);
+	return err ? TEST_FAIL : TEST_OK;
+}
+
+static int test_times(int (attach)(struct perf_evlist *),
+		      int (detach)(struct perf_evlist *))
+{
+	struct perf_counts_values count;
+	struct perf_evlist *evlist = NULL;
+	struct perf_evsel *evsel;
+	int err = -1, i;
+
+	evlist = perf_evlist__new();
+	if (!evlist) {
+		pr_debug("failed to create event list\n");
+		goto out_err;
+	}
+
+	err = parse_events(evlist, "cpu-clock:u", NULL);
+	if (err) {
+		pr_debug("failed to parse event cpu-clock:u\n");
+		goto out_err;
+	}
+
+	evsel = perf_evlist__last(evlist);
+	evsel->attr.read_format |=
+		PERF_FORMAT_TOTAL_TIME_ENABLED |
+		PERF_FORMAT_TOTAL_TIME_RUNNING;
+
+	err = attach(evlist);
+	if (err == TEST_SKIP) {
+		pr_debug("  SKIP  : not enough rights\n");
+		return err;
+	}
+
+	TEST_ASSERT_VAL("failed to attach", !err);
+
+	for (i = 0; i < 100000000; i++) { }
+
+	TEST_ASSERT_VAL("failed to detach", !detach(evlist));
+
+	perf_evsel__read(evsel, 0, 0, &count);
+
+	err = !(count.ena == count.run);
+
+	pr_debug("  %s: ena %" PRIu64", run %" PRIu64"\n",
+		 !err ? "OK    " : "FAILED",
+		 count.ena, count.run);
+
+out_err:
+	if (evlist)
+		perf_evlist__delete(evlist);
+	return !err ? TEST_OK : TEST_FAIL;
+}
+
+/*
+ * This test creates the software event 'cpu-clock',
+ * attaches it in several ways (explained below)
+ * and checks that the enabled and running times
+ * match.
+ */
+int test__event_times(int subtest __maybe_unused)
+{
+	int err, ret = 0;
+
+#define _T(attach, detach)			\
+	err = test_times(attach, detach);	\
+	if (err && (ret == TEST_OK || ret == TEST_SKIP))	\
+		ret = err;
+
+	/* attach on newly spawned process after exec */
+	_T(attach__enable_on_exec,   detach__enable_on_exec)
+	/* attach on current process as enabled */
+	_T(attach__current_enabled,  detach__disable)
+	/* attach on current process as disabled */
+	_T(attach__current_disabled, detach__disable)
+	/* attach on cpu as disabled */
+	_T(attach__cpu_disabled,     detach__disable)
+	/* attach on cpu as enabled */
+	_T(attach__cpu_enabled,      detach__disable)
+
+#undef _T
+	return ret;
+}
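
Note: a standalone sketch of what the test checks, assuming Linux and permission to open a per-task software event: open cpu-clock on the current thread with PERF_FORMAT_TOTAL_TIME_ENABLED|RUNNING and verify the two times match.

#include <linux/perf_event.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

struct read_fmt {
	uint64_t value, ena, run;	/* layout for the two TOTAL_TIME bits */
};

int main(void)
{
	struct perf_event_attr attr;
	struct read_fmt rf;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_CPU_CLOCK;
	attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
			   PERF_FORMAT_TOTAL_TIME_RUNNING;

	/* pid = 0 (this task), cpu = -1 (any cpu), enabled from the start */
	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	for (volatile int i = 0; i < 100000000; i++)
		;	/* burn some cycles, as the test does */

	if (read(fd, &rf, sizeof(rf)) != sizeof(rf)) {
		perror("read");
		return 1;
	}

	printf("ena %llu, run %llu -> %s\n",
	       (unsigned long long)rf.ena, (unsigned long long)rf.run,
	       rf.ena == rf.run ? "OK" : "FAILED");
	close(fd);
	return 0;
}
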
diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c
index 012eab5..63ecf21 100644
--- a/tools/perf/tests/event_update.c
+++ b/tools/perf/tests/event_update.c
@@ -30,7 +30,7 @@
 
 	TEST_ASSERT_VAL("wrong id", ev->id == 123);
 	TEST_ASSERT_VAL("wrong id", ev->type == PERF_EVENT_UPDATE__SCALE);
-	TEST_ASSERT_VAL("wrong scale", ev_data->scale = 0.123);
+	TEST_ASSERT_VAL("wrong scale", ev_data->scale == 0.123);
 	return 0;
 }
 
diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c
index f55f4bd..6b21746 100644
--- a/tools/perf/tests/hists_common.c
+++ b/tools/perf/tests/hists_common.c
@@ -161,7 +161,7 @@
 	struct rb_root *root;
 	struct rb_node *node;
 
-	if (sort__need_collapse)
+	if (hists__has(hists, need_collapse))
 		root = &hists->entries_collapsed;
 	else
 		root = hists->entries_in;
diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c
index ed5aa9e..a9e3db3 100644
--- a/tools/perf/tests/hists_cumulate.c
+++ b/tools/perf/tests/hists_cumulate.c
@@ -101,7 +101,7 @@
 		if (machine__resolve(machine, &al, &sample) < 0)
 			goto out;
 
-		if (hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH,
+		if (hist_entry_iter__add(&iter, &al, sysctl_perf_event_max_stack,
 					 NULL) < 0) {
 			addr_location__put(&al);
 			goto out;
@@ -126,7 +126,7 @@
 	struct rb_root *root_out;
 	struct rb_node *node;
 
-	if (sort__need_collapse)
+	if (hists__has(hists, need_collapse))
 		root_in = &hists->entries_collapsed;
 	else
 		root_in = hists->entries_in;
diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c
index b825d24..e846f8c 100644
--- a/tools/perf/tests/hists_filter.c
+++ b/tools/perf/tests/hists_filter.c
@@ -81,7 +81,7 @@
 
 			al.socket = fake_samples[i].socket;
 			if (hist_entry_iter__add(&iter, &al,
-						 PERF_MAX_STACK_DEPTH, NULL) < 0) {
+						 sysctl_perf_event_max_stack, NULL) < 0) {
 				addr_location__put(&al);
 				goto out;
 			}
diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c
index 358324e..acf5a13 100644
--- a/tools/perf/tests/hists_link.c
+++ b/tools/perf/tests/hists_link.c
@@ -145,7 +145,7 @@
 	/*
 	 * Only entries from fake_common_samples should have a pair.
 	 */
-	if (sort__need_collapse)
+	if (hists__has(hists, need_collapse))
 		root = &hists->entries_collapsed;
 	else
 		root = hists->entries_in;
@@ -197,7 +197,7 @@
 	 * and some entries will have no pair.  However every entry
 	 * in other hists should have (dummy) pair.
 	 */
-	if (sort__need_collapse)
+	if (hists__has(hists, need_collapse))
 		root = &hists->entries_collapsed;
 	else
 		root = hists->entries_in;
diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c
index d3556fb..63c5efa 100644
--- a/tools/perf/tests/hists_output.c
+++ b/tools/perf/tests/hists_output.c
@@ -67,7 +67,7 @@
 		if (machine__resolve(machine, &al, &sample) < 0)
 			goto out;
 
-		if (hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH,
+		if (hist_entry_iter__add(&iter, &al, sysctl_perf_event_max_stack,
 					 NULL) < 0) {
 			addr_location__put(&al);
 			goto out;
@@ -92,7 +92,7 @@
 	struct rb_root *root_out;
 	struct rb_node *node;
 
-	if (sort__need_collapse)
+	if (hists__has(hists, need_collapse))
 		root_in = &hists->entries_collapsed;
 	else
 		root_in = hists->entries_in;
diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c
index ddb78fa..614e45a 100644
--- a/tools/perf/tests/keep-tracking.c
+++ b/tools/perf/tests/keep-tracking.c
@@ -80,7 +80,7 @@
 	CHECK__(parse_events(evlist, "dummy:u", NULL));
 	CHECK__(parse_events(evlist, "cycles:u", NULL));
 
-	perf_evlist__config(evlist, &opts);
+	perf_evlist__config(evlist, &opts, NULL);
 
 	evsel = perf_evlist__first(evlist);
 
diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c
index eb99a105..4344fe4 100644
--- a/tools/perf/tests/openat-syscall-tp-fields.c
+++ b/tools/perf/tests/openat-syscall-tp-fields.c
@@ -44,7 +44,7 @@
 		goto out_delete_evlist;
 	}
 
-	perf_evsel__config(evsel, &opts);
+	perf_evsel__config(evsel, &opts, NULL);
 
 	thread_map__set_pid(evlist->threads, 0, getpid());
 
diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
index 1cc78ce..b836ee6a 100644
--- a/tools/perf/tests/perf-record.c
+++ b/tools/perf/tests/perf-record.c
@@ -99,7 +99,7 @@
 	perf_evsel__set_sample_bit(evsel, CPU);
 	perf_evsel__set_sample_bit(evsel, TID);
 	perf_evsel__set_sample_bit(evsel, TIME);
-	perf_evlist__config(evlist, &opts);
+	perf_evlist__config(evlist, &opts, NULL);
 
 	err = sched__get_first_possible_cpu(evlist->workload.pid, &cpu_mask);
 	if (err < 0) {
diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c
index ebd8016..39a689b 100644
--- a/tools/perf/tests/switch-tracking.c
+++ b/tools/perf/tests/switch-tracking.c
@@ -417,7 +417,7 @@
 	perf_evsel__set_sample_bit(tracking_evsel, TIME);
 
 	/* Config events */
-	perf_evlist__config(evlist, &opts);
+	perf_evlist__config(evlist, &opts, NULL);
 
 	/* Check moved event is still at the front */
 	if (cycles_evsel != perf_evlist__first(evlist)) {
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 82b2b5e..c57e72c 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -85,6 +85,8 @@
 int test__synthesize_stat(int subtest);
 int test__synthesize_stat_round(int subtest);
 int test__event_update(int subtest);
+int test__event_times(int subtest);
+int test__backward_ring_buffer(int subtest);
 
 #if defined(__arm__) || defined(__aarch64__)
 #ifdef HAVE_DWARF_UNWIND_SUPPORT
diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c
index 630b0b4..e63abab 100644
--- a/tools/perf/tests/vmlinux-kallsyms.c
+++ b/tools/perf/tests/vmlinux-kallsyms.c
@@ -54,8 +54,14 @@
 	 * Step 3:
 	 *
 	 * Load and split /proc/kallsyms into multiple maps, one per module.
+	 * Do not use kcore, as this test was designed before kcore support
+	 * and has parts that only make sense if using the non-kcore code.
+	 * XXX: extend it to stress the kcore code as well, hint: the list
+	 * of modules extracted from /proc/kcore, in its current form, can't
+	 * be compared with the list of modules found in the "vmlinux"
+	 * code, nor with the one obtained from /proc/modules in the "kallsyms" code.
 	 */
-	if (machine__load_kallsyms(&kallsyms, "/proc/kallsyms", type, NULL) <= 0) {
+	if (__machine__load_kallsyms(&kallsyms, "/proc/kallsyms", type, true, NULL) <= 0) {
 		pr_debug("dso__load_kallsyms ");
 		goto out;
 	}
@@ -157,6 +163,9 @@
 
 					pr_debug("%#" PRIx64 ": diff name v: %s k: %s\n",
 						 mem_start, sym->name, pair->name);
+				} else {
+					pr_debug("%#" PRIx64 ": diff name v: %s k: %s\n",
+						 mem_start, sym->name, first_pair->name);
 				}
 			}
 		} else
diff --git a/tools/perf/trace/beauty/eventfd.c b/tools/perf/trace/beauty/eventfd.c
new file mode 100644
index 0000000..d64f4a9
--- /dev/null
+++ b/tools/perf/trace/beauty/eventfd.c
@@ -0,0 +1,38 @@
+#include <sys/eventfd.h>
+
+#ifndef EFD_SEMAPHORE
+#define EFD_SEMAPHORE		1
+#endif
+
+#ifndef EFD_NONBLOCK
+#define EFD_NONBLOCK		00004000
+#endif
+
+#ifndef EFD_CLOEXEC
+#define EFD_CLOEXEC		02000000
+#endif
+
+static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size, struct syscall_arg *arg)
+{
+	int printed = 0, flags = arg->val;
+
+	if (flags == 0)
+		return scnprintf(bf, size, "NONE");
+#define	P_FLAG(n) \
+	if (flags & EFD_##n) { \
+		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+		flags &= ~EFD_##n; \
+	}
+
+	P_FLAG(SEMAPHORE);
+	P_FLAG(CLOEXEC);
+	P_FLAG(NONBLOCK);
+#undef P_FLAG
+
+	if (flags)
+		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+	return printed;
+}
+
+#define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
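
Note: the trace/beauty/ files all share this flag-printing idiom — peel known bits off a flags word, append their names "A|B|C" style, and print any leftover bits in hex. A standalone sketch, with snprintf standing in for the kernel-style scnprintf and the EFD_* constants taken from <sys/eventfd.h>:

#include <stdio.h>
#include <sys/eventfd.h>

static size_t efd_flags__string(char *bf, size_t size, int flags)
{
	size_t printed = 0;

	if (flags == 0)
		return snprintf(bf, size, "NONE");
#define P_FLAG(n) \
	if (flags & EFD_##n) { \
		printed += snprintf(bf + printed, size - printed, "%s%s", \
				    printed ? "|" : "", #n); \
		flags &= ~EFD_##n; \
	}

	P_FLAG(SEMAPHORE);
	P_FLAG(CLOEXEC);
	P_FLAG(NONBLOCK);
#undef P_FLAG

	if (flags)	/* bits we don't know a name for */
		printed += snprintf(bf + printed, size - printed, "%s%#x",
				    printed ? "|" : "", flags);
	return printed;
}

int main(void)
{
	char bf[64];

	efd_flags__string(bf, sizeof(bf), EFD_CLOEXEC | EFD_NONBLOCK);
	printf("%s\n", bf);	/* prints: CLOEXEC|NONBLOCK */
	return 0;
}
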
diff --git a/tools/perf/trace/beauty/flock.c b/tools/perf/trace/beauty/flock.c
new file mode 100644
index 0000000..021bb48
--- /dev/null
+++ b/tools/perf/trace/beauty/flock.c
@@ -0,0 +1,31 @@
+
+static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
+					   struct syscall_arg *arg)
+{
+	int printed = 0, op = arg->val;
+
+	if (op == 0)
+		return scnprintf(bf, size, "NONE");
+#define	P_CMD(cmd) \
+	if ((op & LOCK_##cmd) == LOCK_##cmd) { \
+		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
+		op &= ~LOCK_##cmd; \
+	}
+
+	P_CMD(SH);
+	P_CMD(EX);
+	P_CMD(NB);
+	P_CMD(UN);
+	P_CMD(MAND);
+	P_CMD(RW);
+	P_CMD(READ);
+	P_CMD(WRITE);
+#undef P_CMD
+
+	if (op)
+		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
+
+	return printed;
+}
+
+#define SCA_FLOCK syscall_arg__scnprintf_flock
diff --git a/tools/perf/trace/beauty/futex_op.c b/tools/perf/trace/beauty/futex_op.c
new file mode 100644
index 0000000..e247621
--- /dev/null
+++ b/tools/perf/trace/beauty/futex_op.c
@@ -0,0 +1,44 @@
+#include <linux/futex.h>
+
+static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
+{
+	enum syscall_futex_args {
+		SCF_UADDR   = (1 << 0),
+		SCF_OP	    = (1 << 1),
+		SCF_VAL	    = (1 << 2),
+		SCF_TIMEOUT = (1 << 3),
+		SCF_UADDR2  = (1 << 4),
+		SCF_VAL3    = (1 << 5),
+	};
+	int op = arg->val;
+	int cmd = op & FUTEX_CMD_MASK;
+	size_t printed = 0;
+
+	switch (cmd) {
+#define	P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
+	P_FUTEX_OP(WAIT);	    arg->mask |= SCF_VAL3|SCF_UADDR2;		  break;
+	P_FUTEX_OP(WAKE);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
+	P_FUTEX_OP(FD);		    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
+	P_FUTEX_OP(REQUEUE);	    arg->mask |= SCF_VAL3|SCF_TIMEOUT;	          break;
+	P_FUTEX_OP(CMP_REQUEUE);    arg->mask |= SCF_TIMEOUT;			  break;
+	P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT;			  break;
+	P_FUTEX_OP(WAKE_OP);							  break;
+	P_FUTEX_OP(LOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
+	P_FUTEX_OP(UNLOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
+	P_FUTEX_OP(TRYLOCK_PI);	    arg->mask |= SCF_VAL3|SCF_UADDR2;		  break;
+	P_FUTEX_OP(WAIT_BITSET);    arg->mask |= SCF_UADDR2;			  break;
+	P_FUTEX_OP(WAKE_BITSET);    arg->mask |= SCF_UADDR2;			  break;
+	P_FUTEX_OP(WAIT_REQUEUE_PI);						  break;
+	default: printed = scnprintf(bf, size, "%#x", cmd);			  break;
+	}
+
+	if (op & FUTEX_PRIVATE_FLAG)
+		printed += scnprintf(bf + printed, size - printed, "|PRIV");
+
+	if (op & FUTEX_CLOCK_REALTIME)
+		printed += scnprintf(bf + printed, size - printed, "|CLKRT");
+
+	return printed;
+}
+
+#define SCA_FUTEX_OP  syscall_arg__scnprintf_futex_op
diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c
new file mode 100644
index 0000000..3444a4d
--- /dev/null
+++ b/tools/perf/trace/beauty/mmap.c
@@ -0,0 +1,158 @@
+#include <sys/mman.h>
+
+static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
+					       struct syscall_arg *arg)
+{
+	int printed = 0, prot = arg->val;
+
+	if (prot == PROT_NONE)
+		return scnprintf(bf, size, "NONE");
+#define	P_MMAP_PROT(n) \
+	if (prot & PROT_##n) { \
+		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+		prot &= ~PROT_##n; \
+	}
+
+	P_MMAP_PROT(EXEC);
+	P_MMAP_PROT(READ);
+	P_MMAP_PROT(WRITE);
+#ifdef PROT_SEM
+	P_MMAP_PROT(SEM);
+#endif
+	P_MMAP_PROT(GROWSDOWN);
+	P_MMAP_PROT(GROWSUP);
+#undef P_MMAP_PROT
+
+	if (prot)
+		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
+
+	return printed;
+}
+
+#define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
+
+#ifndef MAP_STACK
+# define MAP_STACK		0x20000
+#endif
+
+static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
+						struct syscall_arg *arg)
+{
+	int printed = 0, flags = arg->val;
+
+#define	P_MMAP_FLAG(n) \
+	if (flags & MAP_##n) { \
+		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+		flags &= ~MAP_##n; \
+	}
+
+	P_MMAP_FLAG(SHARED);
+	P_MMAP_FLAG(PRIVATE);
+#ifdef MAP_32BIT
+	P_MMAP_FLAG(32BIT);
+#endif
+	P_MMAP_FLAG(ANONYMOUS);
+	P_MMAP_FLAG(DENYWRITE);
+	P_MMAP_FLAG(EXECUTABLE);
+	P_MMAP_FLAG(FILE);
+	P_MMAP_FLAG(FIXED);
+	P_MMAP_FLAG(GROWSDOWN);
+#ifdef MAP_HUGETLB
+	P_MMAP_FLAG(HUGETLB);
+#endif
+	P_MMAP_FLAG(LOCKED);
+	P_MMAP_FLAG(NONBLOCK);
+	P_MMAP_FLAG(NORESERVE);
+	P_MMAP_FLAG(POPULATE);
+	P_MMAP_FLAG(STACK);
+#ifdef MAP_UNINITIALIZED
+	P_MMAP_FLAG(UNINITIALIZED);
+#endif
+#undef P_MMAP_FLAG
+
+	if (flags)
+		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+	return printed;
+}
+
+#define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
+
+static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
+						  struct syscall_arg *arg)
+{
+	int printed = 0, flags = arg->val;
+
+#define P_MREMAP_FLAG(n) \
+	if (flags & MREMAP_##n) { \
+		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+		flags &= ~MREMAP_##n; \
+	}
+
+	P_MREMAP_FLAG(MAYMOVE);
+#ifdef MREMAP_FIXED
+	P_MREMAP_FLAG(FIXED);
+#endif
+#undef P_MREMAP_FLAG
+
+	if (flags)
+		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+	return printed;
+}
+
+#define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
+
+#ifndef MADV_HWPOISON
+#define MADV_HWPOISON		100
+#endif
+
+#ifndef MADV_MERGEABLE
+#define MADV_MERGEABLE		 12
+#endif
+
+#ifndef MADV_UNMERGEABLE
+#define MADV_UNMERGEABLE	 13
+#endif
+
+static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
+						      struct syscall_arg *arg)
+{
+	int behavior = arg->val;
+
+	switch (behavior) {
+#define	P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
+	P_MADV_BHV(NORMAL);
+	P_MADV_BHV(RANDOM);
+	P_MADV_BHV(SEQUENTIAL);
+	P_MADV_BHV(WILLNEED);
+	P_MADV_BHV(DONTNEED);
+	P_MADV_BHV(REMOVE);
+	P_MADV_BHV(DONTFORK);
+	P_MADV_BHV(DOFORK);
+	P_MADV_BHV(HWPOISON);
+#ifdef MADV_SOFT_OFFLINE
+	P_MADV_BHV(SOFT_OFFLINE);
+#endif
+	P_MADV_BHV(MERGEABLE);
+	P_MADV_BHV(UNMERGEABLE);
+#ifdef MADV_HUGEPAGE
+	P_MADV_BHV(HUGEPAGE);
+#endif
+#ifdef MADV_NOHUGEPAGE
+	P_MADV_BHV(NOHUGEPAGE);
+#endif
+#ifdef MADV_DONTDUMP
+	P_MADV_BHV(DONTDUMP);
+#endif
+#ifdef MADV_DODUMP
+	P_MADV_BHV(DODUMP);
+#endif
+#undef P_MADV_BHV
+	default: break;
+	}
+
+	return scnprintf(bf, size, "%#x", behavior);
+}
+
+#define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
diff --git a/tools/perf/trace/beauty/mode_t.c b/tools/perf/trace/beauty/mode_t.c
new file mode 100644
index 0000000..930d8fe
--- /dev/null
+++ b/tools/perf/trace/beauty/mode_t.c
@@ -0,0 +1,68 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+/* From include/linux/stat.h */
+#ifndef S_IRWXUGO
+#define S_IRWXUGO	(S_IRWXU|S_IRWXG|S_IRWXO)
+#endif
+#ifndef S_IALLUGO
+#define S_IALLUGO	(S_ISUID|S_ISGID|S_ISVTX|S_IRWXUGO)
+#endif
+#ifndef S_IRUGO
+#define S_IRUGO         (S_IRUSR|S_IRGRP|S_IROTH)
+#endif
+#ifndef S_IWUGO
+#define S_IWUGO         (S_IWUSR|S_IWGRP|S_IWOTH)
+#endif
+#ifndef S_IXUGO
+#define S_IXUGO         (S_IXUSR|S_IXGRP|S_IXOTH)
+#endif
+
+static size_t syscall_arg__scnprintf_mode_t(char *bf, size_t size, struct syscall_arg *arg)
+{
+	int printed = 0, mode = arg->val;
+
+#define	P_MODE(n) \
+	if ((mode & S_##n) == S_##n) { \
+		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+		mode &= ~S_##n; \
+	}
+
+	P_MODE(IALLUGO);
+	P_MODE(IRWXUGO);
+	P_MODE(IRUGO);
+	P_MODE(IWUGO);
+	P_MODE(IXUGO);
+	P_MODE(IFMT);
+	P_MODE(IFSOCK);
+	P_MODE(IFLNK);
+	P_MODE(IFREG);
+	P_MODE(IFBLK);
+	P_MODE(IFDIR);
+	P_MODE(IFCHR);
+	P_MODE(IFIFO);
+	P_MODE(ISUID);
+	P_MODE(ISGID);
+	P_MODE(ISVTX);
+	P_MODE(IRWXU);
+	P_MODE(IRUSR);
+	P_MODE(IWUSR);
+	P_MODE(IXUSR);
+	P_MODE(IRWXG);
+	P_MODE(IRGRP);
+	P_MODE(IWGRP);
+	P_MODE(IXGRP);
+	P_MODE(IRWXO);
+	P_MODE(IROTH);
+	P_MODE(IWOTH);
+	P_MODE(IXOTH);
+#undef P_MODE
+
+	if (mode)
+		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", mode);
+
+	return printed;
+}
+
+#define SCA_MODE_T syscall_arg__scnprintf_mode_t
diff --git a/tools/perf/trace/beauty/msg_flags.c b/tools/perf/trace/beauty/msg_flags.c
new file mode 100644
index 0000000..07fa8a0
--- /dev/null
+++ b/tools/perf/trace/beauty/msg_flags.c
@@ -0,0 +1,62 @@
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#ifndef MSG_PROBE
+#define MSG_PROBE		     0x10
+#endif
+#ifndef MSG_WAITFORONE
+#define MSG_WAITFORONE		   0x10000
+#endif
+#ifndef MSG_SENDPAGE_NOTLAST
+#define MSG_SENDPAGE_NOTLAST	   0x20000
+#endif
+#ifndef MSG_FASTOPEN
+#define MSG_FASTOPEN		0x20000000
+#endif
+#ifndef MSG_CMSG_CLOEXEC
+# define MSG_CMSG_CLOEXEC	0x40000000
+#endif
+
+static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
+					       struct syscall_arg *arg)
+{
+	int printed = 0, flags = arg->val;
+
+	if (flags == 0)
+		return scnprintf(bf, size, "NONE");
+#define	P_MSG_FLAG(n) \
+	if (flags & MSG_##n) { \
+		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+		flags &= ~MSG_##n; \
+	}
+
+	P_MSG_FLAG(OOB);
+	P_MSG_FLAG(PEEK);
+	P_MSG_FLAG(DONTROUTE);
+	P_MSG_FLAG(TRYHARD);
+	P_MSG_FLAG(CTRUNC);
+	P_MSG_FLAG(PROBE);
+	P_MSG_FLAG(TRUNC);
+	P_MSG_FLAG(DONTWAIT);
+	P_MSG_FLAG(EOR);
+	P_MSG_FLAG(WAITALL);
+	P_MSG_FLAG(FIN);
+	P_MSG_FLAG(SYN);
+	P_MSG_FLAG(CONFIRM);
+	P_MSG_FLAG(RST);
+	P_MSG_FLAG(ERRQUEUE);
+	P_MSG_FLAG(NOSIGNAL);
+	P_MSG_FLAG(MORE);
+	P_MSG_FLAG(WAITFORONE);
+	P_MSG_FLAG(SENDPAGE_NOTLAST);
+	P_MSG_FLAG(FASTOPEN);
+	P_MSG_FLAG(CMSG_CLOEXEC);
+#undef P_MSG_FLAG
+
+	if (flags)
+		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+	return printed;
+}
+
+#define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
diff --git a/tools/perf/trace/beauty/open_flags.c b/tools/perf/trace/beauty/open_flags.c
new file mode 100644
index 0000000..0f3679e
--- /dev/null
+++ b/tools/perf/trace/beauty/open_flags.c
@@ -0,0 +1,56 @@
+
+static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
+					       struct syscall_arg *arg)
+{
+	int printed = 0, flags = arg->val;
+
+	if (!(flags & O_CREAT))
+		arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
+
+	if (flags == 0)
+		return scnprintf(bf, size, "RDONLY");
+#define	P_FLAG(n) \
+	if (flags & O_##n) { \
+		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+		flags &= ~O_##n; \
+	}
+
+	P_FLAG(APPEND);
+	P_FLAG(ASYNC);
+	P_FLAG(CLOEXEC);
+	P_FLAG(CREAT);
+	P_FLAG(DIRECT);
+	P_FLAG(DIRECTORY);
+	P_FLAG(EXCL);
+	P_FLAG(LARGEFILE);
+	P_FLAG(NOATIME);
+	P_FLAG(NOCTTY);
+#ifdef O_NONBLOCK
+	P_FLAG(NONBLOCK);
+#elif O_NDELAY
+	P_FLAG(NDELAY);
+#endif
+#ifdef O_PATH
+	P_FLAG(PATH);
+#endif
+	P_FLAG(RDWR);
+#ifdef O_DSYNC
+	if ((flags & O_SYNC) == O_SYNC)
+		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
+	else {
+		P_FLAG(DSYNC);
+	}
+#else
+	P_FLAG(SYNC);
+#endif
+	P_FLAG(TRUNC);
+	P_FLAG(WRONLY);
+#undef P_FLAG
+
+	if (flags)
+		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+	return printed;
+}
+
+#define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
diff --git a/tools/perf/trace/beauty/perf_event_open.c b/tools/perf/trace/beauty/perf_event_open.c
new file mode 100644
index 0000000..311f09d
--- /dev/null
+++ b/tools/perf/trace/beauty/perf_event_open.c
@@ -0,0 +1,43 @@
+#ifndef PERF_FLAG_FD_NO_GROUP
+# define PERF_FLAG_FD_NO_GROUP		(1UL << 0)
+#endif
+
+#ifndef PERF_FLAG_FD_OUTPUT
+# define PERF_FLAG_FD_OUTPUT		(1UL << 1)
+#endif
+
+#ifndef PERF_FLAG_PID_CGROUP
+# define PERF_FLAG_PID_CGROUP		(1UL << 2) /* pid=cgroup id, per-cpu mode only */
+#endif
+
+#ifndef PERF_FLAG_FD_CLOEXEC
+# define PERF_FLAG_FD_CLOEXEC		(1UL << 3) /* O_CLOEXEC */
+#endif
+
+static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
+						struct syscall_arg *arg)
+{
+	int printed = 0, flags = arg->val;
+
+	if (flags == 0)
+		return 0;
+
+#define	P_FLAG(n) \
+	if (flags & PERF_FLAG_##n) { \
+		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+		flags &= ~PERF_FLAG_##n; \
+	}
+
+	P_FLAG(FD_NO_GROUP);
+	P_FLAG(FD_OUTPUT);
+	P_FLAG(PID_CGROUP);
+	P_FLAG(FD_CLOEXEC);
+#undef P_FLAG
+
+	if (flags)
+		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+	return printed;
+}
+
+#define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
diff --git a/tools/perf/trace/beauty/pid.c b/tools/perf/trace/beauty/pid.c
new file mode 100644
index 0000000..07486ea
--- /dev/null
+++ b/tools/perf/trace/beauty/pid.c
@@ -0,0 +1,21 @@
+static size_t syscall_arg__scnprintf_pid(char *bf, size_t size, struct syscall_arg *arg)
+{
+	int pid = arg->val;
+	struct trace *trace = arg->trace;
+	size_t printed = scnprintf(bf, size, "%d", pid);
+	struct thread *thread = machine__findnew_thread(trace->host, pid, pid);
+
+	if (thread != NULL) {
+		if (!thread->comm_set)
+			thread__set_comm_from_proc(thread);
+
+		if (thread->comm_set)
+			printed += scnprintf(bf + printed, size - printed,
+					     " (%s)", thread__comm_str(thread));
+		thread__put(thread);
+	}
+
+	return printed;
+}
+
+#define SCA_PID syscall_arg__scnprintf_pid
diff --git a/tools/perf/trace/beauty/sched_policy.c b/tools/perf/trace/beauty/sched_policy.c
new file mode 100644
index 0000000..c205bc6
--- /dev/null
+++ b/tools/perf/trace/beauty/sched_policy.c
@@ -0,0 +1,44 @@
+#include <sched.h>
+
+/*
+ * Not defined anywhere else, probably, just to make sure we
+ * catch future flags
+ */
+#define SCHED_POLICY_MASK 0xff
+
+#ifndef SCHED_DEADLINE
+#define SCHED_DEADLINE 6
+#endif
+
+static size_t syscall_arg__scnprintf_sched_policy(char *bf, size_t size,
+						  struct syscall_arg *arg)
+{
+	const char *policies[] = {
+		"NORMAL", "FIFO", "RR", "BATCH", "ISO", "IDLE", "DEADLINE",
+	};
+	size_t printed;
+	int policy = arg->val,
+	    flags = policy & ~SCHED_POLICY_MASK;
+
+	policy &= SCHED_POLICY_MASK;
+	if (policy <= SCHED_DEADLINE)
+		printed = scnprintf(bf, size, "%s", policies[policy]);
+	else
+		printed = scnprintf(bf, size, "%#x", policy);
+
+#define	P_POLICY_FLAG(n) \
+	if (flags & SCHED_##n) { \
+		printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
+		flags &= ~SCHED_##n; \
+	}
+
+	P_POLICY_FLAG(RESET_ON_FORK);
+#undef P_POLICY_FLAG
+
+	if (flags)
+		printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
+
+	return printed;
+}
+
+#define SCA_SCHED_POLICY syscall_arg__scnprintf_sched_policy
diff --git a/tools/perf/trace/beauty/seccomp.c b/tools/perf/trace/beauty/seccomp.c
new file mode 100644
index 0000000..213c5a7
--- /dev/null
+++ b/tools/perf/trace/beauty/seccomp.c
@@ -0,0 +1,52 @@
+#include <linux/seccomp.h>
+
+#ifndef SECCOMP_SET_MODE_STRICT
+#define SECCOMP_SET_MODE_STRICT 0
+#endif
+#ifndef SECCOMP_SET_MODE_FILTER
+#define SECCOMP_SET_MODE_FILTER 1
+#endif
+
+static size_t syscall_arg__scnprintf_seccomp_op(char *bf, size_t size, struct syscall_arg *arg)
+{
+	int op = arg->val;
+	size_t printed = 0;
+
+	switch (op) {
+#define	P_SECCOMP_SET_MODE_OP(n) case SECCOMP_SET_MODE_##n: printed = scnprintf(bf, size, #n); break
+	P_SECCOMP_SET_MODE_OP(STRICT);
+	P_SECCOMP_SET_MODE_OP(FILTER);
+#undef P_SECCOMP_SET_MODE_OP
+	default: printed = scnprintf(bf, size, "%#x", op);			  break;
+	}
+
+	return printed;
+}
+
+#define SCA_SECCOMP_OP  syscall_arg__scnprintf_seccomp_op
+
+#ifndef SECCOMP_FILTER_FLAG_TSYNC
+#define SECCOMP_FILTER_FLAG_TSYNC 1
+#endif
+
+static size_t syscall_arg__scnprintf_seccomp_flags(char *bf, size_t size,
+						   struct syscall_arg *arg)
+{
+	int printed = 0, flags = arg->val;
+
+#define	P_FLAG(n) \
+	if (flags & SECCOMP_FILTER_FLAG_##n) { \
+		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+		flags &= ~SECCOMP_FILTER_FLAG_##n; \
+	}
+
+	P_FLAG(TSYNC);
+#undef P_FLAG
+
+	if (flags)
+		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+	return printed;
+}
+
+#define SCA_SECCOMP_FLAGS syscall_arg__scnprintf_seccomp_flags
diff --git a/tools/perf/trace/beauty/signum.c b/tools/perf/trace/beauty/signum.c
new file mode 100644
index 0000000..d3b0b1f
--- /dev/null
+++ b/tools/perf/trace/beauty/signum.c
@@ -0,0 +1,53 @@
+
+static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
+{
+	int sig = arg->val;
+
+	switch (sig) {
+#define	P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
+	P_SIGNUM(HUP);
+	P_SIGNUM(INT);
+	P_SIGNUM(QUIT);
+	P_SIGNUM(ILL);
+	P_SIGNUM(TRAP);
+	P_SIGNUM(ABRT);
+	P_SIGNUM(BUS);
+	P_SIGNUM(FPE);
+	P_SIGNUM(KILL);
+	P_SIGNUM(USR1);
+	P_SIGNUM(SEGV);
+	P_SIGNUM(USR2);
+	P_SIGNUM(PIPE);
+	P_SIGNUM(ALRM);
+	P_SIGNUM(TERM);
+	P_SIGNUM(CHLD);
+	P_SIGNUM(CONT);
+	P_SIGNUM(STOP);
+	P_SIGNUM(TSTP);
+	P_SIGNUM(TTIN);
+	P_SIGNUM(TTOU);
+	P_SIGNUM(URG);
+	P_SIGNUM(XCPU);
+	P_SIGNUM(XFSZ);
+	P_SIGNUM(VTALRM);
+	P_SIGNUM(PROF);
+	P_SIGNUM(WINCH);
+	P_SIGNUM(IO);
+	P_SIGNUM(PWR);
+	P_SIGNUM(SYS);
+#ifdef SIGEMT
+	P_SIGNUM(EMT);
+#endif
+#ifdef SIGSTKFLT
+	P_SIGNUM(STKFLT);
+#endif
+#ifdef SIGSWI
+	P_SIGNUM(SWI);
+#endif
+	default: break;
+	}
+
+	return scnprintf(bf, size, "%#x", sig);
+}
+
+#define SCA_SIGNUM syscall_arg__scnprintf_signum
diff --git a/tools/perf/trace/beauty/socket_type.c b/tools/perf/trace/beauty/socket_type.c
new file mode 100644
index 0000000..0a5ce81
--- /dev/null
+++ b/tools/perf/trace/beauty/socket_type.c
@@ -0,0 +1,60 @@
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#ifndef SOCK_DCCP
+# define SOCK_DCCP		6
+#endif
+
+#ifndef SOCK_CLOEXEC
+# define SOCK_CLOEXEC		02000000
+#endif
+
+#ifndef SOCK_NONBLOCK
+# define SOCK_NONBLOCK		00004000
+#endif
+
+#ifndef SOCK_TYPE_MASK
+#define SOCK_TYPE_MASK 0xf
+#endif
+
+static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size, struct syscall_arg *arg)
+{
+	size_t printed;
+	int type = arg->val,
+	    flags = type & ~SOCK_TYPE_MASK;
+
+	type &= SOCK_TYPE_MASK;
+	/*
+	 * Can't use a strarray here, because MIPS may override the SOCK_*
+	 * values for ABI reasons.
+	 */
+	switch (type) {
+#define	P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
+	P_SK_TYPE(STREAM);
+	P_SK_TYPE(DGRAM);
+	P_SK_TYPE(RAW);
+	P_SK_TYPE(RDM);
+	P_SK_TYPE(SEQPACKET);
+	P_SK_TYPE(DCCP);
+	P_SK_TYPE(PACKET);
+#undef P_SK_TYPE
+	default:
+		printed = scnprintf(bf, size, "%#x", type);
+	}
+
+#define	P_SK_FLAG(n) \
+	if (flags & SOCK_##n) { \
+		printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
+		flags &= ~SOCK_##n; \
+	}
+
+	P_SK_FLAG(CLOEXEC);
+	P_SK_FLAG(NONBLOCK);
+#undef P_SK_FLAG
+
+	if (flags)
+		printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
+
+	return printed;
+}
+
+#define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
diff --git a/tools/perf/trace/beauty/waitid_options.c b/tools/perf/trace/beauty/waitid_options.c
new file mode 100644
index 0000000..7942724
--- /dev/null
+++ b/tools/perf/trace/beauty/waitid_options.c
@@ -0,0 +1,26 @@
+#include <sys/types.h>
+#include <sys/wait.h>
+
+static size_t syscall_arg__scnprintf_waitid_options(char *bf, size_t size,
+						    struct syscall_arg *arg)
+{
+	int printed = 0, options = arg->val;
+
+#define	P_OPTION(n) \
+	if (options & W##n) { \
+		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+		options &= ~W##n; \
+	}
+
+	P_OPTION(NOHANG);
+	P_OPTION(UNTRACED);
+	P_OPTION(CONTINUED);
+#undef P_OPTION
+
+	if (options)
+		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", options);
+
+	return printed;
+}
+
+#define SCA_WAITID_OPTIONS syscall_arg__scnprintf_waitid_options
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 2a83414..538bae8 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -1607,9 +1607,8 @@
 
 			ret = fmt->header(fmt, &dummy_hpp, hists_to_evsel(hists));
 			dummy_hpp.buf[ret] = '\0';
-			rtrim(dummy_hpp.buf);
 
-			start = ltrim(dummy_hpp.buf);
+			start = trim(dummy_hpp.buf);
 			ret = strlen(start);
 
 			if (start != dummy_hpp.buf)
@@ -1897,11 +1896,10 @@
 	bool first = true;
 	int ret;
 
-	if (symbol_conf.use_callchain)
+	if (symbol_conf.use_callchain) {
 		folded_sign = hist_entry__folded(he);
-
-	if (symbol_conf.use_callchain)
 		printed += fprintf(fp, "%c ", folded_sign);
+	}
 
 	hists__for_each_format(browser->hists, fmt) {
 		if (perf_hpp__should_skip(fmt, he->hists))
@@ -2137,7 +2135,7 @@
 		printed += snprintf(bf + printed, size - printed,
 				    ", UID: %s", hists->uid_filter_str);
 	if (thread) {
-		if (sort__has_thread) {
+		if (hists__has(hists, thread)) {
 			printed += scnprintf(bf + printed, size - printed,
 				    ", Thread: %s(%d)",
 				     (thread->comm_set ? thread__comm_str(thread) : ""),
@@ -2322,7 +2320,8 @@
 {
 	struct thread *thread = act->thread;
 
-	if ((!sort__has_thread && !sort__has_comm) || thread == NULL)
+	if ((!hists__has(browser->hists, thread) &&
+	     !hists__has(browser->hists, comm)) || thread == NULL)
 		return 0;
 
 	if (browser->hists->thread_filter) {
@@ -2331,7 +2330,7 @@
 		thread__zput(browser->hists->thread_filter);
 		ui_helpline__pop();
 	} else {
-		if (sort__has_thread) {
+		if (hists__has(browser->hists, thread)) {
 			ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s(%d) thread\"",
 					   thread->comm_set ? thread__comm_str(thread) : "",
 					   thread->tid);
@@ -2356,10 +2355,11 @@
 {
 	int ret;
 
-	if ((!sort__has_thread && !sort__has_comm) || thread == NULL)
+	if ((!hists__has(browser->hists, thread) &&
+	     !hists__has(browser->hists, comm)) || thread == NULL)
 		return 0;
 
-	if (sort__has_thread) {
+	if (hists__has(browser->hists, thread)) {
 		ret = asprintf(optstr, "Zoom %s %s(%d) thread",
 			       browser->hists->thread_filter ? "out of" : "into",
 			       thread->comm_set ? thread__comm_str(thread) : "",
@@ -2382,7 +2382,7 @@
 {
 	struct map *map = act->ms.map;
 
-	if (!sort__has_dso || map == NULL)
+	if (!hists__has(browser->hists, dso) || map == NULL)
 		return 0;
 
 	if (browser->hists->dso_filter) {
@@ -2409,7 +2409,7 @@
 add_dso_opt(struct hist_browser *browser, struct popup_action *act,
 	    char **optstr, struct map *map)
 {
-	if (!sort__has_dso || map == NULL)
+	if (!hists__has(browser->hists, dso) || map == NULL)
 		return 0;
 
 	if (asprintf(optstr, "Zoom %s %s DSO",
@@ -2431,10 +2431,10 @@
 }
 
 static int
-add_map_opt(struct hist_browser *browser __maybe_unused,
+add_map_opt(struct hist_browser *browser,
 	    struct popup_action *act, char **optstr, struct map *map)
 {
-	if (!sort__has_dso || map == NULL)
+	if (!hists__has(browser->hists, dso) || map == NULL)
 		return 0;
 
 	if (asprintf(optstr, "Browse map details") < 0)
@@ -2536,7 +2536,7 @@
 static int
 do_zoom_socket(struct hist_browser *browser, struct popup_action *act)
 {
-	if (!sort__has_socket || act->socket < 0)
+	if (!hists__has(browser->hists, socket) || act->socket < 0)
 		return 0;
 
 	if (browser->hists->socket_filter > -1) {
@@ -2558,7 +2558,7 @@
 add_socket_opt(struct hist_browser *browser, struct popup_action *act,
 	       char **optstr, int socket_id)
 {
-	if (!sort__has_socket || socket_id < 0)
+	if (!hists__has(browser->hists, socket) || socket_id < 0)
 		return 0;
 
 	if (asprintf(optstr, "Zoom %s Processor Socket %d",
@@ -2749,7 +2749,7 @@
 			 */
 			goto out_free_stack;
 		case 'a':
-			if (!sort__has_sym) {
+			if (!hists__has(hists, sym)) {
 				ui_browser__warning(&browser->b, delay_secs * 2,
 			"Annotation is only available for symbolic views, "
 			"include \"sym*\" in --sort to use it.");
@@ -2912,7 +2912,7 @@
 			continue;
 		}
 
-		if (!sort__has_sym || browser->selection == NULL)
+		if (!hists__has(hists, sym) || browser->selection == NULL)
 			goto skip_annotation;
 
 		if (sort__mode == SORT_MODE__BRANCH) {
@@ -2956,7 +2956,7 @@
 			goto skip_scripting;
 
 		if (browser->he_selection) {
-			if (sort__has_thread && thread) {
+			if (hists__has(hists, thread) && thread) {
 				nr_options += add_script_opt(browser,
 							     &actions[nr_options],
 							     &options[nr_options],
@@ -2971,7 +2971,7 @@
 			 *
 			 * See hist_browser__show_entry.
 			 */
-			if (sort__has_sym && browser->selection->sym) {
+			if (hists__has(hists, sym) && browser->selection->sym) {
 				nr_options += add_script_opt(browser,
 							     &actions[nr_options],
 							     &options[nr_options],
diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c
index 2aa45b6..932adfa 100644
--- a/tools/perf/ui/gtk/hists.c
+++ b/tools/perf/ui/gtk/hists.c
@@ -379,7 +379,7 @@
 			gtk_tree_store_set(store, &iter, col_idx++, s, -1);
 		}
 
-		if (symbol_conf.use_callchain && sort__has_sym) {
+		if (symbol_conf.use_callchain && hists__has(hists, sym)) {
 			if (callchain_param.mode == CHAIN_GRAPH_REL)
 				total = symbol_conf.cumulate_callchain ?
 					h->stat_acc->period : h->stat.period;
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index 3baeaa6..af07ffb 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -635,7 +635,7 @@
 		ret += fmt->width(fmt, &dummy_hpp, hists_to_evsel(hists));
 	}
 
-	if (verbose && sort__has_sym) /* Addr + origin */
+	if (verbose && hists__has(hists, sym)) /* Addr + origin */
 		ret += 3 + BITS_PER_LONG / 4;
 
 	return ret;
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index 7aff5ac..560eb47 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -569,9 +569,8 @@
 			first_col = false;
 
 			fmt->header(fmt, hpp, hists_to_evsel(hists));
-			rtrim(hpp->buf);
 
-			header_width += fprintf(fp, "%s", ltrim(hpp->buf));
+			header_width += fprintf(fp, "%s", trim(hpp->buf));
 		}
 	}
 
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index da48fd8..8c6c8a0 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -8,6 +8,7 @@
 libperf-y += event.o
 libperf-y += evlist.o
 libperf-y += evsel.o
+libperf-y += evsel_fprintf.o
 libperf-y += find_bit.o
 libperf-y += kallsyms.o
 libperf-y += levenshtein.o
@@ -26,9 +27,9 @@
 libperf-y += strfilter.o
 libperf-y += top.o
 libperf-y += usage.o
-libperf-y += wrapper.o
 libperf-y += dso.o
 libperf-y += symbol.o
+libperf-y += symbol_fprintf.o
 libperf-y += color.o
 libperf-y += header.o
 libperf-y += callchain.o
@@ -38,6 +39,7 @@
 libperf-y += map.o
 libperf-y += pstack.o
 libperf-y += session.o
+libperf-$(CONFIG_AUDIT) += syscalltbl.o
 libperf-y += ordered-events.o
 libperf-y += comm.o
 libperf-y += thread.o
@@ -69,9 +71,9 @@
 libperf-y += record.o
 libperf-y += srcline.o
 libperf-y += data.o
-libperf-$(CONFIG_X86) += tsc.o
-libperf-$(CONFIG_AUXTRACE) += tsc.o
+libperf-y += tsc.o
 libperf-y += cloexec.o
+libperf-y += call-path.o
 libperf-y += thread-stack.o
 libperf-$(CONFIG_AUXTRACE) += auxtrace.o
 libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index b795b69..4db73d5 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -1138,7 +1138,7 @@
 
 	if (dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS &&
 	    !dso__is_kcore(dso)) {
-		char bf[BUILD_ID_SIZE * 2 + 16] = " with build id ";
+		char bf[SBUILD_ID_SIZE + 15] = " with build id ";
 		char *build_id_msg = NULL;
 
 		if (dso->annotate_warned)
@@ -1665,5 +1665,5 @@
 
 bool ui__has_annotation(void)
 {
-	return use_browser == 1 && sort__has_sym;
+	return use_browser == 1 && perf_hpp_list.sym;
 }
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index ec164fe..c916901 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -940,6 +940,7 @@
 	synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
 	synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ;
 	synth_opts->last_branch_sz = PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ;
+	synth_opts->initial_skip = 0;
 }
 
 /*
@@ -1064,6 +1065,12 @@
 				synth_opts->last_branch_sz = val;
 			}
 			break;
+		case 's':
+			synth_opts->initial_skip = strtoul(p, &endptr, 10);
+			if (p == endptr)
+				goto out_err;
+			p = endptr;
+			break;
 		case ' ':
 		case ',':
 			break;
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index 57ff31e..767989e 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -68,6 +68,7 @@
  * @last_branch_sz: branch context size
  * @period: 'instructions' events period
  * @period_type: 'instructions' events period type
+ * @initial_skip: skip N events at the beginning.
  */
 struct itrace_synth_opts {
 	bool			set;
@@ -86,6 +87,7 @@
 	unsigned int		last_branch_sz;
 	unsigned long long	period;
 	enum itrace_period_type	period_type;
+	unsigned long		initial_skip;
 };
 
 /**
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index 0967ce6..493307d 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -842,6 +842,58 @@
 	return op;
 }
 
+static struct bpf_map_op *
+bpf_map_op__clone(struct bpf_map_op *op)
+{
+	struct bpf_map_op *newop;
+
+	newop = memdup(op, sizeof(*op));
+	if (!newop) {
+		pr_debug("Failed to alloc bpf_map_op\n");
+		return NULL;
+	}
+
+	INIT_LIST_HEAD(&newop->list);
+	if (op->key_type == BPF_MAP_KEY_RANGES) {
+		size_t memsz = op->k.array.nr_ranges *
+			       sizeof(op->k.array.ranges[0]);
+
+		newop->k.array.ranges = memdup(op->k.array.ranges, memsz);
+		if (!newop->k.array.ranges) {
+			pr_debug("Failed to alloc indices for map\n");
+			free(newop);
+			return NULL;
+		}
+	}
+
+	return newop;
+}
+
+static struct bpf_map_priv *
+bpf_map_priv__clone(struct bpf_map_priv *priv)
+{
+	struct bpf_map_priv *newpriv;
+	struct bpf_map_op *pos, *newop;
+
+	newpriv = zalloc(sizeof(*newpriv));
+	if (!newpriv) {
+		pr_debug("No enough memory to alloc map private\n");
+		return NULL;
+	}
+	INIT_LIST_HEAD(&newpriv->ops_list);
+
+	list_for_each_entry(pos, &priv->ops_list, list) {
+		newop = bpf_map_op__clone(pos);
+		if (!newop) {
+			bpf_map_priv__purge(newpriv);
+			return NULL;
+		}
+		list_add_tail(&newop->list, &newpriv->ops_list);
+	}
+
+	return newpriv;
+}
+
 static int
 bpf_map__add_op(struct bpf_map *map, struct bpf_map_op *op)
 {
@@ -1417,6 +1469,89 @@
 	return 0;
 }
 
+#define bpf__for_each_map(pos, obj, objtmp)	\
+	bpf_object__for_each_safe(obj, objtmp)	\
+		bpf_map__for_each(pos, obj)
+
+#define bpf__for_each_stdout_map(pos, obj, objtmp)	\
+	bpf__for_each_map(pos, obj, objtmp) 		\
+		if (bpf_map__get_name(pos) && 		\
+			(strcmp("__bpf_stdout__", 	\
+				bpf_map__get_name(pos)) == 0))
+
+int bpf__setup_stdout(struct perf_evlist *evlist __maybe_unused)
+{
+	struct bpf_map_priv *tmpl_priv = NULL;
+	struct bpf_object *obj, *tmp;
+	struct perf_evsel *evsel = NULL;
+	struct bpf_map *map;
+	int err;
+	bool need_init = false;
+
+	bpf__for_each_stdout_map(map, obj, tmp) {
+		struct bpf_map_priv *priv;
+
+		err = bpf_map__get_private(map, (void **)&priv);
+		if (err)
+			return -BPF_LOADER_ERRNO__INTERNAL;
+
+		/*
+		 * No need to check map type: type should have been
+		 * verified by the kernel.
+		 */
+		if (!need_init && !priv)
+			need_init = !priv;
+		if (!tmpl_priv && priv)
+			tmpl_priv = priv;
+	}
+
+	if (!need_init)
+		return 0;
+
+	if (!tmpl_priv) {
+		err = parse_events(evlist, "bpf-output/no-inherit=1,name=__bpf_stdout__/",
+				   NULL);
+		if (err) {
+			pr_debug("ERROR: failed to create bpf-output event\n");
+			return -err;
+		}
+
+		evsel = perf_evlist__last(evlist);
+	}
+
+	bpf__for_each_stdout_map(map, obj, tmp) {
+		struct bpf_map_priv *priv;
+
+		err = bpf_map__get_private(map, (void **)&priv);
+		if (err)
+			return -BPF_LOADER_ERRNO__INTERNAL;
+		if (priv)
+			continue;
+
+		if (tmpl_priv) {
+			priv = bpf_map_priv__clone(tmpl_priv);
+			if (!priv)
+				return -ENOMEM;
+
+			err = bpf_map__set_private(map, priv, bpf_map_priv__clear);
+			if (err) {
+				bpf_map_priv__clear(map, priv);
+				return err;
+			}
+		} else if (evsel) {
+			struct bpf_map_op *op;
+
+			op = bpf_map__add_newop(map, NULL);
+			if (IS_ERR(op))
+				return PTR_ERR(op);
+			op->op_type = BPF_MAP_OP_SET_EVSEL;
+			op->v.evsel = evsel;
+		}
+	}
+
+	return 0;
+}
+
 #define ERRNO_OFFSET(e)		((e) - __BPF_LOADER_ERRNO__START)
 #define ERRCODE_OFFSET(c)	ERRNO_OFFSET(BPF_LOADER_ERRNO__##c)
 #define NR_ERRNO	(__BPF_LOADER_ERRNO__END - __BPF_LOADER_ERRNO__START)
@@ -1590,3 +1725,11 @@
 	bpf__strerror_end(buf, size);
 	return 0;
 }
+
+int bpf__strerror_setup_stdout(struct perf_evlist *evlist __maybe_unused,
+			       int err, char *buf, size_t size)
+{
+	bpf__strerror_head(err, buf, size);
+	bpf__strerror_end(buf, size);
+	return 0;
+}
diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h
index be43119..941e172 100644
--- a/tools/perf/util/bpf-loader.h
+++ b/tools/perf/util/bpf-loader.h
@@ -79,6 +79,11 @@
 			     size_t size);
 int bpf__apply_obj_config(void);
 int bpf__strerror_apply_obj_config(int err, char *buf, size_t size);
+
+int bpf__setup_stdout(struct perf_evlist *evlist);
+int bpf__strerror_setup_stdout(struct perf_evlist *evlist, int err,
+			       char *buf, size_t size);
+
 #else
 static inline struct bpf_object *
 bpf__prepare_load(const char *filename __maybe_unused,
@@ -125,6 +130,12 @@
 }
 
 static inline int
+bpf__setup_stdout(struct perf_evlist *evlist __maybe_unused)
+{
+	return 0;
+}
+
+static inline int
 __bpf_strerror(char *buf, size_t size)
 {
 	if (!size)
@@ -177,5 +188,13 @@
 {
 	return __bpf_strerror(buf, size);
 }
+
+static inline int
+bpf__strerror_setup_stdout(struct perf_evlist *evlist __maybe_unused,
+			   int err __maybe_unused, char *buf,
+			   size_t size)
+{
+	return __bpf_strerror(buf, size);
+}
 #endif
 #endif
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 0573c2e..bff425e 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -261,14 +261,14 @@
 
 		if (dso__is_vdso(pos)) {
 			name = pos->short_name;
-			name_len = pos->short_name_len + 1;
+			name_len = pos->short_name_len;
 		} else if (dso__is_kcore(pos)) {
 			machine__mmap_name(machine, nm, sizeof(nm));
 			name = nm;
-			name_len = strlen(nm) + 1;
+			name_len = strlen(nm);
 		} else {
 			name = pos->long_name;
-			name_len = pos->long_name_len + 1;
+			name_len = pos->long_name_len;
 		}
 
 		in_kernel = pos->kernel ||
@@ -365,39 +365,17 @@
 int build_id_cache__list_build_ids(const char *pathname,
 				   struct strlist **result)
 {
-	struct strlist *list;
 	char *dir_name;
-	DIR *dir;
-	struct dirent *d;
 	int ret = 0;
 
-	list = strlist__new(NULL, NULL);
 	dir_name = build_id_cache__dirname_from_path(pathname, false, false);
-	if (!list || !dir_name) {
-		ret = -ENOMEM;
-		goto out;
-	}
+	if (!dir_name)
+		return -ENOMEM;
 
-	/* List up all dirents */
-	dir = opendir(dir_name);
-	if (!dir) {
+	*result = lsdir(dir_name, lsdir_no_dot_filter);
+	if (!*result)
 		ret = -errno;
-		goto out;
-	}
-
-	while ((d = readdir(dir)) != NULL) {
-		if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, ".."))
-			continue;
-		strlist__add(list, d->d_name);
-	}
-	closedir(dir);
-
-out:
 	free(dir_name);
-	if (ret)
-		strlist__delete(list);
-	else
-		*result = list;
 
 	return ret;
 }
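
The rewrite above collapses the open-coded readdir() loop into a shared lsdir() helper plus a dot-entry filter. A self-contained sketch of that shape (list_dir() and no_dot_filter() are illustrative stand-ins; the real helper builds and returns a strlist):

	#include <dirent.h>
	#include <stdbool.h>
	#include <string.h>

	static bool no_dot_filter(const char *dirname, struct dirent *d)
	{
		(void)dirname;
		return strcmp(d->d_name, ".") && strcmp(d->d_name, "..");
	}

	/* Walk 'name', invoking cb() for every entry the filter accepts. */
	static int list_dir(const char *name,
			    bool (*filter)(const char *, struct dirent *),
			    void (*cb)(const char *entry))
	{
		DIR *dir = opendir(name);
		struct dirent *d;

		if (!dir)
			return -1;
		while ((d = readdir(dir)) != NULL)
			if (filter(name, d))
				cb(d->d_name);
		closedir(dir);
		return 0;
	}
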
diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h
index 1f5a93c..0d814bb 100644
--- a/tools/perf/util/cache.h
+++ b/tools/perf/util/cache.h
@@ -40,25 +40,6 @@
 
 #define alloc_nr(x) (((x)+16)*3/2)
 
-/*
- * Realloc the buffer pointed at by variable 'x' so that it can hold
- * at least 'nr' entries; the number of entries currently allocated
- * is 'alloc', using the standard growing factor alloc_nr() macro.
- *
- * DO NOT USE any expression with side-effect for 'x' or 'alloc'.
- */
-#define ALLOC_GROW(x, nr, alloc) \
-	do { \
-		if ((nr) > alloc) { \
-			if (alloc_nr(alloc) < (nr)) \
-				alloc = (nr); \
-			else \
-				alloc = alloc_nr(alloc); \
-			x = xrealloc((x), alloc * sizeof(*(x))); \
-		} \
-	} while(0)
-
-
 static inline int is_absolute_path(const char *path)
 {
 	return path[0] == '/';
diff --git a/tools/perf/util/call-path.c b/tools/perf/util/call-path.c
new file mode 100644
index 0000000..904a170
--- /dev/null
+++ b/tools/perf/util/call-path.c
@@ -0,0 +1,122 @@
+/*
+ * call-path.c: Manipulate a tree data structure containing function call paths
+ * Copyright (c) 2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <linux/rbtree.h>
+#include <linux/list.h>
+
+#include "util.h"
+#include "call-path.h"
+
+static void call_path__init(struct call_path *cp, struct call_path *parent,
+			    struct symbol *sym, u64 ip, bool in_kernel)
+{
+	cp->parent = parent;
+	cp->sym = sym;
+	cp->ip = sym ? 0 : ip;
+	cp->db_id = 0;
+	cp->in_kernel = in_kernel;
+	RB_CLEAR_NODE(&cp->rb_node);
+	cp->children = RB_ROOT;
+}
+
+struct call_path_root *call_path_root__new(void)
+{
+	struct call_path_root *cpr;
+
+	cpr = zalloc(sizeof(struct call_path_root));
+	if (!cpr)
+		return NULL;
+	call_path__init(&cpr->call_path, NULL, NULL, 0, false);
+	INIT_LIST_HEAD(&cpr->blocks);
+	return cpr;
+}
+
+void call_path_root__free(struct call_path_root *cpr)
+{
+	struct call_path_block *pos, *n;
+
+	list_for_each_entry_safe(pos, n, &cpr->blocks, node) {
+		list_del(&pos->node);
+		free(pos);
+	}
+	free(cpr);
+}
+
+static struct call_path *call_path__new(struct call_path_root *cpr,
+					struct call_path *parent,
+					struct symbol *sym, u64 ip,
+					bool in_kernel)
+{
+	struct call_path_block *cpb;
+	struct call_path *cp;
+	size_t n;
+
+	if (cpr->next < cpr->sz) {
+		cpb = list_last_entry(&cpr->blocks, struct call_path_block,
+				      node);
+	} else {
+		cpb = zalloc(sizeof(struct call_path_block));
+		if (!cpb)
+			return NULL;
+		list_add_tail(&cpb->node, &cpr->blocks);
+		cpr->sz += CALL_PATH_BLOCK_SIZE;
+	}
+
+	n = cpr->next++ & CALL_PATH_BLOCK_MASK;
+	cp = &cpb->cp[n];
+
+	call_path__init(cp, parent, sym, ip, in_kernel);
+
+	return cp;
+}
+
+struct call_path *call_path__findnew(struct call_path_root *cpr,
+				     struct call_path *parent,
+				     struct symbol *sym, u64 ip, u64 ks)
+{
+	struct rb_node **p;
+	struct rb_node *node_parent = NULL;
+	struct call_path *cp;
+	bool in_kernel = ip >= ks;
+
+	if (sym)
+		ip = 0;
+
+	if (!parent)
+		return call_path__new(cpr, parent, sym, ip, in_kernel);
+
+	p = &parent->children.rb_node;
+	while (*p != NULL) {
+		node_parent = *p;
+		cp = rb_entry(node_parent, struct call_path, rb_node);
+
+		if (cp->sym == sym && cp->ip == ip)
+			return cp;
+
+		if (sym < cp->sym || (sym == cp->sym && ip < cp->ip))
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	cp = call_path__new(cpr, parent, sym, ip, in_kernel);
+	if (!cp)
+		return NULL;
+
+	rb_link_node(&cp->rb_node, node_parent, p);
+	rb_insert_color(&cp->rb_node, &parent->children);
+
+	return cp;
+}
diff --git a/tools/perf/util/call-path.h b/tools/perf/util/call-path.h
new file mode 100644
index 0000000..477f6d0
--- /dev/null
+++ b/tools/perf/util/call-path.h
@@ -0,0 +1,77 @@
+/*
+ * call-path.h: Manipulate a tree data structure containing function call paths
+ * Copyright (c) 2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef __PERF_CALL_PATH_H
+#define __PERF_CALL_PATH_H
+
+#include <sys/types.h>
+
+#include <linux/types.h>
+#include <linux/rbtree.h>
+
+/**
+ * struct call_path - node in list of calls leading to a function call.
+ * @parent: call path to the parent function call
+ * @sym: symbol of function called
+ * @ip: the ip of the function, used only if sym is null
+ * @db_id: id used for db-export
+ * @in_kernel: whether the function is in the kernel
+ * @rb_node: node in parent's tree of called functions
+ * @children: tree of call paths of functions called
+ *
+ * In combination with the call_return structure, the call_path structure
+ * defines a context-sensitive call-graph.
+ */
+struct call_path {
+	struct call_path *parent;
+	struct symbol *sym;
+	u64 ip;
+	u64 db_id;
+	bool in_kernel;
+	struct rb_node rb_node;
+	struct rb_root children;
+};
+
+#define CALL_PATH_BLOCK_SHIFT 8
+#define CALL_PATH_BLOCK_SIZE (1 << CALL_PATH_BLOCK_SHIFT)
+#define CALL_PATH_BLOCK_MASK (CALL_PATH_BLOCK_SIZE - 1)
+
+struct call_path_block {
+	struct call_path cp[CALL_PATH_BLOCK_SIZE];
+	struct list_head node;
+};
+
+/**
+ * struct call_path_root - root of all call paths.
+ * @call_path: root call path
+ * @blocks: list of blocks to store call paths
+ * @next: next free space
+ * @sz: number of spaces
+ */
+struct call_path_root {
+	struct call_path call_path;
+	struct list_head blocks;
+	size_t next;
+	size_t sz;
+};
+
+struct call_path_root *call_path_root__new(void);
+void call_path_root__free(struct call_path_root *cpr);
+
+struct call_path *call_path__findnew(struct call_path_root *cpr,
+				     struct call_path *parent,
+				     struct symbol *sym, u64 ip, u64 ks);
+
+#endif
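
Taken together, call_path_root__new() and call_path__findnew() grow a context-sensitive call tree one frame at a time, reusing nodes that already exist on a given path. A hedged usage sketch (main_sym, foo_sym, bar_sym and kernel_start are illustrative; per-call NULL checks are trimmed):

	struct call_path_root *cpr = call_path_root__new();
	struct call_path *cp;

	if (cpr == NULL)
		return -ENOMEM;

	/* Record the chain main -> foo -> bar, walking from the root. */
	cp = call_path__findnew(cpr, &cpr->call_path, main_sym, 0, kernel_start);
	cp = call_path__findnew(cpr, cp, foo_sym, 0, kernel_start);
	cp = call_path__findnew(cpr, cp, bar_sym, 0, kernel_start);

	/* ... cp->db_id is later filled in by the db-export layer ... */

	call_path_root__free(cpr);
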
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 24b4bd0..07fd30b 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -109,6 +109,7 @@
 	bool record_opt_set = false;
 	bool try_stack_size = false;
 
+	callchain_param.enabled = true;
 	symbol_conf.use_callchain = true;
 
 	if (!arg)
@@ -117,6 +118,7 @@
 	while ((tok = strtok((char *)arg, ",")) != NULL) {
 		if (!strncmp(tok, "none", strlen(tok))) {
 			callchain_param.mode = CHAIN_NONE;
+			callchain_param.enabled = false;
 			symbol_conf.use_callchain = false;
 			return 0;
 		}
@@ -788,7 +790,8 @@
 	return 0;
 }
 
-int sample__resolve_callchain(struct perf_sample *sample, struct symbol **parent,
+int sample__resolve_callchain(struct perf_sample *sample,
+			      struct callchain_cursor *cursor, struct symbol **parent,
 			      struct perf_evsel *evsel, struct addr_location *al,
 			      int max_stack)
 {
@@ -796,8 +799,8 @@
 		return 0;
 
 	if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain ||
-	    sort__has_parent) {
-		return thread__resolve_callchain(al->thread, evsel, sample,
+	    perf_hpp_list.parent) {
+		return thread__resolve_callchain(al->thread, cursor, evsel, sample,
 						 parent, al, max_stack);
 	}
 	return 0;
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index d2a9e69..65e2a4f 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -212,7 +212,14 @@
 int record_parse_callchain_opt(const struct option *opt, const char *arg, int unset);
 int record_callchain_opt(const struct option *opt, const char *arg, int unset);
 
-int sample__resolve_callchain(struct perf_sample *sample, struct symbol **parent,
+struct record_opts;
+
+int record_opts__parse_callchain(struct record_opts *record,
+				 struct callchain_param *callchain,
+				 const char *arg, bool unset);
+
+int sample__resolve_callchain(struct perf_sample *sample,
+			      struct callchain_cursor *cursor, struct symbol **parent,
 			      struct perf_evsel *evsel, struct addr_location *al,
 			      int max_stack);
 int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *sample);
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index 4e72763..dad7d82 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -13,6 +13,7 @@
 #include <subcmd/exec-cmd.h>
 #include "util/hist.h"  /* perf_hist_config */
 #include "util/llvm-utils.h"   /* perf_llvm_config */
+#include "config.h"
 
 #define MAXNAME (256)
 
@@ -377,6 +378,21 @@
 	return value;
 }
 
+static int perf_buildid_config(const char *var, const char *value)
+{
+	/* same dir for all commands */
+	if (!strcmp(var, "buildid.dir")) {
+		const char *dir = perf_config_dirname(var, value);
+
+		if (!dir)
+			return -1;
+		strncpy(buildid_dir, dir, MAXPATHLEN-1);
+		buildid_dir[MAXPATHLEN-1] = '\0';
+	}
+
+	return 0;
+}
+
 static int perf_default_core_config(const char *var __maybe_unused,
 				    const char *value __maybe_unused)
 {
@@ -412,6 +428,9 @@
 	if (!prefixcmp(var, "llvm."))
 		return perf_llvm_config(var, value);
 
+	if (!prefixcmp(var, "buildid."))
+		return perf_buildid_config(var, value);
+
 	/* Add other config variables here. */
 	return 0;
 }
@@ -506,6 +525,178 @@
 	return ret;
 }
 
+static struct perf_config_section *find_section(struct list_head *sections,
+						const char *section_name)
+{
+	struct perf_config_section *section;
+
+	list_for_each_entry(section, sections, node)
+		if (!strcmp(section->name, section_name))
+			return section;
+
+	return NULL;
+}
+
+static struct perf_config_item *find_config_item(const char *name,
+						 struct perf_config_section *section)
+{
+	struct perf_config_item *item;
+
+	list_for_each_entry(item, &section->items, node)
+		if (!strcmp(item->name, name))
+			return item;
+
+	return NULL;
+}
+
+static struct perf_config_section *add_section(struct list_head *sections,
+					       const char *section_name)
+{
+	struct perf_config_section *section = zalloc(sizeof(*section));
+
+	if (!section)
+		return NULL;
+
+	INIT_LIST_HEAD(&section->items);
+	section->name = strdup(section_name);
+	if (!section->name) {
+		pr_debug("%s: strdup failed\n", __func__);
+		free(section);
+		return NULL;
+	}
+
+	list_add_tail(&section->node, sections);
+	return section;
+}
+
+static struct perf_config_item *add_config_item(struct perf_config_section *section,
+						const char *name)
+{
+	struct perf_config_item *item = zalloc(sizeof(*item));
+
+	if (!item)
+		return NULL;
+
+	item->name = strdup(name);
+	if (!item->name) {
+		pr_debug("%s: strdup failed\n", __func__);
+		free(item);
+		return NULL;
+	}
+
+	list_add_tail(&item->node, &section->items);
+	return item;
+}
+
+static int set_value(struct perf_config_item *item, const char *value)
+{
+	char *val = strdup(value);
+
+	if (!val)
+		return -1;
+
+	zfree(&item->value);
+	item->value = val;
+	return 0;
+}
+
+static int collect_config(const char *var, const char *value,
+			  void *perf_config_set)
+{
+	int ret = -1;
+	char *ptr, *key;
+	char *section_name, *name;
+	struct perf_config_section *section = NULL;
+	struct perf_config_item *item = NULL;
+	struct perf_config_set *set = perf_config_set;
+	struct list_head *sections = &set->sections;
+
+	key = ptr = strdup(var);
+	if (!key) {
+		pr_debug("%s: strdup failed\n", __func__);
+		return -1;
+	}
+
+	section_name = strsep(&ptr, ".");
+	name = ptr;
+	if (name == NULL || value == NULL)
+		goto out_free;
+
+	section = find_section(sections, section_name);
+	if (!section) {
+		section = add_section(sections, section_name);
+		if (!section)
+			goto out_free;
+	}
+
+	item = find_config_item(name, section);
+	if (!item) {
+		item = add_config_item(section, name);
+		if (!item)
+			goto out_free;
+	}
+
+	ret = set_value(item, value);
+	return ret;
+
+out_free:
+	free(key);
+	perf_config_set__delete(set);
+	return -1;
+}
+
+struct perf_config_set *perf_config_set__new(void)
+{
+	struct perf_config_set *set = zalloc(sizeof(*set));
+
+	if (set) {
+		INIT_LIST_HEAD(&set->sections);
+		perf_config(collect_config, set);
+	}
+
+	return set;
+}
+
+static void perf_config_item__delete(struct perf_config_item *item)
+{
+	zfree(&item->name);
+	zfree(&item->value);
+	free(item);
+}
+
+static void perf_config_section__purge(struct perf_config_section *section)
+{
+	struct perf_config_item *item, *tmp;
+
+	list_for_each_entry_safe(item, tmp, &section->items, node) {
+		list_del_init(&item->node);
+		perf_config_item__delete(item);
+	}
+}
+
+static void perf_config_section__delete(struct perf_config_section *section)
+{
+	perf_config_section__purge(section);
+	zfree(&section->name);
+	free(section);
+}
+
+static void perf_config_set__purge(struct perf_config_set *set)
+{
+	struct perf_config_section *section, *tmp;
+
+	list_for_each_entry_safe(section, tmp, &set->sections, node) {
+		list_del_init(&section->node);
+		perf_config_section__delete(section);
+	}
+}
+
+void perf_config_set__delete(struct perf_config_set *set)
+{
+	perf_config_set__purge(set);
+	free(set);
+}
+
 /*
  * Call this to report error for your variable that should not
  * get a boolean value (i.e. "[my] var" means "true").
@@ -515,49 +706,18 @@
 	return error("Missing value for '%s'", var);
 }
 
-struct buildid_dir_config {
-	char *dir;
-};
-
-static int buildid_dir_command_config(const char *var, const char *value,
-				      void *data)
-{
-	struct buildid_dir_config *c = data;
-	const char *v;
-
-	/* same dir for all commands */
-	if (!strcmp(var, "buildid.dir")) {
-		v = perf_config_dirname(var, value);
-		if (!v)
-			return -1;
-		strncpy(c->dir, v, MAXPATHLEN-1);
-		c->dir[MAXPATHLEN-1] = '\0';
-	}
-	return 0;
-}
-
-static void check_buildid_dir_config(void)
-{
-	struct buildid_dir_config c;
-	c.dir = buildid_dir;
-	perf_config(buildid_dir_command_config, &c);
-}
-
 void set_buildid_dir(const char *dir)
 {
 	if (dir)
 		scnprintf(buildid_dir, MAXPATHLEN-1, "%s", dir);
 
-	/* try config file */
-	if (buildid_dir[0] == '\0')
-		check_buildid_dir_config();
-
 	/* default to $HOME/.debug */
 	if (buildid_dir[0] == '\0') {
-		char *v = getenv("HOME");
-		if (v) {
+		char *home = getenv("HOME");
+
+		if (home) {
 			snprintf(buildid_dir, MAXPATHLEN-1, "%s/%s",
-				 v, DEBUG_CACHE_DIR);
+				 home, DEBUG_CACHE_DIR);
 		} else {
 			strncpy(buildid_dir, DEBUG_CACHE_DIR, MAXPATHLEN-1);
 		}
diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h
new file mode 100644
index 0000000..22ec626
--- /dev/null
+++ b/tools/perf/util/config.h
@@ -0,0 +1,26 @@
+#ifndef __PERF_CONFIG_H
+#define __PERF_CONFIG_H
+
+#include <stdbool.h>
+#include <linux/list.h>
+
+struct perf_config_item {
+	char *name;
+	char *value;
+	struct list_head node;
+};
+
+struct perf_config_section {
+	char *name;
+	struct list_head items;
+	struct list_head node;
+};
+
+struct perf_config_set {
+	struct list_head sections;
+};
+
+struct perf_config_set *perf_config_set__new(void);
+void perf_config_set__delete(struct perf_config_set *set);
+
+#endif /* __PERF_CONFIG_H */
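
Since a perf_config_set is a plain two-level list, dumping every collected variable is two nested list walks. A sketch, assuming the usual list_for_each_entry() helper from <linux/list.h>:

	struct perf_config_set *set = perf_config_set__new();
	struct perf_config_section *section;
	struct perf_config_item *item;

	if (set == NULL)
		return -1;

	list_for_each_entry(section, &set->sections, node)
		list_for_each_entry(item, &section->items, node)
			printf("%s.%s=%s\n", section->name, item->name,
			       item->value ? item->value : "");

	perf_config_set__delete(set);
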
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 9bcf2be..02d8016 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -587,3 +587,15 @@
 	closedir(dir1);
 	return 0;
 }
+
+bool cpu_map__has(struct cpu_map *cpus, int cpu)
+{
+	int i;
+
+	for (i = 0; i < cpus->nr; ++i) {
+		if (cpus->map[i] == cpu)
+			return true;
+	}
+
+	return false;
+}
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index 81a2562..1a0a350 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -66,4 +66,6 @@
 int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res,
 		       int (*f)(struct cpu_map *map, int cpu, void *data),
 		       void *data);
+
+bool cpu_map__has(struct cpu_map *cpus, int cpu);
 #endif /* __PERF_CPUMAP_H */
diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c
index 1921942..be835161 100644
--- a/tools/perf/util/data.c
+++ b/tools/perf/util/data.c
@@ -136,3 +136,44 @@
 {
 	return writen(file->fd, buf, size);
 }
+
+int perf_data_file__switch(struct perf_data_file *file,
+			   const char *postfix,
+			   size_t pos, bool at_exit)
+{
+	char *new_filepath;
+	int ret;
+
+	if (check_pipe(file))
+		return -EINVAL;
+	if (perf_data_file__is_read(file))
+		return -EINVAL;
+
+	if (asprintf(&new_filepath, "%s.%s", file->path, postfix) < 0)
+		return -ENOMEM;
+
+	/*
+	 * Only fire a warning; don't return an error, and continue
+	 * filling the original file.
+	 */
+	if (rename(file->path, new_filepath))
+		pr_warning("Failed to rename %s to %s\n", file->path, new_filepath);
+
+	if (!at_exit) {
+		close(file->fd);
+		ret = perf_data_file__open(file);
+		if (ret < 0)
+			goto out;
+
+		if (lseek(file->fd, pos, SEEK_SET) == (off_t)-1) {
+			ret = -errno;
+			pr_debug("Failed to lseek to %zu: %s",
+				 pos, strerror(errno));
+			goto out;
+		}
+	}
+	ret = file->fd;
+out:
+	free(new_filepath);
+	return ret;
+}
diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h
index 2b15d0c..ae510ce 100644
--- a/tools/perf/util/data.h
+++ b/tools/perf/util/data.h
@@ -46,5 +46,14 @@
 void perf_data_file__close(struct perf_data_file *file);
 ssize_t perf_data_file__write(struct perf_data_file *file,
 			      void *buf, size_t size);
-
+/*
+ * If at_exit is set, only rename the current perf.data to
+ * perf.data.<postfix> and continue writing to the original file.
+ * Set at_exit when flushing the last output.
+ *
+ * The return value is the fd of the new output.
+ */
+int perf_data_file__switch(struct perf_data_file *file,
+			   const char *postfix,
+			   size_t pos, bool at_exit);
 #endif /* __PERF_DATA_H */
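
For example, a record session that rotates its output might use it like this (a sketch; 'file', 'header_size' and the signal plumbing around them are assumptions):

	char postfix[32];
	int fd;

	/* Move perf.data aside as perf.data.<timestamp>, keep recording. */
	snprintf(postfix, sizeof(postfix), "%" PRIu64, (u64)time(NULL));
	fd = perf_data_file__switch(file, postfix, header_size /* pos */, false);
	if (fd < 0)
		pr_err("failed to switch output file: %d\n", fd);
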
diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c
index 049438d..8d96c80 100644
--- a/tools/perf/util/db-export.c
+++ b/tools/perf/util/db-export.c
@@ -23,6 +23,8 @@
 #include "event.h"
 #include "util.h"
 #include "thread-stack.h"
+#include "callchain.h"
+#include "call-path.h"
 #include "db-export.h"
 
 struct deferred_export {
@@ -258,8 +260,7 @@
 		if (!al->sym) {
 			al->sym = symbol__new(al->addr, 0, 0, "unknown");
 			if (al->sym)
-				symbols__insert(&dso->symbols[al->map->type],
-						al->sym);
+				dso__insert_symbol(dso, al->map->type, al->sym);
 		}
 
 		if (al->sym) {
@@ -276,6 +277,80 @@
 	return 0;
 }
 
+static struct call_path *call_path_from_sample(struct db_export *dbe,
+					       struct machine *machine,
+					       struct thread *thread,
+					       struct perf_sample *sample,
+					       struct perf_evsel *evsel)
+{
+	u64 kernel_start = machine__kernel_start(machine);
+	struct call_path *current = &dbe->cpr->call_path;
+	enum chain_order saved_order = callchain_param.order;
+	int err;
+
+	if (!symbol_conf.use_callchain || !sample->callchain)
+		return NULL;
+
+	/*
+	 * Since the call path tree must be built starting with the root, we
+	 * must use ORDER_CALLER for call chain resolution, in order to process
+	 * the callchain starting with the root node and ending with the leaf.
+	 */
+	callchain_param.order = ORDER_CALLER;
+	err = thread__resolve_callchain(thread, &callchain_cursor, evsel,
+					sample, NULL, NULL,
+					sysctl_perf_event_max_stack);
+	if (err) {
+		callchain_param.order = saved_order;
+		return NULL;
+	}
+	callchain_cursor_commit(&callchain_cursor);
+
+	while (1) {
+		struct callchain_cursor_node *node;
+		struct addr_location al;
+		u64 dso_db_id = 0, sym_db_id = 0, offset = 0;
+
+		memset(&al, 0, sizeof(al));
+
+		node = callchain_cursor_current(&callchain_cursor);
+		if (!node)
+			break;
+		/*
+		 * Handle export of symbol and dso for this node by
+		 * constructing an addr_location struct and then passing it to
+		 * db_ids_from_al() to perform the export.
+		 */
+		al.sym = node->sym;
+		al.map = node->map;
+		al.machine = machine;
+		al.addr = node->ip;
+
+		if (al.map && !al.sym)
+			al.sym = dso__find_symbol(al.map->dso, MAP__FUNCTION,
+						  al.addr);
+
+		db_ids_from_al(dbe, &al, &dso_db_id, &sym_db_id, &offset);
+
+		/* add node to the call path tree if it doesn't exist */
+		current = call_path__findnew(dbe->cpr, current,
+					     al.sym, node->ip,
+					     kernel_start);
+
+		callchain_cursor_advance(&callchain_cursor);
+	}
+
+	/* Reset the callchain order to its prior value. */
+	callchain_param.order = saved_order;
+
+	if (current == &dbe->cpr->call_path) {
+		/* Bail because the callchain was empty. */
+		return NULL;
+	}
+
+	return current;
+}
+
 int db_export__branch_type(struct db_export *dbe, u32 branch_type,
 			   const char *name)
 {
@@ -329,6 +404,16 @@
 	if (err)
 		goto out_put;
 
+	if (dbe->cpr) {
+		struct call_path *cp = call_path_from_sample(dbe, al->machine,
+							     thread, sample,
+							     evsel);
+		if (cp) {
+			db_export__call_path(dbe, cp);
+			es.call_path_id = cp->db_id;
+		}
+	}
+
 	if ((evsel->attr.sample_type & PERF_SAMPLE_ADDR) &&
 	    sample_addr_correlates_sym(&evsel->attr)) {
 		struct addr_location addr_al;
diff --git a/tools/perf/util/db-export.h b/tools/perf/util/db-export.h
index 25e22fd..67bc6b8 100644
--- a/tools/perf/util/db-export.h
+++ b/tools/perf/util/db-export.h
@@ -27,6 +27,7 @@
 struct perf_sample;
 struct addr_location;
 struct call_return_processor;
+struct call_path_root;
 struct call_path;
 struct call_return;
 
@@ -43,6 +44,7 @@
 	u64			addr_dso_db_id;
 	u64			addr_sym_db_id;
 	u64			addr_offset; /* addr offset from symbol start */
+	u64			call_path_id;
 };
 
 struct db_export {
@@ -64,6 +66,7 @@
 	int (*export_call_return)(struct db_export *dbe,
 				  struct call_return *cr);
 	struct call_return_processor *crp;
+	struct call_path_root *cpr;
 	u64 evsel_last_db_id;
 	u64 machine_last_db_id;
 	u64 thread_last_db_id;
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 8e639543..3357479 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -38,7 +38,7 @@
 				   enum dso_binary_type type,
 				   char *root_dir, char *filename, size_t size)
 {
-	char build_id_hex[BUILD_ID_SIZE * 2 + 1];
+	char build_id_hex[SBUILD_ID_SIZE];
 	int ret = 0;
 	size_t len;
 
@@ -1301,7 +1301,7 @@
 
 size_t dso__fprintf_buildid(struct dso *dso, FILE *fp)
 {
-	char sbuild_id[BUILD_ID_SIZE * 2 + 1];
+	char sbuild_id[SBUILD_ID_SIZE];
 
 	build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id);
 	return fprintf(fp, "%s", sbuild_id);
diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
index aea189b..a347b19 100644
--- a/tools/perf/util/dwarf-aux.c
+++ b/tools/perf/util/dwarf-aux.c
@@ -915,8 +915,7 @@
 		tmp = "*";
 	else if (tag == DW_TAG_subroutine_type) {
 		/* Function pointer */
-		strbuf_add(buf, "(function_type)", 15);
-		return 0;
+		return strbuf_add(buf, "(function_type)", 15);
 	} else {
 		if (!dwarf_diename(&type))
 			return -ENOENT;
@@ -927,14 +926,10 @@
 		else if (tag == DW_TAG_enumeration_type)
 			tmp = "enum ";
 		/* Write a base name */
-		strbuf_addf(buf, "%s%s", tmp, dwarf_diename(&type));
-		return 0;
+		return strbuf_addf(buf, "%s%s", tmp, dwarf_diename(&type));
 	}
 	ret = die_get_typename(&type, buf);
-	if (ret == 0)
-		strbuf_addstr(buf, tmp);
-
-	return ret;
+	return ret ? ret : strbuf_addstr(buf, tmp);
 }
 
 /**
@@ -951,12 +946,10 @@
 	ret = die_get_typename(vr_die, buf);
 	if (ret < 0) {
 		pr_debug("Failed to get type, make it unknown.\n");
-		strbuf_add(buf, " (unknown_type)", 14);
+		ret = strbuf_add(buf, " (unknown_type)", 14);
 	}
 
-	strbuf_addf(buf, "\t%s", dwarf_diename(vr_die));
-
-	return 0;
+	return ret < 0 ? ret : strbuf_addf(buf, "\t%s", dwarf_diename(vr_die));
 }
 
 #ifdef HAVE_DWARF_GETLOCATIONS
@@ -999,22 +992,24 @@
 	}
 
 	while ((offset = dwarf_ranges(&scopes[1], offset, &base,
-				&start, &end)) > 0) {
+					&start, &end)) > 0) {
 		start -= entry;
 		end -= entry;
 
 		if (first) {
-			strbuf_addf(buf, "@<%s+[%" PRIu64 "-%" PRIu64,
-				name, start, end);
+			ret = strbuf_addf(buf, "@<%s+[%" PRIu64 "-%" PRIu64,
+					  name, start, end);
 			first = false;
 		} else {
-			strbuf_addf(buf, ",%" PRIu64 "-%" PRIu64,
-				start, end);
+			ret = strbuf_addf(buf, ",%" PRIu64 "-%" PRIu64,
+					  start, end);
 		}
+		if (ret < 0)
+			goto out;
 	}
 
 	if (!first)
-		strbuf_add(buf, "]>", 2);
+		ret = strbuf_add(buf, "]>", 2);
 
 out:
 	free(scopes);
@@ -1054,31 +1049,32 @@
 	if (dwarf_attr(vr_die, DW_AT_location, &attr) == NULL)
 		return -EINVAL;
 
-	while ((offset = dwarf_getlocations(
-				&attr, offset, &base,
-				&start, &end, &op, &nops)) > 0) {
+	while ((offset = dwarf_getlocations(&attr, offset, &base,
+					&start, &end, &op, &nops)) > 0) {
 		if (start == 0) {
 			/* Single Location Descriptions */
 			ret = die_get_var_innermost_scope(sp_die, vr_die, buf);
-			return ret;
+			goto out;
 		}
 
 		/* Location Lists */
 		start -= entry;
 		end -= entry;
 		if (first) {
-			strbuf_addf(buf, "@<%s+[%" PRIu64 "-%" PRIu64,
-				name, start, end);
+			ret = strbuf_addf(buf, "@<%s+[%" PRIu64 "-%" PRIu64,
+					  name, start, end);
 			first = false;
 		} else {
-			strbuf_addf(buf, ",%" PRIu64 "-%" PRIu64,
-				start, end);
+			ret = strbuf_addf(buf, ",%" PRIu64 "-%" PRIu64,
+					  start, end);
 		}
+		if (ret < 0)
+			goto out;
 	}
 
 	if (!first)
-		strbuf_add(buf, "]>", 2);
-
+		ret = strbuf_add(buf, "]>", 2);
+out:
 	return ret;
 }
 #else
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index edcf4ed..f6fcc68 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -45,6 +45,7 @@
 	[PERF_RECORD_STAT]			= "STAT",
 	[PERF_RECORD_STAT_ROUND]		= "STAT_ROUND",
 	[PERF_RECORD_EVENT_UPDATE]		= "EVENT_UPDATE",
+	[PERF_RECORD_TIME_CONV]			= "TIME_CONV",
 };
 
 const char *perf_event__name(unsigned int id)
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 6bb1c92..8d363d5 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -233,6 +233,7 @@
 	PERF_RECORD_STAT			= 76,
 	PERF_RECORD_STAT_ROUND			= 77,
 	PERF_RECORD_EVENT_UPDATE		= 78,
+	PERF_RECORD_TIME_CONV			= 79,
 	PERF_RECORD_HEADER_MAX
 };
 
@@ -469,6 +470,13 @@
 	u64				time;
 };
 
+struct time_conv_event {
+	struct perf_event_header header;
+	u64 time_shift;
+	u64 time_mult;
+	u64 time_zero;
+};
+
 union perf_event {
 	struct perf_event_header	header;
 	struct mmap_event		mmap;
@@ -497,6 +505,7 @@
 	struct stat_config_event	stat_config;
 	struct stat_event		stat;
 	struct stat_round_event		stat_round;
+	struct time_conv_event		time_conv;
 };
 
 void perf_event__print_totals(void);
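
The three fields of time_conv_event carry the kernel's TSC-to-time conversion parameters, mirroring what the perf mmap control page exports. Converting a raw TSC value then follows the documented split multiply, roughly as below (a sketch, not code from this patch):

	static inline u64 tsc_to_perf_time(u64 tsc, const struct time_conv_event *tc)
	{
		u64 quot = tsc >> tc->time_shift;
		u64 rem  = tsc & (((u64)1 << tc->time_shift) - 1);

		return tc->time_zero + quot * tc->time_mult +
		       ((rem * tc->time_mult) >> tc->time_shift);
	}
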
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 86a0383..c4bfe11 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -679,53 +679,52 @@
 	return NULL;
 }
 
-union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
+/* When check_messup is true, 'end' must point to a good entry */
+static union perf_event *
+perf_mmap__read(struct perf_mmap *md, bool check_messup, u64 start,
+		u64 end, u64 *prev)
 {
-	struct perf_mmap *md = &evlist->mmap[idx];
-	u64 head;
-	u64 old = md->prev;
 	unsigned char *data = md->base + page_size;
 	union perf_event *event = NULL;
+	int diff = end - start;
 
-	/*
-	 * Check if event was unmapped due to a POLLHUP/POLLERR.
-	 */
-	if (!atomic_read(&md->refcnt))
-		return NULL;
-
-	head = perf_mmap__read_head(md);
-	if (evlist->overwrite) {
+	if (check_messup) {
 		/*
 		 * If we're further behind than half the buffer, there's a chance
 		 * the writer will bite our tail and mess up the samples under us.
 		 *
-		 * If we somehow ended up ahead of the head, we got messed up.
+		 * If we somehow ended up ahead of the 'end', we got messed up.
 		 *
-		 * In either case, truncate and restart at head.
+		 * In either case, truncate and restart at 'end'.
 		 */
-		int diff = head - old;
 		if (diff > md->mask / 2 || diff < 0) {
 			fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");
 
 			/*
-			 * head points to a known good entry, start there.
+			 * 'end' points to a known good entry, start there.
 			 */
-			old = head;
+			start = end;
+			diff = 0;
 		}
 	}
 
-	if (old != head) {
+	if (diff >= (int)sizeof(event->header)) {
 		size_t size;
 
-		event = (union perf_event *)&data[old & md->mask];
+		event = (union perf_event *)&data[start & md->mask];
 		size = event->header.size;
 
+		if (size < sizeof(event->header) || diff < (int)size) {
+			event = NULL;
+			goto broken_event;
+		}
+
 		/*
 		 * Event straddles the mmap boundary -- header should always
 		 * be inside due to u64 alignment of output.
 		 */
-		if ((old & md->mask) + size != ((old + size) & md->mask)) {
-			unsigned int offset = old;
+		if ((start & md->mask) + size != ((start + size) & md->mask)) {
+			unsigned int offset = start;
 			unsigned int len = min(sizeof(*event), size), cpy;
 			void *dst = md->event_copy;
 
@@ -740,14 +739,83 @@
 			event = (union perf_event *) md->event_copy;
 		}
 
-		old += size;
+		start += size;
 	}
 
-	md->prev = old;
+broken_event:
+	if (prev)
+		*prev = start;
 
 	return event;
 }
 
+union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
+{
+	struct perf_mmap *md = &evlist->mmap[idx];
+	u64 head;
+	u64 old = md->prev;
+
+	/*
+	 * Check if event was unmapped due to a POLLHUP/POLLERR.
+	 */
+	if (!atomic_read(&md->refcnt))
+		return NULL;
+
+	head = perf_mmap__read_head(md);
+
+	return perf_mmap__read(md, evlist->overwrite, old, head, &md->prev);
+}
+
+union perf_event *
+perf_evlist__mmap_read_backward(struct perf_evlist *evlist, int idx)
+{
+	struct perf_mmap *md = &evlist->mmap[idx];
+	u64 head, end;
+	u64 start = md->prev;
+
+	/*
+	 * Check if event was unmapped due to a POLLHUP/POLLERR.
+	 */
+	if (!atomic_read(&md->refcnt))
+		return NULL;
+
+	head = perf_mmap__read_head(md);
+	if (!head)
+		return NULL;
+
+	/*
+	 * The 'head' pointer starts at 0 and the kernel subtracts
+	 * sizeof(record) from it on every write, so 'head' is in fact
+	 * negative. The 'end' pointer is derived by adding the size of
+	 * the ring buffer to 'head', meaning the valid data to read is
+	 * the whole ring buffer. If 'end' is positive, the ring buffer
+	 * has not been fully filled yet, so 'end' must be adjusted to 0.
+	 *
+	 * However, since both 'head' and 'end' are unsigned, we can't
+	 * simply compare 'end' against 0. Instead we compare '-head'
+	 * against the size of the ring buffer, where '-head' is the
+	 * number of bytes the kernel has written to the ring buffer.
+	 */
+	if (-head < (u64)(md->mask + 1))
+		end = 0;
+	else
+		end = head + md->mask + 1;
+
+	return perf_mmap__read(md, false, start, end, &md->prev);
+}
+
+void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx)
+{
+	struct perf_mmap *md = &evlist->mmap[idx];
+	u64 head;
+
+	if (!atomic_read(&md->refcnt))
+		return;
+
+	head = perf_mmap__read_head(md);
+	md->prev = head;
+}
+
 static bool perf_mmap__empty(struct perf_mmap *md)
 {
 	return perf_mmap__read_head(md) == md->prev && !md->auxtrace_mmap.base;
@@ -986,26 +1054,34 @@
 	return -1;
 }
 
+unsigned long perf_event_mlock_kb_in_pages(void)
+{
+	unsigned long pages;
+	int max;
+
+	if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) {
+		/*
+		 * Pick a once upon a time good value, i.e. things look
+		 * strange since we can't read a sysctl value, but lets not
+		 * die yet...
+		 */
+		max = 512;
+	} else {
+		max -= (page_size / 1024);
+	}
+
+	pages = (max * 1024) / page_size;
+	if (!is_power_of_2(pages))
+		pages = rounddown_pow_of_two(pages);
+
+	return pages;
+}
+
 static size_t perf_evlist__mmap_size(unsigned long pages)
 {
-	if (pages == UINT_MAX) {
-		int max;
-
-		if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) {
-			/*
-			 * Pick a once upon a time good value, i.e. things look
-			 * strange since we can't read a sysctl value, but lets not
-			 * die yet...
-			 */
-			max = 512;
-		} else {
-			max -= (page_size / 1024);
-		}
-
-		pages = (max * 1024) / page_size;
-		if (!is_power_of_2(pages))
-			pages = rounddown_pow_of_two(pages);
-	} else if (!is_power_of_2(pages))
+	if (pages == UINT_MAX)
+		pages = perf_event_mlock_kb_in_pages();
+	else if (!is_power_of_2(pages))
 		return 0;
 
 	return (pages + 1) * page_size;
@@ -1192,6 +1268,24 @@
 	perf_evlist__propagate_maps(evlist);
 }
 
+void __perf_evlist__set_sample_bit(struct perf_evlist *evlist,
+				   enum perf_event_sample_format bit)
+{
+	struct perf_evsel *evsel;
+
+	evlist__for_each(evlist, evsel)
+		__perf_evsel__set_sample_bit(evsel, bit);
+}
+
+void __perf_evlist__reset_sample_bit(struct perf_evlist *evlist,
+				     enum perf_event_sample_format bit)
+{
+	struct perf_evsel *evsel;
+
+	evlist__for_each(evlist, evsel)
+		__perf_evsel__reset_sample_bit(evsel, bit);
+}
+
 int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel)
 {
 	struct perf_evsel *evsel;
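
To make the backward-reading arithmetic in perf_evlist__mmap_read_backward() concrete: with a 64 KiB ring (md->mask + 1 == 0x10000), after the kernel has written 0x3000 bytes, head == (u64)-0x3000, so -head == 0x3000 < 0x10000 and 'end' is forced to 0, i.e. the reader may scan all the way down to offset 0. Once more than 64 KiB has been written, -head >= 0x10000 and end = head + 0x10000 marks the oldest byte that is still valid. A compact sketch of just that boundary decision (illustrative, mirroring the code above):

	static u64 backward_read_end(u64 head, u64 ring_size)
	{
		/* -head == number of bytes the kernel has written so far */
		if (-head < ring_size)
			return 0;		/* ring not fully filled yet */
		return head + ring_size;	/* oldest still-valid byte */
	}
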
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index a0d1522..85d1b598 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -87,6 +87,17 @@
 int perf_evlist__add_newtp(struct perf_evlist *evlist,
 			   const char *sys, const char *name, void *handler);
 
+void __perf_evlist__set_sample_bit(struct perf_evlist *evlist,
+				   enum perf_event_sample_format bit);
+void __perf_evlist__reset_sample_bit(struct perf_evlist *evlist,
+				     enum perf_event_sample_format bit);
+
+#define perf_evlist__set_sample_bit(evlist, bit) \
+	__perf_evlist__set_sample_bit(evlist, PERF_SAMPLE_##bit)
+
+#define perf_evlist__reset_sample_bit(evlist, bit) \
+	__perf_evlist__reset_sample_bit(evlist, PERF_SAMPLE_##bit)
+
 int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter);
 int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid);
 int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids);
@@ -118,16 +129,23 @@
 
 union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx);
 
+union perf_event *perf_evlist__mmap_read_backward(struct perf_evlist *evlist,
+						  int idx);
+void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx);
+
 void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx);
 
 int perf_evlist__open(struct perf_evlist *evlist);
 void perf_evlist__close(struct perf_evlist *evlist);
 
+struct callchain_param;
+
 void perf_evlist__set_id_pos(struct perf_evlist *evlist);
 bool perf_can_sample_identifier(void);
 bool perf_can_record_switch_events(void);
 bool perf_can_record_cpu_wide(void);
-void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts);
+void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts,
+			 struct callchain_param *callchain);
 int record_opts__config(struct record_opts *opts);
 
 int perf_evlist__prepare_workload(struct perf_evlist *evlist,
@@ -144,6 +162,8 @@
 				  const char *str,
 				  int unset);
 
+unsigned long perf_event_mlock_kb_in_pages(void);
+
 int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
 			 bool overwrite, unsigned int auxtrace_pages,
 			 bool auxtrace_overwrite);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 645dc18..964c7c3 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -226,7 +226,8 @@
 		perf_evsel__init(evsel, attr, idx);
 
 	if (perf_evsel__is_bpf_output(evsel)) {
-		evsel->attr.sample_type |= PERF_SAMPLE_RAW;
+		evsel->attr.sample_type |= (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
+					    PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD);
 		evsel->attr.sample_period = 1;
 	}
 
@@ -561,10 +562,9 @@
 	return ret;
 }
 
-static void
-perf_evsel__config_callgraph(struct perf_evsel *evsel,
-			     struct record_opts *opts,
-			     struct callchain_param *param)
+void perf_evsel__config_callchain(struct perf_evsel *evsel,
+				  struct record_opts *opts,
+				  struct callchain_param *param)
 {
 	bool function = perf_evsel__is_function_event(evsel);
 	struct perf_event_attr *attr = &evsel->attr;
@@ -704,7 +704,7 @@
 
 		/* set perf-event callgraph */
 		if (param.enabled)
-			perf_evsel__config_callgraph(evsel, opts, &param);
+			perf_evsel__config_callchain(evsel, opts, &param);
 	}
 }
 
@@ -736,7 +736,8 @@
  *     enable/disable events specifically, as there's no
  *     initial traced exec call.
  */
-void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts)
+void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
+			struct callchain_param *callchain)
 {
 	struct perf_evsel *leader = evsel->leader;
 	struct perf_event_attr *attr = &evsel->attr;
@@ -811,8 +812,8 @@
 	if (perf_evsel__is_function_event(evsel))
 		evsel->attr.exclude_callchain_user = 1;
 
-	if (callchain_param.enabled && !evsel->no_aux_samples)
-		perf_evsel__config_callgraph(evsel, opts, &callchain_param);
+	if (callchain && callchain->enabled && !evsel->no_aux_samples)
+		perf_evsel__config_callchain(evsel, opts, callchain);
 
 	if (opts->sample_intr_regs) {
 		attr->sample_regs_intr = opts->sample_intr_regs;
@@ -1230,6 +1231,21 @@
 	__p_bits(buf, size, value, bits);
 }
 
+static void __p_branch_sample_type(char *buf, size_t size, u64 value)
+{
+#define bit_name(n) { PERF_SAMPLE_BRANCH_##n, #n }
+	struct bit_names bits[] = {
+		bit_name(USER), bit_name(KERNEL), bit_name(HV), bit_name(ANY),
+		bit_name(ANY_CALL), bit_name(ANY_RETURN), bit_name(IND_CALL),
+		bit_name(ABORT_TX), bit_name(IN_TX), bit_name(NO_TX),
+		bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP),
+		bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES),
+		{ .name = NULL, }
+	};
+#undef bit_name
+	__p_bits(buf, size, value, bits);
+}
+
 static void __p_read_format(char *buf, size_t size, u64 value)
 {
 #define bit_name(n) { PERF_FORMAT_##n, #n }
@@ -1248,6 +1264,7 @@
 #define p_unsigned(val)		snprintf(buf, BUF_SIZE, "%"PRIu64, (uint64_t)(val))
 #define p_signed(val)		snprintf(buf, BUF_SIZE, "%"PRId64, (int64_t)(val))
 #define p_sample_type(val)	__p_sample_type(buf, BUF_SIZE, val)
+#define p_branch_sample_type(val) __p_branch_sample_type(buf, BUF_SIZE, val)
 #define p_read_format(val)	__p_read_format(buf, BUF_SIZE, val)
 
 #define PRINT_ATTRn(_n, _f, _p)				\
@@ -1299,12 +1316,13 @@
 	PRINT_ATTRf(comm_exec, p_unsigned);
 	PRINT_ATTRf(use_clockid, p_unsigned);
 	PRINT_ATTRf(context_switch, p_unsigned);
+	PRINT_ATTRf(write_backward, p_unsigned);
 
 	PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned);
 	PRINT_ATTRf(bp_type, p_unsigned);
 	PRINT_ATTRn("{ bp_addr, config1 }", bp_addr, p_hex);
 	PRINT_ATTRn("{ bp_len, config2 }", bp_len, p_hex);
-	PRINT_ATTRf(branch_sample_type, p_unsigned);
+	PRINT_ATTRf(branch_sample_type, p_branch_sample_type);
 	PRINT_ATTRf(sample_regs_user, p_hex);
 	PRINT_ATTRf(sample_stack_user, p_unsigned);
 	PRINT_ATTRf(clockid, p_signed);
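With __p_branch_sample_type() wired into PRINT_ATTRf() above, a raw branch_sample_type of 0x9 (PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_ANY) should now be dumped in the verbose attr output as something like:

	branch_sample_type               USER|ANY

rather than the bare decimal 9.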
@@ -2253,95 +2271,6 @@
 	return 0;
 }
 
-static int comma_fprintf(FILE *fp, bool *first, const char *fmt, ...)
-{
-	va_list args;
-	int ret = 0;
-
-	if (!*first) {
-		ret += fprintf(fp, ",");
-	} else {
-		ret += fprintf(fp, ":");
-		*first = false;
-	}
-
-	va_start(args, fmt);
-	ret += vfprintf(fp, fmt, args);
-	va_end(args);
-	return ret;
-}
-
-static int __print_attr__fprintf(FILE *fp, const char *name, const char *val, void *priv)
-{
-	return comma_fprintf(fp, (bool *)priv, " %s: %s", name, val);
-}
-
-int perf_evsel__fprintf(struct perf_evsel *evsel,
-			struct perf_attr_details *details, FILE *fp)
-{
-	bool first = true;
-	int printed = 0;
-
-	if (details->event_group) {
-		struct perf_evsel *pos;
-
-		if (!perf_evsel__is_group_leader(evsel))
-			return 0;
-
-		if (evsel->nr_members > 1)
-			printed += fprintf(fp, "%s{", evsel->group_name ?: "");
-
-		printed += fprintf(fp, "%s", perf_evsel__name(evsel));
-		for_each_group_member(pos, evsel)
-			printed += fprintf(fp, ",%s", perf_evsel__name(pos));
-
-		if (evsel->nr_members > 1)
-			printed += fprintf(fp, "}");
-		goto out;
-	}
-
-	printed += fprintf(fp, "%s", perf_evsel__name(evsel));
-
-	if (details->verbose) {
-		printed += perf_event_attr__fprintf(fp, &evsel->attr,
-						    __print_attr__fprintf, &first);
-	} else if (details->freq) {
-		const char *term = "sample_freq";
-
-		if (!evsel->attr.freq)
-			term = "sample_period";
-
-		printed += comma_fprintf(fp, &first, " %s=%" PRIu64,
-					 term, (u64)evsel->attr.sample_freq);
-	}
-
-	if (details->trace_fields) {
-		struct format_field *field;
-
-		if (evsel->attr.type != PERF_TYPE_TRACEPOINT) {
-			printed += comma_fprintf(fp, &first, " (not a tracepoint)");
-			goto out;
-		}
-
-		field = evsel->tp_format->format.fields;
-		if (field == NULL) {
-			printed += comma_fprintf(fp, &first, " (no trace field)");
-			goto out;
-		}
-
-		printed += comma_fprintf(fp, &first, " trace_fields: %s", field->name);
-
-		field = field->next;
-		while (field) {
-			printed += comma_fprintf(fp, &first, "%s", field->name);
-			field = field->next;
-		}
-	}
-out:
-	fputc('\n', fp);
-	return ++printed;
-}
-
 bool perf_evsel__fallback(struct perf_evsel *evsel, int err,
 			  char *msg, size_t msgsize)
 {
@@ -2416,10 +2345,18 @@
 			 "Probably the maximum number of open file descriptors has been reached.\n"
 			 "Hint: Try again after reducing the number of events.\n"
 			 "Hint: Try increasing the limit with 'ulimit -n <limit>'");
+	case ENOMEM:
+		if ((evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0 &&
+		    access("/proc/sys/kernel/perf_event_max_stack", F_OK) == 0)
+			return scnprintf(msg, size,
+					 "Not enough memory to setup event with callchain.\n"
+					 "Not enough memory to set up event with callchain.\n"
+					 "Hint: Current value: %d", sysctl_perf_event_max_stack);
+		break;
 	case ENODEV:
 		if (target->cpu_list)
 			return scnprintf(msg, size, "%s",
-	 "No such device - did you specify an out-of-range profile CPU?\n");
+	 "No such device - did you specify an out-of-range profile CPU?");
 		break;
 	case EOPNOTSUPP:
 		if (evsel->attr.precise_ip)
@@ -2451,7 +2388,7 @@
 	return scnprintf(msg, size,
 	"The sys_perf_event_open() syscall returned with %d (%s) for event (%s).\n"
 	"/bin/dmesg may provide additional information.\n"
-	"No CONFIG_PERF_EVENTS=y kernel support configured?\n",
+	"No CONFIG_PERF_EVENTS=y kernel support configured?",
 			 err, strerror_r(err, sbuf, sizeof(sbuf)),
 			 perf_evsel__name(evsel));
 }
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 501ea6e..8a644fe 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -178,8 +178,14 @@
 void perf_evsel__exit(struct perf_evsel *evsel);
 void perf_evsel__delete(struct perf_evsel *evsel);
 
+struct callchain_param;
+
 void perf_evsel__config(struct perf_evsel *evsel,
-			struct record_opts *opts);
+			struct record_opts *opts,
+			struct callchain_param *callchain);
+void perf_evsel__config_callchain(struct perf_evsel *evsel,
+				  struct record_opts *opts,
+				  struct callchain_param *callchain);
 
 int __perf_evsel__sample_size(u64 sample_type);
 void perf_evsel__calc_id_pos(struct perf_evsel *evsel);
@@ -381,6 +387,24 @@
 int perf_evsel__fprintf(struct perf_evsel *evsel,
 			struct perf_attr_details *details, FILE *fp);
 
+#define EVSEL__PRINT_IP			(1<<0)
+#define EVSEL__PRINT_SYM		(1<<1)
+#define EVSEL__PRINT_DSO		(1<<2)
+#define EVSEL__PRINT_SYMOFFSET		(1<<3)
+#define EVSEL__PRINT_ONELINE		(1<<4)
+#define EVSEL__PRINT_SRCLINE		(1<<5)
+#define EVSEL__PRINT_UNKNOWN_AS_ADDR	(1<<6)
+
+struct callchain_cursor;
+
+int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
+			      unsigned int print_opts,
+			      struct callchain_cursor *cursor, FILE *fp);
+
+int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al,
+			int left_alignment, unsigned int print_opts,
+			struct callchain_cursor *cursor, FILE *fp);
+
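A hedged sketch of how the EVSEL__PRINT_* bits combine at a call site; sample, al and cursor are assumed to have been resolved already:

	unsigned int print_opts = EVSEL__PRINT_IP | EVSEL__PRINT_SYM |
				  EVSEL__PRINT_SYMOFFSET | EVSEL__PRINT_DSO;

	sample__fprintf_sym(sample, al, 0 /* left_alignment */,
			    print_opts, cursor, stdout);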
 bool perf_evsel__fallback(struct perf_evsel *evsel, int err,
 			  char *msg, size_t msgsize);
 int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target,
@@ -396,7 +420,7 @@
      (_evsel) && (_evsel)->leader == (_leader);					\
      (_evsel) = list_entry((_evsel)->node.next, struct perf_evsel, node))
 
-static inline bool has_branch_callstack(struct perf_evsel *evsel)
+static inline bool perf_evsel__has_branch_callstack(const struct perf_evsel *evsel)
 {
 	return evsel->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK;
 }
diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c
new file mode 100644
index 0000000..3674e77
--- /dev/null
+++ b/tools/perf/util/evsel_fprintf.c
@@ -0,0 +1,212 @@
+#include <stdio.h>
+#include <stdbool.h>
+#include <traceevent/event-parse.h>
+#include "evsel.h"
+#include "callchain.h"
+#include "map.h"
+#include "symbol.h"
+
+static int comma_fprintf(FILE *fp, bool *first, const char *fmt, ...)
+{
+	va_list args;
+	int ret = 0;
+
+	if (!*first) {
+		ret += fprintf(fp, ",");
+	} else {
+		ret += fprintf(fp, ":");
+		*first = false;
+	}
+
+	va_start(args, fmt);
+	ret += vfprintf(fp, fmt, args);
+	va_end(args);
+	return ret;
+}
+
+static int __print_attr__fprintf(FILE *fp, const char *name, const char *val, void *priv)
+{
+	return comma_fprintf(fp, (bool *)priv, " %s: %s", name, val);
+}
+
+int perf_evsel__fprintf(struct perf_evsel *evsel,
+			struct perf_attr_details *details, FILE *fp)
+{
+	bool first = true;
+	int printed = 0;
+
+	if (details->event_group) {
+		struct perf_evsel *pos;
+
+		if (!perf_evsel__is_group_leader(evsel))
+			return 0;
+
+		if (evsel->nr_members > 1)
+			printed += fprintf(fp, "%s{", evsel->group_name ?: "");
+
+		printed += fprintf(fp, "%s", perf_evsel__name(evsel));
+		for_each_group_member(pos, evsel)
+			printed += fprintf(fp, ",%s", perf_evsel__name(pos));
+
+		if (evsel->nr_members > 1)
+			printed += fprintf(fp, "}");
+		goto out;
+	}
+
+	printed += fprintf(fp, "%s", perf_evsel__name(evsel));
+
+	if (details->verbose) {
+		printed += perf_event_attr__fprintf(fp, &evsel->attr,
+						    __print_attr__fprintf, &first);
+	} else if (details->freq) {
+		const char *term = "sample_freq";
+
+		if (!evsel->attr.freq)
+			term = "sample_period";
+
+		printed += comma_fprintf(fp, &first, " %s=%" PRIu64,
+					 term, (u64)evsel->attr.sample_freq);
+	}
+
+	if (details->trace_fields) {
+		struct format_field *field;
+
+		if (evsel->attr.type != PERF_TYPE_TRACEPOINT) {
+			printed += comma_fprintf(fp, &first, " (not a tracepoint)");
+			goto out;
+		}
+
+		field = evsel->tp_format->format.fields;
+		if (field == NULL) {
+			printed += comma_fprintf(fp, &first, " (no trace field)");
+			goto out;
+		}
+
+		printed += comma_fprintf(fp, &first, " trace_fields: %s", field->name);
+
+		field = field->next;
+		while (field) {
+			printed += comma_fprintf(fp, &first, "%s", field->name);
+			field = field->next;
+		}
+	}
+out:
+	fputc('\n', fp);
+	return ++printed;
+}
+
+int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
+			      unsigned int print_opts, struct callchain_cursor *cursor,
+			      FILE *fp)
+{
+	int printed = 0;
+	struct callchain_cursor_node *node;
+	int print_ip = print_opts & EVSEL__PRINT_IP;
+	int print_sym = print_opts & EVSEL__PRINT_SYM;
+	int print_dso = print_opts & EVSEL__PRINT_DSO;
+	int print_symoffset = print_opts & EVSEL__PRINT_SYMOFFSET;
+	int print_oneline = print_opts & EVSEL__PRINT_ONELINE;
+	int print_srcline = print_opts & EVSEL__PRINT_SRCLINE;
+	int print_unknown_as_addr = print_opts & EVSEL__PRINT_UNKNOWN_AS_ADDR;
+	char s = print_oneline ? ' ' : '\t';
+
+	if (sample->callchain) {
+		struct addr_location node_al;
+
+		callchain_cursor_commit(cursor);
+
+		while (1) {
+			u64 addr = 0;
+
+			node = callchain_cursor_current(cursor);
+			if (!node)
+				break;
+
+			if (node->sym && node->sym->ignore)
+				goto next;
+
+			printed += fprintf(fp, "%-*.*s", left_alignment, left_alignment, " ");
+
+			if (print_ip)
+				printed += fprintf(fp, "%c%16" PRIx64, s, node->ip);
+
+			if (node->map)
+				addr = node->map->map_ip(node->map, node->ip);
+
+			if (print_sym) {
+				printed += fprintf(fp, " ");
+				node_al.addr = addr;
+				node_al.map  = node->map;
+
+				if (print_symoffset) {
+					printed += __symbol__fprintf_symname_offs(node->sym, &node_al,
+										  print_unknown_as_addr, fp);
+				} else {
+					printed += __symbol__fprintf_symname(node->sym, &node_al,
+									     print_unknown_as_addr, fp);
+				}
+			}
+
+			if (print_dso) {
+				printed += fprintf(fp, " (");
+				printed += map__fprintf_dsoname(node->map, fp);
+				printed += fprintf(fp, ")");
+			}
+
+			if (print_srcline)
+				printed += map__fprintf_srcline(node->map, addr, "\n  ", fp);
+
+			if (!print_oneline)
+				printed += fprintf(fp, "\n");
+next:
+			callchain_cursor_advance(cursor);
+		}
+	}
+
+	return printed;
+}
+
+int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al,
+			int left_alignment, unsigned int print_opts,
+			struct callchain_cursor *cursor, FILE *fp)
+{
+	int printed = 0;
+	int print_ip = print_opts & EVSEL__PRINT_IP;
+	int print_sym = print_opts & EVSEL__PRINT_SYM;
+	int print_dso = print_opts & EVSEL__PRINT_DSO;
+	int print_symoffset = print_opts & EVSEL__PRINT_SYMOFFSET;
+	int print_srcline = print_opts & EVSEL__PRINT_SRCLINE;
+	int print_unknown_as_addr = print_opts & EVSEL__PRINT_UNKNOWN_AS_ADDR;
+
+	if (cursor != NULL) {
+		printed += sample__fprintf_callchain(sample, left_alignment,
+						     print_opts, cursor, fp);
+	} else if (!(al->sym && al->sym->ignore)) {
+		printed += fprintf(fp, "%-*.*s", left_alignment, left_alignment, " ");
+
+		if (print_ip)
+			printed += fprintf(fp, "%16" PRIx64, sample->ip);
+
+		if (print_sym) {
+			printed += fprintf(fp, " ");
+			if (print_symoffset) {
+				printed += __symbol__fprintf_symname_offs(al->sym, al,
+									  print_unknown_as_addr, fp);
+			} else {
+				printed += __symbol__fprintf_symname(al->sym, al,
+								     print_unknown_as_addr, fp);
+			}
+		}
+
+		if (print_dso) {
+			printed += fprintf(fp, " (");
+			printed += map__fprintf_dsoname(al->map, fp);
+			printed += fprintf(fp, ")");
+		}
+
+		if (print_srcline)
+			printed += map__fprintf_srcline(al->map, al->addr, "\n  ", fp);
+	}
+
+	return printed;
+}
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 90680ec..08852dd 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1474,7 +1474,7 @@
 
 	dso = machine__findnew_dso(machine, filename);
 	if (dso != NULL) {
-		char sbuild_id[BUILD_ID_SIZE * 2 + 1];
+		char sbuild_id[SBUILD_ID_SIZE];
 
 		dso__set_build_id(dso, &bev->build_id);
 
@@ -1819,7 +1819,8 @@
 
 	ph->env.nr_sibling_cores = nr;
 	size += sizeof(u32);
-	strbuf_init(&sb, 128);
+	if (strbuf_init(&sb, 128) < 0)
+		goto free_cpu;
 
 	for (i = 0; i < nr; i++) {
 		str = do_read_string(fd, ph);
@@ -1827,7 +1828,8 @@
 			goto error;
 
 		/* include a NULL character at the end */
-		strbuf_add(&sb, str, strlen(str) + 1);
+		if (strbuf_add(&sb, str, strlen(str) + 1) < 0)
+			goto error;
 		size += string_size(str);
 		free(str);
 	}
@@ -1849,7 +1851,8 @@
 			goto error;
 
 		/* include a NULL character at the end */
-		strbuf_add(&sb, str, strlen(str) + 1);
+		if (strbuf_add(&sb, str, strlen(str) + 1) < 0)
+			goto error;
 		size += string_size(str);
 		free(str);
 	}
@@ -1912,13 +1915,14 @@
 	/* nr nodes */
 	ret = readn(fd, &nr, sizeof(nr));
 	if (ret != sizeof(nr))
-		goto error;
+		return -1;
 
 	if (ph->needs_swap)
 		nr = bswap_32(nr);
 
 	ph->env.nr_numa_nodes = nr;
-	strbuf_init(&sb, 256);
+	if (strbuf_init(&sb, 256) < 0)
+		return -1;
 
 	for (i = 0; i < nr; i++) {
 		/* node number */
@@ -1940,15 +1944,17 @@
 			mem_free = bswap_64(mem_free);
 		}
 
-		strbuf_addf(&sb, "%u:%"PRIu64":%"PRIu64":",
-			    node, mem_total, mem_free);
+		if (strbuf_addf(&sb, "%u:%"PRIu64":%"PRIu64":",
+				node, mem_total, mem_free) < 0)
+			goto error;
 
 		str = do_read_string(fd, ph);
 		if (!str)
 			goto error;
 
 		/* include a NULL character at the end */
-		strbuf_add(&sb, str, strlen(str) + 1);
+		if (strbuf_add(&sb, str, strlen(str) + 1) < 0)
+			goto error;
 		free(str);
 	}
 	ph->env.numa_nodes = strbuf_detach(&sb, NULL);
@@ -1982,7 +1988,8 @@
 	}
 
 	ph->env.nr_pmu_mappings = pmu_num;
-	strbuf_init(&sb, 128);
+	if (strbuf_init(&sb, 128) < 0)
+		return -1;
 
 	while (pmu_num) {
 		if (readn(fd, &type, sizeof(type)) != sizeof(type))
@@ -1994,9 +2001,11 @@
 		if (!name)
 			goto error;
 
-		strbuf_addf(&sb, "%u:%s", type, name);
+		if (strbuf_addf(&sb, "%u:%s", type, name) < 0)
+			goto error;
 		/* include a NULL character at the end */
-		strbuf_add(&sb, "", 1);
+		if (strbuf_add(&sb, "", 1) < 0)
+			goto error;
 
 		if (!strcmp(name, "msr"))
 			ph->env.msr_pmu_type = type;
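The header.c hunks above follow a pattern repeated throughout this series: every strbuf operation that can fail is now checked. A minimal sketch of the error-checked idiom (names are illustrative):

	struct strbuf sb = STRBUF_INIT;
	char *result = NULL;

	if (strbuf_addf(&sb, "%u:%s", type, name) < 0)
		goto error;
	result = strbuf_detach(&sb, NULL);	/* caller owns result now */
error:
	strbuf_release(&sb);	/* safe: detach leaves the strbuf reinitialized */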
diff --git a/tools/perf/util/help-unknown-cmd.c b/tools/perf/util/help-unknown-cmd.c
index 43a98a4..d62ccae 100644
--- a/tools/perf/util/help-unknown-cmd.c
+++ b/tools/perf/util/help-unknown-cmd.c
@@ -27,16 +27,27 @@
 	return l1 != l2 ? l1 - l2 : strcmp(s1, s2);
 }
 
-static void add_cmd_list(struct cmdnames *cmds, struct cmdnames *old)
+static int add_cmd_list(struct cmdnames *cmds, struct cmdnames *old)
 {
-	unsigned int i;
+	unsigned int i, nr = cmds->cnt + old->cnt;
+	void *tmp;
 
-	ALLOC_GROW(cmds->names, cmds->cnt + old->cnt, cmds->alloc);
-
+	if (nr > cmds->alloc) {
+		/* Choose the bigger one to alloc */
+		if (alloc_nr(cmds->alloc) < nr)
+			cmds->alloc = nr;
+		else
+			cmds->alloc = alloc_nr(cmds->alloc);
+		tmp = realloc(cmds->names, cmds->alloc * sizeof(*cmds->names));
+		if (!tmp)
+			return -1;
+		cmds->names = tmp;
+	}
 	for (i = 0; i < old->cnt; i++)
 		cmds->names[cmds->cnt++] = old->names[i];
 	zfree(&old->names);
 	old->cnt = 0;
+	return 0;
 }
 
 const char *help_unknown_cmd(const char *cmd)
@@ -52,8 +63,11 @@
 
 	load_command_list("perf-", &main_cmds, &other_cmds);
 
-	add_cmd_list(&main_cmds, &aliases);
-	add_cmd_list(&main_cmds, &other_cmds);
+	if (add_cmd_list(&main_cmds, &aliases) < 0 ||
+	    add_cmd_list(&main_cmds, &other_cmds) < 0) {
+		fprintf(stderr, "ERROR: Failed to allocate command list for unknown command.\n");
+		goto end;
+	}
 	qsort(main_cmds.names, main_cmds.cnt,
 	      sizeof(main_cmds.names), cmdname_compare);
 	uniq(&main_cmds);
@@ -99,6 +113,6 @@
 		for (i = 0; i < n; i++)
 			fprintf(stderr, "\t%s\n", main_cmds.names[i]->name);
 	}
-
+end:
 	exit(1);
 }
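add_cmd_list() above open-codes ALLOC_GROW so an allocation failure can be reported to the caller; the growth policy it preserves is the usual git-style one (the alloc_nr() definition below is assumed from perf's headers):

	#define alloc_nr(x) (((x)+16)*3/2)

	/* grow to max(alloc_nr(cmds->alloc), nr): appends stay amortized
	 * O(1), while one large merge costs a single realloc() */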
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 31c4641..cfab531 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -295,7 +295,7 @@
 		root_in  = &he->parent_he->hroot_in;
 		root_out = &he->parent_he->hroot_out;
 	} else {
-		if (sort__need_collapse)
+		if (hists__has(hists, need_collapse))
 			root_in = &hists->entries_collapsed;
 		else
 			root_in = hists->entries_in;
@@ -953,7 +953,7 @@
 {
 	int err, err2;
 
-	err = sample__resolve_callchain(iter->sample, &iter->parent,
+	err = sample__resolve_callchain(iter->sample, &callchain_cursor, &iter->parent,
 					iter->evsel, al, max_stack_depth);
 	if (err)
 		return err;
@@ -1295,8 +1295,9 @@
 	return ret;
 }
 
-int hists__collapse_insert_entry(struct hists *hists, struct rb_root *root,
-				 struct hist_entry *he)
+static int hists__collapse_insert_entry(struct hists *hists,
+					struct rb_root *root,
+					struct hist_entry *he)
 {
 	struct rb_node **p = &root->rb_node;
 	struct rb_node *parent = NULL;
@@ -1372,7 +1373,7 @@
 	struct hist_entry *n;
 	int ret;
 
-	if (!sort__need_collapse)
+	if (!hists__has(hists, need_collapse))
 		return 0;
 
 	hists->nr_entries = 0;
@@ -1631,7 +1632,7 @@
 		return;
 	}
 
-	if (sort__need_collapse)
+	if (hists__has(hists, need_collapse))
 		root = &hists->entries_collapsed;
 	else
 		root = hists->entries_in;
@@ -2035,7 +2036,7 @@
 	struct hist_entry *he;
 	int64_t cmp;
 
-	if (sort__need_collapse)
+	if (hists__has(hists, need_collapse))
 		root = &hists->entries_collapsed;
 	else
 		root = hists->entries_in;
@@ -2061,6 +2062,8 @@
 	if (he) {
 		memset(&he->stat, 0, sizeof(he->stat));
 		he->hists = hists;
+		if (symbol_conf.cumulate_callchain)
+			memset(he->stat_acc, 0, sizeof(he->stat));
 		rb_link_node(&he->rb_node_in, parent, p);
 		rb_insert_color(&he->rb_node_in, root);
 		hists__inc_stats(hists, he);
@@ -2075,7 +2078,7 @@
 {
 	struct rb_node *n;
 
-	if (sort__need_collapse)
+	if (hists__has(hists, need_collapse))
 		n = hists->entries_collapsed.rb_node;
 	else
 		n = hists->entries_in->rb_node;
@@ -2104,7 +2107,7 @@
 	struct rb_node *nd;
 	struct hist_entry *pos, *pair;
 
-	if (sort__need_collapse)
+	if (hists__has(leader, need_collapse))
 		root = &leader->entries_collapsed;
 	else
 		root = leader->entries_in;
@@ -2129,7 +2132,7 @@
 	struct rb_node *nd;
 	struct hist_entry *pos, *pair;
 
-	if (sort__need_collapse)
+	if (hists__has(other, need_collapse))
 		root = &other->entries_collapsed;
 	else
 		root = other->entries_in;
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index bec0cd6..0f84bfb 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -82,6 +82,8 @@
 	int			nr_hpp_node;
 };
 
+#define hists__has(__h, __f) (__h)->hpp_list->__f
+
 struct hist_entry_iter;
 
 struct hist_iter_ops {
@@ -199,8 +201,6 @@
 int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list);
 
 struct rb_root *hists__get_rotate_entries_in(struct hists *hists);
-int hists__collapse_insert_entry(struct hists *hists,
-				  struct rb_root *root, struct hist_entry *he);
 
 struct perf_hpp {
 	char *buf;
@@ -240,6 +240,14 @@
 struct perf_hpp_list {
 	struct list_head fields;
 	struct list_head sorts;
+
+	int need_collapse;
+	int parent;
+	int sym;
+	int dso;
+	int socket;
+	int thread;
+	int comm;
 };
 
 extern struct perf_hpp_list perf_hpp_list;
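The new hists__has() macro and the per-hpp_list flags above replace globals like sort__need_collapse; a short sketch of what an access expands to, assuming a valid hists pointer:

	if (hists__has(hists, need_collapse))	/* hists->hpp_list->need_collapse */
		root = &hists->entries_collapsed;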
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
index abf1366..9df9960 100644
--- a/tools/perf/util/intel-bts.c
+++ b/tools/perf/util/intel-bts.c
@@ -66,6 +66,7 @@
 	u64				branches_id;
 	size_t				branches_event_size;
 	bool				synth_needs_swap;
+	unsigned long			num_events;
 };
 
 struct intel_bts_queue {
@@ -275,6 +276,10 @@
 	union perf_event event;
 	struct perf_sample sample = { .ip = 0, };
 
+	if (bts->synth_opts.initial_skip &&
+	    bts->num_events++ < bts->synth_opts.initial_skip)
+		return 0;
+
 	event.sample.header.type = PERF_RECORD_SAMPLE;
 	event.sample.header.misc = PERF_RECORD_MISC_USER;
 	event.sample.header.size = sizeof(struct perf_event_header);
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index 9409d01..9c8f15d 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -356,7 +356,7 @@
 
 int intel_pt__strerror(int code, char *buf, size_t buflen)
 {
-	if (code < 1 || code > INTEL_PT_ERR_MAX)
+	if (code < 1 || code >= INTEL_PT_ERR_MAX)
 		code = INTEL_PT_ERR_UNK;
 	strlcpy(buf, intel_pt_err_msgs[code], buflen);
 	return 0;
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 6175784..1371969 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -100,6 +100,8 @@
 	u64 cyc_bit;
 	u64 noretcomp_bit;
 	unsigned max_non_turbo_ratio;
+
+	unsigned long num_events;
 };
 
 enum switch_state {
@@ -972,6 +974,10 @@
 	if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
 		return 0;
 
+	if (pt->synth_opts.initial_skip &&
+	    pt->num_events++ < pt->synth_opts.initial_skip)
+		return 0;
+
 	event->sample.header.type = PERF_RECORD_SAMPLE;
 	event->sample.header.misc = PERF_RECORD_MISC_USER;
 	event->sample.header.size = sizeof(struct perf_event_header);
@@ -1029,6 +1035,10 @@
 	union perf_event *event = ptq->event_buf;
 	struct perf_sample sample = { .ip = 0, };
 
+	if (pt->synth_opts.initial_skip &&
+	    pt->num_events++ < pt->synth_opts.initial_skip)
+		return 0;
+
 	event->sample.header.type = PERF_RECORD_SAMPLE;
 	event->sample.header.misc = PERF_RECORD_MISC_USER;
 	event->sample.header.size = sizeof(struct perf_event_header);
@@ -1087,6 +1097,10 @@
 	union perf_event *event = ptq->event_buf;
 	struct perf_sample sample = { .ip = 0, };
 
+	if (pt->synth_opts.initial_skip &&
+	    pt->num_events++ < pt->synth_opts.initial_skip)
+		return 0;
+
 	event->sample.header.type = PERF_RECORD_SAMPLE;
 	event->sample.header.misc = PERF_RECORD_MISC_USER;
 	event->sample.header.size = sizeof(struct perf_event_header);
@@ -1199,14 +1213,18 @@
 	ptq->have_sample = false;
 
 	if (pt->sample_instructions &&
-	    (state->type & INTEL_PT_INSTRUCTION)) {
+	    (state->type & INTEL_PT_INSTRUCTION) &&
+	    (!pt->synth_opts.initial_skip ||
+	     pt->num_events++ >= pt->synth_opts.initial_skip)) {
 		err = intel_pt_synth_instruction_sample(ptq);
 		if (err)
 			return err;
 	}
 
 	if (pt->sample_transactions &&
-	    (state->type & INTEL_PT_TRANSACTION)) {
+	    (state->type & INTEL_PT_TRANSACTION) &&
+	    (!pt->synth_opts.initial_skip ||
+	     pt->num_events++ >= pt->synth_opts.initial_skip)) {
 		err = intel_pt_synth_transaction_sample(ptq);
 		if (err)
 			return err;
diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c
index ad0c0bb..86afe96 100644
--- a/tools/perf/util/jitdump.c
+++ b/tools/perf/util/jitdump.c
@@ -17,6 +17,7 @@
 #include "strlist.h"
 #include <elf.h>
 
+#include "tsc.h"
 #include "session.h"
 #include "jit.h"
 #include "jitdump.h"
@@ -33,6 +34,7 @@
 	size_t           bufsize;
 	FILE             *in;
 	bool		 needs_bswap; /* handles cross-endianness */
+	bool		 use_arch_timestamp;
 	void		 *debug_data;
 	size_t		 nr_debug_entries;
 	uint32_t         code_load_count;
@@ -158,13 +160,16 @@
 		header.flags      = bswap_64(header.flags);
 	}
 
+	jd->use_arch_timestamp = header.flags & JITDUMP_FLAGS_ARCH_TIMESTAMP;
+
 	if (verbose > 2)
-		pr_debug("version=%u\nhdr.size=%u\nts=0x%llx\npid=%d\nelf_mach=%d\n",
+		pr_debug("version=%u\nhdr.size=%u\nts=0x%llx\npid=%d\nelf_mach=%d\nuse_arch_timestamp=%d\n",
 			header.version,
 			header.total_size,
 			(unsigned long long)header.timestamp,
 			header.pid,
-			header.elf_mach);
+			header.elf_mach,
+			jd->use_arch_timestamp);
 
 	if (header.flags & JITDUMP_FLAGS_RESERVED) {
 		pr_err("jitdump file contains invalid or unsupported flags 0x%llx\n",
@@ -172,10 +177,15 @@
 		goto error;
 	}
 
+	if (jd->use_arch_timestamp && !jd->session->time_conv.time_mult) {
+		pr_err("jitdump file uses arch timestamps but there is no timestamp conversion\n");
+		goto error;
+	}
+
 	/*
 	 * validate event is using the correct clockid
 	 */
-	if (jit_validate_events(jd->session)) {
+	if (!jd->use_arch_timestamp && jit_validate_events(jd->session)) {
 		pr_err("error, jitted code must be sampled with perf record -k 1\n");
 		goto error;
 	}
@@ -329,6 +339,23 @@
 	return 0;
 }
 
+static uint64_t convert_timestamp(struct jit_buf_desc *jd, uint64_t timestamp)
+{
+	struct perf_tsc_conversion tc;
+
+	if (!jd->use_arch_timestamp)
+		return timestamp;
+
+	tc.time_shift = jd->session->time_conv.time_shift;
+	tc.time_mult  = jd->session->time_conv.time_mult;
+	tc.time_zero  = jd->session->time_conv.time_zero;
+
+	if (!tc.time_mult)
+		return 0;
+
+	return tsc_to_perf_time(timestamp, &tc);
+}
+
 static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr)
 {
 	struct perf_sample sample;
@@ -385,7 +412,7 @@
 		return -1;
 	}
 	if (stat(filename, &st))
-		memset(&st, 0, sizeof(stat));
+		memset(&st, 0, sizeof(st));
 
 	event->mmap2.header.type = PERF_RECORD_MMAP2;
 	event->mmap2.header.misc = PERF_RECORD_MISC_USER;
@@ -410,7 +437,7 @@
 		id->tid  = tid;
 	}
 	if (jd->sample_type & PERF_SAMPLE_TIME)
-		id->time = jr->load.p.timestamp;
+		id->time = convert_timestamp(jd, jr->load.p.timestamp);
 
 	/*
 	 * create pseudo sample to induce dso hit increment
@@ -473,7 +500,7 @@
 	size++; /* for \0 */
 
 	if (stat(filename, &st))
-		memset(&st, 0, sizeof(stat));
+		memset(&st, 0, sizeof(st));
 
 	size = PERF_ALIGN(size, sizeof(u64));
 
@@ -499,7 +526,7 @@
 		id->tid  = tid;
 	}
 	if (jd->sample_type & PERF_SAMPLE_TIME)
-		id->time = jr->load.p.timestamp;
+		id->time = convert_timestamp(jd, jr->load.p.timestamp);
 
 	/*
 	 * create pseudo sample to induce dso hit increment
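convert_timestamp() above leans on tsc_to_perf_time(); the mult/shift conversion that helper performs is roughly the following (a sketch of the tools/perf tsc helper, with the quotient/remainder split avoiding 64-bit multiply overflow):

	static u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc)
	{
		u64 quot = cyc >> tc->time_shift;
		u64 rem  = cyc & (((u64)1 << tc->time_shift) - 1);

		return tc->time_zero + quot * tc->time_mult +
		       ((rem * tc->time_mult) >> tc->time_shift);
	}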
diff --git a/tools/perf/util/jitdump.h b/tools/perf/util/jitdump.h
index b66c1f5..bcacd20 100644
--- a/tools/perf/util/jitdump.h
+++ b/tools/perf/util/jitdump.h
@@ -23,9 +23,12 @@
 #define JITHEADER_VERSION 1
 
 enum jitdump_flags_bits {
+	JITDUMP_FLAGS_ARCH_TIMESTAMP_BIT,
 	JITDUMP_FLAGS_MAX_BIT,
 };
 
+#define JITDUMP_FLAGS_ARCH_TIMESTAMP	(1ULL << JITDUMP_FLAGS_ARCH_TIMESTAMP_BIT)
+
 #define JITDUMP_FLAGS_RESERVED (JITDUMP_FLAGS_MAX_BIT < 64 ? \
 				(~((1ULL << JITDUMP_FLAGS_MAX_BIT) - 1)) : 0)
 
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 80b9b6a..639a290 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -32,6 +32,7 @@
 
 	machine->threads = RB_ROOT;
 	pthread_rwlock_init(&machine->threads_lock, NULL);
+	machine->nr_threads = 0;
 	INIT_LIST_HEAD(&machine->dead_threads);
 	machine->last_match = NULL;
 
@@ -430,6 +431,7 @@
 		 */
 		thread__get(th);
 		machine->last_match = th;
+		++machine->nr_threads;
 	}
 
 	return th;
@@ -681,11 +683,13 @@
 
 size_t machine__fprintf(struct machine *machine, FILE *fp)
 {
-	size_t ret = 0;
+	size_t ret;
 	struct rb_node *nd;
 
 	pthread_rwlock_rdlock(&machine->threads_lock);
 
+	ret = fprintf(fp, "Threads: %u\n", machine->nr_threads);
+
 	for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) {
 		struct thread *pos = rb_entry(nd, struct thread, rb_node);
 
@@ -908,11 +912,11 @@
 	return machine__create_kernel_maps(machine);
 }
 
-int machine__load_kallsyms(struct machine *machine, const char *filename,
-			   enum map_type type, symbol_filter_t filter)
+int __machine__load_kallsyms(struct machine *machine, const char *filename,
+			     enum map_type type, bool no_kcore, symbol_filter_t filter)
 {
 	struct map *map = machine__kernel_map(machine);
-	int ret = dso__load_kallsyms(map->dso, filename, map, filter);
+	int ret = __dso__load_kallsyms(map->dso, filename, map, no_kcore, filter);
 
 	if (ret > 0) {
 		dso__set_loaded(map->dso, type);
@@ -927,6 +931,12 @@
 	return ret;
 }
 
+int machine__load_kallsyms(struct machine *machine, const char *filename,
+			   enum map_type type, symbol_filter_t filter)
+{
+	return __machine__load_kallsyms(machine, filename, type, false, filter);
+}
+
 int machine__load_vmlinux_path(struct machine *machine, enum map_type type,
 			       symbol_filter_t filter)
 {
@@ -1413,6 +1423,7 @@
 		pthread_rwlock_wrlock(&machine->threads_lock);
 	rb_erase_init(&th->rb_node, &machine->threads);
 	RB_CLEAR_NODE(&th->rb_node);
+	--machine->nr_threads;
 	/*
 	 * Move it first to the dead_threads list, then drop the reference,
 	 * if this is the last reference, then the thread__delete destructor
@@ -1599,6 +1610,7 @@
 }
 
 static int add_callchain_ip(struct thread *thread,
+			    struct callchain_cursor *cursor,
 			    struct symbol **parent,
 			    struct addr_location *root_al,
 			    u8 *cpumode,
@@ -1630,7 +1642,7 @@
 				 * It seems the callchain is corrupted.
 				 * Discard all.
 				 */
-				callchain_cursor_reset(&callchain_cursor);
+				callchain_cursor_reset(cursor);
 				return 1;
 			}
 			return 0;
@@ -1640,7 +1652,7 @@
 	}
 
 	if (al.sym != NULL) {
-		if (sort__has_parent && !*parent &&
+		if (perf_hpp_list.parent && !*parent &&
 		    symbol__match_regex(al.sym, &parent_regex))
 			*parent = al.sym;
 		else if (have_ignore_callees && root_al &&
@@ -1648,13 +1660,13 @@
 			/* Treat this symbol as the root,
 			   forgetting its callees. */
 			*root_al = al;
-			callchain_cursor_reset(&callchain_cursor);
+			callchain_cursor_reset(cursor);
 		}
 	}
 
 	if (symbol_conf.hide_unresolved && al.sym == NULL)
 		return 0;
-	return callchain_cursor_append(&callchain_cursor, al.addr, al.map, al.sym);
+	return callchain_cursor_append(cursor, al.addr, al.map, al.sym);
 }
 
 struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
@@ -1724,6 +1736,7 @@
  * negative error code on other errors.
  */
 static int resolve_lbr_callchain_sample(struct thread *thread,
+					struct callchain_cursor *cursor,
 					struct perf_sample *sample,
 					struct symbol **parent,
 					struct addr_location *root_al,
@@ -1756,7 +1769,7 @@
 		 */
 		int mix_chain_nr = i + 1 + lbr_nr + 1;
 
-		if (mix_chain_nr > PERF_MAX_STACK_DEPTH + PERF_MAX_BRANCH_DEPTH) {
+		if (mix_chain_nr > (int)sysctl_perf_event_max_stack + PERF_MAX_BRANCH_DEPTH) {
 			pr_warning("corrupted callchain. skipping...\n");
 			return 0;
 		}
@@ -1778,7 +1791,7 @@
 					ip = lbr_stack->entries[0].to;
 			}
 
-			err = add_callchain_ip(thread, parent, root_al, &cpumode, ip);
+			err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip);
 			if (err)
 				return (err < 0) ? err : 0;
 		}
@@ -1789,6 +1802,7 @@
 }
 
 static int thread__resolve_callchain_sample(struct thread *thread,
+					    struct callchain_cursor *cursor,
 					    struct perf_evsel *evsel,
 					    struct perf_sample *sample,
 					    struct symbol **parent,
@@ -1803,10 +1817,8 @@
 	int skip_idx = -1;
 	int first_call = 0;
 
-	callchain_cursor_reset(&callchain_cursor);
-
-	if (has_branch_callstack(evsel)) {
-		err = resolve_lbr_callchain_sample(thread, sample, parent,
+	if (perf_evsel__has_branch_callstack(evsel)) {
+		err = resolve_lbr_callchain_sample(thread, cursor, sample, parent,
 						   root_al, max_stack);
 		if (err)
 			return (err < 0) ? err : 0;
@@ -1816,7 +1828,7 @@
 	 * Based on DWARF debug information, some architectures skip
 	 * a callchain entry saved by the kernel.
 	 */
-	if (chain->nr < PERF_MAX_STACK_DEPTH)
+	if (chain->nr < sysctl_perf_event_max_stack)
 		skip_idx = arch_skip_callchain_idx(thread, chain);
 
 	/*
@@ -1863,10 +1875,10 @@
 		nr = remove_loops(be, nr);
 
 		for (i = 0; i < nr; i++) {
-			err = add_callchain_ip(thread, parent, root_al,
+			err = add_callchain_ip(thread, cursor, parent, root_al,
 					       NULL, be[i].to);
 			if (!err)
-				err = add_callchain_ip(thread, parent, root_al,
+				err = add_callchain_ip(thread, cursor, parent, root_al,
 						       NULL, be[i].from);
 			if (err == -EINVAL)
 				break;
@@ -1877,7 +1889,7 @@
 	}
 
 check_calls:
-	if (chain->nr > PERF_MAX_STACK_DEPTH && (int)chain->nr > max_stack) {
+	if (chain->nr > sysctl_perf_event_max_stack && (int)chain->nr > max_stack) {
 		pr_warning("corrupted callchain. skipping...\n");
 		return 0;
 	}
@@ -1896,7 +1908,7 @@
 #endif
 		ip = chain->ips[j];
 
-		err = add_callchain_ip(thread, parent, root_al, &cpumode, ip);
+		err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip);
 
 		if (err)
 			return (err < 0) ? err : 0;
@@ -1915,19 +1927,12 @@
 				       entry->map, entry->sym);
 }
 
-int thread__resolve_callchain(struct thread *thread,
-			      struct perf_evsel *evsel,
-			      struct perf_sample *sample,
-			      struct symbol **parent,
-			      struct addr_location *root_al,
-			      int max_stack)
+static int thread__resolve_callchain_unwind(struct thread *thread,
+					    struct callchain_cursor *cursor,
+					    struct perf_evsel *evsel,
+					    struct perf_sample *sample,
+					    int max_stack)
 {
-	int ret = thread__resolve_callchain_sample(thread, evsel,
-						   sample, parent,
-						   root_al, max_stack);
-	if (ret)
-		return ret;
-
 	/* Can we do dwarf post unwind? */
 	if (!((evsel->attr.sample_type & PERF_SAMPLE_REGS_USER) &&
 	      (evsel->attr.sample_type & PERF_SAMPLE_STACK_USER)))
@@ -1938,9 +1943,45 @@
 	    (!sample->user_stack.size))
 		return 0;
 
-	return unwind__get_entries(unwind_entry, &callchain_cursor,
+	return unwind__get_entries(unwind_entry, cursor,
 				   thread, sample, max_stack);
+}
 
+int thread__resolve_callchain(struct thread *thread,
+			      struct callchain_cursor *cursor,
+			      struct perf_evsel *evsel,
+			      struct perf_sample *sample,
+			      struct symbol **parent,
+			      struct addr_location *root_al,
+			      int max_stack)
+{
+	int ret = 0;
+
+	callchain_cursor_reset(&callchain_cursor);
+
+	if (callchain_param.order == ORDER_CALLEE) {
+		ret = thread__resolve_callchain_sample(thread, cursor,
+						       evsel, sample,
+						       parent, root_al,
+						       max_stack);
+		if (ret)
+			return ret;
+		ret = thread__resolve_callchain_unwind(thread, cursor,
+						       evsel, sample,
+						       max_stack);
+	} else {
+		ret = thread__resolve_callchain_unwind(thread, cursor,
+						       evsel, sample,
+						       max_stack);
+		if (ret)
+			return ret;
+		ret = thread__resolve_callchain_sample(thread, cursor,
+						       evsel, sample,
+						       parent, root_al,
+						       max_stack);
+	}
+
+	return ret;
 }
 
 int machine__for_each_thread(struct machine *machine,
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 8499db2..83f4679 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -31,6 +31,7 @@
 	char		  *root_dir;
 	struct rb_root	  threads;
 	pthread_rwlock_t  threads_lock;
+	unsigned int	  nr_threads;
 	struct list_head  dead_threads;
 	struct thread	  *last_match;
 	struct vdso_info  *vdso_info;
@@ -141,7 +142,11 @@
 					   struct addr_location *al);
 struct mem_info *sample__resolve_mem(struct perf_sample *sample,
 				     struct addr_location *al);
+
+struct callchain_cursor;
+
 int thread__resolve_callchain(struct thread *thread,
+			      struct callchain_cursor *cursor,
 			      struct perf_evsel *evsel,
 			      struct perf_sample *sample,
 			      struct symbol **parent,
@@ -211,6 +216,8 @@
 struct map *machine__findnew_module_map(struct machine *machine, u64 start,
 					const char *filename);
 
+int __machine__load_kallsyms(struct machine *machine, const char *filename,
+			     enum map_type type, bool no_kcore, symbol_filter_t filter);
 int machine__load_kallsyms(struct machine *machine, const char *filename,
 			   enum map_type type, symbol_filter_t filter);
 int machine__load_vmlinux_path(struct machine *machine, enum map_type type,
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 171b6d1..b19bcd3 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -289,7 +289,7 @@
 	nr = dso__load(map->dso, map, filter);
 	if (nr < 0) {
 		if (map->dso->has_build_id) {
-			char sbuild_id[BUILD_ID_SIZE * 2 + 1];
+			char sbuild_id[SBUILD_ID_SIZE];
 
 			build_id__sprintf(map->dso->build_id,
 					  sizeof(map->dso->build_id),
@@ -431,6 +431,13 @@
 	if (map->dso->rel)
 		return rip - map->pgoff;
 
+	/*
+	 * kernel modules also have DSO_TYPE_USER in dso->kernel,
+	 * but all kernel modules are ET_REL, so they won't get here.
+	 */
+	if (map->dso->kernel == DSO_TYPE_USER)
+		return rip + map->dso->text_offset;
+
 	return map->unmap_ip(map, rip) - map->reloc;
 }
 
@@ -454,6 +461,13 @@
 	if (map->dso->rel)
 		return map->unmap_ip(map, ip + map->pgoff);
 
+	/*
+	 * kernel modules also have DSO_TYPE_USER in dso->kernel,
+	 * but all kernel modules are ET_REL, so they won't get here.
+	 */
+	if (map->dso->kernel == DSO_TYPE_USER)
+		return map->unmap_ip(map, ip - map->dso->text_offset);
+
 	return ip + map->reloc;
 }
 
diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c
index b1b9e23..fe84df1 100644
--- a/tools/perf/util/ordered-events.c
+++ b/tools/perf/util/ordered-events.c
@@ -308,3 +308,12 @@
 		free(event);
 	}
 }
+
+void ordered_events__reinit(struct ordered_events *oe)
+{
+	ordered_events__deliver_t old_deliver = oe->deliver;
+
+	ordered_events__free(oe);
+	memset(oe, '\0', sizeof(*oe));
+	ordered_events__init(oe, old_deliver);
+}
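ordered_events__reinit() gives a queue a clean slate while keeping its deliver callback; a hedged sketch of an intended call site (the session field name is assumed):

	/* e.g. before replaying a file from the beginning: */
	ordered_events__reinit(&session->ordered_events);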
diff --git a/tools/perf/util/ordered-events.h b/tools/perf/util/ordered-events.h
index f403991..e11468a 100644
--- a/tools/perf/util/ordered-events.h
+++ b/tools/perf/util/ordered-events.h
@@ -49,6 +49,7 @@
 int ordered_events__flush(struct ordered_events *oe, enum oe_flush how);
 void ordered_events__init(struct ordered_events *oe, ordered_events__deliver_t deliver);
 void ordered_events__free(struct ordered_events *oe);
+void ordered_events__reinit(struct ordered_events *oe);
 
 static inline
 void ordered_events__set_alloc_size(struct ordered_events *oe, u64 size)
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index adef23b..ddb0261 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -602,14 +602,13 @@
 
 static __u64 pmu_format_max_value(const unsigned long *format)
 {
-	int w;
+	__u64 w = 0;
+	int fbit;
 
-	w = bitmap_weight(format, PERF_PMU_FORMAT_BITS);
-	if (!w)
-		return 0;
-	if (w < 64)
-		return (1ULL << w) - 1;
-	return -1;
+	for_each_set_bit(fbit, format, PERF_PMU_FORMAT_BITS)
+		w |= (1ULL << fbit);
+
+	return w;
 }
 
 /*
@@ -644,20 +643,20 @@
 static char *pmu_formats_string(struct list_head *formats)
 {
 	struct perf_pmu_format *format;
-	char *str;
-	struct strbuf buf;
+	char *str = NULL;
+	struct strbuf buf = STRBUF_INIT;
 	unsigned i = 0;
 
 	if (!formats)
 		return NULL;
 
-	strbuf_init(&buf, 0);
 	/* sysfs exported terms */
 	list_for_each_entry(format, formats, list)
-		strbuf_addf(&buf, i++ ? ",%s" : "%s",
-			    format->name);
+		if (strbuf_addf(&buf, i++ ? ",%s" : "%s", format->name) < 0)
+			goto error;
 
 	str = strbuf_detach(&buf, NULL);
+error:
 	strbuf_release(&buf);
 
 	return str;
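The pmu_format_max_value() rewrite above matters for non-contiguous format fields; a standalone sketch of the difference (the bit layout is illustrative):

	#include <stdint.h>

	/* a sysfs format like "config:0-7,32-35" sets bits 0..7 and 32..35 */
	static uint64_t or_each_set_bit(const int *bits, int n)
	{
		uint64_t v = 0;

		while (n--)
			v |= (uint64_t)1 << bits[n];
		return v;	/* 0xf000000ffULL for the layout above */
	}

	/* the old weight-based code computed (1ULL << 12) - 1 = 0xfff,
	 * too small for a field split across the register like this */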
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 8319fbb..74401a2 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -265,6 +265,65 @@
 	return true;
 }
 
+/*
+ * NOTE:
+ * The '.gnu.linkonce.this_module' section of a kernel module ELF maps
+ * directly to 'struct module' from linux/module.h. This section
+ * contains the actual module name, which the kernel will use after
+ * loading the module. But we cannot use 'struct module' here since
+ * linux/module.h is not exposed to user-space. The offset of 'name'
+ * has remained the same for a long time, so it is hardcoded here.
+ */
+#ifdef __LP64__
+#define MOD_NAME_OFFSET 24
+#else
+#define MOD_NAME_OFFSET 12
+#endif
+
+/*
+ * @module can be a module name or a module file path. In case of a
+ * path, inspect the ELF and find out the actual module name.
+ * The caller has to free mod_name after using it.
+ */
+static char *find_module_name(const char *module)
+{
+	int fd;
+	Elf *elf;
+	GElf_Ehdr ehdr;
+	GElf_Shdr shdr;
+	Elf_Data *data;
+	Elf_Scn *sec;
+	char *mod_name = NULL;
+
+	fd = open(module, O_RDONLY);
+	if (fd < 0)
+		return NULL;
+
+	elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+	if (elf == NULL)
+		goto elf_err;
+
+	if (gelf_getehdr(elf, &ehdr) == NULL)
+		goto ret_err;
+
+	sec = elf_section_by_name(elf, &ehdr, &shdr,
+			".gnu.linkonce.this_module", NULL);
+	if (!sec)
+		goto ret_err;
+
+	data = elf_getdata(sec, NULL);
+	if (!data || !data->d_buf)
+		goto ret_err;
+
+	mod_name = strdup((char *)data->d_buf + MOD_NAME_OFFSET);
+
+ret_err:
+	elf_end(elf);
+elf_err:
+	close(fd);
+	return mod_name;
+}
+
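A hedged usage sketch for find_module_name() above (the .ko path is hypothetical):

	char *name = find_module_name("/lib/modules/4.6.0/extra/foo.ko");

	if (name) {
		pr_debug("real module name: %s\n", name);
		free(name);	/* find_module_name() strdup()s the name */
	}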
 #ifdef HAVE_DWARF_SUPPORT
 
 static int kernel_get_module_dso(const char *module, struct dso **pdso)
@@ -486,8 +545,10 @@
 		return -errno;
 
 	elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
-	if (elf == NULL)
-		return -EINVAL;
+	if (elf == NULL) {
+		ret = -EINVAL;
+		goto out_close;
+	}
 
 	if (gelf_getehdr(elf, &ehdr) == NULL)
 		goto out;
@@ -499,6 +560,9 @@
 	ret = 0;
 out:
 	elf_end(elf);
+out_close:
+	close(fd);
+
 	return ret;
 }
 
@@ -583,32 +647,23 @@
 					    int ntevs, const char *module)
 {
 	int i, ret = 0;
-	char *tmp;
+	char *mod_name = NULL;
 
 	if (!module)
 		return 0;
 
-	tmp = strrchr(module, '/');
-	if (tmp) {
-		/* This is a module path -- get the module name */
-		module = strdup(tmp + 1);
-		if (!module)
-			return -ENOMEM;
-		tmp = strchr(module, '.');
-		if (tmp)
-			*tmp = '\0';
-		tmp = (char *)module;	/* For free() */
-	}
+	mod_name = find_module_name(module);
 
 	for (i = 0; i < ntevs; i++) {
-		tevs[i].point.module = strdup(module);
+		tevs[i].point.module =
+			strdup(mod_name ? mod_name : module);
 		if (!tevs[i].point.module) {
 			ret = -ENOMEM;
 			break;
 		}
 	}
 
-	free(tmp);
+	free(mod_name);
 	return ret;
 }
 
@@ -1618,69 +1673,65 @@
 }
 
 /* Compose only probe arg */
-int synthesize_perf_probe_arg(struct perf_probe_arg *pa, char *buf, size_t len)
+char *synthesize_perf_probe_arg(struct perf_probe_arg *pa)
 {
 	struct perf_probe_arg_field *field = pa->field;
-	int ret;
-	char *tmp = buf;
+	struct strbuf buf;
+	char *ret = NULL;
+	int err;
+
+	if (strbuf_init(&buf, 64) < 0)
+		return NULL;
 
 	if (pa->name && pa->var)
-		ret = e_snprintf(tmp, len, "%s=%s", pa->name, pa->var);
+		err = strbuf_addf(&buf, "%s=%s", pa->name, pa->var);
 	else
-		ret = e_snprintf(tmp, len, "%s", pa->name ? pa->name : pa->var);
-	if (ret <= 0)
-		goto error;
-	tmp += ret;
-	len -= ret;
+		err = strbuf_addstr(&buf, pa->name ?: pa->var);
+	if (err)
+		goto out;
 
 	while (field) {
 		if (field->name[0] == '[')
-			ret = e_snprintf(tmp, len, "%s", field->name);
+			err = strbuf_addstr(&buf, field->name);
 		else
-			ret = e_snprintf(tmp, len, "%s%s",
-					 field->ref ? "->" : ".", field->name);
-		if (ret <= 0)
-			goto error;
-		tmp += ret;
-		len -= ret;
+			err = strbuf_addf(&buf, "%s%s", field->ref ? "->" : ".",
+					  field->name);
 		field = field->next;
+		if (err)
+			goto out;
 	}
 
-	if (pa->type) {
-		ret = e_snprintf(tmp, len, ":%s", pa->type);
-		if (ret <= 0)
-			goto error;
-		tmp += ret;
-		len -= ret;
-	}
+	if (pa->type)
+		if (strbuf_addf(&buf, ":%s", pa->type) < 0)
+			goto out;
 
-	return tmp - buf;
-error:
-	pr_debug("Failed to synthesize perf probe argument: %d\n", ret);
+	ret = strbuf_detach(&buf, NULL);
+out:
+	strbuf_release(&buf);
 	return ret;
 }
 
 /* Compose only probe point (not argument) */
 static char *synthesize_perf_probe_point(struct perf_probe_point *pp)
 {
-	char *buf, *tmp;
-	char offs[32] = "", line[32] = "", file[32] = "";
-	int ret, len;
+	struct strbuf buf;
+	char *tmp, *ret = NULL;
+	int len, err = 0;
 
-	buf = zalloc(MAX_CMDLEN);
-	if (buf == NULL) {
-		ret = -ENOMEM;
-		goto error;
-	}
-	if (pp->offset) {
-		ret = e_snprintf(offs, 32, "+%lu", pp->offset);
-		if (ret <= 0)
-			goto error;
-	}
-	if (pp->line) {
-		ret = e_snprintf(line, 32, ":%d", pp->line);
-		if (ret <= 0)
-			goto error;
+	if (strbuf_init(&buf, 64) < 0)
+		return NULL;
+
+	if (pp->function) {
+		if (strbuf_addstr(&buf, pp->function) < 0)
+			goto out;
+		if (pp->offset)
+			err = strbuf_addf(&buf, "+%lu", pp->offset);
+		else if (pp->line)
+			err = strbuf_addf(&buf, ":%d", pp->line);
+		else if (pp->retprobe)
+			err = strbuf_addstr(&buf, "%return");
+		if (err)
+			goto out;
 	}
 	if (pp->file) {
 		tmp = pp->file;
@@ -1689,25 +1740,15 @@
 			tmp = strchr(pp->file + len - 30, '/');
 			tmp = tmp ? tmp + 1 : pp->file + len - 30;
 		}
-		ret = e_snprintf(file, 32, "@%s", tmp);
-		if (ret <= 0)
-			goto error;
+		err = strbuf_addf(&buf, "@%s", tmp);
+		if (!err && !pp->function && pp->line)
+			err = strbuf_addf(&buf, ":%d", pp->line);
 	}
-
-	if (pp->function)
-		ret = e_snprintf(buf, MAX_CMDLEN, "%s%s%s%s%s", pp->function,
-				 offs, pp->retprobe ? "%return" : "", line,
-				 file);
-	else
-		ret = e_snprintf(buf, MAX_CMDLEN, "%s%s", file, line);
-	if (ret <= 0)
-		goto error;
-
-	return buf;
-error:
-	pr_debug("Failed to synthesize perf probe point: %d\n", ret);
-	free(buf);
-	return NULL;
+	if (!err)
+		ret = strbuf_detach(&buf, NULL);
+out:
+	strbuf_release(&buf);
+	return ret;
 }
 
 #if 0
@@ -1736,45 +1777,32 @@
 #endif
 
 static int __synthesize_probe_trace_arg_ref(struct probe_trace_arg_ref *ref,
-					     char **buf, size_t *buflen,
-					     int depth)
+					    struct strbuf *buf, int depth)
 {
-	int ret;
+	int err;
 	if (ref->next) {
 		depth = __synthesize_probe_trace_arg_ref(ref->next, buf,
-							 buflen, depth + 1);
+							 depth + 1);
 		if (depth < 0)
-			goto out;
+			return depth;
 	}
-
-	ret = e_snprintf(*buf, *buflen, "%+ld(", ref->offset);
-	if (ret < 0)
-		depth = ret;
-	else {
-		*buf += ret;
-		*buflen -= ret;
-	}
-out:
-	return depth;
-
+	err = strbuf_addf(buf, "%+ld(", ref->offset);
+	return (err < 0) ? err : depth;
 }
 
 static int synthesize_probe_trace_arg(struct probe_trace_arg *arg,
-				       char *buf, size_t buflen)
+				      struct strbuf *buf)
 {
 	struct probe_trace_arg_ref *ref = arg->ref;
-	int ret, depth = 0;
-	char *tmp = buf;
+	int depth = 0, err;
 
 	/* Argument name or separator */
 	if (arg->name)
-		ret = e_snprintf(buf, buflen, " %s=", arg->name);
+		err = strbuf_addf(buf, " %s=", arg->name);
 	else
-		ret = e_snprintf(buf, buflen, " ");
-	if (ret < 0)
-		return ret;
-	buf += ret;
-	buflen -= ret;
+		err = strbuf_addch(buf, ' ');
+	if (err)
+		return err;
 
 	/* Special case: @XXX */
 	if (arg->value[0] == '@' && arg->ref)
@@ -1782,59 +1810,44 @@
 
 	/* Dereferencing arguments */
 	if (ref) {
-		depth = __synthesize_probe_trace_arg_ref(ref, &buf,
-							  &buflen, 1);
+		depth = __synthesize_probe_trace_arg_ref(ref, buf, 1);
 		if (depth < 0)
 			return depth;
 	}
 
 	/* Print argument value */
 	if (arg->value[0] == '@' && arg->ref)
-		ret = e_snprintf(buf, buflen, "%s%+ld", arg->value,
-				 arg->ref->offset);
+		err = strbuf_addf(buf, "%s%+ld", arg->value, arg->ref->offset);
 	else
-		ret = e_snprintf(buf, buflen, "%s", arg->value);
-	if (ret < 0)
-		return ret;
-	buf += ret;
-	buflen -= ret;
+		err = strbuf_addstr(buf, arg->value);
 
 	/* Closing */
-	while (depth--) {
-		ret = e_snprintf(buf, buflen, ")");
-		if (ret < 0)
-			return ret;
-		buf += ret;
-		buflen -= ret;
-	}
-	/* Print argument type */
-	if (arg->type) {
-		ret = e_snprintf(buf, buflen, ":%s", arg->type);
-		if (ret <= 0)
-			return ret;
-		buf += ret;
-	}
+	while (!err && depth--)
+		err = strbuf_addch(buf, ')');
 
-	return buf - tmp;
+	/* Print argument type */
+	if (!err && arg->type)
+		err = strbuf_addf(buf, ":%s", arg->type);
+
+	return err;
 }
 
 char *synthesize_probe_trace_command(struct probe_trace_event *tev)
 {
 	struct probe_trace_point *tp = &tev->point;
-	char *buf;
-	int i, len, ret;
-
-	buf = zalloc(MAX_CMDLEN);
-	if (buf == NULL)
-		return NULL;
-
-	len = e_snprintf(buf, MAX_CMDLEN, "%c:%s/%s ", tp->retprobe ? 'r' : 'p',
-			 tev->group, tev->event);
-	if (len <= 0)
-		goto error;
+	struct strbuf buf;
+	char *ret = NULL;
+	int i, err;
 
 	/* Uprobes must have tp->module */
 	if (tev->uprobes && !tp->module)
+		return NULL;
+
+	if (strbuf_init(&buf, 32) < 0)
+		return NULL;
+
+	if (strbuf_addf(&buf, "%c:%s/%s ", tp->retprobe ? 'r' : 'p',
+			tev->group, tev->event) < 0)
 		goto error;
 	/*
 	 * If tp->address == 0, then this point must be a
@@ -1849,34 +1862,25 @@
 
 	/* Use the tp->address for uprobes */
 	if (tev->uprobes)
-		ret = e_snprintf(buf + len, MAX_CMDLEN - len, "%s:0x%lx",
-				 tp->module, tp->address);
+		err = strbuf_addf(&buf, "%s:0x%lx", tp->module, tp->address);
 	else if (!strncmp(tp->symbol, "0x", 2))
 		/* Absolute address. See try_to_find_absolute_address() */
-		ret = e_snprintf(buf + len, MAX_CMDLEN - len, "%s%s0x%lx",
-				 tp->module ?: "", tp->module ? ":" : "",
-				 tp->address);
+		err = strbuf_addf(&buf, "%s%s0x%lx", tp->module ?: "",
+				  tp->module ? ":" : "", tp->address);
 	else
-		ret = e_snprintf(buf + len, MAX_CMDLEN - len, "%s%s%s+%lu",
-				 tp->module ?: "", tp->module ? ":" : "",
-				 tp->symbol, tp->offset);
-
-	if (ret <= 0)
+		err = strbuf_addf(&buf, "%s%s%s+%lu", tp->module ?: "",
+				tp->module ? ":" : "", tp->symbol, tp->offset);
+	if (err)
 		goto error;
-	len += ret;
 
-	for (i = 0; i < tev->nargs; i++) {
-		ret = synthesize_probe_trace_arg(&tev->args[i], buf + len,
-						  MAX_CMDLEN - len);
-		if (ret <= 0)
+	for (i = 0; i < tev->nargs; i++)
+		if (synthesize_probe_trace_arg(&tev->args[i], &buf) < 0)
 			goto error;
-		len += ret;
-	}
 
-	return buf;
+	ret = strbuf_detach(&buf, NULL);
 error:
-	free(buf);
-	return NULL;
+	strbuf_release(&buf);
+	return ret;
 }
 
 static int find_perf_probe_point_from_map(struct probe_trace_point *tp,
@@ -1958,7 +1962,7 @@
 static int convert_to_perf_probe_event(struct probe_trace_event *tev,
 			       struct perf_probe_event *pev, bool is_kprobe)
 {
-	char buf[64] = "";
+	struct strbuf buf = STRBUF_INIT;
 	int i, ret;
 
 	/* Convert event/group name */
@@ -1981,14 +1985,15 @@
 		if (tev->args[i].name)
 			pev->args[i].name = strdup(tev->args[i].name);
 		else {
-			ret = synthesize_probe_trace_arg(&tev->args[i],
-							  buf, 64);
-			pev->args[i].name = strdup(buf);
+			if ((ret = strbuf_init(&buf, 32)) < 0)
+				goto error;
+			ret = synthesize_probe_trace_arg(&tev->args[i], &buf);
+			pev->args[i].name = strbuf_detach(&buf, NULL);
 		}
 		if (pev->args[i].name == NULL && ret >= 0)
 			ret = -ENOMEM;
 	}
-
+error:
 	if (ret < 0)
 		clear_perf_probe_event(pev);
 
@@ -2162,35 +2167,38 @@
 				     struct strbuf *result)
 {
 	int i, ret;
-	char buf[128];
-	char *place;
+	char *buf;
+
+	if (asprintf(&buf, "%s:%s", group, event) < 0)
+		return -errno;
+	ret = strbuf_addf(result, "  %-20s (on ", buf);
+	free(buf);
+	if (ret)
+		return ret;
 
 	/* Synthesize only event probe point */
-	place = synthesize_perf_probe_point(&pev->point);
-	if (!place)
-		return -EINVAL;
+	buf = synthesize_perf_probe_point(&pev->point);
+	if (!buf)
+		return -ENOMEM;
+	ret = strbuf_addstr(result, buf);
+	free(buf);
 
-	ret = e_snprintf(buf, 128, "%s:%s", group, event);
-	if (ret < 0)
-		goto out;
+	if (!ret && module)
+		ret = strbuf_addf(result, " in %s", module);
 
-	strbuf_addf(result, "  %-20s (on %s", buf, place);
-	if (module)
-		strbuf_addf(result, " in %s", module);
-
-	if (pev->nargs > 0) {
-		strbuf_add(result, " with", 5);
-		for (i = 0; i < pev->nargs; i++) {
-			ret = synthesize_perf_probe_arg(&pev->args[i],
-							buf, 128);
-			if (ret < 0)
-				goto out;
-			strbuf_addf(result, " %s", buf);
+	if (!ret && pev->nargs > 0) {
+		ret = strbuf_add(result, " with", 5);
+		for (i = 0; !ret && i < pev->nargs; i++) {
+			buf = synthesize_perf_probe_arg(&pev->args[i]);
+			if (!buf)
+				return -ENOMEM;
+			ret = strbuf_addf(result, " %s", buf);
+			free(buf);
 		}
 	}
-	strbuf_addch(result, ')');
-out:
-	free(place);
+	if (!ret)
+		ret = strbuf_addch(result, ')');
+
 	return ret;
 }
 
@@ -2498,7 +2506,8 @@
 
 void __weak arch__fix_tev_from_maps(struct perf_probe_event *pev __maybe_unused,
 				struct probe_trace_event *tev __maybe_unused,
-				struct map *map __maybe_unused) { }
+				struct map *map __maybe_unused,
+				struct symbol *sym __maybe_unused) { }
 
 /*
  * Find probe function addresses from map.
@@ -2516,6 +2525,7 @@
 	struct probe_trace_point *tp;
 	int num_matched_functions;
 	int ret, i, j, skipped = 0;
+	char *mod_name;
 
 	map = get_target_map(pev->target, pev->uprobes);
 	if (!map) {
@@ -2600,9 +2610,19 @@
 		tp->realname = strdup_or_goto(sym->name, nomem_out);
 
 		tp->retprobe = pp->retprobe;
-		if (pev->target)
-			tev->point.module = strdup_or_goto(pev->target,
-							   nomem_out);
+		if (pev->target) {
+			if (pev->uprobes) {
+				tev->point.module = strdup_or_goto(pev->target,
+								   nomem_out);
+			} else {
+				mod_name = find_module_name(pev->target);
+				tev->point.module =
+					strdup(mod_name ? mod_name : pev->target);
+				free(mod_name);
+				if (!tev->point.module)
+					goto nomem_out;
+			}
+		}
 		tev->uprobes = pev->uprobes;
 		tev->nargs = pev->nargs;
 		if (tev->nargs) {
@@ -2624,7 +2644,7 @@
 					strdup_or_goto(pev->args[i].type,
 							nomem_out);
 		}
-		arch__fix_tev_from_maps(pev, tev, map);
+		arch__fix_tev_from_maps(pev, tev, map, sym);
 	}
 	if (ret == skipped) {
 		ret = -ENOENT;
@@ -2743,9 +2763,13 @@
 {
 	int ret;
 
-	if (pev->uprobes && !pev->group) {
-		/* Replace group name if not given */
-		ret = convert_exec_to_group(pev->target, &pev->group);
+	if (!pev->group) {
+		/* Set group name if not given */
+		if (!pev->uprobes) {
+			pev->group = strdup(PERFPROBE_GROUP);
+			ret = pev->group ? 0 : -ENOMEM;
+		} else
+			ret = convert_exec_to_group(pev->target, &pev->group);
 		if (ret != 0) {
 			pr_warning("Failed to make a group name.\n");
 			return ret;
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index e54e7b0..5a27eb4 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -120,7 +120,7 @@
 /* Events to command string */
 char *synthesize_perf_probe_command(struct perf_probe_event *pev);
 char *synthesize_probe_trace_command(struct probe_trace_event *tev);
-int synthesize_perf_probe_arg(struct perf_probe_arg *pa, char *buf, size_t len);
+char *synthesize_perf_probe_arg(struct perf_probe_arg *pa);
 
 /* Check the perf_probe_event needs debuginfo */
 bool perf_probe_event_need_dwarf(struct perf_probe_event *pev);
@@ -154,7 +154,8 @@
 int show_available_funcs(const char *module, struct strfilter *filter, bool user);
 bool arch__prefers_symtab(void);
 void arch__fix_tev_from_maps(struct perf_probe_event *pev,
-			     struct probe_trace_event *tev, struct map *map);
+			     struct probe_trace_event *tev, struct map *map,
+			     struct symbol *sym);
 
 /* If there is no space to write, returns -E2BIG. */
 int e_snprintf(char *str, size_t size, const char *format, ...)
diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c
index e3b3b92..3fe6214 100644
--- a/tools/perf/util/probe-file.c
+++ b/tools/perf/util/probe-file.c
@@ -220,8 +220,7 @@
 
 	pr_debug("Writing event: %s\n", buf);
 	if (!probe_event_dry_run) {
-		ret = write(fd, buf, strlen(buf));
-		if (ret <= 0) {
+		if (write(fd, buf, strlen(buf)) < (int)strlen(buf)) {
 			ret = -errno;
 			pr_warning("Failed to write event: %s\n",
 				   strerror_r(errno, sbuf, sizeof(sbuf)));
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index b3bd0fb..1259839 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -553,7 +553,7 @@
 static int find_variable(Dwarf_Die *sc_die, struct probe_finder *pf)
 {
 	Dwarf_Die vr_die;
-	char buf[32], *ptr;
+	char *buf, *ptr;
 	int ret = 0;
 
 	/* Copy raw parameters */
@@ -563,13 +563,13 @@
 	if (pf->pvar->name)
 		pf->tvar->name = strdup(pf->pvar->name);
 	else {
-		ret = synthesize_perf_probe_arg(pf->pvar, buf, 32);
-		if (ret < 0)
-			return ret;
+		buf = synthesize_perf_probe_arg(pf->pvar);
+		if (!buf)
+			return -ENOMEM;
 		ptr = strchr(buf, ':');	/* Change type separator to _ */
 		if (ptr)
 			*ptr = '_';
-		pf->tvar->name = strdup(buf);
+		pf->tvar->name = buf;
 	}
 	if (pf->tvar->name == NULL)
 		return -ENOMEM;
@@ -1294,6 +1294,7 @@
 {
 	struct available_var_finder *af = data;
 	struct variable_list *vl;
+	struct strbuf buf = STRBUF_INIT;
 	int tag, ret;
 
 	vl = &af->vls[af->nvls - 1];
@@ -1307,25 +1308,26 @@
 		if (ret == 0 || ret == -ERANGE) {
 			int ret2;
 			bool externs = !af->child;
-			struct strbuf buf;
 
-			strbuf_init(&buf, 64);
+			if (strbuf_init(&buf, 64) < 0)
+				goto error;
 
 			if (probe_conf.show_location_range) {
-				if (!externs) {
-					if (ret)
-						strbuf_add(&buf, "[INV]\t", 6);
-					else
-						strbuf_add(&buf, "[VAL]\t", 6);
-				} else
-					strbuf_add(&buf, "[EXT]\t", 6);
+				if (!externs)
+					ret2 = strbuf_add(&buf,
+						ret ? "[INV]\t" : "[VAL]\t", 6);
+				else
+					ret2 = strbuf_add(&buf, "[EXT]\t", 6);
+				if (ret2)
+					goto error;
 			}
 
 			ret2 = die_get_varname(die_mem, &buf);
 
 			if (!ret2 && probe_conf.show_location_range &&
 				!externs) {
-				strbuf_addch(&buf, '\t');
+				if (strbuf_addch(&buf, '\t') < 0)
+					goto error;
 				ret2 = die_get_var_range(&af->pf.sp_die,
 							die_mem, &buf);
 			}
@@ -1343,6 +1345,10 @@
 		return DIE_FIND_CB_CONTINUE;
 	else
 		return DIE_FIND_CB_SIBLING;
+error:
+	strbuf_release(&buf);
+	pr_debug("Error in strbuf\n");
+	return DIE_FIND_CB_END;
 }
 
 /* Add a found vars into available variables list */
diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources
index 8162ba0..36c6862 100644
--- a/tools/perf/util/python-ext-sources
+++ b/tools/perf/util/python-ext-sources
@@ -23,3 +23,4 @@
 util/trace-event.c
 ../lib/rbtree.c
 util/string.c
+util/symbol_fprintf.c
diff --git a/tools/perf/util/quote.c b/tools/perf/util/quote.c
index 01f0324..c6d4ee2 100644
--- a/tools/perf/util/quote.c
+++ b/tools/perf/util/quote.c
@@ -17,38 +17,42 @@
 	return (c == '\'' || c == '!');
 }
 
-static void sq_quote_buf(struct strbuf *dst, const char *src)
+static int sq_quote_buf(struct strbuf *dst, const char *src)
 {
 	char *to_free = NULL;
+	int ret;
 
 	if (dst->buf == src)
 		to_free = strbuf_detach(dst, NULL);
 
-	strbuf_addch(dst, '\'');
-	while (*src) {
+	ret = strbuf_addch(dst, '\'');
+	while (!ret && *src) {
 		size_t len = strcspn(src, "'!");
-		strbuf_add(dst, src, len);
+		ret = strbuf_add(dst, src, len);
 		src += len;
-		while (need_bs_quote(*src)) {
-			strbuf_addstr(dst, "'\\");
-			strbuf_addch(dst, *src++);
-			strbuf_addch(dst, '\'');
-		}
+		while (!ret && need_bs_quote(*src))
+			ret = strbuf_addf(dst, "'\\%c\'", *src++);
 	}
-	strbuf_addch(dst, '\'');
+	if (!ret)
+		ret = strbuf_addch(dst, '\'');
 	free(to_free);
+
+	return ret;
 }
 
-void sq_quote_argv(struct strbuf *dst, const char** argv, size_t maxlen)
+int sq_quote_argv(struct strbuf *dst, const char** argv, size_t maxlen)
 {
-	int i;
+	int i, ret;
 
 	/* Copy into destination buffer. */
-	strbuf_grow(dst, 255);
-	for (i = 0; argv[i]; ++i) {
-		strbuf_addch(dst, ' ');
-		sq_quote_buf(dst, argv[i]);
+	ret = strbuf_grow(dst, 255);
+	for (i = 0; !ret && argv[i]; ++i) {
+		ret = strbuf_addch(dst, ' ');
+		if (ret)
+			break;
+		ret = sq_quote_buf(dst, argv[i]);
 		if (maxlen && dst->len > maxlen)
 			die("Too many or long arguments");
 	}
+	return ret;
 }
diff --git a/tools/perf/util/quote.h b/tools/perf/util/quote.h
index 3340c9c..e1ec191 100644
--- a/tools/perf/util/quote.h
+++ b/tools/perf/util/quote.h
@@ -24,6 +24,6 @@
  * sq_quote() in a real application.
  */
 
-void sq_quote_argv(struct strbuf *, const char **argv, size_t maxlen);
+int sq_quote_argv(struct strbuf *, const char **argv, size_t maxlen);
 
 #endif /* __PERF_QUOTE_H */
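
With sq_quote_buf() and sq_quote_argv() now propagating strbuf failures, callers check the return value before using the quoted command line. A minimal sketch (the argv contents are illustrative):

	struct strbuf cmd = STRBUF_INIT;
	const char *argv[] = { "perf", "record", "-a", NULL };
	int ret = sq_quote_argv(&cmd, argv, 0);	/* maxlen == 0: no length limit */

	if (ret) {
		strbuf_release(&cmd);
		return ret;	/* negative errno from the strbuf layer */
	}
	/* cmd.buf now holds " 'perf' 'record' '-a'" (each arg space-prefixed) */
	strbuf_release(&cmd);
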
diff --git a/tools/perf/util/rb_resort.h b/tools/perf/util/rb_resort.h
new file mode 100644
index 0000000..abc76e3
--- /dev/null
+++ b/tools/perf/util/rb_resort.h
@@ -0,0 +1,149 @@
+#ifndef _PERF_RESORT_RB_H_
+#define _PERF_RESORT_RB_H_
+/*
+ * Template for creating a class to resort an existing rb_tree according to
+ * a new sort criterion, which must be present in the entries of the source
+ * rb_tree.
+ *
+ * (c) 2016 Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Quick example: resorting threads by their shortname:
+ *
+ * First define the prefix (threads) to be used for the functions and data
+ * structures created, and provide an expression for the sorting, then the
+ * fields to be present in each of the entries in the new, sorted, rb_tree.
+ *
+ * The body of the init function should collect the fields, maybe
+ * pre-calculating them from multiple entries in the original 'entry' from
+ * the rb_tree used as a source for the entries to be sorted:
+
+DEFINE_RESORT_RB(threads, strcmp(a->thread->shortname,
+				    b->thread->shortname) < 0,
+	struct thread *thread;
+)
+{
+	entry->thread = rb_entry(nd, struct thread, rb_node);
+}
+
+ * After this it is just a matter of instantiating it and iterating it. For
+ * a few data structures with existing rb_trees, such as 'struct machine',
+ * helpers are available to get the rb_root and the nr_entries:
+
+	DECLARE_RESORT_RB_MACHINE_THREADS(threads, machine_ptr);
+
+ * This will instantiate the new rb_tree and a cursor for it, which can be used as:
+
+	struct rb_node *nd;
+
+	resort_rb__for_each(nd, threads) {
+		struct thread *t = threads_entry;
+		printf("%s: %d\n", t->shortname, t->tid);
+	}
+
+ * Then delete it:
+
+	resort_rb__delete(threads);
+
+ * The names of the data structures and functions will have a _sorted suffix
+ * right before the method names, i.e. will look like:
+ *
+ * 	struct threads_sorted_entry {}
+ * 	threads_sorted__insert()
+ */
+
+#define DEFINE_RESORT_RB(__name, __comp, ...)					\
+struct __name##_sorted_entry {							\
+	struct rb_node	rb_node;						\
+	__VA_ARGS__								\
+};										\
+static void __name##_sorted__init_entry(struct rb_node *nd,			\
+					struct __name##_sorted_entry *entry);	\
+										\
+static int __name##_sorted__cmp(struct rb_node *nda, struct rb_node *ndb)	\
+{										\
+	struct __name##_sorted_entry *a, *b;					\
+	a = rb_entry(nda, struct __name##_sorted_entry, rb_node);		\
+	b = rb_entry(ndb, struct __name##_sorted_entry, rb_node);		\
+	return __comp;								\
+}										\
+										\
+struct __name##_sorted {							\
+       struct rb_root		    entries;					\
+       struct __name##_sorted_entry nd[0];					\
+};										\
+										\
+static void __name##_sorted__insert(struct __name##_sorted *sorted,		\
+				      struct rb_node *sorted_nd)		\
+{										\
+	struct rb_node **p = &sorted->entries.rb_node, *parent = NULL;		\
+	while (*p != NULL) {							\
+		parent = *p;							\
+		if (__name##_sorted__cmp(sorted_nd, parent))			\
+			p = &(*p)->rb_left;					\
+		else								\
+			p = &(*p)->rb_right;					\
+	}									\
+	rb_link_node(sorted_nd, parent, p);					\
+	rb_insert_color(sorted_nd, &sorted->entries);				\
+}										\
+										\
+static void __name##_sorted__sort(struct __name##_sorted *sorted,		\
+				    struct rb_root *entries)			\
+{										\
+	struct rb_node *nd;							\
+	unsigned int i = 0;							\
+	for (nd = rb_first(entries); nd; nd = rb_next(nd)) {			\
+		struct __name##_sorted_entry *snd = &sorted->nd[i++];		\
+		__name##_sorted__init_entry(nd, snd);				\
+		__name##_sorted__insert(sorted, &snd->rb_node);			\
+	}									\
+}										\
+										\
+static struct __name##_sorted *__name##_sorted__new(struct rb_root *entries,	\
+						    int nr_entries)		\
+{										\
+	struct __name##_sorted *sorted;						\
+	sorted = malloc(sizeof(*sorted) + sizeof(sorted->nd[0]) * nr_entries);	\
+	if (sorted) {								\
+		sorted->entries = RB_ROOT;					\
+		__name##_sorted__sort(sorted, entries);				\
+	}									\
+	return sorted;								\
+}										\
+										\
+static void __name##_sorted__delete(struct __name##_sorted *sorted)		\
+{										\
+	free(sorted);								\
+}										\
+										\
+static void __name##_sorted__init_entry(struct rb_node *nd,			\
+					struct __name##_sorted_entry *entry)
+
+#define DECLARE_RESORT_RB(__name)						\
+struct __name##_sorted_entry *__name##_entry;					\
+struct __name##_sorted *__name = __name##_sorted__new
+
+#define resort_rb__for_each(__nd, __name)					\
+	for (__nd = rb_first(&__name->entries);					\
+	     __name##_entry = rb_entry(__nd, struct __name##_sorted_entry,	\
+				       rb_node), __nd;				\
+	     __nd = rb_next(__nd))
+
+#define resort_rb__delete(__name)						\
+	__name##_sorted__delete(__name), __name = NULL
+
+/*
+ * Helpers for other classes that contains both an rbtree and the
+ * number of entries in it:
+ */
+
+/* For 'struct intlist' */
+#define DECLARE_RESORT_RB_INTLIST(__name, __ilist)				\
+	DECLARE_RESORT_RB(__name)(&__ilist->rblist.entries,			\
+				  __ilist->rblist.nr_entries)
+
+/* For 'struct machine->threads' */
+#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine)			\
+	DECLARE_RESORT_RB(__name)(&__machine->threads, __machine->nr_threads)
+
+#endif /* _PERF_RESORT_RB_H_ */
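
Putting the template together for another existing rb_tree user, a sketch that resorts a 'struct intlist' (see intlist.h) by integer value, ascending; 'ilist' is assumed to be an already-populated 'struct intlist *':

	DEFINE_RESORT_RB(ints, a->i < b->i,
		int i;
	)
	{
		entry->i = rb_entry(nd, struct int_node, rb_node)->i;
	}

	...
		struct rb_node *nd;

		DECLARE_RESORT_RB_INTLIST(ints, ilist);
		if (ints == NULL)
			return -ENOMEM;	/* _sorted__new() may fail to allocate */
		resort_rb__for_each(nd, ints)
			printf("%d\n", ints_entry->i);
		resort_rb__delete(ints);
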
diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
index 0467367..481792c 100644
--- a/tools/perf/util/record.c
+++ b/tools/perf/util/record.c
@@ -129,7 +129,8 @@
 	return true;
 }
 
-void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts)
+void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts,
+			 struct callchain_param *callchain)
 {
 	struct perf_evsel *evsel;
 	bool use_sample_identifier = false;
@@ -148,7 +149,7 @@
 	use_comm_exec = perf_can_comm_exec();
 
 	evlist__for_each(evlist, evsel) {
-		perf_evsel__config(evsel, opts);
+		perf_evsel__config(evsel, opts, callchain);
 		if (evsel->tracking && use_comm_exec)
 			evsel->attr.comm_exec = 1;
 	}
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
index b3aabc0..62c7f69 100644
--- a/tools/perf/util/scripting-engines/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -31,6 +31,8 @@
 #include <perl.h>
 
 #include "../../perf.h"
+#include "../callchain.h"
+#include "../machine.h"
 #include "../thread.h"
 #include "../event.h"
 #include "../trace-event.h"
@@ -248,10 +250,90 @@
 		define_event_symbols(event, ev_name, args->next);
 }
 
+static SV *perl_process_callchain(struct perf_sample *sample,
+				  struct perf_evsel *evsel,
+				  struct addr_location *al)
+{
+	AV *list;
+
+	list = newAV();
+	if (!list)
+		goto exit;
+
+	if (!symbol_conf.use_callchain || !sample->callchain)
+		goto exit;
+
+	if (thread__resolve_callchain(al->thread, &callchain_cursor, evsel,
+				      sample, NULL, NULL,
+				      sysctl_perf_event_max_stack) != 0) {
+		pr_err("Failed to resolve callchain. Skipping\n");
+		goto exit;
+	}
+	callchain_cursor_commit(&callchain_cursor);
+
+
+	while (1) {
+		HV *elem;
+		struct callchain_cursor_node *node;
+		node = callchain_cursor_current(&callchain_cursor);
+		if (!node)
+			break;
+
+		elem = newHV();
+		if (!elem)
+			goto exit;
+
+		if (!hv_stores(elem, "ip", newSVuv(node->ip))) {
+			hv_undef(elem);
+			goto exit;
+		}
+
+		if (node->sym) {
+			HV *sym = newHV();
+			if (!sym) {
+				hv_undef(elem);
+				goto exit;
+			}
+			if (!hv_stores(sym, "start",   newSVuv(node->sym->start)) ||
+			    !hv_stores(sym, "end",     newSVuv(node->sym->end)) ||
+			    !hv_stores(sym, "binding", newSVuv(node->sym->binding)) ||
+			    !hv_stores(sym, "name",    newSVpvn(node->sym->name,
+								node->sym->namelen)) ||
+			    !hv_stores(elem, "sym",    newRV_noinc((SV*)sym))) {
+				hv_undef(sym);
+				hv_undef(elem);
+				goto exit;
+			}
+		}
+
+		if (node->map) {
+			struct map *map = node->map;
+			const char *dsoname = "[unknown]";
+			if (map && map->dso && (map->dso->name || map->dso->long_name)) {
+				if (symbol_conf.show_kernel_path && map->dso->long_name)
+					dsoname = map->dso->long_name;
+				else if (map->dso->name)
+					dsoname = map->dso->name;
+			}
+			if (!hv_stores(elem, "dso", newSVpv(dsoname,0))) {
+				hv_undef(elem);
+				goto exit;
+			}
+		}
+
+		callchain_cursor_advance(&callchain_cursor);
+		av_push(list, newRV_noinc((SV*)elem));
+	}
+
+exit:
+	return newRV_noinc((SV*)list);
+}
+
 static void perl_process_tracepoint(struct perf_sample *sample,
 				    struct perf_evsel *evsel,
-				    struct thread *thread)
+				    struct addr_location *al)
 {
+	struct thread *thread = al->thread;
 	struct event_format *event = evsel->tp_format;
 	struct format_field *field;
 	static char handler[256];
@@ -295,6 +377,7 @@
 	XPUSHs(sv_2mortal(newSVuv(ns)));
 	XPUSHs(sv_2mortal(newSViv(pid)));
 	XPUSHs(sv_2mortal(newSVpv(comm, 0)));
+	XPUSHs(sv_2mortal(perl_process_callchain(sample, evsel, al)));
 
 	/* common fields other than pid can be accessed via xsub fns */
 
@@ -329,6 +412,7 @@
 		XPUSHs(sv_2mortal(newSVuv(nsecs)));
 		XPUSHs(sv_2mortal(newSViv(pid)));
 		XPUSHs(sv_2mortal(newSVpv(comm, 0)));
+		XPUSHs(sv_2mortal(perl_process_callchain(sample, evsel, al)));
 		call_pv("main::trace_unhandled", G_SCALAR);
 	}
 	SPAGAIN;
@@ -366,7 +450,7 @@
 			       struct perf_evsel *evsel,
 			       struct addr_location *al)
 {
-	perl_process_tracepoint(sample, evsel, al->thread);
+	perl_process_tracepoint(sample, evsel, al);
 	perl_process_event_generic(event, sample, evsel);
 }
 
@@ -490,7 +574,27 @@
 	fprintf(ofp, "use Perf::Trace::Util;\n\n");
 
 	fprintf(ofp, "sub trace_begin\n{\n\t# optional\n}\n\n");
-	fprintf(ofp, "sub trace_end\n{\n\t# optional\n}\n\n");
+	fprintf(ofp, "sub trace_end\n{\n\t# optional\n}\n");
+
+
+	fprintf(ofp, "\n\
+sub print_backtrace\n\
+{\n\
+	my $callchain = shift;\n\
+	for my $node (@$callchain)\n\
+	{\n\
+		if(exists $node->{sym})\n\
+		{\n\
+			printf( \"\\t[\\%%x] \\%%s\\n\", $node->{ip}, $node->{sym}{name});\n\
+		}\n\
+		else\n\
+		{\n\
+			printf( \"\\t[\\%%x]\\n\", $node{ip});\n\
+		}\n\
+	}\n\
+}\n\n\
+");
+
 
 	while ((event = trace_find_next_event(pevent, event))) {
 		fprintf(ofp, "sub %s::%s\n{\n", event->system, event->name);
@@ -502,7 +606,8 @@
 		fprintf(ofp, "$common_secs, ");
 		fprintf(ofp, "$common_nsecs,\n");
 		fprintf(ofp, "\t    $common_pid, ");
-		fprintf(ofp, "$common_comm,\n\t    ");
+		fprintf(ofp, "$common_comm, ");
+		fprintf(ofp, "$common_callchain,\n\t    ");
 
 		not_first = 0;
 		count = 0;
@@ -519,7 +624,7 @@
 
 		fprintf(ofp, "\tprint_header($event_name, $common_cpu, "
 			"$common_secs, $common_nsecs,\n\t             "
-			"$common_pid, $common_comm);\n\n");
+			"$common_pid, $common_comm, $common_callchain);\n\n");
 
 		fprintf(ofp, "\tprintf(\"");
 
@@ -581,17 +686,22 @@
 				fprintf(ofp, "$%s", f->name);
 		}
 
-		fprintf(ofp, ");\n");
+		fprintf(ofp, ");\n\n");
+
+		fprintf(ofp, "\tprint_backtrace($common_callchain);\n");
+
 		fprintf(ofp, "}\n\n");
 	}
 
 	fprintf(ofp, "sub trace_unhandled\n{\n\tmy ($event_name, $context, "
 		"$common_cpu, $common_secs, $common_nsecs,\n\t    "
-		"$common_pid, $common_comm) = @_;\n\n");
+		"$common_pid, $common_comm, $common_callchain) = @_;\n\n");
 
 	fprintf(ofp, "\tprint_header($event_name, $common_cpu, "
 		"$common_secs, $common_nsecs,\n\t             $common_pid, "
-		"$common_comm);\n}\n\n");
+		"$common_comm, $common_callchain);\n");
+	fprintf(ofp, "\tprint_backtrace($common_callchain);\n");
+	fprintf(ofp, "}\n\n");
 
 	fprintf(ofp, "sub print_header\n{\n"
 		"\tmy ($event_name, $cpu, $secs, $nsecs, $pid, $comm) = @_;\n\n"
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index fbd0524..ff13470 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -41,6 +41,7 @@
 #include "../thread-stack.h"
 #include "../trace-event.h"
 #include "../machine.h"
+#include "../call-path.h"
 #include "thread_map.h"
 #include "cpumap.h"
 #include "stat.h"
@@ -323,7 +324,7 @@
 	if (!symbol_conf.use_callchain || !sample->callchain)
 		goto exit;
 
-	if (thread__resolve_callchain(al->thread, evsel,
+	if (thread__resolve_callchain(al->thread, &callchain_cursor, evsel,
 				      sample, NULL, NULL,
 				      scripting_max_stack) != 0) {
 		pr_err("Failed to resolve callchain. Skipping\n");
@@ -407,8 +408,11 @@
 	if (!t)
 		Py_FatalError("couldn't create Python tuple");
 
-	if (!event)
-		die("ug! no event found for type %d", (int)evsel->attr.config);
+	if (!event) {
+		snprintf(handler_name, sizeof(handler_name),
+			 "ug! no event found for type %" PRIu64, (u64)evsel->attr.config);
+		Py_FatalError(handler_name);
+	}
 
 	pid = raw_field_value(event, "common_pid", data);
 
@@ -614,7 +618,7 @@
 			     struct machine *machine)
 {
 	struct tables *tables = container_of(dbe, struct tables, dbe);
-	char sbuild_id[BUILD_ID_SIZE * 2 + 1];
+	char sbuild_id[SBUILD_ID_SIZE];
 	PyObject *t;
 
 	build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id);
@@ -681,7 +685,7 @@
 	struct tables *tables = container_of(dbe, struct tables, dbe);
 	PyObject *t;
 
-	t = tuple_new(21);
+	t = tuple_new(22);
 
 	tuple_set_u64(t, 0, es->db_id);
 	tuple_set_u64(t, 1, es->evsel->db_id);
@@ -704,6 +708,7 @@
 	tuple_set_u64(t, 18, es->sample->data_src);
 	tuple_set_s32(t, 19, es->sample->flags & PERF_BRANCH_MASK);
 	tuple_set_s32(t, 20, !!(es->sample->flags & PERF_IP_FLAG_IN_TX));
+	tuple_set_u64(t, 21, es->call_path_id);
 
 	call_object(tables->sample_handler, t, "sample_table");
 
@@ -998,8 +1003,10 @@
 {
 	const char *perf_db_export_mode = "perf_db_export_mode";
 	const char *perf_db_export_calls = "perf_db_export_calls";
-	PyObject *db_export_mode, *db_export_calls;
+	const char *perf_db_export_callchains = "perf_db_export_callchains";
+	PyObject *db_export_mode, *db_export_calls, *db_export_callchains;
 	bool export_calls = false;
+	bool export_callchains = false;
 	int ret;
 
 	memset(tables, 0, sizeof(struct tables));
@@ -1016,6 +1023,7 @@
 	if (!ret)
 		return;
 
+	/* handle export calls */
 	tables->dbe.crp = NULL;
 	db_export_calls = PyDict_GetItemString(main_dict, perf_db_export_calls);
 	if (db_export_calls) {
@@ -1033,6 +1041,33 @@
 			Py_FatalError("failed to create calls processor");
 	}
 
+	/* handle export callchains */
+	tables->dbe.cpr = NULL;
+	db_export_callchains = PyDict_GetItemString(main_dict,
+						    perf_db_export_callchains);
+	if (db_export_callchains) {
+		ret = PyObject_IsTrue(db_export_callchains);
+		if (ret == -1)
+			handler_call_die(perf_db_export_callchains);
+		export_callchains = !!ret;
+	}
+
+	if (export_callchains) {
+		/*
+		 * Attempt to use the call path root from the call return
+		 * processor, if the call return processor is in use. Otherwise,
+		 * we allocate a new call path root. This prevents exporting
+		 * duplicate call path ids when both are in use simultaneously.
+		 */
+		if (tables->dbe.crp)
+			tables->dbe.cpr = tables->dbe.crp->cpr;
+		else
+			tables->dbe.cpr = call_path_root__new();
+
+		if (!tables->dbe.cpr)
+			Py_FatalError("failed to create call path root");
+	}
+
 	tables->db_export_mode = true;
 	/*
 	 * Reserve per symbol space for symbol->db_id via symbol__priv()
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 4abd85c..2335b28 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -409,6 +409,8 @@
 		tool->stat = process_stat_stub;
 	if (tool->stat_round == NULL)
 		tool->stat_round = process_stat_round_stub;
+	if (tool->time_conv == NULL)
+		tool->time_conv = process_event_op2_stub;
 }
 
 static void swap_sample_id_all(union perf_event *event, void *data)
@@ -794,6 +796,7 @@
 	[PERF_RECORD_STAT]		  = perf_event__stat_swap,
 	[PERF_RECORD_STAT_ROUND]	  = perf_event__stat_round_swap,
 	[PERF_RECORD_EVENT_UPDATE]	  = perf_event__event_update_swap,
+	[PERF_RECORD_TIME_CONV]		  = perf_event__all64_swap,
 	[PERF_RECORD_HEADER_MAX]	  = NULL,
 };
 
@@ -904,7 +907,7 @@
 	unsigned int i;
 	struct ip_callchain *callchain = sample->callchain;
 
-	if (has_branch_callstack(evsel))
+	if (perf_evsel__has_branch_callstack(evsel))
 		callchain__lbr_callstack_printf(sample);
 
 	printf("... FP chain: nr:%" PRIu64 "\n", callchain->nr);
@@ -1078,7 +1081,7 @@
 	if (sample_type & PERF_SAMPLE_CALLCHAIN)
 		callchain__printf(evsel, sample);
 
-	if ((sample_type & PERF_SAMPLE_BRANCH_STACK) && !has_branch_callstack(evsel))
+	if ((sample_type & PERF_SAMPLE_BRANCH_STACK) && !perf_evsel__has_branch_callstack(evsel))
 		branch_stack__printf(sample);
 
 	if (sample_type & PERF_SAMPLE_REGS_USER)
@@ -1341,6 +1344,9 @@
 		return tool->stat(tool, event, session);
 	case PERF_RECORD_STAT_ROUND:
 		return tool->stat_round(tool, event, session);
+	case PERF_RECORD_TIME_CONV:
+		session->time_conv = event->time_conv;
+		return tool->time_conv(tool, event, session);
 	default:
 		return -EINVAL;
 	}
@@ -1830,7 +1836,11 @@
 out_err:
 	ui_progress__finish();
 	perf_session__warn_about_errors(session);
-	ordered_events__free(&session->ordered_events);
+	/*
+	 * We may be switching perf.data output; make ordered_events
+	 * reusable.
+	 */
+	ordered_events__reinit(&session->ordered_events);
 	auxtrace__free_events(session);
 	session->one_mmap = false;
 	return err;
@@ -1947,105 +1957,6 @@
 	return NULL;
 }
 
-void perf_evsel__print_ip(struct perf_evsel *evsel, struct perf_sample *sample,
-			  struct addr_location *al,
-			  unsigned int print_opts, unsigned int stack_depth)
-{
-	struct callchain_cursor_node *node;
-	int print_ip = print_opts & PRINT_IP_OPT_IP;
-	int print_sym = print_opts & PRINT_IP_OPT_SYM;
-	int print_dso = print_opts & PRINT_IP_OPT_DSO;
-	int print_symoffset = print_opts & PRINT_IP_OPT_SYMOFFSET;
-	int print_oneline = print_opts & PRINT_IP_OPT_ONELINE;
-	int print_srcline = print_opts & PRINT_IP_OPT_SRCLINE;
-	char s = print_oneline ? ' ' : '\t';
-
-	if (symbol_conf.use_callchain && sample->callchain) {
-		struct addr_location node_al;
-
-		if (thread__resolve_callchain(al->thread, evsel,
-					      sample, NULL, NULL,
-					      stack_depth) != 0) {
-			if (verbose)
-				error("Failed to resolve callchain. Skipping\n");
-			return;
-		}
-		callchain_cursor_commit(&callchain_cursor);
-
-		if (print_symoffset)
-			node_al = *al;
-
-		while (stack_depth) {
-			u64 addr = 0;
-
-			node = callchain_cursor_current(&callchain_cursor);
-			if (!node)
-				break;
-
-			if (node->sym && node->sym->ignore)
-				goto next;
-
-			if (print_ip)
-				printf("%c%16" PRIx64, s, node->ip);
-
-			if (node->map)
-				addr = node->map->map_ip(node->map, node->ip);
-
-			if (print_sym) {
-				printf(" ");
-				if (print_symoffset) {
-					node_al.addr = addr;
-					node_al.map  = node->map;
-					symbol__fprintf_symname_offs(node->sym, &node_al, stdout);
-				} else
-					symbol__fprintf_symname(node->sym, stdout);
-			}
-
-			if (print_dso) {
-				printf(" (");
-				map__fprintf_dsoname(node->map, stdout);
-				printf(")");
-			}
-
-			if (print_srcline)
-				map__fprintf_srcline(node->map, addr, "\n  ",
-						     stdout);
-
-			if (!print_oneline)
-				printf("\n");
-
-			stack_depth--;
-next:
-			callchain_cursor_advance(&callchain_cursor);
-		}
-
-	} else {
-		if (al->sym && al->sym->ignore)
-			return;
-
-		if (print_ip)
-			printf("%16" PRIx64, sample->ip);
-
-		if (print_sym) {
-			printf(" ");
-			if (print_symoffset)
-				symbol__fprintf_symname_offs(al->sym, al,
-							     stdout);
-			else
-				symbol__fprintf_symname(al->sym, stdout);
-		}
-
-		if (print_dso) {
-			printf(" (");
-			map__fprintf_dsoname(al->map, stdout);
-			printf(")");
-		}
-
-		if (print_srcline)
-			map__fprintf_srcline(al->map, al->addr, "\n  ", stdout);
-	}
-}
-
 int perf_session__cpu_bitmap(struct perf_session *session,
 			     const char *cpu_list, unsigned long *cpu_bitmap)
 {
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 5f792e3..4bd7585 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -26,6 +26,7 @@
 	struct itrace_synth_opts *itrace_synth_opts;
 	struct list_head	auxtrace_index;
 	struct trace_event	tevent;
+	struct time_conv_event	time_conv;
 	bool			repipe;
 	bool			one_mmap;
 	void			*one_mmap_addr;
@@ -35,13 +36,6 @@
 	struct perf_tool	*tool;
 };
 
-#define PRINT_IP_OPT_IP		(1<<0)
-#define PRINT_IP_OPT_SYM		(1<<1)
-#define PRINT_IP_OPT_DSO		(1<<2)
-#define PRINT_IP_OPT_SYMOFFSET	(1<<3)
-#define PRINT_IP_OPT_ONELINE	(1<<4)
-#define PRINT_IP_OPT_SRCLINE	(1<<5)
-
 struct perf_tool;
 
 struct perf_session *perf_session__new(struct perf_data_file *file,
@@ -103,10 +97,6 @@
 struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
 					    unsigned int type);
 
-void perf_evsel__print_ip(struct perf_evsel *evsel, struct perf_sample *sample,
-			  struct addr_location *al,
-			  unsigned int print_opts, unsigned int stack_depth);
-
 int perf_session__cpu_bitmap(struct perf_session *session,
 			     const char *cpu_list, unsigned long *cpu_bitmap);
 
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index f5ba111..20e69ed 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -21,13 +21,6 @@
 const char	*field_order;
 regex_t		ignore_callees_regex;
 int		have_ignore_callees = 0;
-int		sort__need_collapse = 0;
-int		sort__has_parent = 0;
-int		sort__has_sym = 0;
-int		sort__has_dso = 0;
-int		sort__has_socket = 0;
-int		sort__has_thread = 0;
-int		sort__has_comm = 0;
 enum sort_mode	sort__mode = SORT_MODE__NORMAL;
 
 /*
@@ -244,7 +237,7 @@
 	 * comparing symbol address alone is not enough since it's a
 	 * relative address within a dso.
 	 */
-	if (!sort__has_dso) {
+	if (!hists__has(left->hists, dso) || !hists__has(right->hists, dso)) {
 		ret = sort__dso_cmp(left, right);
 		if (ret != 0)
 			return ret;
@@ -2163,7 +2156,7 @@
 		return -1;
 
 	if (sd->entry->se_collapse)
-		sort__need_collapse = 1;
+		list->need_collapse = 1;
 
 	sd->taken = 1;
 
@@ -2245,9 +2238,9 @@
 				pr_err("Invalid regex: %s\n%s", parent_pattern, err);
 				return -EINVAL;
 			}
-			sort__has_parent = 1;
+			list->parent = 1;
 		} else if (sd->entry == &sort_sym) {
-			sort__has_sym = 1;
+			list->sym = 1;
 			/*
 			 * perf diff displays the performance difference amongst
 			 * two or more perf.data files. Those files could come
@@ -2258,13 +2251,13 @@
 				sd->entry->se_collapse = sort__sym_sort;
 
 		} else if (sd->entry == &sort_dso) {
-			sort__has_dso = 1;
+			list->dso = 1;
 		} else if (sd->entry == &sort_socket) {
-			sort__has_socket = 1;
+			list->socket = 1;
 		} else if (sd->entry == &sort_thread) {
-			sort__has_thread = 1;
+			list->thread = 1;
 		} else if (sd->entry == &sort_comm) {
-			sort__has_comm = 1;
+			list->comm = 1;
 		}
 
 		return __sort_dimension__add(sd, list, level);
@@ -2289,7 +2282,7 @@
 			return -EINVAL;
 
 		if (sd->entry == &sort_sym_from || sd->entry == &sort_sym_to)
-			sort__has_sym = 1;
+			list->sym = 1;
 
 		__sort_dimension__add(sd, list, level);
 		return 0;
@@ -2305,7 +2298,7 @@
 			return -EINVAL;
 
 		if (sd->entry == &sort_mem_daddr_sym)
-			sort__has_sym = 1;
+			list->sym = 1;
 
 		__sort_dimension__add(sd, list, level);
 		return 0;
@@ -2749,10 +2742,10 @@
 
 void reset_output_field(void)
 {
-	sort__need_collapse = 0;
-	sort__has_parent = 0;
-	sort__has_sym = 0;
-	sort__has_dso = 0;
+	perf_hpp_list.need_collapse = 0;
+	perf_hpp_list.parent = 0;
+	perf_hpp_list.sym = 0;
+	perf_hpp_list.dso = 0;
 
 	field_order = NULL;
 	sort_order = NULL;
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 3f4e359..42927f4 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -31,13 +31,6 @@
 extern const char default_sort_order[];
 extern regex_t ignore_callees_regex;
 extern int have_ignore_callees;
-extern int sort__need_collapse;
-extern int sort__has_dso;
-extern int sort__has_parent;
-extern int sort__has_sym;
-extern int sort__has_socket;
-extern int sort__has_thread;
-extern int sort__has_comm;
 extern enum sort_mode sort__mode;
 extern struct sort_entry sort_comm;
 extern struct sort_entry sort_dso;
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 4d9b481..ffa1d06 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -307,6 +307,7 @@
 	struct perf_counts_values *aggr = &counter->counts->aggr;
 	struct perf_stat_evsel *ps = counter->priv;
 	u64 *count = counter->counts->aggr.values;
+	u64 val;
 	int i, ret;
 
 	aggr->val = aggr->ena = aggr->run = 0;
@@ -346,7 +347,8 @@
 	/*
 	 * Save the full runtime - to allow normalization during printout:
 	 */
-	perf_stat__update_shadow_stats(counter, count, 0);
+	val = counter->scale * *count;
+	perf_stat__update_shadow_stats(counter, &val, 0);
 
 	return 0;
 }
diff --git a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c
index 8fb7329..f95f682 100644
--- a/tools/perf/util/strbuf.c
+++ b/tools/perf/util/strbuf.c
@@ -1,3 +1,4 @@
+#include "debug.h"
 #include "cache.h"
 #include <linux/kernel.h>
 
@@ -17,12 +18,13 @@
  */
 char strbuf_slopbuf[1];
 
-void strbuf_init(struct strbuf *sb, ssize_t hint)
+int strbuf_init(struct strbuf *sb, ssize_t hint)
 {
 	sb->alloc = sb->len = 0;
 	sb->buf = strbuf_slopbuf;
 	if (hint)
-		strbuf_grow(sb, hint);
+		return strbuf_grow(sb, hint);
+	return 0;
 }
 
 void strbuf_release(struct strbuf *sb)
@@ -42,67 +44,104 @@
 	return res;
 }
 
-void strbuf_grow(struct strbuf *sb, size_t extra)
+int strbuf_grow(struct strbuf *sb, size_t extra)
 {
-	if (sb->len + extra + 1 <= sb->len)
-		die("you want to use way too much memory");
-	if (!sb->alloc)
-		sb->buf = NULL;
-	ALLOC_GROW(sb->buf, sb->len + extra + 1, sb->alloc);
+	char *buf;
+	size_t nr = sb->len + extra + 1;
+
+	if (nr < sb->alloc)
+		return 0;
+
+	if (nr <= sb->len)
+		return -E2BIG;
+
+	if (alloc_nr(sb->alloc) > nr)
+		nr = alloc_nr(sb->alloc);
+
+	/*
+	 * Note that sb->buf == strbuf_slopbuf if sb->alloc == 0, and it is
+	 * a static variable. Thus we have to avoid passing it to realloc.
+	 */
+	buf = realloc(sb->alloc ? sb->buf : NULL, nr * sizeof(*buf));
+	if (!buf)
+		return -ENOMEM;
+
+	sb->buf = buf;
+	sb->alloc = nr;
+	return 0;
 }
 
-void strbuf_addch(struct strbuf *sb, int c)
+int strbuf_addch(struct strbuf *sb, int c)
 {
-	strbuf_grow(sb, 1);
+	int ret = strbuf_grow(sb, 1);
+	if (ret)
+		return ret;
+
 	sb->buf[sb->len++] = c;
 	sb->buf[sb->len] = '\0';
+	return 0;
 }
 
-void strbuf_add(struct strbuf *sb, const void *data, size_t len)
+int strbuf_add(struct strbuf *sb, const void *data, size_t len)
 {
-	strbuf_grow(sb, len);
+	int ret = strbuf_grow(sb, len);
+	if (ret)
+		return ret;
+
 	memcpy(sb->buf + sb->len, data, len);
-	strbuf_setlen(sb, sb->len + len);
+	return strbuf_setlen(sb, sb->len + len);
 }
 
-static void strbuf_addv(struct strbuf *sb, const char *fmt, va_list ap)
+static int strbuf_addv(struct strbuf *sb, const char *fmt, va_list ap)
 {
-	int len;
+	int len, ret;
 	va_list ap_saved;
 
-	if (!strbuf_avail(sb))
-		strbuf_grow(sb, 64);
+	if (!strbuf_avail(sb)) {
+		ret = strbuf_grow(sb, 64);
+		if (ret)
+			return ret;
+	}
 
 	va_copy(ap_saved, ap);
 	len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap);
 	if (len < 0)
-		die("your vsnprintf is broken");
+		return len;
 	if (len > strbuf_avail(sb)) {
-		strbuf_grow(sb, len);
+		ret = strbuf_grow(sb, len);
+		if (ret)
+			return ret;
 		len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap_saved);
 		va_end(ap_saved);
 		if (len > strbuf_avail(sb)) {
-			die("this should not happen, your vsnprintf is broken");
+			pr_debug("this should not happen, your vsnprintf is broken");
+			return -EINVAL;
 		}
 	}
-	strbuf_setlen(sb, sb->len + len);
+	return strbuf_setlen(sb, sb->len + len);
 }
 
-void strbuf_addf(struct strbuf *sb, const char *fmt, ...)
+int strbuf_addf(struct strbuf *sb, const char *fmt, ...)
 {
 	va_list ap;
+	int ret;
 
 	va_start(ap, fmt);
-	strbuf_addv(sb, fmt, ap);
+	ret = strbuf_addv(sb, fmt, ap);
 	va_end(ap);
+	return ret;
 }
 
 ssize_t strbuf_read(struct strbuf *sb, int fd, ssize_t hint)
 {
 	size_t oldlen = sb->len;
 	size_t oldalloc = sb->alloc;
+	int ret;
 
-	strbuf_grow(sb, hint ? hint : 8192);
+	ret = strbuf_grow(sb, hint ? hint : 8192);
+	if (ret)
+		return ret;
+
 	for (;;) {
 		ssize_t cnt;
 
@@ -112,12 +151,14 @@
 				strbuf_release(sb);
 			else
 				strbuf_setlen(sb, oldlen);
-			return -1;
+			return cnt;
 		}
 		if (!cnt)
 			break;
 		sb->len += cnt;
-		strbuf_grow(sb, 8192);
+		ret = strbuf_grow(sb, 8192);
+		if (ret)
+			return ret;
 	}
 
 	sb->buf[sb->len] = '\0';
diff --git a/tools/perf/util/strbuf.h b/tools/perf/util/strbuf.h
index ab9be0fb..54b4092 100644
--- a/tools/perf/util/strbuf.h
+++ b/tools/perf/util/strbuf.h
@@ -51,7 +51,7 @@
 #define STRBUF_INIT  { 0, 0, strbuf_slopbuf }
 
 /*----- strbuf life cycle -----*/
-void strbuf_init(struct strbuf *buf, ssize_t hint);
+int strbuf_init(struct strbuf *buf, ssize_t hint);
 void strbuf_release(struct strbuf *buf);
 char *strbuf_detach(struct strbuf *buf, size_t *);
 
@@ -60,26 +60,31 @@
 	return sb->alloc ? sb->alloc - sb->len - 1 : 0;
 }
 
-void strbuf_grow(struct strbuf *buf, size_t);
+int strbuf_grow(struct strbuf *buf, size_t);
 
-static inline void strbuf_setlen(struct strbuf *sb, size_t len) {
-	if (!sb->alloc)
-		strbuf_grow(sb, 0);
+static inline int strbuf_setlen(struct strbuf *sb, size_t len) {
+	int ret;
+	if (!sb->alloc) {
+		ret = strbuf_grow(sb, 0);
+		if (ret)
+			return ret;
+	}
 	assert(len < sb->alloc);
 	sb->len = len;
 	sb->buf[len] = '\0';
+	return 0;
 }
 
 /*----- add data in your buffer -----*/
-void strbuf_addch(struct strbuf *sb, int c);
+int strbuf_addch(struct strbuf *sb, int c);
 
-void strbuf_add(struct strbuf *buf, const void *, size_t);
-static inline void strbuf_addstr(struct strbuf *sb, const char *s) {
-	strbuf_add(sb, s, strlen(s));
+int strbuf_add(struct strbuf *buf, const void *, size_t);
+static inline int strbuf_addstr(struct strbuf *sb, const char *s) {
+	return strbuf_add(sb, s, strlen(s));
 }
 
 __attribute__((format(printf,2,3)))
-void strbuf_addf(struct strbuf *sb, const char *fmt, ...);
+int strbuf_addf(struct strbuf *sb, const char *fmt, ...);
 
 /* XXX: if read fails, any partial read is undone */
 ssize_t strbuf_read(struct strbuf *, int fd, ssize_t hint);
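
Since every strbuf primitive can now fail, call sites follow a check-and-propagate pattern instead of relying on die(). A minimal sketch of the new contract ('name' and 'line' are illustrative):

	struct strbuf sb = STRBUF_INIT;
	char *str;
	int ret;

	ret = strbuf_init(&sb, 64);		/* pre-size; may return -ENOMEM */
	if (!ret)
		ret = strbuf_addf(&sb, "%s:%d", name, line);
	if (!ret)
		ret = strbuf_addch(&sb, '\n');
	if (ret) {
		strbuf_release(&sb);
		return ret;			/* negative errno from the strbuf layer */
	}
	str = strbuf_detach(&sb, NULL);		/* caller now owns the allocation */
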
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index bc229a7..87a297d 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -709,17 +709,10 @@
 	if (ss->opdshdr.sh_type != SHT_PROGBITS)
 		ss->opdsec = NULL;
 
-	if (dso->kernel == DSO_TYPE_USER) {
-		GElf_Shdr shdr;
-		ss->adjust_symbols = (ehdr.e_type == ET_EXEC ||
-				ehdr.e_type == ET_REL ||
-				dso__is_vdso(dso) ||
-				elf_section_by_name(elf, &ehdr, &shdr,
-						     ".gnu.prelink_undo",
-						     NULL) != NULL);
-	} else {
+	if (dso->kernel == DSO_TYPE_USER)
+		ss->adjust_symbols = true;
+	else
 		ss->adjust_symbols = elf__needs_adjust_symbols(ehdr);
-	}
 
 	ss->name   = strdup(name);
 	if (!ss->name) {
@@ -777,7 +770,8 @@
 	return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle;
 }
 
-void __weak arch__elf_sym_adjust(GElf_Sym *sym __maybe_unused) { }
+void __weak arch__sym_update(struct symbol *s __maybe_unused,
+		GElf_Sym *sym __maybe_unused) { }
 
 int dso__load_sym(struct dso *dso, struct map *map,
 		  struct symsrc *syms_ss, struct symsrc *runtime_ss,
@@ -954,8 +948,6 @@
 		    (sym.st_value & 1))
 			--sym.st_value;
 
-		arch__elf_sym_adjust(&sym);
-
 		if (dso->kernel || kmodule) {
 			char dso_name[PATH_MAX];
 
@@ -1089,6 +1081,8 @@
 		if (!f)
 			goto out_elf_end;
 
+		arch__sym_update(f, &sym);
+
 		if (filter && filter(curr_map, f))
 			symbol__delete(f);
 		else {
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index e7588dc..7fb3330 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -255,40 +255,6 @@
 	free(((void *)sym) - symbol_conf.priv_size);
 }
 
-size_t symbol__fprintf(struct symbol *sym, FILE *fp)
-{
-	return fprintf(fp, " %" PRIx64 "-%" PRIx64 " %c %s\n",
-		       sym->start, sym->end,
-		       sym->binding == STB_GLOBAL ? 'g' :
-		       sym->binding == STB_LOCAL  ? 'l' : 'w',
-		       sym->name);
-}
-
-size_t symbol__fprintf_symname_offs(const struct symbol *sym,
-				    const struct addr_location *al, FILE *fp)
-{
-	unsigned long offset;
-	size_t length;
-
-	if (sym && sym->name) {
-		length = fprintf(fp, "%s", sym->name);
-		if (al) {
-			if (al->addr < sym->end)
-				offset = al->addr - sym->start;
-			else
-				offset = al->addr - al->map->start - sym->start;
-			length += fprintf(fp, "+0x%lx", offset);
-		}
-		return length;
-	} else
-		return fprintf(fp, "[unknown]");
-}
-
-size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp)
-{
-	return symbol__fprintf_symname_offs(sym, NULL, fp);
-}
-
 void symbols__delete(struct rb_root *symbols)
 {
 	struct symbol *pos;
@@ -335,7 +301,7 @@
 
 		if (ip < s->start)
 			n = n->rb_left;
-		else if (ip >= s->end)
+		else if (ip > s->end || (ip == s->end && ip != s->start))
 			n = n->rb_right;
 		else
 			return s;
@@ -364,11 +330,6 @@
 	return NULL;
 }
 
-struct symbol_name_rb_node {
-	struct rb_node	rb_node;
-	struct symbol	sym;
-};
-
 static void symbols__insert_by_name(struct rb_root *symbols, struct symbol *sym)
 {
 	struct rb_node **p = &symbols->rb_node;
@@ -452,6 +413,18 @@
 	}
 }
 
+void dso__insert_symbol(struct dso *dso, enum map_type type, struct symbol *sym)
+{
+	symbols__insert(&dso->symbols[type], sym);
+
+	/* update the symbol cache if necessary */
+	if (dso->last_find_result[type].addr >= sym->start &&
+	    (dso->last_find_result[type].addr < sym->end ||
+	    sym->start == sym->end)) {
+		dso->last_find_result[type].symbol = sym;
+	}
+}
+
 struct symbol *dso__find_symbol(struct dso *dso,
 				enum map_type type, u64 addr)
 {
@@ -497,21 +470,6 @@
 				     &dso->symbols[type]);
 }
 
-size_t dso__fprintf_symbols_by_name(struct dso *dso,
-				    enum map_type type, FILE *fp)
-{
-	size_t ret = 0;
-	struct rb_node *nd;
-	struct symbol_name_rb_node *pos;
-
-	for (nd = rb_first(&dso->symbol_names[type]); nd; nd = rb_next(nd)) {
-		pos = rb_entry(nd, struct symbol_name_rb_node, rb_node);
-		fprintf(fp, "%s\n", pos->sym.name);
-	}
-
-	return ret;
-}
-
 int modules__parse(const char *filename, void *arg,
 		   int (*process_module)(void *arg, const char *name,
 					 u64 start))
@@ -1262,8 +1220,8 @@
 	return 0;
 }
 
-int dso__load_kallsyms(struct dso *dso, const char *filename,
-		       struct map *map, symbol_filter_t filter)
+int __dso__load_kallsyms(struct dso *dso, const char *filename,
+			 struct map *map, bool no_kcore, symbol_filter_t filter)
 {
 	u64 delta = 0;
 
@@ -1284,12 +1242,18 @@
 	else
 		dso->symtab_type = DSO_BINARY_TYPE__KALLSYMS;
 
-	if (!dso__load_kcore(dso, map, filename))
+	if (!no_kcore && !dso__load_kcore(dso, map, filename))
 		return dso__split_kallsyms_for_kcore(dso, map, filter);
 	else
 		return dso__split_kallsyms(dso, map, delta, filter);
 }
 
+int dso__load_kallsyms(struct dso *dso, const char *filename,
+		       struct map *map, symbol_filter_t filter)
+{
+	return __dso__load_kallsyms(dso, filename, map, false, filter);
+}
+
 static int dso__load_perf_map(struct dso *dso, struct map *map,
 			      symbol_filter_t filter)
 {
@@ -1644,25 +1608,27 @@
 	return err;
 }
 
+static bool visible_dir_filter(const char *name, struct dirent *d)
+{
+	if (d->d_type != DT_DIR)
+		return false;
+	return lsdir_no_dot_filter(name, d);
+}
+
 static int find_matching_kcore(struct map *map, char *dir, size_t dir_sz)
 {
 	char kallsyms_filename[PATH_MAX];
-	struct dirent *dent;
 	int ret = -1;
-	DIR *d;
+	struct strlist *dirs;
+	struct str_node *nd;
 
-	d = opendir(dir);
-	if (!d)
+	dirs = lsdir(dir, visible_dir_filter);
+	if (!dirs)
 		return -1;
 
-	while (1) {
-		dent = readdir(d);
-		if (!dent)
-			break;
-		if (dent->d_type != DT_DIR)
-			continue;
+	strlist__for_each(nd, dirs) {
 		scnprintf(kallsyms_filename, sizeof(kallsyms_filename),
-			  "%s/%s/kallsyms", dir, dent->d_name);
+			  "%s/%s/kallsyms", dir, nd->s);
 		if (!validate_kcore_addresses(kallsyms_filename, map)) {
 			strlcpy(dir, kallsyms_filename, dir_sz);
 			ret = 0;
@@ -1670,7 +1636,7 @@
 		}
 	}
 
-	closedir(d);
+	strlist__delete(dirs);
 
 	return ret;
 }
@@ -1678,7 +1644,7 @@
 static char *dso__find_kallsyms(struct dso *dso, struct map *map)
 {
 	u8 host_build_id[BUILD_ID_SIZE];
-	char sbuild_id[BUILD_ID_SIZE * 2 + 1];
+	char sbuild_id[SBUILD_ID_SIZE];
 	bool is_host = false;
 	char path[PATH_MAX];
 
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index c8b7544..2b5e4ed 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -55,6 +55,7 @@
 	u16		namelen;
 	u8		binding;
 	bool		ignore;
+	u8		arch_sym;
 	char		name[0];
 };
 
@@ -140,6 +141,11 @@
 
 extern struct symbol_conf symbol_conf;
 
+struct symbol_name_rb_node {
+	struct rb_node	rb_node;
+	struct symbol	sym;
+};
+
 static inline int __symbol__join_symfs(char *bf, size_t size, const char *path)
 {
 	return path__join(bf, size, symbol_conf.symfs, path);
@@ -235,9 +241,14 @@
 		      symbol_filter_t filter);
 int dso__load_vmlinux_path(struct dso *dso, struct map *map,
 			   symbol_filter_t filter);
+int __dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map,
+			 bool no_kcore, symbol_filter_t filter);
 int dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map,
 		       symbol_filter_t filter);
 
+void dso__insert_symbol(struct dso *dso, enum map_type type,
+			struct symbol *sym);
+
 struct symbol *dso__find_symbol(struct dso *dso, enum map_type type,
 				u64 addr);
 struct symbol *dso__find_symbol_by_name(struct dso *dso, enum map_type type,
@@ -262,8 +273,14 @@
 void symbol__exit(void);
 void symbol__elf_init(void);
 struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name);
+size_t __symbol__fprintf_symname_offs(const struct symbol *sym,
+				      const struct addr_location *al,
+				      bool unknown_as_addr, FILE *fp);
 size_t symbol__fprintf_symname_offs(const struct symbol *sym,
 				    const struct addr_location *al, FILE *fp);
+size_t __symbol__fprintf_symname(const struct symbol *sym,
+				 const struct addr_location *al,
+				 bool unknown_as_addr, FILE *fp);
 size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp);
 size_t symbol__fprintf(struct symbol *sym, FILE *fp);
 bool symbol_type__is_a(char symbol_type, enum map_type map_type);
@@ -310,7 +327,7 @@
 
 #ifdef HAVE_LIBELF_SUPPORT
 bool elf__needs_adjust_symbols(GElf_Ehdr ehdr);
-void arch__elf_sym_adjust(GElf_Sym *sym);
+void arch__sym_update(struct symbol *s, GElf_Sym *sym);
 #endif
 
 #define SYMBOL_A 0
diff --git a/tools/perf/util/symbol_fprintf.c b/tools/perf/util/symbol_fprintf.c
new file mode 100644
index 0000000..a680bda
--- /dev/null
+++ b/tools/perf/util/symbol_fprintf.c
@@ -0,0 +1,71 @@
+#include <elf.h>
+#include <inttypes.h>
+#include <stdio.h>
+
+#include "symbol.h"
+
+size_t symbol__fprintf(struct symbol *sym, FILE *fp)
+{
+	return fprintf(fp, " %" PRIx64 "-%" PRIx64 " %c %s\n",
+		       sym->start, sym->end,
+		       sym->binding == STB_GLOBAL ? 'g' :
+		       sym->binding == STB_LOCAL  ? 'l' : 'w',
+		       sym->name);
+}
+
+size_t __symbol__fprintf_symname_offs(const struct symbol *sym,
+				      const struct addr_location *al,
+				      bool unknown_as_addr, FILE *fp)
+{
+	unsigned long offset;
+	size_t length;
+
+	if (sym && sym->name) {
+		length = fprintf(fp, "%s", sym->name);
+		if (al) {
+			if (al->addr < sym->end)
+				offset = al->addr - sym->start;
+			else
+				offset = al->addr - al->map->start - sym->start;
+			length += fprintf(fp, "+0x%lx", offset);
+		}
+		return length;
+	} else if (al && unknown_as_addr)
+		return fprintf(fp, "[%#" PRIx64 "]", al->addr);
+	else
+		return fprintf(fp, "[unknown]");
+}
+
+size_t symbol__fprintf_symname_offs(const struct symbol *sym,
+				    const struct addr_location *al,
+				    FILE *fp)
+{
+	return __symbol__fprintf_symname_offs(sym, al, false, fp);
+}
+
+size_t __symbol__fprintf_symname(const struct symbol *sym,
+				 const struct addr_location *al,
+				 bool unknown_as_addr, FILE *fp)
+{
+	return __symbol__fprintf_symname_offs(sym, al, unknown_as_addr, fp);
+}
+
+size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp)
+{
+	return __symbol__fprintf_symname_offs(sym, NULL, false, fp);
+}
+
+size_t dso__fprintf_symbols_by_name(struct dso *dso,
+				    enum map_type type, FILE *fp)
+{
+	size_t ret = 0;
+	struct rb_node *nd;
+	struct symbol_name_rb_node *pos;
+
+	for (nd = rb_first(&dso->symbol_names[type]); nd; nd = rb_next(nd)) {
+		pos = rb_entry(nd, struct symbol_name_rb_node, rb_node);
+		fprintf(fp, "%s\n", pos->sym.name);
+	}
+
+	return ret;
+}
diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c
new file mode 100644
index 0000000..bbb4c19
--- /dev/null
+++ b/tools/perf/util/syscalltbl.c
@@ -0,0 +1,134 @@
+/*
+ * System call table mapper
+ *
+ * (C) 2016 Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "syscalltbl.h"
+#include <stdlib.h>
+
+#ifdef HAVE_SYSCALL_TABLE
+#include <linux/compiler.h>
+#include <string.h>
+#include "util.h"
+
+#if defined(__x86_64__)
+#include <asm/syscalls_64.c>
+const int syscalltbl_native_max_id = SYSCALLTBL_x86_64_MAX_ID;
+static const char **syscalltbl_native = syscalltbl_x86_64;
+#endif
+
+struct syscall {
+	int id;
+	const char *name;
+};
+
+static int syscallcmpname(const void *vkey, const void *ventry)
+{
+	const char *key = vkey;
+	const struct syscall *entry = ventry;
+
+	return strcmp(key, entry->name);
+}
+
+static int syscallcmp(const void *va, const void *vb)
+{
+	const struct syscall *a = va, *b = vb;
+
+	return strcmp(a->name, b->name);
+}
+
+static int syscalltbl__init_native(struct syscalltbl *tbl)
+{
+	int nr_entries = 0, i, j;
+	struct syscall *entries;
+
+	for (i = 0; i <= syscalltbl_native_max_id; ++i)
+		if (syscalltbl_native[i])
+			++nr_entries;
+
+	entries = tbl->syscalls.entries = malloc(sizeof(struct syscall) * nr_entries);
+	if (tbl->syscalls.entries == NULL)
+		return -1;
+
+	for (i = 0, j = 0; i <= syscalltbl_native_max_id; ++i) {
+		if (syscalltbl_native[i]) {
+			entries[j].name = syscalltbl_native[i];
+			entries[j].id = i;
+			++j;
+		}
+	}
+
+	qsort(tbl->syscalls.entries, nr_entries, sizeof(struct syscall), syscallcmp);
+	tbl->syscalls.nr_entries = nr_entries;
+	return 0;
+}
+
+struct syscalltbl *syscalltbl__new(void)
+{
+	struct syscalltbl *tbl = malloc(sizeof(*tbl));
+	if (tbl) {
+		if (syscalltbl__init_native(tbl)) {
+			free(tbl);
+			return NULL;
+		}
+	}
+	return tbl;
+}
+
+void syscalltbl__delete(struct syscalltbl *tbl)
+{
+	zfree(&tbl->syscalls.entries);
+	free(tbl);
+}
+
+const char *syscalltbl__name(const struct syscalltbl *tbl __maybe_unused, int id)
+{
+	return id <= syscalltbl_native_max_id ? syscalltbl_native[id] : NULL;
+}
+
+int syscalltbl__id(struct syscalltbl *tbl, const char *name)
+{
+	struct syscall *sc = bsearch(name, tbl->syscalls.entries,
+				     tbl->syscalls.nr_entries, sizeof(*sc),
+				     syscallcmpname);
+
+	return sc ? sc->id : -1;
+}
+
+#else /* HAVE_SYSCALL_TABLE */
+
+#include <libaudit.h>
+
+struct syscalltbl *syscalltbl__new(void)
+{
+	struct syscalltbl *tbl = malloc(sizeof(*tbl));
+	if (tbl)
+		tbl->audit_machine = audit_detect_machine();
+	return tbl;
+}
+
+void syscalltbl__delete(struct syscalltbl *tbl)
+{
+	free(tbl);
+}
+
+const char *syscalltbl__name(const struct syscalltbl *tbl, int id)
+{
+	return audit_syscall_to_name(id, tbl->audit_machine);
+}
+
+int syscalltbl__id(struct syscalltbl *tbl, const char *name)
+{
+	return audit_name_to_syscall(name, tbl->audit_machine);
+}
+#endif /* HAVE_SYSCALL_TABLE */
diff --git a/tools/perf/util/syscalltbl.h b/tools/perf/util/syscalltbl.h
new file mode 100644
index 0000000..e295151
--- /dev/null
+++ b/tools/perf/util/syscalltbl.h
@@ -0,0 +1,20 @@
+#ifndef __PERF_SYSCALLTBL_H
+#define __PERF_SYSCALLTBL_H
+
+struct syscalltbl {
+	union {
+		int audit_machine;
+		struct {
+			int nr_entries;
+			void *entries;
+		} syscalls;
+	};
+};
+
+struct syscalltbl *syscalltbl__new(void);
+void syscalltbl__delete(struct syscalltbl *tbl);
+
+const char *syscalltbl__name(const struct syscalltbl *tbl, int id);
+int syscalltbl__id(struct syscalltbl *tbl, const char *name);
+
+#endif /* __PERF_SYSCALLTBL_H */
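
Whichever backing store gets compiled in (the generated native table or libaudit), the lookup interface is the same. A minimal usage sketch:

	struct syscalltbl *tbl = syscalltbl__new();
	int id;

	if (tbl == NULL)
		return -ENOMEM;
	id = syscalltbl__id(tbl, "open");	/* -1 if the name is unknown */
	if (id >= 0)
		printf("open = %d (%s)\n", id, syscalltbl__name(tbl, id));
	syscalltbl__delete(tbl);
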
diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c
index 679688e..825086a 100644
--- a/tools/perf/util/thread-stack.c
+++ b/tools/perf/util/thread-stack.c
@@ -22,44 +22,9 @@
 #include "debug.h"
 #include "symbol.h"
 #include "comm.h"
+#include "call-path.h"
 #include "thread-stack.h"
 
-#define CALL_PATH_BLOCK_SHIFT 8
-#define CALL_PATH_BLOCK_SIZE (1 << CALL_PATH_BLOCK_SHIFT)
-#define CALL_PATH_BLOCK_MASK (CALL_PATH_BLOCK_SIZE - 1)
-
-struct call_path_block {
-	struct call_path cp[CALL_PATH_BLOCK_SIZE];
-	struct list_head node;
-};
-
-/**
- * struct call_path_root - root of all call paths.
- * @call_path: root call path
- * @blocks: list of blocks to store call paths
- * @next: next free space
- * @sz: number of spaces
- */
-struct call_path_root {
-	struct call_path call_path;
-	struct list_head blocks;
-	size_t next;
-	size_t sz;
-};
-
-/**
- * struct call_return_processor - provides a call-back to consume call-return
- *                                information.
- * @cpr: call path root
- * @process: call-back that accepts call/return information
- * @data: anonymous data for call-back
- */
-struct call_return_processor {
-	struct call_path_root *cpr;
-	int (*process)(struct call_return *cr, void *data);
-	void *data;
-};
-
 #define STACK_GROWTH 2048
 
 /**
@@ -335,108 +300,6 @@
 		chain->ips[i] = thread->ts->stack[thread->ts->cnt - i].ret_addr;
 }
 
-static void call_path__init(struct call_path *cp, struct call_path *parent,
-			    struct symbol *sym, u64 ip, bool in_kernel)
-{
-	cp->parent = parent;
-	cp->sym = sym;
-	cp->ip = sym ? 0 : ip;
-	cp->db_id = 0;
-	cp->in_kernel = in_kernel;
-	RB_CLEAR_NODE(&cp->rb_node);
-	cp->children = RB_ROOT;
-}
-
-static struct call_path_root *call_path_root__new(void)
-{
-	struct call_path_root *cpr;
-
-	cpr = zalloc(sizeof(struct call_path_root));
-	if (!cpr)
-		return NULL;
-	call_path__init(&cpr->call_path, NULL, NULL, 0, false);
-	INIT_LIST_HEAD(&cpr->blocks);
-	return cpr;
-}
-
-static void call_path_root__free(struct call_path_root *cpr)
-{
-	struct call_path_block *pos, *n;
-
-	list_for_each_entry_safe(pos, n, &cpr->blocks, node) {
-		list_del(&pos->node);
-		free(pos);
-	}
-	free(cpr);
-}
-
-static struct call_path *call_path__new(struct call_path_root *cpr,
-					struct call_path *parent,
-					struct symbol *sym, u64 ip,
-					bool in_kernel)
-{
-	struct call_path_block *cpb;
-	struct call_path *cp;
-	size_t n;
-
-	if (cpr->next < cpr->sz) {
-		cpb = list_last_entry(&cpr->blocks, struct call_path_block,
-				      node);
-	} else {
-		cpb = zalloc(sizeof(struct call_path_block));
-		if (!cpb)
-			return NULL;
-		list_add_tail(&cpb->node, &cpr->blocks);
-		cpr->sz += CALL_PATH_BLOCK_SIZE;
-	}
-
-	n = cpr->next++ & CALL_PATH_BLOCK_MASK;
-	cp = &cpb->cp[n];
-
-	call_path__init(cp, parent, sym, ip, in_kernel);
-
-	return cp;
-}
-
-static struct call_path *call_path__findnew(struct call_path_root *cpr,
-					    struct call_path *parent,
-					    struct symbol *sym, u64 ip, u64 ks)
-{
-	struct rb_node **p;
-	struct rb_node *node_parent = NULL;
-	struct call_path *cp;
-	bool in_kernel = ip >= ks;
-
-	if (sym)
-		ip = 0;
-
-	if (!parent)
-		return call_path__new(cpr, parent, sym, ip, in_kernel);
-
-	p = &parent->children.rb_node;
-	while (*p != NULL) {
-		node_parent = *p;
-		cp = rb_entry(node_parent, struct call_path, rb_node);
-
-		if (cp->sym == sym && cp->ip == ip)
-			return cp;
-
-		if (sym < cp->sym || (sym == cp->sym && ip < cp->ip))
-			p = &(*p)->rb_left;
-		else
-			p = &(*p)->rb_right;
-	}
-
-	cp = call_path__new(cpr, parent, sym, ip, in_kernel);
-	if (!cp)
-		return NULL;
-
-	rb_link_node(&cp->rb_node, node_parent, p);
-	rb_insert_color(&cp->rb_node, &parent->children);
-
-	return cp;
-}
-
 struct call_return_processor *
 call_return_processor__new(int (*process)(struct call_return *cr, void *data),
 			   void *data)
diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h
index e1528f1..ad44c79 100644
--- a/tools/perf/util/thread-stack.h
+++ b/tools/perf/util/thread-stack.h
@@ -19,17 +19,16 @@
 #include <sys/types.h>
 
 #include <linux/types.h>
-#include <linux/rbtree.h>
 
 struct thread;
 struct comm;
 struct ip_callchain;
 struct symbol;
 struct dso;
-struct call_return_processor;
 struct comm;
 struct perf_sample;
 struct addr_location;
+struct call_path;
 
 /*
  * Call/Return flags.
@@ -69,26 +68,16 @@
 };
 
 /**
- * struct call_path - node in list of calls leading to a function call.
- * @parent: call path to the parent function call
- * @sym: symbol of function called
- * @ip: only if sym is null, the ip of the function
- * @db_id: id used for db-export
- * @in_kernel: whether function is a in the kernel
- * @rb_node: node in parent's tree of called functions
- * @children: tree of call paths of functions called
- *
- * In combination with the call_return structure, the call_path structure
- * defines a context-sensitve call-graph.
+ * struct call_return_processor - provides a call-back to consume call-return
+ *                                information.
+ * @cpr: call path root
+ * @process: call-back that accepts call/return information
+ * @data: anonymous data for call-back
  */
-struct call_path {
-	struct call_path *parent;
-	struct symbol *sym;
-	u64 ip;
-	u64 db_id;
-	bool in_kernel;
-	struct rb_node rb_node;
-	struct rb_root children;
+struct call_return_processor {
+	struct call_path_root *cpr;
+	int (*process)(struct call_return *cr, void *data);
+	void *data;
 };
 
 int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index dfd00c6..45fcb71 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -10,6 +10,8 @@
 #include "comm.h"
 #include "unwind.h"
 
+#include <api/fs/fs.h>
+
 int thread__init_map_groups(struct thread *thread, struct machine *machine)
 {
 	struct thread *leader;
@@ -153,6 +155,23 @@
 	return 0;
 }
 
+int thread__set_comm_from_proc(struct thread *thread)
+{
+	char path[64];
+	char *comm = NULL;
+	size_t sz;
+	int err = -1;
+
+	if (snprintf(path, sizeof(path), "%d/task/%d/comm",
+		     thread->pid_, thread->tid) < (int)sizeof(path) &&
+	    procfs__read_str(path, &comm, &sz) == 0) {
+		comm[sz - 1] = '\0';
+		err = thread__set_comm(thread, comm, 0);
+	}
+
+	return err;
+}
+
 const char *thread__comm_str(const struct thread *thread)
 {
 	const struct comm *comm = thread__comm(thread);
@@ -233,7 +252,7 @@
 					struct addr_location *al)
 {
 	size_t i;
-	const u8 const cpumodes[] = {
+	const u8 cpumodes[] = {
 		PERF_RECORD_MISC_USER,
 		PERF_RECORD_MISC_KERNEL,
 		PERF_RECORD_MISC_GUEST_USER,
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index a0ac031..45fba13 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -9,6 +9,9 @@
 #include "symbol.h"
 #include <strlist.h>
 #include <intlist.h>
+#ifdef HAVE_LIBUNWIND_SUPPORT
+#include <libunwind.h>
+#endif
 
 struct thread_stack;
 
@@ -32,6 +35,9 @@
 
 	void			*priv;
 	struct thread_stack	*ts;
+#ifdef HAVE_LIBUNWIND_SUPPORT
+	unw_addr_space_t	addr_space;
+#endif
 };
 
 struct machine;
@@ -65,6 +71,8 @@
 	return __thread__set_comm(thread, comm, timestamp, false);
 }
 
+int thread__set_comm_from_proc(struct thread *thread);
+
 int thread__comm_len(struct thread *thread);
 struct comm *thread__comm(const struct thread *thread);
 struct comm *thread__exec_comm(const struct thread *thread);
diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c
index 267112b..5654fe1 100644
--- a/tools/perf/util/thread_map.c
+++ b/tools/perf/util/thread_map.c
@@ -260,7 +260,7 @@
 	return threads;
 }
 
-static struct thread_map *thread_map__new_by_tid_str(const char *tid_str)
+struct thread_map *thread_map__new_by_tid_str(const char *tid_str)
 {
 	struct thread_map *threads = NULL, *nt;
 	int ntasks = 0;
@@ -436,3 +436,15 @@
 
 	return threads;
 }
+
+bool thread_map__has(struct thread_map *threads, pid_t pid)
+{
+	int i;
+
+	for (i = 0; i < threads->nr; ++i) {
+		if (threads->map[i].pid == pid)
+			return true;
+	}
+
+	return false;
+}
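
thread_map__has() is a straightforward linear membership test over the map's pid
array. A minimal sketch of a caller, with the helper name hypothetical:

#include <stdbool.h>
#include <sys/types.h>
#include "thread_map.h"

/* Sketch: treat a NULL map as "no filter" and accept every tid. */
static bool tid_is_in_filter(struct thread_map *threads, pid_t tid)
{
	return !threads || thread_map__has(threads, tid);
}
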
diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h
index 85e4c7c..bd3b971 100644
--- a/tools/perf/util/thread_map.h
+++ b/tools/perf/util/thread_map.h
@@ -31,6 +31,8 @@
 struct thread_map *thread_map__new_str(const char *pid,
 		const char *tid, uid_t uid);
 
+struct thread_map *thread_map__new_by_tid_str(const char *tid_str);
+
 size_t thread_map__fprintf(struct thread_map *threads, FILE *fp);
 
 static inline int thread_map__nr(struct thread_map *threads)
@@ -55,4 +57,5 @@
 }
 
 void thread_map__read_comms(struct thread_map *threads);
+bool thread_map__has(struct thread_map *threads, pid_t pid);
 #endif	/* __PERF_THREAD_MAP_H */
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index 55de4cf..ac2590a 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -57,6 +57,7 @@
 			id_index,
 			auxtrace_info,
 			auxtrace_error,
+			time_conv,
 			thread_map,
 			cpu_map,
 			stat_config,
diff --git a/tools/perf/util/trigger.h b/tools/perf/util/trigger.h
new file mode 100644
index 0000000..e97d701
--- /dev/null
+++ b/tools/perf/util/trigger.h
@@ -0,0 +1,94 @@
+#ifndef __TRIGGER_H_
+#define __TRIGGER_H_ 1
+
+#include "util/debug.h"
+#include "asm/bug.h"
+
+/*
+ * Use a trigger to model an operation which needs to be executed when
+ * an event (a signal, for example) is observed.
+ *
+ * States and transitions:
+ *
+ *
+ *  OFF--(on)--> READY --(hit)--> HIT
+ *                 ^               |
+ *                 |            (ready)
+ *                 |               |
+ *                  \_____________/
+ *
+ * is_hit and is_ready are the two key functions for querying the state
+ * of a trigger. is_hit means the event has already happened; is_ready
+ * means the trigger is waiting for the event.
+ */
+
+struct trigger {
+	volatile enum {
+		TRIGGER_ERROR		= -2,
+		TRIGGER_OFF		= -1,
+		TRIGGER_READY		= 0,
+		TRIGGER_HIT		= 1,
+	} state;
+	const char *name;
+};
+
+#define TRIGGER_WARN_ONCE(t, exp) \
+	WARN_ONCE(t->state != exp, "trigger '%s' state transition error: %d in %s()\n", \
+		  t->name, t->state, __func__)
+
+static inline bool trigger_is_available(struct trigger *t)
+{
+	return t->state >= 0;
+}
+
+static inline bool trigger_is_error(struct trigger *t)
+{
+	return t->state <= TRIGGER_ERROR;
+}
+
+static inline void trigger_on(struct trigger *t)
+{
+	TRIGGER_WARN_ONCE(t, TRIGGER_OFF);
+	t->state = TRIGGER_READY;
+}
+
+static inline void trigger_ready(struct trigger *t)
+{
+	if (!trigger_is_available(t))
+		return;
+	t->state = TRIGGER_READY;
+}
+
+static inline void trigger_hit(struct trigger *t)
+{
+	if (!trigger_is_available(t))
+		return;
+	TRIGGER_WARN_ONCE(t, TRIGGER_READY);
+	t->state = TRIGGER_HIT;
+}
+
+static inline void trigger_off(struct trigger *t)
+{
+	if (!trigger_is_available(t))
+		return;
+	t->state = TRIGGER_OFF;
+}
+
+static inline void trigger_error(struct trigger *t)
+{
+	t->state = TRIGGER_ERROR;
+}
+
+static inline bool trigger_is_ready(struct trigger *t)
+{
+	return t->state == TRIGGER_READY;
+}
+
+static inline bool trigger_is_hit(struct trigger *t)
+{
+	return t->state == TRIGGER_HIT;
+}
+
+#define DEFINE_TRIGGER(n) \
+struct trigger n = {.state = TRIGGER_OFF, .name = #n}
+#endif
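
The trigger API is header-only and self-contained. A minimal usage sketch, not
part of this patch and with the signal choice hypothetical, showing the
OFF -> READY -> HIT -> READY cycle:

#include <signal.h>
#include "util/trigger.h"

static volatile int done;

DEFINE_TRIGGER(output_trigger);			/* starts in TRIGGER_OFF */

static void sig_handler(int sig)
{
	if (sig == SIGUSR2)
		trigger_hit(&output_trigger);	/* READY -> HIT */
	else
		done = 1;
}

static void record_loop(void)
{
	trigger_on(&output_trigger);		/* OFF -> READY */

	while (!done) {
		/* ... do the periodic work here ... */
		if (trigger_is_hit(&output_trigger)) {
			/* react to the event, then re-arm: HIT -> READY */
			trigger_ready(&output_trigger);
		}
	}

	trigger_off(&output_trigger);		/* stop observing the event */
}
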
diff --git a/tools/perf/util/tsc.h b/tools/perf/util/tsc.h
index a8b78f1..d5b11e2 100644
--- a/tools/perf/util/tsc.h
+++ b/tools/perf/util/tsc.h
@@ -3,10 +3,29 @@
 
 #include <linux/types.h>
 
-#include "../arch/x86/util/tsc.h"
+#include "event.h"
+
+struct perf_tsc_conversion {
+	u16 time_shift;
+	u32 time_mult;
+	u64 time_zero;
+};
+struct perf_event_mmap_page;
+
+int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
+			     struct perf_tsc_conversion *tc);
 
 u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc);
 u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc);
 u64 rdtsc(void);
 
+struct perf_event_mmap_page;
+struct perf_tool;
+struct machine;
+
+int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc,
+				struct perf_tool *tool,
+				perf_event__handler_t process,
+				struct machine *machine);
+
 #endif
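
struct perf_tsc_conversion carries the time_shift/time_mult/time_zero triple
exported through the perf_event mmap page; the conversion these helpers perform
is the documented fixed-point formula. A sketch equivalent to tsc_to_perf_time(),
split into quotient and remainder so the multiply cannot overflow 64 bits:

#include <linux/types.h>

static u64 tsc_to_ns(u64 cyc, const struct perf_tsc_conversion *tc)
{
	u64 quot = cyc >> tc->time_shift;
	u64 rem  = cyc & (((u64)1 << tc->time_shift) - 1);

	/* ns = time_zero + (cyc * time_mult) >> time_shift */
	return tc->time_zero + quot * tc->time_mult +
	       ((rem * tc->time_mult) >> tc->time_shift);
}
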
diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c
index ee7e372..63687d3 100644
--- a/tools/perf/util/unwind-libunwind.c
+++ b/tools/perf/util/unwind-libunwind.c
@@ -32,6 +32,7 @@
 #include "symbol.h"
 #include "util.h"
 #include "debug.h"
+#include "asm/bug.h"
 
 extern int
 UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as,
@@ -580,43 +581,33 @@
 
 int unwind__prepare_access(struct thread *thread)
 {
-	unw_addr_space_t addr_space;
-
 	if (callchain_param.record_mode != CALLCHAIN_DWARF)
 		return 0;
 
-	addr_space = unw_create_addr_space(&accessors, 0);
-	if (!addr_space) {
+	thread->addr_space = unw_create_addr_space(&accessors, 0);
+	if (!thread->addr_space) {
 		pr_err("unwind: Can't create unwind address space.\n");
 		return -ENOMEM;
 	}
 
-	unw_set_caching_policy(addr_space, UNW_CACHE_GLOBAL);
-	thread__set_priv(thread, addr_space);
-
+	unw_set_caching_policy(thread->addr_space, UNW_CACHE_GLOBAL);
 	return 0;
 }
 
 void unwind__flush_access(struct thread *thread)
 {
-	unw_addr_space_t addr_space;
-
 	if (callchain_param.record_mode != CALLCHAIN_DWARF)
 		return;
 
-	addr_space = thread__priv(thread);
-	unw_flush_cache(addr_space, 0, 0);
+	unw_flush_cache(thread->addr_space, 0, 0);
 }
 
 void unwind__finish_access(struct thread *thread)
 {
-	unw_addr_space_t addr_space;
-
 	if (callchain_param.record_mode != CALLCHAIN_DWARF)
 		return;
 
-	addr_space = thread__priv(thread);
-	unw_destroy_addr_space(addr_space);
+	unw_destroy_addr_space(thread->addr_space);
 }
 
 static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb,
@@ -639,7 +630,9 @@
 	 * unwind itself.
 	 */
 	if (max_stack - 1 > 0) {
-		addr_space = thread__priv(ui->thread);
+		WARN_ONCE(!ui->thread, "WARNING: ui->thread is NULL");
+		addr_space = ui->thread->addr_space;
+
 		if (addr_space == NULL)
 			return -1;
 
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index b7766c5..eab077a 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -33,6 +33,8 @@
 unsigned int page_size;
 int cacheline_size;
 
+unsigned int sysctl_perf_event_max_stack = PERF_MAX_STACK_DEPTH;
+
 bool test_attr__enabled;
 
 bool perf_host  = true;
@@ -117,6 +119,40 @@
 	return rmdir(path);
 }
 
+/* A filter which removes dot files */
+bool lsdir_no_dot_filter(const char *name __maybe_unused, struct dirent *d)
+{
+	return d->d_name[0] != '.';
+}
+
+/* lsdir reads a directory and stores its entries in a strlist */
+struct strlist *lsdir(const char *name,
+		      bool (*filter)(const char *, struct dirent *))
+{
+	struct strlist *list = NULL;
+	DIR *dir;
+	struct dirent *d;
+
+	dir = opendir(name);
+	if (!dir)
+		return NULL;
+
+	list = strlist__new(NULL, NULL);
+	if (!list) {
+		errno = ENOMEM;
+		goto out;
+	}
+
+	while ((d = readdir(dir)) != NULL) {
+		if (!filter || filter(name, d))
+			strlist__add(list, d->d_name);
+	}
+
+out:
+	closedir(dir);
+	return list;
+}
+
 static int slow_copyfile(const char *from, const char *to)
 {
 	int err = -1;
@@ -471,7 +507,6 @@
 				       "needed for --call-graph fp\n");
 			break;
 
-#ifdef HAVE_DWARF_UNWIND_SUPPORT
 		/* Dwarf style */
 		} else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
 			const unsigned long default_stack_dump_size = 8192;
@@ -487,7 +522,6 @@
 				ret = get_stack_size(tok, &size);
 				param->dump_size = size;
 			}
-#endif /* HAVE_DWARF_UNWIND_SUPPORT */
 		} else if (!strncmp(name, "lbr", sizeof("lbr"))) {
 			if (!strtok_r(NULL, ",", &saveptr)) {
 				param->record_mode = CALLCHAIN_LBR;
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 8298d60..7651633 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -79,6 +79,7 @@
 #include <termios.h>
 #include <linux/bitops.h>
 #include <termios.h>
+#include "strlist.h"
 
 extern const char *graph_line;
 extern const char *graph_dotted_line;
@@ -159,12 +160,6 @@
 }
 #endif
 
-/*
- * Wrappers:
- */
-void *xrealloc(void *ptr, size_t size) __attribute__((weak));
-
-
 static inline void *zalloc(size_t size)
 {
 	return calloc(1, size);
@@ -222,6 +217,8 @@
 
 int mkdir_p(char *path, mode_t mode);
 int rm_rf(char *path);
+struct strlist *lsdir(const char *name, bool (*filter)(const char *, struct dirent *));
+bool lsdir_no_dot_filter(const char *name, struct dirent *d);
 int copyfile(const char *from, const char *to);
 int copyfile_mode(const char *from, const char *to, mode_t mode);
 int copyfile_offset(int fromfd, loff_t from_ofs, int tofd, loff_t to_ofs, u64 size);
@@ -254,11 +251,17 @@
 char *ltrim(char *s);
 char *rtrim(char *s);
 
+static inline char *trim(char *s)
+{
+	return ltrim(rtrim(s));
+}
+
 void dump_stack(void);
 void sighandler_dump_stack(int sig);
 
 extern unsigned int page_size;
 extern int cacheline_size;
+extern unsigned int sysctl_perf_event_max_stack;
 
 struct parse_tag {
 	char tag;
diff --git a/tools/perf/util/wrapper.c b/tools/perf/util/wrapper.c
deleted file mode 100644
index 5f1a07c..0000000
--- a/tools/perf/util/wrapper.c
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Various trivial helper wrappers around standard functions
- */
-#include "cache.h"
-
-/*
- * There's no pack memory to release - but stay close to the Git
- * version so wrap this away:
- */
-static inline void release_pack_memory(size_t size __maybe_unused,
-				       int flag __maybe_unused)
-{
-}
-
-void *xrealloc(void *ptr, size_t size)
-{
-	void *ret = realloc(ptr, size);
-	if (!ret && !size)
-		ret = realloc(ptr, 1);
-	if (!ret) {
-		release_pack_memory(size, -1);
-		ret = realloc(ptr, size);
-		if (!ret && !size)
-			ret = realloc(ptr, 1);
-		if (!ret)
-			die("Out of memory, realloc failed");
-	}
-	return ret;
-}
diff --git a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
index d0e6b85..546cf4a 100644
--- a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
+++ b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
@@ -91,7 +91,7 @@
 			 char *signature,
 			 u32 instance,
 			 struct acpi_table_header **table,
-			 acpi_physical_address * address);
+			 acpi_physical_address *address);
 
 static acpi_status osl_list_bios_tables(void);
 
@@ -99,7 +99,7 @@
 osl_get_bios_table(char *signature,
 		   u32 instance,
 		   struct acpi_table_header **table,
-		   acpi_physical_address * address);
+		   acpi_physical_address *address);
 
 static acpi_status osl_get_last_status(acpi_status default_status);
 
@@ -187,7 +187,7 @@
 
 acpi_status
 acpi_os_get_table_by_address(acpi_physical_address address,
-			     struct acpi_table_header ** table)
+			     struct acpi_table_header **table)
 {
 	u32 table_length;
 	struct acpi_table_header *mapped_table;
@@ -252,8 +252,8 @@
 acpi_status
 acpi_os_get_table_by_name(char *signature,
 			  u32 instance,
-			  struct acpi_table_header ** table,
-			  acpi_physical_address * address)
+			  struct acpi_table_header **table,
+			  acpi_physical_address *address)
 {
 	acpi_status status;
 
@@ -380,8 +380,8 @@
 
 acpi_status
 acpi_os_get_table_by_index(u32 index,
-			   struct acpi_table_header ** table,
-			   u32 *instance, acpi_physical_address * address)
+			   struct acpi_table_header **table,
+			   u32 *instance, acpi_physical_address *address)
 {
 	struct osl_table_info *info;
 	acpi_status status;
@@ -447,7 +447,7 @@
 		}
 	}
 
-	return ((acpi_physical_address) (address));
+	return ((acpi_physical_address)(address));
 }
 
 /******************************************************************************
@@ -751,10 +751,10 @@
 	for (i = 0; i < number_of_tables; ++i, table_data += item_size) {
 		if (osl_can_use_xsdt()) {
 			table_address =
-			    (acpi_physical_address) (*ACPI_CAST64(table_data));
+			    (acpi_physical_address)(*ACPI_CAST64(table_data));
 		} else {
 			table_address =
-			    (acpi_physical_address) (*ACPI_CAST32(table_data));
+			    (acpi_physical_address)(*ACPI_CAST32(table_data));
 		}
 
 		/* Skip NULL entries in RSDT/XSDT */
@@ -800,7 +800,7 @@
 osl_get_bios_table(char *signature,
 		   u32 instance,
 		   struct acpi_table_header **table,
-		   acpi_physical_address * address)
+		   acpi_physical_address *address)
 {
 	struct acpi_table_header *local_table = NULL;
 	struct acpi_table_header *mapped_table = NULL;
@@ -833,38 +833,37 @@
 			if ((gbl_fadt->header.length >= MIN_FADT_FOR_XDSDT) &&
 			    gbl_fadt->Xdsdt) {
 				table_address =
-				    (acpi_physical_address) gbl_fadt->Xdsdt;
+				    (acpi_physical_address)gbl_fadt->Xdsdt;
 			} else
 			    if ((gbl_fadt->header.length >= MIN_FADT_FOR_DSDT)
 				&& gbl_fadt->dsdt) {
 				table_address =
-				    (acpi_physical_address) gbl_fadt->dsdt;
+				    (acpi_physical_address)gbl_fadt->dsdt;
 			}
 		} else if (ACPI_COMPARE_NAME(signature, ACPI_SIG_FACS)) {
 			if ((gbl_fadt->header.length >= MIN_FADT_FOR_XFACS) &&
 			    gbl_fadt->Xfacs) {
 				table_address =
-				    (acpi_physical_address) gbl_fadt->Xfacs;
+				    (acpi_physical_address)gbl_fadt->Xfacs;
 			} else
 			    if ((gbl_fadt->header.length >= MIN_FADT_FOR_FACS)
 				&& gbl_fadt->facs) {
 				table_address =
-				    (acpi_physical_address) gbl_fadt->facs;
+				    (acpi_physical_address)gbl_fadt->facs;
 			}
 		} else if (ACPI_COMPARE_NAME(signature, ACPI_SIG_XSDT)) {
 			if (!gbl_revision) {
 				return (AE_BAD_SIGNATURE);
 			}
 			table_address =
-			    (acpi_physical_address) gbl_rsdp.
+			    (acpi_physical_address)gbl_rsdp.
 			    xsdt_physical_address;
 		} else if (ACPI_COMPARE_NAME(signature, ACPI_SIG_RSDT)) {
 			table_address =
-			    (acpi_physical_address) gbl_rsdp.
+			    (acpi_physical_address)gbl_rsdp.
 			    rsdt_physical_address;
 		} else {
-			table_address =
-			    (acpi_physical_address) gbl_rsdp_address;
+			table_address = (acpi_physical_address)gbl_rsdp_address;
 			signature = ACPI_SIG_RSDP;
 		}
 
@@ -904,12 +903,12 @@
 		for (i = 0; i < number_of_tables; ++i, table_data += item_size) {
 			if (osl_can_use_xsdt()) {
 				table_address =
-				    (acpi_physical_address) (*ACPI_CAST64
-							     (table_data));
+				    (acpi_physical_address)(*ACPI_CAST64
+							    (table_data));
 			} else {
 				table_address =
-				    (acpi_physical_address) (*ACPI_CAST32
-							     (table_data));
+				    (acpi_physical_address)(*ACPI_CAST32
+							    (table_data));
 			}
 
 			/* Skip NULL entries in RSDT/XSDT */
@@ -1301,7 +1300,7 @@
 			 char *signature,
 			 u32 instance,
 			 struct acpi_table_header **table,
-			 acpi_physical_address * address)
+			 acpi_physical_address *address)
 {
 	void *table_dir;
 	u32 current_instance = 0;
diff --git a/tools/power/acpi/os_specific/service_layers/osunixmap.c b/tools/power/acpi/os_specific/service_layers/osunixmap.c
index 3818fd0..cbfbce1 100644
--- a/tools/power/acpi/os_specific/service_layers/osunixmap.c
+++ b/tools/power/acpi/os_specific/service_layers/osunixmap.c
@@ -54,7 +54,7 @@
 #ifndef O_BINARY
 #define O_BINARY 0
 #endif
-#if defined(_dragon_fly) || defined(_free_BSD)
+#if defined(_dragon_fly) || defined(_free_BSD) || defined(_QNX)
 #define MMAP_FLAGS          MAP_SHARED
 #else
 #define MMAP_FLAGS          MAP_PRIVATE
diff --git a/tools/power/acpi/os_specific/service_layers/osunixxf.c b/tools/power/acpi/os_specific/service_layers/osunixxf.c
index 08cb8b2..88aa66e 100644
--- a/tools/power/acpi/os_specific/service_layers/osunixxf.c
+++ b/tools/power/acpi/os_specific/service_layers/osunixxf.c
@@ -246,8 +246,8 @@
  *****************************************************************************/
 
 acpi_status
-acpi_os_predefined_override(const struct acpi_predefined_names * init_val,
-			    acpi_string * new_val)
+acpi_os_predefined_override(const struct acpi_predefined_names *init_val,
+			    acpi_string *new_val)
 {
 
 	if (!init_val || !new_val) {
@@ -274,8 +274,8 @@
  *****************************************************************************/
 
 acpi_status
-acpi_os_table_override(struct acpi_table_header * existing_table,
-		       struct acpi_table_header ** new_table)
+acpi_os_table_override(struct acpi_table_header *existing_table,
+		       struct acpi_table_header **new_table)
 {
 
 	if (!existing_table || !new_table) {
@@ -311,8 +311,8 @@
  *****************************************************************************/
 
 acpi_status
-acpi_os_physical_table_override(struct acpi_table_header * existing_table,
-				acpi_physical_address * new_address,
+acpi_os_physical_table_override(struct acpi_table_header *existing_table,
+				acpi_physical_address *new_address,
 				u32 *new_table_length)
 {
 
@@ -506,7 +506,7 @@
 void *acpi_os_map_memory(acpi_physical_address where, acpi_size length)
 {
 
-	return (ACPI_TO_POINTER((acpi_size) where));
+	return (ACPI_TO_POINTER((acpi_size)where));
 }
 
 /******************************************************************************
@@ -603,9 +603,9 @@
 
 acpi_status
 acpi_os_create_semaphore(u32 max_units,
-			 u32 initial_units, acpi_handle * out_handle)
+			 u32 initial_units, acpi_handle *out_handle)
 {
-	*out_handle = (acpi_handle) 1;
+	*out_handle = (acpi_handle)1;
 	return (AE_OK);
 }
 
@@ -640,7 +640,7 @@
 
 acpi_status
 acpi_os_create_semaphore(u32 max_units,
-			 u32 initial_units, acpi_handle * out_handle)
+			 u32 initial_units, acpi_handle *out_handle)
 {
 	sem_t *sem;
 
@@ -672,7 +672,7 @@
 	}
 #endif
 
-	*out_handle = (acpi_handle) sem;
+	*out_handle = (acpi_handle)sem;
 	return (AE_OK);
 }
 
@@ -1035,7 +1035,7 @@
  *****************************************************************************/
 
 acpi_status
-acpi_os_write_pci_configuration(struct acpi_pci_id * pci_id,
+acpi_os_write_pci_configuration(struct acpi_pci_id *pci_id,
 				u32 pci_register, u64 value, u32 width)
 {
 
diff --git a/tools/power/acpi/tools/acpidbg/acpidbg.c b/tools/power/acpi/tools/acpidbg/acpidbg.c
index d070fcc..a88ac45 100644
--- a/tools/power/acpi/tools/acpidbg/acpidbg.c
+++ b/tools/power/acpi/tools/acpidbg/acpidbg.c
@@ -375,7 +375,7 @@
 
 int main(int argc, char **argv)
 {
-	int fd = 0;
+	int fd = -1;
 	int ch;
 	int len;
 	int ret = EXIT_SUCCESS;
@@ -430,7 +430,7 @@
 	acpi_aml_loop(fd);
 
 exit:
-	if (fd < 0)
+	if (fd >= 0)
 		close(fd);
 	if (acpi_aml_batch_cmd)
 		free(acpi_aml_batch_cmd);
diff --git a/tools/power/acpi/tools/acpidump/Makefile b/tools/power/acpi/tools/acpidump/Makefile
index 8d76157..2942cdc 100644
--- a/tools/power/acpi/tools/acpidump/Makefile
+++ b/tools/power/acpi/tools/acpidump/Makefile
@@ -31,6 +31,7 @@
 	osunixxf.o\
 	tbprint.o\
 	tbxfroot.o\
+	utascii.o\
 	utbuffer.o\
 	utdebug.o\
 	utexcep.o\
diff --git a/tools/power/acpi/tools/acpidump/apdump.c b/tools/power/acpi/tools/acpidump/apdump.c
index da44458..fb8f1d9 100644
--- a/tools/power/acpi/tools/acpidump/apdump.c
+++ b/tools/power/acpi/tools/acpidump/apdump.c
@@ -68,7 +68,7 @@
 
 		/* Make sure signature is all ASCII and a valid ACPI name */
 
-		if (!acpi_ut_valid_acpi_name(table->signature)) {
+		if (!acpi_ut_valid_nameseg(table->signature)) {
 			acpi_log_error("Table signature (0x%8.8X) is invalid\n",
 				       *(u32 *)table->signature);
 			return (FALSE);
@@ -286,14 +286,15 @@
 
 	/* Convert argument to an integer physical address */
 
-	status = acpi_ut_strtoul64(ascii_address, 0, &long_address);
+	status = acpi_ut_strtoul64(ascii_address, ACPI_ANY_BASE,
+				   ACPI_MAX64_BYTE_WIDTH, &long_address);
 	if (ACPI_FAILURE(status)) {
 		acpi_log_error("%s: Could not convert to a physical address\n",
 			       ascii_address);
 		return (-1);
 	}
 
-	address = (acpi_physical_address) long_address;
+	address = (acpi_physical_address)long_address;
 	status = acpi_os_get_table_by_address(address, &table);
 	if (ACPI_FAILURE(status)) {
 		acpi_log_error("Could not get table at 0x%8.8X%8.8X, %s\n",
@@ -406,6 +407,12 @@
 		return (-1);
 	}
 
+	if (!acpi_ut_valid_nameseg(table->signature)) {
+		acpi_log_error
+		    ("No valid ACPI signature was found in input file %s\n",
+		     pathname);
+	}
+
 	/* File must be at least as long as the table length */
 
 	if (table->length > file_size) {
diff --git a/tools/power/acpi/tools/acpidump/apmain.c b/tools/power/acpi/tools/acpidump/apmain.c
index c3c0915..7692e6b 100644
--- a/tools/power/acpi/tools/acpidump/apmain.c
+++ b/tools/power/acpi/tools/acpidump/apmain.c
@@ -209,7 +209,8 @@
 		case 'r':	/* Dump tables from specified RSDP */
 
 			status =
-			    acpi_ut_strtoul64(acpi_gbl_optarg, 0,
+			    acpi_ut_strtoul64(acpi_gbl_optarg, ACPI_ANY_BASE,
+					      ACPI_MAX64_BYTE_WIDTH,
 					      &gbl_rsdp_base);
 			if (ACPI_FAILURE(status)) {
 				acpi_log_error
diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile
index 0adaf0c..8358863 100644
--- a/tools/power/cpupower/Makefile
+++ b/tools/power/cpupower/Makefile
@@ -63,7 +63,7 @@
 # and _should_ modify the PACKAGE_BUGREPORT definition
 
 VERSION=			$(shell ./utils/version-gen.sh)
-LIB_MAJ=			0.0.0
+LIB_MAJ=			0.0.1
 LIB_MIN=			0
 
 PACKAGE =			cpupower
@@ -129,7 +129,7 @@
 CFLAGS += -DVERSION=\"$(VERSION)\" -DPACKAGE=\"$(PACKAGE)\" \
 		-DPACKAGE_BUGREPORT=\"$(PACKAGE_BUGREPORT)\" -D_GNU_SOURCE
 
-UTIL_OBJS =  utils/helpers/amd.o utils/helpers/topology.o utils/helpers/msr.o \
+UTIL_OBJS =  utils/helpers/amd.o utils/helpers/msr.o \
 	utils/helpers/sysfs.o utils/helpers/misc.o utils/helpers/cpuid.o \
 	utils/helpers/pci.o utils/helpers/bitmask.o \
 	utils/idle_monitor/nhm_idle.o utils/idle_monitor/snb_idle.o \
@@ -148,9 +148,9 @@
 	utils/helpers/bitmask.h \
 	utils/idle_monitor/idle_monitors.h utils/idle_monitor/idle_monitors.def
 
-LIB_HEADERS = 	lib/cpufreq.h lib/sysfs.h
-LIB_SRC = 	lib/cpufreq.c lib/sysfs.c
-LIB_OBJS = 	lib/cpufreq.o lib/sysfs.o
+LIB_HEADERS = 	lib/cpufreq.h lib/cpupower.h lib/cpuidle.h
+LIB_SRC = 	lib/cpufreq.c lib/cpupower.c lib/cpuidle.c
+LIB_OBJS = 	lib/cpufreq.o lib/cpupower.o lib/cpuidle.o
 LIB_OBJS :=	$(addprefix $(OUTPUT),$(LIB_OBJS))
 
 CFLAGS +=	-pipe
@@ -280,6 +280,7 @@
 	$(CP) $(OUTPUT)libcpupower.so* $(DESTDIR)${libdir}/
 	$(INSTALL) -d $(DESTDIR)${includedir}
 	$(INSTALL_DATA) lib/cpufreq.h $(DESTDIR)${includedir}/cpufreq.h
+	$(INSTALL_DATA) lib/cpuidle.h $(DESTDIR)${includedir}/cpuidle.h
 
 install-tools:
 	$(INSTALL) -d $(DESTDIR)${bindir}
@@ -315,6 +316,7 @@
 uninstall:
 	- rm -f $(DESTDIR)${libdir}/libcpupower.*
 	- rm -f $(DESTDIR)${includedir}/cpufreq.h
+	- rm -f $(DESTDIR)${includedir}/cpuidle.h
 	- rm -f $(DESTDIR)${bindir}/utils/cpupower
 	- rm -f $(DESTDIR)${mandir}/man1/cpupower.1
 	- rm -f $(DESTDIR)${mandir}/man1/cpupower-frequency-set.1
diff --git a/tools/power/cpupower/bench/Makefile b/tools/power/cpupower/bench/Makefile
index d0f879b..3e59f1a 100644
--- a/tools/power/cpupower/bench/Makefile
+++ b/tools/power/cpupower/bench/Makefile
@@ -22,7 +22,7 @@
 
 $(OUTPUT)cpufreq-bench: $(OBJS)
 	$(ECHO) "  CC      " $@
-	$(QUIET) $(CC) -o $@ $(CFLAGS) $(OBJS) $(LIBS)
+	$(QUIET) $(CC) -o $@ $(CFLAGS) $(LDFLAGS) $(OBJS) $(LIBS)
 
 all: $(OUTPUT)cpufreq-bench
 
diff --git a/tools/power/cpupower/bench/README-BENCH b/tools/power/cpupower/bench/README-BENCH
index 8093ec7..97727ae 100644
--- a/tools/power/cpupower/bench/README-BENCH
+++ b/tools/power/cpupower/bench/README-BENCH
@@ -113,7 +113,7 @@
 -c, --cpu=<unsigned int>        CPU Number to use, starting at 0
 -p, --prio=<priority>           scheduler priority, HIGH, LOW or DEFAULT
 -g, --governor=<governor>       cpufreq governor to test
--n, --cycles=<int>              load/sleep cycles to get an avarage value to compare
+-n, --cycles=<int>              load/sleep cycles to get an average value to compare
 -r, --rounds<int>               load/sleep rounds
 -f, --file=<configfile>         config file to use
 -o, --output=<dir>              output dir, must exist
diff --git a/tools/power/cpupower/bench/benchmark.c b/tools/power/cpupower/bench/benchmark.c
index 81b1c48..429d51a 100644
--- a/tools/power/cpupower/bench/benchmark.c
+++ b/tools/power/cpupower/bench/benchmark.c
@@ -130,7 +130,7 @@
 			_round, load_time, sleep_time);
 
 		if (config->verbose)
-			printf("avarage: %lius, rps:%li\n",
+			printf("average: %lius, rps:%li\n",
 				load_time / calculations,
 				1000000 * calculations / load_time);
 
@@ -177,7 +177,7 @@
 
 		progress_time += sleep_time + load_time;
 
-		/* compare the avarage sleep/load cycles  */
+		/* compare the average sleep/load cycles  */
 		fprintf(config->output, "%li ",
 			powersave_time / config->cycles);
 		fprintf(config->output, "%.3f\n",
diff --git a/tools/power/cpupower/bench/parse.c b/tools/power/cpupower/bench/parse.c
index f503fb5..9b65f05 100644
--- a/tools/power/cpupower/bench/parse.c
+++ b/tools/power/cpupower/bench/parse.c
@@ -65,7 +65,7 @@
 {
 	FILE *output = NULL;
 	int len;
-	char *filename;
+	char *filename, *filename_tmp;
 	struct utsname sysdata;
 	DIR *dir;
 
@@ -81,16 +81,22 @@
 
 	len = strlen(dirname) + 30;
 	filename = malloc(sizeof(char) * len);
+	if (!filename) {
+		perror("malloc");
+		goto out_dir;
+	}
 
 	if (uname(&sysdata) == 0) {
 		len += strlen(sysdata.nodename) + strlen(sysdata.release);
-		filename = realloc(filename, sizeof(char) * len);
+		filename_tmp = realloc(filename, sizeof(*filename) * len);
 
-		if (filename == NULL) {
+		if (filename_tmp == NULL) {
+			free(filename);
 			perror("realloc");
-			return NULL;
+			goto out_dir;
 		}
 
+		filename = filename_tmp;
 		snprintf(filename, len - 1, "%s/benchmark_%s_%s_%li.log",
 			dirname, sysdata.nodename, sysdata.release, time(NULL));
 	} else {
@@ -104,12 +110,16 @@
 	if (output == NULL) {
 		perror("fopen");
 		fprintf(stderr, "error: unable to open logfile\n");
+		goto out;
 	}
 
 	fprintf(stdout, "Logfile: %s\n", filename);
 
-	free(filename);
 	fprintf(output, "#round load sleep performance powersave percentage\n");
+out:
+	free(filename);
+out_dir:
+	closedir(dir);
 	return output;
 }
 
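The parse.c hunk above fixes the classic p = realloc(p, n) leak: on failure
realloc() returns NULL but leaves the old block allocated, so assigning the
result straight back loses the only pointer to it. The safe idiom in isolation:

#include <stdlib.h>

/* Grow buf to new_len; on failure the original block is freed, not leaked. */
static char *grow_or_free(char *buf, size_t new_len)
{
	char *tmp = realloc(buf, new_len);

	if (!tmp) {
		free(buf);
		return NULL;
	}
	return tmp;
}
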
diff --git a/tools/power/cpupower/bench/system.c b/tools/power/cpupower/bench/system.c
index f01e3f4..c25a74a 100644
--- a/tools/power/cpupower/bench/system.c
+++ b/tools/power/cpupower/bench/system.c
@@ -26,6 +26,7 @@
 #include <sched.h>
 
 #include <cpufreq.h>
+#include <cpupower.h>
 
 #include "config.h"
 #include "system.h"
@@ -60,7 +61,7 @@
 
 	dprintf("set %s as cpufreq governor\n", governor);
 
-	if (cpufreq_cpu_exists(cpu) != 0) {
+	if (cpupower_is_cpu_online(cpu) != 0) {
 		perror("cpufreq_cpu_exists");
 		fprintf(stderr, "error: cpu %u does not exist\n", cpu);
 		return -1;
diff --git a/tools/power/cpupower/lib/cpufreq.c b/tools/power/cpupower/lib/cpufreq.c
index d961101..1b993fe 100644
--- a/tools/power/cpupower/lib/cpufreq.c
+++ b/tools/power/cpupower/lib/cpufreq.c
@@ -9,28 +9,190 @@
 #include <errno.h>
 #include <stdlib.h>
 #include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
 
 #include "cpufreq.h"
-#include "sysfs.h"
+#include "cpupower_intern.h"
 
-int cpufreq_cpu_exists(unsigned int cpu)
+/* CPUFREQ sysfs access **************************************************/
+
+/* helper function to read file from /sys into given buffer */
+/* fname is a relative path under "cpuX/cpufreq" dir */
+static unsigned int sysfs_cpufreq_read_file(unsigned int cpu, const char *fname,
+					    char *buf, size_t buflen)
 {
-	return sysfs_cpu_exists(cpu);
+	char path[SYSFS_PATH_MAX];
+
+	snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpufreq/%s",
+			 cpu, fname);
+	return sysfs_read_file(path, buf, buflen);
 }
 
+/* helper function to write a new value to a /sys file */
+/* fname is a relative path under "cpuX/cpufreq" dir */
+static unsigned int sysfs_cpufreq_write_file(unsigned int cpu,
+					     const char *fname,
+					     const char *value, size_t len)
+{
+	char path[SYSFS_PATH_MAX];
+	int fd;
+	ssize_t numwrite;
+
+	snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpufreq/%s",
+			 cpu, fname);
+
+	fd = open(path, O_WRONLY);
+	if (fd == -1)
+		return 0;
+
+	numwrite = write(fd, value, len);
+	if (numwrite < 1) {
+		close(fd);
+		return 0;
+	}
+
+	close(fd);
+
+	return (unsigned int) numwrite;
+}
+
+/* read access to files which contain one numeric value */
+
+enum cpufreq_value {
+	CPUINFO_CUR_FREQ,
+	CPUINFO_MIN_FREQ,
+	CPUINFO_MAX_FREQ,
+	CPUINFO_LATENCY,
+	SCALING_CUR_FREQ,
+	SCALING_MIN_FREQ,
+	SCALING_MAX_FREQ,
+	STATS_NUM_TRANSITIONS,
+	MAX_CPUFREQ_VALUE_READ_FILES
+};
+
+static const char *cpufreq_value_files[MAX_CPUFREQ_VALUE_READ_FILES] = {
+	[CPUINFO_CUR_FREQ] = "cpuinfo_cur_freq",
+	[CPUINFO_MIN_FREQ] = "cpuinfo_min_freq",
+	[CPUINFO_MAX_FREQ] = "cpuinfo_max_freq",
+	[CPUINFO_LATENCY]  = "cpuinfo_transition_latency",
+	[SCALING_CUR_FREQ] = "scaling_cur_freq",
+	[SCALING_MIN_FREQ] = "scaling_min_freq",
+	[SCALING_MAX_FREQ] = "scaling_max_freq",
+	[STATS_NUM_TRANSITIONS] = "stats/total_trans"
+};
+
+
+static unsigned long sysfs_cpufreq_get_one_value(unsigned int cpu,
+						 enum cpufreq_value which)
+{
+	unsigned long value;
+	unsigned int len;
+	char linebuf[MAX_LINE_LEN];
+	char *endp;
+
+	if (which >= MAX_CPUFREQ_VALUE_READ_FILES)
+		return 0;
+
+	len = sysfs_cpufreq_read_file(cpu, cpufreq_value_files[which],
+				linebuf, sizeof(linebuf));
+
+	if (len == 0)
+		return 0;
+
+	value = strtoul(linebuf, &endp, 0);
+
+	if (endp == linebuf || errno == ERANGE)
+		return 0;
+
+	return value;
+}
+
+/* read access to files which contain one string */
+
+enum cpufreq_string {
+	SCALING_DRIVER,
+	SCALING_GOVERNOR,
+	MAX_CPUFREQ_STRING_FILES
+};
+
+static const char *cpufreq_string_files[MAX_CPUFREQ_STRING_FILES] = {
+	[SCALING_DRIVER] = "scaling_driver",
+	[SCALING_GOVERNOR] = "scaling_governor",
+};
+
+
+static char *sysfs_cpufreq_get_one_string(unsigned int cpu,
+					   enum cpufreq_string which)
+{
+	char linebuf[MAX_LINE_LEN];
+	char *result;
+	unsigned int len;
+
+	if (which >= MAX_CPUFREQ_STRING_FILES)
+		return NULL;
+
+	len = sysfs_cpufreq_read_file(cpu, cpufreq_string_files[which],
+				linebuf, sizeof(linebuf));
+	if (len == 0)
+		return NULL;
+
+	result = strdup(linebuf);
+	if (result == NULL)
+		return NULL;
+
+	if (result[strlen(result) - 1] == '\n')
+		result[strlen(result) - 1] = '\0';
+
+	return result;
+}
+
+/* write access */
+
+enum cpufreq_write {
+	WRITE_SCALING_MIN_FREQ,
+	WRITE_SCALING_MAX_FREQ,
+	WRITE_SCALING_GOVERNOR,
+	WRITE_SCALING_SET_SPEED,
+	MAX_CPUFREQ_WRITE_FILES
+};
+
+static const char *cpufreq_write_files[MAX_CPUFREQ_WRITE_FILES] = {
+	[WRITE_SCALING_MIN_FREQ] = "scaling_min_freq",
+	[WRITE_SCALING_MAX_FREQ] = "scaling_max_freq",
+	[WRITE_SCALING_GOVERNOR] = "scaling_governor",
+	[WRITE_SCALING_SET_SPEED] = "scaling_setspeed",
+};
+
+static int sysfs_cpufreq_write_one_value(unsigned int cpu,
+					 enum cpufreq_write which,
+					 const char *new_value, size_t len)
+{
+	if (which >= MAX_CPUFREQ_WRITE_FILES)
+		return 0;
+
+	if (sysfs_cpufreq_write_file(cpu, cpufreq_write_files[which],
+					new_value, len) != len)
+		return -ENODEV;
+
+	return 0;
+};
+
 unsigned long cpufreq_get_freq_kernel(unsigned int cpu)
 {
-	return sysfs_get_freq_kernel(cpu);
+	return sysfs_cpufreq_get_one_value(cpu, SCALING_CUR_FREQ);
 }
 
 unsigned long cpufreq_get_freq_hardware(unsigned int cpu)
 {
-	return sysfs_get_freq_hardware(cpu);
+	return sysfs_cpufreq_get_one_value(cpu, CPUINFO_CUR_FREQ);
 }
 
 unsigned long cpufreq_get_transition_latency(unsigned int cpu)
 {
-	return sysfs_get_freq_transition_latency(cpu);
+	return sysfs_cpufreq_get_one_value(cpu, CPUINFO_LATENCY);
 }
 
 int cpufreq_get_hardware_limits(unsigned int cpu,
@@ -39,12 +201,21 @@
 {
 	if ((!min) || (!max))
 		return -EINVAL;
-	return sysfs_get_freq_hardware_limits(cpu, min, max);
+
+	*min = sysfs_cpufreq_get_one_value(cpu, CPUINFO_MIN_FREQ);
+	if (!*min)
+		return -ENODEV;
+
+	*max = sysfs_cpufreq_get_one_value(cpu, CPUINFO_MAX_FREQ);
+	if (!*max)
+		return -ENODEV;
+
+	return 0;
 }
 
 char *cpufreq_get_driver(unsigned int cpu)
 {
-	return sysfs_get_freq_driver(cpu);
+	return sysfs_cpufreq_get_one_string(cpu, SCALING_DRIVER);
 }
 
 void cpufreq_put_driver(char *ptr)
@@ -56,7 +227,26 @@
 
 struct cpufreq_policy *cpufreq_get_policy(unsigned int cpu)
 {
-	return sysfs_get_freq_policy(cpu);
+	struct cpufreq_policy *policy;
+
+	policy = malloc(sizeof(struct cpufreq_policy));
+	if (!policy)
+		return NULL;
+
+	policy->governor = sysfs_cpufreq_get_one_string(cpu, SCALING_GOVERNOR);
+	if (!policy->governor) {
+		free(policy);
+		return NULL;
+	}
+	policy->min = sysfs_cpufreq_get_one_value(cpu, SCALING_MIN_FREQ);
+	policy->max = sysfs_cpufreq_get_one_value(cpu, SCALING_MAX_FREQ);
+	if ((!policy->min) || (!policy->max)) {
+		free(policy->governor);
+		free(policy);
+		return NULL;
+	}
+
+	return policy;
 }
 
 void cpufreq_put_policy(struct cpufreq_policy *policy)
@@ -72,7 +262,57 @@
 struct cpufreq_available_governors *cpufreq_get_available_governors(unsigned
 								int cpu)
 {
-	return sysfs_get_freq_available_governors(cpu);
+	struct cpufreq_available_governors *first = NULL;
+	struct cpufreq_available_governors *current = NULL;
+	char linebuf[MAX_LINE_LEN];
+	unsigned int pos, i;
+	unsigned int len;
+
+	len = sysfs_cpufreq_read_file(cpu, "scaling_available_governors",
+				linebuf, sizeof(linebuf));
+	if (len == 0)
+		return NULL;
+
+	pos = 0;
+	for (i = 0; i < len; i++) {
+		if (linebuf[i] == ' ' || linebuf[i] == '\n') {
+			if (i - pos < 2)
+				continue;
+			if (current) {
+				current->next = malloc(sizeof(*current));
+				if (!current->next)
+					goto error_out;
+				current = current->next;
+			} else {
+				first = malloc(sizeof(*first));
+				if (!first)
+					goto error_out;
+				current = first;
+			}
+			current->first = first;
+			current->next = NULL;
+
+			current->governor = malloc(i - pos + 1);
+			if (!current->governor)
+				goto error_out;
+
+			memcpy(current->governor, linebuf + pos, i - pos);
+			current->governor[i - pos] = '\0';
+			pos = i + 1;
+		}
+	}
+
+	return first;
+
+ error_out:
+	while (first) {
+		current = first->next;
+		if (first->governor)
+			free(first->governor);
+		free(first);
+		first = current;
+	}
+	return NULL;
 }
 
 void cpufreq_put_available_governors(struct cpufreq_available_governors *any)
@@ -96,7 +336,57 @@
 struct cpufreq_available_frequencies
 *cpufreq_get_available_frequencies(unsigned int cpu)
 {
-	return sysfs_get_available_frequencies(cpu);
+	struct cpufreq_available_frequencies *first = NULL;
+	struct cpufreq_available_frequencies *current = NULL;
+	char one_value[SYSFS_PATH_MAX];
+	char linebuf[MAX_LINE_LEN];
+	unsigned int pos, i;
+	unsigned int len;
+
+	len = sysfs_cpufreq_read_file(cpu, "scaling_available_frequencies",
+				linebuf, sizeof(linebuf));
+	if (len == 0)
+		return NULL;
+
+	pos = 0;
+	for (i = 0; i < len; i++) {
+		if (linebuf[i] == ' ' || linebuf[i] == '\n') {
+			if (i - pos < 2)
+				continue;
+			if (i - pos >= SYSFS_PATH_MAX)
+				goto error_out;
+			if (current) {
+				current->next = malloc(sizeof(*current));
+				if (!current->next)
+					goto error_out;
+				current = current->next;
+			} else {
+				first = malloc(sizeof(*first));
+				if (!first)
+					goto error_out;
+				current = first;
+			}
+			current->first = first;
+			current->next = NULL;
+
+			memcpy(one_value, linebuf + pos, i - pos);
+			one_value[i - pos] = '\0';
+			if (sscanf(one_value, "%lu", &current->frequency) != 1)
+				goto error_out;
+
+			pos = i + 1;
+		}
+	}
+
+	return first;
+
+ error_out:
+	while (first) {
+		current = first->next;
+		free(first);
+		first = current;
+	}
+	return NULL;
 }
 
 void cpufreq_put_available_frequencies(struct cpufreq_available_frequencies
@@ -114,10 +404,65 @@
 	}
 }
 
+static struct cpufreq_affected_cpus *sysfs_get_cpu_list(unsigned int cpu,
+							const char *file)
+{
+	struct cpufreq_affected_cpus *first = NULL;
+	struct cpufreq_affected_cpus *current = NULL;
+	char one_value[SYSFS_PATH_MAX];
+	char linebuf[MAX_LINE_LEN];
+	unsigned int pos, i;
+	unsigned int len;
+
+	len = sysfs_cpufreq_read_file(cpu, file, linebuf, sizeof(linebuf));
+	if (len == 0)
+		return NULL;
+
+	pos = 0;
+	for (i = 0; i < len; i++) {
+		if (i == len || linebuf[i] == ' ' || linebuf[i] == '\n') {
+			if (i - pos  < 1)
+				continue;
+			if (i - pos >= SYSFS_PATH_MAX)
+				goto error_out;
+			if (current) {
+				current->next = malloc(sizeof(*current));
+				if (!current->next)
+					goto error_out;
+				current = current->next;
+			} else {
+				first = malloc(sizeof(*first));
+				if (!first)
+					goto error_out;
+				current = first;
+			}
+			current->first = first;
+			current->next = NULL;
+
+			memcpy(one_value, linebuf + pos, i - pos);
+			one_value[i - pos] = '\0';
+
+			if (sscanf(one_value, "%u", &current->cpu) != 1)
+				goto error_out;
+
+			pos = i + 1;
+		}
+	}
+
+	return first;
+
+ error_out:
+	while (first) {
+		current = first->next;
+		free(first);
+		first = current;
+	}
+	return NULL;
+}
 
 struct cpufreq_affected_cpus *cpufreq_get_affected_cpus(unsigned int cpu)
 {
-	return sysfs_get_freq_affected_cpus(cpu);
+	return sysfs_get_cpu_list(cpu, "affected_cpus");
 }
 
 void cpufreq_put_affected_cpus(struct cpufreq_affected_cpus *any)
@@ -138,7 +483,7 @@
 
 struct cpufreq_affected_cpus *cpufreq_get_related_cpus(unsigned int cpu)
 {
-	return sysfs_get_freq_related_cpus(cpu);
+	return sysfs_get_cpu_list(cpu, "related_cpus");
 }
 
 void cpufreq_put_related_cpus(struct cpufreq_affected_cpus *any)
@@ -146,45 +491,208 @@
 	cpufreq_put_affected_cpus(any);
 }
 
+static int verify_gov(char *new_gov, char *passed_gov)
+{
+	unsigned int i, j = 0;
+
+	if (!passed_gov || (strlen(passed_gov) > 19))
+		return -EINVAL;
+
+	strncpy(new_gov, passed_gov, 20);
+	for (i = 0; i < 20; i++) {
+		if (j) {
+			new_gov[i] = '\0';
+			continue;
+		}
+		if ((new_gov[i] >= 'a') && (new_gov[i] <= 'z'))
+			continue;
+
+		if ((new_gov[i] >= 'A') && (new_gov[i] <= 'Z'))
+			continue;
+
+		if (new_gov[i] == '-')
+			continue;
+
+		if (new_gov[i] == '_')
+			continue;
+
+		if (new_gov[i] == '\0') {
+			j = 1;
+			continue;
+		}
+		return -EINVAL;
+	}
+	new_gov[19] = '\0';
+	return 0;
+}
 
 int cpufreq_set_policy(unsigned int cpu, struct cpufreq_policy *policy)
 {
+	char min[SYSFS_PATH_MAX];
+	char max[SYSFS_PATH_MAX];
+	char gov[SYSFS_PATH_MAX];
+	int ret;
+	unsigned long old_min;
+	int write_max_first;
+
 	if (!policy || !(policy->governor))
 		return -EINVAL;
 
-	return sysfs_set_freq_policy(cpu, policy);
+	if (policy->max < policy->min)
+		return -EINVAL;
+
+	if (verify_gov(gov, policy->governor))
+		return -EINVAL;
+
+	snprintf(min, SYSFS_PATH_MAX, "%lu", policy->min);
+	snprintf(max, SYSFS_PATH_MAX, "%lu", policy->max);
+
+	old_min = sysfs_cpufreq_get_one_value(cpu, SCALING_MIN_FREQ);
+	write_max_first = (old_min && (policy->max < old_min) ? 0 : 1);
+
+	if (write_max_first) {
+		ret = sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_MAX_FREQ,
+						    max, strlen(max));
+		if (ret)
+			return ret;
+	}
+
+	ret = sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_MIN_FREQ, min,
+					    strlen(min));
+	if (ret)
+		return ret;
+
+	if (!write_max_first) {
+		ret = sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_MAX_FREQ,
+						    max, strlen(max));
+		if (ret)
+			return ret;
+	}
+
+	return sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_GOVERNOR,
+					     gov, strlen(gov));
 }
 
 
 int cpufreq_modify_policy_min(unsigned int cpu, unsigned long min_freq)
 {
-	return sysfs_modify_freq_policy_min(cpu, min_freq);
+	char value[SYSFS_PATH_MAX];
+
+	snprintf(value, SYSFS_PATH_MAX, "%lu", min_freq);
+
+	return sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_MIN_FREQ,
+					     value, strlen(value));
 }
 
 
 int cpufreq_modify_policy_max(unsigned int cpu, unsigned long max_freq)
 {
-	return sysfs_modify_freq_policy_max(cpu, max_freq);
-}
+	char value[SYSFS_PATH_MAX];
 
+	snprintf(value, SYSFS_PATH_MAX, "%lu", max_freq);
+
+	return sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_MAX_FREQ,
+					     value, strlen(value));
+}
 
 int cpufreq_modify_policy_governor(unsigned int cpu, char *governor)
 {
+	char new_gov[SYSFS_PATH_MAX];
+
 	if ((!governor) || (strlen(governor) > 19))
 		return -EINVAL;
 
-	return sysfs_modify_freq_policy_governor(cpu, governor);
+	if (verify_gov(new_gov, governor))
+		return -EINVAL;
+
+	return sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_GOVERNOR,
+					     new_gov, strlen(new_gov));
 }
 
 int cpufreq_set_frequency(unsigned int cpu, unsigned long target_frequency)
 {
-	return sysfs_set_frequency(cpu, target_frequency);
+	struct cpufreq_policy *pol = cpufreq_get_policy(cpu);
+	char userspace_gov[] = "userspace";
+	char freq[SYSFS_PATH_MAX];
+	int ret;
+
+	if (!pol)
+		return -ENODEV;
+
+	if (strncmp(pol->governor, userspace_gov, 9) != 0) {
+		ret = cpufreq_modify_policy_governor(cpu, userspace_gov);
+		if (ret) {
+			cpufreq_put_policy(pol);
+			return ret;
+		}
+	}
+
+	cpufreq_put_policy(pol);
+
+	snprintf(freq, SYSFS_PATH_MAX, "%lu", target_frequency);
+
+	return sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_SET_SPEED,
+					     freq, strlen(freq));
 }
 
 struct cpufreq_stats *cpufreq_get_stats(unsigned int cpu,
 					unsigned long long *total_time)
 {
-	return sysfs_get_freq_stats(cpu, total_time);
+	struct cpufreq_stats *first = NULL;
+	struct cpufreq_stats *current = NULL;
+	char one_value[SYSFS_PATH_MAX];
+	char linebuf[MAX_LINE_LEN];
+	unsigned int pos, i;
+	unsigned int len;
+
+	len = sysfs_cpufreq_read_file(cpu, "stats/time_in_state",
+				linebuf, sizeof(linebuf));
+	if (len == 0)
+		return NULL;
+
+	*total_time = 0;
+	pos = 0;
+	for (i = 0; i < len; i++) {
+		if (i == strlen(linebuf) || linebuf[i] == '\n')	{
+			if (i - pos < 2)
+				continue;
+			if ((i - pos) >= SYSFS_PATH_MAX)
+				goto error_out;
+			if (current) {
+				current->next = malloc(sizeof(*current));
+				if (!current->next)
+					goto error_out;
+				current = current->next;
+			} else {
+				first = malloc(sizeof(*first));
+				if (!first)
+					goto error_out;
+				current = first;
+			}
+			current->first = first;
+			current->next = NULL;
+
+			memcpy(one_value, linebuf + pos, i - pos);
+			one_value[i - pos] = '\0';
+			if (sscanf(one_value, "%lu %llu",
+					&current->frequency,
+					&current->time_in_state) != 2)
+				goto error_out;
+
+			*total_time = *total_time + current->time_in_state;
+			pos = i + 1;
+		}
+	}
+
+	return first;
+
+ error_out:
+	while (first) {
+		current = first->next;
+		free(first);
+		first = current;
+	}
+	return NULL;
 }
 
 void cpufreq_put_stats(struct cpufreq_stats *any)
@@ -204,5 +712,5 @@
 
 unsigned long cpufreq_get_transitions(unsigned int cpu)
 {
-	return sysfs_get_freq_transitions(cpu);
+	return sysfs_cpufreq_get_one_value(cpu, STATS_NUM_TRANSITIONS);
 }
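
With the sysfs plumbing folded into cpufreq.c, the public getters can be
exercised directly. A minimal sketch against the library API, with the cpu
number assumed valid:

#include <stdio.h>
#include "cpufreq.h"

static void show_policy(unsigned int cpu)
{
	struct cpufreq_policy *pol = cpufreq_get_policy(cpu);

	if (!pol)
		return;

	printf("cpu%u: %lu..%lu kHz, governor %s\n",
	       cpu, pol->min, pol->max, pol->governor);
	cpufreq_put_policy(pol);	/* frees the governor string too */
}

Note also that cpufreq_set_policy() deliberately orders the two limit writes
(write_max_first): when the new maximum would fall below the current minimum,
the minimum is lowered first, so the kernel never sees a transient max < min.
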
diff --git a/tools/power/cpupower/lib/cpufreq.h b/tools/power/cpupower/lib/cpufreq.h
index 3aae8e7..3b005c3 100644
--- a/tools/power/cpupower/lib/cpufreq.h
+++ b/tools/power/cpupower/lib/cpufreq.h
@@ -17,8 +17,8 @@
  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
-#ifndef _CPUFREQ_H
-#define _CPUFREQ_H 1
+#ifndef __CPUPOWER_CPUFREQ_H__
+#define __CPUPOWER_CPUFREQ_H__
 
 struct cpufreq_policy {
 	unsigned long min;
@@ -58,13 +58,6 @@
 extern "C" {
 #endif
 
-/*
- * returns 0 if the specified CPU is present (it doesn't say
- * whether it is online!), and an error value if not.
- */
-
-extern int cpufreq_cpu_exists(unsigned int cpu);
-
 /* determine current CPU frequency
  * - _kernel variant means kernel's opinion of CPU frequency
  * - _hardware variant means actual hardware CPU frequency,
@@ -73,9 +66,9 @@
  * returns 0 on failure, else frequency in kHz.
  */
 
-extern unsigned long cpufreq_get_freq_kernel(unsigned int cpu);
+unsigned long cpufreq_get_freq_kernel(unsigned int cpu);
 
-extern unsigned long cpufreq_get_freq_hardware(unsigned int cpu);
+unsigned long cpufreq_get_freq_hardware(unsigned int cpu);
 
 #define cpufreq_get(cpu) cpufreq_get_freq_kernel(cpu);
 
@@ -84,7 +77,7 @@
  *
  * returns 0 on failure, else transition latency in 10^(-9) s = nanoseconds
  */
-extern unsigned long cpufreq_get_transition_latency(unsigned int cpu);
+unsigned long cpufreq_get_transition_latency(unsigned int cpu);
 
 
 /* determine hardware CPU frequency limits
@@ -93,7 +86,7 @@
  * considerations by cpufreq policy notifiers in the kernel.
  */
 
-extern int cpufreq_get_hardware_limits(unsigned int cpu,
+int cpufreq_get_hardware_limits(unsigned int cpu,
 				unsigned long *min,
 				unsigned long *max);
 
@@ -104,9 +97,9 @@
  * to avoid memory leakage, please.
  */
 
-extern char *cpufreq_get_driver(unsigned int cpu);
+char *cpufreq_get_driver(unsigned int cpu);
 
-extern void cpufreq_put_driver(char *ptr);
+void cpufreq_put_driver(char *ptr);
 
 
 /* determine CPUfreq policy currently used
@@ -116,9 +109,9 @@
  */
 
 
-extern struct cpufreq_policy *cpufreq_get_policy(unsigned int cpu);
+struct cpufreq_policy *cpufreq_get_policy(unsigned int cpu);
 
-extern void cpufreq_put_policy(struct cpufreq_policy *policy);
+void cpufreq_put_policy(struct cpufreq_policy *policy);
 
 
 /* determine CPUfreq governors currently available
@@ -129,10 +122,10 @@
  */
 
 
-extern struct cpufreq_available_governors
+struct cpufreq_available_governors
 *cpufreq_get_available_governors(unsigned int cpu);
 
-extern void cpufreq_put_available_governors(
+void cpufreq_put_available_governors(
 	struct cpufreq_available_governors *first);
 
 
@@ -143,10 +136,10 @@
  * cpufreq_put_available_frequencies after use.
  */
 
-extern struct cpufreq_available_frequencies
+struct cpufreq_available_frequencies
 *cpufreq_get_available_frequencies(unsigned int cpu);
 
-extern void cpufreq_put_available_frequencies(
+void cpufreq_put_available_frequencies(
 		struct cpufreq_available_frequencies *first);
 
 
@@ -156,10 +149,10 @@
  * to avoid memory leakage, please.
  */
 
-extern struct cpufreq_affected_cpus *cpufreq_get_affected_cpus(unsigned
+struct cpufreq_affected_cpus *cpufreq_get_affected_cpus(unsigned
 							int cpu);
 
-extern void cpufreq_put_affected_cpus(struct cpufreq_affected_cpus *first);
+void cpufreq_put_affected_cpus(struct cpufreq_affected_cpus *first);
 
 
 /* determine related CPUs
@@ -168,10 +161,10 @@
  * to avoid memory leakage, please.
  */
 
-extern struct cpufreq_affected_cpus *cpufreq_get_related_cpus(unsigned
+struct cpufreq_affected_cpus *cpufreq_get_related_cpus(unsigned
 							int cpu);
 
-extern void cpufreq_put_related_cpus(struct cpufreq_affected_cpus *first);
+void cpufreq_put_related_cpus(struct cpufreq_affected_cpus *first);
 
 
 /* determine stats for cpufreq subsystem
@@ -179,12 +172,12 @@
  * This is not available in all kernel versions or configurations.
  */
 
-extern struct cpufreq_stats *cpufreq_get_stats(unsigned int cpu,
+struct cpufreq_stats *cpufreq_get_stats(unsigned int cpu,
 					unsigned long long *total_time);
 
-extern void cpufreq_put_stats(struct cpufreq_stats *stats);
+void cpufreq_put_stats(struct cpufreq_stats *stats);
 
-extern unsigned long cpufreq_get_transitions(unsigned int cpu);
+unsigned long cpufreq_get_transitions(unsigned int cpu);
 
 
 /* set new cpufreq policy
@@ -193,7 +186,7 @@
  * but results may differ depending e.g. on governors being available.
  */
 
-extern int cpufreq_set_policy(unsigned int cpu, struct cpufreq_policy *policy);
+int cpufreq_set_policy(unsigned int cpu, struct cpufreq_policy *policy);
 
 
 /* modify a policy by only changing min/max freq or governor
@@ -201,9 +194,9 @@
  * Does not check whether result is what was intended.
  */
 
-extern int cpufreq_modify_policy_min(unsigned int cpu, unsigned long min_freq);
-extern int cpufreq_modify_policy_max(unsigned int cpu, unsigned long max_freq);
-extern int cpufreq_modify_policy_governor(unsigned int cpu, char *governor);
+int cpufreq_modify_policy_min(unsigned int cpu, unsigned long min_freq);
+int cpufreq_modify_policy_max(unsigned int cpu, unsigned long max_freq);
+int cpufreq_modify_policy_governor(unsigned int cpu, char *governor);
 
 
 /* set a specific frequency
@@ -213,7 +206,7 @@
  * occurs. Also does not work on ->range() cpufreq drivers.
  */
 
-extern int cpufreq_set_frequency(unsigned int cpu,
+int cpufreq_set_frequency(unsigned int cpu,
 				unsigned long target_frequency);
 
 #ifdef __cplusplus
diff --git a/tools/power/cpupower/lib/cpuidle.c b/tools/power/cpupower/lib/cpuidle.c
new file mode 100644
index 0000000..9bd4c76
--- /dev/null
+++ b/tools/power/cpupower/lib/cpuidle.c
@@ -0,0 +1,380 @@
+/*
+ *  (C) 2004-2009  Dominik Brodowski <linux@dominikbrodowski.de>
+ *  (C) 2011       Thomas Renninger <trenn@novell.com> Novell Inc.
+ *
+ *  Licensed under the terms of the GNU GPL License version 2.
+ */
+
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "cpuidle.h"
+#include "cpupower_intern.h"
+
+/*
+ * helper function to check whether a file under "../cpuX/cpuidle/stateX/" dir
+ * exists.
+ * For example, the ability to disable C-states was only introduced in
+ * later kernel versions; this function can be used to explicitly check
+ * for that feature.
+ *
+ * returns 1 if the file exists, 0 otherwise.
+ */
+static
+unsigned int cpuidle_state_file_exists(unsigned int cpu,
+				       unsigned int idlestate,
+				       const char *fname)
+{
+	char path[SYSFS_PATH_MAX];
+	struct stat statbuf;
+
+
+	snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpuidle/state%u/%s",
+		 cpu, idlestate, fname);
+	if (stat(path, &statbuf) != 0)
+		return 0;
+	return 1;
+}
+
+/*
+ * helper function to read file from /sys into given buffer
+ * fname is a relative path under "cpuX/cpuidle/stateX/" dir
+ * C-state indexing starts at 0 and C0 is not counted as a C-state,
+ * so to get C1 info, pass 0 as the idlestate parameter.
+ */
+static
+unsigned int cpuidle_state_read_file(unsigned int cpu,
+					    unsigned int idlestate,
+					    const char *fname, char *buf,
+					    size_t buflen)
+{
+	char path[SYSFS_PATH_MAX];
+	int fd;
+	ssize_t numread;
+
+	snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpuidle/state%u/%s",
+		 cpu, idlestate, fname);
+
+	fd = open(path, O_RDONLY);
+	if (fd == -1)
+		return 0;
+
+	numread = read(fd, buf, buflen - 1);
+	if (numread < 1) {
+		close(fd);
+		return 0;
+	}
+
+	buf[numread] = '\0';
+	close(fd);
+
+	return (unsigned int) numread;
+}
+
+/*
+ * helper function to write a new value to a /sys file
+ * fname is a relative path under "../cpuX/cpuidle/cstateY/" dir
+ *
+ * Returns the number of bytes written or 0 on error
+ */
+static
+unsigned int cpuidle_state_write_file(unsigned int cpu,
+				      unsigned int idlestate,
+				      const char *fname,
+				      const char *value, size_t len)
+{
+	char path[SYSFS_PATH_MAX];
+	int fd;
+	ssize_t numwrite;
+
+	snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpuidle/state%u/%s",
+		 cpu, idlestate, fname);
+
+	fd = open(path, O_WRONLY);
+	if (fd == -1)
+		return 0;
+
+	numwrite = write(fd, value, len);
+	if (numwrite < 1) {
+		close(fd);
+		return 0;
+	}
+
+	close(fd);
+
+	return (unsigned int) numwrite;
+}
+
+/* read access to files which contain one numeric value */
+
+enum idlestate_value {
+	IDLESTATE_USAGE,
+	IDLESTATE_POWER,
+	IDLESTATE_LATENCY,
+	IDLESTATE_TIME,
+	IDLESTATE_DISABLE,
+	MAX_IDLESTATE_VALUE_FILES
+};
+
+static const char *idlestate_value_files[MAX_IDLESTATE_VALUE_FILES] = {
+	[IDLESTATE_USAGE] = "usage",
+	[IDLESTATE_POWER] = "power",
+	[IDLESTATE_LATENCY] = "latency",
+	[IDLESTATE_TIME]  = "time",
+	[IDLESTATE_DISABLE]  = "disable",
+};
+
+static
+unsigned long long cpuidle_state_get_one_value(unsigned int cpu,
+					       unsigned int idlestate,
+					       enum idlestate_value which)
+{
+	unsigned long long value;
+	unsigned int len;
+	char linebuf[MAX_LINE_LEN];
+	char *endp;
+
+	if (which >= MAX_IDLESTATE_VALUE_FILES)
+		return 0;
+
+	len = cpuidle_state_read_file(cpu, idlestate,
+				      idlestate_value_files[which],
+				      linebuf, sizeof(linebuf));
+	if (len == 0)
+		return 0;
+
+	value = strtoull(linebuf, &endp, 0);
+
+	if (endp == linebuf || errno == ERANGE)
+		return 0;
+
+	return value;
+}
+
+/* read access to files which contain one string */
+
+enum idlestate_string {
+	IDLESTATE_DESC,
+	IDLESTATE_NAME,
+	MAX_IDLESTATE_STRING_FILES
+};
+
+static const char *idlestate_string_files[MAX_IDLESTATE_STRING_FILES] = {
+	[IDLESTATE_DESC] = "desc",
+	[IDLESTATE_NAME] = "name",
+};
+
+
+static char *cpuidle_state_get_one_string(unsigned int cpu,
+					unsigned int idlestate,
+					enum idlestate_string which)
+{
+	char linebuf[MAX_LINE_LEN];
+	char *result;
+	unsigned int len;
+
+	if (which >= MAX_IDLESTATE_STRING_FILES)
+		return NULL;
+
+	len = cpuidle_state_read_file(cpu, idlestate,
+				      idlestate_string_files[which],
+				      linebuf, sizeof(linebuf));
+	if (len == 0)
+		return NULL;
+
+	result = strdup(linebuf);
+	if (result == NULL)
+		return NULL;
+
+	if (result[strlen(result) - 1] == '\n')
+		result[strlen(result) - 1] = '\0';
+
+	return result;
+}
+
+/*
+ * Returns:
+ *    1  if disabled
+ *    0  if enabled
+ *    -1 if idlestate is not available
+ *    -2 if disabling is not supported by the kernel
+ */
+int cpuidle_is_state_disabled(unsigned int cpu,
+				unsigned int idlestate)
+{
+	if (cpuidle_state_count(cpu) <= idlestate)
+		return -1;
+
+	if (!cpuidle_state_file_exists(cpu, idlestate,
+				 idlestate_value_files[IDLESTATE_DISABLE]))
+		return -2;
+	return cpuidle_state_get_one_value(cpu, idlestate, IDLESTATE_DISABLE);
+}
+
+/*
+ * Pass 1 as last argument to disable or 0 to enable the state
+ * Returns:
+ *    0  on success
+ *    negative values on error, for example:
+ *      -1 if idlestate is not available
+ *      -2 if disabling is not supported by the kernel
+ *      -3 No write access to disable/enable C-states
+ */
+int cpuidle_state_disable(unsigned int cpu,
+			    unsigned int idlestate,
+			    unsigned int disable)
+{
+	char value[SYSFS_PATH_MAX];
+	int bytes_written;
+
+	if (cpuidle_state_count(cpu) <= idlestate)
+		return -1;
+
+	if (!cpuidle_state_file_exists(cpu, idlestate,
+				 idlestate_value_files[IDLESTATE_DISABLE]))
+		return -2;
+
+	snprintf(value, SYSFS_PATH_MAX, "%u", disable);
+
+	bytes_written = cpuidle_state_write_file(cpu, idlestate, "disable",
+						   value, strlen(value));
+	if (bytes_written)
+		return 0;
+	return -3;
+}
+
+unsigned long cpuidle_state_latency(unsigned int cpu,
+					  unsigned int idlestate)
+{
+	return cpuidle_state_get_one_value(cpu, idlestate, IDLESTATE_LATENCY);
+}
+
+unsigned long cpuidle_state_usage(unsigned int cpu,
+					unsigned int idlestate)
+{
+	return cpuidle_state_get_one_value(cpu, idlestate, IDLESTATE_USAGE);
+}
+
+unsigned long long cpuidle_state_time(unsigned int cpu,
+					unsigned int idlestate)
+{
+	return cpuidle_state_get_one_value(cpu, idlestate, IDLESTATE_TIME);
+}
+
+char *cpuidle_state_name(unsigned int cpu, unsigned int idlestate)
+{
+	return cpuidle_state_get_one_string(cpu, idlestate, IDLESTATE_NAME);
+}
+
+char *cpuidle_state_desc(unsigned int cpu, unsigned int idlestate)
+{
+	return cpuidle_state_get_one_string(cpu, idlestate, IDLESTATE_DESC);
+}
+
+/*
+ * Returns the number of supported C-states of CPU core cpu,
+ * or zero on error or if cpuidle does not export any C-states.
+ */
+unsigned int cpuidle_state_count(unsigned int cpu)
+{
+	char file[SYSFS_PATH_MAX];
+	struct stat statbuf;
+	int idlestates = 1;
+
+	snprintf(file, SYSFS_PATH_MAX, PATH_TO_CPU "cpuidle");
+	if (stat(file, &statbuf) != 0 || !S_ISDIR(statbuf.st_mode))
+		return 0;
+
+	snprintf(file, SYSFS_PATH_MAX, PATH_TO_CPU "cpu%u/cpuidle/state0", cpu);
+	if (stat(file, &statbuf) != 0 || !S_ISDIR(statbuf.st_mode))
+		return 0;
+
+	while (stat(file, &statbuf) == 0 && S_ISDIR(statbuf.st_mode)) {
+		snprintf(file, SYSFS_PATH_MAX, PATH_TO_CPU
+			 "cpu%u/cpuidle/state%d", cpu, idlestates);
+		idlestates++;
+	}
+	idlestates--;
+	return idlestates;
+}
+
+/* CPUidle general /sys/devices/system/cpu/cpuidle/ sysfs access ********/
+
+/*
+ * helper function to read file from /sys into given buffer
+ * fname is a relative path under "cpu/cpuidle/" dir
+ */
+static unsigned int sysfs_cpuidle_read_file(const char *fname, char *buf,
+					    size_t buflen)
+{
+	char path[SYSFS_PATH_MAX];
+
+	snprintf(path, sizeof(path), PATH_TO_CPU "cpuidle/%s", fname);
+
+	return sysfs_read_file(path, buf, buflen);
+}
+
+
+
+/* read access to files which contain one string */
+
+enum cpuidle_string {
+	CPUIDLE_GOVERNOR,
+	CPUIDLE_GOVERNOR_RO,
+	CPUIDLE_DRIVER,
+	MAX_CPUIDLE_STRING_FILES
+};
+
+static const char *cpuidle_string_files[MAX_CPUIDLE_STRING_FILES] = {
+	[CPUIDLE_GOVERNOR]	= "current_governor",
+	[CPUIDLE_GOVERNOR_RO]	= "current_governor_ro",
+	[CPUIDLE_DRIVER]	= "current_driver",
+};
+
+static char *sysfs_cpuidle_get_one_string(enum cpuidle_string which)
+{
+	char linebuf[MAX_LINE_LEN];
+	char *result;
+	unsigned int len;
+
+	if (which >= MAX_CPUIDLE_STRING_FILES)
+		return NULL;
+
+	len = sysfs_cpuidle_read_file(cpuidle_string_files[which],
+				linebuf, sizeof(linebuf));
+	if (len == 0)
+		return NULL;
+
+	result = strdup(linebuf);
+	if (result == NULL)
+		return NULL;
+
+	if (result[strlen(result) - 1] == '\n')
+		result[strlen(result) - 1] = '\0';
+
+	return result;
+}
+
+char *cpuidle_get_governor(void)
+{
+	char *tmp = sysfs_cpuidle_get_one_string(CPUIDLE_GOVERNOR_RO);
+
+	if (tmp)
+		return tmp;
+
+	return sysfs_cpuidle_get_one_string(CPUIDLE_GOVERNOR);
+}
+
+char *cpuidle_get_driver(void)
+{
+	return sysfs_cpuidle_get_one_string(CPUIDLE_DRIVER);
+}
+/* CPUidle idlestate specific /sys/devices/system/cpu/cpuX/cpuidle/ access */
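
A minimal caller sketch for the new libcpupower cpuidle interface above; this test
program is hypothetical (it is not part of the patch), and writing the disable file
typically requires root:

#include <stdio.h>
#include <stdlib.h>
#include "cpuidle.h"

int main(void)
{
	unsigned int cpu = 0, state;
	unsigned int states = cpuidle_state_count(cpu);

	for (state = 0; state < states; state++) {
		char *name = cpuidle_state_name(cpu, state);
		/* 1 disabled, 0 enabled, -1/-2 per the comment above */
		int disabled = cpuidle_is_state_disabled(cpu, state);

		printf("state%u: %s latency=%luus disabled=%d\n", state,
		       name ? name : "?",
		       cpuidle_state_latency(cpu, state), disabled);
		free(name);	/* names are strdup()ed by the library */
	}

	/* disable state 1; 0 on success, -1/-2/-3 as documented above */
	if (states > 1 && cpuidle_state_disable(cpu, 1, 1) == 0)
		printf("state1 disabled on cpu%u\n", cpu);
	return 0;
}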
diff --git a/tools/power/cpupower/lib/cpuidle.h b/tools/power/cpupower/lib/cpuidle.h
new file mode 100644
index 0000000..04eb3cf
--- /dev/null
+++ b/tools/power/cpupower/lib/cpuidle.h
@@ -0,0 +1,23 @@
+#ifndef __CPUPOWER_CPUIDLE_H__
+#define __CPUPOWER_CPUIDLE_H__
+
+int cpuidle_is_state_disabled(unsigned int cpu,
+				       unsigned int idlestate);
+int cpuidle_state_disable(unsigned int cpu, unsigned int idlestate,
+				   unsigned int disable);
+unsigned long cpuidle_state_latency(unsigned int cpu,
+						unsigned int idlestate);
+unsigned long cpuidle_state_usage(unsigned int cpu,
+					unsigned int idlestate);
+unsigned long long cpuidle_state_time(unsigned int cpu,
+						unsigned int idlestate);
+char *cpuidle_state_name(unsigned int cpu,
+				unsigned int idlestate);
+char *cpuidle_state_desc(unsigned int cpu,
+				unsigned int idlestate);
+unsigned int cpuidle_state_count(unsigned int cpu);
+
+char *cpuidle_get_governor(void);
+char *cpuidle_get_driver(void);
+
+#endif /* __CPUPOWER_CPUIDLE_H__ */
diff --git a/tools/power/cpupower/lib/cpupower.c b/tools/power/cpupower/lib/cpupower.c
new file mode 100644
index 0000000..9c395ec9
--- /dev/null
+++ b/tools/power/cpupower/lib/cpupower.c
@@ -0,0 +1,192 @@
+/*
+ *  (C) 2004-2009  Dominik Brodowski <linux@dominikbrodowski.de>
+ *
+ *  Licensed under the terms of the GNU GPL License version 2.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+
+#include "cpupower.h"
+#include "cpupower_intern.h"
+
+unsigned int sysfs_read_file(const char *path, char *buf, size_t buflen)
+{
+	int fd;
+	ssize_t numread;
+
+	fd = open(path, O_RDONLY);
+	if (fd == -1)
+		return 0;
+
+	numread = read(fd, buf, buflen - 1);
+	if (numread < 1) {
+		close(fd);
+		return 0;
+	}
+
+	buf[numread] = '\0';
+	close(fd);
+
+	return (unsigned int) numread;
+}
+
+/*
+ * Detect whether a CPU is online
+ *
+ * Returns:
+ *     1 -> if CPU is online
+ *     0 -> if CPU is offline
+ *     negative errno values in error case
+ */
+int cpupower_is_cpu_online(unsigned int cpu)
+{
+	char path[SYSFS_PATH_MAX];
+	int fd;
+	ssize_t numread;
+	unsigned long long value;
+	char linebuf[MAX_LINE_LEN];
+	char *endp;
+	struct stat statbuf;
+
+	snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u", cpu);
+
+	if (stat(path, &statbuf) != 0)
+		return 0;
+
+	/*
+	 * kernel without CONFIG_HOTPLUG_CPU
+	 * -> cpuX directory exists, but not cpuX/online file
+	 */
+	snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/online", cpu);
+	if (stat(path, &statbuf) != 0)
+		return 1;
+
+	fd = open(path, O_RDONLY);
+	if (fd == -1)
+		return -errno;
+
+	numread = read(fd, linebuf, MAX_LINE_LEN - 1);
+	if (numread < 1) {
+		close(fd);
+		return -EIO;
+	}
+	linebuf[numread] = '\0';
+	close(fd);
+
+	value = strtoull(linebuf, &endp, 0);
+	if (value > 1)
+		return -EINVAL;
+
+	return value;
+}
+
+/* returns -1 on failure, 0 on success */
+static int sysfs_topology_read_file(unsigned int cpu, const char *fname, int *result)
+{
+	char linebuf[MAX_LINE_LEN];
+	char *endp;
+	char path[SYSFS_PATH_MAX];
+
+	snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/topology/%s",
+			 cpu, fname);
+	if (sysfs_read_file(path, linebuf, MAX_LINE_LEN) == 0)
+		return -1;
+	*result = strtol(linebuf, &endp, 0);
+	if (endp == linebuf || errno == ERANGE)
+		return -1;
+	return 0;
+}
+
+static int __compare(const void *t1, const void *t2)
+{
+	struct cpuid_core_info *top1 = (struct cpuid_core_info *)t1;
+	struct cpuid_core_info *top2 = (struct cpuid_core_info *)t2;
+	if (top1->pkg < top2->pkg)
+		return -1;
+	else if (top1->pkg > top2->pkg)
+		return 1;
+	else if (top1->core < top2->core)
+		return -1;
+	else if (top1->core > top2->core)
+		return 1;
+	else if (top1->cpu < top2->cpu)
+		return -1;
+	else if (top1->cpu > top2->cpu)
+		return 1;
+	else
+		return 0;
+}
+
+/*
+ * Returns the number of CPUs, negative on error; cpu_top must be
+ * passed to cpu_topology_release() to free its resources.
+ *
+ * The array is sorted by ->pkg, ->core, then ->cpu.
+ */
+int get_cpu_topology(struct cpupower_topology *cpu_top)
+{
+	int cpu, last_pkg, cpus = sysconf(_SC_NPROCESSORS_CONF);
+
+	cpu_top->core_info = malloc(sizeof(struct cpuid_core_info) * cpus);
+	if (cpu_top->core_info == NULL)
+		return -ENOMEM;
+	cpu_top->pkgs = cpu_top->cores = 0;
+	for (cpu = 0; cpu < cpus; cpu++) {
+		cpu_top->core_info[cpu].cpu = cpu;
+		cpu_top->core_info[cpu].is_online = cpupower_is_cpu_online(cpu);
+		if (sysfs_topology_read_file(
+			cpu,
+			"physical_package_id",
+			&(cpu_top->core_info[cpu].pkg)) < 0) {
+			cpu_top->core_info[cpu].pkg = -1;
+			cpu_top->core_info[cpu].core = -1;
+			continue;
+		}
+		if (sysfs_topology_read_file(
+			cpu,
+			"core_id",
+			&(cpu_top->core_info[cpu].core)) < 0) {
+			cpu_top->core_info[cpu].pkg = -1;
+			cpu_top->core_info[cpu].core = -1;
+			continue;
+		}
+	}
+
+	qsort(cpu_top->core_info, cpus, sizeof(struct cpuid_core_info),
+	      __compare);
+
+	/* Count the number of distinct pkg values. This works
+	   because the primary sort of the core_info array was just
+	   done by pkg value. */
+	last_pkg = cpu_top->core_info[0].pkg;
+	for (cpu = 1; cpu < cpus; cpu++) {
+		if (cpu_top->core_info[cpu].pkg != last_pkg &&
+				cpu_top->core_info[cpu].pkg != -1) {
+			last_pkg = cpu_top->core_info[cpu].pkg;
+			cpu_top->pkgs++;
+		}
+	}
+	if (cpu_top->core_info[0].pkg != -1)
+		cpu_top->pkgs++;
+
+	/* Intel's core IDs are not necessarily consecutive: there may
+	 * be a core_id of 3 but none of 2. Assuming core 0 always
+	 * exists, the number of cores could be derived by counting
+	 * the duplicates in one package, e.g.:
+	 *
+	 * for (cpu = 0; cpu_top->core_info[cpu].pkg == 0 && cpu < cpus; cpu++)
+	 *	if (cpu_top->core_info[cpu].core == 0)
+	 *		cpu_top->cores++;
+	 */
+	return cpus;
+}
+
+void cpu_topology_release(struct cpupower_topology cpu_top)
+{
+	free(cpu_top.core_info);
+}
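
A similar hypothetical sketch for the topology helpers above, relying only on the
cpupower.h interface added below:

#include <stdio.h>
#include "cpupower.h"

int main(void)
{
	struct cpupower_topology top;
	int i, cpus = get_cpu_topology(&top);

	if (cpus < 0)
		return 1;

	/* core_info[] is sorted by pkg, then core, then cpu */
	for (i = 0; i < cpus; i++)
		printf("cpu%d: pkg=%d core=%d online=%u\n",
		       top.core_info[i].cpu, top.core_info[i].pkg,
		       top.core_info[i].core, top.core_info[i].is_online);

	cpu_topology_release(top);
	return 0;
}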
diff --git a/tools/power/cpupower/lib/cpupower.h b/tools/power/cpupower/lib/cpupower.h
new file mode 100644
index 0000000..fa031fc
--- /dev/null
+++ b/tools/power/cpupower/lib/cpupower.h
@@ -0,0 +1,35 @@
+#ifndef __CPUPOWER_CPUPOWER_H__
+#define __CPUPOWER_CPUPOWER_H__
+
+struct cpupower_topology {
+	/* Number of CPU cores, packages and threads per core in the system */
+	unsigned int cores;
+	unsigned int pkgs;
+	unsigned int threads; /* per core */
+
+	/* Array is malloc'd with one entry per CPU, holding per-CPU info */
+	struct cpuid_core_info *core_info;
+};
+
+struct cpuid_core_info {
+	int pkg;
+	int core;
+	int cpu;
+
+	/* flags */
+	unsigned int is_online:1;
+};
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int get_cpu_topology(struct cpupower_topology *cpu_top);
+void cpu_topology_release(struct cpupower_topology cpu_top);
+int cpupower_is_cpu_online(unsigned int cpu);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __CPUPOWER_CPUPOWER_H__ */
diff --git a/tools/power/cpupower/lib/cpupower_intern.h b/tools/power/cpupower/lib/cpupower_intern.h
new file mode 100644
index 0000000..f8ec400
--- /dev/null
+++ b/tools/power/cpupower/lib/cpupower_intern.h
@@ -0,0 +1,5 @@
+#define PATH_TO_CPU "/sys/devices/system/cpu/"
+#define MAX_LINE_LEN 4096
+#define SYSFS_PATH_MAX 255
+
+unsigned int sysfs_read_file(const char *path, char *buf, size_t buflen);
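
sysfs_read_file() stays internal to the library; a hypothetical caller illustrating
its contract (0 on any error, otherwise the byte count, with buf NUL-terminated)
might read:

#include <stdio.h>
#include "cpupower_intern.h"

int main(void)
{
	char buf[MAX_LINE_LEN];

	/* the attribute path is just an example */
	if (sysfs_read_file(PATH_TO_CPU "cpu0/cpuidle/state0/name",
			    buf, sizeof(buf)) > 0)
		printf("state0 name: %s", buf);	/* buf keeps its '\n' */
	return 0;
}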
diff --git a/tools/power/cpupower/lib/sysfs.c b/tools/power/cpupower/lib/sysfs.c
deleted file mode 100644
index 870713a..0000000
--- a/tools/power/cpupower/lib/sysfs.c
+++ /dev/null
@@ -1,672 +0,0 @@
-/*
- *  (C) 2004-2009  Dominik Brodowski <linux@dominikbrodowski.de>
- *
- *  Licensed under the terms of the GNU GPL License version 2.
- */
-
-#include <stdio.h>
-#include <errno.h>
-#include <stdlib.h>
-#include <string.h>
-#include <limits.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
-
-#include "cpufreq.h"
-
-#define PATH_TO_CPU "/sys/devices/system/cpu/"
-#define MAX_LINE_LEN 4096
-#define SYSFS_PATH_MAX 255
-
-
-static unsigned int sysfs_read_file(const char *path, char *buf, size_t buflen)
-{
-	int fd;
-	ssize_t numread;
-
-	fd = open(path, O_RDONLY);
-	if (fd == -1)
-		return 0;
-
-	numread = read(fd, buf, buflen - 1);
-	if (numread < 1) {
-		close(fd);
-		return 0;
-	}
-
-	buf[numread] = '\0';
-	close(fd);
-
-	return (unsigned int) numread;
-}
-
-
-/* CPUFREQ sysfs access **************************************************/
-
-/* helper function to read file from /sys into given buffer */
-/* fname is a relative path under "cpuX/cpufreq" dir */
-static unsigned int sysfs_cpufreq_read_file(unsigned int cpu, const char *fname,
-					    char *buf, size_t buflen)
-{
-	char path[SYSFS_PATH_MAX];
-
-	snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpufreq/%s",
-			 cpu, fname);
-	return sysfs_read_file(path, buf, buflen);
-}
-
-/* helper function to write a new value to a /sys file */
-/* fname is a relative path under "cpuX/cpufreq" dir */
-static unsigned int sysfs_cpufreq_write_file(unsigned int cpu,
-					     const char *fname,
-					     const char *value, size_t len)
-{
-	char path[SYSFS_PATH_MAX];
-	int fd;
-	ssize_t numwrite;
-
-	snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpufreq/%s",
-			 cpu, fname);
-
-	fd = open(path, O_WRONLY);
-	if (fd == -1)
-		return 0;
-
-	numwrite = write(fd, value, len);
-	if (numwrite < 1) {
-		close(fd);
-		return 0;
-	}
-
-	close(fd);
-
-	return (unsigned int) numwrite;
-}
-
-/* read access to files which contain one numeric value */
-
-enum cpufreq_value {
-	CPUINFO_CUR_FREQ,
-	CPUINFO_MIN_FREQ,
-	CPUINFO_MAX_FREQ,
-	CPUINFO_LATENCY,
-	SCALING_CUR_FREQ,
-	SCALING_MIN_FREQ,
-	SCALING_MAX_FREQ,
-	STATS_NUM_TRANSITIONS,
-	MAX_CPUFREQ_VALUE_READ_FILES
-};
-
-static const char *cpufreq_value_files[MAX_CPUFREQ_VALUE_READ_FILES] = {
-	[CPUINFO_CUR_FREQ] = "cpuinfo_cur_freq",
-	[CPUINFO_MIN_FREQ] = "cpuinfo_min_freq",
-	[CPUINFO_MAX_FREQ] = "cpuinfo_max_freq",
-	[CPUINFO_LATENCY]  = "cpuinfo_transition_latency",
-	[SCALING_CUR_FREQ] = "scaling_cur_freq",
-	[SCALING_MIN_FREQ] = "scaling_min_freq",
-	[SCALING_MAX_FREQ] = "scaling_max_freq",
-	[STATS_NUM_TRANSITIONS] = "stats/total_trans"
-};
-
-
-static unsigned long sysfs_cpufreq_get_one_value(unsigned int cpu,
-						 enum cpufreq_value which)
-{
-	unsigned long value;
-	unsigned int len;
-	char linebuf[MAX_LINE_LEN];
-	char *endp;
-
-	if (which >= MAX_CPUFREQ_VALUE_READ_FILES)
-		return 0;
-
-	len = sysfs_cpufreq_read_file(cpu, cpufreq_value_files[which],
-				linebuf, sizeof(linebuf));
-
-	if (len == 0)
-		return 0;
-
-	value = strtoul(linebuf, &endp, 0);
-
-	if (endp == linebuf || errno == ERANGE)
-		return 0;
-
-	return value;
-}
-
-/* read access to files which contain one string */
-
-enum cpufreq_string {
-	SCALING_DRIVER,
-	SCALING_GOVERNOR,
-	MAX_CPUFREQ_STRING_FILES
-};
-
-static const char *cpufreq_string_files[MAX_CPUFREQ_STRING_FILES] = {
-	[SCALING_DRIVER] = "scaling_driver",
-	[SCALING_GOVERNOR] = "scaling_governor",
-};
-
-
-static char *sysfs_cpufreq_get_one_string(unsigned int cpu,
-					   enum cpufreq_string which)
-{
-	char linebuf[MAX_LINE_LEN];
-	char *result;
-	unsigned int len;
-
-	if (which >= MAX_CPUFREQ_STRING_FILES)
-		return NULL;
-
-	len = sysfs_cpufreq_read_file(cpu, cpufreq_string_files[which],
-				linebuf, sizeof(linebuf));
-	if (len == 0)
-		return NULL;
-
-	result = strdup(linebuf);
-	if (result == NULL)
-		return NULL;
-
-	if (result[strlen(result) - 1] == '\n')
-		result[strlen(result) - 1] = '\0';
-
-	return result;
-}
-
-/* write access */
-
-enum cpufreq_write {
-	WRITE_SCALING_MIN_FREQ,
-	WRITE_SCALING_MAX_FREQ,
-	WRITE_SCALING_GOVERNOR,
-	WRITE_SCALING_SET_SPEED,
-	MAX_CPUFREQ_WRITE_FILES
-};
-
-static const char *cpufreq_write_files[MAX_CPUFREQ_WRITE_FILES] = {
-	[WRITE_SCALING_MIN_FREQ] = "scaling_min_freq",
-	[WRITE_SCALING_MAX_FREQ] = "scaling_max_freq",
-	[WRITE_SCALING_GOVERNOR] = "scaling_governor",
-	[WRITE_SCALING_SET_SPEED] = "scaling_setspeed",
-};
-
-static int sysfs_cpufreq_write_one_value(unsigned int cpu,
-					 enum cpufreq_write which,
-					 const char *new_value, size_t len)
-{
-	if (which >= MAX_CPUFREQ_WRITE_FILES)
-		return 0;
-
-	if (sysfs_cpufreq_write_file(cpu, cpufreq_write_files[which],
-					new_value, len) != len)
-		return -ENODEV;
-
-	return 0;
-};
-
-unsigned long sysfs_get_freq_kernel(unsigned int cpu)
-{
-	return sysfs_cpufreq_get_one_value(cpu, SCALING_CUR_FREQ);
-}
-
-unsigned long sysfs_get_freq_hardware(unsigned int cpu)
-{
-	return sysfs_cpufreq_get_one_value(cpu, CPUINFO_CUR_FREQ);
-}
-
-unsigned long sysfs_get_freq_transition_latency(unsigned int cpu)
-{
-	return sysfs_cpufreq_get_one_value(cpu, CPUINFO_LATENCY);
-}
-
-int sysfs_get_freq_hardware_limits(unsigned int cpu,
-			      unsigned long *min,
-			      unsigned long *max)
-{
-	if ((!min) || (!max))
-		return -EINVAL;
-
-	*min = sysfs_cpufreq_get_one_value(cpu, CPUINFO_MIN_FREQ);
-	if (!*min)
-		return -ENODEV;
-
-	*max = sysfs_cpufreq_get_one_value(cpu, CPUINFO_MAX_FREQ);
-	if (!*max)
-		return -ENODEV;
-
-	return 0;
-}
-
-char *sysfs_get_freq_driver(unsigned int cpu)
-{
-	return sysfs_cpufreq_get_one_string(cpu, SCALING_DRIVER);
-}
-
-struct cpufreq_policy *sysfs_get_freq_policy(unsigned int cpu)
-{
-	struct cpufreq_policy *policy;
-
-	policy = malloc(sizeof(struct cpufreq_policy));
-	if (!policy)
-		return NULL;
-
-	policy->governor = sysfs_cpufreq_get_one_string(cpu, SCALING_GOVERNOR);
-	if (!policy->governor) {
-		free(policy);
-		return NULL;
-	}
-	policy->min = sysfs_cpufreq_get_one_value(cpu, SCALING_MIN_FREQ);
-	policy->max = sysfs_cpufreq_get_one_value(cpu, SCALING_MAX_FREQ);
-	if ((!policy->min) || (!policy->max)) {
-		free(policy->governor);
-		free(policy);
-		return NULL;
-	}
-
-	return policy;
-}
-
-struct cpufreq_available_governors *
-sysfs_get_freq_available_governors(unsigned int cpu) {
-	struct cpufreq_available_governors *first = NULL;
-	struct cpufreq_available_governors *current = NULL;
-	char linebuf[MAX_LINE_LEN];
-	unsigned int pos, i;
-	unsigned int len;
-
-	len = sysfs_cpufreq_read_file(cpu, "scaling_available_governors",
-				linebuf, sizeof(linebuf));
-	if (len == 0)
-		return NULL;
-
-	pos = 0;
-	for (i = 0; i < len; i++) {
-		if (linebuf[i] == ' ' || linebuf[i] == '\n') {
-			if (i - pos < 2)
-				continue;
-			if (current) {
-				current->next = malloc(sizeof(*current));
-				if (!current->next)
-					goto error_out;
-				current = current->next;
-			} else {
-				first = malloc(sizeof(*first));
-				if (!first)
-					goto error_out;
-				current = first;
-			}
-			current->first = first;
-			current->next = NULL;
-
-			current->governor = malloc(i - pos + 1);
-			if (!current->governor)
-				goto error_out;
-
-			memcpy(current->governor, linebuf + pos, i - pos);
-			current->governor[i - pos] = '\0';
-			pos = i + 1;
-		}
-	}
-
-	return first;
-
- error_out:
-	while (first) {
-		current = first->next;
-		if (first->governor)
-			free(first->governor);
-		free(first);
-		first = current;
-	}
-	return NULL;
-}
-
-
-struct cpufreq_available_frequencies *
-sysfs_get_available_frequencies(unsigned int cpu) {
-	struct cpufreq_available_frequencies *first = NULL;
-	struct cpufreq_available_frequencies *current = NULL;
-	char one_value[SYSFS_PATH_MAX];
-	char linebuf[MAX_LINE_LEN];
-	unsigned int pos, i;
-	unsigned int len;
-
-	len = sysfs_cpufreq_read_file(cpu, "scaling_available_frequencies",
-				linebuf, sizeof(linebuf));
-	if (len == 0)
-		return NULL;
-
-	pos = 0;
-	for (i = 0; i < len; i++) {
-		if (linebuf[i] == ' ' || linebuf[i] == '\n') {
-			if (i - pos < 2)
-				continue;
-			if (i - pos >= SYSFS_PATH_MAX)
-				goto error_out;
-			if (current) {
-				current->next = malloc(sizeof(*current));
-				if (!current->next)
-					goto error_out;
-				current = current->next;
-			} else {
-				first = malloc(sizeof(*first));
-				if (!first)
-					goto error_out;
-				current = first;
-			}
-			current->first = first;
-			current->next = NULL;
-
-			memcpy(one_value, linebuf + pos, i - pos);
-			one_value[i - pos] = '\0';
-			if (sscanf(one_value, "%lu", &current->frequency) != 1)
-				goto error_out;
-
-			pos = i + 1;
-		}
-	}
-
-	return first;
-
- error_out:
-	while (first) {
-		current = first->next;
-		free(first);
-		first = current;
-	}
-	return NULL;
-}
-
-static struct cpufreq_affected_cpus *sysfs_get_cpu_list(unsigned int cpu,
-							const char *file)
-{
-	struct cpufreq_affected_cpus *first = NULL;
-	struct cpufreq_affected_cpus *current = NULL;
-	char one_value[SYSFS_PATH_MAX];
-	char linebuf[MAX_LINE_LEN];
-	unsigned int pos, i;
-	unsigned int len;
-
-	len = sysfs_cpufreq_read_file(cpu, file, linebuf, sizeof(linebuf));
-	if (len == 0)
-		return NULL;
-
-	pos = 0;
-	for (i = 0; i < len; i++) {
-		if (i == len || linebuf[i] == ' ' || linebuf[i] == '\n') {
-			if (i - pos  < 1)
-				continue;
-			if (i - pos >= SYSFS_PATH_MAX)
-				goto error_out;
-			if (current) {
-				current->next = malloc(sizeof(*current));
-				if (!current->next)
-					goto error_out;
-				current = current->next;
-			} else {
-				first = malloc(sizeof(*first));
-				if (!first)
-					goto error_out;
-				current = first;
-			}
-			current->first = first;
-			current->next = NULL;
-
-			memcpy(one_value, linebuf + pos, i - pos);
-			one_value[i - pos] = '\0';
-
-			if (sscanf(one_value, "%u", &current->cpu) != 1)
-				goto error_out;
-
-			pos = i + 1;
-		}
-	}
-
-	return first;
-
- error_out:
-	while (first) {
-		current = first->next;
-		free(first);
-		first = current;
-	}
-	return NULL;
-}
-
-struct cpufreq_affected_cpus *sysfs_get_freq_affected_cpus(unsigned int cpu)
-{
-	return sysfs_get_cpu_list(cpu, "affected_cpus");
-}
-
-struct cpufreq_affected_cpus *sysfs_get_freq_related_cpus(unsigned int cpu)
-{
-	return sysfs_get_cpu_list(cpu, "related_cpus");
-}
-
-struct cpufreq_stats *sysfs_get_freq_stats(unsigned int cpu,
-					unsigned long long *total_time) {
-	struct cpufreq_stats *first = NULL;
-	struct cpufreq_stats *current = NULL;
-	char one_value[SYSFS_PATH_MAX];
-	char linebuf[MAX_LINE_LEN];
-	unsigned int pos, i;
-	unsigned int len;
-
-	len = sysfs_cpufreq_read_file(cpu, "stats/time_in_state",
-				linebuf, sizeof(linebuf));
-	if (len == 0)
-		return NULL;
-
-	*total_time = 0;
-	pos = 0;
-	for (i = 0; i < len; i++) {
-		if (i == strlen(linebuf) || linebuf[i] == '\n')	{
-			if (i - pos < 2)
-				continue;
-			if ((i - pos) >= SYSFS_PATH_MAX)
-				goto error_out;
-			if (current) {
-				current->next = malloc(sizeof(*current));
-				if (!current->next)
-					goto error_out;
-				current = current->next;
-			} else {
-				first = malloc(sizeof(*first));
-				if (!first)
-					goto error_out;
-				current = first;
-			}
-			current->first = first;
-			current->next = NULL;
-
-			memcpy(one_value, linebuf + pos, i - pos);
-			one_value[i - pos] = '\0';
-			if (sscanf(one_value, "%lu %llu",
-					&current->frequency,
-					&current->time_in_state) != 2)
-				goto error_out;
-
-			*total_time = *total_time + current->time_in_state;
-			pos = i + 1;
-		}
-	}
-
-	return first;
-
- error_out:
-	while (first) {
-		current = first->next;
-		free(first);
-		first = current;
-	}
-	return NULL;
-}
-
-unsigned long sysfs_get_freq_transitions(unsigned int cpu)
-{
-	return sysfs_cpufreq_get_one_value(cpu, STATS_NUM_TRANSITIONS);
-}
-
-static int verify_gov(char *new_gov, char *passed_gov)
-{
-	unsigned int i, j = 0;
-
-	if (!passed_gov || (strlen(passed_gov) > 19))
-		return -EINVAL;
-
-	strncpy(new_gov, passed_gov, 20);
-	for (i = 0; i < 20; i++) {
-		if (j) {
-			new_gov[i] = '\0';
-			continue;
-		}
-		if ((new_gov[i] >= 'a') && (new_gov[i] <= 'z'))
-			continue;
-
-		if ((new_gov[i] >= 'A') && (new_gov[i] <= 'Z'))
-			continue;
-
-		if (new_gov[i] == '-')
-			continue;
-
-		if (new_gov[i] == '_')
-			continue;
-
-		if (new_gov[i] == '\0') {
-			j = 1;
-			continue;
-		}
-		return -EINVAL;
-	}
-	new_gov[19] = '\0';
-	return 0;
-}
-
-int sysfs_modify_freq_policy_governor(unsigned int cpu, char *governor)
-{
-	char new_gov[SYSFS_PATH_MAX];
-
-	if (!governor)
-		return -EINVAL;
-
-	if (verify_gov(new_gov, governor))
-		return -EINVAL;
-
-	return sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_GOVERNOR,
-					     new_gov, strlen(new_gov));
-};
-
-int sysfs_modify_freq_policy_max(unsigned int cpu, unsigned long max_freq)
-{
-	char value[SYSFS_PATH_MAX];
-
-	snprintf(value, SYSFS_PATH_MAX, "%lu", max_freq);
-
-	return sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_MAX_FREQ,
-					     value, strlen(value));
-};
-
-
-int sysfs_modify_freq_policy_min(unsigned int cpu, unsigned long min_freq)
-{
-	char value[SYSFS_PATH_MAX];
-
-	snprintf(value, SYSFS_PATH_MAX, "%lu", min_freq);
-
-	return sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_MIN_FREQ,
-					     value, strlen(value));
-};
-
-
-int sysfs_set_freq_policy(unsigned int cpu, struct cpufreq_policy *policy)
-{
-	char min[SYSFS_PATH_MAX];
-	char max[SYSFS_PATH_MAX];
-	char gov[SYSFS_PATH_MAX];
-	int ret;
-	unsigned long old_min;
-	int write_max_first;
-
-	if (!policy || !(policy->governor))
-		return -EINVAL;
-
-	if (policy->max < policy->min)
-		return -EINVAL;
-
-	if (verify_gov(gov, policy->governor))
-		return -EINVAL;
-
-	snprintf(min, SYSFS_PATH_MAX, "%lu", policy->min);
-	snprintf(max, SYSFS_PATH_MAX, "%lu", policy->max);
-
-	old_min = sysfs_cpufreq_get_one_value(cpu, SCALING_MIN_FREQ);
-	write_max_first = (old_min && (policy->max < old_min) ? 0 : 1);
-
-	if (write_max_first) {
-		ret = sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_MAX_FREQ,
-						    max, strlen(max));
-		if (ret)
-			return ret;
-	}
-
-	ret = sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_MIN_FREQ, min,
-					    strlen(min));
-	if (ret)
-		return ret;
-
-	if (!write_max_first) {
-		ret = sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_MAX_FREQ,
-						    max, strlen(max));
-		if (ret)
-			return ret;
-	}
-
-	return sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_GOVERNOR,
-					     gov, strlen(gov));
-}
-
-int sysfs_set_frequency(unsigned int cpu, unsigned long target_frequency)
-{
-	struct cpufreq_policy *pol = sysfs_get_freq_policy(cpu);
-	char userspace_gov[] = "userspace";
-	char freq[SYSFS_PATH_MAX];
-	int ret;
-
-	if (!pol)
-		return -ENODEV;
-
-	if (strncmp(pol->governor, userspace_gov, 9) != 0) {
-		ret = sysfs_modify_freq_policy_governor(cpu, userspace_gov);
-		if (ret) {
-			cpufreq_put_policy(pol);
-			return ret;
-		}
-	}
-
-	cpufreq_put_policy(pol);
-
-	snprintf(freq, SYSFS_PATH_MAX, "%lu", target_frequency);
-
-	return sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_SET_SPEED,
-					     freq, strlen(freq));
-}
-
-/* CPUFREQ sysfs access **************************************************/
-
-/* General sysfs access **************************************************/
-int sysfs_cpu_exists(unsigned int cpu)
-{
-	char file[SYSFS_PATH_MAX];
-	struct stat statbuf;
-
-	snprintf(file, SYSFS_PATH_MAX, PATH_TO_CPU "cpu%u/", cpu);
-
-	if (stat(file, &statbuf) != 0)
-		return -ENOSYS;
-
-	return S_ISDIR(statbuf.st_mode) ? 0 : -ENOSYS;
-}
-
-/* General sysfs access **************************************************/
diff --git a/tools/power/cpupower/lib/sysfs.h b/tools/power/cpupower/lib/sysfs.h
deleted file mode 100644
index c76a5e0..0000000
--- a/tools/power/cpupower/lib/sysfs.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/* General */
-extern unsigned int sysfs_cpu_exists(unsigned int cpu);
-
-/* CPUfreq */
-extern unsigned long sysfs_get_freq_kernel(unsigned int cpu);
-extern unsigned long sysfs_get_freq_hardware(unsigned int cpu);
-extern unsigned long sysfs_get_freq_transition_latency(unsigned int cpu);
-extern int sysfs_get_freq_hardware_limits(unsigned int cpu,
-					unsigned long *min, unsigned long *max);
-extern char *sysfs_get_freq_driver(unsigned int cpu);
-extern struct cpufreq_policy *sysfs_get_freq_policy(unsigned int cpu);
-extern struct cpufreq_available_governors *sysfs_get_freq_available_governors(
-	unsigned int cpu);
-extern struct cpufreq_available_frequencies *sysfs_get_available_frequencies(
-	unsigned int cpu);
-extern struct cpufreq_affected_cpus *sysfs_get_freq_affected_cpus(
-	unsigned int cpu);
-extern struct cpufreq_affected_cpus *sysfs_get_freq_related_cpus(
-	unsigned int cpu);
-extern struct cpufreq_stats *sysfs_get_freq_stats(unsigned int cpu,
-						unsigned long long *total_time);
-extern unsigned long sysfs_get_freq_transitions(unsigned int cpu);
-extern int sysfs_set_freq_policy(unsigned int cpu,
-				struct cpufreq_policy *policy);
-extern int sysfs_modify_freq_policy_min(unsigned int cpu,
-					unsigned long min_freq);
-extern int sysfs_modify_freq_policy_max(unsigned int cpu,
-					unsigned long max_freq);
-extern int sysfs_modify_freq_policy_governor(unsigned int cpu, char *governor);
-extern int sysfs_set_frequency(unsigned int cpu,
-			unsigned long target_frequency);
diff --git a/tools/power/cpupower/man/cpupower-frequency-info.1 b/tools/power/cpupower/man/cpupower-frequency-info.1
index 9c85a38..6aa8d23 100644
--- a/tools/power/cpupower/man/cpupower-frequency-info.1
+++ b/tools/power/cpupower/man/cpupower-frequency-info.1
@@ -1,7 +1,7 @@
 .TH "CPUPOWER\-FREQUENCY\-INFO" "1" "0.1" "" "cpupower Manual"
 .SH "NAME"
 .LP 
-cpupower frequency\-info \- Utility to retrieve cpufreq kernel information
+cpupower\-frequency\-info \- Utility to retrieve cpufreq kernel information
 .SH "SYNTAX"
 .LP 
 cpupower [ \-c cpulist ] frequency\-info [\fIoptions\fP]
diff --git a/tools/power/cpupower/man/cpupower-frequency-set.1 b/tools/power/cpupower/man/cpupower-frequency-set.1
index 3eacc8d..b505702 100644
--- a/tools/power/cpupower/man/cpupower-frequency-set.1
+++ b/tools/power/cpupower/man/cpupower-frequency-set.1
@@ -1,7 +1,7 @@
 .TH "CPUPOWER\-FREQUENCY\-SET" "1" "0.1" "" "cpupower Manual"
 .SH "NAME"
 .LP 
-cpupower frequency\-set \- A small tool which allows to modify cpufreq settings.
+cpupower\-frequency\-set \- A small tool which allows to modify cpufreq settings.
 .SH "SYNTAX"
 .LP 
 cpupower [ \-c cpu ] frequency\-set [\fIoptions\fP]
diff --git a/tools/power/cpupower/man/cpupower-idle-info.1 b/tools/power/cpupower/man/cpupower-idle-info.1
index 7b3646a..80a1311 100644
--- a/tools/power/cpupower/man/cpupower-idle-info.1
+++ b/tools/power/cpupower/man/cpupower-idle-info.1
@@ -1,7 +1,7 @@
 .TH "CPUPOWER-IDLE-INFO" "1" "0.1" "" "cpupower Manual"
 .SH "NAME"
 .LP
-cpupower idle\-info \- Utility to retrieve cpu idle kernel information
+cpupower\-idle\-info \- Utility to retrieve cpu idle kernel information
 .SH "SYNTAX"
 .LP
 cpupower [ \-c cpulist ] idle\-info [\fIoptions\fP]
diff --git a/tools/power/cpupower/man/cpupower-idle-set.1 b/tools/power/cpupower/man/cpupower-idle-set.1
index 580c4e3..21916cf 100644
--- a/tools/power/cpupower/man/cpupower-idle-set.1
+++ b/tools/power/cpupower/man/cpupower-idle-set.1
@@ -1,7 +1,7 @@
 .TH "CPUPOWER-IDLE-SET" "1" "0.1" "" "cpupower Manual"
 .SH "NAME"
 .LP
-cpupower idle\-set \- Utility to set cpu idle state specific kernel options
+cpupower\-idle\-set \- Utility to set cpu idle state specific kernel options
 .SH "SYNTAX"
 .LP
 cpupower [ \-c cpulist ] idle\-info [\fIoptions\fP]
diff --git a/tools/power/cpupower/utils/cpufreq-set.c b/tools/power/cpupower/utils/cpufreq-set.c
index 0fbd1a2..b4bf769 100644
--- a/tools/power/cpupower/utils/cpufreq-set.c
+++ b/tools/power/cpupower/utils/cpufreq-set.c
@@ -16,8 +16,8 @@
 #include <getopt.h>
 
 #include "cpufreq.h"
+#include "cpuidle.h"
 #include "helpers/helpers.h"
-#include "helpers/sysfs.h"
 
 #define NORM_FREQ_LEN 32
 
@@ -296,7 +296,7 @@
 			struct cpufreq_affected_cpus *cpus;
 
 			if (!bitmask_isbitset(cpus_chosen, cpu) ||
-			    cpufreq_cpu_exists(cpu))
+			    cpupower_is_cpu_online(cpu))
 				continue;
 
 			cpus = cpufreq_get_related_cpus(cpu);
@@ -316,10 +316,10 @@
 	     cpu <= bitmask_last(cpus_chosen); cpu++) {
 
 		if (!bitmask_isbitset(cpus_chosen, cpu) ||
-		    cpufreq_cpu_exists(cpu))
+		    cpupower_is_cpu_online(cpu))
 			continue;
 
-		if (sysfs_is_cpu_online(cpu) != 1)
+		if (cpupower_is_cpu_online(cpu) != 1)
 			continue;
 
 		printf(_("Setting cpu: %d\n"), cpu);
diff --git a/tools/power/cpupower/utils/cpuidle-info.c b/tools/power/cpupower/utils/cpuidle-info.c
index 8bf8ab5..b59c85d 100644
--- a/tools/power/cpupower/utils/cpuidle-info.c
+++ b/tools/power/cpupower/utils/cpuidle-info.c
@@ -13,8 +13,10 @@
 #include <string.h>
 #include <getopt.h>
 
-#include "helpers/helpers.h"
+#include <cpuidle.h>
+
 #include "helpers/sysfs.h"
+#include "helpers/helpers.h"
 #include "helpers/bitmask.h"
 
 #define LINE_LEN 10
@@ -24,7 +26,7 @@
 	unsigned int idlestates, idlestate;
 	char *tmp;
 
-	idlestates = sysfs_get_idlestate_count(cpu);
+	idlestates = cpuidle_state_count(cpu);
 	if (idlestates == 0) {
 		printf(_("CPU %u: No idle states\n"), cpu);
 		return;
@@ -33,7 +35,7 @@
 	printf(_("Number of idle states: %d\n"), idlestates);
 	printf(_("Available idle states:"));
 	for (idlestate = 0; idlestate < idlestates; idlestate++) {
-		tmp = sysfs_get_idlestate_name(cpu, idlestate);
+		tmp = cpuidle_state_name(cpu, idlestate);
 		if (!tmp)
 			continue;
 		printf(" %s", tmp);
@@ -45,28 +47,28 @@
 		return;
 
 	for (idlestate = 0; idlestate < idlestates; idlestate++) {
-		int disabled = sysfs_is_idlestate_disabled(cpu, idlestate);
+		int disabled = cpuidle_is_state_disabled(cpu, idlestate);
 		/* Disabled interface not supported on older kernels */
 		if (disabled < 0)
 			disabled = 0;
-		tmp = sysfs_get_idlestate_name(cpu, idlestate);
+		tmp = cpuidle_state_name(cpu, idlestate);
 		if (!tmp)
 			continue;
 		printf("%s%s:\n", tmp, (disabled) ? " (DISABLED) " : "");
 		free(tmp);
 
-		tmp = sysfs_get_idlestate_desc(cpu, idlestate);
+		tmp = cpuidle_state_desc(cpu, idlestate);
 		if (!tmp)
 			continue;
 		printf(_("Flags/Description: %s\n"), tmp);
 		free(tmp);
 
 		printf(_("Latency: %lu\n"),
-		       sysfs_get_idlestate_latency(cpu, idlestate));
+		       cpuidle_state_latency(cpu, idlestate));
 		printf(_("Usage: %lu\n"),
-		       sysfs_get_idlestate_usage(cpu, idlestate));
+		       cpuidle_state_usage(cpu, idlestate));
 		printf(_("Duration: %llu\n"),
-		       sysfs_get_idlestate_time(cpu, idlestate));
+		       cpuidle_state_time(cpu, idlestate));
 	}
 }
 
@@ -74,7 +76,7 @@
 {
 	char *tmp;
 
-	tmp = sysfs_get_cpuidle_driver();
+	tmp = cpuidle_get_driver();
 	if (!tmp) {
 		printf(_("Could not determine cpuidle driver\n"));
 		return;
@@ -83,7 +85,7 @@
 	printf(_("CPUidle driver: %s\n"), tmp);
 	free(tmp);
 
-	tmp = sysfs_get_cpuidle_governor();
+	tmp = cpuidle_get_governor();
 	if (!tmp) {
 		printf(_("Could not determine cpuidle governor\n"));
 		return;
@@ -98,7 +100,7 @@
 	long max_allowed_cstate = 2000000000;
 	unsigned int cstate, cstates;
 
-	cstates = sysfs_get_idlestate_count(cpu);
+	cstates = cpuidle_state_count(cpu);
 	if (cstates == 0) {
 		printf(_("CPU %u: No C-states info\n"), cpu);
 		return;
@@ -113,11 +115,11 @@
 			 "type[C%d] "), cstate, cstate);
 		printf(_("promotion[--] demotion[--] "));
 		printf(_("latency[%03lu] "),
-		       sysfs_get_idlestate_latency(cpu, cstate));
+		       cpuidle_state_latency(cpu, cstate));
 		printf(_("usage[%08lu] "),
-		       sysfs_get_idlestate_usage(cpu, cstate));
+		       cpuidle_state_usage(cpu, cstate));
 		printf(_("duration[%020Lu] \n"),
-		       sysfs_get_idlestate_time(cpu, cstate));
+		       cpuidle_state_time(cpu, cstate));
 	}
 }
 
diff --git a/tools/power/cpupower/utils/cpuidle-set.c b/tools/power/cpupower/utils/cpuidle-set.c
index d6b6ae4..691c24d 100644
--- a/tools/power/cpupower/utils/cpuidle-set.c
+++ b/tools/power/cpupower/utils/cpuidle-set.c
@@ -5,12 +5,12 @@
 #include <limits.h>
 #include <string.h>
 #include <ctype.h>
-
 #include <getopt.h>
 
-#include "cpufreq.h"
+#include <cpufreq.h>
+#include <cpuidle.h>
+
 #include "helpers/helpers.h"
-#include "helpers/sysfs.h"
 
 static struct option info_opts[] = {
      {"disable",	required_argument,		NULL, 'd'},
@@ -104,16 +104,16 @@
 		if (!bitmask_isbitset(cpus_chosen, cpu))
 			continue;
 
-		if (sysfs_is_cpu_online(cpu) != 1)
+		if (cpupower_is_cpu_online(cpu) != 1)
 			continue;
 
-		idlestates = sysfs_get_idlestate_count(cpu);
+		idlestates = cpuidle_state_count(cpu);
 		if (idlestates <= 0)
 			continue;
 
 		switch (param) {
 		case 'd':
-			ret = sysfs_idlestate_disable(cpu, idlestate, 1);
+			ret = cpuidle_state_disable(cpu, idlestate, 1);
 			if (ret == 0)
 		printf(_("Idlestate %u disabled on CPU %u\n"),  idlestate, cpu);
 			else if (ret == -1)
@@ -126,7 +126,7 @@
 		       idlestate, cpu);
 			break;
 		case 'e':
-			ret = sysfs_idlestate_disable(cpu, idlestate, 0);
+			ret = cpuidle_state_disable(cpu, idlestate, 0);
 			if (ret == 0)
 		printf(_("Idlestate %u enabled on CPU %u\n"),  idlestate, cpu);
 			else if (ret == -1)
@@ -140,13 +140,13 @@
 			break;
 		case 'D':
 			for (idlestate = 0; idlestate < idlestates; idlestate++) {
-				disabled = sysfs_is_idlestate_disabled
+				disabled = cpuidle_is_state_disabled
 					(cpu, idlestate);
-				state_latency = sysfs_get_idlestate_latency
+				state_latency = cpuidle_state_latency
 					(cpu, idlestate);
 				if (disabled == 1) {
 					if (latency > state_latency){
-						ret = sysfs_idlestate_disable
+						ret = cpuidle_state_disable
 							(cpu, idlestate, 0);
 						if (ret == 0)
 		printf(_("Idlestate %u enabled on CPU %u\n"),  idlestate, cpu);
@@ -154,7 +154,7 @@
 					continue;
 				}
 				if (latency <= state_latency){
-					ret = sysfs_idlestate_disable
+					ret = cpuidle_state_disable
 						(cpu, idlestate, 1);
 					if (ret == 0)
 		printf(_("Idlestate %u disabled on CPU %u\n"), idlestate, cpu);
@@ -163,10 +163,10 @@
 			break;
 		case 'E':
 			for (idlestate = 0; idlestate < idlestates; idlestate++) {
-				disabled = sysfs_is_idlestate_disabled
+				disabled = cpuidle_is_state_disabled
 					(cpu, idlestate);
 				if (disabled == 1) {
-					ret = sysfs_idlestate_disable
+					ret = cpuidle_state_disable
 						(cpu, idlestate, 0);
 					if (ret == 0)
 		printf(_("Idlestate %u enabled on CPU %u\n"), idlestate, cpu);
diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h
index aa9e954..afb66f8 100644
--- a/tools/power/cpupower/utils/helpers/helpers.h
+++ b/tools/power/cpupower/utils/helpers/helpers.h
@@ -14,6 +14,7 @@
 #include <locale.h>
 
 #include "helpers/bitmask.h"
+#include <cpupower.h>
 
 /* Internationalization ****************************/
 #ifdef NLS
@@ -92,31 +93,6 @@
 extern struct cpupower_cpu_info cpupower_cpu_info;
 /* cpuid and cpuinfo helpers  **************************/
 
-struct cpuid_core_info {
-	int pkg;
-	int core;
-	int cpu;
-
-	/* flags */
-	unsigned int is_online:1;
-};
-
-/* CPU topology/hierarchy parsing ******************/
-struct cpupower_topology {
-	/* Amount of CPU cores, packages and threads per core in the system */
-	unsigned int cores;
-	unsigned int pkgs;
-	unsigned int threads; /* per core */
-
-	/* Array gets mallocated with cores entries, holding per core info */
-	struct cpuid_core_info *core_info;
-};
-
-extern int get_cpu_topology(struct cpupower_topology *cpu_top);
-extern void cpu_topology_release(struct cpupower_topology cpu_top);
-
-/* CPU topology/hierarchy parsing ******************/
-
 /* X86 ONLY ****************************************/
 #if defined(__i386__) || defined(__x86_64__)
 
diff --git a/tools/power/cpupower/utils/helpers/topology.c b/tools/power/cpupower/utils/helpers/topology.c
index 5f9c908..a1a6c60 100644
--- a/tools/power/cpupower/utils/helpers/topology.c
+++ b/tools/power/cpupower/utils/helpers/topology.c
@@ -16,110 +16,7 @@
 #include <errno.h>
 #include <fcntl.h>
 
-#include <helpers/helpers.h>
-#include <helpers/sysfs.h>
+#include <cpuidle.h>
 
-/* returns -1 on failure, 0 on success */
-static int sysfs_topology_read_file(unsigned int cpu, const char *fname, int *result)
-{
-	char linebuf[MAX_LINE_LEN];
-	char *endp;
-	char path[SYSFS_PATH_MAX];
+/* CPU topology/hierarchy parsing ******************/
 
-	snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/topology/%s",
-			 cpu, fname);
-	if (sysfs_read_file(path, linebuf, MAX_LINE_LEN) == 0)
-		return -1;
-	*result = strtol(linebuf, &endp, 0);
-	if (endp == linebuf || errno == ERANGE)
-		return -1;
-	return 0;
-}
-
-static int __compare(const void *t1, const void *t2)
-{
-	struct cpuid_core_info *top1 = (struct cpuid_core_info *)t1;
-	struct cpuid_core_info *top2 = (struct cpuid_core_info *)t2;
-	if (top1->pkg < top2->pkg)
-		return -1;
-	else if (top1->pkg > top2->pkg)
-		return 1;
-	else if (top1->core < top2->core)
-		return -1;
-	else if (top1->core > top2->core)
-		return 1;
-	else if (top1->cpu < top2->cpu)
-		return -1;
-	else if (top1->cpu > top2->cpu)
-		return 1;
-	else
-		return 0;
-}
-
-/*
- * Returns amount of cpus, negative on error, cpu_top must be
- * passed to cpu_topology_release to free resources
- *
- * Array is sorted after ->pkg, ->core, then ->cpu
- */
-int get_cpu_topology(struct cpupower_topology *cpu_top)
-{
-	int cpu, last_pkg, cpus = sysconf(_SC_NPROCESSORS_CONF);
-
-	cpu_top->core_info = malloc(sizeof(struct cpuid_core_info) * cpus);
-	if (cpu_top->core_info == NULL)
-		return -ENOMEM;
-	cpu_top->pkgs = cpu_top->cores = 0;
-	for (cpu = 0; cpu < cpus; cpu++) {
-		cpu_top->core_info[cpu].cpu = cpu;
-		cpu_top->core_info[cpu].is_online = sysfs_is_cpu_online(cpu);
-		if(sysfs_topology_read_file(
-			cpu,
-			"physical_package_id",
-			&(cpu_top->core_info[cpu].pkg)) < 0) {
-			cpu_top->core_info[cpu].pkg = -1;
-			cpu_top->core_info[cpu].core = -1;
-			continue;
-		}
-		if(sysfs_topology_read_file(
-			cpu,
-			"core_id",
-			&(cpu_top->core_info[cpu].core)) < 0) {
-			cpu_top->core_info[cpu].pkg = -1;
-			cpu_top->core_info[cpu].core = -1;
-			continue;
-		}
-	}
-
-	qsort(cpu_top->core_info, cpus, sizeof(struct cpuid_core_info),
-	      __compare);
-
-	/* Count the number of distinct pkgs values. This works
-	   because the primary sort of the core_info struct was just
-	   done by pkg value. */
-	last_pkg = cpu_top->core_info[0].pkg;
-	for(cpu = 1; cpu < cpus; cpu++) {
-		if (cpu_top->core_info[cpu].pkg != last_pkg &&
-				cpu_top->core_info[cpu].pkg != -1) {
-
-			last_pkg = cpu_top->core_info[cpu].pkg;
-			cpu_top->pkgs++;
-		}
-	}
-	if (!(cpu_top->core_info[0].pkg == -1))
-		cpu_top->pkgs++;
-
-	/* Intel's cores count is not consecutively numbered, there may
-	 * be a core_id of 3, but none of 2. Assume there always is 0
-	 * Get amount of cores by counting duplicates in a package
-	for (cpu = 0; cpu_top->core_info[cpu].pkg = 0 && cpu < cpus; cpu++) {
-		if (cpu_top->core_info[cpu].core == 0)
-	cpu_top->cores++;
-	*/
-	return cpus;
-}
-
-void cpu_topology_release(struct cpupower_topology cpu_top)
-{
-	free(cpu_top.core_info);
-}
diff --git a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
index bcd22a1..1b5da00 100644
--- a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
+++ b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
@@ -10,8 +10,8 @@
 #include <stdint.h>
 #include <string.h>
 #include <limits.h>
+#include <cpuidle.h>
 
-#include "helpers/sysfs.h"
 #include "helpers/helpers.h"
 #include "idle_monitor/cpupower-monitor.h"
 
@@ -51,7 +51,7 @@
 		for (state = 0; state < cpuidle_sysfs_monitor.hw_states_num;
 		     state++) {
 			previous_count[cpu][state] =
-				sysfs_get_idlestate_time(cpu, state);
+				cpuidle_state_time(cpu, state);
 			dprint("CPU %d - State: %d - Val: %llu\n",
 			       cpu, state, previous_count[cpu][state]);
 		}
@@ -70,7 +70,7 @@
 		for (state = 0; state < cpuidle_sysfs_monitor.hw_states_num;
 		     state++) {
 			current_count[cpu][state] =
-				sysfs_get_idlestate_time(cpu, state);
+				cpuidle_state_time(cpu, state);
 			dprint("CPU %d - State: %d - Val: %llu\n",
 			       cpu, state, previous_count[cpu][state]);
 		}
@@ -132,13 +132,13 @@
 	char *tmp;
 
 	/* Assume idle state count is the same for all CPUs */
-	cpuidle_sysfs_monitor.hw_states_num = sysfs_get_idlestate_count(0);
+	cpuidle_sysfs_monitor.hw_states_num = cpuidle_state_count(0);
 
 	if (cpuidle_sysfs_monitor.hw_states_num <= 0)
 		return NULL;
 
 	for (num = 0; num < cpuidle_sysfs_monitor.hw_states_num; num++) {
-		tmp = sysfs_get_idlestate_name(0, num);
+		tmp = cpuidle_state_name(0, num);
 		if (tmp == NULL)
 			continue;
 
@@ -146,7 +146,7 @@
 		strncpy(cpuidle_cstates[num].name, tmp, CSTATE_NAME_LEN - 1);
 		free(tmp);
 
-		tmp = sysfs_get_idlestate_desc(0, num);
+		tmp = cpuidle_state_desc(0, num);
 		if (tmp == NULL)
 			continue;
 		strncpy(cpuidle_cstates[num].desc, tmp,	CSTATE_DESC_LEN - 1);
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index b04afc3..ff9e5f2 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -19,6 +19,7 @@
 TARGETS += pstore
 TARGETS += ptrace
 TARGETS += seccomp
+TARGETS += sigaltstack
 TARGETS += size
 TARGETS += static_keys
 TARGETS += sysctl
diff --git a/tools/testing/selftests/rcutorture/bin/jitter.sh b/tools/testing/selftests/rcutorture/bin/jitter.sh
new file mode 100755
index 0000000..3633828
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/jitter.sh
@@ -0,0 +1,90 @@
+#!/bin/bash
+#
+# Alternate sleeping and spinning on randomly selected CPUs.  The purpose
+# of this script is to inflict random OS jitter on a concurrently running
+# test.
+#
+# Usage: jitter.sh me duration [ sleepmax [ spinmax ] ]
+#
+# me: Random-number-generator seed salt.
+# duration: Time to run in seconds.
+# sleepmax: Maximum microseconds to sleep, defaults to one second.
+# spinmax: Maximum microseconds to spin, defaults to one millisecond.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2016
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+me=$(($1 * 1000))
+duration=$2
+sleepmax=${3-1000000}
+spinmax=${4-1000}
+
+n=1
+
+starttime=`awk 'BEGIN { print systime(); }' < /dev/null`
+
+while :
+do
+	# Check for done.
+	t=`awk -v s=$starttime 'BEGIN { print systime() - s; }' < /dev/null`
+	if test "$t" -gt "$duration"
+	then
+		exit 0;
+	fi
+
+	# Set affinity to randomly selected CPU
+	cpus=`ls /sys/devices/system/cpu/*/online |
+		sed -e 's,/[^/]*$,,' -e 's/^[^0-9]*//' |
+		grep -v '^0*$'`
+	cpumask=`awk -v cpus="$cpus" -v me=$me -v n=$n 'BEGIN {
+		srand(n + me + systime());
+		ncpus = split(cpus, ca);
+		curcpu = ca[int(rand() * ncpus + 1)];
+		mask = lshift(1, curcpu);
+		if (mask + 0 <= 0)
+			mask = 1;
+		printf("%#x\n", mask);
+	}' < /dev/null`
+	n=$(($n+1))
+	if ! taskset -p $cpumask $$ > /dev/null 2>&1
+	then
+		echo taskset failure: '"taskset -p ' $cpumask $$ '"'
+		exit 1
+	fi
+
+	# Sleep a random duration
+	sleeptime=`awk -v me=$me -v n=$n -v sleepmax=$sleepmax 'BEGIN {
+		srand(n + me + systime());
+		printf("%06d", int(rand() * sleepmax));
+	}' < /dev/null`
+	n=$(($n+1))
+	sleep .$sleeptime
+
+	# Spin a random duration
+	limit=`awk -v me=$me -v n=$n -v spinmax=$spinmax 'BEGIN {
+		srand(n + me + systime());
+		printf("%06d", int(rand() * spinmax));
+	}' < /dev/null`
+	n=$(($n+1))
+	for i in `seq $limit`
+	do
+		echo > /dev/null
+	done
+done
+
+exit 1
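
The taskset invocation above amounts to installing a single-CPU affinity mask; a
hedged C sketch of the same idea, assuming CPUs are numbered densely from 0 (the
script instead parses /sys so it can skip offline CPUs):

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>

int main(void)
{
	cpu_set_t mask;
	long ncpus = sysconf(_SC_NPROCESSORS_ONLN);
	int cpu;

	srand(time(NULL) ^ getpid());
	cpu = rand() % ncpus;			/* pick one CPU at random */

	CPU_ZERO(&mask);
	CPU_SET(cpu, &mask);			/* like taskset -p <mask> $$ */
	if (sched_setaffinity(0, sizeof(mask), &mask) != 0) {
		perror("sched_setaffinity");
		return 1;
	}
	printf("pinned to cpu%d\n", cpu);
	return 0;
}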
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh
new file mode 100755
index 0000000..f79b0e9
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh
@@ -0,0 +1,121 @@
+#!/bin/bash
+#
+# Analyze a given results directory for rcuperf performance measurements,
+# looking for ftrace data.  Exits with 0 if data was found, analyzed, and
+# printed.  Intended to be invoked from kvm-recheck-rcuperf.sh after
+# argument checking.
+#
+# Usage: kvm-recheck-rcuperf-ftrace.sh resdir
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2016
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+i="$1"
+. tools/testing/selftests/rcutorture/bin/functions.sh
+
+if test "`grep -c 'rcu_exp_grace_period.*start' < $i/console.log`" -lt 100
+then
+	exit 10
+fi
+
+sed -e 's/^\[[^]]*]//' < $i/console.log |
+grep 'us : rcu_exp_grace_period' |
+sed -e 's/us : / : /' |
+tr -d '\015' |
+awk '
+$8 == "start" {
+	if (starttask != "")
+		nlost++;
+	starttask = $1;
+	starttime = $3;
+	startseq = $7;
+}
+
+$8 == "end" {
+	if (starttask == $1 && startseq == $7) {
+		curgpdur = $3 - starttime;
+		gptimes[++n] = curgpdur;
+		gptaskcnt[starttask]++;
+		sum += curgpdur;
+		if (curgpdur > 1000)
+			print "Long GP " starttime "us to " $3 "us (" curgpdur "us)";
+		starttask = "";
+	} else {
+		# Lost a message or some such, reset.
+		starttask = "";
+		nlost++;
+	}
+}
+
+$8 == "done" {
+	piggybackcnt[$1]++;
+}
+
+END {
+	newNR = asort(gptimes);
+	if (newNR <= 0) {
+		print "No ftrace records found???"
+		exit 10;
+	}
+	pct50 = int(newNR * 50 / 100);
+	if (pct50 < 1)
+		pct50 = 1;
+	pct90 = int(newNR * 90 / 100);
+	if (pct90 < 1)
+		pct90 = 1;
+	pct99 = int(newNR * 99 / 100);
+	if (pct99 < 1)
+		pct99 = 1;
+	div = 10 ** int(log(gptimes[pct90]) / log(10) + .5) / 100;
+	print "Histogram bucket size: " div;
+	last = gptimes[1] - 10;
+	count = 0;
+	for (i = 1; i <= newNR; i++) {
+		current = div * int(gptimes[i] / div);
+		if (last == current) {
+			count++;
+		} else {
+			if (count > 0)
+				print last, count;
+			count = 1;
+			last = current;
+		}
+	}
+	if (count > 0)
+		print last, count;
+	print "Distribution of grace periods across tasks:";
+	for (i in gptaskcnt) {
+		print "\t" i, gptaskcnt[i];
+		nbatches += gptaskcnt[i];
+	}
+	ngps = nbatches;
+	print "Distribution of piggybacking across tasks:";
+	for (i in piggybackcnt) {
+		print "\t" i, piggybackcnt[i];
+		ngps += piggybackcnt[i];
+	}
+	print "Average grace-period duration: " sum / newNR " microseconds";
+	print "Minimum grace-period duration: " gptimes[1];
+	print "50th percentile grace-period duration: " gptimes[pct50];
+	print "90th percentile grace-period duration: " gptimes[pct90];
+	print "99th percentile grace-period duration: " gptimes[pct99];
+	print "Maximum grace-period duration: " gptimes[newNR];
+	print "Grace periods: " ngps + 0 " Batches: " nbatches + 0 " Ratio: " ngps / nbatches " Lost: " nlost + 0;
+	print "Computed from ftrace data.";
+}'
+exit 0
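
The percentile and histogram-bucket arithmetic in the END block boils down to the
following C sketch; the sample durations are made up, and the program needs -lm:

#include <math.h>
#include <stdio.h>

/* 1-based index of the p-th percentile in a sorted n-element array,
 * clamped to the first element, exactly as in the awk above */
static int pctidx(int n, int p)
{
	int i = n * p / 100;

	return i < 1 ? 1 : i;
}

int main(void)
{
	/* gptimes[1..5] hold sorted GP durations in microseconds;
	 * [0] is unused, mirroring awk's 1-based asort() result */
	double gptimes[] = { 0, 12.5, 14.0, 15.5, 80.0, 95.0 };
	int n = 5;
	double p90 = gptimes[pctidx(n, 90)];
	/* bucket size: div = 10 ** int(log10(p90) + .5) / 100 */
	double div = pow(10, (int)(log10(p90) + 0.5)) / 100;

	printf("50th=%g 90th=%g 99th=%g bucket=%g\n",
	       gptimes[pctidx(n, 50)], p90, gptimes[pctidx(n, 99)], div);
	return 0;
}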
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh
new file mode 100755
index 0000000..8f3121a
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh
@@ -0,0 +1,96 @@
+#!/bin/bash
+#
+# Analyze a given results directory for rcuperf performance measurements.
+#
+# Usage: kvm-recheck-rcuperf.sh resdir
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2016
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+i="$1"
+if test -d $i
+then
+	:
+else
+	echo Unreadable results directory: $i
+	exit 1
+fi
+PATH=`pwd`/tools/testing/selftests/rcutorture/bin:$PATH; export PATH
+. tools/testing/selftests/rcutorture/bin/functions.sh
+
+if kvm-recheck-rcuperf-ftrace.sh $i
+then
+	# ftrace data was successfully analyzed, call it good!
+	exit 0
+fi
+
+configfile=`echo $i | sed -e 's/^.*\///'`
+
+sed -e 's/^\[[^]]*]//' < $i/console.log |
+awk '
+/-perf: .* gps: .* batches:/ {
+	ngps = $9;
+	nbatches = $11;
+}
+
+/-perf: .*writer-duration/ {
+	gptimes[++n] = $5 / 1000.;
+	sum += $5 / 1000.;
+}
+
+END {
+	newNR = asort(gptimes);
+	if (newNR <= 0) {
+		print "No rcuperf records found???"
+		exit;
+	}
+	pct50 = int(newNR * 50 / 100);
+	if (pct50 < 1)
+		pct50 = 1;
+	pct90 = int(newNR * 90 / 100);
+	if (pct90 < 1)
+		pct90 = 1;
+	pct99 = int(newNR * 99 / 100);
+	if (pct99 < 1)
+		pct99 = 1;
+	div = 10 ** int(log(gptimes[pct90]) / log(10) + .5) / 100;
+	print "Histogram bucket size: " div;
+	last = gptimes[1] - 10;
+	count = 0;
+	for (i = 1; i <= newNR; i++) {
+		current = div * int(gptimes[i] / div);
+		if (last == current) {
+			count++;
+		} else {
+			if (count > 0)
+				print last, count;
+			count = 1;
+			last = current;
+		}
+	}
+	if (count > 0)
+		print last, count;
+	print "Average grace-period duration: " sum / newNR " microseconds";
+	print "Minimum grace-period duration: " gptimes[1];
+	print "50th percentile grace-period duration: " gptimes[pct50];
+	print "90th percentile grace-period duration: " gptimes[pct90];
+	print "99th percentile grace-period duration: " gptimes[pct99];
+	print "Maximum grace-period duration: " gptimes[newNR];
+	print "Grace periods: " ngps + 0 " Batches: " nbatches + 0 " Ratio: " ngps / nbatches;
+	print "Computed from rcuperf printk output.";
+}'
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
index d86bdd6..f659346 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
@@ -48,7 +48,10 @@
 				cat $i/Make.oldconfig.err
 			fi
 			parse-build.sh $i/Make.out $configfile
-			parse-torture.sh $i/console.log $configfile
+			if test "$TORTURE_SUITE" != rcuperf
+			then
+				parse-torture.sh $i/console.log $configfile
+			fi
 			parse-console.sh $i/console.log $configfile
 			if test -r $i/Warnings
 			then
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index 0f80eef..4109f30 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -6,7 +6,7 @@
 # Execute this in the source tree.  Do not run it as a background task
 # because qemu does not seem to like that much.
 #
-# Usage: kvm-test-1-run.sh config builddir resdir minutes qemu-args boot_args
+# Usage: kvm-test-1-run.sh config builddir resdir seconds qemu-args boot_args
 #
 # qemu-args defaults to "-enable-kvm -soundhw pcspk -nographic", along with
 #			arguments specifying the number of CPUs and other
@@ -91,25 +91,33 @@
 # CONFIG_PCMCIA=n
 # CONFIG_CARDBUS=n
 # CONFIG_YENTA=n
-if kvm-build.sh $config_template $builddir $T
+base_resdir=`echo $resdir | sed -e 's/\.[0-9]\+$//'`
+if test "$base_resdir" != "$resdir" -a -f $base_resdir/bzImage -a -f $base_resdir/vmlinux
 then
+	# Rerunning previous test, so use that test's kernel.
+	QEMU="`identify_qemu $base_resdir/vmlinux`"
+	KERNEL=$base_resdir/bzImage
+	ln -s $base_resdir/Make*.out $resdir  # for kvm-recheck.sh
+	ln -s $base_resdir/.config $resdir  # for kvm-recheck.sh
+elif kvm-build.sh $config_template $builddir $T
+then
+	# Had to build a kernel for this test.
 	QEMU="`identify_qemu $builddir/vmlinux`"
 	BOOT_IMAGE="`identify_boot_image $QEMU`"
 	cp $builddir/Make*.out $resdir
+	cp $builddir/vmlinux $resdir
 	cp $builddir/.config $resdir
 	if test -n "$BOOT_IMAGE"
 	then
 		cp $builddir/$BOOT_IMAGE $resdir
+		KERNEL=$resdir/bzImage
 	else
 		echo No identifiable boot image, not running KVM, see $resdir.
 		echo Do the torture scripts know about your architecture?
 	fi
 	parse-build.sh $resdir/Make.out $title
-	if test -f $builddir.wait
-	then
-		mv $builddir.wait $builddir.ready
-	fi
 else
+	# Build failed.
 	cp $builddir/Make*.out $resdir
 	cp $builddir/.config $resdir || :
 	echo Build failed, not running KVM, see $resdir.
@@ -119,12 +127,15 @@
 	fi
 	exit 1
 fi
+if test -f $builddir.wait
+then
+	mv $builddir.wait $builddir.ready
+fi
 while test -f $builddir.ready
 do
 	sleep 1
 done
-minutes=$4
-seconds=$(($minutes * 60))
+seconds=$4
 qemu_args=$5
 boot_args=$6
 
@@ -167,15 +178,26 @@
 	exit 0
 fi
 echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log
-echo $QEMU $qemu_args -m 512 -kernel $resdir/bzImage -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd
-( $QEMU $qemu_args -m 512 -kernel $resdir/bzImage -append "$qemu_append $boot_args"; echo $? > $resdir/qemu-retval ) &
-qemu_pid=$!
+echo $QEMU $qemu_args -m 512 -kernel $KERNEL -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd
+( $QEMU $qemu_args -m 512 -kernel $KERNEL -append "$qemu_append $boot_args"& echo $! > $resdir/qemu_pid; wait `cat  $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) &
 commandcompleted=0
-echo Monitoring qemu job at pid $qemu_pid
+sleep 10 # Give qemu's pid a chance to reach the file
+if test -s "$resdir/qemu_pid"
+then
+	qemu_pid=`cat "$resdir/qemu_pid"`
+	echo Monitoring qemu job at pid $qemu_pid
+else
+	qemu_pid=""
+	echo Monitoring qemu job at as-yet-unknown pid
+fi
 while :
 do
+	if test -z "$qemu_pid" -a -s "$resdir/qemu_pid"
+	then
+		qemu_pid=`cat "$resdir/qemu_pid"`
+	fi
 	kruntime=`awk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null`
-	if kill -0 $qemu_pid > /dev/null 2>&1
+	if test -z "$qemu_pid" || kill -0 "$qemu_pid" > /dev/null 2>&1
 	then
 		if test $kruntime -ge $seconds
 		then
@@ -195,12 +217,16 @@
 				ps -fp $killpid >> $resdir/Warnings 2>&1
 			fi
 		else
-			echo ' ---' `date`: Kernel done
+			echo ' ---' `date`: "Kernel done"
 		fi
 		break
 	fi
 done
-if test $commandcompleted -eq 0
+if test -z "$qemu_pid" -a -s "$resdir/qemu_pid"
+then
+	qemu_pid=`cat "$resdir/qemu_pid"`
+fi
+if test $commandcompleted -eq 0 -a -n "$qemu_pid"
 then
 	echo Grace period for qemu job at pid $qemu_pid
 	while :
@@ -220,6 +246,9 @@
 		fi
 		sleep 1
 	done
+elif test -z "$qemu_pid"
+then
+	echo Unknown PID, cannot kill qemu command
 fi
 
 parse-torture.sh $resdir/console.log $title
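
The launch command above stops relying on $! in the parent shell and instead
has the backgrounded subshell publish qemu's PID in $resdir/qemu_pid, so the
monitoring loop can start before the PID is known. A minimal self-contained
sketch of the same idiom, with placeholder paths:

    # Run a long command in a backgrounded subshell, publish its PID for a
    # separate monitor, and record its exit status when it completes.
    ( sleep 30 & echo $! > /tmp/demo_pid; wait `cat /tmp/demo_pid`; \
      echo $? > /tmp/demo_retval ) &
    sleep 1    # give the subshell a chance to write the PID file
    if test -s /tmp/demo_pid && kill -0 "`cat /tmp/demo_pid`" 2> /dev/null
    then
        echo "demo job still running at pid `cat /tmp/demo_pid`"
    fi
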
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 4a43176..0d59814 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -34,7 +34,7 @@
 trap 'rm -rf $T' 0
 mkdir $T
 
-dur=30
+dur=$((30*60))
 dryrun=""
 KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
 PATH=${KVM}/bin:$PATH; export PATH
@@ -48,6 +48,7 @@
 configs=""
 cpus=0
 ds=`date +%Y.%m.%d-%H:%M:%S`
+jitter=0
 
 . functions.sh
 
@@ -63,6 +64,7 @@
 	echo "       --dryrun sched|script"
 	echo "       --duration minutes"
 	echo "       --interactive"
+	echo "       --jitter N [ maxsleep (us) [ maxspin (us) ] ]"
 	echo "       --kmake-arg kernel-make-arguments"
 	echo "       --mac nn:nn:nn:nn:nn:nn"
 	echo "       --no-initrd"
@@ -116,12 +118,17 @@
 		;;
 	--duration)
 		checkarg --duration "(minutes)" $# "$2" '^[0-9]*$' '^error'
-		dur=$2
+		dur=$(($2*60))
 		shift
 		;;
 	--interactive)
 		TORTURE_QEMU_INTERACTIVE=1; export TORTURE_QEMU_INTERACTIVE
 		;;
+	--jitter)
+		checkarg --jitter "(# threads [ sleep [ spin ] ])" $# "$2" '^-\{,1\}[0-9]\+\( \+[0-9]\+\)\{,2\} *$' '^error$'
+		jitter="$2"
+		shift
+		;;
 	--kmake-arg)
 		checkarg --kmake-arg "(kernel make arguments)" $# "$2" '.*' '^error$'
 		TORTURE_KMAKE_ARG="$2"
@@ -156,7 +163,7 @@
 		shift
 		;;
 	--torture)
-		checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\)$' '^--'
+		checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuperf\)$' '^--'
 		TORTURE_SUITE=$2
 		shift
 		;;
@@ -299,6 +306,7 @@
 	-v CONFIGDIR="$CONFIGFRAG/" \
 	-v KVM="$KVM" \
 	-v ncpus=$cpus \
+	-v jitter="$jitter" \
 	-v rd=$resdir/$ds/ \
 	-v dur=$dur \
 	-v TORTURE_QEMU_ARG="$TORTURE_QEMU_ARG" \
@@ -359,6 +367,16 @@
 		print "\techo ----", cfr[j], cpusr[j] ovf ": Starting kernel. `date` >> " rd "/log";
 		print "fi"
 	}
+	njitter = 0;
+	split(jitter, ja);
+	if (ja[1] == -1 && ncpus == 0)
+		njitter = 1;
+	else if (ja[1] == -1)
+		njitter = ncpus;
+	else
+		njitter = ja[1];
+	for (j = 0; j < njitter; j++)
+		print "jitter.sh " j " " dur " " ja[2] " " ja[3] "&"
 	print "wait"
 	print "if test -z \"$TORTURE_BUILDONLY\""
 	print "then"
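
To make the jitter plumbing concrete: the awk fragment above turns
--jitter "2 1000 500" with --duration 5 into roughly the following lines of
the generated script (instance number, run duration in seconds, then the
optional maximum sleep and spin times, all passed to jitter.sh), while
--jitter -1 starts one instance per CPU:

    jitter.sh 0 300 1000 500 &
    jitter.sh 1 300 1000 500 &
    wait
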
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
index 39a2c6d..17cbe09 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
@@ -14,7 +14,7 @@
 CONFIG_SUSPEND=n
 CONFIG_HIBERNATION=n
 CONFIG_RCU_FANOUT=4
-CONFIG_RCU_FANOUT_LEAF=4
+CONFIG_RCU_FANOUT_LEAF=3
 CONFIG_RCU_NOCB_CPU=n
 CONFIG_DEBUG_LOCK_ALLOC=n
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot
index 0fc8a34..e34c334 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot
@@ -1 +1 @@
-rcutorture.torture_type=rcu_bh
+rcutorture.torture_type=rcu_bh rcutree.rcu_fanout_leaf=4
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/CFLIST b/tools/testing/selftests/rcutorture/configs/rcuperf/CFLIST
new file mode 100644
index 0000000..c9f56cf
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcuperf/CFLIST
@@ -0,0 +1 @@
+TREE
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon b/tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon
new file mode 100644
index 0000000..a09816b
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon
@@ -0,0 +1,2 @@
+CONFIG_RCU_PERF_TEST=y
+CONFIG_PRINTK_TIME=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE
new file mode 100644
index 0000000..a312f67
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE
@@ -0,0 +1,19 @@
+CONFIG_SMP=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+#CHECK#CONFIG_PREEMPT_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_FAST_NO_HZ=n
+CONFIG_HOTPLUG_CPU=n
+CONFIG_SUSPEND=n
+CONFIG_HIBERNATION=n
+CONFIG_RCU_NOCB_CPU=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_PROVE_LOCKING=n
+CONFIG_RCU_BOOST=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
+CONFIG_RCU_TRACE=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54 b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54
new file mode 100644
index 0000000..985fb17
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54
@@ -0,0 +1,22 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=54
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+#CHECK#CONFIG_PREEMPT_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_FAST_NO_HZ=n
+CONFIG_HOTPLUG_CPU=n
+CONFIG_SUSPEND=n
+CONFIG_HIBERNATION=n
+CONFIG_RCU_FANOUT=3
+CONFIG_RCU_FANOUT_LEAF=2
+CONFIG_RCU_NOCB_CPU=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_PROVE_LOCKING=n
+CONFIG_RCU_BOOST=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
+CONFIG_RCU_TRACE=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh
new file mode 100644
index 0000000..34f2a1b
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+#
+# Torture-suite-dependent shell functions for the rest of the scripts.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2015
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+# rcuperf_param_nreaders bootparam-string
+#
+# Adds nreaders rcuperf module parameter if not already specified.
+rcuperf_param_nreaders () {
+	if ! echo "$1" | grep -q "rcuperf.nreaders"
+	then
+		echo rcuperf.nreaders=-1
+	fi
+}
+
+# rcuperf_param_nwriters bootparam-string
+#
+# Adds nwriters rcuperf module parameter if not already specified.
+rcuperf_param_nwriters () {
+	if ! echo "$1" | grep -q "rcuperf.nwriters"
+	then
+		echo rcuperf.nwriters=-1
+	fi
+}
+
+# per_version_boot_params bootparam-string config-file seconds
+#
+# Adds per-version torture-module parameters to kernels supporting them.
+per_version_boot_params () {
+	echo $1 `rcuperf_param_nreaders "$1"` \
+		`rcuperf_param_nwriters "$1"` \
+		rcuperf.perf_runnable=1 \
+		rcuperf.shutdown=1 \
+		rcuperf.verbose=1
+}
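
As a sanity check of the helpers above, this is what per_version_boot_params
prints for a boot string that already pins nreaders (a hedged sketch; the
sourcing path assumes the top of a kernel tree):

    . tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh
    per_version_boot_params "rcuperf.nreaders=4" /dev/null 600
    # -> rcuperf.nreaders=4 rcuperf.nwriters=-1 rcuperf.perf_runnable=1
    #    rcuperf.shutdown=1 rcuperf.verbose=1
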
diff --git a/tools/testing/selftests/sigaltstack/Makefile b/tools/testing/selftests/sigaltstack/Makefile
new file mode 100644
index 0000000..56af56e
--- /dev/null
+++ b/tools/testing/selftests/sigaltstack/Makefile
@@ -0,0 +1,8 @@
+CFLAGS = -Wall
+BINARIES = sas
+all: $(BINARIES)
+
+include ../lib.mk
+
+clean:
+	rm -rf $(BINARIES)
diff --git a/tools/testing/selftests/sigaltstack/sas.c b/tools/testing/selftests/sigaltstack/sas.c
new file mode 100644
index 0000000..1bb0125
--- /dev/null
+++ b/tools/testing/selftests/sigaltstack/sas.c
@@ -0,0 +1,176 @@
+/*
+ * Stas Sergeev <stsp@users.sourceforge.net>
+ *
+ * Test sigaltstack(SS_ONSTACK | SS_AUTODISARM).
+ * If that succeeds, then swapcontext() can be used inside a signal handler safely.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <ucontext.h>
+#include <alloca.h>
+#include <string.h>
+#include <assert.h>
+#include <errno.h>
+
+#ifndef SS_AUTODISARM
+#define SS_AUTODISARM  (1U << 31)
+#endif
+
+static void *sstack, *ustack;
+static ucontext_t uc, sc;
+static const char *msg = "[OK]\tStack preserved";
+static const char *msg2 = "[FAIL]\tStack corrupted";
+struct stk_data {
+	char msg[128];
+	int flag;
+};
+
+void my_usr1(int sig, siginfo_t *si, void *u)
+{
+	char *aa;
+	int err;
+	stack_t stk;
+	struct stk_data *p;
+
+	register unsigned long sp asm("sp");
+
+	if (sp < (unsigned long)sstack ||
+			sp >= (unsigned long)sstack + SIGSTKSZ) {
+		printf("[FAIL]\tSP is not on sigaltstack\n");
+		exit(EXIT_FAILURE);
+	}
+	/* Put some data on the stack; the other signal handler will try to overwrite it. */
+	aa = alloca(1024);
+	assert(aa);
+	p = (struct stk_data *)(aa + 512);
+	strcpy(p->msg, msg);
+	p->flag = 1;
+	printf("[RUN]\tsignal USR1\n");
+	err = sigaltstack(NULL, &stk);
+	if (err) {
+		perror("[FAIL]\tsigaltstack()");
+		exit(EXIT_FAILURE);
+	}
+	if (stk.ss_flags != SS_DISABLE)
+		printf("[FAIL]\tss_flags=%i, should be SS_DISABLE\n",
+				stk.ss_flags);
+	else
+		printf("[OK]\tsigaltstack is disabled in sighandler\n");
+	swapcontext(&sc, &uc);
+	printf("%s\n", p->msg);
+	if (!p->flag) {
+		printf("[RUN]\tAborting\n");
+		exit(EXIT_FAILURE);
+	}
+}
+
+void my_usr2(int sig, siginfo_t *si, void *u)
+{
+	char *aa;
+	struct stk_data *p;
+
+	printf("[RUN]\tsignal USR2\n");
+	aa = alloca(1024);
+	/* don't run valgrind on this */
+	/* try to find the data stored by the previous signal handler */
+	p = memmem(aa, 1024, msg, strlen(msg));
+	if (p) {
+		printf("[FAIL]\tsigaltstack re-used\n");
+		/* corrupt the data */
+		strcpy(p->msg, msg2);
+		/* tell the other signal handler that its data is corrupted */
+		p->flag = 0;
+	}
+}
+
+static void switch_fn(void)
+{
+	printf("[RUN]\tswitched to user ctx\n");
+	raise(SIGUSR2);
+	setcontext(&sc);
+}
+
+int main(void)
+{
+	struct sigaction act;
+	stack_t stk;
+	int err;
+
+	sigemptyset(&act.sa_mask);
+	act.sa_flags = SA_ONSTACK | SA_SIGINFO;
+	act.sa_sigaction = my_usr1;
+	sigaction(SIGUSR1, &act, NULL);
+	act.sa_sigaction = my_usr2;
+	sigaction(SIGUSR2, &act, NULL);
+	sstack = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE,
+		      MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
+	if (sstack == MAP_FAILED) {
+		perror("mmap()");
+		return EXIT_FAILURE;
+	}
+
+	err = sigaltstack(NULL, &stk);
+	if (err) {
+		perror("[FAIL]\tsigaltstack()");
+		exit(EXIT_FAILURE);
+	}
+	if (stk.ss_flags == SS_DISABLE) {
+		printf("[OK]\tInitial sigaltstack state was SS_DISABLE\n");
+	} else {
+		printf("[FAIL]\tInitial sigaltstack state was %i; should have been SS_DISABLE\n", stk.ss_flags);
+		return EXIT_FAILURE;
+	}
+
+	stk.ss_sp = sstack;
+	stk.ss_size = SIGSTKSZ;
+	stk.ss_flags = SS_ONSTACK | SS_AUTODISARM;
+	err = sigaltstack(&stk, NULL);
+	if (err) {
+		if (errno == EINVAL) {
+			printf("[NOTE]\tThe running kernel doesn't support SS_AUTODISARM\n");
+			/*
+			 * If test cases for the !SS_AUTODISARM variant were
+			 * added, we could still run them.  We don't have any
+			 * test cases like that yet, so just exit and report
+			 * success.
+			 */
+			return 0;
+		} else {
+			perror("[FAIL]\tsigaltstack(SS_ONSTACK | SS_AUTODISARM)");
+			return EXIT_FAILURE;
+		}
+	}
+
+	ustack = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE,
+		      MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
+	if (ustack == MAP_FAILED) {
+		perror("mmap()");
+		return EXIT_FAILURE;
+	}
+	getcontext(&uc);
+	uc.uc_link = NULL;
+	uc.uc_stack.ss_sp = ustack;
+	uc.uc_stack.ss_size = SIGSTKSZ;
+	makecontext(&uc, switch_fn, 0);
+	raise(SIGUSR1);
+
+	err = sigaltstack(NULL, &stk);
+	if (err) {
+		perror("[FAIL]\tsigaltstack()");
+		exit(EXIT_FAILURE);
+	}
+	if (stk.ss_flags != SS_AUTODISARM) {
+		printf("[FAIL]\tss_flags=%i, should be SS_AUTODISARM\n",
+				stk.ss_flags);
+		exit(EXIT_FAILURE);
+	}
+	printf("[OK]\tsigaltstack is still SS_AUTODISARM after signal\n");
+
+	printf("[OK]\tTest passed\n");
+	return 0;
+}
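
A hedged usage note for the new test: from a kernel source tree it should
build and run standalone, and on kernels without SS_AUTODISARM it exits
successfully after printing the [NOTE] line above:

    make -C tools/testing/selftests/sigaltstack
    ./tools/testing/selftests/sigaltstack/sas
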
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index b47ebd1..c73425de 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -9,6 +9,7 @@
 TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
 			test_FCMOV test_FCOMI test_FISTTP \
 			vdso_restorer
+TARGETS_C_64BIT_ONLY := fsgsbase
 
 TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY)
 TARGETS_C_64BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_64BIT_ONLY)
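
Because fsgsbase lands in TARGETS_C_64BIT_ONLY, only a 64-bit binary should
be produced. A hedged build sketch, assuming the x86 selftests Makefile's
usual all_64 target and _64 binary suffix:

    make -C tools/testing/selftests/x86 all_64
    ./tools/testing/selftests/x86/fsgsbase_64
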
diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c
new file mode 100644
index 0000000..5b2b4b3
--- /dev/null
+++ b/tools/testing/selftests/x86/fsgsbase.c
@@ -0,0 +1,398 @@
+/*
+ * fsgsbase.c, an fsgsbase test
+ * Copyright (c) 2014-2016 Andy Lutomirski
+ * GPL v2
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <err.h>
+#include <sys/user.h>
+#include <asm/prctl.h>
+#include <sys/prctl.h>
+#include <signal.h>
+#include <limits.h>
+#include <sys/ucontext.h>
+#include <sched.h>
+#include <linux/futex.h>
+#include <pthread.h>
+#include <asm/ldt.h>
+#include <sys/mman.h>
+
+#ifndef __x86_64__
+# error This test is 64-bit only
+#endif
+
+static volatile sig_atomic_t want_segv;
+static volatile unsigned long segv_addr;
+
+static int nerrs;
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+		       int flags)
+{
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_sigaction = handler;
+	sa.sa_flags = SA_SIGINFO | flags;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(sig, &sa, 0))
+		err(1, "sigaction");
+}
+
+static void clearhandler(int sig)
+{
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_handler = SIG_DFL;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(sig, &sa, 0))
+		err(1, "sigaction");
+}
+
+static void sigsegv(int sig, siginfo_t *si, void *ctx_void)
+{
+	ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+	if (!want_segv) {
+		clearhandler(SIGSEGV);
+		return;  /* Crash cleanly. */
+	}
+
+	want_segv = false;
+	segv_addr = (unsigned long)si->si_addr;
+
+	ctx->uc_mcontext.gregs[REG_RIP] += 4;	/* Skip the faulting mov */
+
+}
+
+enum which_base { FS, GS };
+
+static unsigned long read_base(enum which_base which)
+{
+	unsigned long offset;
+	/*
+	 * Unless we have FSGSBASE, there's no direct way to do this from
+	 * user mode.  We can get at it indirectly using signals, though.
+	 */
+
+	want_segv = true;
+
+	offset = 0;
+	if (which == FS) {
+		/* Use a constant-length instruction here. */
+		asm volatile ("mov %%fs:(%%rcx), %%rax" : : "c" (offset) : "rax");
+	} else {
+		asm volatile ("mov %%gs:(%%rcx), %%rax" : : "c" (offset) : "rax");
+	}
+	if (!want_segv)
+		return segv_addr + offset;
+
+	/*
+	 * If that didn't segfault, try the other end of the address space.
+	 * Unless we get really unlucky and run into the vsyscall page, this
+	 * is guaranteed to segfault.
+	 */
+
+	offset = (ULONG_MAX >> 1) + 1;
+	if (which == FS) {
+		asm volatile ("mov %%fs:(%%rcx), %%rax"
+			      : : "c" (offset) : "rax");
+	} else {
+		asm volatile ("mov %%gs:(%%rcx), %%rax"
+			      : : "c" (offset) : "rax");
+	}
+	if (!want_segv)
+		return segv_addr + offset;
+
+	abort();
+}
+
+static void check_gs_value(unsigned long value)
+{
+	unsigned long base;
+	unsigned short sel;
+
+	printf("[RUN]\tARCH_SET_GS to 0x%lx\n", value);
+	if (syscall(SYS_arch_prctl, ARCH_SET_GS, value) != 0)
+		err(1, "ARCH_SET_GS");
+
+	asm volatile ("mov %%gs, %0" : "=rm" (sel));
+	base = read_base(GS);
+	if (base == value) {
+		printf("[OK]\tGSBASE was set as expected (selector 0x%hx)\n",
+		       sel);
+	} else {
+		nerrs++;
+		printf("[FAIL]\tGSBASE was not as expected: got 0x%lx (selector 0x%hx)\n",
+		       base, sel);
+	}
+
+	if (syscall(SYS_arch_prctl, ARCH_GET_GS, &base) != 0)
+		err(1, "ARCH_GET_GS");
+	if (base == value) {
+		printf("[OK]\tARCH_GET_GS worked as expected (selector 0x%hx)\n",
+		       sel);
+	} else {
+		nerrs++;
+		printf("[FAIL]\tARCH_GET_GS was not as expected: got 0x%lx (selector 0x%hx)\n",
+		       base, sel);
+	}
+}
+
+static void mov_0_gs(unsigned long initial_base, bool schedule)
+{
+	unsigned long base, arch_base;
+
+	printf("[RUN]\tARCH_SET_GS to 0x%lx then mov 0 to %%gs%s\n", initial_base, schedule ? " and schedule" : "");
+	if (syscall(SYS_arch_prctl, ARCH_SET_GS, initial_base) != 0)
+		err(1, "ARCH_SET_GS");
+
+	if (schedule)
+		usleep(10);
+
+	asm volatile ("mov %0, %%gs" : : "rm" (0));
+	base = read_base(GS);
+	if (syscall(SYS_arch_prctl, ARCH_GET_GS, &arch_base) != 0)
+		err(1, "ARCH_GET_GS");
+	if (base == arch_base) {
+		printf("[OK]\tGSBASE is 0x%lx\n", base);
+	} else {
+		nerrs++;
+		printf("[FAIL]\tGSBASE changed to 0x%lx but kernel reports 0x%lx\n", base, arch_base);
+	}
+}
+
+static volatile unsigned long remote_base;
+static volatile bool remote_hard_zero;
+static volatile unsigned int ftx;
+
+/*
+ * ARCH_SET_FS/GS(0) may or may not program a selector of zero.  HARD_ZERO
+ * means to force the selector to zero to improve test coverage.
+ */
+#define HARD_ZERO 0xa1fa5f343cb85fa4
+
+static void do_remote_base()
+{
+	unsigned long to_set = remote_base;
+	bool hard_zero = false;
+	if (to_set == HARD_ZERO) {
+		to_set = 0;
+		hard_zero = true;
+	}
+
+	if (syscall(SYS_arch_prctl, ARCH_SET_GS, to_set) != 0)
+		err(1, "ARCH_SET_GS");
+
+	if (hard_zero)
+		asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
+
+	unsigned short sel;
+	asm volatile ("mov %%gs, %0" : "=rm" (sel));
+	printf("\tother thread: ARCH_SET_GS(0x%lx)%s -- sel is 0x%hx\n",
+	       to_set, hard_zero ? " and clear gs" : "", sel);
+}
+
+void do_unexpected_base(void)
+{
+	/*
+	 * The goal here is to try to arrange for GS == 0, GSBASE !=
+	 * 0, and for the kernel to think that GSBASE == 0.
+	 *
+	 * To make the test as reliable as possible, this uses
+	 * explicit descriptors.  (This is not the only way.  This
+	 * could use ARCH_SET_GS with a low, nonzero base, but the
+	 * relevant side effect of ARCH_SET_GS could change.)
+	 */
+
+	/* Step 1: tell the kernel that we have GSBASE == 0. */
+	if (syscall(SYS_arch_prctl, ARCH_SET_GS, 0) != 0)
+		err(1, "ARCH_SET_GS");
+
+	/* Step 2: change GSBASE without telling the kernel. */
+	struct user_desc desc = {
+		.entry_number    = 0,
+		.base_addr       = 0xBAADF00D,
+		.limit           = 0xfffff,
+		.seg_32bit       = 1,
+		.contents        = 0, /* Data, grow-up */
+		.read_exec_only  = 0,
+		.limit_in_pages  = 1,
+		.seg_not_present = 0,
+		.useable         = 0
+	};
+	if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) == 0) {
+		printf("\tother thread: using LDT slot 0\n");
+		asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0x7));
+	} else {
+		/* No modify_ldt for us (configured out, perhaps) */
+
+		struct user_desc *low_desc = mmap(
+			NULL, sizeof(desc),
+			PROT_READ | PROT_WRITE,
+			MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);
+		memcpy(low_desc, &desc, sizeof(desc));
+
+		low_desc->entry_number = -1;
+
+		/* 32-bit set_thread_area */
+		long ret;
+		asm volatile ("int $0x80"
+			      : "=a" (ret) : "a" (243), "b" (low_desc)
+			      : "flags");
+		memcpy(&desc, low_desc, sizeof(desc));
+		munmap(low_desc, sizeof(desc));
+
+		if (ret != 0) {
+			printf("[NOTE]\tcould not create a segment -- test won't do anything\n");
+			return;
+		}
+		printf("\tother thread: using GDT slot %d\n", desc.entry_number);
+		asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)((desc.entry_number << 3) | 0x3)));
+	}
+
+	/*
+	 * Step 3: set the selector back to zero.  On AMD chips, this will
+	 * preserve GSBASE.
+	 */
+
+	asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
+}
+
+static void *threadproc(void *ctx)
+{
+	while (1) {
+		while (ftx == 0)
+			syscall(SYS_futex, &ftx, FUTEX_WAIT, 0, NULL, NULL, 0);
+		if (ftx == 3)
+			return NULL;
+
+		if (ftx == 1)
+			do_remote_base();
+		else if (ftx == 2)
+			do_unexpected_base();
+		else
+			errx(1, "helper thread got bad command");
+
+		ftx = 0;
+		syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
+	}
+}
+
+static void set_gs_and_switch_to(unsigned long local, unsigned long remote)
+{
+	unsigned long base;
+
+	bool hard_zero = false;
+	if (local == HARD_ZERO) {
+		hard_zero = true;
+		local = 0;
+	}
+
+	printf("[RUN]\tARCH_SET_GS(0x%lx)%s, then schedule to 0x%lx\n",
+	       local, hard_zero ? " and clear gs" : "", remote);
+	if (syscall(SYS_arch_prctl, ARCH_SET_GS, local) != 0)
+		err(1, "ARCH_SET_GS");
+	if (hard_zero)
+		asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
+
+	if (read_base(GS) != local) {
+		nerrs++;
+		printf("[FAIL]\tGSBASE wasn't set as expected\n");
+	}
+
+	remote_base = remote;
+	ftx = 1;
+	syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
+	while (ftx != 0)
+		syscall(SYS_futex, &ftx, FUTEX_WAIT, 1, NULL, NULL, 0);
+
+	base = read_base(GS);
+	if (base == local) {
+		printf("[OK]\tGSBASE remained 0x%lx\n", local);
+	} else {
+		nerrs++;
+		printf("[FAIL]\tGSBASE changed to 0x%lx\n", base);
+	}
+}
+
+static void test_unexpected_base(void)
+{
+	unsigned long base;
+
+	printf("[RUN]\tARCH_SET_GS(0), clear gs, then manipulate GSBASE in a different thread\n");
+	if (syscall(SYS_arch_prctl, ARCH_SET_GS, 0) != 0)
+		err(1, "ARCH_SET_GS");
+	asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
+
+	ftx = 2;
+	syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
+	while (ftx != 0)
+		syscall(SYS_futex, &ftx, FUTEX_WAIT, 1, NULL, NULL, 0);
+
+	base = read_base(GS);
+	if (base == 0) {
+		printf("[OK]\tGSBASE remained 0\n");
+	} else {
+		nerrs++;
+		printf("[FAIL]\tGSBASE changed to 0x%lx\n", base);
+	}
+}
+
+int main()
+{
+	pthread_t thread;
+
+	sethandler(SIGSEGV, sigsegv, 0);
+
+	check_gs_value(0);
+	check_gs_value(1);
+	check_gs_value(0x200000000);
+	check_gs_value(0);
+	check_gs_value(0x200000000);
+	check_gs_value(1);
+
+	for (int sched = 0; sched < 2; sched++) {
+		mov_0_gs(0, !!sched);
+		mov_0_gs(1, !!sched);
+		mov_0_gs(0x200000000, !!sched);
+	}
+
+	/* Set up for multithreading. */
+
+	cpu_set_t cpuset;
+	CPU_ZERO(&cpuset);
+	CPU_SET(0, &cpuset);
+	if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
+		err(1, "sched_setaffinity to CPU 0");	/* should never fail */
+
+	if (pthread_create(&thread, 0, threadproc, 0) != 0)
+		err(1, "pthread_create");
+
+	static unsigned long bases_with_hard_zero[] = {
+		0, HARD_ZERO, 1, 0x200000000,
+	};
+
+	for (int local = 0; local < 4; local++) {
+		for (int remote = 0; remote < 4; remote++) {
+			set_gs_and_switch_to(bases_with_hard_zero[local],
+					     bases_with_hard_zero[remote]);
+		}
+	}
+
+	test_unexpected_base();
+
+	ftx = 3;  /* Kill the thread. */
+	syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
+
+	if (pthread_join(thread, NULL) != 0)
+		err(1, "pthread_join");
+
+	return nerrs == 0 ? 0 : 1;
+}
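
For orientation, a passing fsgsbase run prints [RUN]/[OK] pairs built from
the printf formats above, and it exits zero only when no [FAIL] lines
appeared (nerrs == 0); the values below are illustrative:

    ./fsgsbase_64
    # [RUN]	ARCH_SET_GS to 0x1
    # [OK]	GSBASE was set as expected (selector 0x0)
    # ...
    echo $?	# 0 on success
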
diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c
index 31a3035..4af4707 100644
--- a/tools/testing/selftests/x86/ldt_gdt.c
+++ b/tools/testing/selftests/x86/ldt_gdt.c
@@ -21,6 +21,9 @@
 #include <pthread.h>
 #include <sched.h>
 #include <linux/futex.h>
+#include <sys/mman.h>
+#include <asm/prctl.h>
+#include <sys/prctl.h>
 
 #define AR_ACCESSED		(1<<8)
 
@@ -44,6 +47,12 @@
 
 static int nerrs;
 
+/* Points to an array of 1024 ints, each holding its own index. */
+static const unsigned int *counter_page;
+static struct user_desc *low_user_desc;
+static struct user_desc *low_user_desc_clear;  /* Use to delete GDT entry */
+static int gdt_entry_num;
+
 static void check_invalid_segment(uint16_t index, int ldt)
 {
 	uint32_t has_limit = 0, has_ar = 0, limit, ar;
@@ -561,16 +570,257 @@
 	}
 }
 
+static void setup_counter_page(void)
+{
+	unsigned int *page = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
+			 MAP_ANONYMOUS | MAP_PRIVATE | MAP_32BIT, -1, 0);
+	if (page == MAP_FAILED)
+		err(1, "mmap");
+
+	for (int i = 0; i < 1024; i++)
+		page[i] = i;
+	counter_page = page;
+}
+
+static int invoke_set_thread_area(void)
+{
+	int ret;
+	asm volatile ("int $0x80"
+		      : "=a" (ret), "+m" (low_user_desc) :
+			"a" (243), "b" (low_user_desc)
+		      : "flags");
+	return ret;
+}
+
+static void setup_low_user_desc(void)
+{
+	low_user_desc = mmap(NULL, 2 * sizeof(struct user_desc),
+			     PROT_READ | PROT_WRITE,
+			     MAP_ANONYMOUS | MAP_PRIVATE | MAP_32BIT, -1, 0);
+	if (low_user_desc == MAP_FAILED)
+		err(1, "mmap");
+
+	low_user_desc->entry_number	= -1;
+	low_user_desc->base_addr	= (unsigned long)&counter_page[1];
+	low_user_desc->limit		= 0xfffff;
+	low_user_desc->seg_32bit	= 1;
+	low_user_desc->contents		= 0; /* Data, grow-up */
+	low_user_desc->read_exec_only	= 0;
+	low_user_desc->limit_in_pages	= 1;
+	low_user_desc->seg_not_present	= 0;
+	low_user_desc->useable		= 0;
+
+	if (invoke_set_thread_area() == 0) {
+		gdt_entry_num = low_user_desc->entry_number;
+		printf("[NOTE]\tset_thread_area is available; will use GDT index %d\n", gdt_entry_num);
+	} else {
+		printf("[NOTE]\tset_thread_area is unavailable\n");
+	}
+
+	low_user_desc_clear = low_user_desc + 1;
+	low_user_desc_clear->entry_number = gdt_entry_num;
+	low_user_desc_clear->read_exec_only = 1;
+	low_user_desc_clear->seg_not_present = 1;
+}
+
+static void test_gdt_invalidation(void)
+{
+	if (!gdt_entry_num)
+		return;	/* 64-bit only system -- we can't use set_thread_area */
+
+	unsigned short prev_sel;
+	unsigned short sel;
+	unsigned int eax;
+	const char *result;
+#ifdef __x86_64__
+	unsigned long saved_base;
+	unsigned long new_base;
+#endif
+
+	/* Test DS */
+	invoke_set_thread_area();
+	eax = 243;
+	sel = (gdt_entry_num << 3) | 3;
+	asm volatile ("movw %%ds, %[prev_sel]\n\t"
+		      "movw %[sel], %%ds\n\t"
+#ifdef __i386__
+		      "pushl %%ebx\n\t"
+#endif
+		      "movl %[arg1], %%ebx\n\t"
+		      "int $0x80\n\t"	/* Should invalidate ds */
+#ifdef __i386__
+		      "popl %%ebx\n\t"
+#endif
+		      "movw %%ds, %[sel]\n\t"
+		      "movw %[prev_sel], %%ds"
+		      : [prev_sel] "=&r" (prev_sel), [sel] "+r" (sel),
+			"+a" (eax)
+		      : "m" (low_user_desc_clear),
+			[arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
+		      : "flags");
+
+	if (sel != 0) {
+		result = "FAIL";
+		nerrs++;
+	} else {
+		result = "OK";
+	}
+	printf("[%s]\tInvalidate DS with set_thread_area: new DS = 0x%hx\n",
+	       result, sel);
+
+	/* Test ES */
+	invoke_set_thread_area();
+	eax = 243;
+	sel = (gdt_entry_num << 3) | 3;
+	asm volatile ("movw %%es, %[prev_sel]\n\t"
+		      "movw %[sel], %%es\n\t"
+#ifdef __i386__
+		      "pushl %%ebx\n\t"
+#endif
+		      "movl %[arg1], %%ebx\n\t"
+		      "int $0x80\n\t"	/* Should invalidate es */
+#ifdef __i386__
+		      "popl %%ebx\n\t"
+#endif
+		      "movw %%es, %[sel]\n\t"
+		      "movw %[prev_sel], %%es"
+		      : [prev_sel] "=&r" (prev_sel), [sel] "+r" (sel),
+			"+a" (eax)
+		      : "m" (low_user_desc_clear),
+			[arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
+		      : "flags");
+
+	if (sel != 0) {
+		result = "FAIL";
+		nerrs++;
+	} else {
+		result = "OK";
+	}
+	printf("[%s]\tInvalidate ES with set_thread_area: new ES = 0x%hx\n",
+	       result, sel);
+
+	/* Test FS */
+	invoke_set_thread_area();
+	eax = 243;
+	sel = (gdt_entry_num << 3) | 3;
+#ifdef __x86_64__
+	syscall(SYS_arch_prctl, ARCH_GET_FS, &saved_base);
+#endif
+	asm volatile ("movw %%fs, %[prev_sel]\n\t"
+		      "movw %[sel], %%fs\n\t"
+#ifdef __i386__
+		      "pushl %%ebx\n\t"
+#endif
+		      "movl %[arg1], %%ebx\n\t"
+		      "int $0x80\n\t"	/* Should invalidate fs */
+#ifdef __i386__
+		      "popl %%ebx\n\t"
+#endif
+		      "movw %%fs, %[sel]\n\t"
+		      : [prev_sel] "=&r" (prev_sel), [sel] "+r" (sel),
+			"+a" (eax)
+		      : "m" (low_user_desc_clear),
+			[arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
+		      : "flags");
+
+#ifdef __x86_64__
+	syscall(SYS_arch_prctl, ARCH_GET_FS, &new_base);
+#endif
+
+	/* Restore FS/BASE for glibc */
+	asm volatile ("movw %[prev_sel], %%fs" : : [prev_sel] "rm" (prev_sel));
+#ifdef __x86_64__
+	if (saved_base)
+		syscall(SYS_arch_prctl, ARCH_SET_FS, saved_base);
+#endif
+
+	if (sel != 0) {
+		result = "FAIL";
+		nerrs++;
+	} else {
+		result = "OK";
+	}
+	printf("[%s]\tInvalidate FS with set_thread_area: new FS = 0x%hx\n",
+	       result, sel);
+
+#ifdef __x86_64__
+	if (sel == 0 && new_base != 0) {
+		nerrs++;
+		printf("[FAIL]\tNew FSBASE was 0x%lx\n", new_base);
+	} else {
+		printf("[OK]\tNew FSBASE was zero\n");
+	}
+#endif
+
+	/* Test GS */
+	invoke_set_thread_area();
+	eax = 243;
+	sel = (gdt_entry_num << 3) | 3;
+#ifdef __x86_64__
+	syscall(SYS_arch_prctl, ARCH_GET_GS, &saved_base);
+#endif
+	asm volatile ("movw %%gs, %[prev_sel]\n\t"
+		      "movw %[sel], %%gs\n\t"
+#ifdef __i386__
+		      "pushl %%ebx\n\t"
+#endif
+		      "movl %[arg1], %%ebx\n\t"
+		      "int $0x80\n\t"	/* Should invalidate gs */
+#ifdef __i386__
+		      "popl %%ebx\n\t"
+#endif
+		      "movw %%gs, %[sel]\n\t"
+		      : [prev_sel] "=&r" (prev_sel), [sel] "+r" (sel),
+			"+a" (eax)
+		      : "m" (low_user_desc_clear),
+			[arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
+		      : "flags");
+
+#ifdef __x86_64__
+	syscall(SYS_arch_prctl, ARCH_GET_GS, &new_base);
+#endif
+
+	/* Restore GS/BASE for glibc */
+	asm volatile ("movw %[prev_sel], %%gs" : : [prev_sel] "rm" (prev_sel));
+#ifdef __x86_64__
+	if (saved_base)
+		syscall(SYS_arch_prctl, ARCH_SET_GS, saved_base);
+#endif
+
+	if (sel != 0) {
+		result = "FAIL";
+		nerrs++;
+	} else {
+		result = "OK";
+	}
+	printf("[%s]\tInvalidate GS with set_thread_area: new GS = 0x%hx\n",
+	       result, sel);
+
+#ifdef __x86_64__
+	if (sel == 0 && new_base != 0) {
+		nerrs++;
+		printf("[FAIL]\tNew GSBASE was 0x%lx\n", new_base);
+	} else {
+		printf("[OK]\tNew GSBASE was zero\n");
+	}
+#endif
+}
+
 int main(int argc, char **argv)
 {
 	if (argc == 1 && !strcmp(argv[0], "ldt_gdt_test_exec"))
 		return finish_exec_test();
 
+	setup_counter_page();
+	setup_low_user_desc();
+
 	do_simple_tests();
 
 	do_multicpu_tests();
 
 	do_exec_test();
 
+	test_gdt_invalidation();
+
 	return nerrs ? 1 : 0;
 }
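
One detail worth spelling out from the GDT-invalidation tests above: the
selector loaded into each segment register is (entry << 3) | 3, the GDT
index shifted past the two-bit RPL field and the table-indicator bit, with
RPL forced to 3 for user mode. A quick shell check of the arithmetic, with a
hypothetical entry number:

    printf '0x%x\n' $(( (12 << 3) | 3 ))	# GDT entry 12 -> selector 0x63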