Merge branch 'acpi-battery'
* acpi-battery:
ACPI / battery: Correctly serialise with the pending async probe
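The shape of that serialisation is roughly the following sketch (illustrative only, not the actual patch; the names battery_async_cookie, battery_probe_async() and battery_hotplug_event() are hypothetical). It assumes the driver records the cookie returned by async_schedule() and that later entry points wait on it with async_synchronize_cookie(); passing cookie + 1 makes the wait inclusive of the probe itself, since async_synchronize_cookie() only waits for entries with strictly lower cookies.

#include <linux/async.h>
#include <linux/platform_device.h>

static async_cookie_t battery_async_cookie;

static void battery_probe_async(void *data, async_cookie_t cookie)
{
	/* the slow probing work runs here, off the main probe path */
}

static int battery_probe(struct platform_device *pdev)
{
	/* defer the slow part; remember the cookie for later waiters */
	battery_async_cookie = async_schedule(battery_probe_async, pdev);
	return 0;
}

static void battery_hotplug_event(void)
{
	/*
	 * Wait for every async entry issued up to and including our
	 * probe before touching driver state set up by it.
	 */
	async_synchronize_cookie(battery_async_cookie + 1);
	/* ... now safe to use state initialised by the async probe ... */
}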
diff --git a/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCU.svg b/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCU.svg
new file mode 100644
index 0000000..727e270
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCU.svg
@@ -0,0 +1,474 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
+
+<!-- CreationDate: Wed Dec 9 17:28:20 2015 -->
+
+<!-- Magnification: 3.000 -->
+
+<svg
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:cc="http://creativecommons.org/ns#"
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:svg="http://www.w3.org/2000/svg"
+ xmlns="http://www.w3.org/2000/svg"
+ xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+ xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+ width="9.1in"
+ height="8.9in"
+ viewBox="-66 -66 10932 10707"
+ id="svg2"
+ version="1.1"
+ inkscape:version="0.48.4 r9939"
+ sodipodi:docname="BigTreeClassicRCU.fig">
+ <metadata
+ id="metadata106">
+ <rdf:RDF>
+ <cc:Work
+ rdf:about="">
+ <dc:format>image/svg+xml</dc:format>
+ <dc:type
+ rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+ <dc:title></dc:title>
+ </cc:Work>
+ </rdf:RDF>
+ </metadata>
+ <defs
+ id="defs104">
+ <marker
+ inkscape:stockid="Arrow1Mend"
+ orient="auto"
+ refY="0.0"
+ refX="0.0"
+ id="Arrow1Mend"
+ style="overflow:visible;">
+ <path
+ id="path3864"
+ d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
+ style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;"
+ transform="scale(0.4) rotate(180) translate(10,0)" />
+ </marker>
+ </defs>
+ <sodipodi:namedview
+ pagecolor="#ffffff"
+ bordercolor="#666666"
+ borderopacity="1"
+ objecttolerance="10"
+ gridtolerance="10"
+ guidetolerance="10"
+ inkscape:pageopacity="0"
+ inkscape:pageshadow="2"
+ inkscape:window-width="973"
+ inkscape:window-height="1137"
+ id="namedview102"
+ showgrid="false"
+ inkscape:zoom="0.9743589"
+ inkscape:cx="409.50003"
+ inkscape:cy="400.49997"
+ inkscape:window-x="915"
+ inkscape:window-y="24"
+ inkscape:window-maximized="0"
+ inkscape:current-layer="g4" />
+ <g
+ style="stroke-width:.025in; fill:none"
+ id="g4">
+ <!-- Line: box -->
+ <rect
+ x="0"
+ y="0"
+ width="10800"
+ height="5625"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+ id="rect6" />
+ <!-- Line: box -->
+ <rect
+ x="1125"
+ y="3600"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect8" />
+ <!-- Line: box -->
+ <rect
+ x="3825"
+ y="900"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect10" />
+ <!-- Line: box -->
+ <rect
+ x="6525"
+ y="3600"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect12" />
+ <!-- Line -->
+ <polyline
+ points="3375,6525 3375,5046 "
+ style="stroke:#00d1d1;stroke-width:44.9934641;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline14" />
+ <!-- Arrowhead on XXXpoint 3375 6525 - 3375 4860-->
+ <!-- Circle -->
+ <circle
+ cx="7425"
+ cy="6075"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle18" />
+ <!-- Circle -->
+ <circle
+ cx="7875"
+ cy="6075"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle20" />
+ <!-- Circle -->
+ <circle
+ cx="8325"
+ cy="6075"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle22" />
+ <!-- Circle -->
+ <circle
+ cx="2025"
+ cy="6075"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle24" />
+ <!-- Circle -->
+ <circle
+ cx="2475"
+ cy="6075"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle26" />
+ <!-- Circle -->
+ <circle
+ cx="2925"
+ cy="6075"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle28" />
+ <!-- Circle -->
+ <circle
+ cx="4725"
+ cy="4275"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle30" />
+ <!-- Circle -->
+ <circle
+ cx="5175"
+ cy="4275"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle32" />
+ <!-- Circle -->
+ <circle
+ cx="5625"
+ cy="4275"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle34" />
+ <!-- Line: box -->
+ <rect
+ x="2025"
+ y="6525"
+ width="2700"
+ height="1800"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect36" />
+ <!-- Line -->
+ <polyline
+ points="2475,3600 3975,2310 "
+ style="stroke:#00d1d1;stroke-width:44.9934641;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline38" />
+ <!-- Arrowhead on XXXpoint 2475 3600 - 4116 2190-->
+ <!-- Line -->
+ <polyline
+ points="7875,3600 6372,2310 "
+ style="stroke:#00d1d1;stroke-width:44.9934641;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline42" />
+ <!-- Arrowhead on XXXpoint 7875 3600 - 6231 2190-->
+ <!-- Line -->
+ <polyline
+ points="6975,8775 6975,5046 "
+ style="stroke:#00d1d1;stroke-width:44.9934641;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline46" />
+ <!-- Arrowhead on XXXpoint 6975 8775 - 6975 4860-->
+ <!-- Line -->
+ <polyline
+ points="1575,8775 1575,5046 "
+ style="stroke:#00d1d1;stroke-width:44.9934641;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline50" />
+ <!-- Arrowhead on XXXpoint 1575 8775 - 1575 4860-->
+ <!-- Line -->
+ <polyline
+ points="8775,6525 8775,5046 "
+ style="stroke:#00d1d1;stroke-width:44.9934641;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline54" />
+ <!-- Arrowhead on XXXpoint 8775 6525 - 8775 4860-->
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1575"
+ y="9225"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text58">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1575"
+ y="9675"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text60">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1575"
+ y="10350"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text62">CPU 0</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="3375"
+ y="6975"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text64">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="3375"
+ y="7425"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text66">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="3375"
+ y="8100"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text68">CPU 15</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="6975"
+ y="9225"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text70">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="6975"
+ y="9675"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text72">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="6975"
+ y="10350"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text74">CPU 1007</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="8730"
+ y="6930"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text76">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="8730"
+ y="7380"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text78">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="8730"
+ y="8055"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text80">CPU 1023</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="225"
+ y="450"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="start"
+ id="text82">struct rcu_state</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2475"
+ y="4050"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text84">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2475"
+ y="4500"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text86">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="7875"
+ y="4500"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text88">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="7875"
+ y="4050"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text90">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5175"
+ y="1350"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text92">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5175"
+ y="1800"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text94">rcu_node</text>
+ <!-- Line: box -->
+ <rect
+ x="225"
+ y="8775"
+ width="2700"
+ height="1800"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect96" />
+ <!-- Line: box -->
+ <rect
+ x="5625"
+ y="8775"
+ width="2700"
+ height="1800"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect98" />
+ <!-- Line: box -->
+ <rect
+ x="7380"
+ y="6480"
+ width="2700"
+ height="1800"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect100" />
+ </g>
+</svg>
diff --git a/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCUBH.svg b/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCUBH.svg
new file mode 100644
index 0000000..9bbb194
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCUBH.svg
@@ -0,0 +1,499 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
+
+<!-- CreationDate: Wed Dec 9 17:26:09 2015 -->
+
+<!-- Magnification: 2.000 -->
+
+<svg
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:cc="http://creativecommons.org/ns#"
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:svg="http://www.w3.org/2000/svg"
+ xmlns="http://www.w3.org/2000/svg"
+ xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+ xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+ width="5.7in"
+ height="6.6in"
+ viewBox="-44 -44 6838 7888"
+ id="svg2"
+ version="1.1"
+ inkscape:version="0.48.4 r9939"
+ sodipodi:docname="BigTreeClassicRCUBH.fig">
+ <metadata
+ id="metadata110">
+ <rdf:RDF>
+ <cc:Work
+ rdf:about="">
+ <dc:format>image/svg+xml</dc:format>
+ <dc:type
+ rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+ <dc:title></dc:title>
+ </cc:Work>
+ </rdf:RDF>
+ </metadata>
+ <defs
+ id="defs108">
+ <marker
+ inkscape:stockid="Arrow1Mend"
+ orient="auto"
+ refY="0.0"
+ refX="0.0"
+ id="Arrow1Mend"
+ style="overflow:visible;">
+ <path
+ id="path3868"
+ d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
+ style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;"
+ transform="scale(0.4) rotate(180) translate(10,0)" />
+ </marker>
+ <marker
+ inkscape:stockid="Arrow2Mend"
+ orient="auto"
+ refY="0.0"
+ refX="0.0"
+ id="Arrow2Mend"
+ style="overflow:visible;">
+ <path
+ id="path3886"
+ style="fill-rule:evenodd;stroke-width:0.62500000;stroke-linejoin:round;"
+ d="M 8.7185878,4.0337352 L -2.2072895,0.016013256 L 8.7185884,-4.0017078 C 6.9730900,-1.6296469 6.9831476,1.6157441 8.7185878,4.0337352 z "
+ transform="scale(0.6) rotate(180) translate(0,0)" />
+ </marker>
+ </defs>
+ <sodipodi:namedview
+ pagecolor="#ffffff"
+ bordercolor="#666666"
+ borderopacity="1"
+ objecttolerance="10"
+ gridtolerance="10"
+ guidetolerance="10"
+ inkscape:pageopacity="0"
+ inkscape:pageshadow="2"
+ inkscape:window-width="878"
+ inkscape:window-height="1148"
+ id="namedview106"
+ showgrid="false"
+ inkscape:zoom="1.3547758"
+ inkscape:cx="256.5"
+ inkscape:cy="297"
+ inkscape:window-x="45"
+ inkscape:window-y="24"
+ inkscape:window-maximized="0"
+ inkscape:current-layer="g4" />
+ <g
+ style="stroke-width:.025in; fill:none"
+ id="g4">
+ <!-- Line: box -->
+ <rect
+ x="450"
+ y="0"
+ width="6300"
+ height="7350"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+ id="rect6" />
+ <!-- Line: box -->
+ <rect
+ x="4950"
+ y="4950"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect8" />
+ <!-- Line: box -->
+ <rect
+ x="750"
+ y="600"
+ width="5700"
+ height="3750"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+ id="rect10" />
+ <!-- Line: box -->
+ <rect
+ x="0"
+ y="450"
+ width="6300"
+ height="7350"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+ id="rect12" />
+ <!-- Line: box -->
+ <rect
+ x="300"
+ y="1050"
+ width="5700"
+ height="3750"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+ id="rect14" />
+ <!-- Circle -->
+ <circle
+ cx="2850"
+ cy="3900"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle16" />
+ <!-- Circle -->
+ <circle
+ cx="3150"
+ cy="3900"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle18" />
+ <!-- Circle -->
+ <circle
+ cx="3450"
+ cy="3900"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle20" />
+ <!-- Circle -->
+ <circle
+ cx="1350"
+ cy="5100"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle22" />
+ <!-- Circle -->
+ <circle
+ cx="1650"
+ cy="5100"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle24" />
+ <!-- Circle -->
+ <circle
+ cx="1950"
+ cy="5100"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle26" />
+ <!-- Circle -->
+ <circle
+ cx="4350"
+ cy="5100"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle28" />
+ <!-- Circle -->
+ <circle
+ cx="4650"
+ cy="5100"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle30" />
+ <!-- Circle -->
+ <circle
+ cx="4950"
+ cy="5100"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle32" />
+ <!-- Line -->
+ <polyline
+ points="1350,3450 2350,2590 "
+ style="stroke:#00d1d1;stroke-width:30.0045575;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline34" />
+ <!-- Arrowhead on XXXpoint 1350 3450 - 2444 2510-->
+ <!-- Line -->
+ <polyline
+ points="4950,3450 3948,2590 "
+ style="stroke:#00d1d1;stroke-width:30.0045575;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline38" />
+ <!-- Arrowhead on XXXpoint 4950 3450 - 3854 2510-->
+ <!-- Line: box -->
+ <rect
+ x="750"
+ y="3450"
+ width="1800"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect42" />
+ <!-- Line -->
+ <polyline
+ points="2250,5400 2250,4414 "
+ style="stroke:#00d1d1;stroke-width:30.0045575;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline44" />
+ <!-- Arrowhead on XXXpoint 2250 5400 - 2250 4290-->
+ <!-- Line: box -->
+ <rect
+ x="1500"
+ y="5400"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect48" />
+ <!-- Line: box -->
+ <rect
+ x="300"
+ y="6600"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect50" />
+ <!-- Line: box -->
+ <rect
+ x="3750"
+ y="3450"
+ width="1800"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect52" />
+ <!-- Line: box -->
+ <rect
+ x="4500"
+ y="5400"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect54" />
+ <!-- Line: box -->
+ <rect
+ x="3300"
+ y="6600"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect56" />
+ <!-- Line: box -->
+ <rect
+ x="2250"
+ y="1650"
+ width="1800"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect58" />
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="6450"
+ y="300"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="192"
+ text-anchor="end"
+ id="text60">rcu_bh</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="3150"
+ y="1950"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text62">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="3150"
+ y="2250"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text64">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1650"
+ y="3750"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text66">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1650"
+ y="4050"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text68">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4650"
+ y="4050"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text70">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4650"
+ y="3750"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text72">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2250"
+ y="5700"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text74">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2250"
+ y="6000"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text76">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1050"
+ y="6900"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text78">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1050"
+ y="7200"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text80">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5250"
+ y="5700"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text82">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5250"
+ y="6000"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text84">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4050"
+ y="6900"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text86">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4050"
+ y="7200"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text88">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="450"
+ y="1350"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="start"
+ id="text90">struct rcu_state</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="6000"
+ y="750"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="192"
+ text-anchor="end"
+ id="text92">rcu_sched</text>
+ <!-- Line -->
+ <polyline
+ points="5250,5400 5250,4414 "
+ style="stroke:#00d1d1;stroke-width:30.0045575;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline94" />
+ <!-- Arrowhead on XXXpoint 5250 5400 - 5250 4290-->
+ <!-- Line -->
+ <polyline
+ points="4050,6600 4050,4414 "
+ style="stroke:#00d1d1;stroke-width:30.0045575;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline98" />
+ <!-- Arrowhead on XXXpoint 4050 6600 - 4050 4290-->
+ <!-- Line -->
+ <polyline
+ points="1050,6600 1050,4414 "
+ style="stroke:#00d1d1;stroke-width:30.0045575;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline102" />
+ <!-- Arrowhead on XXXpoint 1050 6600 - 1050 4290-->
+ </g>
+</svg>
diff --git a/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCUBHdyntick.svg b/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCUBHdyntick.svg
new file mode 100644
index 0000000..21ba782
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCUBHdyntick.svg
@@ -0,0 +1,695 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
+
+<!-- CreationDate: Wed Dec 9 17:20:02 2015 -->
+
+<!-- Magnification: 2.000 -->
+
+<svg
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:cc="http://creativecommons.org/ns#"
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:svg="http://www.w3.org/2000/svg"
+ xmlns="http://www.w3.org/2000/svg"
+ xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+ xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+ width="5.7in"
+ height="8.6in"
+ viewBox="-44 -44 6838 10288"
+ id="svg2"
+ version="1.1"
+ inkscape:version="0.48.4 r9939"
+ sodipodi:docname="BigTreeClassicRCUBHdyntick.fig">
+ <metadata
+ id="metadata166">
+ <rdf:RDF>
+ <cc:Work
+ rdf:about="">
+ <dc:format>image/svg+xml</dc:format>
+ <dc:type
+ rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+ <dc:title></dc:title>
+ </cc:Work>
+ </rdf:RDF>
+ </metadata>
+ <defs
+ id="defs164">
+ <marker
+ inkscape:stockid="Arrow1Mend"
+ orient="auto"
+ refY="0.0"
+ refX="0.0"
+ id="Arrow1Mend"
+ style="overflow:visible;">
+ <path
+ id="path3924"
+ d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
+ style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;"
+ transform="scale(0.4) rotate(180) translate(10,0)" />
+ </marker>
+ <marker
+ inkscape:stockid="Arrow2Lend"
+ orient="auto"
+ refY="0.0"
+ refX="0.0"
+ id="Arrow2Lend"
+ style="overflow:visible;">
+ <path
+ id="path3936"
+ style="fill-rule:evenodd;stroke-width:0.62500000;stroke-linejoin:round;"
+ d="M 8.7185878,4.0337352 L -2.2072895,0.016013256 L 8.7185884,-4.0017078 C 6.9730900,-1.6296469 6.9831476,1.6157441 8.7185878,4.0337352 z "
+ transform="scale(1.1) rotate(180) translate(1,0)" />
+ </marker>
+ </defs>
+ <sodipodi:namedview
+ pagecolor="#ffffff"
+ bordercolor="#666666"
+ borderopacity="1"
+ objecttolerance="10"
+ gridtolerance="10"
+ guidetolerance="10"
+ inkscape:pageopacity="0"
+ inkscape:pageshadow="2"
+ inkscape:window-width="845"
+ inkscape:window-height="988"
+ id="namedview162"
+ showgrid="false"
+ inkscape:zoom="1.0452196"
+ inkscape:cx="256.5"
+ inkscape:cy="387.00003"
+ inkscape:window-x="356"
+ inkscape:window-y="61"
+ inkscape:window-maximized="0"
+ inkscape:current-layer="g4" />
+ <g
+ style="stroke-width:.025in; fill:none"
+ id="g4">
+ <!-- Line: box -->
+ <rect
+ x="450"
+ y="0"
+ width="6300"
+ height="7350"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+ id="rect6" />
+ <!-- Line: box -->
+ <rect
+ x="4950"
+ y="4950"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect8" />
+ <!-- Line: box -->
+ <rect
+ x="750"
+ y="600"
+ width="5700"
+ height="3750"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+ id="rect10" />
+ <!-- Line -->
+ <polyline
+ points="5250,8100 5688,5912 "
+ style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="polyline12" />
+ <!-- Arrowhead on XXXpoint 5250 8100 - 5710 5790-->
+ <polyline
+ points="5714 6068 5704 5822 5598 6044 "
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+ id="polyline14" />
+ <!-- Line -->
+ <polyline
+ points="4050,9300 4486,7262 "
+ style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="polyline16" />
+ <!-- Arrowhead on XXXpoint 4050 9300 - 4512 7140-->
+ <polyline
+ points="4514 7418 4506 7172 4396 7394 "
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+ id="polyline18" />
+ <!-- Line -->
+ <polyline
+ points="1040,9300 1476,7262 "
+ style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="polyline20" />
+ <!-- Arrowhead on XXXpoint 1040 9300 - 1502 7140-->
+ <polyline
+ points="1504 7418 1496 7172 1386 7394 "
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+ id="polyline22" />
+ <!-- Line -->
+ <polyline
+ points="2240,8100 2676,6062 "
+ style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="polyline24" />
+ <!-- Arrowhead on XXXpoint 2240 8100 - 2702 5940-->
+ <polyline
+ points="2704 6218 2696 5972 2586 6194 "
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+ id="polyline26" />
+ <!-- Line: box -->
+ <rect
+ x="0"
+ y="450"
+ width="6300"
+ height="7350"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+ id="rect28" />
+ <!-- Line: box -->
+ <rect
+ x="300"
+ y="1050"
+ width="5700"
+ height="3750"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+ id="rect30" />
+ <!-- Line -->
+ <polyline
+ points="1350,3450 2350,2590 "
+ style="stroke:#00d1d1;stroke-width:30.0045575;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline32" />
+ <!-- Arrowhead on XXXpoint 1350 3450 - 2444 2510-->
+ <!-- Line -->
+ <polyline
+ points="4950,3450 3948,2590 "
+ style="stroke:#00d1d1;stroke-width:30.0045575;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline36" />
+ <!-- Arrowhead on XXXpoint 4950 3450 - 3854 2510-->
+ <!-- Line -->
+ <polyline
+ points="4050,6600 4050,4414 "
+ style="stroke:#00d1d1;stroke-width:30.00455750000000066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline40" />
+ <!-- Arrowhead on XXXpoint 4050 6600 - 4050 4290-->
+ <!-- Line -->
+ <polyline
+ points="1050,6600 1050,4414 "
+ style="stroke:#00d1d1;stroke-width:30.00455750000000066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline44" />
+ <!-- Arrowhead on XXXpoint 1050 6600 - 1050 4290-->
+ <!-- Line -->
+ <polyline
+ points="2250,5400 2250,4414 "
+ style="stroke:#00d1d1;stroke-width:30.00455750000000066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline48" />
+ <!-- Arrowhead on XXXpoint 2250 5400 - 2250 4290-->
+ <!-- Line -->
+ <polyline
+ points="2250,8100 2250,6364 "
+ style="stroke:#00ff00;stroke-width:30;stroke-linejoin:miter;stroke-linecap:butt;marker-end:url(#Arrow1Mend)"
+ id="polyline52" />
+ <!-- Arrowhead on XXXpoint 2250 8100 - 2250 6240-->
+ <!-- Line -->
+ <polyline
+ points="1050,9300 1050,7564 "
+ style="stroke:#00ff00;stroke-width:30;stroke-linejoin:miter;stroke-linecap:butt;marker-end:url(#Arrow1Mend)"
+ id="polyline56" />
+ <!-- Arrowhead on XXXpoint 1050 9300 - 1050 7440-->
+ <!-- Line -->
+ <polyline
+ points="4050,9300 4050,7564 "
+ style="stroke:#00ff00;stroke-width:30;stroke-linejoin:miter;stroke-linecap:butt;marker-end:url(#Arrow1Mend)"
+ id="polyline60" />
+ <!-- Arrowhead on XXXpoint 4050 9300 - 4050 7440-->
+ <!-- Line -->
+ <polyline
+ points="5250,8100 5250,6364 "
+ style="stroke:#00ff00;stroke-width:30;stroke-linejoin:miter;stroke-linecap:butt;marker-end:url(#Arrow1Mend)"
+ id="polyline64" />
+ <!-- Arrowhead on XXXpoint 5250 8100 - 5250 6240-->
+ <!-- Circle -->
+ <circle
+ cx="2850"
+ cy="3900"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle68" />
+ <!-- Circle -->
+ <circle
+ cx="3150"
+ cy="3900"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle70" />
+ <!-- Circle -->
+ <circle
+ cx="3450"
+ cy="3900"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle72" />
+ <!-- Circle -->
+ <circle
+ cx="1350"
+ cy="5100"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle74" />
+ <!-- Circle -->
+ <circle
+ cx="1650"
+ cy="5100"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle76" />
+ <!-- Circle -->
+ <circle
+ cx="1950"
+ cy="5100"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle78" />
+ <!-- Circle -->
+ <circle
+ cx="4350"
+ cy="5100"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle80" />
+ <!-- Circle -->
+ <circle
+ cx="4650"
+ cy="5100"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle82" />
+ <!-- Circle -->
+ <circle
+ cx="4950"
+ cy="5100"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle84" />
+ <!-- Line: box -->
+ <rect
+ x="750"
+ y="3450"
+ width="1800"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect86" />
+ <!-- Line: box -->
+ <rect
+ x="300"
+ y="6600"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect88" />
+ <!-- Line: box -->
+ <rect
+ x="3750"
+ y="3450"
+ width="1800"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect90" />
+ <!-- Line: box -->
+ <rect
+ x="4500"
+ y="5400"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect92" />
+ <!-- Line: box -->
+ <rect
+ x="3300"
+ y="6600"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect94" />
+ <!-- Line: box -->
+ <rect
+ x="2250"
+ y="1650"
+ width="1800"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect96" />
+ <!-- Line: box -->
+ <rect
+ x="0"
+ y="9300"
+ width="2100"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+ id="rect98" />
+ <!-- Line: box -->
+ <rect
+ x="1350"
+ y="8100"
+ width="2100"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+ id="rect100" />
+ <!-- Line: box -->
+ <rect
+ x="3000"
+ y="9300"
+ width="2100"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+ id="rect102" />
+ <!-- Line: box -->
+ <rect
+ x="4350"
+ y="8100"
+ width="2100"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+ id="rect104" />
+ <!-- Line: box -->
+ <rect
+ x="1500"
+ y="5400"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect106" />
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="6450"
+ y="300"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="192"
+ text-anchor="end"
+ id="text108">rcu_bh</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="3150"
+ y="1950"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text110">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="3150"
+ y="2250"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text112">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1650"
+ y="3750"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text114">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1650"
+ y="4050"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text116">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4650"
+ y="4050"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text118">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4650"
+ y="3750"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text120">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2250"
+ y="5700"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text122">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2250"
+ y="6000"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text124">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1050"
+ y="6900"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text126">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1050"
+ y="7200"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text128">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5250"
+ y="5700"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text130">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5250"
+ y="6000"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text132">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4050"
+ y="6900"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text134">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4050"
+ y="7200"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text136">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="450"
+ y="1350"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="start"
+ id="text138">struct rcu_state</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1050"
+ y="9600"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text140">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1050"
+ y="9900"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text142">rcu_dynticks</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4050"
+ y="9600"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text144">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4050"
+ y="9900"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text146">rcu_dynticks</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2400"
+ y="8400"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text148">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2400"
+ y="8700"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text150">rcu_dynticks</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5400"
+ y="8400"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text152">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5400"
+ y="8700"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text154">rcu_dynticks</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="6000"
+ y="750"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="192"
+ text-anchor="end"
+ id="text156">rcu_sched</text>
+ <!-- Line -->
+ <polyline
+ points="5250,5400 5250,4414 "
+ style="stroke:#00d1d1;stroke-width:30.00455750000000066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline158" />
+ <!-- Arrowhead on XXXpoint 5250 5400 - 5250 4290-->
+ </g>
+</svg>
diff --git a/Documentation/RCU/Design/Data-Structures/BigTreePreemptRCUBHdyntick.svg b/Documentation/RCU/Design/Data-Structures/BigTreePreemptRCUBHdyntick.svg
new file mode 100644
index 0000000..15adcac
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/BigTreePreemptRCUBHdyntick.svg
@@ -0,0 +1,741 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
+
+<!-- CreationDate: Wed Dec 9 17:32:59 2015 -->
+
+<!-- Magnification: 2.000 -->
+
+<svg
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:cc="http://creativecommons.org/ns#"
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:svg="http://www.w3.org/2000/svg"
+ xmlns="http://www.w3.org/2000/svg"
+ xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+ xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+ width="6.1in"
+ height="8.9in"
+ viewBox="-44 -44 7288 10738"
+ id="svg2"
+ version="1.1"
+ inkscape:version="0.48.4 r9939"
+ sodipodi:docname="BigTreePreemptRCUBHdyntick.fig">
+ <metadata
+ id="metadata182">
+ <rdf:RDF>
+ <cc:Work
+ rdf:about="">
+ <dc:format>image/svg+xml</dc:format>
+ <dc:type
+ rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+ <dc:title></dc:title>
+ </cc:Work>
+ </rdf:RDF>
+ </metadata>
+ <defs
+ id="defs180">
+ <marker
+ inkscape:stockid="Arrow1Mend"
+ orient="auto"
+ refY="0.0"
+ refX="0.0"
+ id="Arrow1Mend"
+ style="overflow:visible;">
+ <path
+ id="path3940"
+ d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
+ style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;"
+ transform="scale(0.4) rotate(180) translate(10,0)" />
+ </marker>
+ </defs>
+ <sodipodi:namedview
+ pagecolor="#ffffff"
+ bordercolor="#666666"
+ borderopacity="1"
+ objecttolerance="10"
+ gridtolerance="10"
+ guidetolerance="10"
+ inkscape:pageopacity="0"
+ inkscape:pageshadow="2"
+ inkscape:window-width="874"
+ inkscape:window-height="1148"
+ id="namedview178"
+ showgrid="false"
+ inkscape:zoom="1.2097379"
+ inkscape:cx="274.5"
+ inkscape:cy="400.49997"
+ inkscape:window-x="946"
+ inkscape:window-y="24"
+ inkscape:window-maximized="0"
+ inkscape:current-layer="g4" />
+ <g
+ style="stroke-width:.025in; fill:none"
+ id="g4">
+ <!-- Line: box -->
+ <rect
+ x="900"
+ y="0"
+ width="6300"
+ height="7350"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+ id="rect6" />
+ <!-- Line: box -->
+ <rect
+ x="1200"
+ y="600"
+ width="5700"
+ height="3750"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+ id="rect8" />
+ <!-- Line: box -->
+ <rect
+ x="5400"
+ y="4950"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect10" />
+ <!-- Line: box -->
+ <rect
+ x="450"
+ y="450"
+ width="6300"
+ height="7350"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+ id="rect12" />
+ <!-- Line: box -->
+ <rect
+ x="750"
+ y="1050"
+ width="5700"
+ height="3750"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+ id="rect14" />
+ <!-- Line: box -->
+ <rect
+ x="4950"
+ y="5400"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect16" />
+ <!-- Line -->
+ <polyline
+ points="5250,8550 5688,6362 "
+ style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="polyline18" />
+ <!-- Arrowhead on XXXpoint 5250 8550 - 5710 6240-->
+ <polyline
+ points="5714 6518 5704 6272 5598 6494 "
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+ id="polyline20" />
+ <!-- Line -->
+ <polyline
+ points="4050,9750 4486,7712 "
+ style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="polyline22" />
+ <!-- Arrowhead on XXXpoint 4050 9750 - 4512 7590-->
+ <polyline
+ points="4514 7868 4506 7622 4396 7844 "
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+ id="polyline24" />
+ <!-- Line -->
+ <polyline
+ points="1040,9750 1476,7712 "
+ style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="polyline26" />
+ <!-- Arrowhead on XXXpoint 1040 9750 - 1502 7590-->
+ <polyline
+ points="1504 7868 1496 7622 1386 7844 "
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+ id="polyline28" />
+ <!-- Line -->
+ <polyline
+ points="2240,8550 2676,6512 "
+ style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="polyline30" />
+ <!-- Arrowhead on XXXpoint 2240 8550 - 2702 6390-->
+ <polyline
+ points="2704 6668 2696 6422 2586 6644 "
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+ id="polyline32" />
+ <!-- Line -->
+ <polyline
+ points="4050,9750 5682,6360 "
+ style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="polyline34" />
+ <!-- Arrowhead on XXXpoint 4050 9750 - 5736 6246-->
+ <polyline
+ points="5672 6518 5722 6276 5562 6466 "
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+ id="polyline36" />
+ <!-- Line -->
+ <polyline
+ points="1010,9750 2642,6360 "
+ style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="polyline38" />
+ <!-- Arrowhead on XXXpoint 1010 9750 - 2696 6246-->
+ <polyline
+ points="2632 6518 2682 6276 2522 6466 "
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+ id="polyline40" />
+ <!-- Line: box -->
+ <rect
+ x="0"
+ y="900"
+ width="6300"
+ height="7350"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+ id="rect42" />
+ <!-- Line: box -->
+ <rect
+ x="300"
+ y="1500"
+ width="5700"
+ height="3750"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+ id="rect44" />
+ <!-- Line -->
+ <polyline
+ points="1350,3900 2350,3040 "
+ style="stroke:#00d1d1;stroke-width:30.00205472;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline46" />
+ <!-- Arrowhead on XXXpoint 1350 3900 - 2444 2960-->
+ <!-- Line -->
+ <polyline
+ points="4950,3900 3948,3040 "
+ style="stroke:#00d1d1;stroke-width:30.00205472;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline50" />
+ <!-- Arrowhead on XXXpoint 4950 3900 - 3854 2960-->
+ <!-- Line -->
+ <polyline
+ points="4050,7050 4050,4864 "
+ style="stroke:#00d1d1;stroke-width:30.00205472;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline54" />
+ <!-- Arrowhead on XXXpoint 4050 7050 - 4050 4740-->
+ <!-- Line -->
+ <polyline
+ points="1050,7050 1050,4864 "
+ style="stroke:#00d1d1;stroke-width:30.00205472;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline58" />
+ <!-- Arrowhead on XXXpoint 1050 7050 - 1050 4740-->
+ <!-- Line -->
+ <polyline
+ points="2250,5850 2250,4864 "
+ style="stroke:#00d1d1;stroke-width:30.00205472;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline62" />
+ <!-- Arrowhead on XXXpoint 2250 5850 - 2250 4740-->
+ <!-- Line -->
+ <polyline
+ points="2250,8550 2250,6814 "
+ style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="polyline66" />
+ <!-- Arrowhead on XXXpoint 2250 8550 - 2250 6690-->
+ <!-- Line -->
+ <polyline
+ points="1050,9750 1050,8014 "
+ style="stroke:#00ff00;stroke-width:30.00205472;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline70" />
+ <!-- Arrowhead on XXXpoint 1050 9750 - 1050 7890-->
+ <!-- Line -->
+ <polyline
+ points="4050,9750 4050,8014 "
+ style="stroke:#00ff00;stroke-width:30.00205472;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline74" />
+ <!-- Arrowhead on XXXpoint 4050 9750 - 4050 7890-->
+ <!-- Line -->
+ <polyline
+ points="5250,8550 5250,6814 "
+ style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="polyline78" />
+ <!-- Arrowhead on XXXpoint 5250 8550 - 5250 6690-->
+ <!-- Circle -->
+ <circle
+ cx="2850"
+ cy="4350"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle82" />
+ <!-- Circle -->
+ <circle
+ cx="3150"
+ cy="4350"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle84" />
+ <!-- Circle -->
+ <circle
+ cx="3450"
+ cy="4350"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle86" />
+ <!-- Circle -->
+ <circle
+ cx="1350"
+ cy="5550"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle88" />
+ <!-- Circle -->
+ <circle
+ cx="1650"
+ cy="5550"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle90" />
+ <!-- Circle -->
+ <circle
+ cx="1950"
+ cy="5550"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle92" />
+ <!-- Circle -->
+ <circle
+ cx="4350"
+ cy="5550"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle94" />
+ <!-- Circle -->
+ <circle
+ cx="4650"
+ cy="5550"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle96" />
+ <!-- Circle -->
+ <circle
+ cx="4950"
+ cy="5550"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle98" />
+ <!-- Line: box -->
+ <rect
+ x="750"
+ y="3900"
+ width="1800"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect100" />
+ <!-- Line: box -->
+ <rect
+ x="300"
+ y="7050"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect102" />
+ <!-- Line: box -->
+ <rect
+ x="3750"
+ y="3900"
+ width="1800"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect104" />
+ <!-- Line: box -->
+ <rect
+ x="4500"
+ y="5850"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect106" />
+ <!-- Line: box -->
+ <rect
+ x="3300"
+ y="7050"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect108" />
+ <!-- Line: box -->
+ <rect
+ x="2250"
+ y="2100"
+ width="1800"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect110" />
+ <!-- Line: box -->
+ <rect
+ x="0"
+ y="9750"
+ width="2100"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+ id="rect112" />
+ <!-- Line: box -->
+ <rect
+ x="1350"
+ y="8550"
+ width="2100"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+ id="rect114" />
+ <!-- Line: box -->
+ <rect
+ x="3000"
+ y="9750"
+ width="2100"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+ id="rect116" />
+ <!-- Line: box -->
+ <rect
+ x="4350"
+ y="8550"
+ width="2100"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+ id="rect118" />
+ <!-- Line: box -->
+ <rect
+ x="1500"
+ y="5850"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect120" />
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="6450"
+ y="750"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="192"
+ text-anchor="end"
+ id="text122">rcu_bh</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="3150"
+ y="2400"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text124">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="3150"
+ y="2700"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text126">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1650"
+ y="4200"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text128">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1650"
+ y="4500"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text130">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4650"
+ y="4500"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text132">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4650"
+ y="4200"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text134">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2250"
+ y="6150"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text136">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2250"
+ y="6450"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text138">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1050"
+ y="7350"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text140">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1050"
+ y="7650"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text142">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5250"
+ y="6150"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text144">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5250"
+ y="6450"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text146">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4050"
+ y="7350"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text148">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4050"
+ y="7650"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text150">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="450"
+ y="1800"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="start"
+ id="text152">struct rcu_state</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1050"
+ y="10050"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text154">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1050"
+ y="10350"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text156">rcu_dynticks</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4050"
+ y="10050"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text158">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4050"
+ y="10350"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text160">rcu_dynticks</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2400"
+ y="8850"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text162">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2400"
+ y="9150"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text164">rcu_dynticks</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5400"
+ y="8850"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text166">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5400"
+ y="9150"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text168">rcu_dynticks</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="6900"
+ y="300"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="192"
+ text-anchor="end"
+ id="text170">rcu_preempt</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="6000"
+ y="1200"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="192"
+ text-anchor="end"
+ id="text172">rcu_sched</text>
+ <!-- Line -->
+ <polyline
+ points="5250,5850 5250,4864 "
+ style="stroke:#00d1d1;stroke-width:30.00205472;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline174" />
+ <!-- Arrowhead on XXXpoint 5250 5850 - 5250 4740-->
+ </g>
+</svg>
diff --git a/Documentation/RCU/Design/Data-Structures/BigTreePreemptRCUBHdyntickCB.svg b/Documentation/RCU/Design/Data-Structures/BigTreePreemptRCUBHdyntickCB.svg
new file mode 100644
index 0000000..bbc3801
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/BigTreePreemptRCUBHdyntickCB.svg
@@ -0,0 +1,858 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
+
+<!-- CreationDate: Wed Dec 9 17:29:48 2015 -->
+
+<!-- Magnification: 2.000 -->
+
+<svg
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:cc="http://creativecommons.org/ns#"
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:svg="http://www.w3.org/2000/svg"
+ xmlns="http://www.w3.org/2000/svg"
+ xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+ xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+ width="7.4in"
+ height="9.9in"
+ viewBox="-44 -44 8938 11938"
+ id="svg2"
+ version="1.1"
+ inkscape:version="0.48.4 r9939"
+ sodipodi:docname="BigTreePreemptRCUBHdyntickCB.svg">
+ <metadata
+ id="metadata212">
+ <rdf:RDF>
+ <cc:Work
+ rdf:about="">
+ <dc:format>image/svg+xml</dc:format>
+ <dc:type
+ rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+ <dc:title></dc:title>
+ </cc:Work>
+ </rdf:RDF>
+ </metadata>
+ <defs
+ id="defs210">
+ <marker
+ inkscape:stockid="Arrow1Mend"
+ orient="auto"
+ refY="0.0"
+ refX="0.0"
+ id="Arrow1Mend"
+ style="overflow:visible;">
+ <path
+ id="path3970"
+ d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
+ style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;"
+ transform="scale(0.4) rotate(180) translate(10,0)" />
+ </marker>
+ </defs>
+ <sodipodi:namedview
+ pagecolor="#ffffff"
+ bordercolor="#666666"
+ borderopacity="1"
+ objecttolerance="10"
+ gridtolerance="10"
+ guidetolerance="10"
+ inkscape:pageopacity="0"
+ inkscape:pageshadow="2"
+ inkscape:window-width="881"
+ inkscape:window-height="1128"
+ id="namedview208"
+ showgrid="false"
+ inkscape:zoom="1.0195195"
+ inkscape:cx="333"
+ inkscape:cy="445.49997"
+ inkscape:window-x="936"
+ inkscape:window-y="24"
+ inkscape:window-maximized="0"
+ inkscape:current-layer="g4" />
+ <g
+ style="stroke-width:.025in; fill:none"
+ id="g4">
+ <!-- Line: box -->
+ <rect
+ x="900"
+ y="0"
+ width="6300"
+ height="7350"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+ id="rect6" />
+ <!-- Line: box -->
+ <rect
+ x="1200"
+ y="600"
+ width="5700"
+ height="3750"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+ id="rect8" />
+ <!-- Line: box -->
+ <rect
+ x="5400"
+ y="4950"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect10" />
+ <!-- Line: box -->
+ <rect
+ x="450"
+ y="450"
+ width="6300"
+ height="7350"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+ id="rect12" />
+ <!-- Line: box -->
+ <rect
+ x="750"
+ y="1050"
+ width="5700"
+ height="3750"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+ id="rect14" />
+ <!-- Line: box -->
+ <rect
+ x="4950"
+ y="5400"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect16" />
+ <!-- Line -->
+ <polyline
+ points="5250,8550 5688,6362 "
+ style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="polyline18" />
+ <!-- Arrowhead on XXXpoint 5250 8550 - 5710 6240-->
+ <polyline
+ points="5714 6518 5704 6272 5598 6494 "
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+ id="polyline20" />
+ <!-- Line -->
+ <polyline
+ points="4050,9750 4486,7712 "
+ style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="polyline22" />
+ <!-- Arrowhead on XXXpoint 4050 9750 - 4512 7590-->
+ <polyline
+ points="4514 7868 4506 7622 4396 7844 "
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+ id="polyline24" />
+ <!-- Line -->
+ <polyline
+ points="1040,9750 1476,7712 "
+ style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="polyline26" />
+ <!-- Arrowhead on XXXpoint 1040 9750 - 1502 7590-->
+ <polyline
+ points="1504 7868 1496 7622 1386 7844 "
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+ id="polyline28" />
+ <!-- Line -->
+ <polyline
+ points="2240,8550 2676,6512 "
+ style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="polyline30" />
+ <!-- Arrowhead on XXXpoint 2240 8550 - 2702 6390-->
+ <polyline
+ points="2704 6668 2696 6422 2586 6644 "
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+ id="polyline32" />
+ <!-- Line -->
+ <polyline
+ points="4050,9600 5692,6062 "
+ style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="polyline34" />
+ <!-- Arrowhead on XXXpoint 4050 9600 - 5744 5948-->
+ <polyline
+ points="5682 6220 5730 5978 5574 6170 "
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+ id="polyline36" />
+ <!-- Line -->
+ <polyline
+ points="1086,9600 2728,6062 "
+ style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="polyline38" />
+ <!-- Arrowhead on XXXpoint 1086 9600 - 2780 5948-->
+ <polyline
+ points="2718 6220 2766 5978 2610 6170 "
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+ id="polyline40" />
+ <!-- Line: box -->
+ <rect
+ x="0"
+ y="900"
+ width="6300"
+ height="7350"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+ id="rect42" />
+ <!-- Line: box -->
+ <rect
+ x="300"
+ y="1500"
+ width="5700"
+ height="3750"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+ id="rect44" />
+ <!-- Line -->
+ <polyline
+ points="1350,3900 2350,3040 "
+ style="stroke:#00d1d1;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline46" />
+ <!-- Arrowhead on XXXpoint 1350 3900 - 2444 2960-->
+ <!-- Line -->
+ <polyline
+ points="4950,3900 3948,3040 "
+ style="stroke:#00d1d1;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline50" />
+ <!-- Arrowhead on XXXpoint 4950 3900 - 3854 2960-->
+ <!-- Line -->
+ <polyline
+ points="4050,7050 4050,4864 "
+ style="stroke:#00d1d1;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline54" />
+ <!-- Arrowhead on XXXpoint 4050 7050 - 4050 4740-->
+ <!-- Line -->
+ <polyline
+ points="1050,7050 1050,4864 "
+ style="stroke:#00d1d1;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline58" />
+ <!-- Arrowhead on XXXpoint 1050 7050 - 1050 4740-->
+ <!-- Line -->
+ <polyline
+ points="2250,5850 2250,4864 "
+ style="stroke:#00d1d1;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline62" />
+ <!-- Arrowhead on XXXpoint 2250 5850 - 2250 4740-->
+ <!-- Line -->
+ <polyline
+ points="2250,8550 2250,6814 "
+ style="stroke:#00ff00;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline66" />
+ <!-- Arrowhead on XXXpoint 2250 8550 - 2250 6690-->
+ <!-- Line -->
+ <polyline
+ points="1050,9750 1050,8014 "
+ style="stroke:#00ff00;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline70" />
+ <!-- Arrowhead on XXXpoint 1050 9750 - 1050 7890-->
+ <!-- Line -->
+ <polyline
+ points="4050,9750 4050,8014 "
+ style="stroke:#00ff00;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline74" />
+ <!-- Arrowhead on XXXpoint 4050 9750 - 4050 7890-->
+ <!-- Line -->
+ <polyline
+ points="5250,8550 5250,6814 "
+ style="stroke:#00ff00;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline78" />
+ <!-- Arrowhead on XXXpoint 5250 8550 - 5250 6690-->
+ <!-- Line -->
+ <polyline
+ points="6000,6300 8048,7910 "
+ style="stroke:#87cfff;stroke-width:30;stroke-linejoin:miter;stroke-linecap:butt;marker-end:url(#Arrow1Mend)"
+ id="polyline82" />
+ <!-- Arrowhead on XXXpoint 6000 6300 - 8146 7986-->
+ <!-- Circle -->
+ <circle
+ cx="2850"
+ cy="4350"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle86" />
+ <!-- Circle -->
+ <circle
+ cx="3150"
+ cy="4350"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle88" />
+ <!-- Circle -->
+ <circle
+ cx="3450"
+ cy="4350"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle90" />
+ <!-- Circle -->
+ <circle
+ cx="1350"
+ cy="5550"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle92" />
+ <!-- Circle -->
+ <circle
+ cx="1650"
+ cy="5550"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle94" />
+ <!-- Circle -->
+ <circle
+ cx="1950"
+ cy="5550"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle96" />
+ <!-- Circle -->
+ <circle
+ cx="4350"
+ cy="5550"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle98" />
+ <!-- Circle -->
+ <circle
+ cx="4650"
+ cy="5550"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle100" />
+ <!-- Circle -->
+ <circle
+ cx="4950"
+ cy="5550"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle102" />
+ <!-- Line: box -->
+ <rect
+ x="7350"
+ y="7950"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect104" />
+ <!-- Line: box -->
+ <rect
+ x="7350"
+ y="9450"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect106" />
+ <!-- Line -->
+ <polyline
+ points="8100,8850 8100,9384 "
+ style="stroke:#000000;stroke-width:30;stroke-linejoin:miter;stroke-linecap:butt;marker-end:url(#Arrow1Mend)"
+ id="polyline108" />
+ <!-- Arrowhead on XXXpoint 8100 8850 - 8100 9510-->
+ <!-- Line: box -->
+ <rect
+ x="7350"
+ y="10950"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect112" />
+ <!-- Line -->
+ <polyline
+ points="8100,10350 8100,10884 "
+ style="stroke:#000000;stroke-width:30;stroke-linejoin:miter;stroke-linecap:butt;marker-end:url(#Arrow1Mend)"
+ id="polyline114" />
+ <!-- Arrowhead on XXXpoint 8100 10350 - 8100 11010-->
+ <!-- Line: box -->
+ <rect
+ x="750"
+ y="3900"
+ width="1800"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect118" />
+ <!-- Line: box -->
+ <rect
+ x="300"
+ y="7050"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect120" />
+ <!-- Line: box -->
+ <rect
+ x="3750"
+ y="3900"
+ width="1800"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect122" />
+ <!-- Line: box -->
+ <rect
+ x="4500"
+ y="5850"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect124" />
+ <!-- Line: box -->
+ <rect
+ x="3300"
+ y="7050"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect126" />
+ <!-- Line: box -->
+ <rect
+ x="2250"
+ y="2100"
+ width="1800"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect128" />
+ <!-- Line: box -->
+ <rect
+ x="0"
+ y="9750"
+ width="2100"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+ id="rect130" />
+ <!-- Line: box -->
+ <rect
+ x="1350"
+ y="8550"
+ width="2100"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+ id="rect132" />
+ <!-- Line: box -->
+ <rect
+ x="3000"
+ y="9750"
+ width="2100"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+ id="rect134" />
+ <!-- Line: box -->
+ <rect
+ x="4350"
+ y="8550"
+ width="2100"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+ id="rect136" />
+ <!-- Line: box -->
+ <rect
+ x="1500"
+ y="5850"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect138" />
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="8100"
+ y="8250"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text140">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="8100"
+ y="8550"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text142">rcu_head</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="8100"
+ y="9750"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text144">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="8100"
+ y="10050"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text146">rcu_head</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="8100"
+ y="11250"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text148">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="8100"
+ y="11550"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text150">rcu_head</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="6000"
+ y="1200"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="192"
+ text-anchor="end"
+ id="text152">rcu_sched</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="6450"
+ y="750"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="192"
+ text-anchor="end"
+ id="text154">rcu_bh</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="3150"
+ y="2400"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text156">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="3150"
+ y="2700"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text158">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1650"
+ y="4200"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text160">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1650"
+ y="4500"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text162">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4650"
+ y="4500"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text164">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4650"
+ y="4200"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text166">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2250"
+ y="6150"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text168">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2250"
+ y="6450"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text170">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1050"
+ y="7350"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text172">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1050"
+ y="7650"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text174">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5250"
+ y="6150"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text176">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5250"
+ y="6450"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text178">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4050"
+ y="7350"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text180">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4050"
+ y="7650"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text182">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="450"
+ y="1800"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="start"
+ id="text184">struct rcu_state</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1050"
+ y="10050"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text186">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1050"
+ y="10350"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text188">rcu_dynticks</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4050"
+ y="10050"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text190">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4050"
+ y="10350"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text192">rcu_dynticks</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2400"
+ y="8850"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text194">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2400"
+ y="9150"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text196">rcu_dynticks</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5400"
+ y="8850"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text198">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5400"
+ y="9150"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text200">rcu_dynticks</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="6900"
+ y="300"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="192"
+ text-anchor="end"
+ id="text202">rcu_preempt</text>
+ <!-- Line -->
+ <polyline
+ points="5250,5850 5250,4864 "
+ style="stroke:#00d1d1;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline204" />
+ <!-- Arrowhead on XXXpoint 5250 5850 - 5250 4740-->
+ </g>
+</svg>
diff --git a/Documentation/RCU/Design/Data-Structures/Data-Structures.html b/Documentation/RCU/Design/Data-Structures/Data-Structures.html
new file mode 100644
index 0000000..7eb47ac
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/Data-Structures.html
@@ -0,0 +1,1333 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+ "http://www.w3.org/TR/html4/loose.dtd">
+ <html>
+ <head><title>A Tour Through TREE_RCU's Data Structures [LWN.net]</title>
+ <meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">
+
+ <p>January 27, 2016</p>
+ <p>This article was contributed by Paul E. McKenney</p>
+
+<h3>Introduction</h3>
+
+This document describes RCU's major data structures and their relationship
+to each other.
+
+<ol>
+<li> <a href="#Data-Structure Relationships">
+ Data-Structure Relationships</a>
+<li> <a href="#The rcu_state Structure">
+ The <tt>rcu_state</tt> Structure</a>
+<li> <a href="#The rcu_node Structure">
+ The <tt>rcu_node</tt> Structure</a>
+<li> <a href="#The rcu_data Structure">
+ The <tt>rcu_data</tt> Structure</a>
+<li> <a href="#The rcu_dynticks Structure">
+ The <tt>rcu_dynticks</tt> Structure</a>
+<li> <a href="#The rcu_head Structure">
+ The <tt>rcu_head</tt> Structure</a>
+<li> <a href="#RCU-Specific Fields in the task_struct Structure">
+ RCU-Specific Fields in the <tt>task_struct</tt> Structure</a>
+<li> <a href="#Accessor Functions">
+ Accessor Functions</a>
+</ol>
+
+At the end we have the
+<a href="#Answers to Quick Quizzes">answers to the quick quizzes</a>.
+
+<h3><a name="Data-Structure Relationships">Data-Structure Relationships</a></h3>
+
+<p>RCU is for all intents and purposes a large state machine, and its
+data structures maintain the state in such a way as to allow RCU readers
+to execute extremely quickly, while also processing the RCU grace periods
+requested by updaters in an efficient and extremely scalable fashion.
+The efficiency and scalability of RCU updaters is provided primarily
+by a combining tree, as shown below:
+
+</p><p><img src="BigTreeClassicRCU.svg" alt="BigTreeClassicRCU.svg" width="30%">
+
+</p><p>This diagram shows an enclosing <tt>rcu_state</tt> structure
+containing a tree of <tt>rcu_node</tt> structures.
+Each leaf node of the <tt>rcu_node</tt> tree has up to 16
+<tt>rcu_data</tt> structures associated with it, so that there
+are <tt>NR_CPUS</tt> <tt>rcu_data</tt> structures in total,
+one for each possible CPU.
+This structure is adjusted at boot time, if needed, to handle the
+common case where <tt>nr_cpu_ids</tt> is much less than
+<tt>NR_CPUS</tt>.
+For example, a number of Linux distributions set <tt>NR_CPUS=4096</tt>,
+which results in a three-level <tt>rcu_node</tt> tree.
+If the actual hardware has only 16 CPUs, RCU will adjust itself
+at boot time, resulting in an <tt>rcu_node</tt> tree with only a single node.
+
+</p><p>The purpose of this combining tree is to allow per-CPU events
+such as quiescent states, dyntick-idle transitions,
+and CPU hotplug operations to be processed efficiently
+and scalably.
+Quiescent states are recorded by the per-CPU <tt>rcu_data</tt> structures,
+and other events are recorded by the leaf-level <tt>rcu_node</tt>
+structures.
+All of these events are combined at each level of the tree until finally
+grace periods are completed at the tree's root <tt>rcu_node</tt>
+structure.
+A grace period can be completed at the root once every CPU
+(or, in the case of <tt>CONFIG_PREEMPT_RCU</tt>, task)
+has passed through a quiescent state.
+Once a grace period has completed, record of that fact is propagated
+back down the tree.
+
+</p><p>As can be seen from the diagram, on a 64-bit system
+a two-level tree with 64 leaves can accommodate 1,024 CPUs, with a fanout
+of 64 at the root and a fanout of 16 at the leaves.
+
+<table>
+<tr><th> </th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+ Why isn't the fanout at the leaves also 64?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+ Because there are more types of events that affect the leaf-level
+ <tt>rcu_node</tt> structures than further up the tree.
+ Therefore, if the leaf <tt>rcu_node</tt> structures have fanout of
+	64, the contention on these structures' <tt>->lock</tt> fields
+	becomes excessive.
+ Experimentation on a wide variety of systems has shown that a fanout
+ of 16 works well for the leaves of the <tt>rcu_node</tt> tree.
+ </font>
+
+ <p><font color="ffffff">Of course, further experience with
+ systems having hundreds or thousands of CPUs may demonstrate
+ that the fanout for the non-leaf <tt>rcu_node</tt> structures
+ must also be reduced.
+ Such reduction can be easily carried out when and if it proves
+ necessary.
+ In the meantime, if you are using such a system and running into
+ contention problems on the non-leaf <tt>rcu_node</tt> structures,
+ you may use the <tt>CONFIG_RCU_FANOUT</tt> kernel configuration
+ parameter to reduce the non-leaf fanout as needed.
+ </font>
+
+ <p><font color="ffffff">Kernels built for systems with
+ strong NUMA characteristics might also need to adjust
+ <tt>CONFIG_RCU_FANOUT</tt> so that the domains of the
+ <tt>rcu_node</tt> structures align with hardware boundaries.
+ However, there has thus far been no need for this.
+</font></td></tr>
+<tr><td> </td></tr>
+</table>
+
+<p>If your system has more than 1,024 CPUs (or more than 512 CPUs on
+a 32-bit system), then RCU will automatically add more levels to the
+tree.
+For example, if you are crazy enough to build a 64-bit system with 65,536
+CPUs, RCU would configure the <tt>rcu_node</tt> tree as follows:
+
+</p><p><img src="HugeTreeClassicRCU.svg" alt="HugeTreeClassicRCU.svg" width="50%">
+
+</p><p>RCU currently permits up to a four-level tree, which on a 64-bit system
+accommodates up to 4,194,304 CPUs, though a mere 524,288 CPUs for
+32-bit systems.
+On the other hand, you can set <tt>CONFIG_RCU_FANOUT</tt> to be
+as small as 2 if you wish, which would permit only 16 CPUs, which
+is useful for testing.
+
+</p><p>This multi-level combining tree allows us to get most of the
+performance and scalability
+benefits of partitioning, even though RCU grace-period detection is
+inherently a global operation.
+The trick here is that only the last CPU to report a quiescent state
+into a given <tt>rcu_node</tt> structure need advance to the <tt>rcu_node</tt>
+structure at the next level up the tree.
+This means that at the leaf-level <tt>rcu_node</tt> structure, only
+one access out of sixteen will progress up the tree.
+For the internal <tt>rcu_node</tt> structures, the situation is even
+more extreme: Only one access out of sixty-four will progress up
+the tree.
+Because the vast majority of the CPUs do not progress up the tree,
+the lock contention remains roughly constant up the tree.
+No matter how many CPUs there are in the system, at most 64 quiescent-state
+reports per grace period will progress all the way to the root
+<tt>rcu_node</tt> structure, thus ensuring that the lock contention
+on that root <tt>rcu_node</tt> structure remains acceptably low.
+
+</p><p>In effect, the combining tree acts like a big shock absorber,
+keeping lock contention under control at all tree levels regardless
+of the level of loading on the system.
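+
+</p><p>To make this concrete, the following minimal sketch (not the
+kernel's actual code, which must also track grace-period numbers,
+handle expedited grace periods, and interact with CPU hotplug) shows
+the general shape of a quiescent-state report climbing the tree,
+using <tt>rcu_node</tt> fields that are described in detail later
+in this document:
+
+</p><pre>
+/*
+ * Sketch: the child identified by "mask" no longer blocks the
+ * current grace period.  Climb toward the root, stopping at the
+ * first node that still has other children pending.
+ */
+static void report_qs_sketch(struct rcu_node *rnp, unsigned long mask)
+{
+	for (; rnp != NULL; rnp = rnp->parent) {
+		raw_spin_lock(&rnp->lock);
+		rnp->qsmask &= ~mask;	/* This child is now done. */
+		if (rnp->qsmask != 0) {
+			/* Other children still pending: stop here. */
+			raw_spin_unlock(&rnp->lock);
+			return;
+		}
+		mask = rnp->grpmask;	/* Report this node to its parent. */
+		raw_spin_unlock(&rnp->lock);
+	}
+	/* All bits clear at the root: the grace period may now end. */
+}
+</pre>
+
+<p>Only the final report into each node continues the climb,
+which is precisely the filtering property described above.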
+
+</p><p>The Linux kernel actually supports multiple flavors of RCU
+running concurrently, so RCU builds separate data structures for each
+flavor.
+For example, for <tt>CONFIG_TREE_RCU=y</tt> kernels, RCU provides
+rcu_sched and rcu_bh, as shown below:
+
+</p><p><img src="BigTreeClassicRCUBH.svg" alt="BigTreeClassicRCUBH.svg" width="33%">
+
+</p><p>Energy efficiency is increasingly important, and for that
+reason the Linux kernel provides <tt>CONFIG_NO_HZ_IDLE</tt>, which
+turns off the scheduling-clock interrupts on idle CPUs, which in
+turn allows those CPUs to attain deeper sleep states and to consume
+less energy.
+CPUs whose scheduling-clock interrupts have been turned off are
+said to be in <i>dyntick-idle mode</i>.
+RCU must handle dyntick-idle CPUs specially
+because RCU would otherwise wake up each CPU on every grace period,
+which would defeat the whole purpose of <tt>CONFIG_NO_HZ_IDLE</tt>.
+RCU uses the <tt>rcu_dynticks</tt> structure to track
+which CPUs are in dyntick idle mode, as shown below:
+
+</p><p><img src="BigTreeClassicRCUBHdyntick.svg" alt="BigTreeClassicRCUBHdyntick.svg" width="33%">
+
+</p><p>However, if a CPU is in dyntick-idle mode, it is in that mode
+for all flavors of RCU.
+Therefore, a single <tt>rcu_dynticks</tt> structure is allocated per
+CPU, and all of a given CPU's <tt>rcu_data</tt> structures share
+that <tt>rcu_dynticks</tt>, as shown in the figure.
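+
+</p><p>For illustration only, this sharing can be set up at boot along
+the following lines (a sketch, assuming a per-CPU <tt>rcu_dynticks</tt>
+variable and using the <tt>->rda</tt> pointer described later in
+this document):
+
+</p><pre>
+/* Sketch: one rcu_dynticks per CPU, shared across all RCU flavors. */
+static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks);
+
+static void init_rdp_sketch(struct rcu_state *rsp, int cpu)
+{
+	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
+
+	/* Every flavor's rcu_data for this CPU gets the same pointer. */
+	rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
+}
+</pre>
+
+<p>Invoking this function once per flavor for each CPU leaves all of
+a given CPU's <tt>rcu_data</tt> structures referencing that CPU's
+single <tt>rcu_dynticks</tt> structure.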
+
+</p><p>Kernels built with <tt>CONFIG_PREEMPT_RCU</tt> support
+rcu_preempt in addition to rcu_sched and rcu_bh, as shown below:
+
+</p><p><img src="BigTreePreemptRCUBHdyntick.svg" alt="BigTreePreemptRCUBHdyntick.svg" width="35%">
+
+</p><p>RCU updaters wait for normal grace periods by registering
+RCU callbacks, either directly via <tt>call_rcu()</tt> and
+friends (namely <tt>call_rcu_bh()</tt> and <tt>call_rcu_sched()</tt>,
+there being a separate interface per flavor of RCU),
+or indirectly via <tt>synchronize_rcu()</tt> and friends.
+RCU callbacks are represented by <tt>rcu_head</tt> structures,
+which are queued on <tt>rcu_data</tt> structures while they are
+waiting for a grace period to elapse, as shown in the following figure:
+
+</p><p><img src="BigTreePreemptRCUBHdyntickCB.svg" alt="BigTreePreemptRCUBHdyntickCB.svg" width="40%">
+
+</p><p>This figure shows how <tt>TREE_RCU</tt>'s and
+<tt>PREEMPT_RCU</tt>'s major data structures are related.
+Lesser data structures will be introduced with the algorithms that
+make use of them.
+
+</p><p>Note that each of the data structures in the above figure has
+its own synchronization:
+
+<p><ol>
+<li>	Each <tt>rcu_state</tt> structure has a lock and a mutex,
+ and some fields are protected by the corresponding root
+ <tt>rcu_node</tt> structure's lock.
+<li> Each <tt>rcu_node</tt> structure has a spinlock.
+<li> The fields in <tt>rcu_data</tt> are private to the corresponding
+ CPU, although a few can be read and written by other CPUs.
+<li> Similarly, the fields in <tt>rcu_dynticks</tt> are private
+ to the corresponding CPU, although a few can be read by
+ other CPUs.
+</ol>
+
+<p>It is important to note that different data structures can have
+very different ideas about the state of RCU at any given time.
+For but one example, awareness of the start or end of a given RCU
+grace period propagates slowly through the data structures.
+This slow propagation is absolutely necessary for RCU to have good
+read-side performance.
+If this balkanized implementation seems foreign to you, one useful
+trick is to consider each instance of these data structures to be
+a different person, each having the usual slightly different
+view of reality.
+
+</p><p>The general role of each of these data structures is as
+follows:
+
+</p><ol>
+<li> <tt>rcu_state</tt>:
+ This structure forms the interconnection between the
+ <tt>rcu_node</tt> and <tt>rcu_data</tt> structures,
+	tracks grace periods, serves as a short-term repository
+ for callbacks orphaned by CPU-hotplug events,
+ maintains <tt>rcu_barrier()</tt> state,
+ tracks expedited grace-period state,
+ and maintains state used to force quiescent states when
+	grace periods extend too long.
+<li> <tt>rcu_node</tt>: This structure forms the combining
+ tree that propagates quiescent-state
+ information from the leaves to the root, and also propagates
+ grace-period information from the root to the leaves.
+ It provides local copies of the grace-period state in order
+ to allow this information to be accessed in a synchronized
+ manner without suffering the scalability limitations that
+ would otherwise be imposed by global locking.
+ In <tt>CONFIG_PREEMPT_RCU</tt> kernels, it manages the lists
+ of tasks that have blocked while in their current
+ RCU read-side critical section.
+ In <tt>CONFIG_PREEMPT_RCU</tt> with
+ <tt>CONFIG_RCU_BOOST</tt>, it manages the
+ per-<tt>rcu_node</tt> priority-boosting
+ kernel threads (kthreads) and state.
+ Finally, it records CPU-hotplug state in order to determine
+ which CPUs should be ignored during a given grace period.
+<li> <tt>rcu_data</tt>: This per-CPU structure is the
+ focus of quiescent-state detection and RCU callback queuing.
+ It also tracks its relationship to the corresponding leaf
+ <tt>rcu_node</tt> structure to allow more-efficient
+ propagation of quiescent states up the <tt>rcu_node</tt>
+ combining tree.
+	Like the <tt>rcu_node</tt> structure, it provides a local
+	copy of the grace-period information to allow synchronized
+	access to this information from the corresponding CPU,
+	essentially for free.
+ Finally, this structure records past dyntick-idle state
+ for the corresponding CPU and also tracks statistics.
+<li> <tt>rcu_dynticks</tt>:
+ This per-CPU structure tracks the current dyntick-idle
+ state for the corresponding CPU.
+ Unlike the other three structures, the <tt>rcu_dynticks</tt>
+ structure is not replicated per RCU flavor.
+<li> <tt>rcu_head</tt>:
+ This structure represents RCU callbacks, and is the
+ only structure allocated and managed by RCU users.
+ The <tt>rcu_head</tt> structure is normally embedded
+ within the RCU-protected data structure.
+</ol>
+
+<p>If all you wanted from this article was a general notion of how
+RCU's data structures are related, you are done.
+Otherwise, each of the following sections gives more details on
+the <tt>rcu_state</tt>, <tt>rcu_node</tt>, <tt>rcu_data</tt>,
+and <tt>rcu_dynticks</tt> data structures.
+
+<h3><a name="The rcu_state Structure">
+The <tt>rcu_state</tt> Structure</a></h3>
+
+<p>The <tt>rcu_state</tt> structure is the base structure that
+represents a flavor of RCU.
+This structure forms the interconnection between the
+<tt>rcu_node</tt> and <tt>rcu_data</tt> structures,
+tracks grace periods, contains the lock used to
+synchronize with CPU-hotplug events,
+and maintains state used to force quiescent states when
+grace periods extend too long.
+
+</p><p>A few of the <tt>rcu_state</tt> structure's fields are discussed,
+singly and in groups, in the following sections.
+The more specialized fields are covered in the discussion of their
+use.
+
+<h5>Relationship to rcu_node and rcu_data Structures</h5>
+
+This portion of the <tt>rcu_state</tt> structure is declared
+as follows:
+
+<pre>
+ 1 struct rcu_node node[NUM_RCU_NODES];
+ 2 struct rcu_node *level[NUM_RCU_LVLS + 1];
+ 3 struct rcu_data __percpu *rda;
+</pre>
+
+<table>
+<tr><th> </th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+ Wait a minute!
+ You said that the <tt>rcu_node</tt> structures formed a tree,
+ but they are declared as a flat array!
+ What gives?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+ The tree is laid out in the array.
+	The first node in the array is the head, the next set of nodes in the
+ array are children of the head node, and so on until the last set of
+ nodes in the array are the leaves.
+ </font>
+
+ <p><font color="ffffff">See the following diagrams to see how
+ this works.
+</font></td></tr>
+<tr><td> </td></tr>
+</table>
+
+<p>The <tt>rcu_node</tt> tree is embedded into the
+<tt>->node[]</tt> array as shown in the following figure:
+
+</p><p><img src="TreeMapping.svg" alt="TreeMapping.svg" width="40%">
+
+</p><p>One interesting consequence of this mapping is that a
+breadth-first traversal of the tree is implemented as a simple
+linear scan of the array, which is in fact what the
+<tt>rcu_for_each_node_breadth_first()</tt> macro does.
+This macro is used at the beginnings and ends of grace periods.
+
+</p><p>Each entry of the <tt>->level</tt> array references
+the first <tt>rcu_node</tt> structure on the corresponding level
+of the tree, for example, as shown below:
+
+</p><p><img src="TreeMappingLevel.svg" alt="TreeMappingLevel.svg" width="40%">
+
+</p><p>The zero<sup>th</sup> element of the array references the root
+<tt>rcu_node</tt> structure, the first element references the
+first child of the root <tt>rcu_node</tt>, and finally the second
+element references the first leaf <tt>rcu_node</tt> structure.
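+
+</p><p>For illustration, both traversals reduce to simple pointer
+scans over the <tt>->node[]</tt> array.
+The following sketch assumes <tt>rcu_num_nodes</tt> and
+<tt>rcu_num_lvls</tt> variables holding the boot-time-adjusted node
+and level counts:
+
+</p><pre>
+/* Sketch: breadth-first traversal is a linear scan of ->node[]. */
+#define rcu_for_each_node_breadth_first(rsp, rnp) \
+	for ((rnp) = &(rsp)->node[0]; \
+	     (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)
+
+/* Sketch: the leaves are the final entries, located via ->level[]. */
+#define rcu_for_each_leaf_node(rsp, rnp) \
+	for ((rnp) = (rsp)->level[rcu_num_lvls - 1]; \
+	     (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)
+</pre>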
+
+</p><p>For whatever it is worth, if you draw the tree to be tree-shaped
+rather than array-shaped, it is easy to draw a planar representation:
+
+</p><p><img src="TreeLevel.svg" alt="TreeLevel.svg" width="60%">
+
+</p><p>Finally, the <tt>->rda</tt> field references a per-CPU
+pointer to the corresponding CPU's <tt>rcu_data</tt> structure.
+
+</p><p>All of these fields are constant once initialization is complete,
+and therefore need no protection.
+
+<h5>Grace-Period Tracking</h5>
+
+<p>This portion of the <tt>rcu_state</tt> structure is declared
+as follows:
+
+<pre>
+ 1 unsigned long gpnum;
+ 2 unsigned long completed;
+</pre>
+
+<p>RCU grace periods are numbered, and
+the <tt>->gpnum</tt> field contains the number of the grace
+period that started most recently.
+The <tt>->completed</tt> field contains the number of the
+grace period that completed most recently.
+If the two fields are equal, the RCU grace period that most recently
+started has already completed, and therefore the corresponding
+flavor of RCU is idle.
+If <tt>->gpnum</tt> is one greater than <tt>->completed</tt>,
+then <tt>->gpnum</tt> gives the number of the current RCU
+grace period, which has not yet completed.
+Any other combination of values indicates that something is broken.
+These two fields are protected by the root <tt>rcu_node</tt>'s
+<tt>->lock</tt> field.
+
+</p><p>There are <tt>->gpnum</tt> and <tt>->completed</tt> fields
+in the <tt>rcu_node</tt> and <tt>rcu_data</tt> structures
+as well.
+The fields in the <tt>rcu_state</tt> structure represent the
+most current values, and those of the other structures are compared
+in order to detect the start of a new grace period in a distributed
+fashion.
+The values flow from <tt>rcu_state</tt> to <tt>rcu_node</tt>
+(down the tree from the root to the leaves) to <tt>rcu_data</tt>.
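+
+</p><p>As a sketch, the idle check described above (modeled loosely
+on the kernel's <tt>rcu_gp_in_progress()</tt> helper) looks like
+this:
+
+</p><pre>
+/* Sketch: is the most recently started grace period still in progress? */
+static bool gp_in_progress_sketch(struct rcu_state *rsp)
+{
+	return READ_ONCE(rsp->completed) != READ_ONCE(rsp->gpnum);
+}
+</pre>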
+
+<h5>Miscellaneous</h5>
+
+<p>This portion of the <tt>rcu_state</tt> structure is declared
+as follows:
+
+<pre>
+ 1 unsigned long gp_max;
+ 2 char abbr;
+ 3 char *name;
+</pre>
+
+<p>The <tt>->gp_max</tt> field tracks the duration of the longest
+grace period in jiffies.
+It is protected by the root <tt>rcu_node</tt>'s <tt>->lock</tt>.
+
+<p>The <tt>->name</tt> field points to the name of the RCU flavor
+(for example, “rcu_sched”), and is constant.
+The <tt>->abbr</tt> field contains a one-character abbreviation,
+for example, “s” for RCU-sched.
+
+<h3><a name="The rcu_node Structure">
+The <tt>rcu_node</tt> Structure</a></h3>
+
+<p>The <tt>rcu_node</tt> structures form the combining
+tree that propagates quiescent-state
+information from the leaves to the root and also that propagates
+grace-period information from the root down to the leaves.
+They provide local copies of the grace-period state in order
+to allow this information to be accessed in a synchronized
+manner without suffering the scalability limitations that
+would otherwise be imposed by global locking.
+In <tt>CONFIG_PREEMPT_RCU</tt> kernels, they manage the lists
+of tasks that have blocked while in their current
+RCU read-side critical section.
+In <tt>CONFIG_PREEMPT_RCU</tt> with
+<tt>CONFIG_RCU_BOOST</tt>, they manage the
+per-<tt>rcu_node</tt> priority-boosting
+kernel threads (kthreads) and state.
+Finally, they record CPU-hotplug state in order to determine
+which CPUs should be ignored during a given grace period.
+
+</p><p>The <tt>rcu_node</tt> structure's fields are discussed,
+singly and in groups, in the following sections.
+
+<h5>Connection to Combining Tree</h5>
+
+<p>This portion of the <tt>rcu_node</tt> structure is declared
+as follows:
+
+<pre>
+ 1 struct rcu_node *parent;
+ 2 u8 level;
+ 3 u8 grpnum;
+ 4 unsigned long grpmask;
+ 5 int grplo;
+ 6 int grphi;
+</pre>
+
+<p>The <tt>->parent</tt> pointer references the <tt>rcu_node</tt>
+one level up in the tree, and is <tt>NULL</tt> for the root
+<tt>rcu_node</tt>.
+The RCU implementation makes heavy use of this field to push quiescent
+states up the tree.
+The <tt>->level</tt> field gives the level in the tree, with
+the root being at level zero, its children at level one, and so on.
+The <tt>->grpnum</tt> field gives this node's position within
+the children of its parent, so this number can range between 0 and 31
+on 32-bit systems and between 0 and 63 on 64-bit systems.
+The <tt>->level</tt> and <tt>->grpnum</tt> fields are
+used only during initialization and for tracing.
+The <tt>->grpmask</tt> field is the bitmask counterpart of
+<tt>->grpnum</tt>, and therefore always has exactly one bit set.
+This mask is used to clear the bit corresponding to this <tt>rcu_node</tt>
+structure in its parent's bitmasks, which are described later.
+Finally, the <tt>->grplo</tt> and <tt>->grphi</tt> fields
+contain the lowest and highest numbered CPU served by this
+<tt>rcu_node</tt> structure, respectively.
+
+</p><p>All of these fields are constant, and thus do not require any
+synchronization.
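+
+</p><p>For example, the relationship between <tt>->grpnum</tt> and
+<tt>->grpmask</tt>, and the use of <tt>->grpmask</tt> against the
+parent's bitmasks, can be sketched as follows (illustrative only):
+
+</p><pre>
+/* Sketch: ->grpmask always has exactly the ->grpnum bit set. */
+BUG_ON(rnp->grpmask != (1UL << rnp->grpnum));
+
+/* Sketch: clear this node's bit in its parent's quiescent-state */
+/* mask.  (The parent's ->lock must be held.) */
+rnp->parent->qsmask &= ~rnp->grpmask;
+</pre>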
+
+<h5>Synchronization</h5>
+
+<p>This field of the <tt>rcu_node</tt> structure is declared
+as follows:
+
+<pre>
+ 1 raw_spinlock_t lock;
+</pre>
+
+<p>This field is used to protect the remaining fields in this structure,
+unless otherwise stated.
+That said, all of the fields in this structure can be accessed without
+locking for tracing purposes.
+Yes, this can result in confusing traces, but better some tracing confusion
+than to be heisenbugged out of existence.
+
+<h5>Grace-Period Tracking</h5>
+
+<p>This portion of the <tt>rcu_node</tt> structure is declared
+as follows:
+
+<pre>
+ 1 unsigned long gpnum;
+ 2 unsigned long completed;
+</pre>
+
+<p>These fields are the counterparts of the fields of the same name in
+the <tt>rcu_state</tt> structure.
+They each may lag up to one behind their <tt>rcu_state</tt>
+counterparts.
+If a given <tt>rcu_node</tt> structure's <tt>->gpnum</tt> and
+<tt>->completed</tt> fields are equal, then this <tt>rcu_node</tt>
+structure believes that RCU is idle.
+Otherwise, as with the <tt>rcu_state</tt> structure,
+the <tt>->gpnum</tt> field will be one greater than the
+<tt>->completed</tt> field, with <tt>->gpnum</tt>
+indicating which grace period this <tt>rcu_node</tt> believes
+is still being waited for.
+
+</p><p>The <tt>->gpnum</tt> field of each <tt>rcu_node</tt>
+structure is updated at the beginning
+of each grace period, and the <tt>->completed</tt> fields are
+updated at the end of each grace period.
+
+<h5>Quiescent-State Tracking</h5>
+
+<p>These fields manage the propagation of quiescent states up the
+combining tree.
+
+</p><p>This portion of the <tt>rcu_node</tt> structure has fields
+as follows:
+
+<pre>
+ 1 unsigned long qsmask;
+ 2 unsigned long expmask;
+ 3 unsigned long qsmaskinit;
+ 4 unsigned long expmaskinit;
+</pre>
+
+<p>The <tt>->qsmask</tt> field tracks which of this
+<tt>rcu_node</tt> structure's children still need to report
+quiescent states for the current normal grace period.
+Such children will have a value of 1 in their corresponding bit.
+Note that the leaf <tt>rcu_node</tt> structures should be
+thought of as having <tt>rcu_data</tt> structures as their
+children.
+Similarly, the <tt>->expmask</tt> field tracks which
+of this <tt>rcu_node</tt> structure's children still need to report
+quiescent states for the current expedited grace period.
+An expedited grace period has
+the same conceptual properties as a normal grace period, but the
+expedited implementation accepts extreme CPU overhead to obtain
+much lower grace-period latency, for example, consuming a few
+tens of microseconds worth of CPU time to reduce grace-period
+duration from milliseconds to tens of microseconds.
+The <tt>->qsmaskinit</tt> field tracks which of this
+<tt>rcu_node</tt> structure's children cover for at least
+one online CPU.
+This mask is used to initialize <tt>->qsmask</tt>,
+and <tt>->expmaskinit</tt> is used to initialize
+<tt>->expmask</tt>, at the beginnings of the
+normal and expedited grace periods, respectively.
+
+<table>
+<tr><th> </th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+ Why are these bitmasks protected by locking?
+ Come on, haven't you heard of atomic instructions???
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+ Lockless grace-period computation! Such a tantalizing possibility!
+ </font>
+
+ <p><font color="ffffff">But consider the following sequence of events:
+ </font>
+
+ <ol>
+ <li> <font color="ffffff">CPU 0 has been in dyntick-idle
+ mode for quite some time.
+ When it wakes up, it notices that the current RCU
+ grace period needs it to report in, so it sets a
+ flag where the scheduling clock interrupt will find it.
+ </font><p>
+ <li> <font color="ffffff">Meanwhile, CPU 1 is running
+ <tt>force_quiescent_state()</tt>,
+ and notices that CPU 0 has been in dyntick idle mode,
+ which qualifies as an extended quiescent state.
+ </font><p>
+ <li> <font color="ffffff">CPU 0's scheduling clock
+ interrupt fires in the
+ middle of an RCU read-side critical section, and notices
+ that the RCU core needs something, so commences RCU softirq
+ processing.
+ </font>
+ <p>
+ <li> <font color="ffffff">CPU 0's softirq handler
+ executes and is just about ready
+ to report its quiescent state up the <tt>rcu_node</tt>
+ tree.
+ </font><p>
+ <li> <font color="ffffff">But CPU 1 beats it to the punch,
+ completing the current
+ grace period and starting a new one.
+ </font><p>
+ <li> <font color="ffffff">CPU 0 now reports its quiescent
+ state for the wrong
+ grace period.
+ That grace period might now end before the RCU read-side
+ critical section.
+ If that happens, disaster will ensue.
+ </font>
+ </ol>
+
+ <p><font color="ffffff">So the locking is absolutely required in
+ order to coordinate
+ clearing of the bits with the grace-period numbers in
+ <tt>->gpnum</tt> and <tt>->completed</tt>.
+</font></td></tr>
+<tr><td> </td></tr>
+</table>
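+
+<p>Pulling the grace-period and quiescent-state fields together, a
+heavily simplified sketch of grace-period initialization (ignoring
+CPU hotplug, expedited grace periods, and blocked tasks, and using
+the traversal sketched earlier) might look as follows:
+
+</p><pre>
+/* Sketch: start a new grace period and arm each node's ->qsmask. */
+rsp->gpnum++;	/* Protected by the root rcu_node's ->lock. */
+rcu_for_each_node_breadth_first(rsp, rnp) {
+	raw_spin_lock(&rnp->lock);
+	rnp->gpnum = rsp->gpnum;	/* Publish the new grace period. */
+	rnp->qsmask = rnp->qsmaskinit;	/* Wait on all online children. */
+	raw_spin_unlock(&rnp->lock);
+}
+</pre>
+
+<p>The breadth-first order arms each node before any of its
+descendants, so that a quiescent-state report arriving from below
+always finds the corresponding bit already set.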
+
+<h5>Blocked-Task Management</h5>
+
+<p><tt>PREEMPT_RCU</tt> allows tasks to be preempted in the
+midst of their RCU read-side critical sections, and these tasks
+must be tracked explicitly.
+The details of exactly why and how they are tracked will be covered
+in a separate article on RCU read-side processing.
+For now, it is enough to know that the <tt>rcu_node</tt>
+structure tracks them.
+
+<pre>
+ 1 struct list_head blkd_tasks;
+ 2 struct list_head *gp_tasks;
+ 3 struct list_head *exp_tasks;
+ 4 bool wait_blkd_tasks;
+</pre>
+
+<p>The <tt>->blkd_tasks</tt> field is a list header for
+the list of blocked and preempted tasks.
+As tasks undergo context switches within RCU read-side critical
+sections, their <tt>task_struct</tt> structures are enqueued
+(via the <tt>task_struct</tt>'s <tt>->rcu_node_entry</tt>
+field) onto the head of the <tt>->blkd_tasks</tt> list for the
+leaf <tt>rcu_node</tt> structure corresponding to the CPU
+on which the outgoing context switch executed.
+As these tasks later exit their RCU read-side critical sections,
+they remove themselves from the list.
+This list is therefore in reverse time order, so that if one of the tasks
+is blocking the current grace period, all subsequent tasks must
+also be blocking that same grace period.
+Therefore, a single pointer into this list suffices to track
+all tasks blocking a given grace period.
+That pointer is stored in <tt>->gp_tasks</tt> for normal
+grace periods and in <tt>->exp_tasks</tt> for expedited
+grace periods.
+These last two fields are <tt>NULL</tt> if there is
+no grace period in flight or if there are no blocked tasks
+preventing that grace period from completing.
+If either of these two pointers is referencing a task that
+removes itself from the <tt>->blkd_tasks</tt> list,
+then that task must advance the pointer to the next task on
+the list, or set the pointer to <tt>NULL</tt> if there
+are no subsequent tasks on the list.
+
+</p><p>For example, suppose that tasks T1, T2, and T3 are
+all hard-affinitied to the largest-numbered CPU in the system.
+Then if task T1 blocked in an RCU read-side
+critical section, then an expedited grace period started,
+then task T2 blocked in an RCU read-side critical section,
+then a normal grace period started, and finally task T3 blocked
+in an RCU read-side critical section, then the state of the
+last leaf <tt>rcu_node</tt> structure's blocked-task list
+would be as shown below:
+
+</p><p><img src="blkd_task.svg" alt="blkd_task.svg" width="60%">
+
+</p><p>Task T1 is blocking both grace periods, task T2 is
+blocking only the normal grace period, and task T3 is blocking
+neither grace period.
+Note that these tasks will not remove themselves from this list
+immediately upon resuming execution.
+They will instead remain on the list until they execute the outermost
+<tt>rcu_read_unlock()</tt> that ends their RCU read-side critical
+section.
+
+<p>
+The <tt>->wait_blkd_tasks</tt> field indicates whether or not
+the current grace period is waiting on a blocked task.
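+
+<p>For illustration, the pointer advancement described above might be
+sketched as follows (the kernel's actual code lives in the
+<tt>rcu_read_unlock()</tt> slow path and must also handle priority
+boosting and wakeups):
+
+</p><pre>
+/* Sketch: task t leaves its leaf rcu_node's blocked-task list. */
+/* (The leaf rcu_node's ->lock must be held.) */
+static void remove_blkd_task_sketch(struct rcu_node *rnp,
+				    struct task_struct *t)
+{
+	struct list_head *np = t->rcu_node_entry.next;
+
+	if (np == &rnp->blkd_tasks)
+		np = NULL;			/* No later blocked tasks. */
+	if (rnp->gp_tasks == &t->rcu_node_entry)
+		rnp->gp_tasks = np;		/* Advance normal-GP pointer. */
+	if (rnp->exp_tasks == &t->rcu_node_entry)
+		rnp->exp_tasks = np;		/* Advance expedited-GP pointer. */
+	list_del_init(&t->rcu_node_entry);	/* Off the list. */
+}
+</pre>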
+
+<h5>Sizing the <tt>rcu_node</tt> Array</h5>
+
+<p>The <tt>rcu_node</tt> array is sized via a series of
+C-preprocessor expressions as follows:
+
+<pre>
+ 1 #ifdef CONFIG_RCU_FANOUT
+ 2 #define RCU_FANOUT CONFIG_RCU_FANOUT
+ 3 #else
+ 4 # ifdef CONFIG_64BIT
+ 5 # define RCU_FANOUT 64
+ 6 # else
+ 7 # define RCU_FANOUT 32
+ 8 # endif
+ 9 #endif
+10
+11 #ifdef CONFIG_RCU_FANOUT_LEAF
+12 #define RCU_FANOUT_LEAF CONFIG_RCU_FANOUT_LEAF
+13 #else
+14 # ifdef CONFIG_64BIT
+15 # define RCU_FANOUT_LEAF 64
+16 # else
+17 # define RCU_FANOUT_LEAF 32
+18 # endif
+19 #endif
+20
+21 #define RCU_FANOUT_1 (RCU_FANOUT_LEAF)
+22 #define RCU_FANOUT_2 (RCU_FANOUT_1 * RCU_FANOUT)
+23 #define RCU_FANOUT_3 (RCU_FANOUT_2 * RCU_FANOUT)
+24 #define RCU_FANOUT_4 (RCU_FANOUT_3 * RCU_FANOUT)
+25
+26 #if NR_CPUS <= RCU_FANOUT_1
+27 # define RCU_NUM_LVLS 1
+28 # define NUM_RCU_LVL_0 1
+29 # define NUM_RCU_NODES NUM_RCU_LVL_0
+30 # define NUM_RCU_LVL_INIT { NUM_RCU_LVL_0 }
+31 # define RCU_NODE_NAME_INIT { "rcu_node_0" }
+32 # define RCU_FQS_NAME_INIT { "rcu_node_fqs_0" }
+33 # define RCU_EXP_NAME_INIT { "rcu_node_exp_0" }
+34 #elif NR_CPUS <= RCU_FANOUT_2
+35 # define RCU_NUM_LVLS 2
+36 # define NUM_RCU_LVL_0 1
+37 # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
+38 # define NUM_RCU_NODES (NUM_RCU_LVL_0 + NUM_RCU_LVL_1)
+39 # define NUM_RCU_LVL_INIT { NUM_RCU_LVL_0, NUM_RCU_LVL_1 }
+40 # define RCU_NODE_NAME_INIT { "rcu_node_0", "rcu_node_1" }
+41 # define RCU_FQS_NAME_INIT { "rcu_node_fqs_0", "rcu_node_fqs_1" }
+42 # define RCU_EXP_NAME_INIT { "rcu_node_exp_0", "rcu_node_exp_1" }
+43 #elif NR_CPUS <= RCU_FANOUT_3
+44 # define RCU_NUM_LVLS 3
+45 # define NUM_RCU_LVL_0 1
+46 # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
+47 # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
+48 # define NUM_RCU_NODES (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2)
+49 # define NUM_RCU_LVL_INIT { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2 }
+50 # define RCU_NODE_NAME_INIT { "rcu_node_0", "rcu_node_1", "rcu_node_2" }
+51 # define RCU_FQS_NAME_INIT { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2" }
+52 # define RCU_EXP_NAME_INIT { "rcu_node_exp_0", "rcu_node_exp_1", "rcu_node_exp_2" }
+53 #elif NR_CPUS <= RCU_FANOUT_4
+54 # define RCU_NUM_LVLS 4
+55 # define NUM_RCU_LVL_0 1
+56 # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3)
+57 # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
+58 # define NUM_RCU_LVL_3 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
+59 # define NUM_RCU_NODES (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3)
+60 # define NUM_RCU_LVL_INIT { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2, NUM_RCU_LVL_3 }
+61 # define RCU_NODE_NAME_INIT { "rcu_node_0", "rcu_node_1", "rcu_node_2", "rcu_node_3" }
+62 # define RCU_FQS_NAME_INIT { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2", "rcu_node_fqs_3" }
+63 # define RCU_EXP_NAME_INIT { "rcu_node_exp_0", "rcu_node_exp_1", "rcu_node_exp_2", "rcu_node_exp_3" }
+64 #else
+65 # error "CONFIG_RCU_FANOUT insufficient for NR_CPUS"
+66 #endif
+</pre>
+
+<p>The maximum number of levels in the <tt>rcu_node</tt> structure
+is currently limited to four, as specified by lines 21-24
+and the structure of the subsequent “if” statement.
+For 32-bit systems, this allows 16*32*32*32=524,288 CPUs, which
+should be sufficient for the next few years at least.
+For 64-bit systems, 16*64*64*64=4,194,304 CPUs is allowed, which
+should see us through the next decade or so.
+This four-level tree also allows kernels built with
+<tt>CONFIG_RCU_FANOUT=8</tt> to support up to 4096 CPUs,
+which might be useful in very large systems having eight CPUs per
+socket (but please note that no one has yet shown any measurable
+performance degradation due to misaligned socket and <tt>rcu_node</tt>
+boundaries).
+In addition, building kernels with a full four levels of <tt>rcu_node</tt>
+tree permits better testing of RCU's combining-tree code.
+
+</p><p>The <tt>RCU_FANOUT</tt> symbol controls how many children
+are permitted at each non-leaf level of the <tt>rcu_node</tt> tree.
+If the <tt>CONFIG_RCU_FANOUT</tt> Kconfig option is not specified,
+it is set based on the word size of the system, which is also
+the Kconfig default.
+
+</p><p>The <tt>RCU_FANOUT_LEAF</tt> symbol controls how many CPUs are
+handled by each leaf <tt>rcu_node</tt> structure.
+Experience has shown that allowing a given leaf <tt>rcu_node</tt>
+structure to handle 64 CPUs, as permitted by the number of bits in
+the <tt>->qsmask</tt> field on a 64-bit system, results in
+excessive contention for the leaf <tt>rcu_node</tt> structures'
+<tt>->lock</tt> fields.
+The number of CPUs per leaf <tt>rcu_node</tt> structure is therefore
+limited to 16 given the default value of <tt>CONFIG_RCU_FANOUT_LEAF</tt>.
+If <tt>CONFIG_RCU_FANOUT_LEAF</tt> is unspecified, the value
+selected is based on the word size of the system, just as for
+<tt>CONFIG_RCU_FANOUT</tt>.
+Lines 11-19 perform this computation.
+
+</p><p>Lines 21-24 compute the maximum number of CPUs supported by
+a single-level (which contains a single <tt>rcu_node</tt> structure),
+two-level, three-level, and four-level <tt>rcu_node</tt> tree,
+respectively, given the fanout specified by <tt>RCU_FANOUT</tt>
+and <tt>RCU_FANOUT_LEAF</tt>.
+These numbers of CPUs are retained in the
+<tt>RCU_FANOUT_1</tt>,
+<tt>RCU_FANOUT_2</tt>,
+<tt>RCU_FANOUT_3</tt>, and
+<tt>RCU_FANOUT_4</tt>
+C-preprocessor variables, respectively.
+
+</p><p>These variables are used to control the C-preprocessor <tt>#if</tt>
+statement spanning lines 26-66 that computes the number of
+<tt>rcu_node</tt> structures required for each level of the tree,
+as well as the number of levels required.
+The number of levels is placed in the <tt>NUM_RCU_LVLS</tt>
+C-preprocessor variable by lines 27, 35, 44, and 54.
+The number of <tt>rcu_node</tt> structures for the topmost level
+of the tree is always exactly one, and this value is unconditionally
+placed into <tt>NUM_RCU_LVL_0</tt> by lines 28, 36, 45, and 55.
+The numbers of <tt>rcu_node</tt> structures at the remaining levels
+(if any) of the tree are computed by dividing the maximum number of
+CPUs by the fanout supported by the number of levels from the current
+level down, rounding up. This computation is performed by lines 37,
+46-47, and 56-58.
+Lines 31-33, 40-42, 50-52, and 61-63 create initializers
+for lockdep lock-class names.
+Finally, lines 64-66 produce an error if the maximum number of
+CPUs is too large for the specified fanout.
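+
+</p><p>As a worked example, consider a hypothetical (non-default)
+configuration with <tt>NR_CPUS=100000</tt>,
+<tt>CONFIG_RCU_FANOUT_LEAF=16</tt>, and <tt>CONFIG_RCU_FANOUT=64</tt>:
+
+<pre>
+/*
+ * RCU_FANOUT_1 = 16                       (CPUs per leaf)
+ * RCU_FANOUT_2 = 16 * 64     =     1,024
+ * RCU_FANOUT_3 = 1,024 * 64  =    65,536
+ * RCU_FANOUT_4 = 65,536 * 64 = 4,194,304
+ *
+ * Because 65,536 < 100,000 <= 4,194,304, the four-level branch applies:
+ *
+ * NUM_RCU_LVL_0 = 1
+ * NUM_RCU_LVL_1 = DIV_ROUND_UP(100000, 65536) =    2
+ * NUM_RCU_LVL_2 = DIV_ROUND_UP(100000,  1024) =   98
+ * NUM_RCU_LVL_3 = DIV_ROUND_UP(100000,    16) = 6250
+ * NUM_RCU_NODES = 1 + 2 + 98 + 6250           = 6351
+ */
+</pre>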
+
+<h3><a name="The rcu_data Structure">
+The <tt>rcu_data</tt> Structure</a></h3>
+
+<p>The <tt>rcu_data</tt> structure maintains the per-CPU state for
+the corresponding flavor of RCU.
+The fields in this structure may be accessed only from the corresponding
+CPU (and from tracing) unless otherwise stated.
+This structure is the
+focus of quiescent-state detection and RCU callback queuing.
+It also tracks its relationship to the corresponding leaf
+<tt>rcu_node</tt> structure to allow more-efficient
+propagation of quiescent states up the <tt>rcu_node</tt>
+combining tree.
+Like the <tt>rcu_node</tt> structure, it provides a local
+copy of the grace-period information to allow synchronization-free
+access to this information from the corresponding CPU.
+Finally, this structure records past dyntick-idle state
+for the corresponding CPU and also tracks statistics.
+
+</p><p>The <tt>rcu_data</tt> structure's fields are discussed,
+singly and in groups, in the following sections.
+
+<h5>Connection to Other Data Structures</h5>
+
+<p>This portion of the <tt>rcu_data</tt> structure is declared
+as follows:
+
+<pre>
+ 1 int cpu;
+ 2 struct rcu_state *rsp;
+ 3 struct rcu_node *mynode;
+ 4 struct rcu_dynticks *dynticks;
+ 5 unsigned long grpmask;
+ 6 bool beenonline;
+</pre>
+
+<p>The <tt>->cpu</tt> field contains the number of the
+corresponding CPU, the <tt>->rsp</tt> pointer references
+the corresponding <tt>rcu_state</tt> structure (and is most frequently
+used to locate the name of the corresponding flavor of RCU for tracing),
+and the <tt>->mynode</tt> field references the corresponding
+<tt>rcu_node</tt> structure.
+The <tt>->mynode</tt> field is used to propagate quiescent states
+up the combining tree.
+<p>The <tt>->dynticks</tt> pointer references the
+<tt>rcu_dynticks</tt> structure corresponding to this
+CPU.
+Recall that a single per-CPU instance of the <tt>rcu_dynticks</tt>
+structure is shared among all flavors of RCU.
+These first four fields are constant and therefore require no
+synchronization.
+
+</p><p>The <tt>->grpmask</tt> field indicates the bit in
+the <tt>->mynode->qsmask</tt> field corresponding to this
+<tt>rcu_data</tt> structure, and is also used when propagating
+quiescent states.
+The <tt>->beenonline</tt> flag is set whenever the corresponding
+CPU comes online, which means that the debugfs tracing need not dump
+out any <tt>rcu_data</tt> structure for which this flag is not set.
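+
+</p><p>As a rough sketch of how <tt>->grpmask</tt> and
+<tt>->mynode</tt> work together (simplified; the real code also
+acquires the leaf <tt>rcu_node</tt> structure's <tt>->lock</tt> and
+handles many additional cases):
+
+<pre>
+/* Simplified quiescent-state report for the CPU owning rdp. */
+struct rcu_node *rnp = rdp->mynode;
+
+if (rnp->qsmask & rdp->grpmask)       /* QS still needed from this CPU? */
+        rnp->qsmask &= ~rdp->grpmask; /* Record this CPU's QS. */
+</pre>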
+
+<h5>Quiescent-State and Grace-Period Tracking</h5>
+
+<p>This portion of the <tt>rcu_data</tt> structure is declared
+as follows:
+
+<pre>
+ 1 unsigned long completed;
+ 2 unsigned long gpnum;
+ 3 bool cpu_no_qs;
+ 4 bool core_needs_qs;
+ 5 bool gpwrap;
+ 6 unsigned long rcu_qs_ctr_snap;
+</pre>
+
+<p>The <tt>->completed</tt> and <tt>->gpnum</tt>
+fields are the counterparts of the fields of the same name
+in the <tt>rcu_state</tt> and <tt>rcu_node</tt> structures.
+They may each lag up to one behind their <tt>rcu_node</tt>
+counterparts, but in <tt>CONFIG_NO_HZ_IDLE</tt> and
+<tt>CONFIG_NO_HZ_FULL</tt> kernels can lag
+arbitrarily far behind for CPUs in dyntick-idle mode (but these counters
+will catch up upon exit from dyntick-idle mode).
+If a given <tt>rcu_data</tt> structure's <tt>->gpnum</tt> and
+<tt>->completed</tt> fields are equal, then this <tt>rcu_data</tt>
+structure believes that RCU is idle.
+Otherwise, as with the <tt>rcu_state</tt> and <tt>rcu_node</tt>
+structures,
+the <tt>->gpnum</tt> field will be one greater than the
+<tt>->completed</tt> field, with <tt>->gpnum</tt>
+indicating which grace period this <tt>rcu_data</tt> structure believes
+is still being waited for.
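+
+</p><p>In code form, the check described above amounts to the
+following sketch:
+
+<pre>
+if (rdp->gpnum == rdp->completed) {
+        /* This CPU believes that RCU is idle. */
+} else {
+        /* rdp->gpnum == rdp->completed + 1: this CPU believes
+         * that grace period rdp->gpnum is still in progress. */
+}
+</pre>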
+
+<table>
+<tr><th> </th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+ All this replication of the grace period numbers can only cause
+ massive confusion.
+ Why not just keep a global pair of counters and be done with it???
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+ Because if there was only a single global pair of grace-period
+ numbers, there would need to be a single global lock to allow
+ safely accessing and updating them.
+ And if we are not going to have a single global lock, we need
+ to carefully manage the numbers on a per-node basis.
+ Recall from the answer to a previous Quick Quiz that the consequences
+ of applying a previously sampled quiescent state to the wrong
+ grace period are quite severe.
+</font></td></tr>
+<tr><td> </td></tr>
+</table>
+
+<p>The <tt>->cpu_no_qs</tt> flag indicates that the
+CPU has not yet passed through a quiescent state,
+while the <tt>->core_needs_qs</tt> flag indicates that the
+RCU core needs a quiescent state from the corresponding CPU.
+The <tt>->gpwrap</tt> field indicates that the corresponding
+CPU has remained idle for so long that the <tt>->completed</tt>
+and <tt>->gpnum</tt> counters are in danger of overflow, which
+will cause the CPU to disregard the values of its counters on
+its next exit from idle.
+Finally, the <tt>->rcu_qs_ctr_snap</tt> field is used to detect
+cases where a given operation has resulted in a quiescent state
+for all flavors of RCU, for example, <tt>cond_resched_rcu_qs()</tt>.
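+
+</p><p>For example, a long-running in-kernel loop might supply such
+quiescent states as follows (with <tt>process_item()</tt> standing in
+for hypothetical per-iteration work):
+
+<pre>
+for (i = 0; i < nr_items; i++) {
+        process_item(i);       /* Hypothetical unit of work. */
+        cond_resched_rcu_qs(); /* QS for all flavors of RCU. */
+}
+</pre>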
+
+<h5>RCU Callback Handling</h5>
+
+<p>In the absence of CPU-hotplug events, RCU callbacks are invoked by
+the same CPU that registered them.
+This is strictly a cache-locality optimization: callbacks can and
+do get invoked on CPUs other than the one that registered them.
+After all, if the CPU that registered a given callback has gone
+offline before the callback can be invoked, there really is no other
+choice.
+
+</p><p>This portion of the <tt>rcu_data</tt> structure is declared
+as follows:
+
+<pre>
+ 1 struct rcu_head *nxtlist;
+ 2 struct rcu_head **nxttail[RCU_NEXT_SIZE];
+ 3 unsigned long nxtcompleted[RCU_NEXT_SIZE];
+ 4 long qlen_lazy;
+ 5 long qlen;
+ 6 long qlen_last_fqs_check;
+ 7 unsigned long n_force_qs_snap;
+ 8 unsigned long n_cbs_invoked;
+ 9 unsigned long n_cbs_orphaned;
+10 unsigned long n_cbs_adopted;
+11 long blimit;
+</pre>
+
+<p>The <tt>->nxtlist</tt> pointer and the
+<tt>->nxttail[]</tt> array form a four-segment list with
+older callbacks near the head and newer ones near the tail.
+Each segment contains callbacks with the corresponding relationship
+to the current grace period.
+The pointer out of the end of each of the four segments is referenced
+by the element of the <tt>->nxttail[]</tt> array indexed by
+<tt>RCU_DONE_TAIL</tt> (for callbacks handled by a prior grace period),
+<tt>RCU_WAIT_TAIL</tt> (for callbacks waiting on the current grace period),
+<tt>RCU_NEXT_READY_TAIL</tt> (for callbacks that will wait on the next
+grace period), and
+<tt>RCU_NEXT_TAIL</tt> (for callbacks that are not yet associated
+with a specific grace period),
+respectively, as shown in the following figure.
+
+</p><p><img src="nxtlist.svg" alt="nxtlist.svg" width="40%">
+
+</p><p>In this figure, the <tt>->nxtlist</tt> pointer references the
+first
+RCU callback in the list.
+The <tt>->nxttail[RCU_DONE_TAIL]</tt> array element references
+the <tt>->nxtlist</tt> pointer itself, indicating that none
+of the callbacks is ready to invoke.
+The <tt>->nxttail[RCU_WAIT_TAIL]</tt> array element references callback
+CB 2's <tt>->next</tt> pointer, which indicates that
+CB 1 and CB 2 are both waiting on the current grace period.
+The <tt>->nxttail[RCU_NEXT_READY_TAIL]</tt> array element
+references the same RCU callback that <tt>->nxttail[RCU_WAIT_TAIL]</tt>
+does, which indicates that there are no callbacks waiting on the next
+RCU grace period.
+The <tt>->nxttail[RCU_NEXT_TAIL]</tt> array element references
+CB 4's <tt>->next</tt> pointer, indicating that all the
+remaining RCU callbacks have not yet been assigned to an RCU grace
+period.
+Note that the <tt>->nxttail[RCU_NEXT_TAIL]</tt> array element
+always references the last RCU callback's <tt>->next</tt> pointer
+unless the callback list is empty, in which case it references
+the <tt>->nxtlist</tt> pointer.
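+
+</p><p>A new callback is therefore enqueued at the
+<tt>RCU_NEXT_TAIL</tt> end of the list, which in simplified form
+(ignoring interrupt disabling, locking, and most counter updates)
+looks like this:
+
+<pre>
+rhp->next = NULL;
+*rdp->nxttail[RCU_NEXT_TAIL] = rhp;       /* Link callback at the end. */
+rdp->nxttail[RCU_NEXT_TAIL] = &rhp->next; /* Tail now trails rhp. */
+rdp->qlen++;
+</pre>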
+
+</p><p>CPUs advance their callbacks from the
+<tt>RCU_NEXT_TAIL</tt> to the <tt>RCU_NEXT_READY_TAIL</tt> to the
+<tt>RCU_WAIT_TAIL</tt> to the <tt>RCU_DONE_TAIL</tt> list segments
+as grace periods advance.
+The CPU advances the callbacks in its <tt>rcu_data</tt> structure
+whenever it notices that another RCU grace period has completed.
+The CPU detects the completion of an RCU grace period by noticing
+that the value of its <tt>rcu_data</tt> structure's
+<tt>->completed</tt> field differs from that of its leaf
+<tt>rcu_node</tt> structure.
+Recall that each <tt>rcu_node</tt> structure's
+<tt>->completed</tt> field is updated at the end of each
+grace period.
+
+</p><p>The <tt>->nxtcompleted[]</tt> array records grace-period
+numbers corresponding to the list segments.
+This allows CPUs that go idle for extended periods to determine
+which of their callbacks are ready to be invoked after reawakening.
+
+</p><p>The <tt>->qlen</tt> counter contains the number of
+callbacks in <tt>->nxtlist</tt>, and the
+<tt>->qlen_lazy</tt> counter contains the number of those callbacks that
+are known to only free memory, and whose invocation can therefore
+be safely deferred.
+The <tt>->qlen_last_fqs_check</tt> and
+<tt>->n_force_qs_snap</tt> fields coordinate the forcing of quiescent
+states from <tt>call_rcu()</tt> and friends when callback
+lists grow excessively long.
+
+</p><p>The <tt>->n_cbs_invoked</tt>,
+<tt>->n_cbs_orphaned</tt>, and <tt>->n_cbs_adopted</tt>
+fields count the number of callbacks invoked, the number
+sent to other CPUs when this CPU goes offline, and the number
+received from other CPUs when those other CPUs go offline, respectively.
+Finally, the <tt>->blimit</tt> counter is the maximum number of
+RCU callbacks that may be invoked at a given time.
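+
+</p><p>Conceptually, callback invocation is batched as in the
+following sketch, where the hypothetical
+<tt>next_done_callback()</tt> stands in for the real list
+manipulation:
+
+<pre>
+struct rcu_head *rhp;
+long count = 0;
+
+while ((rhp = next_done_callback(rdp)) != NULL) {
+        rhp->func(rhp);             /* Invoke the callback. */
+        if (++count >= rdp->blimit) /* Enforce the batch limit... */
+                break;              /* ...to bound invocation latency. */
+}
+</pre>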
+
+<h5>Dyntick-Idle Handling</h5>
+
+<p>This portion of the <tt>rcu_data</tt> structure is declared
+as follows:
+
+<pre>
+ 1 int dynticks_snap;
+ 2 unsigned long dynticks_fqs;
+</pre>
+
+<p>The <tt>->dynticks_snap</tt> field is used to take a snapshot
+of the corresponding CPU's dyntick-idle state when forcing
+quiescent states, and is therefore accessed from other CPUs.
+Finally, the <tt>->dynticks_fqs</tt> field is used to
+count the number of times this CPU is determined to be in
+dyntick-idle state, and is used for tracing and debugging purposes.
+
+<h3><a name="The rcu_dynticks Structure">
+The <tt>rcu_dynticks</tt> Structure</a></h3>
+
+<p>The <tt>rcu_dynticks</tt> structure maintains the per-CPU
+dyntick-idle state for the corresponding CPU.
+Unlike the other structures, <tt>rcu_dynticks</tt> is not
+replicated over the different flavors of RCU.
+The fields in this structure may be accessed only from the corresponding
+CPU (and from tracing) unless otherwise stated.
+Its fields are as follows:
+
+<pre>
+ 1 int dynticks_nesting;
+ 2 int dynticks_nmi_nesting;
+ 3 atomic_t dynticks;
+</pre>
+
+<p>The <tt>->dynticks_nesting</tt> field counts the
+nesting depth of normal interrupts.
+In addition, this counter is incremented when exiting dyntick-idle
+mode and decremented when entering it.
+This counter can therefore be thought of as counting the number
+of reasons why this CPU cannot be permitted to enter dyntick-idle
+mode, aside from non-maskable interrupts (NMIs).
+NMIs are counted by the <tt>->dynticks_nmi_nesting</tt>
+field, except that NMIs that interrupt non-dyntick-idle execution
+are not counted.
+
+</p><p>Finally, the <tt>->dynticks</tt> field counts the corresponding
+CPU's transitions to and from dyntick-idle mode, so that this counter
+has an even value when the CPU is in dyntick-idle mode and an odd
+value otherwise.
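+
+</p><p>A sketch of how another CPU uses this counter when forcing
+quiescent states (the snapshot lands in <tt>->dynticks_snap</tt>, as
+described earlier):
+
+<pre>
+int snap = atomic_read(&rdtp->dynticks);
+
+if (!(snap & 0x1)) {
+        /* Even value: the CPU is in dyntick-idle mode, which
+         * is an implicit quiescent state. */
+}
+</pre>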
+
+<table>
+<tr><th> </th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+ Why not just count all NMIs?
+ Wouldn't that be simpler and less error prone?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+ It seems simpler only until you think hard about how to go about
+ updating the <tt>rcu_dynticks</tt> structure's
+ <tt>->dynticks</tt> field.
+</font></td></tr>
+<tr><td> </td></tr>
+</table>
+
+<p>Additional fields are present for some special-purpose
+builds, and are discussed separately.
+
+<h3><a name="The rcu_head Structure">
+The <tt>rcu_head</tt> Structure</a></h3>
+
+<p>Each <tt>rcu_head</tt> structure represents an RCU callback.
+These structures are normally embedded within RCU-protected data
+structures whose algorithms use asynchronous grace periods.
+In contrast, when using algorithms that block waiting for RCU grace periods,
+RCU users need not provide <tt>rcu_head</tt> structures.
+
+</p><p>The <tt>rcu_head</tt> structure has fields as follows:
+
+<pre>
+ 1 struct rcu_head *next;
+ 2 void (*func)(struct rcu_head *head);
+</pre>
+
+<p>The <tt>->next</tt> field is used
+to link the <tt>rcu_head</tt> structures together in the
+lists within the <tt>rcu_data</tt> structures.
+The <tt>->func</tt> field is a pointer to the function
+to be called when the callback is ready to be invoked, and
+this function is passed a pointer to the <tt>rcu_head</tt>
+structure.
+However, <tt>kfree_rcu()</tt> uses the <tt>->func</tt>
+field to record the offset of the <tt>rcu_head</tt>
+structure within the enclosing RCU-protected data structure.
+
+</p><p>Both of these fields are used internally by RCU.
+From the viewpoint of RCU users, this structure is an
+opaque “cookie”.
+
+<table>
+<tr><th> </th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+ Given that the callback function <tt>->func</tt>
+ is passed a pointer to the <tt>rcu_head</tt> structure,
+ how is that function supposed to find the beginning of the
+ enclosing RCU-protected data structure?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+ In actual practice, there is a separate callback function per
+ type of RCU-protected data structure.
+ The callback function can therefore use the <tt>container_of()</tt>
+ macro in the Linux kernel (or other pointer-manipulation facilities
+ in other software environments) to find the beginning of the
+ enclosing structure.
+</font></td></tr>
+<tr><td> </td></tr>
+</table>
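+
+<p>For example, a typical callback might look as follows:
+
+<pre>
+struct foo {
+        int a;
+        struct rcu_head rh;
+};
+
+static void foo_reclaim(struct rcu_head *rhp)
+{
+        struct foo *fp = container_of(rhp, struct foo, rh);
+
+        kfree(fp);
+}
+
+/* Registered as: call_rcu(&fp->rh, foo_reclaim); */
+</pre>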
+
+<h3><a name="RCU-Specific Fields in the task_struct Structure">
+RCU-Specific Fields in the <tt>task_struct</tt> Structure</a></h3>
+
+<p>The <tt>CONFIG_PREEMPT_RCU</tt> implementation uses some
+additional fields in the <tt>task_struct</tt> structure:
+
+<pre>
+ 1 #ifdef CONFIG_PREEMPT_RCU
+ 2 int rcu_read_lock_nesting;
+ 3 union rcu_special rcu_read_unlock_special;
+ 4 struct list_head rcu_node_entry;
+ 5 struct rcu_node *rcu_blocked_node;
+ 6 #endif /* #ifdef CONFIG_PREEMPT_RCU */
+ 7 #ifdef CONFIG_TASKS_RCU
+ 8 unsigned long rcu_tasks_nvcsw;
+ 9 bool rcu_tasks_holdout;
+10 struct list_head rcu_tasks_holdout_list;
+11 int rcu_tasks_idle_cpu;
+12 #endif /* #ifdef CONFIG_TASKS_RCU */
+</pre>
+
+<p>The <tt>->rcu_read_lock_nesting</tt> field records the
+nesting level for RCU read-side critical sections, and
+the <tt>->rcu_read_unlock_special</tt> field is a bitmask
+that records special conditions that require <tt>rcu_read_unlock()</tt>
+to do additional work.
+The <tt>->rcu_node_entry</tt> field is used to form lists of
+tasks that have blocked within preemptible-RCU read-side critical
+sections, and the <tt>->rcu_blocked_node</tt> field references
+the <tt>rcu_node</tt> structure whose list this task is a member of,
+or <tt>NULL</tt> if it is not blocked within a preemptible-RCU
+read-side critical section.
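+
+</p><p>The nesting count behaves as one would expect, as in this
+sketch:
+
+<pre>
+rcu_read_lock();   /* ->rcu_read_lock_nesting: 0 -> 1 */
+rcu_read_lock();   /* Nested:                  1 -> 2 */
+/* Preemption here would enqueue the task via ->rcu_node_entry. */
+rcu_read_unlock(); /*                          2 -> 1 */
+rcu_read_unlock(); /* 1 -> 0: ->rcu_read_unlock_special is checked. */
+</pre>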
+
+<p>The <tt>->rcu_tasks_nvcsw</tt> field tracks the number of
+voluntary context switches that this task had undergone at the
+beginning of the current tasks-RCU grace period,
+<tt>->rcu_tasks_holdout</tt> is set if the current tasks-RCU
+grace period is waiting on this task, <tt>->rcu_tasks_holdout_list</tt>
+is a list element enqueuing this task on the holdout list,
+and <tt>->rcu_tasks_idle_cpu</tt> tracks the CPU on which this
+idle task is running, but only if the task is currently running,
+that is, if the CPU is currently idle.
+
+<h3><a name="Accessor Functions">
+Accessor Functions</a></h3>
+
+<p>The following listing shows the
+<tt>rcu_get_root()</tt>, <tt>rcu_for_each_node_breadth_first()</tt>,
+<tt>rcu_for_each_nonleaf_node_breadth_first()</tt>, and
+<tt>rcu_for_each_leaf_node()</tt> function and macros:
+
+<pre>
+ 1 static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
+ 2 {
+ 3 return &rsp->node[0];
+ 4 }
+ 5
+ 6 #define rcu_for_each_node_breadth_first(rsp, rnp) \
+ 7 for ((rnp) = &(rsp)->node[0]; \
+ 8 (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++)
+ 9
+ 10 #define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \
+ 11 for ((rnp) = &(rsp)->node[0]; \
+ 12 (rnp) < (rsp)->level[NUM_RCU_LVLS - 1]; (rnp)++)
+ 13
+ 14 #define rcu_for_each_leaf_node(rsp, rnp) \
+ 15 for ((rnp) = (rsp)->level[NUM_RCU_LVLS - 1]; \
+ 16 (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++)
+</pre>
+
+<p>The <tt>rcu_get_root()</tt> function simply returns a pointer to the
+first element of the specified <tt>rcu_state</tt> structure's
+<tt>->node[]</tt> array, which is the root <tt>rcu_node</tt>
+structure.
+
+</p><p>As noted earlier, the <tt>rcu_for_each_node_breadth_first()</tt>
+macro takes advantage of the layout of the <tt>rcu_node</tt>
+structures in the <tt>rcu_state</tt> structure's
+<tt>->node[]</tt> array, performing a breadth-first traversal by
+simply traversing the array in order.
+The <tt>rcu_for_each_nonleaf_node_breadth_first()</tt> macro operates
+similarly, but traverses only the first part of the array, thus excluding
+the leaf <tt>rcu_node</tt> structures.
+Finally, the <tt>rcu_for_each_leaf_node()</tt> macro traverses only
+the last part of the array, thus traversing only the leaf
+<tt>rcu_node</tt> structures.
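+
+</p><p>For example, a hypothetical debugging loop might visit each
+leaf as follows:
+
+<pre>
+struct rcu_node *rnp;
+
+rcu_for_each_leaf_node(rsp, rnp)
+        pr_info("leaf rcu_node ->qsmask: %#lx\n", rnp->qsmask);
+</pre>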
+
+<table>
+<tr><th> </th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+ What do <tt>rcu_for_each_nonleaf_node_breadth_first()</tt> and
+ <tt>rcu_for_each_leaf_node()</tt> do if the <tt>rcu_node</tt> tree
+ contains only a single node?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+ In the single-node case,
+ <tt>rcu_for_each_nonleaf_node_breadth_first()</tt> is a no-op
+ and <tt>rcu_for_each_leaf_node()</tt> traverses the single node.
+</font></td></tr>
+<tr><td> </td></tr>
+</table>
+
+<h3><a name="Summary">
+Summary</a></h3>
+
+<p>So each flavor of RCU is represented by an <tt>rcu_state</tt> structure,
+which contains a combining tree of <tt>rcu_node</tt> and
+<tt>rcu_data</tt> structures.
+Finally, in <tt>CONFIG_NO_HZ_IDLE</tt> kernels, each CPU's dyntick-idle
+state is tracked by an <tt>rcu_dynticks</tt> structure.
+
+</p><p>If you made it this far, you are well prepared to read the code
+walkthroughs in the other articles in this series.
+
+<h3><a name="Acknowledgments">
+Acknowledgments</a></h3>
+
+<p>I owe thanks to Cyrill Gorcunov, Mathieu Desnoyers, Dhaval Giani, Paul
+Turner, Abhishek Srivastava, Matt Kowalczyk, and Serge Hallyn
+for helping me get this document into a more human-readable state.
+
+<h3><a name="Legal Statement">
+Legal Statement</a></h3>
+
+<p>This work represents the view of the author and does not necessarily
+represent the view of IBM.
+
+</p><p>Linux is a registered trademark of Linus Torvalds.
+
+</p><p>Other company, product, and service names may be trademarks or
+service marks of others.
+
+</body></html>
diff --git a/Documentation/RCU/Design/Data-Structures/HugeTreeClassicRCU.svg b/Documentation/RCU/Design/Data-Structures/HugeTreeClassicRCU.svg
new file mode 100644
index 0000000..2bf12b4
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/HugeTreeClassicRCU.svg
@@ -0,0 +1,939 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
+
+<!-- CreationDate: Wed Dec 9 17:37:22 2015 -->
+
+<!-- Magnification: 3.000 -->
+
+<svg
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:cc="http://creativecommons.org/ns#"
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:svg="http://www.w3.org/2000/svg"
+ xmlns="http://www.w3.org/2000/svg"
+ xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+ xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+ width="15.1in"
+ height="11.2in"
+ viewBox="-66 -66 18087 13407"
+ id="svg2"
+ version="1.1"
+ inkscape:version="0.48.4 r9939"
+ sodipodi:docname="HugeTreeClassicRCU.fig">
+ <metadata
+ id="metadata224">
+ <rdf:RDF>
+ <cc:Work
+ rdf:about="">
+ <dc:format>image/svg+xml</dc:format>
+ <dc:type
+ rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+ <dc:title></dc:title>
+ </cc:Work>
+ </rdf:RDF>
+ </metadata>
+ <defs
+ id="defs222">
+ <marker
+ inkscape:stockid="Arrow1Mend"
+ orient="auto"
+ refY="0.0"
+ refX="0.0"
+ id="Arrow1Mend"
+ style="overflow:visible;">
+ <path
+ id="path3982"
+ d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
+ style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;"
+ transform="scale(0.4) rotate(180) translate(10,0)" />
+ </marker>
+ </defs>
+ <sodipodi:namedview
+ pagecolor="#ffffff"
+ bordercolor="#666666"
+ borderopacity="1"
+ objecttolerance="10"
+ gridtolerance="10"
+ guidetolerance="10"
+ inkscape:pageopacity="0"
+ inkscape:pageshadow="2"
+ inkscape:window-width="1134"
+ inkscape:window-height="789"
+ id="namedview220"
+ showgrid="false"
+ inkscape:zoom="0.60515873"
+ inkscape:cx="679.5"
+ inkscape:cy="504"
+ inkscape:window-x="786"
+ inkscape:window-y="24"
+ inkscape:window-maximized="0"
+ inkscape:current-layer="g4" />
+ <g
+ style="stroke-width:.025in; fill:none"
+ id="g4">
+ <!-- Line: box -->
+ <rect
+ x="450"
+ y="0"
+ width="17100"
+ height="8325"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+ id="rect6" />
+ <!-- Line: box -->
+ <rect
+ x="11025"
+ y="3600"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect8" />
+ <!-- Line: box -->
+ <rect
+ x="4275"
+ y="3600"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect10" />
+ <!-- Line: box -->
+ <rect
+ x="5400"
+ y="6300"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect12" />
+ <!-- Line: box -->
+ <rect
+ x="9900"
+ y="6300"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect14" />
+ <!-- Line: box -->
+ <rect
+ x="14400"
+ y="6300"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect16" />
+ <!-- Line: box -->
+ <rect
+ x="900"
+ y="6300"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect18" />
+ <!-- Line: box -->
+ <rect
+ x="7650"
+ y="900"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect20" />
+ <!-- Line -->
+ <polyline
+ points="3150,9225 3150,7746 "
+ style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline22" />
+ <!-- Arrowhead on XXXpoint 3150 9225 - 3150 7560-->
+ <!-- Circle -->
+ <circle
+ cx="8550"
+ cy="4275"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle26" />
+ <!-- Circle -->
+ <circle
+ cx="9000"
+ cy="4275"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle28" />
+ <!-- Circle -->
+ <circle
+ cx="9450"
+ cy="4275"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle30" />
+ <!-- Line -->
+ <polyline
+ points="6750,6300 8250,5010 "
+ style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline32" />
+ <!-- Arrowhead on XXXpoint 6750 6300 - 8391 4890-->
+ <!-- Line -->
+ <polyline
+ points="11250,6300 9747,5010 "
+ style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline36" />
+ <!-- Arrowhead on XXXpoint 11250 6300 - 9606 4890-->
+ <!-- Circle -->
+ <circle
+ cx="13950"
+ cy="6975"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle40" />
+ <!-- Circle -->
+ <circle
+ cx="13500"
+ cy="6975"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle42" />
+ <!-- Circle -->
+ <circle
+ cx="13050"
+ cy="6975"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle44" />
+ <!-- Circle -->
+ <circle
+ cx="9450"
+ cy="6975"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle46" />
+ <!-- Circle -->
+ <circle
+ cx="9000"
+ cy="6975"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle48" />
+ <!-- Circle -->
+ <circle
+ cx="8550"
+ cy="6975"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle50" />
+ <!-- Circle -->
+ <circle
+ cx="4950"
+ cy="6975"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle52" />
+ <!-- Circle -->
+ <circle
+ cx="4500"
+ cy="6975"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle54" />
+ <!-- Circle -->
+ <circle
+ cx="4050"
+ cy="6975"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle56" />
+ <!-- Circle -->
+ <circle
+ cx="1800"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle58" />
+ <!-- Circle -->
+ <circle
+ cx="2250"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle60" />
+ <!-- Circle -->
+ <circle
+ cx="2700"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle62" />
+ <!-- Circle -->
+ <circle
+ cx="15300"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle64" />
+ <!-- Circle -->
+ <circle
+ cx="15750"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle66" />
+ <!-- Circle -->
+ <circle
+ cx="16200"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle68" />
+ <!-- Circle -->
+ <circle
+ cx="10800"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle70" />
+ <!-- Circle -->
+ <circle
+ cx="11250"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle72" />
+ <!-- Circle -->
+ <circle
+ cx="11700"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle74" />
+ <!-- Circle -->
+ <circle
+ cx="6300"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle76" />
+ <!-- Circle -->
+ <circle
+ cx="6750"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle78" />
+ <!-- Circle -->
+ <circle
+ cx="7200"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle80" />
+ <!-- Line: box -->
+ <rect
+ x="0"
+ y="11475"
+ width="2700"
+ height="1800"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect82" />
+ <!-- Line: box -->
+ <rect
+ x="1800"
+ y="9225"
+ width="2700"
+ height="1800"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect84" />
+ <!-- Line: box -->
+ <rect
+ x="4500"
+ y="11475"
+ width="2700"
+ height="1800"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect86" />
+ <!-- Line: box -->
+ <rect
+ x="6300"
+ y="9270"
+ width="2700"
+ height="1800"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect88" />
+ <!-- Line: box -->
+ <rect
+ x="8955"
+ y="11475"
+ width="2700"
+ height="1800"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect90" />
+ <!-- Line: box -->
+ <rect
+ x="10755"
+ y="9270"
+ width="2700"
+ height="1800"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect92" />
+ <!-- Line: box -->
+ <rect
+ x="13455"
+ y="11475"
+ width="2700"
+ height="1800"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect94" />
+ <!-- Line: box -->
+ <rect
+ x="15255"
+ y="9270"
+ width="2700"
+ height="1800"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect96" />
+ <!-- Line -->
+ <polyline
+ points="11700,3600 10197,2310 "
+ style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline98" />
+ <!-- Arrowhead on XXXpoint 11700 3600 - 10056 2190-->
+ <!-- Line -->
+ <polyline
+ points="6300,3600 7800,2310 "
+ style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline102" />
+ <!-- Arrowhead on XXXpoint 6300 3600 - 7941 2190-->
+ <!-- Line -->
+ <polyline
+ points="3150,6300 4650,5010 "
+ style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline106" />
+ <!-- Arrowhead on XXXpoint 3150 6300 - 4791 4890-->
+ <!-- Line -->
+ <polyline
+ points="14850,6300 13347,5010 "
+ style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline110" />
+ <!-- Arrowhead on XXXpoint 14850 6300 - 13206 4890-->
+ <!-- Line -->
+ <polyline
+ points="1350,11475 1350,7746 "
+ style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline114" />
+ <!-- Arrowhead on XXXpoint 1350 11475 - 1350 7560-->
+ <!-- Line -->
+ <polyline
+ points="16650,9225 16650,7746 "
+ style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline118" />
+ <!-- Arrowhead on XXXpoint 16650 9225 - 16650 7560-->
+ <!-- Line -->
+ <polyline
+ points="14850,11475 14850,7746 "
+ style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline122" />
+ <!-- Arrowhead on XXXpoint 14850 11475 - 14850 7560-->
+ <!-- Line -->
+ <polyline
+ points="12150,9225 12150,7746 "
+ style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline126" />
+ <!-- Arrowhead on XXXpoint 12150 9225 - 12150 7560-->
+ <!-- Line -->
+ <polyline
+ points="10350,11475 10350,7746 "
+ style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline130" />
+ <!-- Arrowhead on XXXpoint 10350 11475 - 10350 7560-->
+ <!-- Line -->
+ <polyline
+ points="7650,9225 7650,7746 "
+ style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline134" />
+ <!-- Arrowhead on XXXpoint 7650 9225 - 7650 7560-->
+ <!-- Line -->
+ <polyline
+ points="5850,11475 5850,7746 "
+ style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline138" />
+ <!-- Arrowhead on XXXpoint 5850 11475 - 5850 7560-->
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="12375"
+ y="4500"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text142">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="12375"
+ y="4050"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text144">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5625"
+ y="4050"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text146">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5625"
+ y="4500"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text148">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="6750"
+ y="6750"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text150">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="6750"
+ y="7200"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text152">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="11250"
+ y="7200"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text154">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="11250"
+ y="6750"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text156">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="15750"
+ y="7200"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text158">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="15750"
+ y="6750"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text160">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2250"
+ y="6750"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text162">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2250"
+ y="7200"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text164">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1350"
+ y="13050"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text166">CPU 0</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1350"
+ y="11925"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text168">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1350"
+ y="12375"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text170">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="3150"
+ y="10800"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text172">CPU 15</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="3150"
+ y="9675"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text174">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="3150"
+ y="10125"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text176">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5850"
+ y="11925"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text178">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5850"
+ y="12375"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text180">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5850"
+ y="13050"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text182">CPU 21823</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="7650"
+ y="10845"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text184">CPU 21839</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="7650"
+ y="10170"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text186">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="7650"
+ y="9720"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text188">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="10305"
+ y="11925"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text190">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="10305"
+ y="12375"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text192">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="10305"
+ y="13050"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text194">CPU 43679</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="12105"
+ y="10845"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text196">CPU 43695</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="12105"
+ y="10170"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text198">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="12105"
+ y="9720"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text200">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="14805"
+ y="11925"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text202">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="14805"
+ y="12375"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text204">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="14805"
+ y="13050"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text206">CPU 65519</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="16605"
+ y="10845"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text208">CPU 65535</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="16605"
+ y="10170"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text210">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="16605"
+ y="9720"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text212">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="675"
+ y="450"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="start"
+ id="text214">struct rcu_state</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="9000"
+ y="1350"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text216">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="9000"
+ y="1800"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text218">rcu_node</text>
+ </g>
+</svg>
diff --git a/Documentation/RCU/Design/Data-Structures/TreeLevel.svg b/Documentation/RCU/Design/Data-Structures/TreeLevel.svg
new file mode 100644
index 0000000..7a7eb3b
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/TreeLevel.svg
@@ -0,0 +1,828 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
+
+<!-- CreationDate: Wed Dec 9 17:41:29 2015 -->
+
+<!-- Magnification: 3.000 -->
+
+<svg
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:cc="http://creativecommons.org/ns#"
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:svg="http://www.w3.org/2000/svg"
+ xmlns="http://www.w3.org/2000/svg"
+ xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+ xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+ width="17.7in"
+ height="10.4in"
+ viewBox="-66 -66 21237 12507"
+ id="svg2"
+ version="1.1"
+ inkscape:version="0.48.4 r9939"
+ sodipodi:docname="TreeLevel.fig">
+ <metadata
+ id="metadata216">
+ <rdf:RDF>
+ <cc:Work
+ rdf:about="">
+ <dc:format>image/svg+xml</dc:format>
+ <dc:type
+ rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+ <dc:title></dc:title>
+ </cc:Work>
+ </rdf:RDF>
+ </metadata>
+ <defs
+ id="defs214">
+ <marker
+ inkscape:stockid="Arrow1Mend"
+ orient="auto"
+ refY="0.0"
+ refX="0.0"
+ id="Arrow1Mend"
+ style="overflow:visible;">
+ <path
+ id="path3974"
+ d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
+ style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;"
+ transform="scale(0.4) rotate(180) translate(10,0)" />
+ </marker>
+ </defs>
+ <sodipodi:namedview
+ pagecolor="#ffffff"
+ bordercolor="#666666"
+ borderopacity="1"
+ objecttolerance="10"
+ gridtolerance="10"
+ guidetolerance="10"
+ inkscape:pageopacity="0"
+ inkscape:pageshadow="2"
+ inkscape:window-width="1023"
+ inkscape:window-height="1148"
+ id="namedview212"
+ showgrid="false"
+ inkscape:zoom="0.55869424"
+ inkscape:cx="796.50006"
+ inkscape:cy="467.99997"
+ inkscape:window-x="897"
+ inkscape:window-y="24"
+ inkscape:window-maximized="0"
+ inkscape:current-layer="g4" />
+ <g
+ style="stroke-width:.025in; fill:none"
+ id="g4">
+ <!-- Line: box -->
+ <rect
+ x="0"
+ y="0"
+ width="20655"
+ height="8325"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+ id="rect6" />
+ <!-- Line: box -->
+ <rect
+ x="14130"
+ y="3600"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect8" />
+ <!-- Line: box -->
+ <rect
+ x="7380"
+ y="3600"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect10" />
+ <!-- Line: box -->
+ <rect
+ x="8505"
+ y="6300"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect12" />
+ <!-- Line: box -->
+ <rect
+ x="13005"
+ y="6300"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect14" />
+ <!-- Line: box -->
+ <rect
+ x="17505"
+ y="6300"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect16" />
+ <!-- Line: box -->
+ <rect
+ x="4005"
+ y="6300"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect18" />
+ <!-- Line: box -->
+ <rect
+ x="10755"
+ y="900"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect20" />
+ <!-- Line -->
+ <polyline
+ points="6255,9225 6255,7746 "
+ style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline22" />
+ <!-- Arrowhead on XXXpoint 6255 9225 - 6255 7560-->
+ <!-- Circle -->
+ <circle
+ cx="11655"
+ cy="4275"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle26" />
+ <!-- Circle -->
+ <circle
+ cx="12105"
+ cy="4275"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle28" />
+ <!-- Circle -->
+ <circle
+ cx="12555"
+ cy="4275"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle30" />
+ <!-- Line -->
+ <polyline
+ points="9855,6300 11355,5010 "
+ style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline32" />
+ <!-- Arrowhead on XXXpoint 9855 6300 - 11496 4890-->
+ <!-- Line -->
+ <polyline
+ points="14355,6300 12852,5010 "
+ style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline36" />
+ <!-- Arrowhead on XXXpoint 14355 6300 - 12711 4890-->
+ <!-- Circle -->
+ <circle
+ cx="17055"
+ cy="6975"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle40" />
+ <!-- Circle -->
+ <circle
+ cx="16605"
+ cy="6975"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle42" />
+ <!-- Circle -->
+ <circle
+ cx="16155"
+ cy="6975"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle44" />
+ <!-- Circle -->
+ <circle
+ cx="12555"
+ cy="6975"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle46" />
+ <!-- Circle -->
+ <circle
+ cx="12105"
+ cy="6975"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle48" />
+ <!-- Circle -->
+ <circle
+ cx="11655"
+ cy="6975"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle50" />
+ <!-- Circle -->
+ <circle
+ cx="8055"
+ cy="6975"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle52" />
+ <!-- Circle -->
+ <circle
+ cx="7605"
+ cy="6975"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle54" />
+ <!-- Circle -->
+ <circle
+ cx="7155"
+ cy="6975"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle56" />
+ <!-- Circle -->
+ <circle
+ cx="4905"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle58" />
+ <!-- Circle -->
+ <circle
+ cx="5355"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle60" />
+ <!-- Circle -->
+ <circle
+ cx="5805"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle62" />
+ <!-- Circle -->
+ <circle
+ cx="18405"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle64" />
+ <!-- Circle -->
+ <circle
+ cx="18855"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle66" />
+ <!-- Circle -->
+ <circle
+ cx="19305"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle68" />
+ <!-- Circle -->
+ <circle
+ cx="13905"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle70" />
+ <!-- Circle -->
+ <circle
+ cx="14355"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle72" />
+ <!-- Circle -->
+ <circle
+ cx="14805"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle74" />
+ <!-- Circle -->
+ <circle
+ cx="9405"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle76" />
+ <!-- Circle -->
+ <circle
+ cx="9855"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle78" />
+ <!-- Circle -->
+ <circle
+ cx="10305"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle80" />
+ <!-- Line: box -->
+ <rect
+ x="225"
+ y="1125"
+ width="3150"
+ height="1125"
+ rx="0"
+ style="stroke:#000000;stroke-width:21; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+ id="rect82" />
+ <!-- Line: box -->
+ <rect
+ x="225"
+ y="2250"
+ width="3150"
+ height="1125"
+ rx="0"
+ style="stroke:#000000;stroke-width:21; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+ id="rect84" />
+ <!-- Line: box -->
+ <rect
+ x="225"
+ y="3375"
+ width="3150"
+ height="1125"
+ rx="0"
+ style="stroke:#000000;stroke-width:21; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+ id="rect86" />
+ <!-- Line -->
+ <polyline
+ points="14805,3600 13302,2310 "
+ style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline88" />
+ <!-- Arrowhead on XXXpoint 14805 3600 - 13161 2190-->
+ <!-- Line -->
+ <polyline
+ points="9405,3600 10905,2310 "
+ style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline92" />
+ <!-- Arrowhead on XXXpoint 9405 3600 - 11046 2190-->
+ <!-- Line -->
+ <polyline
+ points="6255,6300 7755,5010 "
+ style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline96" />
+ <!-- Arrowhead on XXXpoint 6255 6300 - 7896 4890-->
+ <!-- Line -->
+ <polyline
+ points="17955,6300 16452,5010 "
+ style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline100" />
+ <!-- Arrowhead on XXXpoint 17955 6300 - 16311 4890-->
+ <!-- Line -->
+ <polyline
+ points="4455,11025 4455,7746 "
+ style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline104" />
+ <!-- Arrowhead on XXXpoint 4455 11025 - 4455 7560-->
+ <!-- Line -->
+ <polyline
+ points="19755,9225 19755,7746 "
+ style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline108" />
+ <!-- Arrowhead on XXXpoint 19755 9225 - 19755 7560-->
+ <!-- Line -->
+ <polyline
+ points="17955,11025 17955,7746 "
+ style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline112" />
+ <!-- Arrowhead on XXXpoint 17955 11025 - 17955 7560-->
+ <!-- Line -->
+ <polyline
+ points="15255,9225 15255,7746 "
+ style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline116" />
+ <!-- Arrowhead on XXXpoint 15255 9225 - 15255 7560-->
+ <!-- Line -->
+ <polyline
+ points="13455,11025 13455,7746 "
+ style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline120" />
+ <!-- Arrowhead on XXXpoint 13455 11025 - 13455 7560-->
+ <!-- Line -->
+ <polyline
+ points="10755,9225 10755,7746 "
+ style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline124" />
+ <!-- Arrowhead on XXXpoint 10755 9225 - 10755 7560-->
+ <!-- Line -->
+ <polyline
+ points="8955,11025 8955,7746 "
+ style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline128" />
+ <!-- Arrowhead on XXXpoint 8955 11025 - 8955 7560-->
+ <!-- Line: box -->
+ <rect
+ x="12105"
+ y="11025"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect132" />
+ <!-- Line: box -->
+ <rect
+ x="13905"
+ y="9225"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect134" />
+ <!-- Line: box -->
+ <rect
+ x="16605"
+ y="11025"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect136" />
+ <!-- Line: box -->
+ <rect
+ x="18405"
+ y="9225"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect138" />
+ <!-- Line: box -->
+ <rect
+ x="9405"
+ y="9225"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect140" />
+ <!-- Line: box -->
+ <rect
+ x="7605"
+ y="11025"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect142" />
+ <!-- Line: box -->
+ <rect
+ x="4905"
+ y="9225"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect144" />
+ <!-- Line: box -->
+ <rect
+ x="3105"
+ y="11025"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect146" />
+ <!-- Line -->
+ <polyline
+ points="3375,1575 10701,1575 "
+ style="stroke:#000000;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline148" />
+ <!-- Arrowhead on XXXpoint 3375 1575 - 10890 1575-->
+ <!-- Line -->
+ <polyline
+ points="3375,3825 4050,3825 4050,5400 2700,5400 2700,6975 3951,6975 "
+ style="stroke:#000000;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline152" />
+ <!-- Arrowhead on XXXpoint 2700 6975 - 4140 6975-->
+ <!-- Line -->
+ <polyline
+ points="3375,2700 5175,2700 5175,4275 7326,4275 "
+ style="stroke:#000000;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline156" />
+ <!-- Arrowhead on XXXpoint 5175 4275 - 7515 4275-->
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="15480"
+ y="4500"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text160">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="15480"
+ y="4050"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text162">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="8730"
+ y="4050"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text164">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="8730"
+ y="4500"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text166">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="9855"
+ y="6750"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text168">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="9855"
+ y="7200"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text170">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="14355"
+ y="7200"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text172">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="14355"
+ y="6750"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text174">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="18855"
+ y="7200"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text176">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="18855"
+ y="6750"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text178">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5355"
+ y="6750"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text180">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5355"
+ y="7200"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text182">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="450"
+ y="1800"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="324"
+ text-anchor="start"
+ id="text184">->level[0]</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="450"
+ y="2925"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="324"
+ text-anchor="start"
+ id="text186">->level[1]</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="450"
+ y="4050"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="324"
+ text-anchor="start"
+ id="text188">->level[2]</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="12105"
+ y="1350"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text190">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="12105"
+ y="1800"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text192">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="6255"
+ y="10125"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text194">CPU 15</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4455"
+ y="11925"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text196">CPU 0</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="19755"
+ y="10125"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text198">CPU 65535</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="17955"
+ y="11925"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text200">CPU 65519</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="15255"
+ y="10125"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text202">CPU 43695</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="13455"
+ y="11925"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text204">CPU 43679</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="10755"
+ y="10125"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text206">CPU 21839</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="8955"
+ y="11925"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text208">CPU 21823</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="225"
+ y="450"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="start"
+ id="text210">struct rcu_state</text>
+ </g>
+</svg>
diff --git a/Documentation/RCU/Design/Data-Structures/TreeMapping.svg b/Documentation/RCU/Design/Data-Structures/TreeMapping.svg
new file mode 100644
index 0000000..729cfa9
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/TreeMapping.svg
@@ -0,0 +1,305 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
+
+<!-- CreationDate: Wed Dec 9 17:43:22 2015 -->
+
+<!-- Magnification: 1.000 -->
+
+<svg
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:cc="http://creativecommons.org/ns#"
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:svg="http://www.w3.org/2000/svg"
+ xmlns="http://www.w3.org/2000/svg"
+ xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+ xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+ width="3.1in"
+ height="0.9in"
+ viewBox="-12 -12 3699 1074"
+ id="svg2"
+ version="1.1"
+ inkscape:version="0.48.4 r9939"
+ sodipodi:docname="TreeMapping.fig">
+ <metadata
+ id="metadata66">
+ <rdf:RDF>
+ <cc:Work
+ rdf:about="">
+ <dc:format>image/svg+xml</dc:format>
+ <dc:type
+ rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+ <dc:title></dc:title>
+ </cc:Work>
+ </rdf:RDF>
+ </metadata>
+ <defs
+ id="defs64">
+ <marker
+ inkscape:stockid="Arrow2Lend"
+ orient="auto"
+ refY="0.0"
+ refX="0.0"
+ id="Arrow2Lend"
+ style="overflow:visible;">
+ <path
+ id="path3836"
+ style="fill-rule:evenodd;stroke-width:0.62500000;stroke-linejoin:round;"
+ d="M 8.7185878,4.0337352 L -2.2072895,0.016013256 L 8.7185884,-4.0017078 C 6.9730900,-1.6296469 6.9831476,1.6157441 8.7185878,4.0337352 z "
+ transform="scale(1.1) rotate(180) translate(1,0)" />
+ </marker>
+ <marker
+ inkscape:stockid="Arrow2Mend"
+ orient="auto"
+ refY="0.0"
+ refX="0.0"
+ id="Arrow2Mend"
+ style="overflow:visible;">
+ <path
+ id="path3842"
+ style="fill-rule:evenodd;stroke-width:0.62500000;stroke-linejoin:round;"
+ d="M 8.7185878,4.0337352 L -2.2072895,0.016013256 L 8.7185884,-4.0017078 C 6.9730900,-1.6296469 6.9831476,1.6157441 8.7185878,4.0337352 z "
+ transform="scale(0.6) rotate(180) translate(0,0)" />
+ </marker>
+ <marker
+ inkscape:stockid="Arrow1Mend"
+ orient="auto"
+ refY="0.0"
+ refX="0.0"
+ id="Arrow1Mend"
+ style="overflow:visible;">
+ <path
+ id="path3824"
+ d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
+ style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;"
+ transform="scale(0.4) rotate(180) translate(10,0)" />
+ </marker>
+ </defs>
+ <sodipodi:namedview
+ pagecolor="#ffffff"
+ bordercolor="#666666"
+ borderopacity="1"
+ objecttolerance="10"
+ gridtolerance="10"
+ guidetolerance="10"
+ inkscape:pageopacity="0"
+ inkscape:pageshadow="2"
+ inkscape:window-width="991"
+ inkscape:window-height="606"
+ id="namedview62"
+ showgrid="false"
+ inkscape:zoom="3.0752688"
+ inkscape:cx="139.5"
+ inkscape:cy="40.5"
+ inkscape:window-x="891"
+ inkscape:window-y="177"
+ inkscape:window-maximized="0"
+ inkscape:current-layer="g4" />
+ <g
+ style="stroke-width:.025in; fill:none"
+ id="g4">
+ <!-- Line: box -->
+ <rect
+ x="0"
+ y="0"
+ width="3675"
+ height="1050"
+ rx="0"
+ style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+ id="rect6" />
+ <!-- Line: box -->
+ <rect
+ x="75"
+ y="375"
+ width="375"
+ height="300"
+ rx="0"
+ style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect8" />
+ <!-- Line: box -->
+ <rect
+ x="600"
+ y="375"
+ width="375"
+ height="300"
+ rx="0"
+ style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect10" />
+ <!-- Line: box -->
+ <rect
+ x="1125"
+ y="375"
+ width="375"
+ height="300"
+ rx="0"
+ style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect12" />
+ <!-- Line: box -->
+ <rect
+ x="1650"
+ y="375"
+ width="375"
+ height="300"
+ rx="0"
+ style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect14" />
+ <!-- Line: box -->
+ <rect
+ x="2175"
+ y="375"
+ width="375"
+ height="300"
+ rx="0"
+ style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect16" />
+ <!-- Line: box -->
+ <rect
+ x="3225"
+ y="375"
+ width="375"
+ height="300"
+ rx="0"
+ style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect18" />
+ <!-- Line -->
+ <polyline
+ points="675,375 675,150 300,150 300,358 "
+ style="stroke:#000000;stroke-width:7.00088889;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+ id="polyline20" />
+ <!-- Arrowhead on XXXpoint 300 150 - 300 390-->
+ <!-- Line -->
+ <polyline
+ points="1200,675 1200,900 300,900 300,691 "
+ style="stroke:#000000;stroke-width:7.00088889;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+ id="polyline24" />
+ <!-- Arrowhead on XXXpoint 300 900 - 300 660-->
+ <!-- Line -->
+ <polyline
+ points="1725,375 1725,150 900,150 900,358 "
+ style="stroke:#000000;stroke-width:7.00088889;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+ id="polyline28" />
+ <!-- Arrowhead on XXXpoint 900 150 - 900 390-->
+ <!-- Line -->
+ <polyline
+ points="2250,375 2250,75 825,75 825,358 "
+ style="stroke:#000000;stroke-width:7.00088889;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+ id="polyline32" />
+ <!-- Arrowhead on XXXpoint 825 75 - 825 390-->
+ <!-- Line -->
+ <polyline
+ points="2775,675 2775,900 1425,900 1425,691 "
+ style="stroke:#000000;stroke-width:7.00088889;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+ id="polyline36" />
+ <!-- Arrowhead on XXXpoint 1425 900 - 1425 660-->
+ <!-- Line -->
+ <polyline
+ points="3300,675 3300,975 1350,975 1350,691 "
+ style="stroke:#000000;stroke-width:7.00088889;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+ id="polyline40" />
+ <!-- Arrowhead on XXXpoint 1350 975 - 1350 660-->
+ <!-- Line: box -->
+ <rect
+ x="2700"
+ y="375"
+ width="375"
+ height="300"
+ rx="0"
+ style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect44" />
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="300"
+ y="525"
+ fill="#000000"
+ font-family="Times"
+ font-style="normal"
+ font-weight="normal"
+ font-size="96"
+ text-anchor="middle"
+ id="text46">0:7 </text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1350"
+ y="525"
+ fill="#000000"
+ font-family="Times"
+ font-style="normal"
+ font-weight="normal"
+ font-size="96"
+ text-anchor="middle"
+ id="text48">4:7 </text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1875"
+ y="525"
+ fill="#000000"
+ font-family="Times"
+ font-style="normal"
+ font-weight="normal"
+ font-size="96"
+ text-anchor="middle"
+ id="text50">0:1 </text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2400"
+ y="525"
+ fill="#000000"
+ font-family="Times"
+ font-style="normal"
+ font-weight="normal"
+ font-size="96"
+ text-anchor="middle"
+ id="text52">2:3 </text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2925"
+ y="525"
+ fill="#000000"
+ font-family="Times"
+ font-style="normal"
+ font-weight="normal"
+ font-size="96"
+ text-anchor="middle"
+ id="text54">4:5 </text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="3450"
+ y="525"
+ fill="#000000"
+ font-family="Times"
+ font-style="normal"
+ font-weight="normal"
+ font-size="96"
+ text-anchor="middle"
+ id="text56">6:7 </text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="825"
+ y="525"
+ fill="#000000"
+ font-family="Times"
+ font-style="normal"
+ font-weight="normal"
+ font-size="96"
+ text-anchor="middle"
+ id="text58">0:3 </text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="3600"
+ y="150"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="normal"
+ font-size="96"
+ text-anchor="end"
+ id="text60">struct rcu_state</text>
+ </g>
+</svg>
diff --git a/Documentation/RCU/Design/Data-Structures/TreeMappingLevel.svg b/Documentation/RCU/Design/Data-Structures/TreeMappingLevel.svg
new file mode 100644
index 0000000..5b416a4
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/TreeMappingLevel.svg
@@ -0,0 +1,380 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
+
+<!-- CreationDate: Wed Dec 9 17:45:19 2015 -->
+
+<!-- Magnification: 1.000 -->
+
+<svg
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:cc="http://creativecommons.org/ns#"
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:svg="http://www.w3.org/2000/svg"
+ xmlns="http://www.w3.org/2000/svg"
+ xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+ xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+ width="3.1in"
+ height="1.8in"
+ viewBox="-12 -12 3699 2124"
+ id="svg2"
+ version="1.1"
+ inkscape:version="0.48.4 r9939"
+ sodipodi:docname="TreeMappingLevel.svg">
+ <metadata
+ id="metadata98">
+ <rdf:RDF>
+ <cc:Work
+ rdf:about="">
+ <dc:format>image/svg+xml</dc:format>
+ <dc:type
+ rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+ <dc:title />
+ </cc:Work>
+ </rdf:RDF>
+ </metadata>
+ <defs
+ id="defs96">
+ <marker
+ inkscape:stockid="Arrow2Lend"
+ orient="auto"
+ refY="0.0"
+ refX="0.0"
+ id="Arrow2Lend"
+ style="overflow:visible;">
+ <path
+ id="path3868"
+ style="fill-rule:evenodd;stroke-width:0.62500000;stroke-linejoin:round;"
+ d="M 8.7185878,4.0337352 L -2.2072895,0.016013256 L 8.7185884,-4.0017078 C 6.9730900,-1.6296469 6.9831476,1.6157441 8.7185878,4.0337352 z "
+ transform="scale(1.1) rotate(180) translate(1,0)" />
+ </marker>
+ </defs>
+ <sodipodi:namedview
+ pagecolor="#ffffff"
+ bordercolor="#666666"
+ borderopacity="1"
+ objecttolerance="10"
+ gridtolerance="10"
+ guidetolerance="10"
+ inkscape:pageopacity="0"
+ inkscape:pageshadow="2"
+ inkscape:window-width="1598"
+ inkscape:window-height="1211"
+ id="namedview94"
+ showgrid="false"
+ inkscape:zoom="5.2508961"
+ inkscape:cx="139.5"
+ inkscape:cy="81"
+ inkscape:window-x="840"
+ inkscape:window-y="122"
+ inkscape:window-maximized="0"
+ inkscape:current-layer="g4" />
+ <g
+ style="stroke-width:.025in; fill:none"
+ id="g4">
+ <!-- Line: box -->
+ <rect
+ x="0"
+ y="0"
+ width="3675"
+ height="2100"
+ rx="0"
+ style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+ id="rect6" />
+ <!-- Line: box -->
+ <rect
+ x="75"
+ y="1350"
+ width="750"
+ height="225"
+ rx="0"
+ style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect8" />
+ <!-- Line: box -->
+ <rect
+ x="75"
+ y="1575"
+ width="750"
+ height="225"
+ rx="0"
+ style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect10" />
+ <!-- Line: box -->
+ <rect
+ x="75"
+ y="1800"
+ width="750"
+ height="225"
+ rx="0"
+ style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect12" />
+ <!-- Arc -->
+ <path
+ style="stroke:#000000;stroke-width:7;stroke-linecap:butt;"
+ d="M 1800,900 A 118 118 0 0 0 1800 1125 "
+ id="path14" />
+ <!-- Arc -->
+ <path
+ style="stroke:#000000;stroke-width:7;stroke-linecap:butt;"
+ d="M 750,900 A 75 75 0 0 0 750 1050 "
+ id="path16" />
+ <!-- Line -->
+ <polyline
+ points="750,900 750,691 "
+ style="stroke:#000000;stroke-width:7.00025806;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+ id="polyline18" />
+ <!-- Arrowhead on XXXpoint 750 900 - 750 660-->
+ <!-- Line: box -->
+ <rect
+ x="75"
+ y="375"
+ width="375"
+ height="300"
+ rx="0"
+ style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect22" />
+ <!-- Line: box -->
+ <rect
+ x="600"
+ y="375"
+ width="375"
+ height="300"
+ rx="0"
+ style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect24" />
+ <!-- Line: box -->
+ <rect
+ x="1650"
+ y="375"
+ width="375"
+ height="300"
+ rx="0"
+ style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect26" />
+ <!-- Line: box -->
+ <rect
+ x="2175"
+ y="375"
+ width="375"
+ height="300"
+ rx="0"
+ style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect28" />
+ <!-- Line: box -->
+ <rect
+ x="3225"
+ y="375"
+ width="375"
+ height="300"
+ rx="0"
+ style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect30" />
+ <!-- Line -->
+ <polyline
+ points="675,375 675,150 300,150 300,358 "
+ style="stroke:#000000;stroke-width:7.00025806;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+ id="polyline32" />
+ <!-- Arrowhead on XXXpoint 300 150 - 300 390-->
+ <!-- Line -->
+ <polyline
+ points="1725,375 1725,150 900,150 900,358 "
+ style="stroke:#000000;stroke-width:7.00025806;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+ id="polyline36" />
+ <!-- Arrowhead on XXXpoint 900 150 - 900 390-->
+ <!-- Line -->
+ <polyline
+ points="2250,375 2250,75 825,75 825,358 "
+ style="stroke:#000000;stroke-width:7.00025806;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+ id="polyline40" />
+ <!-- Arrowhead on XXXpoint 825 75 - 825 390-->
+ <!-- Line -->
+ <polyline
+ points="2775,675 2775,975 1425,975 1425,691 "
+ style="stroke:#000000;stroke-width:7.00025806;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+ id="polyline44" />
+ <!-- Arrowhead on XXXpoint 1425 975 - 1425 660-->
+ <!-- Line: box -->
+ <rect
+ x="2700"
+ y="375"
+ width="375"
+ height="300"
+ rx="0"
+ style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect48" />
+ <!-- Line: box -->
+ <rect
+ x="1125"
+ y="375"
+ width="375"
+ height="300"
+ rx="0"
+ style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect50" />
+ <!-- Line -->
+ <polyline
+ points="3300,675 3300,1050 1350,1050 1350,691 "
+ style="stroke:#000000;stroke-width:7.00025806;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+ id="polyline52" />
+ <!-- Arrowhead on XXXpoint 1350 1050 - 1350 660-->
+ <!-- Line -->
+ <polyline
+ points="825,1425 975,1425 975,1200 225,1200 225,691 "
+ style="stroke:#000000;stroke-width:7.00025806;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+ id="polyline56" />
+ <!-- Arrowhead on XXXpoint 225 1200 - 225 660-->
+ <!-- Line -->
+ <polyline
+ points="1200,675 1200,975 300,975 300,691 "
+ style="stroke:#000000;stroke-width:7.00025806;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+ id="polyline60" />
+ <!-- Arrowhead on XXXpoint 300 975 - 300 660-->
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="150"
+ y="1500"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="108"
+ text-anchor="start"
+ id="text64">->level[0]</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="150"
+ y="1725"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="108"
+ text-anchor="start"
+ id="text66">->level[1]</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="150"
+ y="1950"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="108"
+ text-anchor="start"
+ id="text68">->level[2]</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="300"
+ y="525"
+ fill="#000000"
+ font-family="Times"
+ font-style="normal"
+ font-weight="normal"
+ font-size="96"
+ text-anchor="middle"
+ id="text70">0:7 </text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1350"
+ y="525"
+ fill="#000000"
+ font-family="Times"
+ font-style="normal"
+ font-weight="normal"
+ font-size="96"
+ text-anchor="middle"
+ id="text72">4:7 </text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1875"
+ y="525"
+ fill="#000000"
+ font-family="Times"
+ font-style="normal"
+ font-weight="normal"
+ font-size="96"
+ text-anchor="middle"
+ id="text74">0:1 </text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2400"
+ y="525"
+ fill="#000000"
+ font-family="Times"
+ font-style="normal"
+ font-weight="normal"
+ font-size="96"
+ text-anchor="middle"
+ id="text76">2:3 </text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2925"
+ y="525"
+ fill="#000000"
+ font-family="Times"
+ font-style="normal"
+ font-weight="normal"
+ font-size="96"
+ text-anchor="middle"
+ id="text78">4:5 </text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="3450"
+ y="525"
+ fill="#000000"
+ font-family="Times"
+ font-style="normal"
+ font-weight="normal"
+ font-size="96"
+ text-anchor="middle"
+ id="text80">6:7 </text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="825"
+ y="525"
+ fill="#000000"
+ font-family="Times"
+ font-style="normal"
+ font-weight="normal"
+ font-size="96"
+ text-anchor="middle"
+ id="text82">0:3 </text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="3600"
+ y="150"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="normal"
+ font-size="96"
+ text-anchor="end"
+ id="text84">struct rcu_state</text>
+ <!-- Line -->
+ <polyline
+ points="825,1875 1800,1875 1800,1125 "
+ style="stroke:#000000;stroke-width:7.00025806;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:none"
+ id="polyline86" />
+ <!-- Line -->
+ <polyline
+ points="1800,900 1800,691 "
+ style="stroke:#000000;stroke-width:7.00025806;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+ id="polyline88" />
+ <!-- Arrowhead on XXXpoint 1800 900 - 1800 660-->
+ <!-- Line -->
+ <polyline
+ points="825,1650 1200,1650 1200,1125 750,1125 750,1050 "
+ style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="polyline92" />
+ </g>
+</svg>
diff --git a/Documentation/RCU/Design/Data-Structures/blkd_task.svg b/Documentation/RCU/Design/Data-Structures/blkd_task.svg
new file mode 100644
index 0000000..00e810b
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/blkd_task.svg
@@ -0,0 +1,843 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
+
+<!-- CreationDate: Wed Dec 9 17:35:03 2015 -->
+
+<!-- Magnification: 2.000 -->
+
+<svg
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:cc="http://creativecommons.org/ns#"
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:svg="http://www.w3.org/2000/svg"
+ xmlns="http://www.w3.org/2000/svg"
+ xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+ xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+ width="10.1in"
+ height="8.6in"
+ viewBox="-44 -44 12088 10288"
+ id="svg2"
+ version="1.1"
+ inkscape:version="0.48.4 r9939"
+ sodipodi:docname="blkd_task.fig">
+ <metadata
+ id="metadata212">
+ <rdf:RDF>
+ <cc:Work
+ rdf:about="">
+ <dc:format>image/svg+xml</dc:format>
+ <dc:type
+ rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+ <dc:title></dc:title>
+ </cc:Work>
+ </rdf:RDF>
+ </metadata>
+ <defs
+ id="defs210">
+ <marker
+ inkscape:stockid="Arrow1Mend"
+ orient="auto"
+ refY="0.0"
+ refX="0.0"
+ id="Arrow1Mend"
+ style="overflow:visible;">
+ <path
+ id="path3970"
+ d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
+ style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;"
+ transform="scale(0.4) rotate(180) translate(10,0)" />
+ </marker>
+ </defs>
+ <sodipodi:namedview
+ pagecolor="#ffffff"
+ bordercolor="#666666"
+ borderopacity="1"
+ objecttolerance="10"
+ gridtolerance="10"
+ guidetolerance="10"
+ inkscape:pageopacity="0"
+ inkscape:pageshadow="2"
+ inkscape:window-width="1087"
+ inkscape:window-height="1144"
+ id="namedview208"
+ showgrid="false"
+ inkscape:zoom="1.0495049"
+ inkscape:cx="454.50003"
+ inkscape:cy="387.00003"
+ inkscape:window-x="833"
+ inkscape:window-y="28"
+ inkscape:window-maximized="0"
+ inkscape:current-layer="g4" />
+ <g
+ style="stroke-width:.025in; fill:none"
+ id="g4">
+ <!-- Line: box -->
+ <rect
+ x="450"
+ y="0"
+ width="6300"
+ height="7350"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+ id="rect6" />
+ <!-- Line: box -->
+ <rect
+ x="4950"
+ y="4950"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect8" />
+ <!-- Line: box -->
+ <rect
+ x="750"
+ y="600"
+ width="5700"
+ height="3750"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+ id="rect10" />
+ <!-- Line -->
+ <polyline
+ points="5250,8100 5688,5912 "
+ style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="polyline12" />
+ <!-- Arrowhead on XXXpoint 5250 8100 - 5710 5790-->
+ <polyline
+ points="5714 6068 5704 5822 5598 6044 "
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+ id="polyline14" />
+ <!-- Line -->
+ <polyline
+ points="4050,9300 4486,7262 "
+ style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="polyline16" />
+ <!-- Arrowhead on XXXpoint 4050 9300 - 4512 7140-->
+ <polyline
+ points="4514 7418 4506 7172 4396 7394 "
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+ id="polyline18" />
+ <!-- Line -->
+ <polyline
+ points="1040,9300 1476,7262 "
+ style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="polyline20" />
+ <!-- Arrowhead on XXXpoint 1040 9300 - 1502 7140-->
+ <polyline
+ points="1504 7418 1496 7172 1386 7394 "
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+ id="polyline22" />
+ <!-- Line -->
+ <polyline
+ points="2240,8100 2676,6062 "
+ style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="polyline24" />
+ <!-- Arrowhead on XXXpoint 2240 8100 - 2702 5940-->
+ <polyline
+ points="2704 6218 2696 5972 2586 6194 "
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+ id="polyline26" />
+ <!-- Line: box -->
+ <rect
+ x="0"
+ y="450"
+ width="6300"
+ height="7350"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+ id="rect28" />
+ <!-- Line: box -->
+ <rect
+ x="300"
+ y="1050"
+ width="5700"
+ height="3750"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+ id="rect30" />
+ <!-- Line -->
+ <polyline
+ points="1350,3450 2350,2590 "
+ style="stroke:#00d1d1;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline32" />
+ <!-- Arrowhead on XXXpoint 1350 3450 - 2444 2510-->
+ <!-- Line -->
+ <polyline
+ points="4950,3450 3948,2590 "
+ style="stroke:#00d1d1;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline36" />
+ <!-- Arrowhead on XXXpoint 4950 3450 - 3854 2510-->
+ <!-- Line -->
+ <polyline
+ points="4050,6600 4050,4414 "
+ style="stroke:#00d1d1;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline40" />
+ <!-- Arrowhead on XXXpoint 4050 6600 - 4050 4290-->
+ <!-- Line -->
+ <polyline
+ points="1050,6600 1050,4414 "
+ style="stroke:#00d1d1;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline44" />
+ <!-- Arrowhead on XXXpoint 1050 6600 - 1050 4290-->
+ <!-- Line -->
+ <polyline
+ points="2250,5400 2250,4414 "
+ style="stroke:#00d1d1;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline48" />
+ <!-- Arrowhead on XXXpoint 2250 5400 - 2250 4290-->
+ <!-- Line -->
+ <polyline
+ points="2250,8100 2250,6364 "
+ style="stroke:#00ff00;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline52" />
+ <!-- Arrowhead on XXXpoint 2250 8100 - 2250 6240-->
+ <!-- Line -->
+ <polyline
+ points="1050,9300 1050,7564 "
+ style="stroke:#00ff00;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline56" />
+ <!-- Arrowhead on XXXpoint 1050 9300 - 1050 7440-->
+ <!-- Line -->
+ <polyline
+ points="4050,9300 4050,7564 "
+ style="stroke:#00ff00;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline60" />
+ <!-- Arrowhead on XXXpoint 4050 9300 - 4050 7440-->
+ <!-- Line -->
+ <polyline
+ points="5250,8100 5250,6364 "
+ style="stroke:#00ff00;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline64" />
+ <!-- Arrowhead on XXXpoint 5250 8100 - 5250 6240-->
+ <!-- Circle -->
+ <circle
+ cx="2850"
+ cy="3900"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle68" />
+ <!-- Circle -->
+ <circle
+ cx="3150"
+ cy="3900"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle70" />
+ <!-- Circle -->
+ <circle
+ cx="3450"
+ cy="3900"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle72" />
+ <!-- Circle -->
+ <circle
+ cx="1350"
+ cy="5100"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle74" />
+ <!-- Circle -->
+ <circle
+ cx="1650"
+ cy="5100"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle76" />
+ <!-- Circle -->
+ <circle
+ cx="1950"
+ cy="5100"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle78" />
+ <!-- Circle -->
+ <circle
+ cx="4350"
+ cy="5100"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle80" />
+ <!-- Circle -->
+ <circle
+ cx="4650"
+ cy="5100"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle82" />
+ <!-- Circle -->
+ <circle
+ cx="4950"
+ cy="5100"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle84" />
+ <!-- Line: box -->
+ <rect
+ x="750"
+ y="3450"
+ width="1800"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect86" />
+ <!-- Line: box -->
+ <rect
+ x="300"
+ y="6600"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect88" />
+ <!-- Line: box -->
+ <rect
+ x="4500"
+ y="5400"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect90" />
+ <!-- Line: box -->
+ <rect
+ x="3300"
+ y="6600"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect92" />
+ <!-- Line: box -->
+ <rect
+ x="2250"
+ y="1650"
+ width="1800"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect94" />
+ <!-- Line: box -->
+ <rect
+ x="0"
+ y="9300"
+ width="2100"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+ id="rect96" />
+ <!-- Line: box -->
+ <rect
+ x="1350"
+ y="8100"
+ width="2100"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+ id="rect98" />
+ <!-- Line: box -->
+ <rect
+ x="3000"
+ y="9300"
+ width="2100"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+ id="rect100" />
+ <!-- Line: box -->
+ <rect
+ x="4350"
+ y="8100"
+ width="2100"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+ id="rect102" />
+ <!-- Line: box -->
+ <rect
+ x="1500"
+ y="5400"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect104" />
+ <!-- Line -->
+ <polygon
+ points="5550,3450 7350,2850 7350,5100 5550,4350 5550,3450 "
+ style="stroke:#000000;stroke-width:14; stroke-linejoin:miter; stroke-linecap:butt; stroke-dasharray:120 120;fill:#ffbfbf; "
+ id="polygon106" />
+ <!-- Line -->
+ <polyline
+ points="9300,3150 10734,3150 "
+ style="stroke:#000000;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline108" />
+ <!-- Arrowhead on XXXpoint 9300 3150 - 10860 3150-->
+ <!-- Line: box -->
+ <rect
+ x="10800"
+ y="2850"
+ width="1200"
+ height="750"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect112" />
+ <!-- Line -->
+ <polyline
+ points="11400,3600 11400,4284 "
+ style="stroke:#000000;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline114" />
+ <!-- Arrowhead on XXXpoint 11400 3600 - 11400 4410-->
+ <!-- Line: box -->
+ <rect
+ x="10800"
+ y="4350"
+ width="1200"
+ height="750"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect118" />
+ <!-- Line -->
+ <polyline
+ points="11400,5100 11400,5784 "
+ style="stroke:#000000;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline120" />
+ <!-- Arrowhead on XXXpoint 11400 5100 - 11400 5910-->
+ <!-- Line: box -->
+ <rect
+ x="10800"
+ y="5850"
+ width="1200"
+ height="750"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect124" />
+ <!-- Line -->
+ <polyline
+ points="9300,3900 9900,3900 9900,4650 10734,4650 "
+ style="stroke:#000000;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline126" />
+ <!-- Arrowhead on XXXpoint 9900 4650 - 10860 4650-->
+ <!-- Line -->
+ <polyline
+ points="9300,4650 9600,4650 9600,6150 10734,6150 "
+ style="stroke:#000000;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline130" />
+ <!-- Arrowhead on XXXpoint 9600 6150 - 10860 6150-->
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="6450"
+ y="300"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="192"
+ text-anchor="end"
+ id="text134">rcu_bh</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="3150"
+ y="1950"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text136">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="3150"
+ y="2250"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text138">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1650"
+ y="3750"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text140">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1650"
+ y="4050"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text142">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2250"
+ y="5700"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text144">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2250"
+ y="6000"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text146">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1050"
+ y="6900"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text148">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1050"
+ y="7200"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text150">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5250"
+ y="5700"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text152">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5250"
+ y="6000"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text154">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4050"
+ y="6900"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text156">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4050"
+ y="7200"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text158">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="450"
+ y="1350"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="start"
+ id="text160">struct rcu_state</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1050"
+ y="9600"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text162">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1050"
+ y="9900"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text164">rcu_dynticks</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4050"
+ y="9600"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text166">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4050"
+ y="9900"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text168">rcu_dynticks</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2400"
+ y="8400"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text170">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2400"
+ y="8700"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text172">rcu_dynticks</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5400"
+ y="8400"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text174">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5400"
+ y="8700"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text176">rcu_dynticks</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="6000"
+ y="750"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="192"
+ text-anchor="end"
+ id="text178">rcu_sched</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="11400"
+ y="3300"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="216"
+ text-anchor="middle"
+ id="text180">T3</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="11400"
+ y="4800"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="216"
+ text-anchor="middle"
+ id="text182">T2</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="11400"
+ y="6300"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="216"
+ text-anchor="middle"
+ id="text184">T1</text>
+ <!-- Line -->
+ <polyline
+ points="5250,5400 5250,4414 "
+ style="stroke:#00d1d1;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline186" />
+ <!-- Arrowhead on XXXpoint 5250 5400 - 5250 4290-->
+ <!-- Line: box -->
+ <rect
+ x="3750"
+ y="3450"
+ width="1800"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect190" />
+ <!-- Line: box -->
+ <rect
+ x="7350"
+ y="2850"
+ width="1950"
+ height="750"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect192" />
+ <!-- Line: box -->
+ <rect
+ x="7350"
+ y="3600"
+ width="1950"
+ height="750"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect194" />
+ <!-- Line: box -->
+ <rect
+ x="7350"
+ y="4350"
+ width="1950"
+ height="750"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect196" />
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4650"
+ y="4050"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text198">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4650"
+ y="3750"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text200">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="7500"
+ y="3300"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="start"
+ id="text202">blkd_tasks</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="7500"
+ y="4050"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="start"
+ id="text204">gp_tasks</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="7500"
+ y="4800"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="start"
+ id="text206">exp_tasks</text>
+ </g>
+</svg>
diff --git a/Documentation/RCU/Design/Data-Structures/nxtlist.svg b/Documentation/RCU/Design/Data-Structures/nxtlist.svg
new file mode 100644
index 0000000..abc4cc7
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/nxtlist.svg
@@ -0,0 +1,396 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
+
+<!-- CreationDate: Wed Dec 9 17:39:46 2015 -->
+
+<!-- Magnification: 3.000 -->
+
+<svg
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:cc="http://creativecommons.org/ns#"
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:svg="http://www.w3.org/2000/svg"
+ xmlns="http://www.w3.org/2000/svg"
+ xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+ xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+ width="10.4in"
+ height="10.4in"
+ viewBox="-66 -66 12507 12507"
+ id="svg2"
+ version="1.1"
+ inkscape:version="0.48.4 r9939"
+ sodipodi:docname="nxtlist.fig">
+ <metadata
+ id="metadata94">
+ <rdf:RDF>
+ <cc:Work
+ rdf:about="">
+ <dc:format>image/svg+xml</dc:format>
+ <dc:type
+ rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+ <dc:title></dc:title>
+ </cc:Work>
+ </rdf:RDF>
+ </metadata>
+ <defs
+ id="defs92">
+ <marker
+ inkscape:stockid="Arrow1Mend"
+ orient="auto"
+ refY="0.0"
+ refX="0.0"
+ id="Arrow1Mend"
+ style="overflow:visible;">
+ <path
+ id="path3852"
+ d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
+ style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;"
+ transform="scale(0.4) rotate(180) translate(10,0)" />
+ </marker>
+ </defs>
+ <sodipodi:namedview
+ pagecolor="#ffffff"
+ bordercolor="#666666"
+ borderopacity="1"
+ objecttolerance="10"
+ gridtolerance="10"
+ guidetolerance="10"
+ inkscape:pageopacity="0"
+ inkscape:pageshadow="2"
+ inkscape:window-width="925"
+ inkscape:window-height="928"
+ id="namedview90"
+ showgrid="false"
+ inkscape:zoom="0.80021373"
+ inkscape:cx="467.99997"
+ inkscape:cy="467.99997"
+ inkscape:window-x="948"
+ inkscape:window-y="73"
+ inkscape:window-maximized="0"
+ inkscape:current-layer="g4" />
+ <g
+ style="stroke-width:.025in; fill:none"
+ id="g4">
+ <!-- Line: box -->
+ <rect
+ x="0"
+ y="0"
+ width="7875"
+ height="1125"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect6" />
+ <!-- Line: box -->
+ <rect
+ x="0"
+ y="1125"
+ width="7875"
+ height="1125"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect8" />
+ <!-- Line: box -->
+ <rect
+ x="0"
+ y="2250"
+ width="7875"
+ height="1125"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect10" />
+ <!-- Line: box -->
+ <rect
+ x="0"
+ y="3375"
+ width="7875"
+ height="1125"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect12" />
+ <!-- Line: box -->
+ <rect
+ x="0"
+ y="4500"
+ width="7875"
+ height="1125"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect14" />
+ <!-- Line: box -->
+ <rect
+ x="10575"
+ y="0"
+ width="1800"
+ height="1125"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect16" />
+ <!-- Line: box -->
+ <rect
+ x="10575"
+ y="1125"
+ width="1800"
+ height="1125"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect18" />
+ <!-- Line -->
+ <polyline
+ points="11475,2250 11475,3276 "
+ style="stroke:#000000;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline20" />
+ <!-- Arrowhead on XXXpoint 11475 2250 - 11475 3465-->
+ <!-- Line: box -->
+ <rect
+ x="10575"
+ y="6750"
+ width="1800"
+ height="1125"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect24" />
+ <!-- Line: box -->
+ <rect
+ x="10575"
+ y="7875"
+ width="1800"
+ height="1125"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect26" />
+ <!-- Line: box -->
+ <rect
+ x="10575"
+ y="10125"
+ width="1800"
+ height="1125"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect28" />
+ <!-- Line: box -->
+ <rect
+ x="10575"
+ y="11250"
+ width="1800"
+ height="1125"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect30" />
+ <!-- Line: box -->
+ <rect
+ x="10575"
+ y="3375"
+ width="1800"
+ height="1125"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect32" />
+ <!-- Line -->
+ <polyline
+ points="11475,5625 11475,6651 "
+ style="stroke:#000000;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline34" />
+ <!-- Arrowhead on XXXpoint 11475 5625 - 11475 6840-->
+ <!-- Line -->
+ <polyline
+ points="7875,225 10476,225 "
+ style="stroke:#000000;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline38" />
+ <!-- Arrowhead on XXXpoint 7875 225 - 10665 225-->
+ <!-- Line -->
+ <polyline
+ points="7875,1350 9675,1350 9675,675 7971,675 "
+ style="stroke:#000000;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline42" />
+ <!-- Arrowhead on XXXpoint 9675 675 - 7785 675-->
+ <!-- Line -->
+ <polyline
+ points="7875,2475 9675,2475 9675,4725 10476,4725 "
+ style="stroke:#000000;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline46" />
+ <!-- Arrowhead on XXXpoint 9675 4725 - 10665 4725-->
+ <!-- Line -->
+ <polyline
+ points="7875,3600 9225,3600 9225,5175 10476,5175 "
+ style="stroke:#000000;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline50" />
+ <!-- Arrowhead on XXXpoint 9225 5175 - 10665 5175-->
+ <!-- Line -->
+ <polyline
+ points="7875,4725 8775,4725 8775,11475 10476,11475 "
+ style="stroke:#000000;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline54" />
+ <!-- Arrowhead on XXXpoint 8775 11475 - 10665 11475-->
+ <!-- Line: box -->
+ <rect
+ x="10575"
+ y="4500"
+ width="1800"
+ height="1125"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect58" />
+ <!-- Line -->
+ <polyline
+ points="11475,9000 11475,10026 "
+ style="stroke:#000000;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline60" />
+ <!-- Arrowhead on XXXpoint 11475 9000 - 11475 10215-->
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="225"
+ y="675"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="324"
+ text-anchor="start"
+ id="text64">nxtlist</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="225"
+ y="1800"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="324"
+ text-anchor="start"
+ id="text66">nxttail[RCU_DONE_TAIL]</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="225"
+ y="2925"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="324"
+ text-anchor="start"
+ id="text68">nxttail[RCU_WAIT_TAIL]</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="225"
+ y="4050"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="324"
+ text-anchor="start"
+ id="text70">nxttail[RCU_NEXT_READY_TAIL]</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="225"
+ y="5175"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="324"
+ text-anchor="start"
+ id="text72">nxttail[RCU_NEXT_TAIL]</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="11475"
+ y="675"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text74">CB 1</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="11475"
+ y="1800"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text76">next</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="11475"
+ y="7425"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text78">CB 3</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="11475"
+ y="8550"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text80">next</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="11475"
+ y="10800"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text82">CB 4</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="11475"
+ y="11925"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text84">next</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="11475"
+ y="4050"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text86">CB 2</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="11475"
+ y="5175"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text88">next</text>
+ </g>
+</svg>
diff --git a/Documentation/RCU/Design/Requirements/2013-08-is-it-dead.png b/Documentation/RCU/Design/Requirements/2013-08-is-it-dead.png
deleted file mode 100644
index 7496a55..0000000
--- a/Documentation/RCU/Design/Requirements/2013-08-is-it-dead.png
+++ /dev/null
Binary files differ
diff --git a/Documentation/RCU/Design/Requirements/RCUApplicability.svg b/Documentation/RCU/Design/Requirements/RCUApplicability.svg
deleted file mode 100644
index ebcbeee..0000000
--- a/Documentation/RCU/Design/Requirements/RCUApplicability.svg
+++ /dev/null
@@ -1,237 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<!-- Creator: fig2dev Version 3.2 Patchlevel 5d -->
-
-<!-- CreationDate: Tue Mar 4 18:34:25 2014 -->
-
-<!-- Magnification: 3.000 -->
-
-<svg
- xmlns:dc="http://purl.org/dc/elements/1.1/"
- xmlns:cc="http://creativecommons.org/ns#"
- xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
- xmlns:svg="http://www.w3.org/2000/svg"
- xmlns="http://www.w3.org/2000/svg"
- xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
- xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
- width="1089.1382"
- height="668.21368"
- viewBox="-2121 -36 14554.634 8876.4061"
- id="svg2"
- version="1.1"
- inkscape:version="0.48.3.1 r9886"
- sodipodi:docname="RCUApplicability.svg">
- <metadata
- id="metadata40">
- <rdf:RDF>
- <cc:Work
- rdf:about="">
- <dc:format>image/svg+xml</dc:format>
- <dc:type
- rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
- <dc:title />
- </cc:Work>
- </rdf:RDF>
- </metadata>
- <defs
- id="defs38" />
- <sodipodi:namedview
- pagecolor="#ffffff"
- bordercolor="#666666"
- borderopacity="1"
- objecttolerance="10"
- gridtolerance="10"
- guidetolerance="10"
- inkscape:pageopacity="0"
- inkscape:pageshadow="2"
- inkscape:window-width="849"
- inkscape:window-height="639"
- id="namedview36"
- showgrid="false"
- inkscape:zoom="0.51326165"
- inkscape:cx="544.56912"
- inkscape:cy="334.10686"
- inkscape:window-x="149"
- inkscape:window-y="448"
- inkscape:window-maximized="0"
- inkscape:current-layer="g4"
- fit-margin-top="5"
- fit-margin-left="5"
- fit-margin-right="5"
- fit-margin-bottom="5" />
- <g
- style="fill:none;stroke-width:0.025in"
- id="g4"
- transform="translate(-2043.6828,14.791398)">
- <!-- Line: box -->
- <rect
- x="0"
- y="0"
- width="14400"
- height="8775"
- rx="0"
- style="fill:#ffa1a1;stroke:#000000;stroke-width:21;stroke-linecap:butt;stroke-linejoin:miter"
- id="rect6" />
- <!-- Line: box -->
- <rect
- x="1350"
- y="0"
- width="11700"
- height="6075"
- rx="0"
- style="fill:#ffff00;stroke:#000000;stroke-width:21;stroke-linecap:butt;stroke-linejoin:miter"
- id="rect8" />
- <!-- Line: box -->
- <rect
- x="2700"
- y="0"
- width="9000"
- height="4275"
- rx="0"
- style="fill:#00ff00;stroke:#000000;stroke-width:21;stroke-linecap:butt;stroke-linejoin:miter"
- id="rect10" />
- <!-- Line: box -->
- <rect
- x="4050"
- y="0"
- width="6300"
- height="2475"
- rx="0"
- style="fill:#87cfff;stroke:#000000;stroke-width:21;stroke-linecap:butt;stroke-linejoin:miter"
- id="rect12" />
- <!-- Text -->
- <text
- xml:space="preserve"
- x="7200"
- y="900"
- font-style="normal"
- font-weight="normal"
- font-size="324"
- id="text14"
- sodipodi:linespacing="125%"
- style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"><tspan
- style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
- id="tspan3017">Read-Mostly, Stale &</tspan></text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="7200"
- y="1350"
- font-style="normal"
- font-weight="normal"
- font-size="324"
- id="text16"
- sodipodi:linespacing="125%"
- style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"><tspan
- style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
- id="tspan3019">Inconsistent Data OK</tspan></text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="7200"
- y="1800"
- font-style="normal"
- font-weight="normal"
- font-size="324"
- id="text18"
- sodipodi:linespacing="125%"
- style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"><tspan
- style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
- id="tspan3021">(RCU Works Great!!!)</tspan></text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="7200"
- y="3825"
- font-style="normal"
- font-weight="normal"
- font-size="324"
- id="text20"
- sodipodi:linespacing="125%"
- style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"><tspan
- style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
- id="tspan3023">(RCU Works Well)</tspan></text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="7200"
- y="3375"
- font-style="normal"
- font-weight="normal"
- font-size="324"
- id="text22"
- sodipodi:linespacing="125%"
- style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"><tspan
- style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
- id="tspan3025">Read-Mostly, Need Consistent Data</tspan></text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="7200"
- y="5175"
- font-style="normal"
- font-weight="normal"
- font-size="324"
- id="text24"
- sodipodi:linespacing="125%"
- style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"><tspan
- style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
- id="tspan3027">Read-Write, Need Consistent Data</tspan></text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="7200"
- y="6975"
- font-style="normal"
- font-weight="normal"
- font-size="324"
- id="text26"
- style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
- sodipodi:linespacing="125%">Update-Mostly, Need Consistent Data</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="7200"
- y="5625"
- font-style="normal"
- font-weight="normal"
- font-size="324"
- id="text28"
- sodipodi:linespacing="125%"
- style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"><tspan
- style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
- id="tspan3029">(RCU Might Be OK...)</tspan></text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="7200"
- y="7875"
- font-style="normal"
- font-weight="normal"
- font-size="324"
- id="text30"
- style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
- sodipodi:linespacing="125%">(1) Provide Existence Guarantees For Update-Friendly Mechanisms</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="7200"
- y="8325"
- font-style="normal"
- font-weight="normal"
- font-size="324"
- id="text32"
- style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
- sodipodi:linespacing="125%">(2) Provide Wait-Free Read-Side Primitives for Real-Time Use)</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="7200"
- y="7425"
- font-style="normal"
- font-weight="normal"
- font-size="324"
- id="text34"
- style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
- sodipodi:linespacing="125%">(RCU is Very Unlikely to be the Right Tool For The Job, But it Can:</text>
- </g>
-</svg>
diff --git a/Documentation/RCU/Design/Requirements/Requirements.html b/Documentation/RCU/Design/Requirements/Requirements.html
index a725f99..e7e24b3 100644
--- a/Documentation/RCU/Design/Requirements/Requirements.html
+++ b/Documentation/RCU/Design/Requirements/Requirements.html
@@ -1,5 +1,3 @@
-<!-- DO NOT HAND EDIT. -->
-<!-- Instead, edit Documentation/RCU/Design/Requirements/Requirements.htmlx and run 'sh htmlqqz.sh Documentation/RCU/Design/Requirements/Requirements' -->
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
"http://www.w3.org/TR/html4/loose.dtd">
<html>
@@ -65,8 +63,8 @@
<p>
This is followed by a <a href="#Summary">summary</a>,
-which is in turn followed by the inevitable
-<a href="#Answers to Quick Quizzes">answers to the quick quizzes</a>.
+though here the answer to each quick quiz immediately follows the quiz.
+Select the big white space with your mouse to see the answer.
<h2><a name="Fundamental Requirements">Fundamental Requirements</a></h2>
@@ -153,13 +151,27 @@
</blockquote>
cannot happen.
-<p><a name="Quick Quiz 1"><b>Quick Quiz 1</b>:</a>
-Wait a minute!
-You said that updaters can make useful forward progress concurrently
-with readers, but pre-existing readers will block
-<tt>synchronize_rcu()</tt>!!!
-Just who are you trying to fool???
-<br><a href="#qq1answer">Answer</a>
+<table>
+<tr><th> </th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+ Wait a minute!
+ You said that updaters can make useful forward progress concurrently
+ with readers, but pre-existing readers will block
+ <tt>synchronize_rcu()</tt>!!!
+ Just who are you trying to fool???
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+ First, if updaters do not wish to be blocked by readers, they can use
+ <tt>call_rcu()</tt> or <tt>kfree_rcu()</tt>, which will
+ be discussed later.
+ Second, even when using <tt>synchronize_rcu()</tt>, the other
+ update-side code does run concurrently with readers, whether
+ pre-existing or not.
+</font></td></tr>
+<tr><td> </td></tr>
+</table>
<p>
This scenario resembles one of the first uses of RCU in
@@ -210,9 +222,20 @@
with <tt>recovery()</tt>, but with little or no synchronization
overhead in <tt>do_something_dlm()</tt>.
-<p><a name="Quick Quiz 2"><b>Quick Quiz 2</b>:</a>
-Why is the <tt>synchronize_rcu()</tt> on line 28 needed?
-<br><a href="#qq2answer">Answer</a>
+<table>
+<tr><th> </th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+ Why is the <tt>synchronize_rcu()</tt> on line 28 needed?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+ Without that extra grace period, memory reordering could result in
+ <tt>do_something_dlm()</tt> executing <tt>do_something()</tt>
+ concurrently with the last bits of <tt>recovery()</tt>.
+</font></td></tr>
+<tr><td> </td></tr>
+</table>
<p>
In order to avoid fatal problems such as deadlocks,
@@ -332,12 +355,27 @@
optimizations, for example, the use of <tt>gp</tt> as a scratch
location immediately preceding the assignment.
-<p><a name="Quick Quiz 3"><b>Quick Quiz 3</b>:</a>
-But <tt>rcu_assign_pointer()</tt> does nothing to prevent the
-two assignments to <tt>p->a</tt> and <tt>p->b</tt>
-from being reordered.
-Can't that also cause problems?
-<br><a href="#qq3answer">Answer</a>
+<table>
+<tr><th> </th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+ But <tt>rcu_assign_pointer()</tt> does nothing to prevent the
+ two assignments to <tt>p->a</tt> and <tt>p->b</tt>
+ from being reordered.
+ Can't that also cause problems?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+ No, it cannot.
+ The readers cannot see either of these two fields until
+ the assignment to <tt>gp</tt>, by which time both fields are
+ fully initialized.
+ So reordering the assignments
+ to <tt>p->a</tt> and <tt>p->b</tt> cannot possibly
+ cause any problems.
+</font></td></tr>
+<tr><td> </td></tr>
+</table>
<p>
It is tempting to assume that the reader need not do anything special
@@ -494,11 +532,42 @@
code protected by the corresponding update-side lock.
</ol>
-<p><a name="Quick Quiz 4"><b>Quick Quiz 4</b>:</a>
-Without the <tt>rcu_dereference()</tt> or the
-<tt>rcu_access_pointer()</tt>, what destructive optimizations
-might the compiler make use of?
-<br><a href="#qq4answer">Answer</a>
+<table>
+<tr><th> </th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+ Without the <tt>rcu_dereference()</tt> or the
+ <tt>rcu_access_pointer()</tt>, what destructive optimizations
+ might the compiler make use of?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+ Let's start with what happens to <tt>do_something_gp()</tt>
+ if it fails to use <tt>rcu_dereference()</tt>.
+ It could reuse a value formerly fetched from this same pointer.
+ It could also fetch the pointer from <tt>gp</tt> in a byte-at-a-time
+	manner, resulting in <i>load tearing</i>, in turn resulting in a
+	bytewise mash-up of two distinct pointer values.
+ It might even use value-speculation optimizations, where it makes
+ a wrong guess, but by the time it gets around to checking the
+ value, an update has changed the pointer to match the wrong guess.
+ Too bad about any dereferences that returned pre-initialization garbage
+ in the meantime!
+ </font>
+
+ <p><font color="ffffff">
+ For <tt>remove_gp_synchronous()</tt>, as long as all modifications
+ to <tt>gp</tt> are carried out while holding <tt>gp_lock</tt>,
+ the above optimizations are harmless.
+ However,
+ with <tt>CONFIG_SPARSE_RCU_POINTER=y</tt>,
+ <tt>sparse</tt> will complain if you
+ define <tt>gp</tt> with <tt>__rcu</tt> and then
+ access it without using
+ either <tt>rcu_access_pointer()</tt> or <tt>rcu_dereference()</tt>.
+</font></td></tr>
+<tr><td> </td></tr>
+</table>
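+<p>
+To make this concrete, the following two-line contrast (a purely
+illustrative sketch, not part of the original example) shows the
+unprotected and protected forms of the same load; the two lines are
+alternatives rather than a sequence:
+
+<blockquote>
+<pre>
+ 1 q = gp;                  /* BUGGY: compiler may refetch or tear. */
+ 2 q = rcu_dereference(gp); /* OK: one marked load, safe to dereference. */
+</pre>
+</blockquote>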
<p>
In short, RCU's publish-subscribe guarantee is provided by the combination
@@ -571,17 +640,156 @@
<tt>synchronize_rcu()</tt> migrates in the meantime.
</ol>
-<p><a name="Quick Quiz 5"><b>Quick Quiz 5</b>:</a>
-Given that multiple CPUs can start RCU read-side critical sections
-at any time without any ordering whatsoever, how can RCU possibly tell whether
-or not a given RCU read-side critical section starts before a
-given instance of <tt>synchronize_rcu()</tt>?
-<br><a href="#qq5answer">Answer</a>
+<table>
+<tr><th> </th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+ Given that multiple CPUs can start RCU read-side critical sections
+ at any time without any ordering whatsoever, how can RCU possibly
+ tell whether or not a given RCU read-side critical section starts
+ before a given instance of <tt>synchronize_rcu()</tt>?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+ If RCU cannot tell whether or not a given
+ RCU read-side critical section starts before a
+ given instance of <tt>synchronize_rcu()</tt>,
+ then it must assume that the RCU read-side critical section
+ started first.
+ In other words, a given instance of <tt>synchronize_rcu()</tt>
+ can avoid waiting on a given RCU read-side critical section only
+ if it can prove that <tt>synchronize_rcu()</tt> started first.
+</font></td></tr>
+<tr><td> </td></tr>
+</table>
-<p><a name="Quick Quiz 6"><b>Quick Quiz 6</b>:</a>
-The first and second guarantees require unbelievably strict ordering!
-Are all these memory barriers <i> really</i> required?
-<br><a href="#qq6answer">Answer</a>
+<table>
+<tr><th> </th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+ The first and second guarantees require unbelievably strict ordering!
+ Are all these memory barriers <i> really</i> required?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+ Yes, they really are required.
+ To see why the first guarantee is required, consider the following
+ sequence of events:
+ </font>
+
+ <ol>
+ <li> <font color="ffffff">
+ CPU 1: <tt>rcu_read_lock()</tt>
+ </font>
+ <li> <font color="ffffff">
+ CPU 1: <tt>q = rcu_dereference(gp);
+ /* Very likely to return p. */</tt>
+ </font>
+ <li> <font color="ffffff">
+ CPU 0: <tt>list_del_rcu(p);</tt>
+ </font>
+ <li> <font color="ffffff">
+ CPU 0: <tt>synchronize_rcu()</tt> starts.
+ </font>
+ <li> <font color="ffffff">
+ CPU 1: <tt>do_something_with(q->a);
+ /* No smp_mb(), so might happen after kfree(). */</tt>
+ </font>
+ <li> <font color="ffffff">
+ CPU 1: <tt>rcu_read_unlock()</tt>
+ </font>
+ <li> <font color="ffffff">
+ CPU 0: <tt>synchronize_rcu()</tt> returns.
+ </font>
+ <li> <font color="ffffff">
+ CPU 0: <tt>kfree(p);</tt>
+ </font>
+ </ol>
+
+ <p><font color="ffffff">
+ Therefore, there absolutely must be a full memory barrier between the
+ end of the RCU read-side critical section and the end of the
+ grace period.
+ </font>
+
+ <p><font color="ffffff">
+ The sequence of events demonstrating the necessity of the second rule
+ is roughly similar:
+ </font>
+
+ <ol>
+ <li> <font color="ffffff">CPU 0: <tt>list_del_rcu(p);</tt>
+ </font>
+ <li> <font color="ffffff">CPU 0: <tt>synchronize_rcu()</tt> starts.
+ </font>
+ <li> <font color="ffffff">CPU 1: <tt>rcu_read_lock()</tt>
+ </font>
+ <li> <font color="ffffff">CPU 1: <tt>q = rcu_dereference(gp);
+ /* Might return p if no memory barrier. */</tt>
+ </font>
+ <li> <font color="ffffff">CPU 0: <tt>synchronize_rcu()</tt> returns.
+ </font>
+ <li> <font color="ffffff">CPU 0: <tt>kfree(p);</tt>
+ </font>
+ <li> <font color="ffffff">
+ CPU 1: <tt>do_something_with(q->a); /* Boom!!! */</tt>
+ </font>
+ <li> <font color="ffffff">CPU 1: <tt>rcu_read_unlock()</tt>
+ </font>
+ </ol>
+
+ <p><font color="ffffff">
+ And similarly, without a memory barrier between the beginning of the
+ grace period and the beginning of the RCU read-side critical section,
+ CPU 1 might end up accessing the freelist.
+ </font>
+
+ <p><font color="ffffff">
+ The “as if” rule of course applies, so that any
+ implementation that acts as if the appropriate memory barriers
+ were in place is a correct implementation.
+ That said, it is much easier to fool yourself into believing
+ that you have adhered to the as-if rule than it is to actually
+ adhere to it!
+</font></td></tr>
+<tr><td> </td></tr>
+</table>
+
+<table>
+<tr><th> </th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+ You claim that <tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>
+ generate absolutely no code in some kernel builds.
+ This means that the compiler might arbitrarily rearrange consecutive
+ RCU read-side critical sections.
+ Given such rearrangement, if a given RCU read-side critical section
+ is done, how can you be sure that all prior RCU read-side critical
+ sections are done?
+ Won't the compiler rearrangements make that impossible to determine?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+ In cases where <tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>
+ generate absolutely no code, RCU infers quiescent states only at
+ special locations, for example, within the scheduler.
+ Because calls to <tt>schedule()</tt> had better prevent calling-code
+ accesses to shared variables from being rearranged across the call to
+ <tt>schedule()</tt>, if RCU detects the end of a given RCU read-side
+ critical section, it will necessarily detect the end of all prior
+ RCU read-side critical sections, no matter how aggressively the
+ compiler scrambles the code.
+ </font>
+
+ <p><font color="ffffff">
+ Again, this all assumes that the compiler cannot scramble code across
+ calls to the scheduler, out of interrupt handlers, into the idle loop,
+ into user-mode code, and so on.
+ But if your kernel build allows that sort of scrambling, you have broken
+ far more than just RCU!
+</font></td></tr>
+<tr><td> </td></tr>
+</table>
<p>
Note that these memory-barrier requirements do not replace the fundamental
@@ -626,9 +834,19 @@
<tt>call_rcu()</tt> and <tt>kfree_rcu()</tt> API members
described later in this document.
-<p><a name="Quick Quiz 7"><b>Quick Quiz 7</b>:</a>
-But how does the upgrade-to-write operation exclude other readers?
-<br><a href="#qq7answer">Answer</a>
+<table>
+<tr><th> </th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+ But how does the upgrade-to-write operation exclude other readers?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+ It doesn't, just like normal RCU updates, which also do not exclude
+ RCU readers.
+</font></td></tr>
+<tr><td> </td></tr>
+</table>
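+<p>
+As a minimal sketch (with hypothetical <tt>->lock</tt> and
+<tt>->deleted</tt> fields), one common upgrade pattern acquires a
+per-element spinlock within the read-side critical section and then
+rechecks that the element is still live before writing:
+
+<blockquote>
+<pre>
+ 1 rcu_read_lock();
+ 2 p = rcu_dereference(gp);
+ 3 if (p) {
+ 4   spin_lock(&p->lock);  /* Upgrade to write... */
+ 5   if (!p->deleted)      /* ...but recheck under the lock. */
+ 6     p->b++;
+ 7   spin_unlock(&p->lock);
+ 8 }
+ 9 rcu_read_unlock();
+</pre>
+</blockquote>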
<p>
This guarantee allows lookup code to be shared between read-side
@@ -714,9 +932,20 @@
This is by design: Any significant ordering constraints would slow down
these fast-path APIs.
-<p><a name="Quick Quiz 8"><b>Quick Quiz 8</b>:</a>
-Can't the compiler also reorder this code?
-<br><a href="#qq8answer">Answer</a>
+<table>
+<tr><th> </th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+ Can't the compiler also reorder this code?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+ No, the volatile casts in <tt>READ_ONCE()</tt> and
+ <tt>WRITE_ONCE()</tt> prevent the compiler from reordering in
+ this particular case.
+</font></td></tr>
+<tr><td> </td></tr>
+</table>
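+<p>
+For example, in the following hypothetical snippet, the volatile
+accesses within <tt>READ_ONCE()</tt> and <tt>WRITE_ONCE()</tt> force
+the compiler to emit the two accesses in program order, though the
+CPU remains free to reorder them:
+
+<blockquote>
+<pre>
+ 1 WRITE_ONCE(x, 1);  /* Volatile store... */
+ 2 r1 = READ_ONCE(y); /* ...cannot be compiler-reordered with this load. */
+</pre>
+</blockquote>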
<h3><a name="Readers Do Not Exclude Updaters">Readers Do Not Exclude Updaters</a></h3>
@@ -769,10 +998,28 @@
starts, and <tt>synchronize_rcu()</tt> is under no
obligation to wait for these new readers.
-<p><a name="Quick Quiz 9"><b>Quick Quiz 9</b>:</a>
-Suppose that synchronize_rcu() did wait until all readers had completed.
-Would the updater be able to rely on this?
-<br><a href="#qq9answer">Answer</a>
+<table>
+<tr><th> </th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+ Suppose that synchronize_rcu() did wait until <i>all</i>
+ readers had completed instead of waiting only on
+ pre-existing readers.
+ For how long would the updater be able to rely on there
+ being no readers?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+ For no time at all.
+ Even if <tt>synchronize_rcu()</tt> were to wait until
+ all readers had completed, a new reader might start immediately after
+ <tt>synchronize_rcu()</tt> completed.
+ Therefore, the code following
+ <tt>synchronize_rcu()</tt> can <i>never</i> rely on there being
+ no readers.
+</font></td></tr>
+<tr><td> </td></tr>
+</table>
<h3><a name="Grace Periods Don't Partition Read-Side Critical Sections">
Grace Periods Don't Partition Read-Side Critical Sections</a></h3>
@@ -969,11 +1216,24 @@
As a result, an RCU read-side critical section cannot partition a pair
of RCU grace periods.
-<p><a name="Quick Quiz 10"><b>Quick Quiz 10</b>:</a>
-How long a sequence of grace periods, each separated by an RCU read-side
-critical section, would be required to partition the RCU read-side
-critical sections at the beginning and end of the chain?
-<br><a href="#qq10answer">Answer</a>
+<table>
+<tr><th> </th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+ How long a sequence of grace periods, each separated by an RCU
+ read-side critical section, would be required to partition the RCU
+ read-side critical sections at the beginning and end of the chain?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+ In theory, an infinite number.
+ In practice, an unknown number that is sensitive to both implementation
+ details and timing considerations.
+ Therefore, even in practice, RCU users must abide by the
+ theoretical rather than the practical answer.
+</font></td></tr>
+<tr><td> </td></tr>
+</table>
<h3><a name="Disabling Preemption Does Not Block Grace Periods">
Disabling Preemption Does Not Block Grace Periods</a></h3>
@@ -1109,12 +1369,27 @@
<h3><a name="Specialization">Specialization</a></h3>
<p>
-RCU is and always has been intended primarily for read-mostly situations, as
-illustrated by the following figure.
-This means that RCU's read-side primitives are optimized, often at the
+RCU is and always has been intended primarily for read-mostly situations,
+which means that RCU's read-side primitives are optimized, often at the
expense of its update-side primitives.
+Experience thus far is captured by the following list of situations:
-<p><img src="RCUApplicability.svg" alt="RCUApplicability.svg" width="70%"></p>
+<ol>
+<li> Read-mostly data, where stale and inconsistent data is not
+ a problem: RCU works great!
+<li> Read-mostly data, where data must be consistent:
+ RCU works well.
+<li> Read-write data, where data must be consistent:
+ RCU <i>might</i> work OK.
+ Or not.
+<li> Write-mostly data, where data must be consistent:
+ RCU is very unlikely to be the right tool for the job,
+ with the following exceptions, where RCU can provide:
+ <ol type=a>
+ <li> Existence guarantees for update-friendly mechanisms.
+ <li> Wait-free read-side primitives for real-time use.
+ </ol>
+</ol>
<p>
This focus on read-mostly situations means that RCU must interoperate
@@ -1127,9 +1402,43 @@
including spinlocks, sequence locks, atomic operations, reference
counters, and memory barriers.
-<p><a name="Quick Quiz 11"><b>Quick Quiz 11</b>:</a>
-What about sleeping locks?
-<br><a href="#qq11answer">Answer</a>
+<table>
+<tr><th> </th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+ What about sleeping locks?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+ These are forbidden within Linux-kernel RCU read-side critical
+ sections because it is not legal to place a quiescent state
+ (in this case, voluntary context switch) within an RCU read-side
+ critical section.
+ However, sleeping locks may be used within userspace RCU read-side
+ critical sections, and also within Linux-kernel sleepable RCU
+ <a href="#Sleepable RCU"><font color="ffffff">(SRCU)</font></a>
+ read-side critical sections.
+	In addition, the -rt patchset turns spinlocks into
+	sleeping locks so that the corresponding critical sections
+	can be preempted, which also means that these sleeplockified
+	spinlocks (but not other sleeping locks!) may be acquired within
+	-rt-Linux-kernel RCU read-side critical sections.
+ </font>
+
+ <p><font color="ffffff">
+ Note that it <i>is</i> legal for a normal RCU read-side
+	critical section to conditionally acquire a sleeping lock
+	(as in <tt>mutex_trylock()</tt>), but only as long as it does
+	not loop indefinitely attempting to conditionally acquire that
+	sleeping lock.
+ The key point is that things like <tt>mutex_trylock()</tt>
+ either return with the mutex held, or return an error indication if
+ the mutex was not immediately available.
+ Either way, <tt>mutex_trylock()</tt> returns immediately without
+ sleeping.
+</font></td></tr>
+<tr><td> </td></tr>
+</table>
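+<p>
+A minimal sketch of the legal conditional-acquisition pattern
+(hypothetical names) looks like this:
+
+<blockquote>
+<pre>
+ 1 rcu_read_lock();
+ 2 p = rcu_dereference(gp);
+ 3 if (p && mutex_trylock(&p->lock)) {
+ 4   do_something_locked(p); /* Mutex held, still in RCU reader. */
+ 5   mutex_unlock(&p->lock); /* Does not sleep. */
+ 6 }
+ 7 rcu_read_unlock();
+</pre>
+</blockquote>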
<p>
It often comes as a surprise that many algorithms do not require a
@@ -1160,10 +1469,7 @@
One of our pair of veterinarians might wait 30 seconds before pronouncing
the cat dead, while the other might insist on waiting a full minute.
The two veterinarians would then disagree on the state of the cat during
-the final 30 seconds of the minute following the last heartbeat, as
-fancifully illustrated below:
-
-<p><img src="2013-08-is-it-dead.png" alt="2013-08-is-it-dead.png" width="431"></p>
+the final 30 seconds of the minute following the last heartbeat.
<p>
Interestingly enough, this same situation applies to hardware.
@@ -1343,7 +1649,8 @@
<tt>synchronize_rcu_expedited()</tt> would be legal,
including within preempt-disable code, <tt>local_bh_disable()</tt> code,
interrupt-disable code, and interrupt handlers.
-However, even <tt>call_rcu()</tt> is illegal within NMI handlers.
+However, even <tt>call_rcu()</tt> is illegal within NMI handlers
+and from idle and offline CPUs.
The callback function (<tt>remove_gp_cb()</tt> in this case) will be
executed within softirq (software interrupt) environment within the
Linux kernel,
@@ -1354,12 +1661,27 @@
Long-running operations should be relegated to separate threads or
(in the Linux kernel) workqueues.
-<p><a name="Quick Quiz 12"><b>Quick Quiz 12</b>:</a>
-Why does line 19 use <tt>rcu_access_pointer()</tt>?
-After all, <tt>call_rcu()</tt> on line 25 stores into the
-structure, which would interact badly with concurrent insertions.
-Doesn't this mean that <tt>rcu_dereference()</tt> is required?
-<br><a href="#qq12answer">Answer</a>
+<table>
+<tr><th> </th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+ Why does line 19 use <tt>rcu_access_pointer()</tt>?
+ After all, <tt>call_rcu()</tt> on line 25 stores into the
+ structure, which would interact badly with concurrent insertions.
+ Doesn't this mean that <tt>rcu_dereference()</tt> is required?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+ Presumably the <tt>->gp_lock</tt> acquired on line 18 excludes
+ any changes, including any insertions that <tt>rcu_dereference()</tt>
+ would protect against.
+ Therefore, any insertions will be delayed until after
+ <tt>->gp_lock</tt>
+ is released on line 25, which in turn means that
+ <tt>rcu_access_pointer()</tt> suffices.
+</font></td></tr>
+<tr><td> </td></tr>
+</table>
<p>
However, all that <tt>remove_gp_cb()</tt> is doing is
@@ -1406,14 +1728,31 @@
so the very few places that needed something like
<tt>synchronize_rcu()</tt> simply open-coded it.
-<p><a name="Quick Quiz 13"><b>Quick Quiz 13</b>:</a>
-Earlier it was claimed that <tt>call_rcu()</tt> and
-<tt>kfree_rcu()</tt> allowed updaters to avoid being blocked
-by readers.
-But how can that be correct, given that the invocation of the callback
-and the freeing of the memory (respectively) must still wait for
-a grace period to elapse?
-<br><a href="#qq13answer">Answer</a>
+<table>
+<tr><th> </th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+ Earlier it was claimed that <tt>call_rcu()</tt> and
+ <tt>kfree_rcu()</tt> allowed updaters to avoid being blocked
+ by readers.
+ But how can that be correct, given that the invocation of the callback
+ and the freeing of the memory (respectively) must still wait for
+ a grace period to elapse?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+ We could define things this way, but keep in mind that this sort of
+ definition would say that updates in garbage-collected languages
+ cannot complete until the next time the garbage collector runs,
+ which does not seem at all reasonable.
+ The key point is that in most cases, an updater using either
+ <tt>call_rcu()</tt> or <tt>kfree_rcu()</tt> can proceed to the
+ next update as soon as it has invoked <tt>call_rcu()</tt> or
+ <tt>kfree_rcu()</tt>, without having to wait for a subsequent
+ grace period.
+</font></td></tr>
+<tr><td> </td></tr>
+</table>
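+<p>
+For example, in the following minimal sketch (hypothetical names,
+assuming the structure contains a <tt>struct rcu_head rh</tt> field),
+the updater may move on to its next update as soon as
+<tt>kfree_rcu()</tt> returns, which it does immediately:
+
+<blockquote>
+<pre>
+ 1 spin_lock(&gp_lock);
+ 2 p = rcu_dereference_protected(gp, lockdep_is_held(&gp_lock));
+ 3 rcu_assign_pointer(gp, NULL);
+ 4 spin_unlock(&gp_lock);
+ 5 if (p)
+ 6   kfree_rcu(p, rh); /* Actual kfree() happens after a grace period. */
+</pre>
+</blockquote>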
<p>
But what if the updater must wait for the completion of code to be
@@ -1838,11 +2177,26 @@
Therefore, invoking <tt>synchronize_rcu()</tt> during scheduler
initialization can result in deadlock.
-<p><a name="Quick Quiz 14"><b>Quick Quiz 14</b>:</a>
-So what happens with <tt>synchronize_rcu()</tt> during
-scheduler initialization for <tt>CONFIG_PREEMPT=n</tt>
-kernels?
-<br><a href="#qq14answer">Answer</a>
+<table>
+<tr><th> </th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+ So what happens with <tt>synchronize_rcu()</tt> during
+ scheduler initialization for <tt>CONFIG_PREEMPT=n</tt>
+ kernels?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+	In <tt>CONFIG_PREEMPT=n</tt> kernels, <tt>synchronize_rcu()</tt>
+ maps directly to <tt>synchronize_sched()</tt>.
+ Therefore, <tt>synchronize_rcu()</tt> works normally throughout
+ boot in <tt>CONFIG_PREEMPT=n</tt> kernels.
+ However, your code must also work in <tt>CONFIG_PREEMPT=y</tt> kernels,
+ so it is still necessary to avoid invoking <tt>synchronize_rcu()</tt>
+ during scheduler initialization.
+</font></td></tr>
+<tr><td> </td></tr>
+</table>
<p>
I learned of these boot-time requirements as a result of a series of
@@ -2171,6 +2525,14 @@
also simplified handling of a number of race conditions.
<p>
+RCU must avoid degrading real-time response for CPU-bound threads, whether
+executing in usermode (which is one use case for
+<tt>CONFIG_NO_HZ_FULL=y</tt>) or in the kernel.
+That said, CPU-bound loops in the kernel must execute
+<tt>cond_resched_rcu_qs()</tt> at least once every few tens of milliseconds
+in order to avoid receiving an IPI from RCU.
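+For example, a hypothetical CPU-bound loop might place the call
+as follows:
+
+<blockquote>
+<pre>
+ 1 while (!READ_ONCE(done)) {
+ 2   process_one_batch();   /* Bounded chunk of work. */
+ 3   cond_resched_rcu_qs(); /* Report a quiescent state to RCU. */
+ 4 }
+</pre>
+</blockquote>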
+
+<p>
Finally, RCU's status as a synchronization primitive means that
any RCU failure can result in arbitrary memory corruption that can be
extremely difficult to debug.
@@ -2223,6 +2585,8 @@
<li> <a href="#Sched Flavor">Sched Flavor</a>
<li> <a href="#Sleepable RCU">Sleepable RCU</a>
<li> <a href="#Tasks RCU">Tasks RCU</a>
+<li> <a href="#Waiting for Multiple Grace Periods">
+ Waiting for Multiple Grace Periods</a>
</ol>
<h3><a name="Bottom-Half Flavor">Bottom-Half Flavor</a></h3>
@@ -2472,6 +2836,94 @@
<tt>synchronize_rcu_tasks()</tt>, and
<tt>rcu_barrier_tasks()</tt>.
+<h3><a name="Waiting for Multiple Grace Periods">
+Waiting for Multiple Grace Periods</a></h3>
+
+<p>
+Perhaps you have an RCU protected data structure that is accessed from
+RCU read-side critical sections, from softirq handlers, and from
+hardware interrupt handlers.
+That is three flavors of RCU: the normal flavor, the bottom-half flavor,
+and the sched flavor.
+How can you wait for a compound grace period?
+
+<p>
+The best approach is usually to “just say no!” and
+insert <tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>
+around each RCU read-side critical section, regardless of what
+environment it happens to be in.
+But suppose that some of the RCU read-side critical sections are
+on extremely hot code paths, and that use of <tt>CONFIG_PREEMPT=n</tt>
+is not a viable option, so that <tt>rcu_read_lock()</tt> and
+<tt>rcu_read_unlock()</tt> are not free.
+What then?
+
+<p>
+You <i>could</i> wait on all three grace periods in succession, as follows:
+
+<blockquote>
+<pre>
+ 1 synchronize_rcu();
+ 2 synchronize_rcu_bh();
+ 3 synchronize_sched();
+</pre>
+</blockquote>
+
+<p>
+This works, but triples the update-side latency penalty.
+In cases where this is not acceptable, <tt>synchronize_rcu_mult()</tt>
+may be used to wait on all three flavors of grace period concurrently:
+
+<blockquote>
+<pre>
+ 1 synchronize_rcu_mult(call_rcu, call_rcu_bh, call_rcu_sched);
+</pre>
+</blockquote>
+
+<p>
+But what if it is necessary to also wait on SRCU?
+This can be done as follows:
+
+<blockquote>
+<pre>
+ 1 static void call_my_srcu(struct rcu_head *head,
+ 2 void (*func)(struct rcu_head *head))
+ 3 {
+ 4 call_srcu(&my_srcu, head, func);
+ 5 }
+ 6
+ 7 synchronize_rcu_mult(call_rcu, call_rcu_bh, call_rcu_sched, call_my_srcu);
+</pre>
+</blockquote>
+
+<p>
+If you needed to wait on multiple different flavors of SRCU
+(but why???), you would need to create a wrapper function resembling
+<tt>call_my_srcu()</tt> for each SRCU flavor.
+
+<table>
+<tr><th> </th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+ But what if I need to wait for multiple RCU flavors, but I also need
+ the grace periods to be expedited?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+ If you are using expedited grace periods, there should be less penalty
+ for waiting on them in succession.
+ But if that is nevertheless a problem, you can use workqueues
+ or multiple kthreads to wait on the various expedited grace
+ periods concurrently.
+</font></td></tr>
+<tr><td> </td></tr>
+</table>
+
+<p>
+Again, it is usually better to adjust the RCU read-side critical sections
+to use a single flavor of RCU, but when this is not feasible, you can use
+<tt>synchronize_rcu_mult()</tt>.
+
<h2><a name="Possible Future Changes">Possible Future Changes</a></h2>
<p>
@@ -2569,329 +3021,4 @@
under the terms of the Creative Commons Attribution-Share Alike 3.0
United States license.
-<h3><a name="Answers to Quick Quizzes">
-Answers to Quick Quizzes</a></h3>
-
-<a name="qq1answer"></a>
-<p><b>Quick Quiz 1</b>:
-Wait a minute!
-You said that updaters can make useful forward progress concurrently
-with readers, but pre-existing readers will block
-<tt>synchronize_rcu()</tt>!!!
-Just who are you trying to fool???
-
-
-</p><p><b>Answer</b>:
-First, if updaters do not wish to be blocked by readers, they can use
-<tt>call_rcu()</tt> or <tt>kfree_rcu()</tt>, which will
-be discussed later.
-Second, even when using <tt>synchronize_rcu()</tt>, the other
-update-side code does run concurrently with readers, whether pre-existing
-or not.
-
-
-</p><p><a href="#Quick%20Quiz%201"><b>Back to Quick Quiz 1</b>.</a>
-
-<a name="qq2answer"></a>
-<p><b>Quick Quiz 2</b>:
-Why is the <tt>synchronize_rcu()</tt> on line 28 needed?
-
-
-</p><p><b>Answer</b>:
-Without that extra grace period, memory reordering could result in
-<tt>do_something_dlm()</tt> executing <tt>do_something()</tt>
-concurrently with the last bits of <tt>recovery()</tt>.
-
-
-</p><p><a href="#Quick%20Quiz%202"><b>Back to Quick Quiz 2</b>.</a>
-
-<a name="qq3answer"></a>
-<p><b>Quick Quiz 3</b>:
-But <tt>rcu_assign_pointer()</tt> does nothing to prevent the
-two assignments to <tt>p->a</tt> and <tt>p->b</tt>
-from being reordered.
-Can't that also cause problems?
-
-
-</p><p><b>Answer</b>:
-No, it cannot.
-The readers cannot see either of these two fields until
-the assignment to <tt>gp</tt>, by which time both fields are
-fully initialized.
-So reordering the assignments
-to <tt>p->a</tt> and <tt>p->b</tt> cannot possibly
-cause any problems.
-
-
-</p><p><a href="#Quick%20Quiz%203"><b>Back to Quick Quiz 3</b>.</a>
-
-<a name="qq4answer"></a>
-<p><b>Quick Quiz 4</b>:
-Without the <tt>rcu_dereference()</tt> or the
-<tt>rcu_access_pointer()</tt>, what destructive optimizations
-might the compiler make use of?
-
-
-</p><p><b>Answer</b>:
-Let's start with what happens to <tt>do_something_gp()</tt>
-if it fails to use <tt>rcu_dereference()</tt>.
-It could reuse a value formerly fetched from this same pointer.
-It could also fetch the pointer from <tt>gp</tt> in a byte-at-a-time
-manner, resulting in <i>load tearing</i>, in turn resulting a bytewise
-mash-up of two distince pointer values.
-It might even use value-speculation optimizations, where it makes a wrong
-guess, but by the time it gets around to checking the value, an update
-has changed the pointer to match the wrong guess.
-Too bad about any dereferences that returned pre-initialization garbage
-in the meantime!
-
-<p>
-For <tt>remove_gp_synchronous()</tt>, as long as all modifications
-to <tt>gp</tt> are carried out while holding <tt>gp_lock</tt>,
-the above optimizations are harmless.
-However,
-with <tt>CONFIG_SPARSE_RCU_POINTER=y</tt>,
-<tt>sparse</tt> will complain if you
-define <tt>gp</tt> with <tt>__rcu</tt> and then
-access it without using
-either <tt>rcu_access_pointer()</tt> or <tt>rcu_dereference()</tt>.
-
-
-</p><p><a href="#Quick%20Quiz%204"><b>Back to Quick Quiz 4</b>.</a>
-
-<a name="qq5answer"></a>
-<p><b>Quick Quiz 5</b>:
-Given that multiple CPUs can start RCU read-side critical sections
-at any time without any ordering whatsoever, how can RCU possibly tell whether
-or not a given RCU read-side critical section starts before a
-given instance of <tt>synchronize_rcu()</tt>?
-
-
-</p><p><b>Answer</b>:
-If RCU cannot tell whether or not a given
-RCU read-side critical section starts before a
-given instance of <tt>synchronize_rcu()</tt>,
-then it must assume that the RCU read-side critical section
-started first.
-In other words, a given instance of <tt>synchronize_rcu()</tt>
-can avoid waiting on a given RCU read-side critical section only
-if it can prove that <tt>synchronize_rcu()</tt> started first.
-
-
-</p><p><a href="#Quick%20Quiz%205"><b>Back to Quick Quiz 5</b>.</a>
-
-<a name="qq6answer"></a>
-<p><b>Quick Quiz 6</b>:
-The first and second guarantees require unbelievably strict ordering!
-Are all these memory barriers <i> really</i> required?
-
-
-</p><p><b>Answer</b>:
-Yes, they really are required.
-To see why the first guarantee is required, consider the following
-sequence of events:
-
-<ol>
-<li> CPU 1: <tt>rcu_read_lock()</tt>
-<li> CPU 1: <tt>q = rcu_dereference(gp);
- /* Very likely to return p. */</tt>
-<li> CPU 0: <tt>list_del_rcu(p);</tt>
-<li> CPU 0: <tt>synchronize_rcu()</tt> starts.
-<li> CPU 1: <tt>do_something_with(q->a);
- /* No smp_mb(), so might happen after kfree(). */</tt>
-<li> CPU 1: <tt>rcu_read_unlock()</tt>
-<li> CPU 0: <tt>synchronize_rcu()</tt> returns.
-<li> CPU 0: <tt>kfree(p);</tt>
-</ol>
-
-<p>
-Therefore, there absolutely must be a full memory barrier between the
-end of the RCU read-side critical section and the end of the
-grace period.
-
-<p>
-The sequence of events demonstrating the necessity of the second rule
-is roughly similar:
-
-<ol>
-<li> CPU 0: <tt>list_del_rcu(p);</tt>
-<li> CPU 0: <tt>synchronize_rcu()</tt> starts.
-<li> CPU 1: <tt>rcu_read_lock()</tt>
-<li> CPU 1: <tt>q = rcu_dereference(gp);
- /* Might return p if no memory barrier. */</tt>
-<li> CPU 0: <tt>synchronize_rcu()</tt> returns.
-<li> CPU 0: <tt>kfree(p);</tt>
-<li> CPU 1: <tt>do_something_with(q->a); /* Boom!!! */</tt>
-<li> CPU 1: <tt>rcu_read_unlock()</tt>
-</ol>
-
-<p>
-And similarly, without a memory barrier between the beginning of the
-grace period and the beginning of the RCU read-side critical section,
-CPU 1 might end up accessing the freelist.
-
-<p>
-The “as if” rule of course applies, so that any implementation
-that acts as if the appropriate memory barriers were in place is a
-correct implementation.
-That said, it is much easier to fool yourself into believing that you have
-adhered to the as-if rule than it is to actually adhere to it!
-
-
-</p><p><a href="#Quick%20Quiz%206"><b>Back to Quick Quiz 6</b>.</a>
-
-<a name="qq7answer"></a>
-<p><b>Quick Quiz 7</b>:
-But how does the upgrade-to-write operation exclude other readers?
-
-
-</p><p><b>Answer</b>:
-It doesn't, just like normal RCU updates, which also do not exclude
-RCU readers.
-
-
-</p><p><a href="#Quick%20Quiz%207"><b>Back to Quick Quiz 7</b>.</a>
-
-<a name="qq8answer"></a>
-<p><b>Quick Quiz 8</b>:
-Can't the compiler also reorder this code?
-
-
-</p><p><b>Answer</b>:
-No, the volatile casts in <tt>READ_ONCE()</tt> and
-<tt>WRITE_ONCE()</tt> prevent the compiler from reordering in
-this particular case.
-
-
-</p><p><a href="#Quick%20Quiz%208"><b>Back to Quick Quiz 8</b>.</a>
-
-<a name="qq9answer"></a>
-<p><b>Quick Quiz 9</b>:
-Suppose that synchronize_rcu() did wait until all readers had completed.
-Would the updater be able to rely on this?
-
-
-</p><p><b>Answer</b>:
-No.
-Even if <tt>synchronize_rcu()</tt> were to wait until
-all readers had completed, a new reader might start immediately after
-<tt>synchronize_rcu()</tt> completed.
-Therefore, the code following
-<tt>synchronize_rcu()</tt> cannot rely on there being no readers
-in any case.
-
-
-</p><p><a href="#Quick%20Quiz%209"><b>Back to Quick Quiz 9</b>.</a>
-
-<a name="qq10answer"></a>
-<p><b>Quick Quiz 10</b>:
-How long a sequence of grace periods, each separated by an RCU read-side
-critical section, would be required to partition the RCU read-side
-critical sections at the beginning and end of the chain?
-
-
-</p><p><b>Answer</b>:
-In theory, an infinite number.
-In practice, an unknown number that is sensitive to both implementation
-details and timing considerations.
-Therefore, even in practice, RCU users must abide by the theoretical rather
-than the practical answer.
-
-
-</p><p><a href="#Quick%20Quiz%2010"><b>Back to Quick Quiz 10</b>.</a>
-
-<a name="qq11answer"></a>
-<p><b>Quick Quiz 11</b>:
-What about sleeping locks?
-
-
-</p><p><b>Answer</b>:
-These are forbidden within Linux-kernel RCU read-side critical sections
-because it is not legal to place a quiescent state (in this case,
-voluntary context switch) within an RCU read-side critical section.
-However, sleeping locks may be used within userspace RCU read-side critical
-sections, and also within Linux-kernel sleepable RCU
-<a href="#Sleepable RCU">(SRCU)</a>
-read-side critical sections.
-In addition, the -rt patchset turns spinlocks into a sleeping locks so
-that the corresponding critical sections can be preempted, which
-also means that these sleeplockified spinlocks (but not other sleeping locks!)
-may be acquire within -rt-Linux-kernel RCU read-side critical sections.
-
-<p>
-Note that it <i>is</i> legal for a normal RCU read-side critical section
-to conditionally acquire a sleeping locks (as in <tt>mutex_trylock()</tt>),
-but only as long as it does not loop indefinitely attempting to
-conditionally acquire that sleeping locks.
-The key point is that things like <tt>mutex_trylock()</tt>
-either return with the mutex held, or return an error indication if
-the mutex was not immediately available.
-Either way, <tt>mutex_trylock()</tt> returns immediately without sleeping.
-
-
-</p><p><a href="#Quick%20Quiz%2011"><b>Back to Quick Quiz 11</b>.</a>
-
-<a name="qq12answer"></a>
-<p><b>Quick Quiz 12</b>:
-Why does line 19 use <tt>rcu_access_pointer()</tt>?
-After all, <tt>call_rcu()</tt> on line 25 stores into the
-structure, which would interact badly with concurrent insertions.
-Doesn't this mean that <tt>rcu_dereference()</tt> is required?
-
-
-</p><p><b>Answer</b>:
-Presumably the <tt>->gp_lock</tt> acquired on line 18 excludes
-any changes, including any insertions that <tt>rcu_dereference()</tt>
-would protect against.
-Therefore, any insertions will be delayed until after <tt>->gp_lock</tt>
-is released on line 25, which in turn means that
-<tt>rcu_access_pointer()</tt> suffices.
-
-
-</p><p><a href="#Quick%20Quiz%2012"><b>Back to Quick Quiz 12</b>.</a>
-
-<a name="qq13answer"></a>
-<p><b>Quick Quiz 13</b>:
-Earlier it was claimed that <tt>call_rcu()</tt> and
-<tt>kfree_rcu()</tt> allowed updaters to avoid being blocked
-by readers.
-But how can that be correct, given that the invocation of the callback
-and the freeing of the memory (respectively) must still wait for
-a grace period to elapse?
-
-
-</p><p><b>Answer</b>:
-We could define things this way, but keep in mind that this sort of
-definition would say that updates in garbage-collected languages
-cannot complete until the next time the garbage collector runs,
-which does not seem at all reasonable.
-The key point is that in most cases, an updater using either
-<tt>call_rcu()</tt> or <tt>kfree_rcu()</tt> can proceed to the
-next update as soon as it has invoked <tt>call_rcu()</tt> or
-<tt>kfree_rcu()</tt>, without having to wait for a subsequent
-grace period.
-
-
-</p><p><a href="#Quick%20Quiz%2013"><b>Back to Quick Quiz 13</b>.</a>
-
-<a name="qq14answer"></a>
-<p><b>Quick Quiz 14</b>:
-So what happens with <tt>synchronize_rcu()</tt> during
-scheduler initialization for <tt>CONFIG_PREEMPT=n</tt>
-kernels?
-
-
-</p><p><b>Answer</b>:
-In <tt>CONFIG_PREEMPT=n</tt> kernel, <tt>synchronize_rcu()</tt>
-maps directly to <tt>synchronize_sched()</tt>.
-Therefore, <tt>synchronize_rcu()</tt> works normally throughout
-boot in <tt>CONFIG_PREEMPT=n</tt> kernels.
-However, your code must also work in <tt>CONFIG_PREEMPT=y</tt> kernels,
-so it is still necessary to avoid invoking <tt>synchronize_rcu()</tt>
-during scheduler initialization.
-
-
-</p><p><a href="#Quick%20Quiz%2014"><b>Back to Quick Quiz 14</b>.</a>
-
-
</body></html>
diff --git a/Documentation/RCU/Design/Requirements/Requirements.htmlx b/Documentation/RCU/Design/Requirements/Requirements.htmlx
deleted file mode 100644
index 3a97ba4..0000000
--- a/Documentation/RCU/Design/Requirements/Requirements.htmlx
+++ /dev/null
@@ -1,2741 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
- "http://www.w3.org/TR/html4/loose.dtd">
- <html>
- <head><title>A Tour Through RCU's Requirements [LWN.net]</title>
- <meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8">
-
-<h1>A Tour Through RCU's Requirements</h1>
-
-<p>Copyright IBM Corporation, 2015</p>
-<p>Author: Paul E. McKenney</p>
-<p><i>The initial version of this document appeared in the
-<a href="https://lwn.net/">LWN</a> articles
-<a href="https://lwn.net/Articles/652156/">here</a>,
-<a href="https://lwn.net/Articles/652677/">here</a>, and
-<a href="https://lwn.net/Articles/653326/">here</a>.</i></p>
-
-<h2>Introduction</h2>
-
-<p>
-Read-copy update (RCU) is a synchronization mechanism that is often
-used as a replacement for reader-writer locking.
-RCU is unusual in that updaters do not block readers,
-which means that RCU's read-side primitives can be exceedingly fast
-and scalable.
-In addition, updaters can make useful forward progress concurrently
-with readers.
-However, all this concurrency between RCU readers and updaters does raise
-the question of exactly what RCU readers are doing, which in turn
-raises the question of exactly what RCU's requirements are.
-
-<p>
-This document therefore summarizes RCU's requirements, and can be thought
-of as an informal, high-level specification for RCU.
-It is important to understand that RCU's specification is primarily
-empirical in nature;
-in fact, I learned about many of these requirements the hard way.
-This situation might cause some consternation, however, not only
-has this learning process been a lot of fun, but it has also been
-a great privilege to work with so many people willing to apply
-technologies in interesting new ways.
-
-<p>
-All that aside, here are the categories of currently known RCU requirements:
-</p>
-
-<ol>
-<li> <a href="#Fundamental Requirements">
- Fundamental Requirements</a>
-<li> <a href="#Fundamental Non-Requirements">Fundamental Non-Requirements</a>
-<li> <a href="#Parallelism Facts of Life">
- Parallelism Facts of Life</a>
-<li> <a href="#Quality-of-Implementation Requirements">
- Quality-of-Implementation Requirements</a>
-<li> <a href="#Linux Kernel Complications">
- Linux Kernel Complications</a>
-<li> <a href="#Software-Engineering Requirements">
- Software-Engineering Requirements</a>
-<li> <a href="#Other RCU Flavors">
- Other RCU Flavors</a>
-<li> <a href="#Possible Future Changes">
- Possible Future Changes</a>
-</ol>
-
-<p>
-This is followed by a <a href="#Summary">summary</a>,
-which is in turn followed by the inevitable
-<a href="#Answers to Quick Quizzes">answers to the quick quizzes</a>.
-
-<h2><a name="Fundamental Requirements">Fundamental Requirements</a></h2>
-
-<p>
-RCU's fundamental requirements are the closest thing RCU has to hard
-mathematical requirements.
-These are:
-
-<ol>
-<li> <a href="#Grace-Period Guarantee">
- Grace-Period Guarantee</a>
-<li> <a href="#Publish-Subscribe Guarantee">
- Publish-Subscribe Guarantee</a>
-<li> <a href="#Memory-Barrier Guarantees">
- Memory-Barrier Guarantees</a>
-<li> <a href="#RCU Primitives Guaranteed to Execute Unconditionally">
- RCU Primitives Guaranteed to Execute Unconditionally</a>
-<li> <a href="#Guaranteed Read-to-Write Upgrade">
- Guaranteed Read-to-Write Upgrade</a>
-</ol>
-
-<h3><a name="Grace-Period Guarantee">Grace-Period Guarantee</a></h3>
-
-<p>
-RCU's grace-period guarantee is unusual in being premeditated:
-Jack Slingwine and I had this guarantee firmly in mind when we started
-work on RCU (then called “rclock”) in the early 1990s.
-That said, the past two decades of experience with RCU have produced
-a much more detailed understanding of this guarantee.
-
-<p>
-RCU's grace-period guarantee allows updaters to wait for the completion
-of all pre-existing RCU read-side critical sections.
-An RCU read-side critical section
-begins with the marker <tt>rcu_read_lock()</tt> and ends with
-the marker <tt>rcu_read_unlock()</tt>.
-These markers may be nested, and RCU treats a nested set as one
-big RCU read-side critical section.
-Production-quality implementations of <tt>rcu_read_lock()</tt> and
-<tt>rcu_read_unlock()</tt> are extremely lightweight, and in
-fact have exactly zero overhead in Linux kernels built for production
-use with <tt>CONFIG_PREEMPT=n</tt>.
-
-<p>
-This guarantee allows ordering to be enforced with extremely low
-overhead to readers, for example:
-
-<blockquote>
-<pre>
- 1 int x, y;
- 2
- 3 void thread0(void)
- 4 {
- 5 rcu_read_lock();
- 6 r1 = READ_ONCE(x);
- 7 r2 = READ_ONCE(y);
- 8 rcu_read_unlock();
- 9 }
-10
-11 void thread1(void)
-12 {
-13 WRITE_ONCE(x, 1);
-14 synchronize_rcu();
-15 WRITE_ONCE(y, 1);
-16 }
-</pre>
-</blockquote>
-
-<p>
-Because the <tt>synchronize_rcu()</tt> on line 14 waits for
-all pre-existing readers, any instance of <tt>thread0()</tt> that
-loads a value of zero from <tt>x</tt> must complete before
-<tt>thread1()</tt> stores to <tt>y</tt>, so that instance must
-also load a value of zero from <tt>y</tt>.
-Similarly, any instance of <tt>thread0()</tt> that loads a value of
-one from <tt>y</tt> must have started after the
-<tt>synchronize_rcu()</tt> started, and must therefore also load
-a value of one from <tt>x</tt>.
-Therefore, the outcome:
-<blockquote>
-<pre>
-(r1 == 0 && r2 == 1)
-</pre>
-</blockquote>
-cannot happen.
-
-<p>@@QQ@@
-Wait a minute!
-You said that updaters can make useful forward progress concurrently
-with readers, but pre-existing readers will block
-<tt>synchronize_rcu()</tt>!!!
-Just who are you trying to fool???
-<p>@@QQA@@
-First, if updaters do not wish to be blocked by readers, they can use
-<tt>call_rcu()</tt> or <tt>kfree_rcu()</tt>, which will
-be discussed later.
-Second, even when using <tt>synchronize_rcu()</tt>, the other
-update-side code does run concurrently with readers, whether pre-existing
-or not.
-<p>@@QQE@@
-
-<p>
-This scenario resembles one of the first uses of RCU in
-<a href="https://en.wikipedia.org/wiki/DYNIX">DYNIX/ptx</a>,
-which managed a distributed lock manager's transition into
-a state suitable for handling recovery from node failure,
-more or less as follows:
-
-<blockquote>
-<pre>
- 1 #define STATE_NORMAL 0
- 2 #define STATE_WANT_RECOVERY 1
- 3 #define STATE_RECOVERING 2
- 4 #define STATE_WANT_NORMAL 3
- 5
- 6 int state = STATE_NORMAL;
- 7
- 8 void do_something_dlm(void)
- 9 {
-10 int state_snap;
-11
-12 rcu_read_lock();
-13 state_snap = READ_ONCE(state);
-14 if (state_snap == STATE_NORMAL)
-15 do_something();
-16 else
-17 do_something_carefully();
-18 rcu_read_unlock();
-19 }
-20
-21 void start_recovery(void)
-22 {
-23 WRITE_ONCE(state, STATE_WANT_RECOVERY);
-24 synchronize_rcu();
-25 WRITE_ONCE(state, STATE_RECOVERING);
-26 recovery();
-27 WRITE_ONCE(state, STATE_WANT_NORMAL);
-28 synchronize_rcu();
-29 WRITE_ONCE(state, STATE_NORMAL);
-30 }
-</pre>
-</blockquote>
-
-<p>
-The RCU read-side critical section in <tt>do_something_dlm()</tt>
-works with the <tt>synchronize_rcu()</tt> in <tt>start_recovery()</tt>
-to guarantee that <tt>do_something()</tt> never runs concurrently
-with <tt>recovery()</tt>, but with little or no synchronization
-overhead in <tt>do_something_dlm()</tt>.
-
-<p>@@QQ@@
-Why is the <tt>synchronize_rcu()</tt> on line 28 needed?
-<p>@@QQA@@
-Without that extra grace period, memory reordering could result in
-<tt>do_something_dlm()</tt> executing <tt>do_something()</tt>
-concurrently with the last bits of <tt>recovery()</tt>.
-<p>@@QQE@@
-
-<p>
-In order to avoid fatal problems such as deadlocks,
-an RCU read-side critical section must not contain calls to
-<tt>synchronize_rcu()</tt>.
-Similarly, an RCU read-side critical section must not
-contain anything that waits, directly or indirectly, on completion of
-an invocation of <tt>synchronize_rcu()</tt>.
-
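-<p>
-For example, the following sketch self-deadlocks because the
-<tt>synchronize_rcu()</tt> on line 5 waits for a grace period that
-cannot complete until the enclosing RCU read-side critical section
-ends (<tt>do_something()</tt> and <tt>do_something_else()</tt> are
-hypothetical):
-
-<blockquote>
-<pre>
- 1 void buggy_reader(void)
- 2 {
- 3 rcu_read_lock();
- 4 do_something();
- 5 synchronize_rcu(); /* BUG: Waits on all pre-existing readers, */
- 6 do_something_else(); /* including this one. Self-deadlock! */
- 7 rcu_read_unlock();
- 8 }
-</pre>
-</blockquote>
-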
-<p>
-Although RCU's grace-period guarantee is useful in and of itself, with
-<a href="https://lwn.net/Articles/573497/">quite a few use cases</a>,
-it would be good to be able to use RCU to coordinate read-side
-access to linked data structures.
-For this, the grace-period guarantee is not sufficient, as can
-be seen in function <tt>add_gp_buggy()</tt> below.
-We will look at the reader's code later, but in the meantime, just think of
-the reader as locklessly picking up the <tt>gp</tt> pointer,
-and, if the value loaded is non-<tt>NULL</tt>, locklessly accessing the
-<tt>->a</tt> and <tt>->b</tt> fields.
-
-<blockquote>
-<pre>
- 1 bool add_gp_buggy(int a, int b)
- 2 {
- 3 p = kmalloc(sizeof(*p), GFP_KERNEL);
- 4 if (!p)
- 5 return false;
- 6 spin_lock(&gp_lock);
- 7 if (rcu_access_pointer(gp)) {
- 8 spin_unlock(&gp_lock);
- 9 return false;
-10 }
-11 p->a = a;
-12 p->b = b;
-13 gp = p; /* ORDERING BUG */
-14 spin_unlock(&gp_lock);
-15 return true;
-16 }
-</pre>
-</blockquote>
-
-<p>
-The problem is that both the compiler and weakly ordered CPUs are within
-their rights to reorder this code as follows:
-
-<blockquote>
-<pre>
- 1 bool add_gp_buggy_optimized(int a, int b)
- 2 {
- 3 p = kmalloc(sizeof(*p), GFP_KERNEL);
- 4 if (!p)
- 5 return false;
- 6 spin_lock(&gp_lock);
- 7 if (rcu_access_pointer(gp)) {
- 8 spin_unlock(&gp_lock);
- 9 return false;
-10 }
-<b>11 gp = p; /* ORDERING BUG */
-12 p->a = a;
-13 p->b = b;</b>
-14 spin_unlock(&gp_lock);
-15 return true;
-16 }
-</pre>
-</blockquote>
-
-<p>
-If an RCU reader fetches <tt>gp</tt> just after
-<tt>add_gp_buggy_optimized</tt> executes line 11,
-it will see garbage in the <tt>->a</tt> and <tt>->b</tt>
-fields.
-And this is but one of many ways in which compiler and hardware optimizations
-could cause trouble.
-Therefore, we clearly need some way to prevent the compiler and the CPU from
-reordering in this manner, which brings us to the publish-subscribe
-guarantee discussed in the next section.
-
-<h3><a name="Publish-Subscribe Guarantee">Publish/Subscribe Guarantee</a></h3>
-
-<p>
-RCU's publish-subscribe guarantee allows data to be inserted
-into a linked data structure without disrupting RCU readers.
-The updater uses <tt>rcu_assign_pointer()</tt> to insert the
-new data, and readers use <tt>rcu_dereference()</tt> to
-access data, whether new or old.
-The following shows an example of insertion:
-
-<blockquote>
-<pre>
- 1 bool add_gp(int a, int b)
- 2 {
- 3 p = kmalloc(sizeof(*p), GFP_KERNEL);
- 4 if (!p)
- 5 return false;
- 6 spin_lock(&gp_lock);
- 7 if (rcu_access_pointer(gp)) {
- 8 spin_unlock(&gp_lock);
- 9 return false;
-10 }
-11 p->a = a;
-12 p->b = b;
-13 rcu_assign_pointer(gp, p);
-14 spin_unlock(&gp_lock);
-15 return true;
-16 }
-</pre>
-</blockquote>
-
-<p>
-The <tt>rcu_assign_pointer()</tt> on line 13 is conceptually
-equivalent to a simple assignment statement, but also guarantees
-that its assignment will
-happen after the two assignments in lines 11 and 12,
-similar to the C11 <tt>memory_order_release</tt> store operation.
-It also prevents any number of “interesting” compiler
-optimizations, for example, the use of <tt>gp</tt> as a scratch
-location immediately preceding the assignment.
-
-<p>@@QQ@@
-But <tt>rcu_assign_pointer()</tt> does nothing to prevent the
-two assignments to <tt>p->a</tt> and <tt>p->b</tt>
-from being reordered.
-Can't that also cause problems?
-<p>@@QQA@@
-No, it cannot.
-The readers cannot see either of these two fields until
-the assignment to <tt>gp</tt>, by which time both fields are
-fully initialized.
-So reordering the assignments
-to <tt>p->a</tt> and <tt>p->b</tt> cannot possibly
-cause any problems.
-<p>@@QQE@@
-
-<p>
-It is tempting to assume that the reader need not do anything special
-to control its accesses to the RCU-protected data,
-as shown in <tt>do_something_gp_buggy()</tt> below:
-
-<blockquote>
-<pre>
- 1 bool do_something_gp_buggy(void)
- 2 {
- 3 rcu_read_lock();
- 4 p = gp; /* OPTIMIZATIONS GALORE!!! */
- 5 if (p) {
- 6 do_something(p->a, p->b);
- 7 rcu_read_unlock();
- 8 return true;
- 9 }
-10 rcu_read_unlock();
-11 return false;
-12 }
-</pre>
-</blockquote>
-
-<p>
-However, this temptation must be resisted because there are a
-surprisingly large number of ways that the compiler
-(to say nothing of
-<a href="https://h71000.www7.hp.com/wizard/wiz_2637.html">DEC Alpha CPUs</a>)
-can trip this code up.
-For but one example, if the compiler were short of registers, it
-might choose to refetch from <tt>gp</tt> rather than keeping
-a separate copy in <tt>p</tt> as follows:
-
-<blockquote>
-<pre>
- 1 bool do_something_gp_buggy_optimized(void)
- 2 {
- 3 rcu_read_lock();
- 4 if (gp) { /* OPTIMIZATIONS GALORE!!! */
-<b> 5 do_something(gp->a, gp->b);</b>
- 6 rcu_read_unlock();
- 7 return true;
- 8 }
- 9 rcu_read_unlock();
-10 return false;
-11 }
-</pre>
-</blockquote>
-
-<p>
-If this function ran concurrently with a series of updates that
-replaced the current structure with a new one,
-the fetches of <tt>gp->a</tt>
-and <tt>gp->b</tt> might well come from two different structures,
-which could cause serious confusion.
-To prevent this (and much else besides), <tt>do_something_gp()</tt> uses
-<tt>rcu_dereference()</tt> to fetch from <tt>gp</tt>:
-
-<blockquote>
-<pre>
- 1 bool do_something_gp(void)
- 2 {
- 3 rcu_read_lock();
- 4 p = rcu_dereference(gp);
- 5 if (p) {
- 6 do_something(p->a, p->b);
- 7 rcu_read_unlock();
- 8 return true;
- 9 }
-10 rcu_read_unlock();
-11 return false;
-12 }
-</pre>
-</blockquote>
-
-<p>
-The <tt>rcu_dereference()</tt> uses volatile casts and (for DEC Alpha)
-memory barriers in the Linux kernel.
-Should a
-<a href="http://www.rdrop.com/users/paulmck/RCU/consume.2015.07.13a.pdf">high-quality implementation of C11 <tt>memory_order_consume</tt> [PDF]</a>
-ever appear, then <tt>rcu_dereference()</tt> could be implemented
-as a <tt>memory_order_consume</tt> load.
-Regardless of the exact implementation, a pointer fetched by
-<tt>rcu_dereference()</tt> may not be used outside of the
-outermost RCU read-side critical section containing that
-<tt>rcu_dereference()</tt>, unless protection of
-the corresponding data element has been passed from RCU to some
-other synchronization mechanism, most commonly locking or
-<a href="https://www.kernel.org/doc/Documentation/RCU/rcuref.txt">reference counting</a>.
-
-<p>
-In short, updaters use <tt>rcu_assign_pointer()</tt> and readers
-use <tt>rcu_dereference()</tt>, and these two RCU API elements
-work together to ensure that readers have a consistent view of
-newly added data elements.
-
-<p>
-Of course, it is also necessary to remove elements from RCU-protected
-data structures, for example, using the following process:
-
-<ol>
-<li> Remove the data element from the enclosing structure.
-<li> Wait for all pre-existing RCU read-side critical sections
- to complete (because only pre-existing readers can possibly have
- a reference to the newly removed data element).
-<li> At this point, only the updater has a reference to the
- newly removed data element, so it can safely reclaim
- the data element, for example, by passing it to <tt>kfree()</tt>.
-</ol>
-
-This process is implemented by <tt>remove_gp_synchronous()</tt>:
-
-<blockquote>
-<pre>
- 1 bool remove_gp_synchronous(void)
- 2 {
- 3 struct foo *p;
- 4
- 5 spin_lock(&gp_lock);
- 6 p = rcu_access_pointer(gp);
- 7 if (!p) {
- 8 spin_unlock(&gp_lock);
- 9 return false;
-10 }
-11 rcu_assign_pointer(gp, NULL);
-12 spin_unlock(&gp_lock);
-13 synchronize_rcu();
-14 kfree(p);
-15 return true;
-16 }
-</pre>
-</blockquote>
-
-<p>
-This function is straightforward, with line 13 waiting for a grace
-period before line 14 frees the old data element.
-This waiting ensures that readers will reach line 7 of
-<tt>do_something_gp()</tt> before the data element referenced by
-<tt>p</tt> is freed.
-The <tt>rcu_access_pointer()</tt> on line 6 is similar to
-<tt>rcu_dereference()</tt>, except that:
-
-<ol>
-<li> The value returned by <tt>rcu_access_pointer()</tt>
- cannot be dereferenced.
- If you want to access the value pointed to as well as
- the pointer itself, use <tt>rcu_dereference()</tt>
- instead of <tt>rcu_access_pointer()</tt>.
-<li> The call to <tt>rcu_access_pointer()</tt> need not be
- protected.
- In contrast, <tt>rcu_dereference()</tt> must either be
- within an RCU read-side critical section or in a code
- segment where the pointer cannot change, for example, in
- code protected by the corresponding update-side lock.
-</ol>
-
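-<p>
-For example, code that merely needs to know whether or not <tt>gp</tt>
-is currently non-<tt>NULL</tt> can use <tt>rcu_access_pointer()</tt>
-without any other protection, as in the following minimal sketch
-(the <tt>gp_is_installed()</tt> helper is hypothetical):
-
-<blockquote>
-<pre>
- 1 bool gp_is_installed(void)
- 2 {
- 3 return rcu_access_pointer(gp) != NULL; /* Checked, never dereferenced. */
- 4 }
-</pre>
-</blockquote>
-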
-<p>@@QQ@@
-Without the <tt>rcu_dereference()</tt> or the
-<tt>rcu_access_pointer()</tt>, what destructive optimizations
-might the compiler make use of?
-<p>@@QQA@@
-Let's start with what happens to <tt>do_something_gp()</tt>
-if it fails to use <tt>rcu_dereference()</tt>.
-It could reuse a value formerly fetched from this same pointer.
-It could also fetch the pointer from <tt>gp</tt> in a byte-at-a-time
-manner, resulting in <i>load tearing</i>, in turn resulting in a bytewise
-mash-up of two distinct pointer values.
-It might even use value-speculation optimizations, where it makes a wrong
-guess, but by the time it gets around to checking the value, an update
-has changed the pointer to match the wrong guess.
-Too bad about any dereferences that returned pre-initialization garbage
-in the meantime!
-
-<p>
-For <tt>remove_gp_synchronous()</tt>, as long as all modifications
-to <tt>gp</tt> are carried out while holding <tt>gp_lock</tt>,
-the above optimizations are harmless.
-However,
-with <tt>CONFIG_SPARSE_RCU_POINTER=y</tt>,
-<tt>sparse</tt> will complain if you
-define <tt>gp</tt> with <tt>__rcu</tt> and then
-access it without using
-either <tt>rcu_access_pointer()</tt> or <tt>rcu_dereference()</tt>.
-<p>@@QQE@@
-
-<p>
-In short, RCU's publish-subscribe guarantee is provided by the combination
-of <tt>rcu_assign_pointer()</tt> and <tt>rcu_dereference()</tt>.
-This guarantee allows data elements to be safely added to RCU-protected
-linked data structures without disrupting RCU readers.
-This guarantee can be used in combination with the grace-period
-guarantee to also allow data elements to be removed from RCU-protected
-linked data structures, again without disrupting RCU readers.
-
-<p>
-This guarantee was only partially premeditated.
-DYNIX/ptx used an explicit memory barrier for publication, but had nothing
-resembling <tt>rcu_dereference()</tt> for subscription, nor did it
-have anything resembling the <tt>smp_read_barrier_depends()</tt>
-that was later subsumed into <tt>rcu_dereference()</tt>.
-The need for these operations made itself known quite suddenly at a
-late-1990s meeting with the DEC Alpha architects, back in the days when
-DEC was still a free-standing company.
-It took the Alpha architects a good hour to convince me that any sort
-of barrier would ever be needed, and it then took me a good <i>two</i> hours
-to convince them that their documentation did not make this point clear.
-More recent work with the C and C++ standards committees has provided
-much education on tricks and traps from the compiler.
-In short, compilers were much less tricky in the early 1990s, but in
-2015, don't even think about omitting <tt>rcu_dereference()</tt>!
-
-<h3><a name="Memory-Barrier Guarantees">Memory-Barrier Guarantees</a></h3>
-
-<p>
-The previous section's simple linked-data-structure scenario clearly
-demonstrates the need for RCU's stringent memory-ordering guarantees on
-systems with more than one CPU:
-
-<ol>
-<li> Each CPU that has an RCU read-side critical section that
- begins before <tt>synchronize_rcu()</tt> starts is
- guaranteed to execute a full memory barrier between the time
- that the RCU read-side critical section ends and the time that
- <tt>synchronize_rcu()</tt> returns.
- Without this guarantee, a pre-existing RCU read-side critical section
- might hold a reference to the newly removed <tt>struct foo</tt>
- after the <tt>kfree()</tt> on line 14 of
- <tt>remove_gp_synchronous()</tt>.
-<li> Each CPU that has an RCU read-side critical section that ends
- after <tt>synchronize_rcu()</tt> returns is guaranteed
- to execute a full memory barrier between the time that
- <tt>synchronize_rcu()</tt> begins and the time that the RCU
- read-side critical section begins.
- Without this guarantee, a later RCU read-side critical section
- running after the <tt>kfree()</tt> on line 14 of
- <tt>remove_gp_synchronous()</tt> might
-	run <tt>do_something_gp()</tt> and find the
- newly deleted <tt>struct foo</tt>.
-<li> If the task invoking <tt>synchronize_rcu()</tt> remains
- on a given CPU, then that CPU is guaranteed to execute a full
- memory barrier sometime during the execution of
- <tt>synchronize_rcu()</tt>.
- This guarantee ensures that the <tt>kfree()</tt> on
- line 14 of <tt>remove_gp_synchronous()</tt> really does
- execute after the removal on line 11.
-<li> If the task invoking <tt>synchronize_rcu()</tt> migrates
- among a group of CPUs during that invocation, then each of the
- CPUs in that group is guaranteed to execute a full memory barrier
- sometime during the execution of <tt>synchronize_rcu()</tt>.
- This guarantee also ensures that the <tt>kfree()</tt> on
- line 14 of <tt>remove_gp_synchronous()</tt> really does
- execute after the removal on
-	line 11, even in the case where the thread executing the
- <tt>synchronize_rcu()</tt> migrates in the meantime.
-</ol>
-
-<p>@@QQ@@
-Given that multiple CPUs can start RCU read-side critical sections
-at any time without any ordering whatsoever, how can RCU possibly tell whether
-or not a given RCU read-side critical section starts before a
-given instance of <tt>synchronize_rcu()</tt>?
-<p>@@QQA@@
-If RCU cannot tell whether or not a given
-RCU read-side critical section starts before a
-given instance of <tt>synchronize_rcu()</tt>,
-then it must assume that the RCU read-side critical section
-started first.
-In other words, a given instance of <tt>synchronize_rcu()</tt>
-can avoid waiting on a given RCU read-side critical section only
-if it can prove that <tt>synchronize_rcu()</tt> started first.
-<p>@@QQE@@
-
-<p>@@QQ@@
-The first and second guarantees require unbelievably strict ordering!
-Are all these memory barriers <i> really</i> required?
-<p>@@QQA@@
-Yes, they really are required.
-To see why the first guarantee is required, consider the following
-sequence of events:
-
-<ol>
-<li> CPU 1: <tt>rcu_read_lock()</tt>
-<li> CPU 1: <tt>q = rcu_dereference(gp);
- /* Very likely to return p. */</tt>
-<li> CPU 0: <tt>list_del_rcu(p);</tt>
-<li> CPU 0: <tt>synchronize_rcu()</tt> starts.
-<li> CPU 1: <tt>do_something_with(q->a);
- /* No smp_mb(), so might happen after kfree(). */</tt>
-<li> CPU 1: <tt>rcu_read_unlock()</tt>
-<li> CPU 0: <tt>synchronize_rcu()</tt> returns.
-<li> CPU 0: <tt>kfree(p);</tt>
-</ol>
-
-<p>
-Therefore, there absolutely must be a full memory barrier between the
-end of the RCU read-side critical section and the end of the
-grace period.
-
-<p>
-The sequence of events demonstrating the necessity of the second rule
-is roughly similar:
-
-<ol>
-<li> CPU 0: <tt>list_del_rcu(p);</tt>
-<li> CPU 0: <tt>synchronize_rcu()</tt> starts.
-<li> CPU 1: <tt>rcu_read_lock()</tt>
-<li> CPU 1: <tt>q = rcu_dereference(gp);
- /* Might return p if no memory barrier. */</tt>
-<li> CPU 0: <tt>synchronize_rcu()</tt> returns.
-<li> CPU 0: <tt>kfree(p);</tt>
-<li> CPU 1: <tt>do_something_with(q->a); /* Boom!!! */</tt>
-<li> CPU 1: <tt>rcu_read_unlock()</tt>
-</ol>
-
-<p>
-And similarly, without a memory barrier between the beginning of the
-grace period and the beginning of the RCU read-side critical section,
-CPU 1 might end up accessing the freelist.
-
-<p>
-The “as if” rule of course applies, so that any implementation
-that acts as if the appropriate memory barriers were in place is a
-correct implementation.
-That said, it is much easier to fool yourself into believing that you have
-adhered to the as-if rule than it is to actually adhere to it!
-<p>@@QQE@@
-
-<p>
-Note that these memory-barrier requirements do not replace the fundamental
-RCU requirement that a grace period wait for all pre-existing readers.
-On the contrary, the memory barriers called out in this section must operate in
-such a way as to <i>enforce</i> this fundamental requirement.
-Of course, different implementations enforce this requirement in different
-ways, but enforce it they must.
-
-<h3><a name="RCU Primitives Guaranteed to Execute Unconditionally">RCU Primitives Guaranteed to Execute Unconditionally</a></h3>
-
-<p>
-The common-case RCU primitives are unconditional.
-They are invoked, they do their job, and they return, with no possibility
-of error, and no need to retry.
-This is a key RCU design philosophy.
-
-<p>
-However, this philosophy is pragmatic rather than pigheaded.
-If someone comes up with a good justification for a particular conditional
-RCU primitive, it might well be implemented and added.
-After all, this guarantee was reverse-engineered, not premeditated.
-The unconditional nature of the RCU primitives was initially an
-accident of implementation, and later experience with synchronization
-primitives with conditional primitives caused me to elevate this
-accident to a guarantee.
-Therefore, the justification for adding a conditional primitive to
-RCU would need to be based on detailed and compelling use cases.
-
-<h3><a name="Guaranteed Read-to-Write Upgrade">Guaranteed Read-to-Write Upgrade</a></h3>
-
-<p>
-As far as RCU is concerned, it is always possible to carry out an
-update within an RCU read-side critical section.
-For example, that RCU read-side critical section might search for
-a given data element, and then might acquire the update-side
-spinlock in order to update that element, all while remaining
-in that RCU read-side critical section.
-Of course, it is necessary to exit the RCU read-side critical section
-before invoking <tt>synchronize_rcu()</tt>; however, this
-inconvenience can be avoided through use of the
-<tt>call_rcu()</tt> and <tt>kfree_rcu()</tt> API members
-described later in this document.
-
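-<p>
-The following is a minimal sketch of such an upgrade, reusing the
-<tt>gp</tt>, <tt>gp_lock</tt>, and <tt>struct foo</tt> definitions from
-the earlier examples (the <tt>update_a_if_present()</tt> function itself
-is hypothetical):
-
-<blockquote>
-<pre>
- 1 bool update_a_if_present(int new_a)
- 2 {
- 3 struct foo *p;
- 4 bool ret = false;
- 5
- 6 rcu_read_lock();
- 7 p = rcu_dereference(gp);
- 8 if (p) {
- 9 spin_lock(&gp_lock); /* Upgrade to write. */
-10 if (p == rcu_access_pointer(gp)) { /* Still installed? */
-11 p->a = new_a;
-12 ret = true;
-13 }
-14 spin_unlock(&gp_lock);
-15 }
-16 rcu_read_unlock();
-17 return ret;
-18 }
-</pre>
-</blockquote>
-
-<p>
-Note that the recheck on line 10 guards against updating an element
-that was removed while the update-side lock was being acquired, a
-topic taken up in more detail in the later discussion of consistency.
-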
-<p>@@QQ@@
-But how does the upgrade-to-write operation exclude other readers?
-<p>@@QQA@@
-It doesn't, just like normal RCU updates, which also do not exclude
-RCU readers.
-<p>@@QQE@@
-
-<p>
-This guarantee allows lookup code to be shared between read-side
-and update-side code, and was premeditated, appearing in the earliest
-DYNIX/ptx RCU documentation.
-
-<h2><a name="Fundamental Non-Requirements">Fundamental Non-Requirements</a></h2>
-
-<p>
-RCU provides extremely lightweight readers, and its read-side guarantees,
-though quite useful, are correspondingly lightweight.
-It is therefore all too easy to assume that RCU is guaranteeing more
-than it really is.
-Of course, the list of things that RCU does not guarantee is infinitely
-long; however, the following sections list a few non-guarantees that
-have caused confusion.
-Except where otherwise noted, these non-guarantees were premeditated.
-
-<ol>
-<li> <a href="#Readers Impose Minimal Ordering">
- Readers Impose Minimal Ordering</a>
-<li> <a href="#Readers Do Not Exclude Updaters">
- Readers Do Not Exclude Updaters</a>
-<li> <a href="#Updaters Only Wait For Old Readers">
- Updaters Only Wait For Old Readers</a>
-<li> <a href="#Grace Periods Don't Partition Read-Side Critical Sections">
- Grace Periods Don't Partition Read-Side Critical Sections</a>
-<li> <a href="#Read-Side Critical Sections Don't Partition Grace Periods">
- Read-Side Critical Sections Don't Partition Grace Periods</a>
-<li> <a href="#Disabling Preemption Does Not Block Grace Periods">
- Disabling Preemption Does Not Block Grace Periods</a>
-</ol>
-
-<h3><a name="Readers Impose Minimal Ordering">Readers Impose Minimal Ordering</a></h3>
-
-<p>
-Reader-side markers such as <tt>rcu_read_lock()</tt> and
-<tt>rcu_read_unlock()</tt> provide absolutely no ordering guarantees
-except through their interaction with the grace-period APIs such as
-<tt>synchronize_rcu()</tt>.
-To see this, consider the following pair of threads:
-
-<blockquote>
-<pre>
- 1 void thread0(void)
- 2 {
- 3 rcu_read_lock();
- 4 WRITE_ONCE(x, 1);
- 5 rcu_read_unlock();
- 6 rcu_read_lock();
- 7 WRITE_ONCE(y, 1);
- 8 rcu_read_unlock();
- 9 }
-10
-11 void thread1(void)
-12 {
-13 rcu_read_lock();
-14 r1 = READ_ONCE(y);
-15 rcu_read_unlock();
-16 rcu_read_lock();
-17 r2 = READ_ONCE(x);
-18 rcu_read_unlock();
-19 }
-</pre>
-</blockquote>
-
-<p>
-After <tt>thread0()</tt> and <tt>thread1()</tt> execute
-concurrently, it is quite possible to have
-
-<blockquote>
-<pre>
-(r1 == 1 && r2 == 0)
-</pre>
-</blockquote>
-
-(that is, <tt>y</tt> appears to have been assigned before <tt>x</tt>),
-which would not be possible if <tt>rcu_read_lock()</tt> and
-<tt>rcu_read_unlock()</tt> had much in the way of ordering
-properties.
-But they do not, so the CPU is within its rights
-to do significant reordering.
-This is by design: Any significant ordering constraints would slow down
-these fast-path APIs.
-
-<p>@@QQ@@
-Can't the compiler also reorder this code?
-<p>@@QQA@@
-No, the volatile casts in <tt>READ_ONCE()</tt> and
-<tt>WRITE_ONCE()</tt> prevent the compiler from reordering in
-this particular case.
-<p>@@QQE@@
-
-<h3><a name="Readers Do Not Exclude Updaters">Readers Do Not Exclude Updaters</a></h3>
-
-<p>
-Neither <tt>rcu_read_lock()</tt> nor <tt>rcu_read_unlock()</tt>
-exclude updates.
-All they do is to prevent grace periods from ending.
-The following example illustrates this:
-
-<blockquote>
-<pre>
- 1 void thread0(void)
- 2 {
- 3 rcu_read_lock();
- 4 r1 = READ_ONCE(y);
- 5 if (r1) {
- 6 do_something_with_nonzero_x();
- 7 r2 = READ_ONCE(x);
- 8 WARN_ON(!r2); /* BUG!!! */
- 9 }
-10 rcu_read_unlock();
-11 }
-12
-13 void thread1(void)
-14 {
-15 spin_lock(&my_lock);
-16 WRITE_ONCE(x, 1);
-17 WRITE_ONCE(y, 1);
-18 spin_unlock(&my_lock);
-19 }
-</pre>
-</blockquote>
-
-<p>
-If the <tt>thread0()</tt> function's <tt>rcu_read_lock()</tt>
-excluded the <tt>thread1()</tt> function's update,
-the <tt>WARN_ON()</tt> could never fire.
-But the fact is that <tt>rcu_read_lock()</tt> does not exclude
-much of anything aside from subsequent grace periods, of which
-<tt>thread1()</tt> has none, so the
-<tt>WARN_ON()</tt> can and does fire.
-
-<h3><a name="Updaters Only Wait For Old Readers">Updaters Only Wait For Old Readers</a></h3>
-
-<p>
-It might be tempting to assume that after <tt>synchronize_rcu()</tt>
-completes, there are no readers executing.
-This temptation must be avoided because
-new readers can start immediately after <tt>synchronize_rcu()</tt>
-starts, and <tt>synchronize_rcu()</tt> is under no
-obligation to wait for these new readers.
-
-<p>@@QQ@@
-Suppose that <tt>synchronize_rcu()</tt> did wait until all readers had completed.
-Would the updater be able to rely on this?
-<p>@@QQA@@
-No.
-Even if <tt>synchronize_rcu()</tt> were to wait until
-all readers had completed, a new reader might start immediately after
-<tt>synchronize_rcu()</tt> completed.
-Therefore, the code following
-<tt>synchronize_rcu()</tt> cannot rely on there being no readers
-in any case.
-<p>@@QQE@@
-
-<h3><a name="Grace Periods Don't Partition Read-Side Critical Sections">
-Grace Periods Don't Partition Read-Side Critical Sections</a></h3>
-
-<p>
-It is tempting to assume that if any part of one RCU read-side critical
-section precedes a given grace period, and if any part of another RCU
-read-side critical section follows that same grace period, then all of
-the first RCU read-side critical section must precede all of the second.
-However, this just isn't the case: A single grace period does not
-partition the set of RCU read-side critical sections.
-An example of this situation can be illustrated as follows, where
-<tt>a</tt>, <tt>b</tt>, and <tt>c</tt> are initially all zero:
-
-<blockquote>
-<pre>
- 1 void thread0(void)
- 2 {
- 3 rcu_read_lock();
- 4 WRITE_ONCE(a, 1);
- 5 WRITE_ONCE(b, 1);
- 6 rcu_read_unlock();
- 7 }
- 8
- 9 void thread1(void)
-10 {
-11 r1 = READ_ONCE(a);
-12 synchronize_rcu();
-13 WRITE_ONCE(c, 1);
-14 }
-15
-16 void thread2(void)
-17 {
-18 rcu_read_lock();
-19 r2 = READ_ONCE(b);
-20 r3 = READ_ONCE(c);
-21 rcu_read_unlock();
-22 }
-</pre>
-</blockquote>
-
-<p>
-It turns out that the outcome:
-
-<blockquote>
-<pre>
-(r1 == 1 && r2 == 0 && r3 == 1)
-</pre>
-</blockquote>
-
-is entirely possible.
-The following figure shows how this can happen, with each circled
-<tt>QS</tt> indicating the point at which RCU recorded a
-<i>quiescent state</i> for each thread, that is, a state in which
-RCU knows that the thread cannot be in the midst of an RCU read-side
-critical section that started before the current grace period:
-
-<p><img src="GPpartitionReaders1.svg" alt="GPpartitionReaders1.svg" width="60%"></p>
-
-<p>
-If it is necessary to partition RCU read-side critical sections in this
-manner, it is necessary to use two grace periods, where the first
-grace period is known to end before the second grace period starts:
-
-<blockquote>
-<pre>
- 1 void thread0(void)
- 2 {
- 3 rcu_read_lock();
- 4 WRITE_ONCE(a, 1);
- 5 WRITE_ONCE(b, 1);
- 6 rcu_read_unlock();
- 7 }
- 8
- 9 void thread1(void)
-10 {
-11 r1 = READ_ONCE(a);
-12 synchronize_rcu();
-13 WRITE_ONCE(c, 1);
-14 }
-15
-16 void thread2(void)
-17 {
-18 r2 = READ_ONCE(c);
-19 synchronize_rcu();
-20 WRITE_ONCE(d, 1);
-21 }
-22
-23 void thread3(void)
-24 {
-25 rcu_read_lock();
-26 r3 = READ_ONCE(b);
-27 r4 = READ_ONCE(d);
-28 rcu_read_unlock();
-29 }
-</pre>
-</blockquote>
-
-<p>
-Here, if <tt>(r1 == 1)</tt>, then
-<tt>thread0()</tt>'s write to <tt>b</tt> must happen
-before the end of <tt>thread1()</tt>'s grace period.
-If in addition <tt>(r4 == 1)</tt>, then
-<tt>thread3()</tt>'s read from <tt>b</tt> must happen
-after the beginning of <tt>thread2()</tt>'s grace period.
-If it is also the case that <tt>(r2 == 1)</tt>, then the
-end of <tt>thread1()</tt>'s grace period must precede the
-beginning of <tt>thread2()</tt>'s grace period.
-This means that the two RCU read-side critical sections cannot overlap,
-guaranteeing that <tt>(r3 == 1)</tt>.
-As a result, the outcome:
-
-<blockquote>
-<pre>
-(r1 == 1 && r2 == 1 && r3 == 0 && r4 == 1)
-</pre>
-</blockquote>
-
-cannot happen.
-
-<p>
-This non-requirement was also non-premeditated, but became apparent
-when studying RCU's interaction with memory ordering.
-
-<h3><a name="Read-Side Critical Sections Don't Partition Grace Periods">
-Read-Side Critical Sections Don't Partition Grace Periods</a></h3>
-
-<p>
-It is also tempting to assume that if an RCU read-side critical section
-happens between a pair of grace periods, then those grace periods cannot
-overlap.
-However, this temptation leads nowhere good, as can be illustrated by
-the following, with all variables initially zero:
-
-<blockquote>
-<pre>
- 1 void thread0(void)
- 2 {
- 3 rcu_read_lock();
- 4 WRITE_ONCE(a, 1);
- 5 WRITE_ONCE(b, 1);
- 6 rcu_read_unlock();
- 7 }
- 8
- 9 void thread1(void)
-10 {
-11 r1 = READ_ONCE(a);
-12 synchronize_rcu();
-13 WRITE_ONCE(c, 1);
-14 }
-15
-16 void thread2(void)
-17 {
-18 rcu_read_lock();
-19 WRITE_ONCE(d, 1);
-20 r2 = READ_ONCE(c);
-21 rcu_read_unlock();
-22 }
-23
-24 void thread3(void)
-25 {
-26 r3 = READ_ONCE(d);
-27 synchronize_rcu();
-28 WRITE_ONCE(e, 1);
-29 }
-30
-31 void thread4(void)
-32 {
-33 rcu_read_lock();
-34 r4 = READ_ONCE(b);
-35 r5 = READ_ONCE(e);
-36 rcu_read_unlock();
-37 }
-</pre>
-</blockquote>
-
-<p>
-In this case, the outcome:
-
-<blockquote>
-<pre>
-(r1 == 1 && r2 == 1 && r3 == 1 && r4 == 0 && r5 == 1)
-</pre>
-</blockquote>
-
-is entirely possible, as illustrated below:
-
-<p><img src="ReadersPartitionGP1.svg" alt="ReadersPartitionGP1.svg" width="100%"></p>
-
-<p>
-Again, an RCU read-side critical section can overlap almost all of a
-given grace period, just so long as it does not overlap the entire
-grace period.
-As a result, an RCU read-side critical section cannot partition a pair
-of RCU grace periods.
-
-<p>@@QQ@@
-How long a sequence of grace periods, each separated by an RCU read-side
-critical section, would be required to partition the RCU read-side
-critical sections at the beginning and end of the chain?
-<p>@@QQA@@
-In theory, an infinite number.
-In practice, an unknown number that is sensitive to both implementation
-details and timing considerations.
-Therefore, even in practice, RCU users must abide by the theoretical rather
-than the practical answer.
-<p>@@QQE@@
-
-<h3><a name="Disabling Preemption Does Not Block Grace Periods">
-Disabling Preemption Does Not Block Grace Periods</a></h3>
-
-<p>
-There was a time when disabling preemption on any given CPU would block
-subsequent grace periods.
-However, this was an accident of implementation and is not a requirement.
-And in the current Linux-kernel implementation, disabling preemption
-on a given CPU in fact does not block grace periods, as Oleg Nesterov
-<a href="https://lkml.kernel.org/g/20150614193825.GA19582@redhat.com">demonstrated</a>.
-
-<p>
-If you need a preempt-disable region to block grace periods, you need to add
-<tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>, for example
-as follows:
-
-<blockquote>
-<pre>
- 1 preempt_disable();
- 2 rcu_read_lock();
- 3 do_something();
- 4 rcu_read_unlock();
- 5 preempt_enable();
- 6
- 7 /* Spinlocks implicitly disable preemption. */
- 8 spin_lock(&mylock);
- 9 rcu_read_lock();
-10 do_something();
-11 rcu_read_unlock();
-12 spin_unlock(&mylock);
-</pre>
-</blockquote>
-
-<p>
-In theory, you could enter the RCU read-side critical section first,
-but it is more efficient to keep the entire RCU read-side critical
-section contained in the preempt-disable region as shown above.
-Of course, RCU read-side critical sections that extend outside of
-preempt-disable regions will work correctly, but such critical sections
-can be preempted, which forces <tt>rcu_read_unlock()</tt> to do
-more work.
-And no, this is <i>not</i> an invitation to enclose all of your RCU
-read-side critical sections within preempt-disable regions, because
-doing so would degrade real-time response.
-
-<p>
-This non-requirement appeared with preemptible RCU.
-If you need a grace period that waits on non-preemptible code regions, use
-<a href="#Sched Flavor">RCU-sched</a>.
-
-<h2><a name="Parallelism Facts of Life">Parallelism Facts of Life</a></h2>
-
-<p>
-These parallelism facts of life are by no means specific to RCU, but
-the RCU implementation must abide by them.
-They therefore bear repeating:
-
-<ol>
-<li> Any CPU or task may be delayed at any time,
- and any attempts to avoid these delays by disabling
- preemption, interrupts, or whatever are completely futile.
- This is most obvious in preemptible user-level
- environments and in virtualized environments (where
- a given guest OS's VCPUs can be preempted at any time by
- the underlying hypervisor), but can also happen in bare-metal
- environments due to ECC errors, NMIs, and other hardware
- events.
- Although a delay of more than about 20 seconds can result
- in splats, the RCU implementation is obligated to use
- algorithms that can tolerate extremely long delays, but where
- “extremely long” is not long enough to allow
- wrap-around when incrementing a 64-bit counter.
-<li> Both the compiler and the CPU can reorder memory accesses.
- Where it matters, RCU must use compiler directives and
- memory-barrier instructions to preserve ordering.
-<li> Conflicting writes to memory locations in any given cache line
- will result in expensive cache misses.
- Greater numbers of concurrent writes and more-frequent
- concurrent writes will result in more dramatic slowdowns.
- RCU is therefore obligated to use algorithms that have
- sufficient locality to avoid significant performance and
- scalability problems.
-<li> As a rough rule of thumb, only one CPU's worth of processing
- may be carried out under the protection of any given exclusive
- lock.
- RCU must therefore use scalable locking designs.
-<li> Counters are finite, especially on 32-bit systems.
- RCU's use of counters must therefore tolerate counter wrap,
- or be designed such that counter wrap would take way more
- time than a single system is likely to run.
- An uptime of ten years is quite possible, a runtime
- of a century much less so.
- As an example of the latter, RCU's dyntick-idle nesting counter
- allows 54 bits for interrupt nesting level (this counter
- is 64 bits even on a 32-bit system).
- Overflowing this counter requires 2<sup>54</sup>
- half-interrupts on a given CPU without that CPU ever going idle.
- If a half-interrupt happened every microsecond, it would take
- 570 years of runtime to overflow this counter, which is currently
- believed to be an acceptably long time.
-<li> Linux systems can have thousands of CPUs running a single
- Linux kernel in a single shared-memory environment.
- RCU must therefore pay close attention to high-end scalability.
-</ol>
-
-<p>
-This last parallelism fact of life means that RCU must pay special
-attention to the preceding facts of life.
-The idea that Linux might scale to systems with thousands of CPUs would
-have been met with some skepticism in the 1990s, but these requirements
-would otherwise have been unsurprising, even in the early 1990s.
-
-<h2><a name="Quality-of-Implementation Requirements">Quality-of-Implementation Requirements</a></h2>
-
-<p>
-These sections list quality-of-implementation requirements.
-Although an RCU implementation that ignores these requirements could
-still be used, it would likely be subject to limitations that would
-make it inappropriate for industrial-strength production use.
-Classes of quality-of-implementation requirements are as follows:
-
-<ol>
-<li> <a href="#Specialization">Specialization</a>
-<li> <a href="#Performance and Scalability">Performance and Scalability</a>
-<li> <a href="#Composability">Composability</a>
-<li> <a href="#Corner Cases">Corner Cases</a>
-</ol>
-
-<p>
-These classes are covered in the following sections.
-
-<h3><a name="Specialization">Specialization</a></h3>
-
-<p>
-RCU is and always has been intended primarily for read-mostly situations, as
-illustrated by the following figure.
-This means that RCU's read-side primitives are optimized, often at the
-expense of its update-side primitives.
-
-<p><img src="RCUApplicability.svg" alt="RCUApplicability.svg" width="70%"></p>
-
-<p>
-This focus on read-mostly situations means that RCU must interoperate
-with other synchronization primitives.
-For example, the <tt>add_gp()</tt> and <tt>remove_gp_synchronous()</tt>
-examples discussed earlier use RCU to protect readers and locking to
-coordinate updaters.
-However, the need extends much farther, requiring that a variety of
-synchronization primitives be legal within RCU read-side critical sections,
-including spinlocks, sequence locks, atomic operations, reference
-counters, and memory barriers.
-
-<p>@@QQ@@
-What about sleeping locks?
-<p>@@QQA@@
-These are forbidden within Linux-kernel RCU read-side critical sections
-because it is not legal to place a quiescent state (in this case,
-voluntary context switch) within an RCU read-side critical section.
-However, sleeping locks may be used within userspace RCU read-side critical
-sections, and also within Linux-kernel sleepable RCU
-<a href="#Sleepable RCU">(SRCU)</a>
-read-side critical sections.
-In addition, the -rt patchset turns spinlocks into sleeping locks so
-that the corresponding critical sections can be preempted, which
-also means that these sleeplockified spinlocks (but not other sleeping locks!)
-may be acquired within -rt-Linux-kernel RCU read-side critical sections.
-
-<p>
-Note that it <i>is</i> legal for a normal RCU read-side critical section
-to conditionally acquire a sleeping lock (as in <tt>mutex_trylock()</tt>),
-but only as long as it does not loop indefinitely attempting to
-conditionally acquire that sleeping lock.
-The key point is that things like <tt>mutex_trylock()</tt>
-either return with the mutex held, or return an error indication if
-the mutex was not immediately available.
-Either way, <tt>mutex_trylock()</tt> returns immediately without sleeping.
-<p>@@QQE@@
-
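-<p>
-For reference, the legal <tt>mutex_trylock()</tt> pattern discussed in
-the answer to the preceding Quick Quiz might look like the following
-sketch (the <tt>->mutex</tt> field in <tt>struct foo</tt> and the
-<tt>do_update()</tt> function are hypothetical):
-
-<blockquote>
-<pre>
- 1 bool try_update_element(struct foo *p)
- 2 {
- 3 bool ret = false;
- 4
- 5 rcu_read_lock();
- 6 if (mutex_trylock(&p->mutex)) { /* Returns immediately either way. */
- 7 do_update(p);
- 8 mutex_unlock(&p->mutex);
- 9 ret = true;
-10 }
-11 rcu_read_unlock(); /* No sleeping while read-side held. */
-12 return ret;
-13 }
-</pre>
-</blockquote>
-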
-<p>
-It often comes as a surprise that many algorithms do not require a
-consistent view of data, and can in fact function quite well without one,
-with network routing being the poster child.
-Internet routing algorithms take significant time to propagate
-updates, so that by the time an update arrives at a given system,
-that system has been sending network traffic the wrong way for
-a considerable length of time.
-Having a few threads continue to send traffic the wrong way for a
-few more milliseconds is clearly not a problem: In the worst case,
-TCP retransmissions will eventually get the data where it needs to go.
-In general, when tracking the state of the universe outside of the
-computer, some level of inconsistency must be tolerated due to
-speed-of-light delays if nothing else.
-
-<p>
-Furthermore, uncertainty about external state is inherent in many cases.
-For example, a pair of veterinarians might use heartbeat to determine
-whether or not a given cat was alive.
-But how long should they wait after the last heartbeat to decide that
-the cat is in fact dead?
-Waiting less than 400 milliseconds makes no sense because this would
-mean that a relaxed cat would be considered to cycle between death
-and life more than 100 times per minute.
-Moreover, just as with human beings, a cat's heart might stop for
-some period of time, so the exact wait period is a judgment call.
-One of our pair of veterinarians might wait 30 seconds before pronouncing
-the cat dead, while the other might insist on waiting a full minute.
-The two veterinarians would then disagree on the state of the cat during
-the final 30 seconds of the minute following the last heartbeat, as
-fancifully illustrated below:
-
-<p><img src="2013-08-is-it-dead.png" alt="2013-08-is-it-dead.png" width="431"></p>
-
-<p>
-Interestingly enough, this same situation applies to hardware.
-When push comes to shove, how do we tell whether or not some
-external server has failed?
-We send messages to it periodically, and declare it failed if we
-don't receive a response within a given period of time.
-Policy decisions can usually tolerate short
-periods of inconsistency.
-The policy was decided some time ago, and is only now being put into
-effect, so a few milliseconds of delay is normally inconsequential.
-
-<p>
-However, there are algorithms that absolutely must see consistent data.
-For example, the translation between a user-level SystemV semaphore
-ID to the corresponding in-kernel data structure is protected by RCU,
-but it is absolutely forbidden to update a semaphore that has just been
-removed.
-In the Linux kernel, this need for consistency is accommodated by acquiring
-spinlocks located in the in-kernel data structure from within
-the RCU read-side critical section, and this is indicated by the
-green box in the figure above.
-Many other techniques may be used, and are in fact used within the
-Linux kernel.
-
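-<p>
-The following sketch illustrates this pattern; the <tt>struct sem</tt>
-layout, its <tt>->removed</tt> flag, and the <tt>sem_lookup()</tt> and
-<tt>do_semaphore_update()</tt> helpers are hypothetical stand-ins for
-the actual SystemV semaphore code:
-
-<blockquote>
-<pre>
- 1 struct sem {
- 2 spinlock_t lock;
- 3 bool removed; /* Set (under ->lock) before RCU removal. */
- 4 /* ... */
- 5 };
- 6
- 7 bool sem_update(int semid)
- 8 {
- 9 struct sem *s;
-10 bool ret = false;
-11
-12 rcu_read_lock();
-13 s = sem_lookup(semid); /* RCU-protected ID-to-structure lookup. */
-14 if (s) {
-15 spin_lock(&s->lock);
-16 if (!s->removed) { /* Consistent: not a just-deleted semaphore. */
-17 do_semaphore_update(s);
-18 ret = true;
-19 }
-20 spin_unlock(&s->lock);
-21 }
-22 rcu_read_unlock();
-23 return ret;
-24 }
-</pre>
-</blockquote>
-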
-<p>
-In short, RCU is not required to maintain consistency, and other
-mechanisms may be used in concert with RCU when consistency is required.
-RCU's specialization allows it to do its job extremely well, and its
-ability to interoperate with other synchronization mechanisms allows
-the right mix of synchronization tools to be used for a given job.
-
-<h3><a name="Performance and Scalability">Performance and Scalability</a></h3>
-
-<p>
-Energy efficiency is a critical component of performance today,
-and Linux-kernel RCU implementations must therefore avoid unnecessarily
-awakening idle CPUs.
-I cannot claim that this requirement was premeditated.
-In fact, I learned of it during a telephone conversation in which I
-was given “frank and open” feedback on the importance
-of energy efficiency in battery-powered systems and on specific
-energy-efficiency shortcomings of the Linux-kernel RCU implementation.
-In my experience, the battery-powered embedded community will consider
-any unnecessary wakeups to be extremely unfriendly acts.
-So much so that mere Linux-kernel-mailing-list posts are
-insufficient to vent their ire.
-
-<p>
-Memory consumption is not particularly important in most
-situations, and has become decreasingly
-so as memory sizes have expanded and memory
-costs have plummeted.
-However, as I learned from Matt Mackall's
-<a href="http://elinux.org/Linux_Tiny-FAQ">bloatwatch</a>
-efforts, memory footprint is critically important on single-CPU systems with
-non-preemptible (<tt>CONFIG_PREEMPT=n</tt>) kernels, and thus
-<a href="https://lkml.kernel.org/g/20090113221724.GA15307@linux.vnet.ibm.com">tiny RCU</a>
-was born.
-Josh Triplett has since taken over the small-memory banner with his
-<a href="https://tiny.wiki.kernel.org/">Linux kernel tinification</a>
-project, which resulted in
-<a href="#Sleepable RCU">SRCU</a>
-becoming optional for those kernels not needing it.
-
-<p>
-The remaining performance requirements are, for the most part,
-unsurprising.
-For example, in keeping with RCU's read-side specialization,
-<tt>rcu_dereference()</tt> should have negligible overhead (for
-example, suppression of a few minor compiler optimizations).
-Similarly, in non-preemptible environments, <tt>rcu_read_lock()</tt> and
-<tt>rcu_read_unlock()</tt> should have exactly zero overhead.
-
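-<p>
-One way to see how the overhead can be exactly zero: with
-<tt>CONFIG_PREEMPT=n</tt>, the read-side markers need only constrain
-the compiler, so they can reduce to something like the following
-(a greatly simplified sketch, not the actual Linux-kernel source):
-
-<blockquote>
-<pre>
- 1 static inline void rcu_read_lock(void)
- 2 {
- 3 barrier(); /* Compiler-only constraint: no instructions emitted. */
- 4 }
- 5
- 6 static inline void rcu_read_unlock(void)
- 7 {
- 8 barrier(); /* Likewise free at runtime. */
- 9 }
-</pre>
-</blockquote>
-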
-<p>
-In preemptible environments, in the case where the RCU read-side
-critical section was not preempted (as will be the case for the
-highest-priority real-time process), <tt>rcu_read_lock()</tt> and
-<tt>rcu_read_unlock()</tt> should have minimal overhead.
-In particular, they should not contain atomic read-modify-write
-operations, memory-barrier instructions, preemption disabling,
-interrupt disabling, or backwards branches.
-However, in the case where the RCU read-side critical section was preempted,
-<tt>rcu_read_unlock()</tt> may acquire spinlocks and disable interrupts.
-This is why it is better to nest an RCU read-side critical section
-within a preempt-disable region than vice versa, at least in cases
-where that critical section is short enough to avoid unduly degrading
-real-time latencies.
-
-<p>
-The <tt>synchronize_rcu()</tt> grace-period-wait primitive is
-optimized for throughput.
-It may therefore incur several milliseconds of latency in addition to
-the duration of the longest RCU read-side critical section.
-On the other hand, multiple concurrent invocations of
-<tt>synchronize_rcu()</tt> are required to use batching optimizations
-so that they can be satisfied by a single underlying grace-period-wait
-operation.
-For example, in the Linux kernel, it is not unusual for a single
-grace-period-wait operation to serve more than
-<a href="https://www.usenix.org/conference/2004-usenix-annual-technical-conference/making-rcu-safe-deep-sub-millisecond-response">1,000 separate invocations</a>
-of <tt>synchronize_rcu()</tt>, thus amortizing the per-invocation
-overhead down to nearly zero.
-However, the grace-period optimization is also required to avoid
-measurable degradation of real-time scheduling and interrupt latencies.
-
-<p>
-In some cases, the multi-millisecond <tt>synchronize_rcu()</tt>
-latencies are unacceptable.
-In these cases, <tt>synchronize_rcu_expedited()</tt> may be used
-instead, reducing the grace-period latency down to a few tens of
-microseconds on small systems, at least in cases where the RCU read-side
-critical sections are short.
-There are currently no special latency requirements for
-<tt>synchronize_rcu_expedited()</tt> on large systems, but,
-consistent with the empirical nature of the RCU specification,
-that is subject to change.
-However, there most definitely are scalability requirements:
-A storm of <tt>synchronize_rcu_expedited()</tt> invocations on 4096
-CPUs should at least make reasonable forward progress.
-In return for its shorter latencies, <tt>synchronize_rcu_expedited()</tt>
-is permitted to impose modest degradation of real-time latency
-on non-idle online CPUs.
-That said, it will likely be necessary to take further steps to reduce this
-degradation, hopefully to roughly that of a scheduling-clock interrupt.
-
-<p>
-There are a number of situations where even
-<tt>synchronize_rcu_expedited()</tt>'s reduced grace-period
-latency is unacceptable.
-In these situations, the asynchronous <tt>call_rcu()</tt> can be
-used in place of <tt>synchronize_rcu()</tt> as follows:
-
-<blockquote>
-<pre>
- 1 struct foo {
- 2 int a;
- 3 int b;
- 4 struct rcu_head rh;
- 5 };
- 6
- 7 static void remove_gp_cb(struct rcu_head *rhp)
- 8 {
- 9 struct foo *p = container_of(rhp, struct foo, rh);
-10
-11 kfree(p);
-12 }
-13
-14 bool remove_gp_asynchronous(void)
-15 {
-16 struct foo *p;
-17
-18 spin_lock(&gp_lock);
-19 p = rcu_access_pointer(gp);
-20 if (!p) {
-21 spin_unlock(&gp_lock);
-22 return false;
-23 }
-24 rcu_assign_pointer(gp, NULL);
-25 call_rcu(&p->rh, remove_gp_cb);
-26 spin_unlock(&gp_lock);
-27 return true;
-28 }
-</pre>
-</blockquote>
-
-<p>
-A definition of <tt>struct foo</tt> is finally needed, and appears
-on lines 1-5.
-The function <tt>remove_gp_cb()</tt> is passed to <tt>call_rcu()</tt>
-on line 25, and will be invoked after the end of a subsequent
-grace period.
-This gets the same effect as <tt>remove_gp_synchronous()</tt>,
-but without forcing the updater to wait for a grace period to elapse.
-The <tt>call_rcu()</tt> function may be used in a number of
-situations where neither <tt>synchronize_rcu()</tt> nor
-<tt>synchronize_rcu_expedited()</tt> would be legal,
-including within preempt-disable code, <tt>local_bh_disable()</tt> code,
-interrupt-disable code, and interrupt handlers.
-However, even <tt>call_rcu()</tt> is illegal within NMI handlers.
-The callback function (<tt>remove_gp_cb()</tt> in this case) will be
-executed within a softirq (software interrupt) environment within the
-Linux kernel,
-either within a real softirq handler or under the protection
-of <tt>local_bh_disable()</tt>.
-In both the Linux kernel and in userspace, it is bad practice to
-write an RCU callback function that takes too long.
-Long-running operations should be relegated to separate threads or
-(in the Linux kernel) workqueues.
-
-<p>@@QQ@@
-Why does line 19 use <tt>rcu_access_pointer()</tt>?
-After all, <tt>call_rcu()</tt> on line 25 stores into the
-structure, which would interact badly with concurrent insertions.
-Doesn't this mean that <tt>rcu_dereference()</tt> is required?
-<p>@@QQA@@
-Presumably the <tt>gp_lock</tt> acquired on line 18 excludes
-any changes, including any insertions that <tt>rcu_dereference()</tt>
-would protect against.
-Therefore, any insertions will be delayed until after <tt>gp_lock</tt>
-is released on line 26, which in turn means that
-<tt>rcu_access_pointer()</tt> suffices.
-<p>@@QQE@@
-
-<p>
-However, all that <tt>remove_gp_cb()</tt> is doing is
-invoking <tt>kfree()</tt> on the data element.
-This is a common idiom, and is supported by <tt>kfree_rcu()</tt>,
-which allows “fire and forget” operation as shown below:
-
-<blockquote>
-<pre>
- 1 struct foo {
- 2 int a;
- 3 int b;
- 4 struct rcu_head rh;
- 5 };
- 6
- 7 bool remove_gp_faf(void)
- 8 {
- 9 struct foo *p;
-10
-11 spin_lock(&gp_lock);
-12 p = rcu_dereference(gp);
-13 if (!p) {
-14 spin_unlock(&gp_lock);
-15 return false;
-16 }
-17 rcu_assign_pointer(gp, NULL);
-18 kfree_rcu(p, rh);
-19 spin_unlock(&gp_lock);
-20 return true;
-21 }
-</pre>
-</blockquote>
-
-<p>
-Note that <tt>remove_gp_faf()</tt> simply invokes
-<tt>kfree_rcu()</tt> and proceeds, without any need to pay any
-further attention to the subsequent grace period and <tt>kfree()</tt>.
-It is permissible to invoke <tt>kfree_rcu()</tt> from the same
-environments as for <tt>call_rcu()</tt>.
-Interestingly enough, DYNIX/ptx had the equivalents of
-<tt>call_rcu()</tt> and <tt>kfree_rcu()</tt>, but not
-<tt>synchronize_rcu()</tt>.
-This was due to the fact that RCU was not heavily used within DYNIX/ptx,
-so the very few places that needed something like
-<tt>synchronize_rcu()</tt> simply open-coded it.
-
-<p>@@QQ@@
-Earlier it was claimed that <tt>call_rcu()</tt> and
-<tt>kfree_rcu()</tt> allowed updaters to avoid being blocked
-by readers.
-But how can that be correct, given that the invocation of the callback
-and the freeing of the memory (respectively) must still wait for
-a grace period to elapse?
-<p>@@QQA@@
-We could define things this way, but keep in mind that this sort of
-definition would say that updates in garbage-collected languages
-cannot complete until the next time the garbage collector runs,
-which does not seem at all reasonable.
-The key point is that in most cases, an updater using either
-<tt>call_rcu()</tt> or <tt>kfree_rcu()</tt> can proceed to the
-next update as soon as it has invoked <tt>call_rcu()</tt> or
-<tt>kfree_rcu()</tt>, without having to wait for a subsequent
-grace period.
-<p>@@QQE@@
-
-<p>
-But what if the updater must wait for the completion of code to be
-executed after the end of the grace period, but has other tasks
-that can be carried out in the meantime?
-The polling-style <tt>get_state_synchronize_rcu()</tt> and
-<tt>cond_synchronize_rcu()</tt> functions may be used for this
-purpose, as shown below:
-
-<blockquote>
-<pre>
- 1 bool remove_gp_poll(void)
- 2 {
- 3 struct foo *p;
- 4 unsigned long s;
- 5
- 6 spin_lock(&gp_lock);
- 7 p = rcu_access_pointer(gp);
- 8 if (!p) {
- 9 spin_unlock(&gp_lock);
-10 return false;
-11 }
-12 rcu_assign_pointer(gp, NULL);
-13 spin_unlock(&gp_lock);
-14 s = get_state_synchronize_rcu();
-15 do_something_while_waiting();
-16 cond_synchronize_rcu(s);
-17 kfree(p);
-18 return true;
-19 }
-</pre>
-</blockquote>
-
-<p>
-On line 14, <tt>get_state_synchronize_rcu()</tt> obtains a
-“cookie” from RCU,
-then line 15 carries out other tasks,
-and finally, line 16 returns immediately if a grace period has
-elapsed in the meantime, but otherwise waits as required.
-The need for <tt>get_state_synchronize_rcu()</tt> and
-<tt>cond_synchronize_rcu()</tt> has appeared quite recently,
-so it is too early to tell whether they will stand the test of time.
-
-<p>
-RCU thus provides a range of tools to allow updaters to strike the
-required tradeoff between latency, flexibility, and CPU overhead.
-
-<h3><a name="Composability">Composability</a></h3>
-
-<p>
-Composability has received much attention in recent years, perhaps in part
-due to the collision of multicore hardware with object-oriented techniques
-designed in single-threaded environments for single-threaded use.
-And in theory, RCU read-side critical sections may be composed, and in
-fact may be nested arbitrarily deeply.
-In practice, as with all real-world implementations of composable
-constructs, there are limitations.
-
-<p>
-Implementations of RCU for which <tt>rcu_read_lock()</tt>
-and <tt>rcu_read_unlock()</tt> generate no code, such as
-Linux-kernel RCU when <tt>CONFIG_PREEMPT=n</tt>, can be
-nested arbitrarily deeply.
-After all, there is no overhead.
-Except that if all these instances of <tt>rcu_read_lock()</tt>
-and <tt>rcu_read_unlock()</tt> are visible to the compiler,
-compilation will eventually fail due to exhausting memory,
-mass storage, or user patience, whichever comes first.
-If the nesting is not visible to the compiler, as is the case with
-mutually recursive functions each in its own translation unit,
-stack overflow will result.
-If the nesting takes the form of loops, either the control variable
-will overflow or (in the Linux kernel) you will get an RCU CPU stall warning.
-Nevertheless, this class of RCU implementations is one
-of the most composable constructs in existence.
-
-<p>
-RCU implementations that explicitly track nesting depth
-are limited by the nesting-depth counter.
-For example, the Linux kernel's preemptible RCU limits nesting to
-<tt>INT_MAX</tt>.
-This should suffice for almost all practical purposes.
-That said, a consecutive pair of RCU read-side critical sections
-between which there is an operation that waits for a grace period
-cannot be enclosed in another RCU read-side critical section.
-This is because it is not legal to wait for a grace period within
-an RCU read-side critical section: To do so would result either
-in deadlock or
-in RCU implicitly splitting the enclosing RCU read-side critical
-section, neither of which is conducive to a long-lived and prosperous
-kernel.
-
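-<p>
-For example, the following sequence is legal on its own, but cannot be
-enclosed in an outer RCU read-side critical section (a sketch, with
-hypothetical <tt>do_something_1()</tt> and <tt>do_something_2()</tt>):
-
-<blockquote>
-<pre>
- 1 rcu_read_lock();
- 2 do_something_1();
- 3 rcu_read_unlock();
- 4 synchronize_rcu(); /* Illegal inside rcu_read_lock(). */
- 5 rcu_read_lock();
- 6 do_something_2();
- 7 rcu_read_unlock();
-</pre>
-</blockquote>
-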
-<p>
-It is worth noting that RCU is not alone in limiting composability.
-For example, many transactional-memory implementations prohibit
-composing a pair of transactions separated by an irrevocable
-operation (for example, a network receive operation).
-For another example, lock-based critical sections can be composed
-surprisingly freely, but only if deadlock is avoided.
-
-<p>
-In short, although RCU read-side critical sections are highly composable,
-care is required in some situations, just as is the case for any other
-composable synchronization mechanism.
-
-<h3><a name="Corner Cases">Corner Cases</a></h3>
-
-<p>
-A given RCU workload might have an endless and intense stream of
-RCU read-side critical sections, perhaps even so intense that there
-was never a point in time during which there was not at least one
-RCU read-side critical section in flight.
-RCU cannot allow this situation to block grace periods: As long as
-all the RCU read-side critical sections are finite, grace periods
-must also be finite.
-
-<p>
-That said, preemptible RCU implementations could potentially result
-in RCU read-side critical sections being preempted for long durations,
-which has the effect of creating a long-duration RCU read-side
-critical section.
-This situation can arise only in heavily loaded systems, but systems using
-real-time priorities are of course more vulnerable.
-Therefore, RCU priority boosting is provided to help deal with this
-case.
-That said, the exact requirements on RCU priority boosting will likely
-evolve as more experience accumulates.
-
-<p>
-Other workloads might have very high update rates.
-Although one can argue that such workloads should instead use
-something other than RCU, the fact remains that RCU must
-handle such workloads gracefully.
-This requirement is another factor driving batching of grace periods,
-but it is also the driving force behind the checks for large numbers
-of queued RCU callbacks in the <tt>call_rcu()</tt> code path.
-Finally, high update rates should not delay RCU read-side critical
-sections, although some read-side delays can occur when using
-<tt>synchronize_rcu_expedited()</tt>, courtesy of this function's use
-of <tt>try_stop_cpus()</tt>.
-(In the future, <tt>synchronize_rcu_expedited()</tt> will be
-converted to use lighter-weight inter-processor interrupts (IPIs),
-but this will still disturb readers, though to a much smaller degree.)
-
-<p>
-Although all three of these corner cases were understood in the early
-1990s, a simple user-level test consisting of <tt>close(open(path))</tt>
-in a tight loop
-in the early 2000s suddenly provided a much deeper appreciation of the
-high-update-rate corner case.
-This test also motivated addition of some RCU code to react to high update
-rates, for example, if a given CPU finds itself with more than 10,000
-RCU callbacks queued, it will cause RCU to take evasive action by
-more aggressively starting grace periods and more aggressively forcing
-completion of grace-period processing.
-This evasive action causes the grace period to complete more quickly,
-but at the cost of restricting RCU's batching optimizations, thus
-increasing the CPU overhead incurred by that grace period.
-
-<h2><a name="Software-Engineering Requirements">
-Software-Engineering Requirements</a></h2>
-
-<p>
-Between Murphy's Law and “To err is human”, it is necessary to
-guard against mishaps and misuse:
-
-<ol>
-<li> It is all too easy to forget to use <tt>rcu_read_lock()</tt>
- everywhere that it is needed, so kernels built with
-	<tt>CONFIG_PROVE_RCU=y</tt> will splat if
- <tt>rcu_dereference()</tt> is used outside of an
- RCU read-side critical section.
- Update-side code can use <tt>rcu_dereference_protected()</tt>,
- which takes a
- <a href="https://lwn.net/Articles/371986/">lockdep expression</a>
- to indicate what is providing the protection.
- If the indicated protection is not provided, a lockdep splat
- is emitted.
-
- <p>
- Code shared between readers and updaters can use
- <tt>rcu_dereference_check()</tt>, which also takes a
- lockdep expression, and emits a lockdep splat if neither
- <tt>rcu_read_lock()</tt> nor the indicated protection
- is in place.
- In addition, <tt>rcu_dereference_raw()</tt> is used in those
- (hopefully rare) cases where the required protection cannot
- be easily described.
- Finally, <tt>rcu_read_lock_held()</tt> is provided to
- allow a function to verify that it has been invoked within
- an RCU read-side critical section.
- I was made aware of this set of requirements shortly after Thomas
- Gleixner audited a number of RCU uses.
-<li>	A given function might wish to check for RCU-related preconditions
-	upon entry, before using any other RCU API.
-	The <tt>rcu_lockdep_assert()</tt> macro does this job,
-	asserting the expression in kernels having lockdep enabled
-	and doing nothing otherwise.
-<li> It is also easy to forget to use <tt>rcu_assign_pointer()</tt>
- and <tt>rcu_dereference()</tt>, perhaps (incorrectly)
- substituting a simple assignment.
- To catch this sort of error, a given RCU-protected pointer may be
- tagged with <tt>__rcu</tt>, after which running sparse
- with <tt>CONFIG_SPARSE_RCU_POINTER=y</tt> will complain
- about simple-assignment accesses to that pointer.
- Arnd Bergmann made me aware of this requirement, and also
- supplied the needed
- <a href="https://lwn.net/Articles/376011/">patch series</a>.
-<li> Kernels built with <tt>CONFIG_DEBUG_OBJECTS_RCU_HEAD=y</tt>
- will splat if a data element is passed to <tt>call_rcu()</tt>
- twice in a row, without a grace period in between.
- (This error is similar to a double free.)
- The corresponding <tt>rcu_head</tt> structures that are
- dynamically allocated are automatically tracked, but
- <tt>rcu_head</tt> structures allocated on the stack
- must be initialized with <tt>init_rcu_head_on_stack()</tt>
- and cleaned up with <tt>destroy_rcu_head_on_stack()</tt>.
- Similarly, statically allocated non-stack <tt>rcu_head</tt>
- structures must be initialized with <tt>init_rcu_head()</tt>
- and cleaned up with <tt>destroy_rcu_head()</tt>.
- Mathieu Desnoyers made me aware of this requirement, and also
- supplied the needed
- <a href="https://lkml.kernel.org/g/20100319013024.GA28456@Krystal">patch</a>.
-<li> An infinite loop in an RCU read-side critical section will
- eventually trigger an RCU CPU stall warning splat, with
- the duration of “eventually” being controlled by the
- <tt>RCU_CPU_STALL_TIMEOUT</tt> <tt>Kconfig</tt> option, or,
- alternatively, by the
- <tt>rcupdate.rcu_cpu_stall_timeout</tt> boot/sysfs
- parameter.
- However, RCU is not obligated to produce this splat
- unless there is a grace period waiting on that particular
- RCU read-side critical section.
- <p>
- Some extreme workloads might intentionally delay
- RCU grace periods, and systems running those workloads can
- be booted with <tt>rcupdate.rcu_cpu_stall_suppress</tt>
- to suppress the splats.
- This kernel parameter may also be set via <tt>sysfs</tt>.
- Furthermore, RCU CPU stall warnings are counter-productive
- during sysrq dumps and during panics.
- RCU therefore supplies the <tt>rcu_sysrq_start()</tt> and
- <tt>rcu_sysrq_end()</tt> API members to be called before
- and after long sysrq dumps.
- RCU also supplies the <tt>rcu_panic()</tt> notifier that is
- automatically invoked at the beginning of a panic to suppress
- further RCU CPU stall warnings.
-
- <p>
- This requirement made itself known in the early 1990s, pretty
- much the first time that it was necessary to debug a CPU stall.
- That said, the initial implementation in DYNIX/ptx was quite
- generic in comparison with that of Linux.
-<li> Although it would be very good to detect pointers leaking out
- of RCU read-side critical sections, there is currently no
- good way of doing this.
- One complication is the need to distinguish between pointers
- leaking and pointers that have been handed off from RCU to
- some other synchronization mechanism, for example, reference
- counting.
-<li> In kernels built with <tt>CONFIG_RCU_TRACE=y</tt>, RCU-related
- information is provided via both debugfs and event tracing.
-<li> Open-coded use of <tt>rcu_assign_pointer()</tt> and
- <tt>rcu_dereference()</tt> to create typical linked
- data structures can be surprisingly error-prone.
- Therefore, RCU-protected
- <a href="https://lwn.net/Articles/609973/#RCU List APIs">linked lists</a>
- and, more recently, RCU-protected
- <a href="https://lwn.net/Articles/612100/">hash tables</a>
- are available.
- Many other special-purpose RCU-protected data structures are
- available in the Linux kernel and the userspace RCU library.
-<li> Some linked structures are created at compile time, but still
- require <tt>__rcu</tt> checking.
- The <tt>RCU_POINTER_INITIALIZER()</tt> macro serves this
- purpose.
-<li> It is not necessary to use <tt>rcu_assign_pointer()</tt>
- when creating linked structures that are to be published via
- a single external pointer.
- The <tt>RCU_INIT_POINTER()</tt> macro is provided for
- this task and also for assigning <tt>NULL</tt> pointers
- at runtime.
-</ol>
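-
-<p>
-As promised above, here is a sketch showing how several of these
-diagnostic APIs fit together.
-All names are hypothetical rather than being taken from any particular
-kernel subsystem, and the sketch assumes that <tt>gp</tt> is never
-<tt>NULL</tt>:
-
-<blockquote>
-<pre>
- 1 struct foo { int a; };
- 2 static struct foo __rcu *gp;  /* __rcu: checked by sparse */
- 3 static DEFINE_MUTEX(g_lock);  /* protects updates to gp */
- 4
- 5 int reader(void)
- 6 {
- 7   int ret;
- 8
- 9   rcu_read_lock();
-10   ret = rcu_dereference(gp)->a;
-11   rcu_read_unlock();
-12   return ret;
-13 }
-14
-15 void updater(int a)  /* holds g_lock, so no rcu_read_lock() needed */
-16 {
-17   mutex_lock(&g_lock);
-18   rcu_dereference_protected(gp, lockdep_is_held(&g_lock))->a = a;
-19   mutex_unlock(&g_lock);
-20 }
-21
-22 int either(void)  /* legal under either rcu_read_lock() or g_lock */
-23 {
-24   return rcu_dereference_check(gp, lockdep_is_held(&g_lock))->a;
-25 }
-</pre>
-</blockquote>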
-
-<p>
-This is not a hard-and-fast list: RCU's diagnostic capabilities will
-continue to be guided by the number and type of usage bugs found
-in real-world RCU usage.
-
-<h2><a name="Linux Kernel Complications">Linux Kernel Complications</a></h2>
-
-<p>
-The Linux kernel provides an interesting environment for all kinds of
-software, including RCU.
-Some of the relevant points of interest are as follows:
-
-<ol>
-<li> <a href="#Configuration">Configuration</a>.
-<li> <a href="#Firmware Interface">Firmware Interface</a>.
-<li> <a href="#Early Boot">Early Boot</a>.
-<li> <a href="#Interrupts and NMIs">
- Interrupts and non-maskable interrupts (NMIs)</a>.
-<li> <a href="#Loadable Modules">Loadable Modules</a>.
-<li> <a href="#Hotplug CPU">Hotplug CPU</a>.
-<li> <a href="#Scheduler and RCU">Scheduler and RCU</a>.
-<li> <a href="#Tracing and RCU">Tracing and RCU</a>.
-<li> <a href="#Energy Efficiency">Energy Efficiency</a>.
-<li> <a href="#Memory Efficiency">Memory Efficiency</a>.
-<li> <a href="#Performance, Scalability, Response Time, and Reliability">
- Performance, Scalability, Response Time, and Reliability</a>.
-</ol>
-
-<p>
-This list is probably incomplete, but it does give a feel for the
-most notable Linux-kernel complications.
-Each of the following sections covers one of the above topics.
-
-<h3><a name="Configuration">Configuration</a></h3>
-
-<p>
-RCU's goal is automatic configuration, so that almost nobody
-needs to worry about RCU's <tt>Kconfig</tt> options.
-And for almost all users, RCU does in fact work well
-“out of the box.”
-
-<p>
-However, there are specialized use cases that are handled by
-kernel boot parameters and <tt>Kconfig</tt> options.
-Unfortunately, the <tt>Kconfig</tt> system will explicitly ask users
-about new <tt>Kconfig</tt> options, which requires almost all of them
-be hidden behind a <tt>CONFIG_RCU_EXPERT</tt> <tt>Kconfig</tt> option.
-
-<p>
-This all should be quite obvious, but the fact remains that
-Linus Torvalds recently had to
-<a href="https://lkml.kernel.org/g/CA+55aFy4wcCwaL4okTs8wXhGZ5h-ibecy_Meg9C4MNQrUnwMcg@mail.gmail.com">remind</a>
-me of this requirement.
-
-<h3><a name="Firmware Interface">Firmware Interface</a></h3>
-
-<p>
-In many cases, the kernel obtains information about the system from the
-firmware, and sometimes things are lost in translation.
-Or the translation is accurate, but the original message is bogus.
-
-<p>
-For example, some systems' firmware overreports the number of CPUs,
-sometimes by a large factor.
-If RCU naively believed the firmware, as it used to do,
-it would create too many per-CPU kthreads.
-Although the resulting system will still run correctly, the extra
-kthreads needlessly consume memory and can cause confusion
-when they show up in <tt>ps</tt> listings.
-
-<p>
-RCU must therefore wait for a given CPU to actually come online before
-it can allow itself to believe that the CPU actually exists.
-The resulting “ghost CPUs” (which are never going to
-come online) cause a number of
-<a href="https://paulmck.livejournal.com/37494.html">interesting complications</a>.
-
-<h3><a name="Early Boot">Early Boot</a></h3>
-
-<p>
-The Linux kernel's boot sequence is an interesting process,
-and RCU is used early, even before <tt>rcu_init()</tt>
-is invoked.
-In fact, a number of RCU's primitives can be used as soon as the
-initial task's <tt>task_struct</tt> is available and the
-boot CPU's per-CPU variables are set up.
-The read-side primitives (<tt>rcu_read_lock()</tt>,
-<tt>rcu_read_unlock()</tt>, <tt>rcu_dereference()</tt>,
-and <tt>rcu_access_pointer()</tt>) will operate normally very early on,
-as will <tt>rcu_assign_pointer()</tt>.
-
-<p>
-Although <tt>call_rcu()</tt> may be invoked at any
-time during boot, callbacks are not guaranteed to be invoked until after
-the scheduler is fully up and running.
-This delay in callback invocation is due to the fact that RCU does not
-invoke callbacks until it is fully initialized, and this full initialization
-cannot occur until after the scheduler has initialized itself to the
-point where RCU can spawn and run its kthreads.
-In theory, it would be possible to invoke callbacks earlier;
-however, this is not a panacea because there would be severe restrictions
-on what operations those callbacks could invoke.
-
-<p>
-Perhaps surprisingly, <tt>synchronize_rcu()</tt>,
-<a href="#Bottom-Half Flavor"><tt>synchronize_rcu_bh()</tt></a>
-(<a href="#Bottom-Half Flavor">discussed below</a>),
-and
-<a href="#Sched Flavor"><tt>synchronize_sched()</tt></a>
-will all operate normally
-during very early boot, the reason being that there is only one CPU
-and preemption is disabled.
-This means that a call to <tt>synchronize_rcu()</tt> (or one of
-its friends) is itself a quiescent
-state and thus a grace period, so the early-boot implementation can
-be a no-op.
-
-<p>
-Both <tt>synchronize_rcu_bh()</tt> and <tt>synchronize_sched()</tt>
-continue to operate normally through the remainder of boot, courtesy
-of the fact that preemption is disabled across their RCU read-side
-critical sections and also courtesy of the fact that there is still
-only one CPU.
-However, once the scheduler starts initializing, preemption is enabled.
-There is still only a single CPU, but the fact that preemption is enabled
-means that the no-op implementation of <tt>synchronize_rcu()</tt> no
-longer works in <tt>CONFIG_PREEMPT=y</tt> kernels.
-Therefore, as soon as the scheduler starts initializing, the early-boot
-fastpath is disabled.
-This means that <tt>synchronize_rcu()</tt> switches to its runtime
-mode of operation where it posts callbacks, which in turn means that
-any call to <tt>synchronize_rcu()</tt> will block until the corresponding
-callback is invoked.
-Unfortunately, the callback cannot be invoked until RCU's runtime
-grace-period machinery is up and running, which cannot happen until
-the scheduler has initialized itself sufficiently to allow RCU's
-kthreads to be spawned.
-Therefore, invoking <tt>synchronize_rcu()</tt> during scheduler
-initialization can result in deadlock.
-
-<p>@@QQ@@
-So what happens with <tt>synchronize_rcu()</tt> during
-scheduler initialization for <tt>CONFIG_PREEMPT=n</tt>
-kernels?
-<p>@@QQA@@
-In <tt>CONFIG_PREEMPT=n</tt> kernels, <tt>synchronize_rcu()</tt>
-maps directly to <tt>synchronize_sched()</tt>.
-Therefore, <tt>synchronize_rcu()</tt> works normally throughout
-boot in <tt>CONFIG_PREEMPT=n</tt> kernels.
-However, your code must also work in <tt>CONFIG_PREEMPT=y</tt> kernels,
-so it is still necessary to avoid invoking <tt>synchronize_rcu()</tt>
-during scheduler initialization.
-<p>@@QQE@@
-
-<p>
-I learned of these boot-time requirements as a result of a series of
-system hangs.
-
-<h3><a name="Interrupts and NMIs">Interrupts and NMIs</a></h3>
-
-<p>
-The Linux kernel has interrupts, and RCU read-side critical sections are
-legal within interrupt handlers and within interrupt-disabled regions
-of code, as are invocations of <tt>call_rcu()</tt>.
-
-<p>
-Some Linux-kernel architectures can enter an interrupt handler from
-non-idle process context, and then just never leave it, instead stealthily
-transitioning back to process context.
-This trick is sometimes used to invoke system calls from inside the kernel.
-These “half-interrupts” mean that RCU has to be very careful
-about how it counts interrupt nesting levels.
-I learned of this requirement the hard way during a rewrite
-of RCU's dyntick-idle code.
-
-<p>
-The Linux kernel has non-maskable interrupts (NMIs), and
-RCU read-side critical sections are legal within NMI handlers.
-Thankfully, RCU update-side primitives, including
-<tt>call_rcu()</tt>, are prohibited within NMI handlers.
-
-<p>
-The name notwithstanding, some Linux-kernel architectures
-can have nested NMIs, which RCU must handle correctly.
-Andy Lutomirski
-<a href="https://lkml.kernel.org/g/CALCETrXLq1y7e_dKFPgou-FKHB6Pu-r8+t-6Ds+8=va7anBWDA@mail.gmail.com">surprised me</a>
-with this requirement;
-he also kindly surprised me with
-<a href="https://lkml.kernel.org/g/CALCETrXSY9JpW3uE6H8WYk81sg56qasA2aqmjMPsq5dOtzso=g@mail.gmail.com">an algorithm</a>
-that meets this requirement.
-
-<h3><a name="Loadable Modules">Loadable Modules</a></h3>
-
-<p>
-The Linux kernel has loadable modules, and these modules can
-also be unloaded.
-After a given module has been unloaded, any attempt to call
-one of its functions results in a segmentation fault.
-The module-unload functions must therefore cancel any
-delayed calls to loadable-module functions, for example,
-any outstanding <tt>mod_timer()</tt> must be dealt with
-via <tt>del_timer_sync()</tt> or similar.
-
-<p>
-Unfortunately, there is no way to cancel an RCU callback;
-once you invoke <tt>call_rcu()</tt>, the callback function is
-going to eventually be invoked, unless the system goes down first.
-Because it is normally considered socially irresponsible to crash the system
-in response to a module unload request, we need some other way
-to deal with in-flight RCU callbacks.
-
-<p>
-RCU therefore provides
-<tt><a href="https://lwn.net/Articles/217484/">rcu_barrier()</a></tt>,
-which waits until all in-flight RCU callbacks have been invoked.
-If a module uses <tt>call_rcu()</tt>, its exit function should therefore
-prevent any future invocation of <tt>call_rcu()</tt>, then invoke
-<tt>rcu_barrier()</tt>.
-In theory, the underlying module-unload code could invoke
-<tt>rcu_barrier()</tt> unconditionally, but in practice this would
-incur unacceptable latencies.
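-
-<p>
-As a sketch, a module exit function following this pattern might look
-as follows, with <tt>my_stop_new_callbacks()</tt> being a hypothetical
-stand-in for whatever mechanism the module uses to prevent new
-<tt>call_rcu()</tt> invocations:
-
-<blockquote>
-<pre>
- 1 static void __exit my_module_exit(void)
- 2 {
- 3   my_stop_new_callbacks(); /* No new call_rcu() after this point. */
- 4   rcu_barrier();           /* Wait for all in-flight callbacks. */
- 5   /* The module's text and data may now safely disappear. */
- 6 }
-</pre>
-</blockquote>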
-
-<p>
-Nikita Danilov noted this requirement for an analogous filesystem-unmount
-situation, and Dipankar Sarma incorporated <tt>rcu_barrier()</tt> into RCU.
-The need for <tt>rcu_barrier()</tt> for module unloading became
-apparent later.
-
-<h3><a name="Hotplug CPU">Hotplug CPU</a></h3>
-
-<p>
-The Linux kernel supports CPU hotplug, which means that CPUs
-can come and go.
-It is of course illegal to use any RCU API member from an offline CPU.
-This requirement was present from day one in DYNIX/ptx, but
-on the other hand, the Linux kernel's CPU-hotplug implementation
-is “interesting.”
-
-<p>
-The Linux-kernel CPU-hotplug implementation has notifiers that
-are used to allow the various kernel subsystems (including RCU)
-to respond appropriately to a given CPU-hotplug operation.
-Most RCU operations may be invoked from CPU-hotplug notifiers,
-including even normal synchronous grace-period operations
-such as <tt>synchronize_rcu()</tt>.
-However, expedited grace-period operations such as
-<tt>synchronize_rcu_expedited()</tt> are not supported,
-due to the fact that current implementations block CPU-hotplug
-operations, which could result in deadlock.
-
-<p>
-In addition, all-callback-wait operations such as
-<tt>rcu_barrier()</tt> are also not supported, due to the
-fact that there are phases of CPU-hotplug operations where
-the outgoing CPU's callbacks will not be invoked until after
-the CPU-hotplug operation ends, which could also result in deadlock.
-
-<h3><a name="Scheduler and RCU">Scheduler and RCU</a></h3>
-
-<p>
-RCU depends on the scheduler, and the scheduler uses RCU to
-protect some of its data structures.
-This means the scheduler is forbidden from acquiring
-the runqueue locks and the priority-inheritance locks
-in the middle of an outermost RCU read-side critical section unless either
-(1) it releases them before exiting that same
-RCU read-side critical section, or
-(2) interrupts are disabled across
-that entire RCU read-side critical section.
-This same prohibition also applies (recursively!) to any lock that is acquired
-while holding any lock to which this prohibition applies.
-Adhering to this rule prevents preemptible RCU from invoking
-<tt>rcu_read_unlock_special()</tt> while either runqueue or
-priority-inheritance locks are held, thus avoiding deadlock.
-
-<p>
-Prior to v4.4, it was only necessary to disable preemption across
-RCU read-side critical sections that acquired scheduler locks.
-In v4.4, expedited grace periods started using IPIs, and these
-IPIs could force a <tt>rcu_read_unlock()</tt> to take the slowpath.
-Therefore, this expedited-grace-period change required disabling of
-interrupts, not just preemption.
-
-<p>
-For RCU's part, the preemptible-RCU <tt>rcu_read_unlock()</tt>
-implementation must be written carefully to avoid similar deadlocks.
-In particular, <tt>rcu_read_unlock()</tt> must tolerate an
-interrupt where the interrupt handler invokes both
-<tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>.
-This possibility requires <tt>rcu_read_unlock()</tt> to use
-negative nesting levels to avoid destructive recursion via the
-interrupt handler's use of RCU.
-
-<p>
-This pair of mutual scheduler-RCU requirements came as a
-<a href="https://lwn.net/Articles/453002/">complete surprise</a>.
-
-<p>
-As noted above, RCU makes use of kthreads, and it is necessary to
-avoid excessive CPU-time accumulation by these kthreads.
-This requirement was no surprise, but RCU's violation of it
-when running context-switch-heavy workloads on kernels built with
-<tt>CONFIG_NO_HZ_FULL=y</tt>
-<a href="http://www.rdrop.com/users/paulmck/scalability/paper/BareMetal.2015.01.15b.pdf">did come as a surprise [PDF]</a>.
-RCU has made good progress towards meeting this requirement, even
-for context-switch-heavy <tt>CONFIG_NO_HZ_FULL=y</tt> workloads,
-but there is room for further improvement.
-
-<h3><a name="Tracing and RCU">Tracing and RCU</a></h3>
-
-<p>
-It is possible to use tracing on RCU code, but tracing itself
-uses RCU.
-For this reason, <tt>rcu_dereference_raw_notrace()</tt>
-is provided for use by tracing, which avoids the destructive
-recursion that could otherwise ensue.
-This API is also used by virtualization in some architectures,
-where RCU readers execute in environments in which tracing
-cannot be used.
-The tracing folks both located the requirement and provided the
-needed fix, so this surprise requirement was relatively painless.
-
-<h3><a name="Energy Efficiency">Energy Efficiency</a></h3>
-
-<p>
-Interrupting idle CPUs is considered socially unacceptable,
-especially by people with battery-powered embedded systems.
-RCU therefore conserves energy by detecting which CPUs are
-idle, including tracking CPUs that have been interrupted from idle.
-This is a large part of the energy-efficiency requirement,
-so I learned of this via an irate phone call.
-
-<p>
-Because RCU avoids interrupting idle CPUs, it is illegal to
-execute an RCU read-side critical section on an idle CPU.
-(Kernels built with <tt>CONFIG_PROVE_RCU=y</tt> will splat
-if you try it.)
-The <tt>RCU_NONIDLE()</tt> macro and <tt>_rcuidle</tt>
-event tracing are provided to work around this restriction.
-In addition, <tt>rcu_is_watching()</tt> may be used to
-test whether or not it is currently legal to run RCU read-side
-critical sections on this CPU.
-I learned of the need for diagnostics on the one hand
-and <tt>RCU_NONIDLE()</tt> on the other while inspecting
-idle-loop code.
-Steven Rostedt supplied <tt>_rcuidle</tt> event tracing,
-which is used quite heavily in the idle loop.
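-
-<p>
-For example, idle-loop code invoking a hypothetical
-<tt>do_idle_diagnostic()</tt> function that contains RCU read-side
-critical sections might be sketched as follows:
-
-<blockquote>
-<pre>
- 1 /* RCU is not watching this idle CPU... */
- 2 RCU_NONIDLE(do_idle_diagnostic());
- 3 /* ...but RCU was watching for the duration of the call. */
-</pre>
-</blockquote>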
-
-<p>
-It is similarly socially unacceptable to interrupt an
-<tt>nohz_full</tt> CPU running in userspace.
-RCU must therefore track <tt>nohz_full</tt> userspace
-execution.
-And in
-<a href="https://lwn.net/Articles/558284/"><tt>CONFIG_NO_HZ_FULL_SYSIDLE=y</tt></a>
-kernels, RCU must separately track idle CPUs on the one hand and
-CPUs that are either idle or executing in userspace on the other.
-In both cases, RCU must be able to sample state at two points in
-time, and be able to determine whether or not some other CPU spent
-any time idle and/or executing in userspace.
-
-<p>
-These energy-efficiency requirements have proven quite difficult to
-understand and to meet; for example, there have been more than five
-clean-sheet rewrites of RCU's energy-efficiency code, the last of
-which was finally able to demonstrate
-<a href="http://www.rdrop.com/users/paulmck/realtime/paper/AMPenergy.2013.04.19a.pdf">real energy savings running on real hardware [PDF]</a>.
-As noted earlier,
-I learned of many of these requirements via angry phone calls:
-Flaming me on the Linux-kernel mailing list was apparently not
-sufficient to fully vent their ire at RCU's energy-efficiency bugs!
-
-<h3><a name="Memory Efficiency">Memory Efficiency</a></h3>
-
-<p>
-Although small-memory non-realtime systems can simply use Tiny RCU,
-code size is only one aspect of memory efficiency.
-Another aspect is the size of the <tt>rcu_head</tt> structure
-used by <tt>call_rcu()</tt> and <tt>kfree_rcu()</tt>.
-Although this structure contains nothing more than a pair of pointers,
-it does appear in many RCU-protected data structures, including
-some that are size critical.
-The <tt>page</tt> structure is a case in point, as evidenced by
-the many occurrences of the <tt>union</tt> keyword within that structure.
-
-<p>
-This need for memory efficiency is one reason that RCU uses hand-crafted
-singly linked lists to track the <tt>rcu_head</tt> structures that
-are waiting for a grace period to elapse.
-It is also the reason why <tt>rcu_head</tt> structures do not contain
-debug information, such as fields tracking the file and line of the
-<tt>call_rcu()</tt> or <tt>kfree_rcu()</tt> that posted them.
-Although this information might appear in debug-only kernel builds at some
-point, in the meantime, the <tt>->func</tt> field will often provide
-the needed debug information.
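-
-<p>
-For example, in the following sketch (in which <tt>struct foo</tt> and
-<tt>foo_free_cb()</tt> are hypothetical), the only per-object RCU
-overhead is the pair of pointers in the <tt>rcu_head</tt> structure,
-and the <tt>->func</tt> field will reference <tt>foo_free_cb()</tt>
-while the callback is pending:
-
-<blockquote>
-<pre>
- 1 struct foo {
- 2   int a;
- 3   struct rcu_head rh; /* Just a pair of pointers. */
- 4 };
- 5
- 6 static void foo_free_cb(struct rcu_head *rhp)
- 7 {
- 8   kfree(container_of(rhp, struct foo, rh));
- 9 }
-10
-11 /* oldp has already been unlinked from all RCU-protected structures. */
-12 call_rcu(&oldp->rh, foo_free_cb);
-</pre>
-</blockquote>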
-
-<p>
-However, in some cases, the need for memory efficiency leads to even
-more extreme measures.
-Returning to the <tt>page</tt> structure, the <tt>rcu_head</tt> field
-shares storage with a great many other structures that are used at
-various points in the corresponding page's lifetime.
-In order to correctly resolve certain
-<a href="https://lkml.kernel.org/g/1439976106-137226-1-git-send-email-kirill.shutemov@linux.intel.com">race conditions</a>,
-the Linux kernel's memory-management subsystem needs a particular bit
-to remain zero during all phases of grace-period processing,
-and that bit happens to map to the bottom bit of the
-<tt>rcu_head</tt> structure's <tt>->next</tt> field.
-RCU makes this guarantee as long as <tt>call_rcu()</tt>
-is used to post the callback, as opposed to <tt>kfree_rcu()</tt>
-or some future “lazy”
-variant of <tt>call_rcu()</tt> that might one day be created for
-energy-efficiency purposes.
-
-<h3><a name="Performance, Scalability, Response Time, and Reliability">
-Performance, Scalability, Response Time, and Reliability</a></h3>
-
-<p>
-Expanding on the
-<a href="#Performance and Scalability">earlier discussion</a>,
-RCU is used heavily by hot code paths in performance-critical
-portions of the Linux kernel's networking, security, virtualization,
-and scheduling code paths.
-RCU must therefore use efficient implementations, especially in its
-read-side primitives.
-To that end, it would be good if preemptible RCU's implementation
-of <tt>rcu_read_lock()</tt> could be inlined; however, doing
-this requires resolving <tt>#include</tt> issues with the
-<tt>task_struct</tt> structure.
-
-<p>
-The Linux kernel supports hardware configurations with up to
-4096 CPUs, which means that RCU must be extremely scalable.
-Algorithms that involve frequent acquisitions of global locks or
-frequent atomic operations on global variables simply cannot be
-tolerated within the RCU implementation.
-RCU therefore makes heavy use of a combining tree based on the
-<tt>rcu_node</tt> structure.
-RCU is required to tolerate all CPUs continuously invoking any
-combination of RCU's runtime primitives with minimal per-operation
-overhead.
-In fact, in many cases, increasing load must <i>decrease</i> the
-per-operation overhead, witness the batching optimizations for
-<tt>synchronize_rcu()</tt>, <tt>call_rcu()</tt>,
-<tt>synchronize_rcu_expedited()</tt>, and <tt>rcu_barrier()</tt>.
-As a general rule, RCU must cheerfully accept whatever the
-rest of the Linux kernel decides to throw at it.
-
-<p>
-The Linux kernel is used for real-time workloads, especially
-in conjunction with the
-<a href="https://rt.wiki.kernel.org/index.php/Main_Page">-rt patchset</a>.
-The real-time-latency response requirements are such that the
-traditional approach of disabling preemption across RCU
-read-side critical sections is inappropriate.
-Kernels built with <tt>CONFIG_PREEMPT=y</tt> therefore
-use an RCU implementation that allows RCU read-side critical
-sections to be preempted.
-This requirement made its presence known after users made it
-clear that an earlier
-<a href="https://lwn.net/Articles/107930/">real-time patch</a>
-did not meet their needs, in conjunction with some
-<a href="https://lkml.kernel.org/g/20050318002026.GA2693@us.ibm.com">RCU issues</a>
-encountered by a very early version of the -rt patchset.
-
-<p>
-In addition, RCU must make do with a sub-100-microsecond real-time latency
-budget.
-In fact, on smaller systems with the -rt patchset, the Linux kernel
-provides sub-20-microsecond real-time latencies for the whole kernel,
-including RCU.
-RCU's scalability and latency must therefore be sufficient for
-these sorts of configurations.
-To my surprise, the sub-100-microsecond real-time latency budget
-<a href="http://www.rdrop.com/users/paulmck/realtime/paper/bigrt.2013.01.31a.LCA.pdf">
-applies to even the largest systems [PDF]</a>,
-up to and including systems with 4096 CPUs.
-This real-time requirement motivated the grace-period kthread, which
-also simplified handling of a number of race conditions.
-
-<p>
-Finally, RCU's status as a synchronization primitive means that
-any RCU failure can result in arbitrary memory corruption that can be
-extremely difficult to debug.
-This means that RCU must be extremely reliable, which in
-practice also means that RCU must have an aggressive stress-test
-suite.
-This stress-test suite is called <tt>rcutorture</tt>.
-
-<p>
-Although the need for <tt>rcutorture</tt> was no surprise,
-the current immense popularity of the Linux kernel is posing
-interesting—and perhaps unprecedented—validation
-challenges.
-To see this, keep in mind that there are well over one billion
-instances of the Linux kernel running today, given Android
-smartphones, Linux-powered televisions, and servers.
-This number can be expected to increase sharply with the advent of
-the celebrated Internet of Things.
-
-<p>
-Suppose that RCU contains a race condition that manifests on average
-once per million years of runtime.
-This bug will be occurring about three times per <i>day</i> across
-the installed base.
-RCU could simply hide behind hardware error rates, given that no one
-should really expect their smartphone to last for a million years.
-However, anyone taking too much comfort from this thought should
-consider the fact that in most jurisdictions, a successful multi-year
-test of a given mechanism, which might include a Linux kernel,
-suffices for a number of types of safety-critical certifications.
-In fact, rumor has it that the Linux kernel is already being used
-in production for safety-critical applications.
-I don't know about you, but I would feel quite bad if a bug in RCU
-killed someone.
-Which might explain my recent focus on validation and verification.
-
-<h2><a name="Other RCU Flavors">Other RCU Flavors</a></h2>
-
-<p>
-One of the more surprising things about RCU is that there are now
-no fewer than five <i>flavors</i>, or API families.
-In addition, the primary flavor that has been the sole focus up to
-this point has two different implementations, non-preemptible and
-preemptible.
-The other four flavors are listed below, with requirements for each
-described in a separate section.
-
-<ol>
-<li> <a href="#Bottom-Half Flavor">Bottom-Half Flavor</a>
-<li> <a href="#Sched Flavor">Sched Flavor</a>
-<li> <a href="#Sleepable RCU">Sleepable RCU</a>
-<li> <a href="#Tasks RCU">Tasks RCU</a>
-</ol>
-
-<h3><a name="Bottom-Half Flavor">Bottom-Half Flavor</a></h3>
-
-<p>
-The softirq-disable (AKA “bottom-half”,
-hence the “_bh” abbreviations)
-flavor of RCU, or <i>RCU-bh</i>, was developed by
-Dipankar Sarma to provide a flavor of RCU that could withstand the
-network-based denial-of-service attacks researched by Robert
-Olsson.
-These attacks placed so much networking load on the system
-that some of the CPUs never exited softirq execution,
-which in turn prevented those CPUs from ever executing a context switch,
-which, in the RCU implementation of that time, prevented grace periods
-from ever ending.
-The result was an out-of-memory condition and a system hang.
-
-<p>
-The solution was the creation of RCU-bh, which does
-<tt>local_bh_disable()</tt>
-across its read-side critical sections, and which uses the transition
-from one type of softirq processing to another as a quiescent state
-in addition to context switch, idle, user mode, and offline.
-This means that RCU-bh grace periods can complete even when some of
-the CPUs execute in softirq indefinitely, thus allowing algorithms
-based on RCU-bh to withstand network-based denial-of-service attacks.
-
-<p>
-Because
-<tt>rcu_read_lock_bh()</tt> and <tt>rcu_read_unlock_bh()</tt>
-disable and re-enable softirq handlers, any attempt to start a softirq
-handler during an
-RCU-bh read-side critical section will be deferred.
-In this case, <tt>rcu_read_unlock_bh()</tt>
-will invoke softirq processing, which can take considerable time.
-One can of course argue that this softirq overhead should be associated
-with the code following the RCU-bh read-side critical section rather
-than <tt>rcu_read_unlock_bh()</tt>, but the fact
-is that most profiling tools cannot be expected to make this sort
-of fine distinction.
-For example, suppose that a three-millisecond-long RCU-bh read-side
-critical section executes during a time of heavy networking load.
-There will very likely be an attempt to invoke at least one softirq
-handler during that three milliseconds, but any such invocation will
-be delayed until the time of the <tt>rcu_read_unlock_bh()</tt>.
-This can of course make it appear at first glance as if
-<tt>rcu_read_unlock_bh()</tt> was executing very slowly.
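-
-<p>
-A minimal RCU-bh reader might therefore be sketched as follows, with
-<tt>gp</tt> and <tt>do_something_with()</tt> being hypothetical
-stand-ins for real packet-processing code:
-
-<blockquote>
-<pre>
- 1 rcu_read_lock_bh();
- 2 p = rcu_dereference_bh(gp);
- 3 if (p)
- 4   do_something_with(p);
- 5 rcu_read_unlock_bh(); /* Might run deferred softirq handlers. */
-</pre>
-</blockquote>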
-
-<p>
-The
-<a href="https://lwn.net/Articles/609973/#RCU Per-Flavor API Table">RCU-bh API</a>
-includes
-<tt>rcu_read_lock_bh()</tt>,
-<tt>rcu_read_unlock_bh()</tt>,
-<tt>rcu_dereference_bh()</tt>,
-<tt>rcu_dereference_bh_check()</tt>,
-<tt>synchronize_rcu_bh()</tt>,
-<tt>synchronize_rcu_bh_expedited()</tt>,
-<tt>call_rcu_bh()</tt>,
-<tt>rcu_barrier_bh()</tt>, and
-<tt>rcu_read_lock_bh_held()</tt>.
-
-<h3><a name="Sched Flavor">Sched Flavor</a></h3>
-
-<p>
-Before preemptible RCU, waiting for an RCU grace period had the
-side effect of also waiting for all pre-existing interrupt
-and NMI handlers.
-However, there are legitimate preemptible-RCU implementations that
-do not have this property, given that any point in the code outside
-of an RCU read-side critical section can be a quiescent state.
-Therefore, <i>RCU-sched</i> was created, which follows “classic”
-RCU in that an RCU-sched grace period waits for pre-existing
-interrupt and NMI handlers.
-In kernels built with <tt>CONFIG_PREEMPT=n</tt>, the RCU and RCU-sched
-APIs have identical implementations, while kernels built with
-<tt>CONFIG_PREEMPT=y</tt> provide a separate implementation for each.
-
-<p>
-Note well that in <tt>CONFIG_PREEMPT=y</tt> kernels,
-<tt>rcu_read_lock_sched()</tt> and <tt>rcu_read_unlock_sched()</tt>
-disable and re-enable preemption, respectively.
-This means that if there was a preemption attempt during the
-RCU-sched read-side critical section, <tt>rcu_read_unlock_sched()</tt>
-will enter the scheduler, with all the latency and overhead entailed.
-Just as with <tt>rcu_read_unlock_bh()</tt>, this can make it look
-as if <tt>rcu_read_unlock_sched()</tt> was executing very slowly.
-However, the highest-priority task won't be preempted, so that task
-will enjoy low-overhead <tt>rcu_read_unlock_sched()</tt> invocations.
-
-<p>
-The
-<a href="https://lwn.net/Articles/609973/#RCU Per-Flavor API Table">RCU-sched API</a>
-includes
-<tt>rcu_read_lock_sched()</tt>,
-<tt>rcu_read_unlock_sched()</tt>,
-<tt>rcu_read_lock_sched_notrace()</tt>,
-<tt>rcu_read_unlock_sched_notrace()</tt>,
-<tt>rcu_dereference_sched()</tt>,
-<tt>rcu_dereference_sched_check()</tt>,
-<tt>synchronize_sched()</tt>,
-<tt>synchronize_rcu_sched_expedited()</tt>,
-<tt>call_rcu_sched()</tt>,
-<tt>rcu_barrier_sched()</tt>, and
-<tt>rcu_read_lock_sched_held()</tt>.
-However, anything that disables preemption also marks an RCU-sched
-read-side critical section, including
-<tt>preempt_disable()</tt> and <tt>preempt_enable()</tt>,
-<tt>local_irq_save()</tt> and <tt>local_irq_restore()</tt>,
-and so on.
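-
-<p>
-An RCU-sched reader might be sketched as follows, again with
-hypothetical names; note that the critical section must neither
-block nor sleep:
-
-<blockquote>
-<pre>
- 1 rcu_read_lock_sched(); /* Disables preemption. */
- 2 p = rcu_dereference_sched(gp);
- 3 if (p)
- 4   do_something_with(p);
- 5 rcu_read_unlock_sched(); /* Might enter the scheduler. */
-</pre>
-</blockquote>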
-
-<h3><a name="Sleepable RCU">Sleepable RCU</a></h3>
-
-<p>
-For well over a decade, someone saying “I need to block within
-an RCU read-side critical section” was a reliable indication
-that this someone did not understand RCU.
-After all, if you are always blocking in an RCU read-side critical
-section, you can probably afford to use a higher-overhead synchronization
-mechanism.
-However, that changed with the advent of the Linux kernel's notifiers,
-whose RCU read-side critical
-sections almost never sleep, but sometimes need to.
-This resulted in the introduction of
-<a href="https://lwn.net/Articles/202847/">sleepable RCU</a>,
-or <i>SRCU</i>.
-
-<p>
-SRCU allows different domains to be defined, with each such domain
-defined by an instance of an <tt>srcu_struct</tt> structure.
-A pointer to this structure must be passed in to each SRCU function,
-for example, <tt>synchronize_srcu(&ss)</tt>, where
-<tt>ss</tt> is the <tt>srcu_struct</tt> structure.
-The key benefit of these domains is that a slow SRCU reader in one
-domain does not delay an SRCU grace period in some other domain.
-That said, one consequence of these domains is that read-side code
-must pass a “cookie” from <tt>srcu_read_lock()</tt>
-to <tt>srcu_read_unlock()</tt>, for example, as follows:
-
-<blockquote>
-<pre>
- 1 int idx;
- 2
- 3 idx = srcu_read_lock(&ss);
- 4 do_something();
- 5 srcu_read_unlock(&ss, idx);
-</pre>
-</blockquote>
-
-<p>
-As noted above, it is legal to block within SRCU read-side critical sections;
-however, with great power comes great responsibility.
-If you block forever in one of a given domain's SRCU read-side critical
-sections, then that domain's grace periods will also be blocked forever.
-Of course, one good way to block forever is to deadlock, which can
-happen if any operation in a given domain's SRCU read-side critical
-section can block waiting, either directly or indirectly, for that domain's
-grace period to elapse.
-For example, this results in a self-deadlock:
-
-<blockquote>
-<pre>
- 1 int idx;
- 2
- 3 idx = srcu_read_lock(&ss);
- 4 do_something();
- 5 synchronize_srcu(&ss);
- 6 srcu_read_unlock(&ss, idx);
-</pre>
-</blockquote>
-
-<p>
-However, if line 5 acquired a mutex that was held across
-a <tt>synchronize_srcu()</tt> for domain <tt>ss</tt>,
-deadlock would still be possible.
-Furthermore, if line 5 acquired a mutex that was held across
-a <tt>synchronize_srcu()</tt> for some other domain <tt>ss1</tt>,
-and if an <tt>ss1</tt>-domain SRCU read-side critical section
-acquired another mutex that was held across an <tt>ss</tt>-domain
-<tt>synchronize_srcu()</tt>,
-deadlock would again be possible.
-Such a deadlock cycle could extend across an arbitrarily large number
-of different SRCU domains.
-Again, with great power comes great responsibility.
-
-<p>
-Unlike the other RCU flavors, SRCU read-side critical sections can
-run on idle and even offline CPUs.
-This ability requires that <tt>srcu_read_lock()</tt> and
-<tt>srcu_read_unlock()</tt> contain memory barriers, which means
-that SRCU readers will run a bit slower than would RCU readers.
-It also motivates the <tt>smp_mb__after_srcu_read_unlock()</tt>
-API, which, in combination with <tt>srcu_read_unlock()</tt>,
-guarantees a full memory barrier.
-
-<p>
-The
-<a href="https://lwn.net/Articles/609973/#RCU Per-Flavor API Table">SRCU API</a>
-includes
-<tt>srcu_read_lock()</tt>,
-<tt>srcu_read_unlock()</tt>,
-<tt>srcu_dereference()</tt>,
-<tt>srcu_dereference_check()</tt>,
-<tt>synchronize_srcu()</tt>,
-<tt>synchronize_srcu_expedited()</tt>,
-<tt>call_srcu()</tt>,
-<tt>srcu_barrier()</tt>, and
-<tt>srcu_read_lock_held()</tt>.
-It also includes
-<tt>DEFINE_SRCU()</tt>,
-<tt>DEFINE_STATIC_SRCU()</tt>, and
-<tt>init_srcu_struct()</tt>
-APIs for defining and initializing <tt>srcu_struct</tt> structures.
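-
-<p>
-For example, a statically allocated domain with a sleeping reader
-might be sketched as follows, with
-<tt>do_something_that_may_sleep()</tt> being hypothetical:
-
-<blockquote>
-<pre>
- 1 DEFINE_STATIC_SRCU(my_srcu);
- 2
- 3 int reader(void)
- 4 {
- 5   int idx, ret;
- 6
- 7   idx = srcu_read_lock(&my_srcu);
- 8   ret = do_something_that_may_sleep(); /* Blocking is legal here. */
- 9   srcu_read_unlock(&my_srcu, idx);
-10   return ret;
-11 }
-</pre>
-</blockquote>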
-
-<h3><a name="Tasks RCU">Tasks RCU</a></h3>
-
-<p>
-Some forms of tracing use &#8220;trampolines&#8221; to handle the
-binary rewriting required to install different types of probes.
-It would be good to be able to free old trampolines, which sounds
-like a job for some form of RCU.
-However, because it is necessary to be able to install a trace
-anywhere in the code, it is not possible to use read-side markers
-such as <tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>.
-In addition, it does not work to have these markers in the trampoline
-itself, because there would need to be instructions following
-<tt>rcu_read_unlock()</tt>.
-Although <tt>synchronize_rcu()</tt> would guarantee that execution
-reached the <tt>rcu_read_unlock()</tt>, it would not be able to
-guarantee that execution had completely left the trampoline.
-
-<p>
-The solution, in the form of
-<a href="https://lwn.net/Articles/607117/"><i>Tasks RCU</i></a>,
-is to have implicit
-read-side critical sections that are delimited by voluntary context
-switches, that is, calls to <tt>schedule()</tt>,
-<tt>cond_resched_rcu_qs()</tt>, and
-<tt>synchronize_rcu_tasks()</tt>.
-In addition, transitions to and from userspace execution also delimit
-tasks-RCU read-side critical sections.
-
-<p>
-The tasks-RCU API is quite compact, consisting only of
-<tt>call_rcu_tasks()</tt>,
-<tt>synchronize_rcu_tasks()</tt>, and
-<tt>rcu_barrier_tasks()</tt>.
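-
-<p>
-A hedged sketch of trampoline reclamation follows, in which
-<tt>struct tramp</tt> and <tt>free_executable_memory()</tt> are
-hypothetical stand-ins, and in which the trampoline is assumed to
-have already been unhooked so that no new tasks can enter it:
-
-<blockquote>
-<pre>
- 1 struct tramp {
- 2   void *code;
- 3   struct rcu_head rh;
- 4 };
- 5
- 6 static void tramp_free_cb(struct rcu_head *rhp)
- 7 {
- 8   struct tramp *t = container_of(rhp, struct tramp, rh);
- 9
-10   free_executable_memory(t->code); /* Hypothetical helper. */
-11   kfree(t);
-12 }
-13
-14 /* Once the callback fires, no task can still be running in t->code. */
-15 void tramp_retire(struct tramp *t)
-16 {
-17   call_rcu_tasks(&t->rh, tramp_free_cb);
-18 }
-</pre>
-</blockquote>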
-
-<h2><a name="Possible Future Changes">Possible Future Changes</a></h2>
-
-<p>
-One of the tricks that RCU uses to attain update-side scalability is
-to increase grace-period latency with increasing numbers of CPUs.
-If this becomes a serious problem, it will be necessary to rework the
-grace-period state machine so as to avoid the need for the additional
-latency.
-
-<p>
-Expedited grace periods scan the CPUs, so their latency and overhead
-increases with increasing numbers of CPUs.
-If this becomes a serious problem on large systems, it will be necessary
-to do some redesign to avoid this scalability problem.
-
-<p>
-RCU disables CPU hotplug in a few places, perhaps most notably in the
-expedited grace-period and <tt>rcu_barrier()</tt> operations.
-If there is a strong reason to use expedited grace periods in CPU-hotplug
-notifiers, it will be necessary to avoid disabling CPU hotplug.
-This would introduce some complexity, so there had better be a <i>very</i>
-good reason.
-
-<p>
-The tradeoff between grace-period latency on the one hand and interruptions
-of other CPUs on the other hand may need to be re-examined.
-The desire is of course for zero grace-period latency as well as zero
-interprocessor interrupts undertaken during an expedited grace period
-operation.
-While this ideal is unlikely to be achievable, it is quite possible that
-further improvements can be made.
-
-<p>
-The multiprocessor implementations of RCU use a combining tree that
-groups CPUs so as to reduce lock contention and increase cache locality.
-However, this combining tree does not spread its memory across NUMA
-nodes nor does it align the CPU groups with hardware features such
-as sockets or cores.
-Such spreading and alignment is currently believed to be unnecessary
-because the hotpath read-side primitives do not access the combining
-tree, nor does <tt>call_rcu()</tt> in the common case.
-If you believe that your architecture needs such spreading and alignment,
-then your architecture should also benefit from the
-<tt>rcutree.rcu_fanout_leaf</tt> boot parameter, which can be set
-to the number of CPUs in a socket, NUMA node, or whatever.
-If the number of CPUs is too large, use a fraction of the number of
-CPUs.
-If the number of CPUs is a large prime number, well, that certainly
-is an “interesting” architectural choice!
-More flexible arrangements might be considered, but only if
-<tt>rcutree.rcu_fanout_leaf</tt> has proven inadequate, and only
-if the inadequacy has been demonstrated by a carefully run and
-realistic system-level workload.
-
-<p>
-Please note that arrangements that require RCU to remap CPU numbers will
-require extremely good demonstration of need and full exploration of
-alternatives.
-
-<p>
-There is an embarrassingly large number of flavors of RCU, and this
-number has been increasing over time.
-Perhaps it will be possible to combine some at some future date.
-
-<p>
-RCU's various kthreads are reasonably recent additions.
-It is quite likely that adjustments will be required to more gracefully
-handle extreme loads.
-It might also be necessary to be able to relate CPU utilization by
-RCU's kthreads and softirq handlers to the code that instigated this
-CPU utilization.
-For example, RCU callback overhead might be charged back to the
-originating <tt>call_rcu()</tt> instance, though probably not
-in production kernels.
-
-<h2><a name="Summary">Summary</a></h2>
-
-<p>
-This document has presented more than two decades' worth of RCU
-requirements.
-Given that the requirements keep changing, this will not be the last
-word on this subject, but at least it serves to get an important
-subset of the requirements set forth.
-
-<h2><a name="Acknowledgments">Acknowledgments</a></h2>
-
-I am grateful to Steven Rostedt, Lai Jiangshan, Ingo Molnar,
-Oleg Nesterov, Borislav Petkov, Peter Zijlstra, Boqun Feng, and
-Andy Lutomirski for their help in rendering
-this article human readable, and to Michelle Rankin for her support
-of this effort.
-Other contributions are acknowledged in the Linux kernel's git archive.
-The cartoon is copyright (c) 2013 by Melissa Broussard,
-and is provided
-under the terms of the Creative Commons Attribution-Share Alike 3.0
-United States license.
-
-<p>@@QQAL@@
-
-</body></html>
diff --git a/Documentation/RCU/Design/htmlqqz.sh b/Documentation/RCU/Design/htmlqqz.sh
deleted file mode 100755
index d354f06..0000000
--- a/Documentation/RCU/Design/htmlqqz.sh
+++ /dev/null
@@ -1,108 +0,0 @@
-#!/bin/sh
-#
-# Usage: sh htmlqqz.sh file
-#
-# Extracts and converts quick quizzes in a proto-HTML document file.htmlx.
-# Commands, all of which must be on a line by themselves:
-#
-# "<p>@@QQ@@": Start of a quick quiz.
-# "<p>@@QQA@@": Start of a quick-quiz answer.
-# "<p>@@QQE@@": End of a quick-quiz answer, and thus of the quick quiz.
-# "<p>@@QQAL@@": Place to put quick-quiz answer list.
-#
-# Places the result in file.html.
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, you can access it online at
-# http://www.gnu.org/licenses/gpl-2.0.html.
-#
-# Copyright (c) 2013 Paul E. McKenney, IBM Corporation.
-
-fn=$1
-if test ! -r $fn.htmlx
-then
- echo "Error: $fn.htmlx unreadable."
- exit 1
-fi
-
-echo "<!-- DO NOT HAND EDIT. -->" > $fn.html
-echo "<!-- Instead, edit $fn.htmlx and run 'sh htmlqqz.sh $fn' -->" >> $fn.html
-awk < $fn.htmlx >> $fn.html '
-
-state == "" && $1 != "<p>@@QQ@@" && $1 != "<p>@@QQAL@@" {
- print $0;
- if ($0 ~ /^<p>@@QQ/)
- print "Bad Quick Quiz command: " NR " (expected <p>@@QQ@@ or <p>@@QQAL@@)." > "/dev/stderr"
- next;
-}
-
-state == "" && $1 == "<p>@@QQ@@" {
- qqn++;
- qqlineno = NR;
- haveqq = 1;
- state = "qq";
- print "<p><a name=\"Quick Quiz " qqn "\"><b>Quick Quiz " qqn "</b>:</a>"
- next;
-}
-
-state == "qq" && $1 != "<p>@@QQA@@" {
- qq[qqn] = qq[qqn] $0 "\n";
- print $0
- if ($0 ~ /^<p>@@QQ/)
- print "Bad Quick Quiz command: " NR ". (expected <p>@@QQA@@)" > "/dev/stderr"
- next;
-}
-
-state == "qq" && $1 == "<p>@@QQA@@" {
- state = "qqa";
- print "<br><a href=\"#qq" qqn "answer\">Answer</a>"
- next;
-}
-
-state == "qqa" && $1 != "<p>@@QQE@@" {
- qqa[qqn] = qqa[qqn] $0 "\n";
- if ($0 ~ /^<p>@@QQ/)
- print "Bad Quick Quiz command: " NR " (expected <p>@@QQE@@)." > "/dev/stderr"
- next;
-}
-
-state == "qqa" && $1 == "<p>@@QQE@@" {
- state = "";
- next;
-}
-
-state == "" && $1 == "<p>@@QQAL@@" {
- haveqq = "";
- print "<h3><a name=\"Answers to Quick Quizzes\">"
- print "Answers to Quick Quizzes</a></h3>"
- print "";
- for (i = 1; i <= qqn; i++) {
- print "<a name=\"qq" i "answer\"></a>"
- print "<p><b>Quick Quiz " i "</b>:"
- print qq[i];
- print "";
- print "</p><p><b>Answer</b>:"
- print qqa[i];
- print "";
- print "</p><p><a href=\"#Quick%20Quiz%20" i "\"><b>Back to Quick Quiz " i "</b>.</a>"
- print "";
- }
- next;
-}
-
-END {
- if (state != "")
- print "Unterminated Quick Quiz: " qqlineno "." > "/dev/stderr"
- else if (haveqq)
- print "Missing \"<p>@@QQAL@@\", no Quick Quiz." > "/dev/stderr"
-}'
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt
index ec6998b..00a3a38 100644
--- a/Documentation/RCU/trace.txt
+++ b/Documentation/RCU/trace.txt
@@ -237,17 +237,17 @@
The output of "cat rcu/rcu_preempt/rcuexp" looks as follows:
-s=21872 wd0=0 wd1=0 wd2=0 wd3=5 n=0 enq=0 sc=21872
+s=21872 wd1=0 wd2=0 wd3=5 n=0 enq=0 sc=21872
These fields are as follows:
o "s" is the sequence number, with an odd number indicating that
an expedited grace period is in progress.
-o "wd0", "wd1", "wd2", and "wd3" are the number of times that an
- attempt to start an expedited grace period found that someone
- else had completed an expedited grace period that satisfies the
- attempted request. "Our work is done."
+o "wd1", "wd2", and "wd3" are the number of times that an attempt
+ to start an expedited grace period found that someone else had
+ completed an expedited grace period that satisfies the attempted
+ request. "Our work is done."
o "n" is number of times that a concurrent CPU-hotplug operation
forced a fallback to a normal grace period.
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index dc49c67..111770f 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -681,22 +681,30 @@
RCU is analogous to reader-writer locking. The following unified
diff shows how closely related RCU and reader-writer locking can be.
+ @@ -5,5 +5,5 @@ struct el {
+ int data;
+ /* Other data fields */
+ };
+ -rwlock_t listmutex;
+ +spinlock_t listmutex;
+ struct el head;
+
@@ -13,15 +14,15 @@
struct list_head *lp;
struct el *p;
- - read_lock();
+ - read_lock(&listmutex);
- list_for_each_entry(p, head, lp) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(p, head, lp) {
if (p->key == key) {
*result = p->data;
- - read_unlock();
+ - read_unlock(&listmutex);
+ rcu_read_unlock();
return 1;
}
}
- - read_unlock();
+ - read_unlock(&listmutex);
+ rcu_read_unlock();
return 0;
}
@@ -732,7 +740,7 @@
5 int data; 5 int data;
6 /* Other data fields */ 6 /* Other data fields */
7 }; 7 };
- 8 spinlock_t listmutex; 8 spinlock_t listmutex;
+ 8 rwlock_t listmutex; 8 spinlock_t listmutex;
9 struct el head; 9 struct el head;
1 int search(long key, int *result) 1 int search(long key, int *result)
@@ -740,15 +748,15 @@
3 struct list_head *lp; 3 struct list_head *lp;
4 struct el *p; 4 struct el *p;
5 5
- 6 read_lock(); 6 rcu_read_lock();
+ 6 read_lock(&listmutex); 6 rcu_read_lock();
7 list_for_each_entry(p, head, lp) { 7 list_for_each_entry_rcu(p, head, lp) {
8 if (p->key == key) { 8 if (p->key == key) {
9 *result = p->data; 9 *result = p->data;
-10 read_unlock(); 10 rcu_read_unlock();
+10 read_unlock(&listmutex); 10 rcu_read_unlock();
11 return 1; 11 return 1;
12 } 12 }
13 } 13 }
-14 read_unlock(); 14 rcu_read_unlock();
+14 read_unlock(&listmutex); 14 rcu_read_unlock();
15 return 0; 15 return 0;
16 } 16 }
diff --git a/Documentation/acpi/initrd_table_override.txt b/Documentation/acpi/initrd_table_override.txt
index 35c3f54..eb651a6 100644
--- a/Documentation/acpi/initrd_table_override.txt
+++ b/Documentation/acpi/initrd_table_override.txt
@@ -1,5 +1,5 @@
-Overriding ACPI tables via initrd
-=================================
+Upgrading ACPI tables via initrd
+================================
1) Introduction (What is this about)
2) What is this for
@@ -9,12 +9,14 @@
1) What is this about
---------------------
-If the ACPI_INITRD_TABLE_OVERRIDE compile option is true, it is possible to
-override nearly any ACPI table provided by the BIOS with an instrumented,
-modified one.
+If the ACPI_TABLE_UPGRADE compile option is true, it is possible to
+upgrade the ACPI execution environment defined by the ACPI tables
+by upgrading the BIOS-provided ACPI tables with instrumented, modified,
+more recent versions, or by installing brand new ACPI tables.
-For a full list of ACPI tables that can be overridden, take a look at
-the char *table_sigs[MAX_ACPI_SIGNATURE]; definition in drivers/acpi/osl.c
+For a full list of ACPI tables that can be upgraded/installed, take a look
+at the char *table_sigs[MAX_ACPI_SIGNATURE]; definition in
+drivers/acpi/tables.c.
All ACPI tables iasl (Intel's ACPI compiler and disassembler) knows should
be overridable, except:
- ACPI_SIG_RSDP (has a signature of 6 bytes)
@@ -25,17 +27,20 @@
2) What is this for
-------------------
-Please keep in mind that this is a debug option.
-ACPI tables should not get overridden for productive use.
-If BIOS ACPI tables are overridden the kernel will get tainted with the
-TAINT_OVERRIDDEN_ACPI_TABLE flag.
-Complain to your platform/BIOS vendor if you find a bug which is so sever
-that a workaround is not accepted in the Linux kernel.
+Complain to your platform/BIOS vendor if you find a bug which is so severe
+that a workaround is not accepted in the Linux kernel. Meanwhile, this
+facility allows you to upgrade the buggy tables before your platform/BIOS
+vendor releases an upgraded BIOS binary.
-Still, it can and should be enabled in any kernel, because:
- - There is no functional change with not instrumented initrds
- - It provides a powerful feature to easily debug and test ACPI BIOS table
- compatibility with the Linux kernel.
+This facility can be used by platform/BIOS vendors to provide a Linux
+compatible environment without modifying the underlying platform firmware.
+
+This facility also provides a powerful feature to easily debug and test
+ACPI BIOS table compatibility with the Linux kernel by modifying old
+platform provided ACPI tables or inserting new ACPI tables.
+
+It can and should be enabled in any kernel because there is no functional
+change with not instrumented initrds.
3) How does it work
@@ -50,23 +55,31 @@
# For example add this statement into a _PRT (PCI Routing Table) function
# of the DSDT:
Store("HELLO WORLD", debug)
+# And increase the OEM Revision. For example, before modification:
+DefinitionBlock ("DSDT.aml", "DSDT", 2, "INTEL ", "TEMPLATE", 0x00000000)
+# After modification:
+DefinitionBlock ("DSDT.aml", "DSDT", 2, "INTEL ", "TEMPLATE", 0x00000001)
iasl -sa dsdt.dsl
# Add the raw ACPI tables to an uncompressed cpio archive.
-# They must be put into a /kernel/firmware/acpi directory inside the
-# cpio archive.
-# The uncompressed cpio archive must be the first.
-# Other, typically compressed cpio archives, must be
-# concatenated on top of the uncompressed one.
+# They must be put into a /kernel/firmware/acpi directory inside the cpio
+# archive. Note that if the table put here matches a platform table
+# (similar Table Signature, and similar OEMID, and similar OEM Table ID)
+# with a more recent OEM Revision, the platform table will be upgraded by
+# this table. If the table put here doesn't match a platform table
+# (dissimilar Table Signature, or dissimilar OEMID, or dissimilar OEM Table
+# ID), this table will be appended.
mkdir -p kernel/firmware/acpi
cp dsdt.aml kernel/firmware/acpi
-# A maximum of: #define ACPI_OVERRIDE_TABLES 10
-# tables are currently allowed (see osl.c):
+# A maximum of "NR_ACPI_INITRD_TABLES (64)" tables are currently allowed
+# (see osl.c):
iasl -sa facp.dsl
iasl -sa ssdt1.dsl
cp facp.aml kernel/firmware/acpi
cp ssdt1.aml kernel/firmware/acpi
-# Create the uncompressed cpio archive and concatenate the original initrd
-# on top:
+# The uncompressed cpio archive must be the first. Other, typically
+# compressed cpio archives, must be concatenated on top of the uncompressed
+# one. Following command creates the uncompressed cpio archive and
+# concatenates the original initrd on top:
find kernel | cpio -H newc --create > /boot/instrumented_initrd
cat /boot/initrd >>/boot/instrumented_initrd
# reboot with increased acpi debug level, e.g. boot params:
diff --git a/Documentation/arm64/booting.txt b/Documentation/arm64/booting.txt
index 56d6d8b..8d0df62 100644
--- a/Documentation/arm64/booting.txt
+++ b/Documentation/arm64/booting.txt
@@ -132,6 +132,10 @@
physical offset of the Image so it is recommended that the Image be
placed as close as possible to the start of system RAM.
+If an initrd/initramfs is passed to the kernel at boot, it must reside
+entirely within a 1 GB aligned physical memory window of up to 32 GB in
+size that fully covers the kernel Image as well.
+
Any memory described to the kernel (even that below the start of the
image) which is not marked as reserved from the kernel (e.g., with a
memreserve region in the device tree) will be considered as available to
diff --git a/Documentation/devicetree/bindings/arm/altera/socfpga-eccmgr.txt b/Documentation/devicetree/bindings/arm/altera/socfpga-eccmgr.txt
index 885f93d..5a6b160 100644
--- a/Documentation/devicetree/bindings/arm/altera/socfpga-eccmgr.txt
+++ b/Documentation/devicetree/bindings/arm/altera/socfpga-eccmgr.txt
@@ -3,6 +3,7 @@
The ECC Manager counts and corrects single bit errors and counts/handles
double bit errors which are uncorrectable.
+Cyclone5 and Arria5 ECC Manager
Required Properties:
- compatible : Should be "altr,socfpga-ecc-manager"
- #address-cells: must be 1
@@ -47,3 +48,52 @@
interrupts = <0 178 1>, <0 179 1>;
};
};
+
+Arria10 SoCFPGA ECC Manager
+The Arria10 SoC ECC Manager handles the IRQs for each peripheral
+in a shared register instead of individual IRQs like the Cyclone5
+and Arria5. Therefore the device tree is different as well.
+
+Required Properties:
+- compatible : Should be "altr,socfpga-a10-ecc-manager"
+- altr,sysgr-syscon : phandle to Arria10 System Manager Block
+ containing the ECC manager registers.
+- #address-cells: must be 1
+- #size-cells: must be 1
+- interrupts : Should be single bit error interrupt, then double bit error
+ interrupt. Note the rising edge type.
+- ranges : standard definition, should translate from local addresses
+
+Subcomponents:
+
+L2 Cache ECC
+Required Properties:
+- compatible : Should be "altr,socfpga-a10-l2-ecc"
+- reg : Address and size for ECC error interrupt clear registers.
+
+On-Chip RAM ECC
+Required Properties:
+- compatible : Should be "altr,socfpga-a10-ocram-ecc"
+- reg : Address and size for ECC block registers.
+
+Example:
+
+ eccmgr: eccmgr@ffd06000 {
+ compatible = "altr,socfpga-a10-ecc-manager";
+ altr,sysmgr-syscon = <&sysmgr>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ interrupts = <0 2 IRQ_TYPE_LEVEL_HIGH>,
+ <0 0 IRQ_TYPE_LEVEL_HIGH>;
+ ranges;
+
+ l2-ecc@ffd06010 {
+ compatible = "altr,socfpga-a10-l2-ecc";
+ reg = <0xffd06010 0x4>;
+ };
+
+ ocram-ecc@ff8c3000 {
+ compatible = "altr,socfpga-a10-ocram-ecc";
+ reg = <0xff8c3000 0x90>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/arm/pmu.txt b/Documentation/devicetree/bindings/arm/pmu.txt
index 6eb73be..74d5417 100644
--- a/Documentation/devicetree/bindings/arm/pmu.txt
+++ b/Documentation/devicetree/bindings/arm/pmu.txt
@@ -22,10 +22,11 @@
"arm,arm11mpcore-pmu"
"arm,arm1176-pmu"
"arm,arm1136-pmu"
+ "brcm,vulcan-pmu"
+ "cavium,thunder-pmu"
"qcom,scorpion-pmu"
"qcom,scorpion-mp-pmu"
"qcom,krait-pmu"
- "cavium,thunder-pmu"
- interrupts : 1 combined interrupt or 1 per core. If the interrupt is a per-cpu
interrupt (PPI) then 1 interrupt should be specified.
diff --git a/Documentation/devicetree/bindings/devfreq/event/exynos-nocp.txt b/Documentation/devicetree/bindings/devfreq/event/exynos-nocp.txt
new file mode 100644
index 0000000..fd459f0
--- /dev/null
+++ b/Documentation/devicetree/bindings/devfreq/event/exynos-nocp.txt
@@ -0,0 +1,26 @@
+
+* Samsung Exynos NoC (Network on Chip) Probe device
+
+The Samsung Exynos542x SoC has a NoC (Network on Chip) Probe for the NoC bus.
+The NoC provides the primitive values used to derive performance data. The
+packets that the Network on Chip (NoC) probes detect are transported over
+the network infrastructure to observer units. You can configure probes to
+capture packets with header or data on the data request response network,
+or as traffic debug or statistic collectors. The Exynos542x bus has multiple
+NoC probes to provide bandwidth information about the behavior of the SoC
+that you can use while analyzing system performance.
+
+Required properties:
+- compatible: Should be "samsung,exynos5420-nocp"
+- reg: physical base address of each NoC Probe and length of memory mapped region.
+
+Optional properties:
+- clock-names : the name of clock used by the NoC Probe, "nocp"
+- clocks : phandles for clock specified in "clock-names" property
+
+Example : NoC Probe nodes in Device Tree are listed below.
+
+ nocp_mem0_0: nocp@10CA1000 {
+ compatible = "samsung,exynos5420-nocp";
+ reg = <0x10CA1000 0x200>;
+ };
diff --git a/Documentation/devicetree/bindings/devfreq/exynos-bus.txt b/Documentation/devicetree/bindings/devfreq/exynos-bus.txt
new file mode 100644
index 0000000..d3ec8e6
--- /dev/null
+++ b/Documentation/devicetree/bindings/devfreq/exynos-bus.txt
@@ -0,0 +1,409 @@
+* Generic Exynos Bus frequency device
+
+The Samsung Exynos SoC has many buses for data transfer between DRAM
+and sub-blocks in the SoC. Most Exynos SoCs share a common bus
+architecture. Generally, each bus of an Exynos SoC includes a source
+clock and a power line, which make it possible to change the clock
+frequency of the bus at runtime. To monitor the usage of each bus at
+runtime, the driver uses the PPMU (Platform Performance Monitoring
+Unit), which is able to measure the current load of sub-blocks.
+
+The Exynos SoC includes various sub-blocks, each with its own AXI bus.
+Each AXI bus has its own source clock but does not necessarily have its
+own power line; a power line may be shared among two or more sub-blocks.
+So, depending on the role of each sub-block, bus devices can be divided
+into two types:
+- parent bus device
+- passive bus device
+
+Basically, parent and passive bus devices share the same power line.
+Only the parent bus device can change the voltage of the shared power
+line; the remaining bus devices (passive bus devices) depend on the
+decision of the parent bus device. If three blocks share the VDD_xxx
+power line, only one block should be the parent device and the rest
+should depend on it as passive devices.
+
+ VDD_xxx |--- A block (parent)
+ |--- B block (passive)
+ |--- C block (passive)
+
+The composition differs slightly among Exynos SoCs because each Exynos
+SoC has different sub-blocks. Therefore, such differences should be
+specified in the devicetree file instead of in each device driver. As a
+result, this driver is able to support bus frequency scaling for all
+Exynos SoCs.
+
+Required properties for all bus devices:
+- compatible: Should be "samsung,exynos-bus".
+- clock-names : the name of clock used by the bus, "bus".
+- clocks : phandles for clock specified in "clock-names" property.
+- operating-points-v2: the OPP table including frequency/voltage information
+ to support DVFS (Dynamic Voltage/Frequency Scaling) feature.
+
+Required properties only for parent bus device:
+- vdd-supply: the regulator that supplies the buses with voltage.
+- devfreq-events: the devfreq-event device to monitor the current utilization
+ of buses.
+
+Required properties only for passive bus device:
+- devfreq: the parent bus device.
+
+Optional properties only for parent bus device:
+- exynos,saturation-ratio: the percentage value which is used to calibrate
+ the performance count against total cycle count.
+- exynos,voltage-tolerance: the percentage value for bus voltage tolerance
+ which is used to calculate the max voltage.
+
+Detailed correlation between sub-blocks and power line according to Exynos SoC:
+- In case of Exynos3250, there are two power lines, as follows:
+ VDD_MIF |--- DMC
+
+ VDD_INT |--- LEFTBUS (parent device)
+ |--- PERIL
+ |--- MFC
+ |--- G3D
+ |--- RIGHTBUS
+ |--- FSYS
+ |--- LCD0
+ |--- PERIR
+ |--- ISP
+ |--- CAM
+
+- In case of Exynos4210, there is one power line, as follows:
+ VDD_INT |--- DMC (parent device)
+ |--- LEFTBUS
+ |--- PERIL
+ |--- MFC(L)
+ |--- G3D
+ |--- TV
+ |--- RIGHTBUS
+ |--- PERIR
+ |--- MFC(R)
+ |--- CAM
+ |--- FSYS
+ |--- GPS
+ |--- LCD0
+ |--- LCD1
+
+- In case of Exynos4x12, there are two power lines, as follows:
+ VDD_MIF |--- DMC
+
+ VDD_INT |--- LEFTBUS (parent device)
+ |--- PERIL
+ |--- MFC(L)
+ |--- G3D
+ |--- TV
+ |--- IMAGE
+ |--- RIGHTBUS
+ |--- PERIR
+ |--- MFC(R)
+ |--- CAM
+ |--- FSYS
+ |--- GPS
+ |--- LCD0
+ |--- ISP
+
+- In case of Exynos5422, there are two power lines, as follows:
+ VDD_MIF |--- DREX 0 (parent device, DRAM EXpress controller)
+ |--- DREX 1
+
+ VDD_INT |--- NoC_Core (parent device)
+ |--- G2D
+ |--- G3D
+ |--- DISP1
+ |--- NoC_WCORE
+ |--- GSCL
+ |--- MSCL
+ |--- ISP
+ |--- MFC
+ |--- GEN
+ |--- PERIS
+ |--- PERIC
+ |--- FSYS
+ |--- FSYS2
+
+Example1:
+	This shows the AXI buses of the Exynos3250 SoC. Exynos3250 divides the
+	buses by power line (regulator). The MIF (Memory Interface) AXI bus is
+	used to transfer data between DRAM and CPU and uses the VDD_MIF
+	regulator.
+
+ - MIF (Memory Interface) block
+ : VDD_MIF |--- DMC (Dynamic Memory Controller)
+
+ - INT (Internal) block
+ : VDD_INT |--- LEFTBUS (parent device)
+ |--- PERIL
+ |--- MFC
+ |--- G3D
+ |--- RIGHTBUS
+ |--- FSYS
+ |--- LCD0
+ |--- PERIR
+ |--- ISP
+ |--- CAM
+
+ - MIF bus's frequency/voltage table
+ -----------------------
+ |Lv| Freq | Voltage |
+ -----------------------
+ |L1| 50000 |800000 |
+ |L2| 100000 |800000 |
+ |L3| 134000 |800000 |
+ |L4| 200000 |825000 |
+ |L5| 400000 |875000 |
+ -----------------------
+
+ - INT bus's frequency/voltage table
+ ----------------------------------------------------------
+ |Block|LEFTBUS|RIGHTBUS|MCUISP |ISP |PERIL ||VDD_INT |
+ | name| |LCD0 | | | || |
+ | | |FSYS | | | || |
+ | | |MFC | | | || |
+ ----------------------------------------------------------
+ |Mode |*parent|passive |passive|passive|passive|| |
+ ----------------------------------------------------------
+ |Lv |Frequency ||Voltage |
+ ----------------------------------------------------------
+ |L1 |50000 |50000 |50000 |50000 |50000 ||900000 |
+ |L2 |80000 |80000 |80000 |80000 |80000 ||900000 |
+ |L3 |100000 |100000 |100000 |100000 |100000 ||1000000 |
+ |L4 |134000 |134000 |200000 |200000 | ||1000000 |
+ |L5 |200000 |200000 |400000 |300000 | ||1000000 |
+ ----------------------------------------------------------
+
+Example2 :
+ The bus of DMC (Dynamic Memory Controller) block in exynos3250.dtsi
+ is listed below:
+
+ bus_dmc: bus_dmc {
+ compatible = "samsung,exynos-bus";
+ clocks = <&cmu_dmc CLK_DIV_DMC>;
+ clock-names = "bus";
+ operating-points-v2 = <&bus_dmc_opp_table>;
+ status = "disabled";
+ };
+
+	bus_dmc_opp_table: opp_table0 {
+ compatible = "operating-points-v2";
+ opp-shared;
+
+ opp@50000000 {
+ opp-hz = /bits/ 64 <50000000>;
+ opp-microvolt = <800000>;
+ };
+ opp@100000000 {
+ opp-hz = /bits/ 64 <100000000>;
+ opp-microvolt = <800000>;
+ };
+ opp@134000000 {
+ opp-hz = /bits/ 64 <134000000>;
+ opp-microvolt = <800000>;
+ };
+ opp@200000000 {
+ opp-hz = /bits/ 64 <200000000>;
+ opp-microvolt = <825000>;
+ };
+ opp@400000000 {
+ opp-hz = /bits/ 64 <400000000>;
+ opp-microvolt = <875000>;
+ };
+ };
+
+ bus_leftbus: bus_leftbus {
+ compatible = "samsung,exynos-bus";
+ clocks = <&cmu CLK_DIV_GDL>;
+ clock-names = "bus";
+ operating-points-v2 = <&bus_leftbus_opp_table>;
+ status = "disabled";
+ };
+
+ bus_rightbus: bus_rightbus {
+ compatible = "samsung,exynos-bus";
+ clocks = <&cmu CLK_DIV_GDR>;
+ clock-names = "bus";
+ operating-points-v2 = <&bus_leftbus_opp_table>;
+ status = "disabled";
+ };
+
+ bus_lcd0: bus_lcd0 {
+ compatible = "samsung,exynos-bus";
+ clocks = <&cmu CLK_DIV_ACLK_160>;
+ clock-names = "bus";
+ operating-points-v2 = <&bus_leftbus_opp_table>;
+ status = "disabled";
+ };
+
+ bus_fsys: bus_fsys {
+ compatible = "samsung,exynos-bus";
+ clocks = <&cmu CLK_DIV_ACLK_200>;
+ clock-names = "bus";
+ operating-points-v2 = <&bus_leftbus_opp_table>;
+ status = "disabled";
+ };
+
+ bus_mcuisp: bus_mcuisp {
+ compatible = "samsung,exynos-bus";
+ clocks = <&cmu CLK_DIV_ACLK_400_MCUISP>;
+ clock-names = "bus";
+ operating-points-v2 = <&bus_mcuisp_opp_table>;
+ status = "disabled";
+ };
+
+ bus_isp: bus_isp {
+ compatible = "samsung,exynos-bus";
+ clocks = <&cmu CLK_DIV_ACLK_266>;
+ clock-names = "bus";
+ operating-points-v2 = <&bus_isp_opp_table>;
+ status = "disabled";
+ };
+
+ bus_peril: bus_peril {
+ compatible = "samsung,exynos-bus";
+ clocks = <&cmu CLK_DIV_ACLK_100>;
+ clock-names = "bus";
+ operating-points-v2 = <&bus_peril_opp_table>;
+ status = "disabled";
+ };
+
+ bus_mfc: bus_mfc {
+ compatible = "samsung,exynos-bus";
+ clocks = <&cmu CLK_SCLK_MFC>;
+ clock-names = "bus";
+ operating-points-v2 = <&bus_leftbus_opp_table>;
+ status = "disabled";
+ };
+
+ bus_leftbus_opp_table: opp_table1 {
+ compatible = "operating-points-v2";
+ opp-shared;
+
+ opp@50000000 {
+ opp-hz = /bits/ 64 <50000000>;
+ opp-microvolt = <900000>;
+ };
+ opp@80000000 {
+ opp-hz = /bits/ 64 <80000000>;
+ opp-microvolt = <900000>;
+ };
+ opp@100000000 {
+ opp-hz = /bits/ 64 <100000000>;
+ opp-microvolt = <1000000>;
+ };
+ opp@134000000 {
+ opp-hz = /bits/ 64 <134000000>;
+ opp-microvolt = <1000000>;
+ };
+ opp@200000000 {
+ opp-hz = /bits/ 64 <200000000>;
+ opp-microvolt = <1000000>;
+ };
+ };
+
+ bus_mcuisp_opp_table: opp_table2 {
+ compatible = "operating-points-v2";
+ opp-shared;
+
+ opp@50000000 {
+ opp-hz = /bits/ 64 <50000000>;
+ };
+ opp@80000000 {
+ opp-hz = /bits/ 64 <80000000>;
+ };
+ opp@100000000 {
+ opp-hz = /bits/ 64 <100000000>;
+ };
+ opp@200000000 {
+ opp-hz = /bits/ 64 <200000000>;
+ };
+ opp@400000000 {
+ opp-hz = /bits/ 64 <400000000>;
+ };
+ };
+
+ bus_isp_opp_table: opp_table3 {
+ compatible = "operating-points-v2";
+ opp-shared;
+
+ opp@50000000 {
+ opp-hz = /bits/ 64 <50000000>;
+ };
+ opp@80000000 {
+ opp-hz = /bits/ 64 <80000000>;
+ };
+ opp@100000000 {
+ opp-hz = /bits/ 64 <100000000>;
+ };
+ opp@200000000 {
+ opp-hz = /bits/ 64 <200000000>;
+ };
+ opp@300000000 {
+ opp-hz = /bits/ 64 <300000000>;
+ };
+ };
+
+ bus_peril_opp_table: opp_table4 {
+ compatible = "operating-points-v2";
+ opp-shared;
+
+ opp@50000000 {
+ opp-hz = /bits/ 64 <50000000>;
+ };
+ opp@80000000 {
+ opp-hz = /bits/ 64 <80000000>;
+ };
+ opp@100000000 {
+ opp-hz = /bits/ 64 <100000000>;
+ };
+ };
+
+	A usage example that handles the frequency and voltage of the buses
+	at runtime in exynos3250-rinato.dts is listed below:
+
+ &bus_dmc {
+ devfreq-events = <&ppmu_dmc0_3>, <&ppmu_dmc1_3>;
+ vdd-supply = <&buck1_reg>; /* VDD_MIF */
+ status = "okay";
+ };
+
+ &bus_leftbus {
+ devfreq-events = <&ppmu_leftbus_3>, <&ppmu_rightbus_3>;
+ vdd-supply = <&buck3_reg>;
+ status = "okay";
+ };
+
+ &bus_rightbus {
+ devfreq = <&bus_leftbus>;
+ status = "okay";
+ };
+
+ &bus_lcd0 {
+ devfreq = <&bus_leftbus>;
+ status = "okay";
+ };
+
+ &bus_fsys {
+ devfreq = <&bus_leftbus>;
+ status = "okay";
+ };
+
+ &bus_mcuisp {
+ devfreq = <&bus_leftbus>;
+ status = "okay";
+ };
+
+ &bus_isp {
+ devfreq = <&bus_leftbus>;
+ status = "okay";
+ };
+
+ &bus_peril {
+ devfreq = <&bus_leftbus>;
+ status = "okay";
+ };
+
+ &bus_mfc {
+ devfreq = <&bus_leftbus>;
+ status = "okay";
+ };
diff --git a/Documentation/devicetree/bindings/hwmon/ltc2978.txt b/Documentation/devicetree/bindings/hwmon/ltc2978.txt
index a7afbf6..bf2a47b 100644
--- a/Documentation/devicetree/bindings/hwmon/ltc2978.txt
+++ b/Documentation/devicetree/bindings/hwmon/ltc2978.txt
@@ -13,6 +13,7 @@
* "lltc,ltc3886"
* "lltc,ltc3887"
* "lltc,ltm2987"
+ * "lltc,ltm4675"
* "lltc,ltm4676"
- reg: I2C slave address
diff --git a/Documentation/devicetree/bindings/leds/common.txt b/Documentation/devicetree/bindings/leds/common.txt
index 6841984..af10678 100644
--- a/Documentation/devicetree/bindings/leds/common.txt
+++ b/Documentation/devicetree/bindings/leds/common.txt
@@ -37,6 +37,9 @@
property is mandatory for the LEDs in the non-flash modes
(e.g. torch or indicator).
+- panic-indicator : This property specifies that the LED should be used,
+ if at all possible, as a panic indicator.
+
Required properties for flash LED child nodes:
- flash-max-microamp : Maximum flash LED supply current in microamperes.
- flash-max-timeout-us : Maximum timeout in microseconds after which the flash
diff --git a/Documentation/devicetree/bindings/leds/leds-gpio.txt b/Documentation/devicetree/bindings/leds/leds-gpio.txt
index fea1ebf..cbbeb18 100644
--- a/Documentation/devicetree/bindings/leds/leds-gpio.txt
+++ b/Documentation/devicetree/bindings/leds/leds-gpio.txt
@@ -23,6 +23,8 @@
property is not present.
- retain-state-suspended: (optional) The suspend state can be retained.Such
as charge-led gpio.
+- panic-indicator : (optional)
+ see Documentation/devicetree/bindings/leds/common.txt
Examples:
diff --git a/Documentation/devicetree/bindings/mmc/sdhci-st.txt b/Documentation/devicetree/bindings/mmc/sdhci-st.txt
index 18d950d..88faa91 100644
--- a/Documentation/devicetree/bindings/mmc/sdhci-st.txt
+++ b/Documentation/devicetree/bindings/mmc/sdhci-st.txt
@@ -38,7 +38,7 @@
- bus-width: Number of data lines.
See: Documentation/devicetree/bindings/mmc/mmc.txt.
-- max-frequency: Can be 200MHz, 100Mz or 50MHz (default) and used for
+- max-frequency: Can be 200MHz, 100MHz or 50MHz (default) and used for
configuring the CCONFIG3 in the mmcss.
See: Documentation/devicetree/bindings/mmc/mmc.txt.
@@ -48,7 +48,7 @@
- vqmmc-supply: Phandle to the regulator dt node, mentioned as the vcc/vdd
supply in eMMC/SD specs.
-- sd-uhs--sdr50: To enable the SDR50 in the mmcss.
+- sd-uhs-sdr50: To enable the SDR50 in the mmcss.
See: Documentation/devicetree/bindings/mmc/mmc.txt.
- sd-uhs-sdr104: To enable the SDR104 in the mmcss.
diff --git a/Documentation/devicetree/bindings/mmc/tmio_mmc.txt b/Documentation/devicetree/bindings/mmc/tmio_mmc.txt
index 7fb746d..0f610d4 100644
--- a/Documentation/devicetree/bindings/mmc/tmio_mmc.txt
+++ b/Documentation/devicetree/bindings/mmc/tmio_mmc.txt
@@ -26,3 +26,6 @@
Optional properties:
- toshiba,mmc-wrprotect-disable: write-protect detection is unavailable
+- pinctrl-names: should be "default", "state_uhs"
+- pinctrl-0: should contain default/high speed pin ctrl
+- pinctrl-1: should contain uhs mode pin ctrl
diff --git a/Documentation/devicetree/bindings/mmc/usdhi6rol0.txt b/Documentation/devicetree/bindings/mmc/usdhi6rol0.txt
index 8babdaa..6d1b797 100644
--- a/Documentation/devicetree/bindings/mmc/usdhi6rol0.txt
+++ b/Documentation/devicetree/bindings/mmc/usdhi6rol0.txt
@@ -12,6 +12,12 @@
- vmmc-supply: a phandle of a regulator, supplying Vcc to the card
- vqmmc-supply: a phandle of a regulator, supplying VccQ to the card
+- pinctrl-names: Can contain a "default" entry and a "state_uhs"
+ entry. The state_uhs entry is used together with the default
+ entry when the board requires distinct settings for UHS speeds.
+
+- pinctrl-N: One property for each name listed in pinctrl-names, see
+ ../pinctrl/pinctrl-bindings.txt.
Additionally any standard mmc bindings from mmc.txt can be used.
diff --git a/Documentation/devicetree/bindings/numa.txt b/Documentation/devicetree/bindings/numa.txt
new file mode 100644
index 0000000..21b3505
--- /dev/null
+++ b/Documentation/devicetree/bindings/numa.txt
@@ -0,0 +1,275 @@
+==============================================================================
+NUMA binding description.
+==============================================================================
+
+==============================================================================
+1 - Introduction
+==============================================================================
+
+Systems employing a Non Uniform Memory Access (NUMA) architecture contain
+collections of hardware resources including processors, memory, and I/O
+buses, which together comprise what is commonly known as a NUMA node.
+Processor accesses to memory within the local NUMA node are generally
+faster than processor accesses to memory outside of the local NUMA node.
+DT defines interfaces that allow the platform to convey NUMA node
+topology information to the OS.
+
+==============================================================================
+2 - numa-node-id
+==============================================================================
+
+For the purpose of identification, each NUMA node is associated with a unique
+token known as a node id. For the purpose of this binding
+a node id is a 32-bit integer.
+
+A device node is associated with a NUMA node by the presence of a
+numa-node-id property which contains the node id of the device.
+
+Example:
+ /* numa node 0 */
+ numa-node-id = <0>;
+
+ /* numa node 1 */
+ numa-node-id = <1>;
+
+==============================================================================
+3 - distance-map
+==============================================================================
+
+The optional device tree node distance-map describes the relative
+distance (memory latency) between all numa nodes.
+
+- compatible : Should at least contain "numa-distance-map-v1".
+
+- distance-matrix
+ This property defines a matrix to describe the relative distances
+ between all numa nodes.
+ It is represented as a list of node pairs and their relative distance.
+
+ Note:
+	1. Each entry represents the distance from the first node to the
+	   second node. The distances are equal in either direction.
+	2. The distance from a node to itself (local distance) is
+	   represented with the value 10, and all internode distances
+	   should be represented with values greater than 10.
+	3. distance-matrix should have entries in lexicographically
+	   ascending order of nodes.
+	4. There must be only one distance-map device node, and it must
+	   reside in the root node.
+	5. If the distance-map node is not present, a default
+	   distance-matrix is used.
+
+Example:
+ 4 nodes connected in mesh/ring topology as below,
+
+ 0_______20______1
+ | |
+ | |
+ 20 20
+ | |
+ | |
+ |_______________|
+ 3 20 2
+
+ if relative distance for each hop is 20,
+ then internode distance would be,
+ 0 -> 1 = 20
+ 1 -> 2 = 20
+ 2 -> 3 = 20
+ 3 -> 0 = 20
+ 0 -> 2 = 40
+ 1 -> 3 = 40
+
+ and dt presentation for this distance matrix is,
+
+ distance-map {
+ compatible = "numa-distance-map-v1";
+ distance-matrix = <0 0 10>,
+ <0 1 20>,
+ <0 2 40>,
+ <0 3 20>,
+ <1 0 20>,
+ <1 1 10>,
+ <1 2 20>,
+ <1 3 40>,
+ <2 0 40>,
+ <2 1 20>,
+ <2 2 10>,
+ <2 3 20>,
+ <3 0 20>,
+ <3 1 40>,
+ <3 2 20>,
+ <3 3 10>;
+ };
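+
+For illustration only, a minimal sketch (ours, not from any kernel source;
+the function name is made up) of how the triplets above could be walked
+with the standard of_* API from <linux/of.h>:
+
+	static int parse_distance_map(struct device_node *map)
+	{
+		int i, n;
+		u32 from, to, dist;
+
+		/* distance-matrix is a flat list of (from, to, distance) */
+		n = of_property_count_u32_elems(map, "distance-matrix");
+		if (n < 0 || n % 3)
+			return -EINVAL;
+
+		for (i = 0; i < n; i += 3) {
+			of_property_read_u32_index(map, "distance-matrix",
+						   i, &from);
+			of_property_read_u32_index(map, "distance-matrix",
+						   i + 1, &to);
+			of_property_read_u32_index(map, "distance-matrix",
+						   i + 2, &dist);
+			/* from == to implies dist == 10 (local distance) */
+		}
+		return 0;
+	}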
+
+==============================================================================
+4 - Example dts
+==============================================================================
+
+A dual-socket system consists of two boards connected through a ccn bus,
+each board having one socket/soc with 8 cpus, memory and a pci bus.
+
+ memory@c00000 {
+ device_type = "memory";
+ reg = <0x0 0xc00000 0x0 0x80000000>;
+ /* node 0 */
+ numa-node-id = <0>;
+ };
+
+ memory@10000000000 {
+ device_type = "memory";
+ reg = <0x100 0x0 0x0 0x80000000>;
+ /* node 1 */
+ numa-node-id = <1>;
+ };
+
+ cpus {
+ #address-cells = <2>;
+ #size-cells = <0>;
+
+ cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0x0>;
+ enable-method = "psci";
+ /* node 0 */
+ numa-node-id = <0>;
+ };
+ cpu@1 {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0x1>;
+ enable-method = "psci";
+ numa-node-id = <0>;
+ };
+ cpu@2 {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0x2>;
+ enable-method = "psci";
+ numa-node-id = <0>;
+ };
+ cpu@3 {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0x3>;
+ enable-method = "psci";
+ numa-node-id = <0>;
+ };
+ cpu@4 {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0x4>;
+ enable-method = "psci";
+ numa-node-id = <0>;
+ };
+ cpu@5 {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0x5>;
+ enable-method = "psci";
+ numa-node-id = <0>;
+ };
+ cpu@6 {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0x6>;
+ enable-method = "psci";
+ numa-node-id = <0>;
+ };
+ cpu@7 {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0x7>;
+ enable-method = "psci";
+ numa-node-id = <0>;
+ };
+ cpu@8 {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0x8>;
+ enable-method = "psci";
+ /* node 1 */
+ numa-node-id = <1>;
+ };
+ cpu@9 {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0x9>;
+ enable-method = "psci";
+ numa-node-id = <1>;
+ };
+ cpu@a {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0xa>;
+ enable-method = "psci";
+ numa-node-id = <1>;
+ };
+ cpu@b {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0xb>;
+ enable-method = "psci";
+ numa-node-id = <1>;
+ };
+ cpu@c {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0xc>;
+ enable-method = "psci";
+ numa-node-id = <1>;
+ };
+ cpu@d {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0xd>;
+ enable-method = "psci";
+ numa-node-id = <1>;
+ };
+ cpu@e {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0xe>;
+ enable-method = "psci";
+ numa-node-id = <1>;
+ };
+ cpu@f {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0xf>;
+ enable-method = "psci";
+ numa-node-id = <1>;
+ };
+ };
+
+ pcie0: pcie0@848000000000 {
+ compatible = "arm,armv8";
+ device_type = "pci";
+ bus-range = <0 255>;
+ #size-cells = <2>;
+ #address-cells = <3>;
+ reg = <0x8480 0x00000000 0 0x10000000>; /* Configuration space */
+ ranges = <0x03000000 0x8010 0x00000000 0x8010 0x00000000 0x70 0x00000000>;
+ /* node 0 */
+ numa-node-id = <0>;
+ };
+
+ pcie1: pcie1@948000000000 {
+ compatible = "arm,armv8";
+ device_type = "pci";
+ bus-range = <0 255>;
+ #size-cells = <2>;
+ #address-cells = <3>;
+ reg = <0x9480 0x00000000 0 0x10000000>; /* Configuration space */
+ ranges = <0x03000000 0x9010 0x00000000 0x9010 0x00000000 0x70 0x00000000>;
+ /* node 1 */
+ numa-node-id = <1>;
+ };
+
+ distance-map {
+ compatible = "numa-distance-map-v1";
+ distance-matrix = <0 0 10>,
+ <0 1 20>,
+ <1 1 10>;
+ };
diff --git a/Documentation/devicetree/bindings/power/rockchip-io-domain.txt b/Documentation/devicetree/bindings/power/rockchip-io-domain.txt
index c84fb47..d23dc00 100644
--- a/Documentation/devicetree/bindings/power/rockchip-io-domain.txt
+++ b/Documentation/devicetree/bindings/power/rockchip-io-domain.txt
@@ -37,8 +37,10 @@
- "rockchip,rk3368-pmu-io-voltage-domain" for rk3368 pmu-domains
- "rockchip,rk3399-io-voltage-domain" for rk3399
- "rockchip,rk3399-pmu-io-voltage-domain" for rk3399 pmu-domains
-- rockchip,grf: phandle to the syscon managing the "general register files"
+Deprecated properties:
+- rockchip,grf: phandle to the syscon managing the "general register files"
+ Systems should move the io-domains to a sub-node of the grf simple-mfd.
You specify supplies using the standard regulator bindings by including
a phandle the relevant regulator. All specified supplies must be able
diff --git a/Documentation/devicetree/bindings/regulator/max8973-regulator.txt b/Documentation/devicetree/bindings/regulator/max8973-regulator.txt
index f80ea2f..c2c68fc 100644
--- a/Documentation/devicetree/bindings/regulator/max8973-regulator.txt
+++ b/Documentation/devicetree/bindings/regulator/max8973-regulator.txt
@@ -32,6 +32,13 @@
Enhanced transient response (ETR) will affect the configuration of CKADV.
+-junction-warn-millicelsius: u32, junction warning temperature threshold
+	in millicelsius. If the die temperature crosses this level, then the
+	device generates warning interrupts.
+
+Please note that thermal functionality is only supported on MAX77621. The
+supported warning threshold temperatures for MAX77621 are 120 degC and 140 degC.
+
Example:
max8973@1b {
diff --git a/Documentation/devicetree/bindings/regulator/pv88080.txt b/Documentation/devicetree/bindings/regulator/pv88080.txt
new file mode 100644
index 0000000..38a6142
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/pv88080.txt
@@ -0,0 +1,49 @@
+* Powerventure Semiconductor PV88080 Voltage Regulator
+
+Required properties:
+- compatible: "pvs,pv88080".
+- reg: I2C slave address, usually 0x49.
+- interrupts: the interrupt outputs of the controller
+- regulators: A node that houses a sub-node for each regulator within the
+  device. Each sub-node is identified using the node's name, with valid
+  values being BUCK1, BUCK2, and BUCK3. The content of each sub-node is
+  defined by the standard binding for regulators; see regulator.txt.
+
+Optional properties:
+- Any optional property defined in regulator.txt
+
+Example
+
+ pmic: pv88080@49 {
+ compatible = "pvs,pv88080";
+ reg = <0x49>;
+ interrupt-parent = <&gpio>;
+ interrupts = <24 24>;
+
+ regulators {
+ BUCK1 {
+ regulator-name = "buck1";
+ regulator-min-microvolt = < 600000>;
+ regulator-max-microvolt = <1393750>;
+ regulator-min-microamp = < 220000>;
+ regulator-max-microamp = <7040000>;
+ };
+
+ BUCK2 {
+ regulator-name = "buck2";
+ regulator-min-microvolt = < 600000>;
+ regulator-max-microvolt = <1393750>;
+ regulator-min-microamp = <1496000>;
+ regulator-max-microamp = <4189000>;
+ };
+
+ BUCK3 {
+ regulator-name = "buck3";
+ regulator-min-microvolt = <1400000>;
+ regulator-max-microvolt = <2193750>;
+ regulator-min-microamp = <1496000>;
+ regulator-max-microamp = <4189000>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt b/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt
index d00bfd8..46c6f3e 100644
--- a/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt
+++ b/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt
@@ -7,6 +7,7 @@
"qcom,pm8841-regulators"
"qcom,pm8916-regulators"
"qcom,pm8941-regulators"
+ "qcom,pm8994-regulators"
- interrupts:
Usage: optional
@@ -68,6 +69,37 @@
Definition: Reference to regulator supplying the input pin, as
described in the data sheet.
+- vdd_s1-supply:
+- vdd_s2-supply:
+- vdd_s3-supply:
+- vdd_s4-supply:
+- vdd_s5-supply:
+- vdd_s6-supply:
+- vdd_s7-supply:
+- vdd_s8-supply:
+- vdd_s9-supply:
+- vdd_s10-supply:
+- vdd_s11-supply:
+- vdd_s12-supply:
+- vdd_l1-supply:
+- vdd_l2_l26_l28-supply:
+- vdd_l3_l11-supply:
+- vdd_l4_l27_l31-supply:
+- vdd_l5_l7-supply:
+- vdd_l6_l12_l32-supply:
+- vdd_l8_l16_l30-supply:
+- vdd_l9_l10_l18_l22-supply:
+- vdd_l13_l19_l23_l24-supply:
+- vdd_l14_l15-supply:
+- vdd_l17_l29-supply:
+- vdd_l20_l21-supply:
+- vdd_l25-supply:
+- vdd_lvs_1_2-supply:
+ Usage: optional (pm8994 only)
+ Value type: <phandle>
+ Definition: Reference to regulator supplying the input pin, as
+ described in the data sheet.
+
The regulator node houses sub-nodes for each regulator within the device. Each
sub-node is identified using the node's name, with valid values listed for each
@@ -85,6 +117,11 @@
l15, l16, l17, l18, l19, l20, l21, l22, l23, l24, lvs1, lvs2, lvs3,
mvs1, mvs2
+pm8994:
+ s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, l1, l2, l3, l4, l5,
+ l6, l7, l8, l9, l10, l11, l12, l13, l14, l15, l16, l17, l18, l19, l20,
+ l21, l22, l23, l24, l25, l26, l27, l28, l29, l30, l31, l32, lvs1, lvs2
+
The content of each sub-node is defined by the standard binding for regulators -
see regulator.txt - with additional custom properties described below:
diff --git a/Documentation/devicetree/bindings/regulator/regulator-max77620.txt b/Documentation/devicetree/bindings/regulator/regulator-max77620.txt
index b3c8ca6..1c4bfe7 100644
--- a/Documentation/devicetree/bindings/regulator/regulator-max77620.txt
+++ b/Documentation/devicetree/bindings/regulator/regulator-max77620.txt
@@ -94,6 +94,28 @@
This is applicable if suspend state
FPS source is selected as FPS0, FPS1 or
FPS2.
+- maxim,ramp-rate-setting: integer, ramp rate (uV/us) setting to be
+				configured in the device.
+				The platform may have a different ramp
+				rate than the advertised ramp rate if its
+				design deviates from Maxim's
+				recommendation. In this case, the
+				platform-specific ramp rate is used for
+				ramp time calculation and this property
+				is used for the device register
+				configuration.
+				The measured ramp rate of the platform is
+				provided by the regulator-ramp-delay
+				property as described in <devicetree/bindings/
+				regulator/regulator.txt>.
+				The Maxim MAX77620 supports the following
+				ramp rates:
+				SD: 13.75mV/us, 27.5mV/us, 55mV/us
+				LDOs: 5mV/us, 100mV/us
+
+Note: If the measured ramp delay is the same as the advertised ramp delay,
+then it is not required to provide the ramp delay with the
+"maxim,ramp-rate-setting" property. The ramp rate can be provided by
+regulator-ramp-delay, which will be used for ramp time calculation for
+voltage changes as well as for device configuration.
Example:
--------
diff --git a/Documentation/devicetree/bindings/regulator/ti-abb-regulator.txt b/Documentation/devicetree/bindings/regulator/ti-abb-regulator.txt
index c58db75..c3f6546 100644
--- a/Documentation/devicetree/bindings/regulator/ti-abb-regulator.txt
+++ b/Documentation/devicetree/bindings/regulator/ti-abb-regulator.txt
@@ -14,8 +14,8 @@
- "setup-address" - contains setup register address of ABB module (ti,abb-v3)
- "int-address" - contains address of interrupt register for ABB module
(also see Optional properties)
-- #address-cell: should be 0
-- #size-cell: should be 0
+- #address-cells: should be 0
+- #size-cells: should be 0
- clocks: should point to the clock node used by ABB module
- ti,settling-time: Settling time in uSecs from SoC documentation for ABB module
to settle down(target time for SR2_WTCNT_VALUE).
@@ -69,7 +69,7 @@
abb_x: regulator-abb-x {
compatible = "ti,abb-v1";
regulator-name = "abb_x";
- #address-cell = <0>;
+ #address-cells = <0>;
#size-cells = <0>;
reg = <0x483072f0 0x8>, <0x48306818 0x4>;
reg-names = "base-address", "int-address";
@@ -89,7 +89,7 @@
abb_y: regulator-abb-y {
compatible = "ti,abb-v2";
regulator-name = "abb_y";
- #address-cell = <0>;
+ #address-cells = <0>;
#size-cells = <0>;
reg = <0x4a307bd0 0x8>, <0x4a306014 0x4>, <0x4A002268 0x8>;
reg-names = "base-address", "int-address", "efuse-address";
@@ -110,7 +110,7 @@
abb_z: regulator-abb-z {
compatible = "ti,abb-v2";
regulator-name = "abb_z";
- #address-cell = <0>;
+ #address-cells = <0>;
#size-cells = <0>;
reg = <0x4ae07ce4 0x8>, <0x4ae06010 0x4>,
<0x4a002194 0x8>, <0x4ae0C314 0x4>;
diff --git a/Documentation/devicetree/bindings/regulator/twl-regulator.txt b/Documentation/devicetree/bindings/regulator/twl-regulator.txt
index 75b0c16..74a91c4 100644
--- a/Documentation/devicetree/bindings/regulator/twl-regulator.txt
+++ b/Documentation/devicetree/bindings/regulator/twl-regulator.txt
@@ -57,6 +57,12 @@
Optional properties:
- Any optional property defined in bindings/regulator/regulator.txt
+For twl4030 regulators/LDOs:
+ - regulator-initial-mode:
+ - 0x08 - Sleep mode, the nominal output voltage is maintained with low power
+ consumption with low load current capability.
+ - 0x0e - Active mode, the regulator can deliver its nominal output voltage
+ with full-load current capability.
Example:
diff --git a/Documentation/hwmon/fam15h_power b/Documentation/hwmon/fam15h_power
index e2b1b69..fb594c2 100644
--- a/Documentation/hwmon/fam15h_power
+++ b/Documentation/hwmon/fam15h_power
@@ -10,14 +10,22 @@
Datasheets:
BIOS and Kernel Developer's Guide (BKDG) For AMD Family 15h Processors
BIOS and Kernel Developer's Guide (BKDG) For AMD Family 16h Processors
+ AMD64 Architecture Programmer's Manual Volume 2: System Programming
Author: Andreas Herrmann <herrmann.der.user@googlemail.com>
Description
-----------
+1) Processor TDP (Thermal design power)
+
+Given a fixed frequency and voltage, the power consumption of a
+processor varies based on the workload being executed. Derated power
+is the power consumed when running a specific application. Thermal
+design power (TDP) is an example of derated power.
+
This driver permits reading of registers providing power information
-of AMD Family 15h and 16h processors.
+of AMD Family 15h and 16h processors via the TDP algorithm.
For AMD Family 15h and 16h processors the following power values can
be calculated using different processor northbridge function
@@ -37,3 +45,58 @@
On multi-node processors the calculated value is for the entire
package and not for a single node. Thus the driver creates sysfs
attributes only for internal node0 of a multi-node processor.
+
+2) Accumulated Power Mechanism
+
+This driver also introduces an algorithm that should be used to
+calculate the average power consumed by a processor during a
+measurement interval Tm. Support for the accumulated power mechanism is
+indicated by CPUID Fn8000_0007_EDX[12].
+
+* Tsample: compute unit power accumulator sample period
+* Tref: the PTSC counter period
+* PTSC: performance timestamp counter
+* N: the ratio of compute unit power accumulator sample period to the
+ PTSC period
+* Jmax: max compute unit accumulated power which is indicated by
+ MaxCpuSwPwrAcc MSR C001007b
+* Jx/Jy: compute unit accumulated power which is indicated by
+ CpuSwPwrAcc MSR C001007a
+* Tx/Ty: the value of performance timestamp counter which is indicated
+ by CU_PTSC MSR C0010280
+* PwrCPUave: CPU average power
+
+i. Determine the ratio of Tsample to Tref by executing CPUID Fn8000_0007.
+ N = value of CPUID Fn8000_0007_ECX[CpuPwrSampleTimeRatio[15:0]].
+
+ii. Read the full range of the cumulative energy value from the new
+MSR MaxCpuSwPwrAcc.
+	Jmax = value returned.
+
+iii. At time x, SW reads CpuSwPwrAcc MSR and samples the PTSC.
+	Jx = value read from CpuSwPwrAcc and Tx = value read from PTSC.
+
+iv. At time y, SW reads CpuSwPwrAcc MSR and samples the PTSC.
+	Jy = value read from CpuSwPwrAcc and Ty = value read from PTSC.
+
+v. Calculate the average power consumption for a compute unit over the
+time period (y-x). The unit of the result is uWatt.
+ if (Jy < Jx) // Rollover has occurred
+ Jdelta = (Jy + Jmax) - Jx
+ else
+ Jdelta = Jy - Jx
+ PwrCPUave = N * Jdelta * 1000 / (Ty - Tx)
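+
+For illustration, a minimal C sketch of the rollover-aware computation
+above (ours, not the driver's code; the helper name is made up; it relies
+on div64_u64() from <linux/math64.h>):
+
+	/* returns the average power in microwatts */
+	static u64 pwr_cpu_ave_uw(u64 jx, u64 jy, u64 jmax,
+				  u64 tx, u64 ty, u64 n)
+	{
+		u64 jdelta;
+
+		if (jy < jx)			/* accumulator rolled over */
+			jdelta = (jy + jmax) - jx;
+		else
+			jdelta = jy - jx;
+
+		return div64_u64(n * jdelta * 1000, ty - tx);
+	}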
+
+This driver provides PwrCPUave and interval (default is 10 milliseconds
+and maximum is 1 second):
+* power1_average (PwrCPUave)
+* power1_average_interval (Interval)
+
+The power1_average_interval can be updated in the /etc/sensors3.conf file
+as below:
+
+chip "fam15h_power-*"
+ set power1_average_interval 0.01
+
+Then save it with "sensors -s".
diff --git a/Documentation/hwmon/it87 b/Documentation/hwmon/it87
index 733296d..fff6f6b 100644
--- a/Documentation/hwmon/it87
+++ b/Documentation/hwmon/it87
@@ -9,6 +9,9 @@
* IT8620E
Prefix: 'it8620'
Addresses scanned: from Super I/O config space (8 I/O ports)
+ * IT8628E
+ Prefix: 'it8628'
+ Addresses scanned: from Super I/O config space (8 I/O ports)
Datasheet: Not publicly available
* IT8705F
Prefix: 'it87'
@@ -114,8 +117,8 @@
Description
-----------
-This driver implements support for the IT8603E, IT8620E, IT8623E, IT8705F,
-IT8712F, IT8716F, IT8718F, IT8720F, IT8721F, IT8726F, IT8728F, IT8732F,
+This driver implements support for the IT8603E, IT8620E, IT8623E, IT8628E,
+IT8705F, IT8712F, IT8716F, IT8718F, IT8720F, IT8721F, IT8726F, IT8728F, IT8732F,
IT8758E, IT8771E, IT8772E, IT8781F, IT8782F, IT8783E/F, IT8786E, IT8790E, and
SiS950 chips.
@@ -158,8 +161,8 @@
IT8728F. It only supports 3 fans, 16-bit fan mode, and the full speed mode
of the fan is not supported (value 0 of pwmX_enable).
-The IT8620E is another custom design, hardware monitoring part is similar to
-IT8728F. It only supports 16-bit fan mode.
+The IT8620E and IT8628E are custom designs, the hardware monitoring part is
+similar to the IT8728F. They only support 16-bit fan mode. Both chips support
+up to 6 fans.
The IT8790E supports up to 3 fans. 16-bit fan mode is always enabled.
@@ -187,8 +190,8 @@
2.8 volts with a resolution of 0.0109 volt. The battery voltage in8 does not
have limit registers.
-On the IT8603E, IT8721F/IT8758E, IT8732F, IT8781F, IT8782F, and IT8783E/F, some
-voltage inputs are internal and scaled inside the chip:
+On the IT8603E, IT8620E, IT8628E, IT8721F/IT8758E, IT8732F, IT8781F, IT8782F,
+and IT8783E/F, some voltage inputs are internal and scaled inside the chip:
* in3 (optional)
* in7 (optional for IT8781F, IT8782F, and IT8783E/F)
* in8 (always)
diff --git a/Documentation/hwmon/max31722 b/Documentation/hwmon/max31722
new file mode 100644
index 0000000..090da845
--- /dev/null
+++ b/Documentation/hwmon/max31722
@@ -0,0 +1,34 @@
+Kernel driver max31722
+======================
+
+Supported chips:
+ * Maxim Integrated MAX31722
+ Prefix: 'max31722'
+ ACPI ID: MAX31722
+ Addresses scanned: -
+ Datasheet: https://datasheets.maximintegrated.com/en/ds/MAX31722-MAX31723.pdf
+ * Maxim Integrated MAX31723
+ Prefix: 'max31723'
+ ACPI ID: MAX31723
+ Addresses scanned: -
+ Datasheet: https://datasheets.maximintegrated.com/en/ds/MAX31722-MAX31723.pdf
+
+Author: Tiberiu Breana <tiberiu.a.breana@intel.com>
+
+Description
+-----------
+
+This driver adds support for the Maxim Integrated MAX31722/MAX31723 thermometers
+and thermostats running over an SPI interface.
+
+Usage Notes
+-----------
+
+This driver uses ACPI to auto-detect devices. See ACPI IDs in the above section.
+
+Sysfs entries
+-------------
+
+The following attribute is supported:
+
+temp1_input Measured temperature. Read-only.
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 0b3de80..d3240fe 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -131,6 +131,7 @@
More X86-64 boot options can be found in
Documentation/x86/x86_64/boot-options.txt .
X86 Either 32-bit or 64-bit x86 (same as X86-32+X86-64)
+ X86_UV SGI UV support is enabled.
XEN Xen support is enabled
In addition, the following text indicates that the option:
@@ -167,16 +168,18 @@
acpi= [HW,ACPI,X86,ARM64]
Advanced Configuration and Power Interface
- Format: { force | off | strict | noirq | rsdt |
+ Format: { force | on | off | strict | noirq | rsdt |
copy_dsdt }
force -- enable ACPI if default was off
+ on -- enable ACPI but allow fallback to DT [arm64]
off -- disable ACPI if default was on
noirq -- do not use ACPI for IRQ routing
strict -- Be less tolerant of platforms that are not
strictly ACPI specification compliant.
rsdt -- prefer RSDT over (default) XSDT
copy_dsdt -- copy DSDT to memory
- For ARM64, ONLY "acpi=off" or "acpi=force" are available
+ For ARM64, ONLY "acpi=off", "acpi=on" or "acpi=force"
+ are available
See also Documentation/power/runtime_pm.txt, pci=noacpi
@@ -312,6 +315,8 @@
acpi_osi=!* # remove all strings
acpi_osi=! # disable all built-in OS vendor
strings
+ acpi_osi=!! # enable all built-in OS vendor
+ strings
acpi_osi= # disable all strings
'acpi_osi=!' can be used in combination with single or
@@ -542,6 +547,13 @@
Format: <int> (must be >=0)
Default: 64
+ bau= [X86_UV] Enable the BAU on SGI UV. The default
+ behavior is to disable the BAU (i.e. bau=0).
+ Format: { "0" | "1" }
+ 0 - Disable the BAU.
+ 1 - Enable the BAU.
+ unset - Disable the BAU.
+
baycom_epp= [HW,AX25]
Format: <io>,<mode>
@@ -1661,6 +1673,11 @@
hwp_only
Only load intel_pstate on systems which support
hardware P state control (HWP) if available.
+ support_acpi_ppc
+			Enforce ACPI _PPC performance limits. If the Fixed ACPI
+			Description Table specifies the preferred power management
+			profile as "Enterprise Server" or "Performance Server",
+ then this feature is turned on by default.
intremap= [X86-64, Intel-IOMMU]
on enable Interrupt Remapping (default)
@@ -3284,6 +3301,44 @@
Lazy RCU callbacks are those which RCU can
prove do nothing more than free memory.
+ rcuperf.gp_exp= [KNL]
+ Measure performance of expedited synchronous
+ grace-period primitives.
+
+ rcuperf.holdoff= [KNL]
+ Set test-start holdoff period. The purpose of
+ this parameter is to delay the start of the
+ test until boot completes in order to avoid
+ interference.
+
+ rcuperf.nreaders= [KNL]
+ Set number of RCU readers. The value -1 selects
+ N, where N is the number of CPUs. A value
+			"n" less than -1 selects N-n-2, where N is again
+			the number of CPUs. For example, -2 selects N
+			(the number of CPUs), -3 selects N+1, and so on.
+
+ rcuperf.nwriters= [KNL]
+			Set the number of RCU writers. The values operate
+			the same as for rcuperf.nreaders; -1 selects N,
+			where N is the number of CPUs.
+
+ rcuperf.perf_runnable= [BOOT]
+ Start rcuperf running at boot time.
+
+ rcuperf.shutdown= [KNL]
+ Shut the system down after performance tests
+ complete. This is useful for hands-off automated
+ testing.
+
+ rcuperf.perf_type= [KNL]
+ Specify the RCU implementation to test.
+
+ rcuperf.verbose= [KNL]
+ Enable additional printk() statements.
+
rcutorture.cbflood_inter_holdoff= [KNL]
Set holdoff time (jiffies) between successive
callback-flood tests.
diff --git a/Documentation/locking/lockdep-design.txt b/Documentation/locking/lockdep-design.txt
index 5001280..9de1c15 100644
--- a/Documentation/locking/lockdep-design.txt
+++ b/Documentation/locking/lockdep-design.txt
@@ -97,7 +97,7 @@
<hardirq-safe> -> <hardirq-unsafe>
<softirq-safe> -> <softirq-unsafe>
-The first rule comes from the fact the a hardirq-safe lock could be
+The first rule comes from the fact that a hardirq-safe lock could be
taken by a hardirq context, interrupting a hardirq-unsafe lock - and
thus could result in a lock inversion deadlock. Likewise, a softirq-safe
lock could be taken by an softirq context, interrupting a softirq-unsafe
@@ -220,7 +220,7 @@
when the chain is validated for the first time, is then put into a hash
table, which hash-table can be checked in a lockfree manner. If the
locking chain occurs again later on, the hash table tells us that we
-dont have to validate the chain again.
+don't have to validate the chain again.
Troubleshooting:
----------------
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index 3729cbe..147ae8e 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -4,8 +4,40 @@
By: David Howells <dhowells@redhat.com>
Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+ Will Deacon <will.deacon@arm.com>
+ Peter Zijlstra <peterz@infradead.org>
-Contents:
+==========
+DISCLAIMER
+==========
+
+This document is not a specification; it is intentionally (for the sake of
+brevity) and unintentionally (due to being human) incomplete. This document is
+meant as a guide to using the various memory barriers provided by Linux, but
+in case of any doubt (and there are many) please ask.
+
+To repeat, this document is not a specification of what Linux expects from
+hardware.
+
+The purpose of this document is twofold:
+
+ (1) to specify the minimum functionality that one can rely on for any
+ particular barrier, and
+
+ (2) to provide a guide as to how to use the barriers that are available.
+
+Note that an architecture can provide more than the minimum requirement
+for any particular barrier, but if the architecture provides less than
+that, that architecture is incorrect.
+
+Note also that it is possible that a barrier may be a no-op for an
+architecture because the way that arch works renders an explicit barrier
+unnecessary in that case.
+
+
+========
+CONTENTS
+========
(*) Abstract memory access model.
@@ -31,15 +63,15 @@
(*) Implicit kernel memory barriers.
- - Locking functions.
+ - Lock acquisition functions.
- Interrupt disabling functions.
- Sleep and wake-up functions.
- Miscellaneous functions.
- (*) Inter-CPU locking barrier effects.
+ (*) Inter-CPU acquiring barrier effects.
- - Locks vs memory accesses.
- - Locks vs I/O accesses.
+ - Acquires vs memory accesses.
+ - Acquires vs I/O accesses.
(*) Where are memory barriers needed?
@@ -61,6 +93,7 @@
(*) The things CPUs get up to.
- And then there's the Alpha.
+ - Virtual Machine Guests.
(*) Example uses.
@@ -148,7 +181,7 @@
CPU 1 CPU 2
=============== ===============
- { A == 1, B == 2, C = 3, P == &A, Q == &C }
+ { A == 1, B == 2, C == 3, P == &A, Q == &C }
B = 4; Q = P;
P = &B D = *Q;
@@ -430,8 +463,9 @@
This acts as a one-way permeable barrier. It guarantees that all memory
operations after the ACQUIRE operation will appear to happen after the
ACQUIRE operation with respect to the other components of the system.
- ACQUIRE operations include LOCK operations and smp_load_acquire()
- operations.
+ ACQUIRE operations include LOCK operations and both smp_load_acquire()
+	and smp_cond_acquire() operations. The latter builds the necessary ACQUIRE
+	semantics by relying on a control dependency and smp_rmb().
Memory operations that occur before an ACQUIRE operation may appear to
happen after it completes.
@@ -464,6 +498,11 @@
This means that ACQUIRE acts as a minimal "acquire" operation and
RELEASE acts as a minimal "release" operation.
+A subset of the atomic operations described in atomic_ops.txt have ACQUIRE
+and RELEASE variants in addition to fully-ordered and relaxed (no barrier
+semantics) definitions. For compound atomics performing both a load and a
+store, ACQUIRE semantics apply only to the load and RELEASE semantics apply
+only to the store portion of the operation.
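+
+As a minimal illustrative sketch (ours, not one of this document's
+examples), the classic message-passing idiom built from these primitives:
+
+	int data, flag;
+
+	void writer(void)
+	{
+		data = 42;
+		smp_store_release(&flag, 1);	/* orders the store to data first */
+	}
+
+	void reader(void)
+	{
+		if (smp_load_acquire(&flag))	/* orders the load of data after */
+			BUG_ON(data != 42);	/* guaranteed to observe 42 */
+	}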
Memory barriers are only required where there's a possibility of interaction
between two CPUs or between a CPU and a device. If it can be guaranteed that
@@ -517,7 +556,7 @@
CPU 1 CPU 2
=============== ===============
- { A == 1, B == 2, C = 3, P == &A, Q == &C }
+ { A == 1, B == 2, C == 3, P == &A, Q == &C }
B = 4;
<write barrier>
WRITE_ONCE(P, &B)
@@ -544,7 +583,7 @@
CPU 1 CPU 2
=============== ===============
- { A == 1, B == 2, C = 3, P == &A, Q == &C }
+ { A == 1, B == 2, C == 3, P == &A, Q == &C }
B = 4;
<write barrier>
WRITE_ONCE(P, &B);
@@ -813,9 +852,10 @@
the same variable, then those stores must be ordered, either by
preceding both of them with smp_mb() or by using smp_store_release()
to carry out the stores. Please note that it is -not- sufficient
- to use barrier() at beginning of each leg of the "if" statement,
- as optimizing compilers do not necessarily respect barrier()
- in this case.
+ to use barrier() at beginning of each leg of the "if" statement
+ because, as shown by the example above, optimizing compilers can
+ destroy the control dependency while respecting the letter of the
+ barrier() law.
(*) Control dependencies require at least one run-time conditional
between the prior load and the subsequent store, and this
@@ -1731,15 +1771,15 @@
All memory barriers except the data dependency barriers imply a compiler
-barrier. Data dependencies do not impose any additional compiler ordering.
+barrier. Data dependencies do not impose any additional compiler ordering.
Aside: In the case of data dependencies, the compiler would be expected
to issue the loads in the correct order (eg. `a[b]` would have to load
the value of b before loading a[b]), however there is no guarantee in
the C specification that the compiler may not speculate the value of b
(eg. is equal to 1) and load a before b (eg. tmp = a[1]; if (b != 1)
-tmp = a[b]; ). There is also the problem of a compiler reloading b after
-having loaded a[b], thus having a newer copy of b than a[b]. A consensus
+tmp = a[b]; ). There is also the problem of a compiler reloading b after
+having loaded a[b], thus having a newer copy of b than a[b]. A consensus
has not yet been reached about these problems, however the READ_ONCE()
macro is a good place to start looking.
@@ -1794,6 +1834,7 @@
(*) lockless_dereference();
+
This can be thought of as a pointer-fetch wrapper around the
smp_read_barrier_depends() data-dependency barrier.
@@ -1858,7 +1899,7 @@
ordered I/O regions to be partially ordered. Its effects may go beyond the
CPU->Hardware interface and actually affect the hardware at some level.
-See the subsection "Locks vs I/O accesses" for more information.
+See the subsection "Acquires vs I/O accesses" for more information.
===============================
@@ -1873,8 +1914,8 @@
of arch specific code.
-ACQUIRING FUNCTIONS
--------------------
+LOCK ACQUISITION FUNCTIONS
+--------------------------
The Linux kernel has a number of locking constructs:
@@ -1895,7 +1936,7 @@
Memory operations issued before the ACQUIRE may be completed after
the ACQUIRE operation has completed. An smp_mb__before_spinlock(),
combined with a following ACQUIRE, orders prior stores against
- subsequent loads and stores. Note that this is weaker than smp_mb()!
+ subsequent loads and stores. Note that this is weaker than smp_mb()!
The smp_mb__before_spinlock() primitive is free on many architectures.
(2) RELEASE operation implication:
@@ -2090,9 +2131,9 @@
event_indicated = 1;
wake_up_process(event_daemon);
-A write memory barrier is implied by wake_up() and co. if and only if they wake
-something up. The barrier occurs before the task state is cleared, and so sits
-between the STORE to indicate the event and the STORE to set TASK_RUNNING:
+A write memory barrier is implied by wake_up() and co. if and only if they
+wake something up. The barrier occurs before the task state is cleared, and so
+sits between the STORE to indicate the event and the STORE to set TASK_RUNNING:
CPU 1 CPU 2
=============================== ===============================
@@ -2206,7 +2247,7 @@
Then there is no guarantee as to what order CPU 3 will see the accesses to *A
through *H occur in, other than the constraints imposed by the separate locks
-on the separate CPUs. It might, for example, see:
+on the separate CPUs. It might, for example, see:
*E, ACQUIRE M, ACQUIRE Q, *G, *C, *F, *A, *B, RELEASE Q, *D, *H, RELEASE M
@@ -2486,9 +2527,9 @@
clear_bit_unlock();
__clear_bit_unlock();
-These implement ACQUIRE-class and RELEASE-class operations. These should be used in
-preference to other operations when implementing locking primitives, because
-their implementations can be optimised on many architectures.
+These implement ACQUIRE-class and RELEASE-class operations. These should be
+used in preference to other operations when implementing locking primitives,
+because their implementations can be optimised on many architectures.
[!] Note that special memory barrier primitives are available for these
situations because on some CPUs the atomic instructions used imply full memory
@@ -2568,12 +2609,12 @@
Normally this won't be a problem because the I/O accesses done inside such
sections will include synchronous load operations on strictly ordered I/O
-registers that form implicit I/O barriers. If this isn't sufficient then an
+registers that form implicit I/O barriers. If this isn't sufficient then an
mmiowb() may need to be used explicitly.
A similar situation may occur between an interrupt routine and two routines
-running on separate CPUs that communicate with each other. If such a case is
+running on separate CPUs that communicate with each other. If such a case is
likely, then interrupt-disabling locks should be used to guarantee ordering.
@@ -2587,8 +2628,8 @@
(*) inX(), outX():
These are intended to talk to I/O space rather than memory space, but
- that's primarily a CPU-specific concept. The i386 and x86_64 processors do
- indeed have special I/O space access cycles and instructions, but many
+ that's primarily a CPU-specific concept. The i386 and x86_64 processors
+ do indeed have special I/O space access cycles and instructions, but many
CPUs don't have such a concept.
The PCI bus, amongst others, defines an I/O space concept which - on such
@@ -2610,7 +2651,7 @@
Whether these are guaranteed to be fully ordered and uncombined with
respect to each other on the issuing CPU depends on the characteristics
- defined for the memory window through which they're accessing. On later
+ defined for the memory window through which they're accessing. On later
i386 architecture machines, for example, this is controlled by way of the
MTRR registers.
@@ -2635,10 +2676,10 @@
(*) readX_relaxed(), writeX_relaxed()
These are similar to readX() and writeX(), but provide weaker memory
- ordering guarantees. Specifically, they do not guarantee ordering with
+ ordering guarantees. Specifically, they do not guarantee ordering with
respect to normal memory accesses (e.g. DMA buffers) nor do they guarantee
- ordering with respect to LOCK or UNLOCK operations. If the latter is
- required, an mmiowb() barrier can be used. Note that relaxed accesses to
+ ordering with respect to LOCK or UNLOCK operations. If the latter is
+ required, an mmiowb() barrier can be used. Note that relaxed accesses to
the same peripheral are guaranteed to be ordered with respect to each
other.
@@ -3040,8 +3081,9 @@
See the subsection on "Cache Coherency" above.
+
VIRTUAL MACHINE GUESTS
--------------------
+----------------------
Guests running within virtual machines might be affected by SMP effects even if
the guest itself is compiled without SMP support. This is an artifact of
@@ -3050,7 +3092,7 @@
To handle this case optimally, low-level virt_mb() etc macros are available.
These have the same effect as smp_mb() etc when SMP is enabled, but generate
-identical code for SMP and non-SMP systems. For example, virtual machine guests
+identical code for SMP and non-SMP systems. For example, virtual machine guests
should use virt_mb() rather than smp_mb() when synchronizing against a
(possibly SMP) host.
@@ -3058,6 +3100,7 @@
in particular, they do not control MMIO effects: to control
MMIO effects, use mandatory barriers.
+
============
EXAMPLE USES
============
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index fcddfd5..daabdd7 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -60,6 +60,7 @@
- panic_on_warn
- perf_cpu_time_max_percent
- perf_event_paranoid
+- perf_event_max_stack
- pid_max
- powersave-nap [ PPC only ]
- printk
@@ -654,6 +655,19 @@
==============================================================
+perf_event_max_stack:
+
+Controls the maximum number of stack frames to copy for (attr.sample_type &
+PERF_SAMPLE_CALLCHAIN) configured events, for instance, when using
+'perf record -g' or 'perf trace --call-graph fp'.
+
+This can only be done when no events are in use that have callchains
+enabled, otherwise writing to this file will return -EBUSY.
+
+The default value is 127.
+
+==============================================================
+
pid_max:
PID allocation wrap value. When the kernel's next PID value
diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index f52f297..9857606 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -1562,12 +1562,12 @@
<idle>-0 3dN.1 12us : menu_hrtimer_cancel <-tick_nohz_idle_exit
<idle>-0 3dN.1 12us : ktime_get <-tick_nohz_idle_exit
<idle>-0 3dN.1 12us : tick_do_update_jiffies64 <-tick_nohz_idle_exit
- <idle>-0 3dN.1 13us : update_cpu_load_nohz <-tick_nohz_idle_exit
- <idle>-0 3dN.1 13us : _raw_spin_lock <-update_cpu_load_nohz
+ <idle>-0 3dN.1 13us : cpu_load_update_nohz <-tick_nohz_idle_exit
+ <idle>-0 3dN.1 13us : _raw_spin_lock <-cpu_load_update_nohz
<idle>-0 3dN.1 13us : add_preempt_count <-_raw_spin_lock
- <idle>-0 3dN.2 13us : __update_cpu_load <-update_cpu_load_nohz
- <idle>-0 3dN.2 14us : sched_avg_update <-__update_cpu_load
- <idle>-0 3dN.2 14us : _raw_spin_unlock <-update_cpu_load_nohz
+ <idle>-0 3dN.2 13us : __cpu_load_update <-cpu_load_update_nohz
+ <idle>-0 3dN.2 14us : sched_avg_update <-__cpu_load_update
+ <idle>-0 3dN.2 14us : _raw_spin_unlock <-cpu_load_update_nohz
<idle>-0 3dN.2 14us : sub_preempt_count <-_raw_spin_unlock
<idle>-0 3dN.1 15us : calc_load_exit_idle <-tick_nohz_idle_exit
<idle>-0 3dN.1 15us : touch_softlockup_watchdog <-tick_nohz_idle_exit
diff --git a/Documentation/x86/pat.txt b/Documentation/x86/pat.txt
index 54944c7..2a4ee63 100644
--- a/Documentation/x86/pat.txt
+++ b/Documentation/x86/pat.txt
@@ -196,3 +196,35 @@
"debugpat" boot parameter. With this parameter, various debug messages are
printed to dmesg log.
+PAT Initialization
+------------------
+
+The following table describes how PAT is initialized under various
+configurations. The PAT MSR must be updated by Linux in order to support WC
+and WT attributes. Otherwise, the PAT MSR has the value programmed in it
+by the firmware. Note that Xen enables the WC attribute in the PAT MSR
+for guests.
+
+ MTRR PAT Call Sequence PAT State PAT MSR
+ =========================================================
+ E E MTRR -> PAT init Enabled OS
+ E D MTRR -> PAT init Disabled -
+ D E MTRR -> PAT disable Disabled BIOS
+ D D MTRR -> PAT disable Disabled -
+ - np/E PAT -> PAT disable Disabled BIOS
+ - np/D PAT -> PAT disable Disabled -
+ E !P/E MTRR -> PAT init Disabled BIOS
+ D !P/E MTRR -> PAT disable Disabled BIOS
+ !M !P/E MTRR stub -> PAT disable Disabled BIOS
+
+ Legend
+ ------------------------------------------------
+ E Feature enabled in CPU
+ D Feature disabled/unsupported in CPU
+ np "nopat" boot option specified
+ !P CONFIG_X86_PAT option unset
+ !M CONFIG_MTRR option unset
+ Enabled PAT state set to enabled
+ Disabled PAT state set to disabled
+ OS PAT initializes PAT MSR with OS setting
+ BIOS PAT keeps PAT MSR with BIOS setting
+
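+Example
+-------
+
+For illustration (editorial sketch; "res" is an assumed resource describing
+a framebuffer BAR): a driver can request the WC attribute with ioremap_wc().
+Per the table above this takes effect only when the PAT state is Enabled;
+otherwise the mapping degrades to uncached.
+
+	void __iomem *fb = ioremap_wc(res->start, resource_size(res));
+
+	if (!fb)
+		return -ENOMEM;
+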
diff --git a/MAINTAINERS b/MAINTAINERS
index 9c567a4..f40d40d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1322,6 +1322,7 @@
F: arch/arm/boot/dts/armada*
F: arch/arm/boot/dts/kirkwood*
F: arch/arm64/boot/dts/marvell/armada*
+F: drivers/cpufreq/mvebu-cpufreq.c
ARM/Marvell Berlin SoC support
@@ -3539,6 +3540,15 @@
F: include/linux/devfreq-event.h
F: Documentation/devicetree/bindings/devfreq/event/
+BUS FREQUENCY DRIVER FOR SAMSUNG EXYNOS
+M: Chanwoo Choi <cw00.choi@samsung.com>
+L: linux-pm@vger.kernel.org
+L: linux-samsung-soc@vger.kernel.org
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/mzx/devfreq.git
+S: Maintained
+F: drivers/devfreq/exynos-bus.c
+F: Documentation/devicetree/bindings/devfreq/exynos-bus.txt
+
DEVICE NUMBER REGISTRY
M: Torben Mathiasen <device@lanana.org>
W: http://lanana.org/docs/device-list/index.html
@@ -7020,9 +7030,9 @@
M: Krzysztof Kozlowski <k.kozlowski@samsung.com>
L: linux-kernel@vger.kernel.org
S: Supported
-F: drivers/*/max14577.c
+F: drivers/*/max14577*.c
F: drivers/*/max77686*.c
-F: drivers/*/max77693.c
+F: drivers/*/max77693*.c
F: drivers/extcon/extcon-max14577.c
F: drivers/extcon/extcon-max77693.c
F: drivers/rtc/rtc-max77686.c
@@ -11246,14 +11256,13 @@
F: drivers/media/i2c/tc358743*
F: include/media/i2c/tc358743.h
-TMIO MMC DRIVER
-M: Ian Molton <ian@mnementh.co.uk>
+TMIO/SDHI MMC DRIVER
+M: Wolfram Sang <wsa+renesas@sang-engineering.com>
L: linux-mmc@vger.kernel.org
-S: Maintained
+S: Supported
F: drivers/mmc/host/tmio_mmc*
F: drivers/mmc/host/sh_mobile_sdhi.c
-F: include/linux/mmc/tmio.h
-F: include/linux/mmc/sh_mobile_sdhi.h
+F: include/linux/mfd/tmio.h
TMP401 HARDWARE MONITOR DRIVER
M: Guenter Roeck <linux@roeck-us.net>
@@ -12011,7 +12020,9 @@
W: http://www.slimlogic.co.uk/?p=48
T: git git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator.git
S: Supported
+F: Documentation/devicetree/bindings/regulator/
F: drivers/regulator/
+F: include/dt-bindings/regulator/
F: include/linux/regulator/
VRF
diff --git a/arch/alpha/include/asm/rwsem.h b/arch/alpha/include/asm/rwsem.h
index a83bbea6..0131a70 100644
--- a/arch/alpha/include/asm/rwsem.h
+++ b/arch/alpha/include/asm/rwsem.h
@@ -63,7 +63,7 @@
return res >= 0 ? 1 : 0;
}
-static inline void __down_write(struct rw_semaphore *sem)
+static inline long ___down_write(struct rw_semaphore *sem)
{
long oldcount;
#ifndef CONFIG_SMP
@@ -83,10 +83,24 @@
:"=&r" (oldcount), "=m" (sem->count), "=&r" (temp)
:"Ir" (RWSEM_ACTIVE_WRITE_BIAS), "m" (sem->count) : "memory");
#endif
- if (unlikely(oldcount))
+ return oldcount;
+}
+
+static inline void __down_write(struct rw_semaphore *sem)
+{
+ if (unlikely(___down_write(sem)))
rwsem_down_write_failed(sem);
}
+static inline int __down_write_killable(struct rw_semaphore *sem)
+{
+ if (unlikely(___down_write(sem)))
+ if (IS_ERR(rwsem_down_write_failed_killable(sem)))
+ return -EINTR;
+
+ return 0;
+}
+
/*
* trylock for writing -- returns 1 if successful, 0 if contention
*/
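+
+/*
+ * Editorial caller sketch (not part of this change; "mm" and the error
+ * propagation are assumed): __down_write_killable() is what allows code
+ * like the following to return on a fatal signal instead of blocking:
+ *
+ *	if (down_write_killable(&mm->mmap_sem))
+ *		return -EINTR;
+ *	...
+ *	up_write(&mm->mmap_sem);
+ */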
diff --git a/arch/arm/include/asm/cpuidle.h b/arch/arm/include/asm/cpuidle.h
index 3848259..baefe1d 100644
--- a/arch/arm/include/asm/cpuidle.h
+++ b/arch/arm/include/asm/cpuidle.h
@@ -36,7 +36,7 @@
struct of_cpuidle_method {
const char *method;
- struct cpuidle_ops *ops;
+ const struct cpuidle_ops *ops;
};
#define CPUIDLE_METHOD_OF_DECLARE(name, _method, _ops) \
diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h
index e0eea72..a708fa1 100644
--- a/arch/arm/include/asm/efi.h
+++ b/arch/arm/include/asm/efi.h
@@ -17,33 +17,27 @@
#include <asm/mach/map.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
+#include <asm/ptrace.h>
#ifdef CONFIG_EFI
void efi_init(void);
int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md);
+int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
-#define efi_call_virt(f, ...) \
+#define arch_efi_call_virt_setup() efi_virtmap_load()
+#define arch_efi_call_virt_teardown() efi_virtmap_unload()
+
+#define arch_efi_call_virt(f, args...) \
({ \
efi_##f##_t *__f; \
- efi_status_t __s; \
- \
- efi_virtmap_load(); \
__f = efi.systab->runtime->f; \
- __s = __f(__VA_ARGS__); \
- efi_virtmap_unload(); \
- __s; \
+ __f(args); \
})
-#define __efi_call_virt(f, ...) \
-({ \
- efi_##f##_t *__f; \
- \
- efi_virtmap_load(); \
- __f = efi.systab->runtime->f; \
- __f(__VA_ARGS__); \
- efi_virtmap_unload(); \
-})
+#define ARCH_EFI_IRQ_FLAGS_MASK \
+ (PSR_J_BIT | PSR_E_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | \
+ PSR_T_BIT | MODE_MASK)
static inline void efi_set_pgd(struct mm_struct *mm)
{
@@ -59,7 +53,16 @@
/* arch specific definitions used by the stub code */
-#define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__)
+#define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__)
+#define __efi_call_early(f, ...) f(__VA_ARGS__)
+#define efi_is_64bit() (false)
+
+struct screen_info *alloc_screen_info(efi_system_table_t *sys_table_arg);
+void free_screen_info(efi_system_table_t *sys_table, struct screen_info *si);
+
+static inline void efifb_setup_from_dmi(struct screen_info *si, const char *opt)
+{
+}
/*
* A reasonable upper bound for the uncompressed kernel size is 32 MBytes,
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 3850701..738d5ee 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -265,6 +265,15 @@
kvm_call_hyp(__init_stage2_translation);
}
+static inline void __cpu_reset_hyp_mode(phys_addr_t boot_pgd_ptr,
+ phys_addr_t phys_idmap_start)
+{
+ /*
+ * TODO
+ * kvm_call_reset(boot_pgd_ptr, phys_idmap_start);
+ */
+}
+
static inline int kvm_arch_dev_ioctl_check_extension(long ext)
{
return 0;
@@ -277,7 +286,6 @@
struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
-static inline void kvm_arch_hardware_disable(void) {}
static inline void kvm_arch_hardware_unsetup(void) {}
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index da44be9..f17a8d4 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -66,6 +66,7 @@
phys_addr_t kvm_mmu_get_httbr(void);
phys_addr_t kvm_mmu_get_boot_httbr(void);
phys_addr_t kvm_get_idmap_vector(void);
+phys_addr_t kvm_get_idmap_start(void);
int kvm_mmu_init(void);
void kvm_clear_hyp_idmap(void);
diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h
index fa5b42d..3cc14dd 100644
--- a/arch/arm/include/asm/mmu_context.h
+++ b/arch/arm/include/asm/mmu_context.h
@@ -15,6 +15,7 @@
#include <linux/compiler.h>
#include <linux/sched.h>
+#include <linux/preempt.h>
#include <asm/cacheflush.h>
#include <asm/cachetype.h>
#include <asm/proc-fns.h>
@@ -66,6 +67,7 @@
cpu_switch_mm(mm->pgd, mm);
}
+#ifndef MODULE
#define finish_arch_post_lock_switch \
finish_arch_post_lock_switch
static inline void finish_arch_post_lock_switch(void)
@@ -87,6 +89,7 @@
preempt_enable_no_resched();
}
}
+#endif /* !MODULE */
#endif /* CONFIG_MMU */
diff --git a/arch/arm/kernel/cpuidle.c b/arch/arm/kernel/cpuidle.c
index 703926e..a44b268e 100644
--- a/arch/arm/kernel/cpuidle.c
+++ b/arch/arm/kernel/cpuidle.c
@@ -70,7 +70,7 @@
*
* Returns a struct cpuidle_ops pointer, NULL if not found.
*/
-static struct cpuidle_ops *__init arm_cpuidle_get_ops(const char *method)
+static const struct cpuidle_ops *__init arm_cpuidle_get_ops(const char *method)
{
struct of_cpuidle_method *m = __cpuidle_method_of_table;
@@ -88,7 +88,7 @@
*
* Get the method name defined in the 'enable-method' property, retrieve the
* associated cpuidle_ops and do a struct copy. This copy is needed because all
- * cpuidle_ops are tagged __initdata and will be unloaded after the init
+ * cpuidle_ops are tagged __initconst and will be unloaded after the init
* process.
*
 * Return 0 on success, -ENOENT if no 'enable-method' is defined, -EOPNOTSUPP if
@@ -97,7 +97,7 @@
static int __init arm_cpuidle_read_ops(struct device_node *dn, int cpu)
{
const char *enable_method;
- struct cpuidle_ops *ops;
+ const struct cpuidle_ops *ops;
enable_method = of_get_property(dn, "enable-method", NULL);
if (!enable_method)
diff --git a/arch/arm/kernel/efi.c b/arch/arm/kernel/efi.c
index ff8a9d8..9f43ba0 100644
--- a/arch/arm/kernel/efi.c
+++ b/arch/arm/kernel/efi.c
@@ -11,6 +11,41 @@
#include <asm/mach/map.h>
#include <asm/mmu_context.h>
+static int __init set_permissions(pte_t *ptep, pgtable_t token,
+ unsigned long addr, void *data)
+{
+ efi_memory_desc_t *md = data;
+ pte_t pte = *ptep;
+
+ if (md->attribute & EFI_MEMORY_RO)
+ pte = set_pte_bit(pte, __pgprot(L_PTE_RDONLY));
+ if (md->attribute & EFI_MEMORY_XP)
+ pte = set_pte_bit(pte, __pgprot(L_PTE_XN));
+ set_pte_ext(ptep, pte, PTE_EXT_NG);
+ return 0;
+}
+
+int __init efi_set_mapping_permissions(struct mm_struct *mm,
+ efi_memory_desc_t *md)
+{
+ unsigned long base, size;
+
+ base = md->virt_addr;
+ size = md->num_pages << EFI_PAGE_SHIFT;
+
+ /*
+ * We can only use apply_to_page_range() if we can guarantee that the
+ * entire region was mapped using pages. This should be the case if the
+ * region does not cover any naturally aligned SECTION_SIZE sized
+ * blocks.
+ */
+ if (round_down(base + size, SECTION_SIZE) <
+ round_up(base, SECTION_SIZE) + SECTION_SIZE)
+ return apply_to_page_range(mm, base, size, set_permissions, md);
+
+ return 0;
+}
+
int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md)
{
struct map_desc desc = {
@@ -34,5 +69,11 @@
desc.type = MT_DEVICE;
create_mapping_late(mm, &desc, true);
+
+ /*
+ * If stricter permissions were specified, apply them now.
+ */
+ if (md->attribute & (EFI_MEMORY_RO | EFI_MEMORY_XP))
+ return efi_set_mapping_permissions(mm, md);
return 0;
}
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index 6284779..b8df458 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -631,7 +631,7 @@
info->address &= ~alignment_mask;
info->ctrl.len <<= offset;
- if (!bp->overflow_handler) {
+ if (is_default_overflow_handler(bp)) {
/*
* Mismatch breakpoints are required for single-stepping
* breakpoints.
@@ -754,7 +754,7 @@
* mismatch breakpoint so we can single-step over the
* watchpoint trigger.
*/
- if (!wp->overflow_handler)
+ if (is_default_overflow_handler(wp))
enable_single_step(wp, instruction_pointer(regs));
unlock:
diff --git a/arch/arm/kernel/perf_callchain.c b/arch/arm/kernel/perf_callchain.c
index 4e02ae5..27563be 100644
--- a/arch/arm/kernel/perf_callchain.c
+++ b/arch/arm/kernel/perf_callchain.c
@@ -75,7 +75,7 @@
tail = (struct frame_tail __user *)regs->ARM_fp - 1;
- while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
+ while ((entry->nr < sysctl_perf_event_max_stack) &&
tail && !((unsigned long)tail & 0x3))
tail = user_backtrace(tail, entry);
}
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 2c4bea3..7d4e285 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -883,7 +883,8 @@
request_resource(&ioport_resource, &lp2);
}
-#if defined(CONFIG_VGA_CONSOLE) || defined(CONFIG_DUMMY_CONSOLE)
+#if defined(CONFIG_VGA_CONSOLE) || defined(CONFIG_DUMMY_CONSOLE) || \
+ defined(CONFIG_EFI)
struct screen_info screen_info = {
.orig_video_lines = 30,
.orig_video_cols = 80,
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index dded1b7..9ef013d 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -16,7 +16,6 @@
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
-#include <linux/cpu.h>
#include <linux/cpu_pm.h>
#include <linux/errno.h>
#include <linux/err.h>
@@ -66,6 +65,8 @@
static bool vgic_present;
+static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled);
+
static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)
{
BUG_ON(preemptible());
@@ -90,11 +91,6 @@
return &kvm_arm_running_vcpu;
}
-int kvm_arch_hardware_enable(void)
-{
- return 0;
-}
-
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
@@ -1033,11 +1029,6 @@
}
}
-static void cpu_init_stage2(void *dummy)
-{
- __cpu_init_stage2();
-}
-
static void cpu_init_hyp_mode(void *dummy)
{
phys_addr_t boot_pgd_ptr;
@@ -1065,43 +1056,87 @@
{
if (is_kernel_in_hyp_mode()) {
/*
- * cpu_init_stage2() is safe to call even if the PM
+ * __cpu_init_stage2() is safe to call even if the PM
* event was cancelled before the CPU was reset.
*/
- cpu_init_stage2(NULL);
+ __cpu_init_stage2();
} else {
if (__hyp_get_vectors() == hyp_default_vectors)
cpu_init_hyp_mode(NULL);
}
}
-static int hyp_init_cpu_notify(struct notifier_block *self,
- unsigned long action, void *cpu)
+static void cpu_hyp_reset(void)
{
- switch (action) {
- case CPU_STARTING:
- case CPU_STARTING_FROZEN:
- cpu_hyp_reinit();
- }
+ phys_addr_t boot_pgd_ptr;
+ phys_addr_t phys_idmap_start;
- return NOTIFY_OK;
+ if (!is_kernel_in_hyp_mode()) {
+ boot_pgd_ptr = kvm_mmu_get_boot_httbr();
+ phys_idmap_start = kvm_get_idmap_start();
+
+ __cpu_reset_hyp_mode(boot_pgd_ptr, phys_idmap_start);
+ }
}
-static struct notifier_block hyp_init_cpu_nb = {
- .notifier_call = hyp_init_cpu_notify,
-};
+static void _kvm_arch_hardware_enable(void *discard)
+{
+ if (!__this_cpu_read(kvm_arm_hardware_enabled)) {
+ cpu_hyp_reinit();
+ __this_cpu_write(kvm_arm_hardware_enabled, 1);
+ }
+}
+
+int kvm_arch_hardware_enable(void)
+{
+ _kvm_arch_hardware_enable(NULL);
+ return 0;
+}
+
+static void _kvm_arch_hardware_disable(void *discard)
+{
+ if (__this_cpu_read(kvm_arm_hardware_enabled)) {
+ cpu_hyp_reset();
+ __this_cpu_write(kvm_arm_hardware_enabled, 0);
+ }
+}
+
+void kvm_arch_hardware_disable(void)
+{
+ _kvm_arch_hardware_disable(NULL);
+}
#ifdef CONFIG_CPU_PM
static int hyp_init_cpu_pm_notifier(struct notifier_block *self,
unsigned long cmd,
void *v)
{
- if (cmd == CPU_PM_EXIT) {
- cpu_hyp_reinit();
- return NOTIFY_OK;
- }
+ /*
+ * kvm_arm_hardware_enabled is left with its old value over
+ * PM_ENTER->PM_EXIT. It is used to indicate PM_EXIT should
+ * re-enable hyp.
+ */
+ switch (cmd) {
+ case CPU_PM_ENTER:
+ if (__this_cpu_read(kvm_arm_hardware_enabled))
+ /*
+ * don't update kvm_arm_hardware_enabled here
+ * so that the hardware will be re-enabled
+ * when we resume. See below.
+ */
+ cpu_hyp_reset();
- return NOTIFY_DONE;
+ return NOTIFY_OK;
+ case CPU_PM_EXIT:
+ if (__this_cpu_read(kvm_arm_hardware_enabled))
+ /* The hardware was enabled before suspend. */
+ cpu_hyp_reinit();
+
+ return NOTIFY_OK;
+
+ default:
+ return NOTIFY_DONE;
+ }
}
static struct notifier_block hyp_init_cpu_pm_nb = {
@@ -1143,16 +1178,12 @@
static int init_subsystems(void)
{
- int err;
+ int err = 0;
/*
- * Register CPU Hotplug notifier
+ * Enable hardware so that subsystem initialisation can access EL2.
*/
- err = register_cpu_notifier(&hyp_init_cpu_nb);
- if (err) {
- kvm_err("Cannot register KVM init CPU notifier (%d)\n", err);
- return err;
- }
+ on_each_cpu(_kvm_arch_hardware_enable, NULL, 1);
/*
* Register CPU lower-power notifier
@@ -1170,9 +1201,10 @@
case -ENODEV:
case -ENXIO:
vgic_present = false;
+ err = 0;
break;
default:
- return err;
+ goto out;
}
/*
@@ -1180,12 +1212,15 @@
*/
err = kvm_timer_hyp_init();
if (err)
- return err;
+ goto out;
kvm_perf_init();
kvm_coproc_table_init();
- return 0;
+out:
+ on_each_cpu(_kvm_arch_hardware_disable, NULL, 1);
+
+ return err;
}
static void teardown_hyp_mode(void)
@@ -1198,17 +1233,11 @@
free_hyp_pgds();
for_each_possible_cpu(cpu)
free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
- unregister_cpu_notifier(&hyp_init_cpu_nb);
hyp_cpu_pm_exit();
}
static int init_vhe_mode(void)
{
- /*
- * Execute the init code on each CPU.
- */
- on_each_cpu(cpu_init_stage2, NULL, 1);
-
/* set size of VMID supported by CPU */
kvm_vmid_bits = kvm_get_vmid_bits();
kvm_info("%d-bit VMID\n", kvm_vmid_bits);
@@ -1295,11 +1324,6 @@
}
}
- /*
- * Execute the init code on each CPU.
- */
- on_each_cpu(cpu_init_hyp_mode, NULL, 1);
-
#ifndef CONFIG_HOTPLUG_CPU
free_boot_hyp_pgd();
#endif
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index d6d4191..be30212 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -1666,6 +1666,11 @@
return hyp_idmap_vector;
}
+phys_addr_t kvm_get_idmap_start(void)
+{
+ return hyp_idmap_start;
+}
+
int kvm_mmu_init(void)
{
int err;
diff --git a/arch/arm/mach-berlin/berlin.c b/arch/arm/mach-berlin/berlin.c
index 25d7387..ac181c6 100644
--- a/arch/arm/mach-berlin/berlin.c
+++ b/arch/arm/mach-berlin/berlin.c
@@ -18,11 +18,6 @@
#include <asm/hardware/cache-l2x0.h>
#include <asm/mach/arch.h>
-static void __init berlin_init_late(void)
-{
- platform_device_register_simple("cpufreq-dt", -1, NULL, 0);
-}
-
static const char * const berlin_dt_compat[] = {
"marvell,berlin",
NULL,
@@ -30,7 +25,6 @@
DT_MACHINE_START(BERLIN_DT, "Marvell Berlin")
.dt_compat = berlin_dt_compat,
- .init_late = berlin_init_late,
/*
* with DT probing for L2CCs, berlin_init_machine can be removed.
* Note: 88DE3005 (Armada 1500-mini) uses pl310 l2cc
diff --git a/arch/arm/mach-davinci/devices-da8xx.c b/arch/arm/mach-davinci/devices-da8xx.c
index 725e693..add3771 100644
--- a/arch/arm/mach-davinci/devices-da8xx.c
+++ b/arch/arm/mach-davinci/devices-da8xx.c
@@ -751,16 +751,6 @@
.end = IRQ_DA8XX_MMCSDINT0,
.flags = IORESOURCE_IRQ,
},
- { /* DMA RX */
- .start = DA8XX_DMA_MMCSD0_RX,
- .end = DA8XX_DMA_MMCSD0_RX,
- .flags = IORESOURCE_DMA,
- },
- { /* DMA TX */
- .start = DA8XX_DMA_MMCSD0_TX,
- .end = DA8XX_DMA_MMCSD0_TX,
- .flags = IORESOURCE_DMA,
- },
};
static struct platform_device da8xx_mmcsd0_device = {
@@ -788,16 +778,6 @@
.end = IRQ_DA850_MMCSDINT0_1,
.flags = IORESOURCE_IRQ,
},
- { /* DMA RX */
- .start = DA850_DMA_MMCSD1_RX,
- .end = DA850_DMA_MMCSD1_RX,
- .flags = IORESOURCE_DMA,
- },
- { /* DMA TX */
- .start = DA850_DMA_MMCSD1_TX,
- .end = DA850_DMA_MMCSD1_TX,
- .flags = IORESOURCE_DMA,
- },
};
static struct platform_device da850_mmcsd1_device = {
diff --git a/arch/arm/mach-davinci/devices.c b/arch/arm/mach-davinci/devices.c
index 6257aa4..67d26c5 100644
--- a/arch/arm/mach-davinci/devices.c
+++ b/arch/arm/mach-davinci/devices.c
@@ -144,14 +144,6 @@
.start = IRQ_SDIOINT,
.flags = IORESOURCE_IRQ,
},
- /* DMA channels: RX, then TX */
- {
- .start = EDMA_CTLR_CHAN(0, DAVINCI_DMA_MMCRXEVT),
- .flags = IORESOURCE_DMA,
- }, {
- .start = EDMA_CTLR_CHAN(0, DAVINCI_DMA_MMCTXEVT),
- .flags = IORESOURCE_DMA,
- },
};
static struct platform_device davinci_mmcsd0_device = {
@@ -181,14 +173,6 @@
.start = IRQ_DM355_SDIOINT1,
.flags = IORESOURCE_IRQ,
},
- /* DMA channels: RX, then TX */
- {
- .start = EDMA_CTLR_CHAN(0, 30), /* rx */
- .flags = IORESOURCE_DMA,
- }, {
- .start = EDMA_CTLR_CHAN(0, 31), /* tx */
- .flags = IORESOURCE_DMA,
- },
};
static struct platform_device davinci_mmcsd1_device = {
diff --git a/arch/arm/mach-exynos/exynos.c b/arch/arm/mach-exynos/exynos.c
index bbf51a4..4d3b056 100644
--- a/arch/arm/mach-exynos/exynos.c
+++ b/arch/arm/mach-exynos/exynos.c
@@ -213,33 +213,6 @@
exynos_map_pmu();
}
-static const struct of_device_id exynos_cpufreq_matches[] = {
- { .compatible = "samsung,exynos3250", .data = "cpufreq-dt" },
- { .compatible = "samsung,exynos4210", .data = "cpufreq-dt" },
- { .compatible = "samsung,exynos4212", .data = "cpufreq-dt" },
- { .compatible = "samsung,exynos4412", .data = "cpufreq-dt" },
- { .compatible = "samsung,exynos5250", .data = "cpufreq-dt" },
-#ifndef CONFIG_BL_SWITCHER
- { .compatible = "samsung,exynos5420", .data = "cpufreq-dt" },
- { .compatible = "samsung,exynos5800", .data = "cpufreq-dt" },
-#endif
- { /* sentinel */ }
-};
-
-static void __init exynos_cpufreq_init(void)
-{
- struct device_node *root = of_find_node_by_path("/");
- const struct of_device_id *match;
-
- match = of_match_node(exynos_cpufreq_matches, root);
- if (!match) {
- platform_device_register_simple("exynos-cpufreq", -1, NULL, 0);
- return;
- }
-
- platform_device_register_simple(match->data, -1, NULL, 0);
-}
-
static void __init exynos_dt_machine_init(void)
{
/*
@@ -262,8 +235,6 @@
of_machine_is_compatible("samsung,exynos5250"))
platform_device_register(&exynos_cpuidle);
- exynos_cpufreq_init();
-
of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
}
diff --git a/arch/arm/mach-imx/imx27-dt.c b/arch/arm/mach-imx/imx27-dt.c
index bd42d1b..530a728 100644
--- a/arch/arm/mach-imx/imx27-dt.c
+++ b/arch/arm/mach-imx/imx27-dt.c
@@ -18,15 +18,6 @@
#include "common.h"
#include "mx27.h"
-static void __init imx27_dt_init(void)
-{
- struct platform_device_info devinfo = { .name = "cpufreq-dt", };
-
- of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
-
- platform_device_register_full(&devinfo);
-}
-
static const char * const imx27_dt_board_compat[] __initconst = {
"fsl,imx27",
NULL
@@ -36,6 +27,5 @@
.map_io = mx27_map_io,
.init_early = imx27_init_early,
.init_irq = mx27_init_irq,
- .init_machine = imx27_dt_init,
.dt_compat = imx27_dt_board_compat,
MACHINE_END
diff --git a/arch/arm/mach-imx/mach-imx51.c b/arch/arm/mach-imx/mach-imx51.c
index 6883fba..10a82a4 100644
--- a/arch/arm/mach-imx/mach-imx51.c
+++ b/arch/arm/mach-imx/mach-imx51.c
@@ -50,13 +50,10 @@
static void __init imx51_dt_init(void)
{
- struct platform_device_info devinfo = { .name = "cpufreq-dt", };
-
imx51_ipu_mipi_setup();
imx_src_init();
of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
- platform_device_register_full(&devinfo);
}
static void __init imx51_init_late(void)
diff --git a/arch/arm/mach-imx/mach-imx53.c b/arch/arm/mach-imx/mach-imx53.c
index 86316a9..18b5c5c13 100644
--- a/arch/arm/mach-imx/mach-imx53.c
+++ b/arch/arm/mach-imx/mach-imx53.c
@@ -40,8 +40,6 @@
static void __init imx53_init_late(void)
{
imx53_pm_init();
-
- platform_device_register_simple("cpufreq-dt", -1, NULL, 0);
}
static const char * const imx53_dt_board_compat[] __initconst = {
diff --git a/arch/arm/mach-imx/mach-imx7d.c b/arch/arm/mach-imx/mach-imx7d.c
index 5a27f20..b450f52 100644
--- a/arch/arm/mach-imx/mach-imx7d.c
+++ b/arch/arm/mach-imx/mach-imx7d.c
@@ -105,11 +105,6 @@
irqchip_init();
}
-static void __init imx7d_init_late(void)
-{
- platform_device_register_simple("cpufreq-dt", -1, NULL, 0);
-}
-
static const char *const imx7d_dt_compat[] __initconst = {
"fsl,imx7d",
NULL,
@@ -117,7 +112,6 @@
DT_MACHINE_START(IMX7D, "Freescale i.MX7 Dual (Device Tree)")
.init_irq = imx7d_init_irq,
- .init_late = imx7d_init_late,
.init_machine = imx7d_init_machine,
.dt_compat = imx7d_dt_compat,
MACHINE_END
diff --git a/arch/arm/mach-mvebu/pmsu.c b/arch/arm/mach-mvebu/pmsu.c
index ed8fda4..b444423 100644
--- a/arch/arm/mach-mvebu/pmsu.c
+++ b/arch/arm/mach-mvebu/pmsu.c
@@ -20,7 +20,6 @@
#include <linux/clk.h>
#include <linux/cpu_pm.h>
-#include <linux/cpufreq-dt.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/io.h>
@@ -29,7 +28,6 @@
#include <linux/of_address.h>
#include <linux/of_device.h>
#include <linux/platform_device.h>
-#include <linux/pm_opp.h>
#include <linux/resource.h>
#include <linux/slab.h>
#include <linux/smp.h>
@@ -608,86 +606,3 @@
return 0;
}
-
-struct cpufreq_dt_platform_data cpufreq_dt_pd = {
- .independent_clocks = true,
-};
-
-static int __init armada_xp_pmsu_cpufreq_init(void)
-{
- struct device_node *np;
- struct resource res;
- int ret, cpu;
-
- if (!of_machine_is_compatible("marvell,armadaxp"))
- return 0;
-
- /*
- * In order to have proper cpufreq handling, we need to ensure
- * that the Device Tree description of the CPU clock includes
- * the definition of the PMU DFS registers. If not, we do not
- * register the clock notifier and the cpufreq driver. This
- * piece of code is only for compatibility with old Device
- * Trees.
- */
- np = of_find_compatible_node(NULL, NULL, "marvell,armada-xp-cpu-clock");
- if (!np)
- return 0;
-
- ret = of_address_to_resource(np, 1, &res);
- if (ret) {
- pr_warn(FW_WARN "not enabling cpufreq, deprecated armada-xp-cpu-clock binding\n");
- of_node_put(np);
- return 0;
- }
-
- of_node_put(np);
-
- /*
- * For each CPU, this loop registers the operating points
- * supported (which are the nominal CPU frequency and half of
- * it), and registers the clock notifier that will take care
- * of doing the PMSU part of a frequency transition.
- */
- for_each_possible_cpu(cpu) {
- struct device *cpu_dev;
- struct clk *clk;
- int ret;
-
- cpu_dev = get_cpu_device(cpu);
- if (!cpu_dev) {
- pr_err("Cannot get CPU %d\n", cpu);
- continue;
- }
-
- clk = clk_get(cpu_dev, 0);
- if (IS_ERR(clk)) {
- pr_err("Cannot get clock for CPU %d\n", cpu);
- return PTR_ERR(clk);
- }
-
- /*
- * In case of a failure of dev_pm_opp_add(), we don't
- * bother with cleaning up the registered OPP (there's
- * no function to do so), and simply cancel the
- * registration of the cpufreq device.
- */
- ret = dev_pm_opp_add(cpu_dev, clk_get_rate(clk), 0);
- if (ret) {
- clk_put(clk);
- return ret;
- }
-
- ret = dev_pm_opp_add(cpu_dev, clk_get_rate(clk) / 2, 0);
- if (ret) {
- clk_put(clk);
- return ret;
- }
- }
-
- platform_device_register_data(NULL, "cpufreq-dt", -1,
- &cpufreq_dt_pd, sizeof(cpufreq_dt_pd));
- return 0;
-}
-
-device_initcall(armada_xp_pmsu_cpufreq_init);
diff --git a/arch/arm/mach-omap2/pm.c b/arch/arm/mach-omap2/pm.c
index 58920bc..2f7b11d 100644
--- a/arch/arm/mach-omap2/pm.c
+++ b/arch/arm/mach-omap2/pm.c
@@ -277,13 +277,10 @@
static inline void omap_init_cpufreq(void)
{
- struct platform_device_info devinfo = { };
+ struct platform_device_info devinfo = { .name = "omap-cpufreq" };
if (!of_have_populated_dt())
- devinfo.name = "omap-cpufreq";
- else
- devinfo.name = "cpufreq-dt";
- platform_device_register_full(&devinfo);
+ platform_device_register_full(&devinfo);
}
static int __init omap2_common_pm_init(void)
diff --git a/arch/arm/mach-rockchip/rockchip.c b/arch/arm/mach-rockchip/rockchip.c
index 3f07cc5..beb71da 100644
--- a/arch/arm/mach-rockchip/rockchip.c
+++ b/arch/arm/mach-rockchip/rockchip.c
@@ -74,7 +74,6 @@
{
rockchip_suspend_init();
of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
- platform_device_register_simple("cpufreq-dt", 0, NULL, 0);
}
static const char * const rockchip_board_dt_compat[] = {
diff --git a/arch/arm/mach-shmobile/Makefile b/arch/arm/mach-shmobile/Makefile
index a65c80ac..c9ea0e6 100644
--- a/arch/arm/mach-shmobile/Makefile
+++ b/arch/arm/mach-shmobile/Makefile
@@ -38,7 +38,6 @@
# PM objects
obj-$(CONFIG_SUSPEND) += suspend.o
-obj-$(CONFIG_CPU_FREQ) += cpufreq.o
obj-$(CONFIG_PM_RCAR) += pm-rcar.o
obj-$(CONFIG_PM_RMOBILE) += pm-rmobile.o
obj-$(CONFIG_ARCH_RCAR_GEN2) += pm-rcar-gen2.o
diff --git a/arch/arm/mach-shmobile/common.h b/arch/arm/mach-shmobile/common.h
index 5464b7a..3b562d8 100644
--- a/arch/arm/mach-shmobile/common.h
+++ b/arch/arm/mach-shmobile/common.h
@@ -25,16 +25,9 @@
static inline void shmobile_smp_apmu_suspend_init(void) { }
#endif
-#ifdef CONFIG_CPU_FREQ
-int shmobile_cpufreq_init(void);
-#else
-static inline int shmobile_cpufreq_init(void) { return 0; }
-#endif
-
static inline void __init shmobile_init_late(void)
{
shmobile_suspend_init();
- shmobile_cpufreq_init();
}
#endif /* __ARCH_MACH_COMMON_H */
diff --git a/arch/arm/mach-shmobile/cpufreq.c b/arch/arm/mach-shmobile/cpufreq.c
deleted file mode 100644
index 634d701..0000000
--- a/arch/arm/mach-shmobile/cpufreq.c
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
- * CPUFreq support code for SH-Mobile ARM
- *
- * Copyright (C) 2014 Gaku Inami
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-
-#include <linux/platform_device.h>
-
-#include "common.h"
-
-int __init shmobile_cpufreq_init(void)
-{
- platform_device_register_simple("cpufreq-dt", -1, NULL, 0);
- return 0;
-}
diff --git a/arch/arm/mach-socfpga/core.h b/arch/arm/mach-socfpga/core.h
index 575195b..65e1817 100644
--- a/arch/arm/mach-socfpga/core.h
+++ b/arch/arm/mach-socfpga/core.h
@@ -38,6 +38,8 @@
extern void socfpga_sysmgr_init(void);
void socfpga_init_l2_ecc(void);
void socfpga_init_ocram_ecc(void);
+void socfpga_init_arria10_l2_ecc(void);
+void socfpga_init_arria10_ocram_ecc(void);
extern void __iomem *sys_manager_base_addr;
extern void __iomem *rst_manager_base_addr;
diff --git a/arch/arm/mach-socfpga/l2_cache.c b/arch/arm/mach-socfpga/l2_cache.c
index e3907ab..4267c95f 100644
--- a/arch/arm/mach-socfpga/l2_cache.c
+++ b/arch/arm/mach-socfpga/l2_cache.c
@@ -17,6 +17,20 @@
#include <linux/of_platform.h>
#include <linux/of_address.h>
+#include "core.h"
+
+/* A10 System Manager L2 ECC Control register */
+#define A10_MPU_CTRL_L2_ECC_OFST 0x0
+#define A10_MPU_CTRL_L2_ECC_EN BIT(0)
+
+/* A10 System Manager Global IRQ Mask register */
+#define A10_SYSMGR_ECC_INTMASK_CLR_OFST 0x98
+#define A10_SYSMGR_ECC_INTMASK_CLR_L2 BIT(0)
+
+/* A10 System Manager L2 ECC IRQ Clear register */
+#define A10_SYSMGR_MPU_CLEAR_L2_ECC_OFST 0xA8
+#define A10_SYSMGR_MPU_CLEAR_L2_ECC (BIT(31) | BIT(15))
+
void socfpga_init_l2_ecc(void)
{
struct device_node *np;
@@ -39,3 +53,38 @@
writel(0x01, mapped_l2_edac_addr);
iounmap(mapped_l2_edac_addr);
}
+
+void socfpga_init_arria10_l2_ecc(void)
+{
+ struct device_node *np;
+ void __iomem *mapped_l2_edac_addr;
+
+ /* Find the L2 EDAC device tree node */
+ np = of_find_compatible_node(NULL, NULL, "altr,socfpga-a10-l2-ecc");
+ if (!np) {
+ pr_err("Unable to find socfpga-a10-l2-ecc in dtb\n");
+ return;
+ }
+
+ mapped_l2_edac_addr = of_iomap(np, 0);
+ of_node_put(np);
+ if (!mapped_l2_edac_addr) {
+ pr_err("Unable to find L2 ECC mapping in dtb\n");
+ return;
+ }
+
+ if (!sys_manager_base_addr) {
+		pr_err("System Manager not mapped for L2 ECC\n");
+ goto exit;
+ }
+ /* Clear any pending IRQs */
+ writel(A10_SYSMGR_MPU_CLEAR_L2_ECC, (sys_manager_base_addr +
+ A10_SYSMGR_MPU_CLEAR_L2_ECC_OFST));
+ /* Enable ECC */
+ writel(A10_SYSMGR_ECC_INTMASK_CLR_L2, sys_manager_base_addr +
+ A10_SYSMGR_ECC_INTMASK_CLR_OFST);
+ writel(A10_MPU_CTRL_L2_ECC_EN, mapped_l2_edac_addr +
+ A10_MPU_CTRL_L2_ECC_OFST);
+exit:
+ iounmap(mapped_l2_edac_addr);
+}
diff --git a/arch/arm/mach-socfpga/ocram.c b/arch/arm/mach-socfpga/ocram.c
index 60ec643..10d6732 100644
--- a/arch/arm/mach-socfpga/ocram.c
+++ b/arch/arm/mach-socfpga/ocram.c
@@ -13,12 +13,15 @@
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/delay.h>
#include <linux/io.h>
#include <linux/genalloc.h>
#include <linux/module.h>
#include <linux/of_address.h>
#include <linux/of_platform.h>
+#include "core.h"
+
#define ALTR_OCRAM_CLEAR_ECC 0x00000018
#define ALTR_OCRAM_ECC_EN 0x00000019
@@ -47,3 +50,133 @@
iounmap(mapped_ocr_edac_addr);
}
+
+/* Arria10 OCRAM Section */
+#define ALTR_A10_ECC_CTRL_OFST 0x08
+#define ALTR_A10_OCRAM_ECC_EN_CTL (BIT(1) | BIT(0))
+#define ALTR_A10_ECC_INITA BIT(16)
+
+#define ALTR_A10_ECC_INITSTAT_OFST 0x0C
+#define ALTR_A10_ECC_INITCOMPLETEA BIT(0)
+#define ALTR_A10_ECC_INITCOMPLETEB BIT(8)
+
+#define ALTR_A10_ECC_ERRINTEN_OFST 0x10
+#define ALTR_A10_ECC_SERRINTEN BIT(0)
+
+#define ALTR_A10_ECC_INTSTAT_OFST 0x20
+#define ALTR_A10_ECC_SERRPENA BIT(0)
+#define ALTR_A10_ECC_DERRPENA BIT(8)
+#define ALTR_A10_ECC_ERRPENA_MASK (ALTR_A10_ECC_SERRPENA | \
+ ALTR_A10_ECC_DERRPENA)
+/* ECC Manager Defines */
+#define A10_SYSMGR_ECC_INTMASK_SET_OFST 0x94
+#define A10_SYSMGR_ECC_INTMASK_CLR_OFST 0x98
+#define A10_SYSMGR_ECC_INTMASK_OCRAM BIT(1)
+
+#define ALTR_A10_ECC_INIT_WATCHDOG_10US 10000
+
+static inline void ecc_set_bits(u32 bit_mask, void __iomem *ioaddr)
+{
+ u32 value = readl(ioaddr);
+
+ value |= bit_mask;
+ writel(value, ioaddr);
+}
+
+static inline void ecc_clear_bits(u32 bit_mask, void __iomem *ioaddr)
+{
+ u32 value = readl(ioaddr);
+
+ value &= ~bit_mask;
+ writel(value, ioaddr);
+}
+
+static inline int ecc_test_bits(u32 bit_mask, void __iomem *ioaddr)
+{
+ u32 value = readl(ioaddr);
+
+ return (value & bit_mask) ? 1 : 0;
+}
+
+/*
+ * This function uses the memory initialization block in the Arria10 ECC
+ * controller to initialize/clear the entire memory data and ECC data.
+ */
+static int altr_init_memory_port(void __iomem *ioaddr)
+{
+ int limit = ALTR_A10_ECC_INIT_WATCHDOG_10US;
+
+ ecc_set_bits(ALTR_A10_ECC_INITA, (ioaddr + ALTR_A10_ECC_CTRL_OFST));
+ while (limit--) {
+ if (ecc_test_bits(ALTR_A10_ECC_INITCOMPLETEA,
+ (ioaddr + ALTR_A10_ECC_INITSTAT_OFST)))
+ break;
+ udelay(1);
+ }
+ if (limit < 0)
+ return -EBUSY;
+
+ /* Clear any pending ECC interrupts */
+ writel(ALTR_A10_ECC_ERRPENA_MASK,
+ (ioaddr + ALTR_A10_ECC_INTSTAT_OFST));
+
+ return 0;
+}
+
+void socfpga_init_arria10_ocram_ecc(void)
+{
+ struct device_node *np;
+ int ret = 0;
+ void __iomem *ecc_block_base;
+
+ if (!sys_manager_base_addr) {
+ pr_err("SOCFPGA: sys-mgr is not initialized\n");
+ return;
+ }
+
+ /* Find the OCRAM EDAC device tree node */
+ np = of_find_compatible_node(NULL, NULL, "altr,socfpga-a10-ocram-ecc");
+ if (!np) {
+ pr_err("Unable to find socfpga-a10-ocram-ecc\n");
+ return;
+ }
+
+ /* Map the ECC Block */
+ ecc_block_base = of_iomap(np, 0);
+ of_node_put(np);
+ if (!ecc_block_base) {
+ pr_err("Unable to map OCRAM ECC block\n");
+ return;
+ }
+
+ /* Disable ECC */
+ writel(ALTR_A10_OCRAM_ECC_EN_CTL,
+ sys_manager_base_addr + A10_SYSMGR_ECC_INTMASK_SET_OFST);
+ ecc_clear_bits(ALTR_A10_ECC_SERRINTEN,
+ (ecc_block_base + ALTR_A10_ECC_ERRINTEN_OFST));
+ ecc_clear_bits(ALTR_A10_OCRAM_ECC_EN_CTL,
+ (ecc_block_base + ALTR_A10_ECC_CTRL_OFST));
+
+ /* Ensure all writes complete */
+ wmb();
+
+ /* Use HW initialization block to initialize memory for ECC */
+ ret = altr_init_memory_port(ecc_block_base);
+ if (ret) {
+ pr_err("ECC: cannot init OCRAM PORTA memory\n");
+ goto exit;
+ }
+
+ /* Enable ECC */
+ ecc_set_bits(ALTR_A10_OCRAM_ECC_EN_CTL,
+ (ecc_block_base + ALTR_A10_ECC_CTRL_OFST));
+ ecc_set_bits(ALTR_A10_ECC_SERRINTEN,
+ (ecc_block_base + ALTR_A10_ECC_ERRINTEN_OFST));
+ writel(ALTR_A10_OCRAM_ECC_EN_CTL,
+ sys_manager_base_addr + A10_SYSMGR_ECC_INTMASK_CLR_OFST);
+
+ /* Ensure all writes complete */
+ wmb();
+exit:
+ iounmap(ecc_block_base);
+}
diff --git a/arch/arm/mach-socfpga/socfpga.c b/arch/arm/mach-socfpga/socfpga.c
index 7e0aad2..dde14f7 100644
--- a/arch/arm/mach-socfpga/socfpga.c
+++ b/arch/arm/mach-socfpga/socfpga.c
@@ -66,6 +66,16 @@
socfpga_init_ocram_ecc();
}
+static void __init socfpga_arria10_init_irq(void)
+{
+ irqchip_init();
+ socfpga_sysmgr_init();
+ if (IS_ENABLED(CONFIG_EDAC_ALTERA_L2C))
+ socfpga_init_arria10_l2_ecc();
+ if (IS_ENABLED(CONFIG_EDAC_ALTERA_OCRAM))
+ socfpga_init_arria10_ocram_ecc();
+}
+
static void socfpga_cyclone5_restart(enum reboot_mode mode, const char *cmd)
{
u32 temp;
@@ -113,7 +123,7 @@
DT_MACHINE_START(SOCFPGA_A10, "Altera SOCFPGA Arria10")
.l2c_aux_val = 0,
.l2c_aux_mask = ~0,
- .init_irq = socfpga_init_irq,
+ .init_irq = socfpga_arria10_init_irq,
.restart = socfpga_arria10_restart,
.dt_compat = altera_a10_dt_match,
MACHINE_END
diff --git a/arch/arm/mach-sunxi/sunxi.c b/arch/arm/mach-sunxi/sunxi.c
index 3c15619..95dca8c 100644
--- a/arch/arm/mach-sunxi/sunxi.c
+++ b/arch/arm/mach-sunxi/sunxi.c
@@ -17,11 +17,6 @@
#include <asm/mach/arch.h>
-static void __init sunxi_dt_cpufreq_init(void)
-{
- platform_device_register_simple("cpufreq-dt", -1, NULL, 0);
-}
-
static const char * const sunxi_board_dt_compat[] = {
"allwinner,sun4i-a10",
"allwinner,sun5i-a10s",
@@ -32,7 +27,6 @@
DT_MACHINE_START(SUNXI_DT, "Allwinner sun4i/sun5i Families")
.dt_compat = sunxi_board_dt_compat,
- .init_late = sunxi_dt_cpufreq_init,
MACHINE_END
static const char * const sun6i_board_dt_compat[] = {
@@ -53,7 +47,6 @@
DT_MACHINE_START(SUN6I_DT, "Allwinner sun6i (A31) Family")
.init_time = sun6i_timer_init,
.dt_compat = sun6i_board_dt_compat,
- .init_late = sunxi_dt_cpufreq_init,
MACHINE_END
static const char * const sun7i_board_dt_compat[] = {
@@ -63,7 +56,6 @@
DT_MACHINE_START(SUN7I_DT, "Allwinner sun7i (A20) Family")
.dt_compat = sun7i_board_dt_compat,
- .init_late = sunxi_dt_cpufreq_init,
MACHINE_END
static const char * const sun8i_board_dt_compat[] = {
@@ -77,7 +69,6 @@
DT_MACHINE_START(SUN8I_DT, "Allwinner sun8i Family")
.init_time = sun6i_timer_init,
.dt_compat = sun8i_board_dt_compat,
- .init_late = sunxi_dt_cpufreq_init,
MACHINE_END
static const char * const sun9i_board_dt_compat[] = {
diff --git a/arch/arm/mach-zynq/common.c b/arch/arm/mach-zynq/common.c
index 860ffb6..da876d2 100644
--- a/arch/arm/mach-zynq/common.c
+++ b/arch/arm/mach-zynq/common.c
@@ -110,7 +110,6 @@
*/
static void __init zynq_init_machine(void)
{
- struct platform_device_info devinfo = { .name = "cpufreq-dt", };
struct soc_device_attribute *soc_dev_attr;
struct soc_device *soc_dev;
struct device *parent = NULL;
@@ -145,7 +144,6 @@
of_platform_populate(NULL, of_default_bus_match_table, NULL, parent);
platform_device_register(&zynq_cpuidle_device);
- platform_device_register_full(&devinfo);
}
static void __init zynq_timer_init(void)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 4f43622..8845c0d 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -11,6 +11,7 @@
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_USE_CMPXCHG_LOCKREF
select ARCH_SUPPORTS_ATOMIC_RMW
+ select ARCH_SUPPORTS_NUMA_BALANCING
select ARCH_WANT_OPTIONAL_GPIOLIB
select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
select ARCH_WANT_FRAME_POINTERS
@@ -58,11 +59,14 @@
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
+ select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+ select HAVE_ARM_SMCCC
select HAVE_BPF_JIT
select HAVE_C_RECORDMCOUNT
select HAVE_CC_STACKPROTECTOR
select HAVE_CMPXCHG_DOUBLE
select HAVE_CMPXCHG_LOCAL
+ select HAVE_CONTEXT_TRACKING
select HAVE_DEBUG_BUGVERBOSE
select HAVE_DEBUG_KMEMLEAK
select HAVE_DMA_API_DEBUG
@@ -76,6 +80,7 @@
select HAVE_HW_BREAKPOINT if PERF_EVENTS
select HAVE_IRQ_TIME_ACCOUNTING
select HAVE_MEMBLOCK
+ select HAVE_MEMBLOCK_NODE_MAP if NUMA
select HAVE_PATA_PLATFORM
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
@@ -89,15 +94,13 @@
select NO_BOOTMEM
select OF
select OF_EARLY_FLATTREE
+ select OF_NUMA if NUMA && OF
select OF_RESERVED_MEM
select PERF_USE_VMALLOC
select POWER_RESET
select POWER_SUPPLY
- select RTC_LIB
select SPARSE_IRQ
select SYSCTL_EXCEPTION_TRACE
- select HAVE_CONTEXT_TRACKING
- select HAVE_ARM_SMCCC
help
ARM 64-bit (AArch64) Linux support.
@@ -546,10 +549,35 @@
Say Y here to experiment with turning CPUs off and on. CPUs
can be controlled through /sys/devices/system/cpu.
+# Common NUMA Features
+config NUMA
+ bool "Numa Memory Allocation and Scheduler Support"
+ depends on SMP
+ help
+ Enable NUMA (Non Uniform Memory Access) support.
+
+	  The kernel will try to allocate memory used by a CPU on that
+	  CPU's local memory node, adding NUMA awareness to the kernel.
+
+config NODES_SHIFT
+ int "Maximum NUMA Nodes (as a power of 2)"
+ range 1 10
+ default "2"
+ depends on NEED_MULTIPLE_NODES
+ help
+ Specify the maximum number of NUMA Nodes available on the target
+ system. Increases memory reserved to accommodate various tables.
+
+config USE_PERCPU_NUMA_NODE_ID
+ def_bool y
+ depends on NUMA
+
source kernel/Kconfig.preempt
source kernel/Kconfig.hz
config ARCH_SUPPORTS_DEBUG_PAGEALLOC
+ depends on !HIBERNATION
def_bool y
config ARCH_HAS_HOLES_MEMORYMODEL
@@ -578,9 +606,6 @@
config ARCH_WANT_HUGE_PMD_SHARE
def_bool y if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36)
-config HAVE_ARCH_TRANSPARENT_HUGEPAGE
- def_bool y
-
config ARCH_HAS_CACHE_LINE_SIZE
def_bool y
@@ -953,6 +978,14 @@
source "kernel/power/Kconfig"
+config ARCH_HIBERNATION_POSSIBLE
+ def_bool y
+ depends on CPU_PM
+
+config ARCH_HIBERNATION_HEADER
+ def_bool y
+ depends on HIBERNATION
+
config ARCH_SUSPEND_POSSIBLE
def_bool y
diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug
index 7e76845..710fde4 100644
--- a/arch/arm64/Kconfig.debug
+++ b/arch/arm64/Kconfig.debug
@@ -59,7 +59,7 @@
If in doubt, say Y
config DEBUG_ALIGN_RODATA
- depends on DEBUG_RODATA && ARM64_4K_PAGES
+ depends on DEBUG_RODATA
bool "Align linker sections up to SECTION_SIZE"
help
If this option is enabled, sections that may potentially be marked as
diff --git a/arch/arm64/boot/dts/broadcom/vulcan.dtsi b/arch/arm64/boot/dts/broadcom/vulcan.dtsi
index 85820e2..34e11a9 100644
--- a/arch/arm64/boot/dts/broadcom/vulcan.dtsi
+++ b/arch/arm64/boot/dts/broadcom/vulcan.dtsi
@@ -86,7 +86,7 @@
};
pmu {
- compatible = "arm,armv8-pmuv3";
+ compatible = "brcm,vulcan-pmu", "arm,armv8-pmuv3";
interrupts = <GIC_PPI 7 IRQ_TYPE_LEVEL_HIGH>; /* PMU overflow */
};
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index a44ef99..c5e0132 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -264,6 +264,7 @@
CONFIG_PHY_HI6220_USB=y
CONFIG_PHY_XGENE=y
CONFIG_ARM_SCPI_PROTOCOL=y
+CONFIG_ACPI=y
CONFIG_EXT2_FS=y
CONFIG_EXT3_FS=y
CONFIG_FANOTIFY=y
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 70f7b9e..10b017c 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -1,5 +1,5 @@
/*
- * Based on arch/arm/include/asm/assembler.h
+ * Based on arch/arm/include/asm/assembler.h, arch/arm/mm/proc-macros.S
*
* Copyright (C) 1996-2000 Russell King
* Copyright (C) 2012 ARM Ltd.
@@ -23,22 +23,13 @@
#ifndef __ASM_ASSEMBLER_H
#define __ASM_ASSEMBLER_H
+#include <asm/asm-offsets.h>
+#include <asm/page.h>
+#include <asm/pgtable-hwdef.h>
#include <asm/ptrace.h>
#include <asm/thread_info.h>
/*
- * Stack pushing/popping (register pairs only). Equivalent to store decrement
- * before, load increment after.
- */
- .macro push, xreg1, xreg2
- stp \xreg1, \xreg2, [sp, #-16]!
- .endm
-
- .macro pop, xreg1, xreg2
- ldp \xreg1, \xreg2, [sp], #16
- .endm
-
-/*
* Enable and disable interrupts.
*/
.macro disable_irq
@@ -212,6 +203,102 @@
.endm
/*
+ * vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm)
+ */
+ .macro vma_vm_mm, rd, rn
+ ldr \rd, [\rn, #VMA_VM_MM]
+ .endm
+
+/*
+ * mmid - get context id from mm pointer (mm->context.id)
+ */
+ .macro mmid, rd, rn
+ ldr \rd, [\rn, #MM_CONTEXT_ID]
+ .endm
+
+/*
+ * dcache_line_size - get the minimum D-cache line size from the CTR register.
+ */
+ .macro dcache_line_size, reg, tmp
+ mrs \tmp, ctr_el0 // read CTR
+ ubfm \tmp, \tmp, #16, #19 // cache line size encoding
+ mov \reg, #4 // bytes per word
+ lsl \reg, \reg, \tmp // actual cache line size
+ .endm
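+
+/* Example (editorial): a DminLine encoding of 4 yields 4 << 4 = 64 bytes. */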
+
+/*
+ * icache_line_size - get the minimum I-cache line size from the CTR register.
+ */
+ .macro icache_line_size, reg, tmp
+ mrs \tmp, ctr_el0 // read CTR
+ and \tmp, \tmp, #0xf // cache line size encoding
+ mov \reg, #4 // bytes per word
+ lsl \reg, \reg, \tmp // actual cache line size
+ .endm
+
+/*
+ * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map
+ */
+ .macro tcr_set_idmap_t0sz, valreg, tmpreg
+#ifndef CONFIG_ARM64_VA_BITS_48
+ ldr_l \tmpreg, idmap_t0sz
+ bfi \valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH
+#endif
+ .endm
+
+/*
+ * Macro to perform a data cache maintenance for the interval
+ * [kaddr, kaddr + size)
+ *
+ * op: operation passed to dc instruction
+ * domain:	domain used in dsb instruction
+ * kaddr: starting virtual address of the region
+ * size: size of the region
+ * Corrupts: kaddr, size, tmp1, tmp2
+ */
+ .macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2
+ dcache_line_size \tmp1, \tmp2
+ add \size, \kaddr, \size
+ sub \tmp2, \tmp1, #1
+ bic \kaddr, \kaddr, \tmp2
+9998: dc \op, \kaddr
+ add \kaddr, \kaddr, \tmp1
+ cmp \kaddr, \size
+ b.lo 9998b
+ dsb \domain
+ .endm
+
+/*
+ * reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present
+ */
+ .macro reset_pmuserenr_el0, tmpreg
+ mrs \tmpreg, id_aa64dfr0_el1 // Check ID_AA64DFR0_EL1 PMUVer
+ sbfx \tmpreg, \tmpreg, #8, #4
+ cmp \tmpreg, #1 // Skip if no PMU present
+ b.lt 9000f
+ msr pmuserenr_el0, xzr // Disable PMU access from EL0
+9000:
+ .endm
+
+/*
+ * copy_page - copy src to dest using temp registers t1-t8
+ */
+ .macro copy_page dest:req src:req t1:req t2:req t3:req t4:req t5:req t6:req t7:req t8:req
+9998: ldp \t1, \t2, [\src]
+ ldp \t3, \t4, [\src, #16]
+ ldp \t5, \t6, [\src, #32]
+ ldp \t7, \t8, [\src, #48]
+ add \src, \src, #64
+ stnp \t1, \t2, [\dest]
+ stnp \t3, \t4, [\dest, #16]
+ stnp \t5, \t6, [\dest, #32]
+ stnp \t7, \t8, [\dest, #48]
+ add \dest, \dest, #64
+ tst \src, #(PAGE_SIZE - 1)
+ b.ne 9998b
+ .endm
+
+/*
* Annotate a function as position independent, i.e., safe to be called before
* the kernel virtual mapping is activated.
*/
@@ -233,4 +320,24 @@
.long \sym\()_hi32
.endm
+ /*
+ * mov_q - move an immediate constant into a 64-bit register using
+ * between 2 and 4 movz/movk instructions (depending on the
+ * magnitude and sign of the operand)
+ */
+ .macro mov_q, reg, val
+ .if (((\val) >> 31) == 0 || ((\val) >> 31) == 0x1ffffffff)
+ movz \reg, :abs_g1_s:\val
+ .else
+ .if (((\val) >> 47) == 0 || ((\val) >> 47) == 0x1ffff)
+ movz \reg, :abs_g2_s:\val
+ .else
+ movz \reg, :abs_g3:\val
+ movk \reg, :abs_g2_nc:\val
+ .endif
+ movk \reg, :abs_g1_nc:\val
+ .endif
+ movk \reg, :abs_g0_nc:\val
+ .endm
+
#endif /* __ASM_ASSEMBLER_H */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index b9b6494..224efe7 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -35,8 +35,9 @@
#define ARM64_ALT_PAN_NOT_UAO 10
#define ARM64_HAS_VIRT_HOST_EXTN 11
#define ARM64_WORKAROUND_CAVIUM_27456 12
+#define ARM64_HAS_32BIT_EL0 13
-#define ARM64_NCAPS 13
+#define ARM64_NCAPS 14
#ifndef __ASSEMBLY__
@@ -77,10 +78,17 @@
struct arm64_ftr_bits *ftr_bits;
};
+/* scope of capability check */
+enum {
+ SCOPE_SYSTEM,
+ SCOPE_LOCAL_CPU,
+};
+
struct arm64_cpu_capabilities {
const char *desc;
u16 capability;
- bool (*matches)(const struct arm64_cpu_capabilities *);
+ int def_scope; /* default scope */
+ bool (*matches)(const struct arm64_cpu_capabilities *caps, int scope);
void (*enable)(void *); /* Called on all active CPUs */
union {
struct { /* To be used for erratum handling only */
@@ -101,6 +109,8 @@
extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
+bool this_cpu_has_cap(unsigned int cap);
+
static inline bool cpu_have_feature(unsigned int num)
{
return elf_hwcap & (1UL << num);
@@ -170,12 +180,20 @@
cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_BIGENDEL0_SHIFT) == 0x1;
}
+static inline bool id_aa64pfr0_32bit_el0(u64 pfr0)
+{
+ u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL0_SHIFT);
+
+ return val == ID_AA64PFR0_EL0_32BIT_64BIT;
+}
+
void __init setup_cpu_features(void);
void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
const char *info);
void check_local_cpu_errata(void);
+void verify_local_cpu_errata(void);
void verify_local_cpu_capabilities(void);
u64 read_system_reg(u32 id);
@@ -185,6 +203,11 @@
return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1));
}
+static inline bool system_supports_32bit_el0(void)
+{
+ return cpus_have_cap(ARM64_HAS_32BIT_EL0);
+}
+
static inline bool system_supports_mixed_endian_el0(void)
{
return id_aa64mmfr0_mixed_endian_el0(read_system_reg(SYS_ID_AA64MMFR0_EL1));
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index 8e88a69..622db3c 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -4,6 +4,7 @@
#include <asm/io.h>
#include <asm/mmu_context.h>
#include <asm/neon.h>
+#include <asm/ptrace.h>
#include <asm/tlbflush.h>
#ifdef CONFIG_EFI
@@ -14,32 +15,29 @@
int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md);
-#define efi_call_virt(f, ...) \
+#define efi_set_mapping_permissions efi_create_mapping
+
+#define arch_efi_call_virt_setup() \
({ \
- efi_##f##_t *__f; \
- efi_status_t __s; \
- \
kernel_neon_begin(); \
efi_virtmap_load(); \
- __f = efi.systab->runtime->f; \
- __s = __f(__VA_ARGS__); \
- efi_virtmap_unload(); \
- kernel_neon_end(); \
- __s; \
})
-#define __efi_call_virt(f, ...) \
+#define arch_efi_call_virt(f, args...) \
({ \
efi_##f##_t *__f; \
- \
- kernel_neon_begin(); \
- efi_virtmap_load(); \
__f = efi.systab->runtime->f; \
- __f(__VA_ARGS__); \
+ __f(args); \
+})
+
+#define arch_efi_call_virt_teardown() \
+({ \
efi_virtmap_unload(); \
kernel_neon_end(); \
})
+#define ARCH_EFI_IRQ_FLAGS_MASK (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT)
+
/* arch specific definitions used by the stub code */
/*
@@ -50,7 +48,16 @@
#define EFI_FDT_ALIGN SZ_2M /* used by allocate_new_fdt_and_exit_boot() */
#define MAX_FDT_OFFSET SZ_512M
-#define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__)
+#define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__)
+#define __efi_call_early(f, ...) f(__VA_ARGS__)
+#define efi_is_64bit() (true)
+
+#define alloc_screen_info(x...) &screen_info
+#define free_screen_info(x...)
+
+static inline void efifb_setup_from_dmi(struct screen_info *si, const char *opt)
+{
+}
#define EFI_ALLOC_ALIGN SZ_64K
diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index 24ed037..7a09c48 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -177,7 +177,8 @@
/* AArch32 EABI. */
#define EF_ARM_EABI_MASK 0xff000000
-#define compat_elf_check_arch(x) (((x)->e_machine == EM_ARM) && \
+#define compat_elf_check_arch(x) (system_supports_32bit_el0() && \
+ ((x)->e_machine == EM_ARM) && \
((x)->e_flags & EF_ARM_EABI_MASK))
#define compat_start_thread compat_start_thread
diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
index 5c6375d..7e51d1b 100644
--- a/arch/arm64/include/asm/kernel-pgtable.h
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -19,6 +19,7 @@
#ifndef __ASM_KERNEL_PGTABLE_H
#define __ASM_KERNEL_PGTABLE_H
+#include <asm/sparsemem.h>
/*
* The linear mapping and the start of memory are both 2M aligned (per
@@ -86,10 +87,24 @@
* (64k granule), or a multiple that can be mapped using contiguous bits
* in the page tables: 32 * PMD_SIZE (16k granule)
*/
-#ifdef CONFIG_ARM64_64K_PAGES
-#define ARM64_MEMSTART_ALIGN SZ_512M
+#if defined(CONFIG_ARM64_4K_PAGES)
+#define ARM64_MEMSTART_SHIFT PUD_SHIFT
+#elif defined(CONFIG_ARM64_16K_PAGES)
+#define ARM64_MEMSTART_SHIFT (PMD_SHIFT + 5)
#else
-#define ARM64_MEMSTART_ALIGN SZ_1G
+#define ARM64_MEMSTART_SHIFT PMD_SHIFT
+#endif
+
+/*
+ * sparsemem vmemmap imposes an additional requirement on the alignment of
+ * memstart_addr, due to the fact that the base of the vmemmap region
+ * has a direct correspondence, and needs to appear sufficiently aligned
+ * in the virtual address space.
+ */
+#if defined(CONFIG_SPARSEMEM_VMEMMAP) && ARM64_MEMSTART_SHIFT < SECTION_SIZE_BITS
+#define ARM64_MEMSTART_ALIGN (1UL << SECTION_SIZE_BITS)
+#else
+#define ARM64_MEMSTART_ALIGN (1UL << ARM64_MEMSTART_SHIFT)
#endif
#endif /* __ASM_KERNEL_PGTABLE_H */
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 3f29887..1b3dc9df 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -84,17 +84,6 @@
#define HCR_INT_OVERRIDE (HCR_FMO | HCR_IMO)
#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
-/* Hyp System Control Register (SCTLR_EL2) bits */
-#define SCTLR_EL2_EE (1 << 25)
-#define SCTLR_EL2_WXN (1 << 19)
-#define SCTLR_EL2_I (1 << 12)
-#define SCTLR_EL2_SA (1 << 3)
-#define SCTLR_EL2_C (1 << 2)
-#define SCTLR_EL2_A (1 << 1)
-#define SCTLR_EL2_M 1
-#define SCTLR_EL2_FLAGS (SCTLR_EL2_M | SCTLR_EL2_A | SCTLR_EL2_C | \
- SCTLR_EL2_SA | SCTLR_EL2_I)
-
/* TCR_EL2 Registers bits */
#define TCR_EL2_RES1 ((1 << 31) | (1 << 23))
#define TCR_EL2_TBI (1 << 20)
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 40a0a24..7561f63 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -22,6 +22,8 @@
#define ARM_EXCEPTION_IRQ 0
#define ARM_EXCEPTION_TRAP 1
+/* The hyp-stub will return this for any kvm_call_hyp() call */
+#define ARM_EXCEPTION_HYP_GONE 2
#define KVM_ARM64_DEBUG_DIRTY_SHIFT 0
#define KVM_ARM64_DEBUG_DIRTY (1 << KVM_ARM64_DEBUG_DIRTY_SHIFT)
@@ -40,6 +42,7 @@
extern char __kvm_hyp_init[];
extern char __kvm_hyp_init_end[];
+extern char __kvm_hyp_reset[];
extern char __kvm_hyp_vector[];
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index f5c6bd2..90a8d23 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -46,6 +46,8 @@
int __attribute_const__ kvm_target_cpu(void);
int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
int kvm_arch_dev_ioctl_check_extension(long ext);
+unsigned long kvm_hyp_reset_entry(void);
+void __extended_idmap_trampoline(phys_addr_t boot_pgd, phys_addr_t idmap_start);
struct kvm_arch {
/* The VMID generation used for the virt. memory system */
@@ -352,7 +354,17 @@
hyp_stack_ptr, vector_ptr);
}
-static inline void kvm_arch_hardware_disable(void) {}
+static inline void __cpu_reset_hyp_mode(phys_addr_t boot_pgd_ptr,
+ phys_addr_t phys_idmap_start)
+{
+ /*
+ * Call reset code, and switch back to stub hyp vectors.
+ * Uses __kvm_call_hyp() to avoid kaslr's kvm_ksym_ref() translation.
+ */
+ __kvm_call_hyp((void *)kvm_hyp_reset_entry(),
+ boot_pgd_ptr, phys_idmap_start);
+}
+
static inline void kvm_arch_hardware_unsetup(void) {}
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 22732a5..e8d39d4 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -109,6 +109,7 @@
phys_addr_t kvm_mmu_get_httbr(void);
phys_addr_t kvm_mmu_get_boot_httbr(void);
phys_addr_t kvm_get_idmap_vector(void);
+phys_addr_t kvm_get_idmap_start(void);
int kvm_mmu_init(void);
void kvm_clear_hyp_idmap(void);
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 12f8a00..72a3025 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -40,6 +40,21 @@
#define PCI_IO_SIZE SZ_16M
/*
+ * Log2 of the upper bound of the size of a struct page. Used for sizing
+ * the vmemmap region only, does not affect actual memory footprint.
+ * We don't use sizeof(struct page) directly since taking its size here
+ * requires its definition to be available at this point in the inclusion
+ * chain, and it may not be a power of 2 in the first place.
+ */
+#define STRUCT_PAGE_MAX_SHIFT 6
+
+/*
+ * VMEMMAP_SIZE - allows the whole linear region to be covered by
+ * a struct page array
+ */
+#define VMEMMAP_SIZE (UL(1) << (VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT))
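+
+/*
+ * Worked example (editorial, assuming 48-bit VAs and 4K pages): the linear
+ * region spans 1 << (VA_BITS - 1) bytes = 128 TB, i.e. 1 << 35 pages; at up
+ * to 1 << STRUCT_PAGE_MAX_SHIFT = 64 bytes per struct page, the array needs
+ * at most 1 << 41 bytes = 2 TB of vmemmap space.
+ */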
+
+/*
* PAGE_OFFSET - the virtual address of the start of the kernel image (top
* (VA_BITS - 1))
* VA_BITS - the maximum number of bits for virtual addresses.
@@ -54,7 +69,8 @@
#define MODULES_END (MODULES_VADDR + MODULES_VSIZE)
#define MODULES_VADDR (VA_START + KASAN_SHADOW_SIZE)
#define MODULES_VSIZE (SZ_128M)
-#define PCI_IO_END (PAGE_OFFSET - SZ_2M)
+#define VMEMMAP_START (PAGE_OFFSET - VMEMMAP_SIZE)
+#define PCI_IO_END (VMEMMAP_START - SZ_2M)
#define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE)
#define FIXADDR_TOP (PCI_IO_START - SZ_2M)
#define TASK_SIZE_64 (UL(1) << VA_BITS)
@@ -71,6 +87,9 @@
#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 4))
+#define KERNEL_START _text
+#define KERNEL_END _end
+
/*
* The size of the KASAN shadow region. This should be 1/8th of the
* size of the entire kernel virtual address space.
@@ -192,9 +211,19 @@
*/
#define ARCH_PFN_OFFSET ((unsigned long)PHYS_PFN_OFFSET)
+#ifndef CONFIG_SPARSEMEM_VMEMMAP
#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
-#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
+#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
+#else
+#define __virt_to_pgoff(kaddr) (((u64)(kaddr) & ~PAGE_OFFSET) / PAGE_SIZE * sizeof(struct page))
+#define __page_to_voff(page)	(((u64)(page) & ~VMEMMAP_START) * PAGE_SIZE / sizeof(struct page))
+#define page_to_virt(page) ((void *)((__page_to_voff(page)) | PAGE_OFFSET))
+#define virt_to_page(vaddr) ((struct page *)((__virt_to_pgoff(vaddr)) | VMEMMAP_START))
+
+#define virt_addr_valid(kaddr) pfn_valid((((u64)(kaddr) & ~PAGE_OFFSET) \
+ + PHYS_OFFSET) >> PAGE_SHIFT)
+#endif
#endif
#include <asm-generic/memory_model.h>
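The VMEMMAP_SIZE definition introduced above is pure arithmetic and can be sanity-checked in isolation. A minimal standalone sketch, assuming VA_BITS=48 and 4 KB pages (illustrative constants, not read from a real kernel config):

#include <stdio.h>

#define VA_BITS			48
#define PAGE_SHIFT		12
#define STRUCT_PAGE_MAX_SHIFT	6	/* 2^6 = 64 bytes >= sizeof(struct page) */

int main(void)
{
	/*
	 * Half of the VA space (the linear map) holds at most
	 * 2^(VA_BITS - PAGE_SHIFT - 1) page frames, each needing up to
	 * 2^STRUCT_PAGE_MAX_SHIFT bytes of struct page.
	 */
	unsigned long long vmemmap_size =
		1ULL << (VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT);

	printf("VMEMMAP_SIZE = %llu GiB\n", vmemmap_size >> 30);	/* 2048 */
	return 0;
}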
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index 990124a..97b1d8f 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -29,6 +29,7 @@
#define ASID(mm) ((mm)->context.id.counter & 0xffff)
extern void paging_init(void);
+extern void bootmem_init(void);
extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt);
extern void init_mem_pgprot(void);
extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
diff --git a/arch/arm64/include/asm/mmzone.h b/arch/arm64/include/asm/mmzone.h
new file mode 100644
index 0000000..a0de9e6
--- /dev/null
+++ b/arch/arm64/include/asm/mmzone.h
@@ -0,0 +1,12 @@
+#ifndef __ASM_MMZONE_H
+#define __ASM_MMZONE_H
+
+#ifdef CONFIG_NUMA
+
+#include <asm/numa.h>
+
+extern struct pglist_data *node_data[];
+#define NODE_DATA(nid) (node_data[(nid)])
+
+#endif /* CONFIG_NUMA */
+#endif /* __ASM_MMZONE_H */
diff --git a/arch/arm64/include/asm/numa.h b/arch/arm64/include/asm/numa.h
new file mode 100644
index 0000000..e9b4f29
--- /dev/null
+++ b/arch/arm64/include/asm/numa.h
@@ -0,0 +1,45 @@
+#ifndef __ASM_NUMA_H
+#define __ASM_NUMA_H
+
+#include <asm/topology.h>
+
+#ifdef CONFIG_NUMA
+
+/* currently, arm64 implements flat NUMA topology */
+#define parent_node(node) (node)
+
+int __node_distance(int from, int to);
+#define node_distance(a, b) __node_distance(a, b)
+
+extern nodemask_t numa_nodes_parsed __initdata;
+
+/* Mappings between node number and cpus on that node. */
+extern cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
+void numa_clear_node(unsigned int cpu);
+
+#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+const struct cpumask *cpumask_of_node(int node);
+#else
+/* Returns a pointer to the cpumask of CPUs on Node 'node'. */
+static inline const struct cpumask *cpumask_of_node(int node)
+{
+ return node_to_cpumask_map[node];
+}
+#endif
+
+void __init arm64_numa_init(void);
+int __init numa_add_memblk(int nodeid, u64 start, u64 end);
+void __init numa_set_distance(int from, int to, int distance);
+void __init numa_free_distance(void);
+void __init early_map_cpu_to_node(unsigned int cpu, int nid);
+void numa_store_cpu_info(unsigned int cpu);
+
+#else /* CONFIG_NUMA */
+
+static inline void numa_store_cpu_info(unsigned int cpu) { }
+static inline void arm64_numa_init(void) { }
+static inline void early_map_cpu_to_node(unsigned int cpu, int nid) { }
+
+#endif /* CONFIG_NUMA */
+
+#endif /* __ASM_NUMA_H */
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index ae615b9..17b45f7 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -19,6 +19,8 @@
#ifndef __ASM_PAGE_H
#define __ASM_PAGE_H
+#include <linux/const.h>
+
/* PAGE_SHIFT determines the page size */
/* CONT_SHIFT determines the number of pages which can be tracked together */
#ifdef CONFIG_ARM64_64K_PAGES
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 5c25b83..9786f77 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -133,7 +133,6 @@
* Section
*/
#define PMD_SECT_VALID (_AT(pmdval_t, 1) << 0)
-#define PMD_SECT_PROT_NONE (_AT(pmdval_t, 1) << 58)
#define PMD_SECT_USER (_AT(pmdval_t, 1) << 6) /* AP[1] */
#define PMD_SECT_RDONLY (_AT(pmdval_t, 1) << 7) /* AP[2] */
#define PMD_SECT_S (_AT(pmdval_t, 3) << 8)
diff --git a/arch/arm64/include/asm/pgtable-types.h b/arch/arm64/include/asm/pgtable-types.h
index 2b1bd7e..69b2fd4 100644
--- a/arch/arm64/include/asm/pgtable-types.h
+++ b/arch/arm64/include/asm/pgtable-types.h
@@ -27,10 +27,6 @@
typedef u64 pudval_t;
typedef u64 pgdval_t;
-#undef STRICT_MM_TYPECHECKS
-
-#ifdef STRICT_MM_TYPECHECKS
-
/*
* These are used to make use of C type-checking..
*/
@@ -58,34 +54,6 @@
#define pgprot_val(x) ((x).pgprot)
#define __pgprot(x) ((pgprot_t) { (x) } )
-#else /* !STRICT_MM_TYPECHECKS */
-
-typedef pteval_t pte_t;
-#define pte_val(x) (x)
-#define __pte(x) (x)
-
-#if CONFIG_PGTABLE_LEVELS > 2
-typedef pmdval_t pmd_t;
-#define pmd_val(x) (x)
-#define __pmd(x) (x)
-#endif
-
-#if CONFIG_PGTABLE_LEVELS > 3
-typedef pudval_t pud_t;
-#define pud_val(x) (x)
-#define __pud(x) (x)
-#endif
-
-typedef pgdval_t pgd_t;
-#define pgd_val(x) (x)
-#define __pgd(x) (x)
-
-typedef pteval_t pgprot_t;
-#define pgprot_val(x) (x)
-#define __pgprot(x) (x)
-
-#endif /* STRICT_MM_TYPECHECKS */
-
#if CONFIG_PGTABLE_LEVELS == 2
#include <asm-generic/pgtable-nopmd.h>
#elif CONFIG_PGTABLE_LEVELS == 3
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 989fef1..2da46ae 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -24,22 +24,16 @@
#include <asm/pgtable-prot.h>
/*
- * VMALLOC and SPARSEMEM_VMEMMAP ranges.
+ * VMALLOC range.
*
- * VMEMAP_SIZE: allows the whole linear region to be covered by a struct page array
- * (rounded up to PUD_SIZE).
* VMALLOC_START: beginning of the kernel vmalloc space
- * VMALLOC_END: extends to the available space below vmmemmap, PCI I/O space,
- * fixed mappings and modules
+ * VMALLOC_END: extends to the available space below vmemmap, PCI I/O space
+ * and fixed mappings
*/
-#define VMEMMAP_SIZE ALIGN((1UL << (VA_BITS - PAGE_SHIFT)) * sizeof(struct page), PUD_SIZE)
-
#define VMALLOC_START (MODULES_END)
#define VMALLOC_END (PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K)
-#define VMEMMAP_START (VMALLOC_END + SZ_64K)
-#define vmemmap ((struct page *)VMEMMAP_START - \
- SECTION_ALIGN_DOWN(memstart_addr >> PAGE_SHIFT))
+#define vmemmap ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT))
#define FIRST_USER_ADDRESS 0UL
@@ -58,7 +52,7 @@
* for zero-mapped memory areas etc..
*/
extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
-#define ZERO_PAGE(vaddr) virt_to_page(empty_zero_page)
+#define ZERO_PAGE(vaddr) pfn_to_page(PHYS_PFN(__pa(empty_zero_page)))
#define pte_ERROR(pte) __pte_error(__FILE__, __LINE__, pte_val(pte))
@@ -272,6 +266,21 @@
return __pgprot(pgprot_val(prot) & ~PTE_TABLE_BIT);
}
+#ifdef CONFIG_NUMA_BALANCING
+/*
+ * See the comment in include/asm-generic/pgtable.h
+ */
+static inline int pte_protnone(pte_t pte)
+{
+ return (pte_val(pte) & (PTE_VALID | PTE_PROT_NONE)) == PTE_PROT_NONE;
+}
+
+static inline int pmd_protnone(pmd_t pmd)
+{
+ return pte_protnone(pmd_pte(pmd));
+}
+#endif
+
/*
* THP definitions.
*/
@@ -280,15 +289,16 @@
#define pmd_trans_huge(pmd) (pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT))
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+#define pmd_present(pmd) pte_present(pmd_pte(pmd))
#define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd))
#define pmd_young(pmd) pte_young(pmd_pte(pmd))
#define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd)))
#define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd)))
#define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd)))
-#define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd)))
+#define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd)))
#define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd)))
#define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd)))
-#define pmd_mknotpresent(pmd) (__pmd(pmd_val(pmd) & ~PMD_TYPE_MASK))
+#define pmd_mknotpresent(pmd) (__pmd(pmd_val(pmd) & ~PMD_SECT_VALID))
#define __HAVE_ARCH_PMD_WRITE
#define pmd_write(pmd) pte_write(pmd_pte(pmd))
@@ -327,9 +337,8 @@
unsigned long size, pgprot_t vma_prot);
#define pmd_none(pmd) (!pmd_val(pmd))
-#define pmd_present(pmd) (pmd_val(pmd))
-#define pmd_bad(pmd) (!(pmd_val(pmd) & 2))
+#define pmd_bad(pmd) (!(pmd_val(pmd) & PMD_TABLE_BIT))
#define pmd_table(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \
PMD_TYPE_TABLE)
@@ -394,7 +403,7 @@
#define pmd_ERROR(pmd) __pmd_error(__FILE__, __LINE__, pmd_val(pmd))
#define pud_none(pud) (!pud_val(pud))
-#define pud_bad(pud) (!(pud_val(pud) & 2))
+#define pud_bad(pud) (!(pud_val(pud) & PUD_TABLE_BIT))
#define pud_present(pud) (pud_val(pud))
static inline void set_pud(pud_t *pudp, pud_t pud)
@@ -526,6 +535,21 @@
}
#ifdef CONFIG_ARM64_HW_AFDBM
+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+extern int ptep_set_access_flags(struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep,
+ pte_t entry, int dirty);
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
+static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp,
+ pmd_t entry, int dirty)
+{
+ return ptep_set_access_flags(vma, address, (pte_t *)pmdp, pmd_pte(entry), dirty);
+}
+#endif
+
/*
* Atomic pte/pmd modifications.
*/
@@ -578,9 +602,9 @@
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define __HAVE_ARCH_PMDP_GET_AND_CLEAR
-static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
- unsigned long address, pmd_t *pmdp)
+#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
+static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
+ unsigned long address, pmd_t *pmdp)
{
return pte_pmd(ptep_get_and_clear(mm, address, (pte_t *)pmdp));
}
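The pte_protnone() test added above is plain bit arithmetic: a PROT_NONE entry keeps the software PTE_PROT_NONE marker while the hardware valid bit is clear. A standalone sketch of that logic; the bit positions are assumptions for the demo (PTE_VALID is bit 0 on arm64, and the software bit is modelled here at bit 58):

#include <stdint.h>
#include <stdio.h>

#define PTE_VALID	(UINT64_C(1) << 0)
#define PTE_PROT_NONE	(UINT64_C(1) << 58)	/* assumed software bit */

static int pte_protnone(uint64_t pte)
{
	/* marked PROT_NONE and not valid to the hardware walker */
	return (pte & (PTE_VALID | PTE_PROT_NONE)) == PTE_PROT_NONE;
}

int main(void)
{
	printf("%d\n", pte_protnone(PTE_PROT_NONE));			/* 1 */
	printf("%d\n", pte_protnone(PTE_PROT_NONE | PTE_VALID));	/* 0 */
	printf("%d\n", pte_protnone(PTE_VALID));			/* 0 */
	return 0;
}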
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index 817a067..433e504 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -113,6 +113,17 @@
dsb(ishst);
}
+/*
+ * The calling secondary CPU has detected a serious configuration mismatch,
+ * which calls for a kernel panic. Update the boot status and park the calling
+ * CPU.
+ */
+static inline void cpu_panic_kernel(void)
+{
+ update_cpu_boot_status(CPU_PANIC_KERNEL);
+ cpu_park_loop();
+}
+
#endif /* ifndef __ASSEMBLY__ */
#endif /* ifndef __ASM_SMP_H */
diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h
index 59a5b0f1..024d623 100644
--- a/arch/arm64/include/asm/suspend.h
+++ b/arch/arm64/include/asm/suspend.h
@@ -1,7 +1,8 @@
#ifndef __ASM_SUSPEND_H
#define __ASM_SUSPEND_H
-#define NR_CTX_REGS 11
+#define NR_CTX_REGS 10
+#define NR_CALLEE_SAVED_REGS 12
/*
* struct cpu_suspend_ctx must be 16-byte aligned since it is allocated on
@@ -16,11 +17,34 @@
u64 sp;
} __aligned(16);
-struct sleep_save_sp {
- phys_addr_t *save_ptr_stash;
- phys_addr_t save_ptr_stash_phys;
+/*
+ * Memory to save the cpu state is allocated on the stack by
+ * __cpu_suspend_enter()'s caller, and populated by __cpu_suspend_enter().
+ * This data must survive until cpu_resume() is called.
+ *
+ * This struct describes the size and the layout of the saved CPU state.
+ * The layout of the callee_saved_regs is defined by the implementation
+ * of __cpu_suspend_enter(), and cpu_resume(). This struct must be passed
+ * in by the caller as __cpu_suspend_enter()'s stack-frame is gone once it
+ * returns, and the data would be subsequently corrupted by the call to the
+ * finisher.
+ */
+struct sleep_stack_data {
+ struct cpu_suspend_ctx system_regs;
+ unsigned long callee_saved_regs[NR_CALLEE_SAVED_REGS];
};
+extern unsigned long *sleep_save_stash;
+
extern int cpu_suspend(unsigned long arg, int (*fn)(unsigned long));
extern void cpu_resume(void);
+int __cpu_suspend_enter(struct sleep_stack_data *state);
+void __cpu_suspend_exit(void);
+void _cpu_resume(void);
+
+int swsusp_arch_suspend(void);
+int swsusp_arch_resume(void);
+int arch_hibernation_header_save(void *addr, unsigned int max_size);
+int arch_hibernation_header_restore(void *addr);
+
#endif
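The ownership rule spelled out in the sleep_stack_data comment (the save area must live in the caller's frame, because the callee's frame is gone once it returns) is easy to model in plain C. Everything below is illustrative, not the kernel API:

#include <stdio.h>

struct sleep_stack_data_model {
	unsigned long system_regs[10];		/* NR_CTX_REGS */
	unsigned long callee_saved_regs[12];	/* NR_CALLEE_SAVED_REGS */
};

static int suspend_enter_model(struct sleep_stack_data_model *state)
{
	/* Fill the caller-owned buffer; our own frame is about to vanish. */
	state->callee_saved_regs[0] = 0x1234;
	return 0;
}

int main(void)
{
	struct sleep_stack_data_model state;	/* allocated by the caller */

	suspend_enter_model(&state);
	/* Still valid here: the data outlives the callee's stack frame. */
	printf("%#lx\n", state.callee_saved_regs[0]);
	return 0;
}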
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 1287416..751e901 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -86,10 +86,21 @@
#define SET_PSTATE_UAO(x) __inst_arm(0xd5000000 | REG_PSTATE_UAO_IMM |\
(!!x)<<8 | 0x1f)
-/* SCTLR_EL1 */
-#define SCTLR_EL1_CP15BEN (0x1 << 5)
-#define SCTLR_EL1_SED (0x1 << 8)
-#define SCTLR_EL1_SPAN (0x1 << 23)
+/* Common SCTLR_ELx flags. */
+#define SCTLR_ELx_EE (1 << 25)
+#define SCTLR_ELx_I (1 << 12)
+#define SCTLR_ELx_SA (1 << 3)
+#define SCTLR_ELx_C (1 << 2)
+#define SCTLR_ELx_A (1 << 1)
+#define SCTLR_ELx_M 1
+
+#define SCTLR_ELx_FLAGS (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \
+ SCTLR_ELx_SA | SCTLR_ELx_I)
+
+/* SCTLR_EL1 specific flags. */
+#define SCTLR_EL1_SPAN (1 << 23)
+#define SCTLR_EL1_SED (1 << 8)
+#define SCTLR_EL1_CP15BEN (1 << 5)
/* id_aa64isar0 */
@@ -115,6 +126,7 @@
#define ID_AA64PFR0_ASIMD_SUPPORTED 0x0
#define ID_AA64PFR0_EL1_64BIT_ONLY 0x1
#define ID_AA64PFR0_EL0_64BIT_ONLY 0x1
+#define ID_AA64PFR0_EL0_32BIT_64BIT 0x2
/* id_aa64mmfr0 */
#define ID_AA64MMFR0_TGRAN4_SHIFT 28
@@ -145,7 +157,11 @@
#define ID_AA64MMFR1_VMIDBITS_16 2
/* id_aa64mmfr2 */
+#define ID_AA64MMFR2_LVA_SHIFT 16
+#define ID_AA64MMFR2_IESB_SHIFT 12
+#define ID_AA64MMFR2_LSM_SHIFT 8
#define ID_AA64MMFR2_UAO_SHIFT 4
+#define ID_AA64MMFR2_CNP_SHIFT 0
/* id_aa64dfr0 */
#define ID_AA64DFR0_CTX_CMPS_SHIFT 28
diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
index a3e9d6f..8b57339 100644
--- a/arch/arm64/include/asm/topology.h
+++ b/arch/arm64/include/asm/topology.h
@@ -22,6 +22,16 @@
void store_cpu_topology(unsigned int cpuid);
const struct cpumask *cpu_coregroup_mask(int cpu);
+#ifdef CONFIG_NUMA
+
+struct pci_bus;
+int pcibus_to_node(struct pci_bus *bus);
+#define cpumask_of_pcibus(bus) (pcibus_to_node(bus) == -1 ? \
+ cpu_all_mask : \
+ cpumask_of_node(pcibus_to_node(bus)))
+
+#endif /* CONFIG_NUMA */
+
#include <asm-generic/topology.h>
#endif /* _ASM_ARM_TOPOLOGY_H */
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 9f22dd6..dcbcf8d 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -18,6 +18,22 @@
#ifndef __ASM__VIRT_H
#define __ASM__VIRT_H
+/*
+ * The arm64 hcall implementation uses x0 to specify the hcall type. A value
+ * less than 0xfff indicates a special hcall, such as get/set vector.
+ * Any other value is used as a pointer to the function to call.
+ */
+
+/* HVC_GET_VECTORS - Return the value of the vbar_el2 register. */
+#define HVC_GET_VECTORS 0
+
+/*
+ * HVC_SET_VECTORS - Set the value of the vbar_el2 register.
+ *
+ * @x1: Physical address of the new vector table.
+ */
+#define HVC_SET_VECTORS 1
+
#define BOOT_CPU_MODE_EL1 (0xe11)
#define BOOT_CPU_MODE_EL2 (0xe12)
@@ -60,6 +76,12 @@
return el == CurrentEL_EL2;
}
+#ifdef CONFIG_ARM64_VHE
+extern void verify_cpu_run_el(void);
+#else
+static inline void verify_cpu_run_el(void) {}
+#endif
+
/* The section containing the hypervisor text */
extern char __hyp_text_start[];
extern char __hyp_text_end[];
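A userspace model of the hcall convention documented above: an x0 value below 0xfff selects a special operation, and anything else is treated as the address of a function to call. This sketches only the dispatch shape; the real handler runs at EL2 in assembly:

#include <stdint.h>
#include <stdio.h>

#define HVC_GET_VECTORS	0
#define HVC_SET_VECTORS	1

static uintptr_t vbar;	/* stands in for vbar_el2 */

static uintptr_t hcall(uintptr_t x0, uintptr_t x1)
{
	if (x0 < 0xfff) {	/* special hcall */
		switch (x0) {
		case HVC_GET_VECTORS:
			return vbar;
		case HVC_SET_VECTORS:
			vbar = x1;	/* x1: address of the new vector table */
			return 0;
		}
		return (uintptr_t)-1;
	}
	((void (*)(uintptr_t))x0)(x1);	/* any other value: x0 is a function pointer */
	return 0;
}

static void demo(uintptr_t arg)
{
	printf("hcall dispatched to demo(%#lx)\n", (unsigned long)arg);
}

int main(void)
{
	hcall(HVC_SET_VECTORS, 0x80001000);
	printf("vectors at %#lx\n", (unsigned long)hcall(HVC_GET_VECTORS, 0));
	hcall((uintptr_t)demo, 42);
	return 0;
}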
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 3793003..2173149 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -45,6 +45,7 @@
arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o
arm64-obj-$(CONFIG_PARAVIRT) += paravirt.o
arm64-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
+arm64-obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o
obj-y += $(arm64-obj-y) vdso/
obj-m += $(arm64-obj-m)
diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c
index d1ce8e2..3e4f1a4 100644
--- a/arch/arm64/kernel/acpi.c
+++ b/arch/arm64/kernel/acpi.c
@@ -42,6 +42,7 @@
EXPORT_SYMBOL(acpi_pci_disabled);
static bool param_acpi_off __initdata;
+static bool param_acpi_on __initdata;
static bool param_acpi_force __initdata;
static int __init parse_acpi(char *arg)
@@ -52,6 +53,8 @@
/* "acpi=off" disables both ACPI table parsing and interpreter */
if (strcmp(arg, "off") == 0)
param_acpi_off = true;
+ else if (strcmp(arg, "on") == 0) /* prefer ACPI over DT */
+ param_acpi_on = true;
else if (strcmp(arg, "force") == 0) /* force ACPI to be enabled */
param_acpi_force = true;
else
@@ -66,12 +69,24 @@
void *data)
{
/*
- * Return 1 as soon as we encounter a node at depth 1 that is
- * not the /chosen node.
+ * Ignore anything not directly under the root node; we'll
+ * catch its parent instead.
*/
- if (depth == 1 && (strcmp(uname, "chosen") != 0))
- return 1;
- return 0;
+ if (depth != 1)
+ return 0;
+
+ if (strcmp(uname, "chosen") == 0)
+ return 0;
+
+ if (strcmp(uname, "hypervisor") == 0 &&
+ of_flat_dt_is_compatible(node, "xen,xen"))
+ return 0;
+
+ /*
+	 * A depth-1 node that is neither /chosen nor the Xen placeholder
+	 * means the device tree carries a real hardware description.
+ */
+ return 1;
}
/*
@@ -184,11 +199,13 @@
/*
* Enable ACPI instead of device tree unless
* - ACPI has been disabled explicitly (acpi=off), or
- * - the device tree is not empty (it has more than just a /chosen node)
- * and ACPI has not been force enabled (acpi=force)
+ * - the device tree is not empty (it has more than just a /chosen node,
+ * and a /hypervisor node when running on Xen)
+ * and ACPI has not been [force] enabled (acpi=on|force)
*/
if (param_acpi_off ||
- (!param_acpi_force && of_scan_flat_dt(dt_scan_depth1_nodes, NULL)))
+ (!param_acpi_on && !param_acpi_force &&
+ of_scan_flat_dt(dt_scan_depth1_nodes, NULL)))
return;
/*
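Restated as a standalone predicate, the depth-1 scan above returns 1 (stop scanning: the DT is a real hardware description) for any root-level node other than /chosen or Xen's /hypervisor placeholder. A sketch with the compatible check reduced to a boolean parameter:

#include <stdio.h>
#include <string.h>

static int dt_scan_depth1_nodes_model(const char *uname, int depth,
				      int is_xen_compatible)
{
	if (depth != 1)
		return 0;	/* not at the root: keep scanning */
	if (strcmp(uname, "chosen") == 0)
		return 0;	/* /chosen alone does not make the DT real */
	if (strcmp(uname, "hypervisor") == 0 && is_xen_compatible)
		return 0;	/* Xen always injects this placeholder */
	return 1;		/* genuine hardware description found */
}

int main(void)
{
	printf("%d\n", dt_scan_depth1_nodes_model("chosen", 1, 0));	/* 0 */
	printf("%d\n", dt_scan_depth1_nodes_model("hypervisor", 1, 1));	/* 0 */
	printf("%d\n", dt_scan_depth1_nodes_model("memory", 1, 0));	/* 1 */
	return 0;
}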
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 3ae6b31..f8e5d47 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -22,6 +22,7 @@
#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/kvm_host.h>
+#include <linux/suspend.h>
#include <asm/thread_info.h>
#include <asm/memory.h>
#include <asm/smp_plat.h>
@@ -119,11 +120,14 @@
DEFINE(CPU_CTX_SP, offsetof(struct cpu_suspend_ctx, sp));
DEFINE(MPIDR_HASH_MASK, offsetof(struct mpidr_hash, mask));
DEFINE(MPIDR_HASH_SHIFTS, offsetof(struct mpidr_hash, shift_aff));
- DEFINE(SLEEP_SAVE_SP_SZ, sizeof(struct sleep_save_sp));
- DEFINE(SLEEP_SAVE_SP_PHYS, offsetof(struct sleep_save_sp, save_ptr_stash_phys));
- DEFINE(SLEEP_SAVE_SP_VIRT, offsetof(struct sleep_save_sp, save_ptr_stash));
+ DEFINE(SLEEP_STACK_DATA_SYSTEM_REGS, offsetof(struct sleep_stack_data, system_regs));
+ DEFINE(SLEEP_STACK_DATA_CALLEE_REGS, offsetof(struct sleep_stack_data, callee_saved_regs));
#endif
DEFINE(ARM_SMCCC_RES_X0_OFFS, offsetof(struct arm_smccc_res, a0));
DEFINE(ARM_SMCCC_RES_X2_OFFS, offsetof(struct arm_smccc_res, a2));
+ BLANK();
+ DEFINE(HIBERN_PBE_ORIG, offsetof(struct pbe, orig_address));
+ DEFINE(HIBERN_PBE_ADDR, offsetof(struct pbe, address));
+ DEFINE(HIBERN_PBE_NEXT, offsetof(struct pbe, next));
return 0;
}
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 06afd04..d427894 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -22,14 +22,16 @@
#include <asm/cpufeature.h>
static bool __maybe_unused
-is_affected_midr_range(const struct arm64_cpu_capabilities *entry)
+is_affected_midr_range(const struct arm64_cpu_capabilities *entry, int scope)
{
+ WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
return MIDR_IS_CPU_MODEL_RANGE(read_cpuid_id(), entry->midr_model,
entry->midr_range_min,
entry->midr_range_max);
}
#define MIDR_RANGE(model, min, max) \
+ .def_scope = SCOPE_LOCAL_CPU, \
.matches = is_affected_midr_range, \
.midr_model = model, \
.midr_range_min = min, \
@@ -101,6 +103,26 @@
}
};
+/*
+ * The CPU errata workarounds are detected and applied at boot time
+ * and the related information is freed soon after. If a late-onlined CPU
+ * requires a workaround that was not detected at boot, fail that CPU.
+ */
+void verify_local_cpu_errata(void)
+{
+ const struct arm64_cpu_capabilities *caps = arm64_errata;
+
+ for (; caps->matches; caps++)
+ if (!cpus_have_cap(caps->capability) &&
+ caps->matches(caps, SCOPE_LOCAL_CPU)) {
+			pr_crit("CPU%d: Requires workaround for %s, not detected"
+ " at boot time\n",
+ smp_processor_id(),
+ caps->desc ? : "an erratum");
+ cpu_die_early();
+ }
+}
+
void check_local_cpu_errata(void)
{
update_cpu_capabilities(arm64_errata, "enabling workaround for");
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 943f514..811773d 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -71,7 +71,8 @@
/* meta feature for alternatives */
static bool __maybe_unused
-cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry);
+cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused);
+
static struct arm64_ftr_bits ftr_id_aa64isar0[] = {
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
@@ -130,7 +131,11 @@
};
static struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_LVA_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_IESB_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_LSM_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_UAO_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_CNP_SHIFT, 4, 0),
ARM64_FTR_END,
};
@@ -435,22 +440,26 @@
init_cpu_ftr_reg(SYS_ID_AA64MMFR2_EL1, info->reg_id_aa64mmfr2);
init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0);
init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1);
- init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0);
- init_cpu_ftr_reg(SYS_ID_ISAR0_EL1, info->reg_id_isar0);
- init_cpu_ftr_reg(SYS_ID_ISAR1_EL1, info->reg_id_isar1);
- init_cpu_ftr_reg(SYS_ID_ISAR2_EL1, info->reg_id_isar2);
- init_cpu_ftr_reg(SYS_ID_ISAR3_EL1, info->reg_id_isar3);
- init_cpu_ftr_reg(SYS_ID_ISAR4_EL1, info->reg_id_isar4);
- init_cpu_ftr_reg(SYS_ID_ISAR5_EL1, info->reg_id_isar5);
- init_cpu_ftr_reg(SYS_ID_MMFR0_EL1, info->reg_id_mmfr0);
- init_cpu_ftr_reg(SYS_ID_MMFR1_EL1, info->reg_id_mmfr1);
- init_cpu_ftr_reg(SYS_ID_MMFR2_EL1, info->reg_id_mmfr2);
- init_cpu_ftr_reg(SYS_ID_MMFR3_EL1, info->reg_id_mmfr3);
- init_cpu_ftr_reg(SYS_ID_PFR0_EL1, info->reg_id_pfr0);
- init_cpu_ftr_reg(SYS_ID_PFR1_EL1, info->reg_id_pfr1);
- init_cpu_ftr_reg(SYS_MVFR0_EL1, info->reg_mvfr0);
- init_cpu_ftr_reg(SYS_MVFR1_EL1, info->reg_mvfr1);
- init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2);
+
+ if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) {
+ init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0);
+ init_cpu_ftr_reg(SYS_ID_ISAR0_EL1, info->reg_id_isar0);
+ init_cpu_ftr_reg(SYS_ID_ISAR1_EL1, info->reg_id_isar1);
+ init_cpu_ftr_reg(SYS_ID_ISAR2_EL1, info->reg_id_isar2);
+ init_cpu_ftr_reg(SYS_ID_ISAR3_EL1, info->reg_id_isar3);
+ init_cpu_ftr_reg(SYS_ID_ISAR4_EL1, info->reg_id_isar4);
+ init_cpu_ftr_reg(SYS_ID_ISAR5_EL1, info->reg_id_isar5);
+ init_cpu_ftr_reg(SYS_ID_MMFR0_EL1, info->reg_id_mmfr0);
+ init_cpu_ftr_reg(SYS_ID_MMFR1_EL1, info->reg_id_mmfr1);
+ init_cpu_ftr_reg(SYS_ID_MMFR2_EL1, info->reg_id_mmfr2);
+ init_cpu_ftr_reg(SYS_ID_MMFR3_EL1, info->reg_id_mmfr3);
+ init_cpu_ftr_reg(SYS_ID_PFR0_EL1, info->reg_id_pfr0);
+ init_cpu_ftr_reg(SYS_ID_PFR1_EL1, info->reg_id_pfr1);
+ init_cpu_ftr_reg(SYS_MVFR0_EL1, info->reg_mvfr0);
+ init_cpu_ftr_reg(SYS_MVFR1_EL1, info->reg_mvfr1);
+ init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2);
+ }
+
}
static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new)
@@ -555,47 +564,51 @@
info->reg_id_aa64pfr1, boot->reg_id_aa64pfr1);
/*
- * If we have AArch32, we care about 32-bit features for compat. These
- * registers should be RES0 otherwise.
+ * If we have AArch32, we care about 32-bit features for compat.
+ * If the system doesn't support AArch32, don't update them.
*/
- taint |= check_update_ftr_reg(SYS_ID_DFR0_EL1, cpu,
+ if (id_aa64pfr0_32bit_el0(read_system_reg(SYS_ID_AA64PFR0_EL1)) &&
+ id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) {
+
+ taint |= check_update_ftr_reg(SYS_ID_DFR0_EL1, cpu,
info->reg_id_dfr0, boot->reg_id_dfr0);
- taint |= check_update_ftr_reg(SYS_ID_ISAR0_EL1, cpu,
+ taint |= check_update_ftr_reg(SYS_ID_ISAR0_EL1, cpu,
info->reg_id_isar0, boot->reg_id_isar0);
- taint |= check_update_ftr_reg(SYS_ID_ISAR1_EL1, cpu,
+ taint |= check_update_ftr_reg(SYS_ID_ISAR1_EL1, cpu,
info->reg_id_isar1, boot->reg_id_isar1);
- taint |= check_update_ftr_reg(SYS_ID_ISAR2_EL1, cpu,
+ taint |= check_update_ftr_reg(SYS_ID_ISAR2_EL1, cpu,
info->reg_id_isar2, boot->reg_id_isar2);
- taint |= check_update_ftr_reg(SYS_ID_ISAR3_EL1, cpu,
+ taint |= check_update_ftr_reg(SYS_ID_ISAR3_EL1, cpu,
info->reg_id_isar3, boot->reg_id_isar3);
- taint |= check_update_ftr_reg(SYS_ID_ISAR4_EL1, cpu,
+ taint |= check_update_ftr_reg(SYS_ID_ISAR4_EL1, cpu,
info->reg_id_isar4, boot->reg_id_isar4);
- taint |= check_update_ftr_reg(SYS_ID_ISAR5_EL1, cpu,
+ taint |= check_update_ftr_reg(SYS_ID_ISAR5_EL1, cpu,
info->reg_id_isar5, boot->reg_id_isar5);
- /*
- * Regardless of the value of the AuxReg field, the AIFSR, ADFSR, and
- * ACTLR formats could differ across CPUs and therefore would have to
- * be trapped for virtualization anyway.
- */
- taint |= check_update_ftr_reg(SYS_ID_MMFR0_EL1, cpu,
+ /*
+ * Regardless of the value of the AuxReg field, the AIFSR, ADFSR, and
+ * ACTLR formats could differ across CPUs and therefore would have to
+ * be trapped for virtualization anyway.
+ */
+ taint |= check_update_ftr_reg(SYS_ID_MMFR0_EL1, cpu,
info->reg_id_mmfr0, boot->reg_id_mmfr0);
- taint |= check_update_ftr_reg(SYS_ID_MMFR1_EL1, cpu,
+ taint |= check_update_ftr_reg(SYS_ID_MMFR1_EL1, cpu,
info->reg_id_mmfr1, boot->reg_id_mmfr1);
- taint |= check_update_ftr_reg(SYS_ID_MMFR2_EL1, cpu,
+ taint |= check_update_ftr_reg(SYS_ID_MMFR2_EL1, cpu,
info->reg_id_mmfr2, boot->reg_id_mmfr2);
- taint |= check_update_ftr_reg(SYS_ID_MMFR3_EL1, cpu,
+ taint |= check_update_ftr_reg(SYS_ID_MMFR3_EL1, cpu,
info->reg_id_mmfr3, boot->reg_id_mmfr3);
- taint |= check_update_ftr_reg(SYS_ID_PFR0_EL1, cpu,
+ taint |= check_update_ftr_reg(SYS_ID_PFR0_EL1, cpu,
info->reg_id_pfr0, boot->reg_id_pfr0);
- taint |= check_update_ftr_reg(SYS_ID_PFR1_EL1, cpu,
+ taint |= check_update_ftr_reg(SYS_ID_PFR1_EL1, cpu,
info->reg_id_pfr1, boot->reg_id_pfr1);
- taint |= check_update_ftr_reg(SYS_MVFR0_EL1, cpu,
+ taint |= check_update_ftr_reg(SYS_MVFR0_EL1, cpu,
info->reg_mvfr0, boot->reg_mvfr0);
- taint |= check_update_ftr_reg(SYS_MVFR1_EL1, cpu,
+ taint |= check_update_ftr_reg(SYS_MVFR1_EL1, cpu,
info->reg_mvfr1, boot->reg_mvfr1);
- taint |= check_update_ftr_reg(SYS_MVFR2_EL1, cpu,
+ taint |= check_update_ftr_reg(SYS_MVFR2_EL1, cpu,
info->reg_mvfr2, boot->reg_mvfr2);
+ }
/*
* Mismatched CPU features are a recipe for disaster. Don't even
@@ -614,254 +627,9 @@
return regp->sys_val;
}
-#include <linux/irqchip/arm-gic-v3.h>
-
-static bool
-feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry)
-{
- int val = cpuid_feature_extract_field(reg, entry->field_pos, entry->sign);
-
- return val >= entry->min_field_value;
-}
-
-static bool
-has_cpuid_feature(const struct arm64_cpu_capabilities *entry)
-{
- u64 val;
-
- val = read_system_reg(entry->sys_reg);
- return feature_matches(val, entry);
-}
-
-static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry)
-{
- bool has_sre;
-
- if (!has_cpuid_feature(entry))
- return false;
-
- has_sre = gic_enable_sre();
- if (!has_sre)
- pr_warn_once("%s present but disabled by higher exception level\n",
- entry->desc);
-
- return has_sre;
-}
-
-static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry)
-{
- u32 midr = read_cpuid_id();
- u32 rv_min, rv_max;
-
- /* Cavium ThunderX pass 1.x and 2.x */
- rv_min = 0;
- rv_max = (1 << MIDR_VARIANT_SHIFT) | MIDR_REVISION_MASK;
-
- return MIDR_IS_CPU_MODEL_RANGE(midr, MIDR_THUNDERX, rv_min, rv_max);
-}
-
-static bool runs_at_el2(const struct arm64_cpu_capabilities *entry)
-{
- return is_kernel_in_hyp_mode();
-}
-
-static const struct arm64_cpu_capabilities arm64_features[] = {
- {
- .desc = "GIC system register CPU interface",
- .capability = ARM64_HAS_SYSREG_GIC_CPUIF,
- .matches = has_useable_gicv3_cpuif,
- .sys_reg = SYS_ID_AA64PFR0_EL1,
- .field_pos = ID_AA64PFR0_GIC_SHIFT,
- .sign = FTR_UNSIGNED,
- .min_field_value = 1,
- },
-#ifdef CONFIG_ARM64_PAN
- {
- .desc = "Privileged Access Never",
- .capability = ARM64_HAS_PAN,
- .matches = has_cpuid_feature,
- .sys_reg = SYS_ID_AA64MMFR1_EL1,
- .field_pos = ID_AA64MMFR1_PAN_SHIFT,
- .sign = FTR_UNSIGNED,
- .min_field_value = 1,
- .enable = cpu_enable_pan,
- },
-#endif /* CONFIG_ARM64_PAN */
-#if defined(CONFIG_AS_LSE) && defined(CONFIG_ARM64_LSE_ATOMICS)
- {
- .desc = "LSE atomic instructions",
- .capability = ARM64_HAS_LSE_ATOMICS,
- .matches = has_cpuid_feature,
- .sys_reg = SYS_ID_AA64ISAR0_EL1,
- .field_pos = ID_AA64ISAR0_ATOMICS_SHIFT,
- .sign = FTR_UNSIGNED,
- .min_field_value = 2,
- },
-#endif /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */
- {
- .desc = "Software prefetching using PRFM",
- .capability = ARM64_HAS_NO_HW_PREFETCH,
- .matches = has_no_hw_prefetch,
- },
-#ifdef CONFIG_ARM64_UAO
- {
- .desc = "User Access Override",
- .capability = ARM64_HAS_UAO,
- .matches = has_cpuid_feature,
- .sys_reg = SYS_ID_AA64MMFR2_EL1,
- .field_pos = ID_AA64MMFR2_UAO_SHIFT,
- .min_field_value = 1,
- .enable = cpu_enable_uao,
- },
-#endif /* CONFIG_ARM64_UAO */
-#ifdef CONFIG_ARM64_PAN
- {
- .capability = ARM64_ALT_PAN_NOT_UAO,
- .matches = cpufeature_pan_not_uao,
- },
-#endif /* CONFIG_ARM64_PAN */
- {
- .desc = "Virtualization Host Extensions",
- .capability = ARM64_HAS_VIRT_HOST_EXTN,
- .matches = runs_at_el2,
- },
- {},
-};
-
-#define HWCAP_CAP(reg, field, s, min_value, type, cap) \
- { \
- .desc = #cap, \
- .matches = has_cpuid_feature, \
- .sys_reg = reg, \
- .field_pos = field, \
- .sign = s, \
- .min_field_value = min_value, \
- .hwcap_type = type, \
- .hwcap = cap, \
- }
-
-static const struct arm64_cpu_capabilities arm64_hwcaps[] = {
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_PMULL),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_AES),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA1),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA2),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_CRC32),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_ATOMICS),
- HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_FP),
- HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP),
- HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD),
- HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_ASIMDHP),
-#ifdef CONFIG_COMPAT
- HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL),
- HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES),
- HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1),
- HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA2),
- HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_CRC32),
-#endif
- {},
-};
-
-static void __init cap_set_hwcap(const struct arm64_cpu_capabilities *cap)
-{
- switch (cap->hwcap_type) {
- case CAP_HWCAP:
- elf_hwcap |= cap->hwcap;
- break;
-#ifdef CONFIG_COMPAT
- case CAP_COMPAT_HWCAP:
- compat_elf_hwcap |= (u32)cap->hwcap;
- break;
- case CAP_COMPAT_HWCAP2:
- compat_elf_hwcap2 |= (u32)cap->hwcap;
- break;
-#endif
- default:
- WARN_ON(1);
- break;
- }
-}
-
-/* Check if we have a particular HWCAP enabled */
-static bool __maybe_unused cpus_have_hwcap(const struct arm64_cpu_capabilities *cap)
-{
- bool rc;
-
- switch (cap->hwcap_type) {
- case CAP_HWCAP:
- rc = (elf_hwcap & cap->hwcap) != 0;
- break;
-#ifdef CONFIG_COMPAT
- case CAP_COMPAT_HWCAP:
- rc = (compat_elf_hwcap & (u32)cap->hwcap) != 0;
- break;
- case CAP_COMPAT_HWCAP2:
- rc = (compat_elf_hwcap2 & (u32)cap->hwcap) != 0;
- break;
-#endif
- default:
- WARN_ON(1);
- rc = false;
- }
-
- return rc;
-}
-
-static void __init setup_cpu_hwcaps(void)
-{
- int i;
- const struct arm64_cpu_capabilities *hwcaps = arm64_hwcaps;
-
- for (i = 0; hwcaps[i].matches; i++)
- if (hwcaps[i].matches(&hwcaps[i]))
- cap_set_hwcap(&hwcaps[i]);
-}
-
-void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
- const char *info)
-{
- int i;
-
- for (i = 0; caps[i].matches; i++) {
- if (!caps[i].matches(&caps[i]))
- continue;
-
- if (!cpus_have_cap(caps[i].capability) && caps[i].desc)
- pr_info("%s %s\n", info, caps[i].desc);
- cpus_set_cap(caps[i].capability);
- }
-}
-
-/*
- * Run through the enabled capabilities and enable() it on all active
- * CPUs
- */
-static void __init
-enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
-{
- int i;
-
- for (i = 0; caps[i].matches; i++)
- if (caps[i].enable && cpus_have_cap(caps[i].capability))
- on_each_cpu(caps[i].enable, NULL, true);
-}
-
-/*
- * Flag to indicate if we have computed the system wide
- * capabilities based on the boot time active CPUs. This
- * will be used to determine if a new booting CPU should
- * go through the verification process to make sure that it
- * supports the system capabilities, without using a hotplug
- * notifier.
- */
-static bool sys_caps_initialised;
-
-static inline void set_sys_caps_initialised(void)
-{
- sys_caps_initialised = true;
-}
-
/*
* __raw_read_system_reg() - Used by a STARTING cpu before cpuinfo is populated.
+ * Read the system register on the current CPU
*/
static u64 __raw_read_system_reg(u32 sys_id)
{
@@ -902,15 +670,314 @@
}
}
+#include <linux/irqchip/arm-gic-v3.h>
+
+static bool
+feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry)
+{
+ int val = cpuid_feature_extract_field(reg, entry->field_pos, entry->sign);
+
+ return val >= entry->min_field_value;
+}
+
+static bool
+has_cpuid_feature(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ u64 val;
+
+ WARN_ON(scope == SCOPE_LOCAL_CPU && preemptible());
+ if (scope == SCOPE_SYSTEM)
+ val = read_system_reg(entry->sys_reg);
+ else
+ val = __raw_read_system_reg(entry->sys_reg);
+
+ return feature_matches(val, entry);
+}
+
+static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ bool has_sre;
+
+ if (!has_cpuid_feature(entry, scope))
+ return false;
+
+ has_sre = gic_enable_sre();
+ if (!has_sre)
+ pr_warn_once("%s present but disabled by higher exception level\n",
+ entry->desc);
+
+ return has_sre;
+}
+
+static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry, int __unused)
+{
+ u32 midr = read_cpuid_id();
+ u32 rv_min, rv_max;
+
+ /* Cavium ThunderX pass 1.x and 2.x */
+ rv_min = 0;
+ rv_max = (1 << MIDR_VARIANT_SHIFT) | MIDR_REVISION_MASK;
+
+ return MIDR_IS_CPU_MODEL_RANGE(midr, MIDR_THUNDERX, rv_min, rv_max);
+}
+
+static bool runs_at_el2(const struct arm64_cpu_capabilities *entry, int __unused)
+{
+ return is_kernel_in_hyp_mode();
+}
+
+static const struct arm64_cpu_capabilities arm64_features[] = {
+ {
+ .desc = "GIC system register CPU interface",
+ .capability = ARM64_HAS_SYSREG_GIC_CPUIF,
+ .def_scope = SCOPE_SYSTEM,
+ .matches = has_useable_gicv3_cpuif,
+ .sys_reg = SYS_ID_AA64PFR0_EL1,
+ .field_pos = ID_AA64PFR0_GIC_SHIFT,
+ .sign = FTR_UNSIGNED,
+ .min_field_value = 1,
+ },
+#ifdef CONFIG_ARM64_PAN
+ {
+ .desc = "Privileged Access Never",
+ .capability = ARM64_HAS_PAN,
+ .def_scope = SCOPE_SYSTEM,
+ .matches = has_cpuid_feature,
+ .sys_reg = SYS_ID_AA64MMFR1_EL1,
+ .field_pos = ID_AA64MMFR1_PAN_SHIFT,
+ .sign = FTR_UNSIGNED,
+ .min_field_value = 1,
+ .enable = cpu_enable_pan,
+ },
+#endif /* CONFIG_ARM64_PAN */
+#if defined(CONFIG_AS_LSE) && defined(CONFIG_ARM64_LSE_ATOMICS)
+ {
+ .desc = "LSE atomic instructions",
+ .capability = ARM64_HAS_LSE_ATOMICS,
+ .def_scope = SCOPE_SYSTEM,
+ .matches = has_cpuid_feature,
+ .sys_reg = SYS_ID_AA64ISAR0_EL1,
+ .field_pos = ID_AA64ISAR0_ATOMICS_SHIFT,
+ .sign = FTR_UNSIGNED,
+ .min_field_value = 2,
+ },
+#endif /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */
+ {
+ .desc = "Software prefetching using PRFM",
+ .capability = ARM64_HAS_NO_HW_PREFETCH,
+ .def_scope = SCOPE_SYSTEM,
+ .matches = has_no_hw_prefetch,
+ },
+#ifdef CONFIG_ARM64_UAO
+ {
+ .desc = "User Access Override",
+ .capability = ARM64_HAS_UAO,
+ .def_scope = SCOPE_SYSTEM,
+ .matches = has_cpuid_feature,
+ .sys_reg = SYS_ID_AA64MMFR2_EL1,
+ .field_pos = ID_AA64MMFR2_UAO_SHIFT,
+ .min_field_value = 1,
+ .enable = cpu_enable_uao,
+ },
+#endif /* CONFIG_ARM64_UAO */
+#ifdef CONFIG_ARM64_PAN
+ {
+ .capability = ARM64_ALT_PAN_NOT_UAO,
+ .def_scope = SCOPE_SYSTEM,
+ .matches = cpufeature_pan_not_uao,
+ },
+#endif /* CONFIG_ARM64_PAN */
+ {
+ .desc = "Virtualization Host Extensions",
+ .capability = ARM64_HAS_VIRT_HOST_EXTN,
+ .def_scope = SCOPE_SYSTEM,
+ .matches = runs_at_el2,
+ },
+ {
+ .desc = "32-bit EL0 Support",
+ .capability = ARM64_HAS_32BIT_EL0,
+ .def_scope = SCOPE_SYSTEM,
+ .matches = has_cpuid_feature,
+ .sys_reg = SYS_ID_AA64PFR0_EL1,
+ .sign = FTR_UNSIGNED,
+ .field_pos = ID_AA64PFR0_EL0_SHIFT,
+ .min_field_value = ID_AA64PFR0_EL0_32BIT_64BIT,
+ },
+ {},
+};
+
+#define HWCAP_CAP(reg, field, s, min_value, type, cap) \
+ { \
+ .desc = #cap, \
+ .def_scope = SCOPE_SYSTEM, \
+ .matches = has_cpuid_feature, \
+ .sys_reg = reg, \
+ .field_pos = field, \
+ .sign = s, \
+ .min_field_value = min_value, \
+ .hwcap_type = type, \
+ .hwcap = cap, \
+ }
+
+static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_PMULL),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_AES),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA1),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA2),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_CRC32),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_ATOMICS),
+ HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_FP),
+ HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP),
+ HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD),
+ HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_ASIMDHP),
+ {},
+};
+
+static const struct arm64_cpu_capabilities compat_elf_hwcaps[] = {
+#ifdef CONFIG_COMPAT
+ HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL),
+ HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES),
+ HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1),
+ HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA2),
+ HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_CRC32),
+#endif
+ {},
+};
+
+static void __init cap_set_elf_hwcap(const struct arm64_cpu_capabilities *cap)
+{
+ switch (cap->hwcap_type) {
+ case CAP_HWCAP:
+ elf_hwcap |= cap->hwcap;
+ break;
+#ifdef CONFIG_COMPAT
+ case CAP_COMPAT_HWCAP:
+ compat_elf_hwcap |= (u32)cap->hwcap;
+ break;
+ case CAP_COMPAT_HWCAP2:
+ compat_elf_hwcap2 |= (u32)cap->hwcap;
+ break;
+#endif
+ default:
+ WARN_ON(1);
+ break;
+ }
+}
+
+/* Check if we have a particular HWCAP enabled */
+static bool cpus_have_elf_hwcap(const struct arm64_cpu_capabilities *cap)
+{
+ bool rc;
+
+ switch (cap->hwcap_type) {
+ case CAP_HWCAP:
+ rc = (elf_hwcap & cap->hwcap) != 0;
+ break;
+#ifdef CONFIG_COMPAT
+ case CAP_COMPAT_HWCAP:
+ rc = (compat_elf_hwcap & (u32)cap->hwcap) != 0;
+ break;
+ case CAP_COMPAT_HWCAP2:
+ rc = (compat_elf_hwcap2 & (u32)cap->hwcap) != 0;
+ break;
+#endif
+ default:
+ WARN_ON(1);
+ rc = false;
+ }
+
+ return rc;
+}
+
+static void __init setup_elf_hwcaps(const struct arm64_cpu_capabilities *hwcaps)
+{
+ for (; hwcaps->matches; hwcaps++)
+ if (hwcaps->matches(hwcaps, hwcaps->def_scope))
+ cap_set_elf_hwcap(hwcaps);
+}
+
+void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
+ const char *info)
+{
+ for (; caps->matches; caps++) {
+ if (!caps->matches(caps, caps->def_scope))
+ continue;
+
+ if (!cpus_have_cap(caps->capability) && caps->desc)
+ pr_info("%s %s\n", info, caps->desc);
+ cpus_set_cap(caps->capability);
+ }
+}
+
+/*
+ * Run through the enabled capabilities and call enable() on all active
+ * CPUs.
+ */
+static void __init
+enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
+{
+ for (; caps->matches; caps++)
+ if (caps->enable && cpus_have_cap(caps->capability))
+ on_each_cpu(caps->enable, NULL, true);
+}
+
+/*
+ * Flag to indicate if we have computed the system wide
+ * capabilities based on the boot time active CPUs. This
+ * will be used to determine if a new booting CPU should
+ * go through the verification process to make sure that it
+ * supports the system capabilities, without using a hotplug
+ * notifier.
+ */
+static bool sys_caps_initialised;
+
+static inline void set_sys_caps_initialised(void)
+{
+ sys_caps_initialised = true;
+}
+
/*
* Check for CPU features that are used in early boot
* based on the Boot CPU value.
*/
static void check_early_cpu_features(void)
{
+ verify_cpu_run_el();
verify_cpu_asid_bits();
}
+static void
+verify_local_elf_hwcaps(const struct arm64_cpu_capabilities *caps)
+{
+
+ for (; caps->matches; caps++)
+ if (cpus_have_elf_hwcap(caps) && !caps->matches(caps, SCOPE_LOCAL_CPU)) {
+ pr_crit("CPU%d: missing HWCAP: %s\n",
+ smp_processor_id(), caps->desc);
+ cpu_die_early();
+ }
+}
+
+static void
+verify_local_cpu_features(const struct arm64_cpu_capabilities *caps)
+{
+ for (; caps->matches; caps++) {
+ if (!cpus_have_cap(caps->capability))
+ continue;
+ /*
+ * If the new CPU misses an advertised feature, we cannot proceed
+		 * further; park the CPU.
+ */
+ if (!caps->matches(caps, SCOPE_LOCAL_CPU)) {
+ pr_crit("CPU%d: missing feature: %s\n",
+ smp_processor_id(), caps->desc);
+ cpu_die_early();
+ }
+ if (caps->enable)
+ caps->enable(NULL);
+ }
+}
+
/*
* Run through the enabled system capabilities and enable() it on this CPU.
* The capabilities were decided based on the available CPUs at the boot time.
@@ -921,8 +988,6 @@
*/
void verify_local_cpu_capabilities(void)
{
- int i;
- const struct arm64_cpu_capabilities *caps;
check_early_cpu_features();
@@ -933,32 +998,11 @@
if (!sys_caps_initialised)
return;
- caps = arm64_features;
- for (i = 0; caps[i].matches; i++) {
- if (!cpus_have_cap(caps[i].capability) || !caps[i].sys_reg)
- continue;
- /*
- * If the new CPU misses an advertised feature, we cannot proceed
- * further, park the cpu.
- */
- if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i])) {
- pr_crit("CPU%d: missing feature: %s\n",
- smp_processor_id(), caps[i].desc);
- cpu_die_early();
- }
- if (caps[i].enable)
- caps[i].enable(NULL);
- }
-
- for (i = 0, caps = arm64_hwcaps; caps[i].matches; i++) {
- if (!cpus_have_hwcap(&caps[i]))
- continue;
- if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i])) {
- pr_crit("CPU%d: missing HWCAP: %s\n",
- smp_processor_id(), caps[i].desc);
- cpu_die_early();
- }
- }
+ verify_local_cpu_errata();
+ verify_local_cpu_features(arm64_features);
+ verify_local_elf_hwcaps(arm64_elf_hwcaps);
+ if (system_supports_32bit_el0())
+ verify_local_elf_hwcaps(compat_elf_hwcaps);
}
static void __init setup_feature_capabilities(void)
@@ -967,6 +1011,24 @@
enable_cpu_capabilities(arm64_features);
}
+/*
+ * Check if the current CPU has a given feature capability.
+ * Should be called from non-preemptible context.
+ */
+bool this_cpu_has_cap(unsigned int cap)
+{
+ const struct arm64_cpu_capabilities *caps;
+
+ if (WARN_ON(preemptible()))
+ return false;
+
+ for (caps = arm64_features; caps->desc; caps++)
+ if (caps->capability == cap && caps->matches)
+ return caps->matches(caps, SCOPE_LOCAL_CPU);
+
+ return false;
+}
+
void __init setup_cpu_features(void)
{
u32 cwg;
@@ -974,7 +1036,10 @@
 	/* Set the CPU feature capabilities */
setup_feature_capabilities();
- setup_cpu_hwcaps();
+ setup_elf_hwcaps(arm64_elf_hwcaps);
+
+ if (system_supports_32bit_el0())
+ setup_elf_hwcaps(compat_elf_hwcaps);
/* Advertise that we have computed the system capabilities */
set_sys_caps_initialised();
@@ -993,7 +1058,7 @@
}
static bool __maybe_unused
-cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry)
+cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused)
{
return (cpus_have_cap(ARM64_HAS_PAN) && !cpus_have_cap(ARM64_HAS_UAO));
}
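feature_matches() above reduces to extracting one signed or unsigned 4-bit ID-register field and comparing it against a minimum value. A self-contained sketch of that extraction; the 4-bit field width and the sample values are assumptions for the demo, not authoritative register layouts:

#include <stdint.h>
#include <stdio.h>

static int64_t extract_field(uint64_t reg, unsigned int shift, int sign)
{
	if (sign)	/* sign-extend the 4-bit field */
		return (int64_t)(reg << (64 - shift - 4)) >> 60;
	return (reg >> shift) & 0xf;
}

static int matches(uint64_t reg, unsigned int shift, int sign, int64_t min)
{
	return extract_field(reg, shift, sign) >= min;
}

int main(void)
{
	/* unsigned field of 2 at shift 4 satisfies min_field_value = 2 */
	printf("%d\n", matches(0x20, 4, 0, 2));			/* 1 */
	/* signed field of 0xf decodes as -1: feature not implemented */
	printf("%lld\n", (long long)extract_field(0xf0, 4, 1));	/* -1 */
	return 0;
}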
diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
index 9047cab6..e11857f 100644
--- a/arch/arm64/kernel/cpuidle.c
+++ b/arch/arm64/kernel/cpuidle.c
@@ -19,7 +19,8 @@
{
int ret = -EOPNOTSUPP;
- if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_init_idle)
+ if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_suspend &&
+ cpu_ops[cpu]->cpu_init_idle)
ret = cpu_ops[cpu]->cpu_init_idle(cpu);
return ret;
@@ -36,11 +37,5 @@
{
int cpu = smp_processor_id();
- /*
- * If cpu_ops have not been registered or suspend
- * has not been initialized, cpu_suspend call fails early.
- */
- if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_suspend)
- return -EOPNOTSUPP;
return cpu_ops[cpu]->cpu_suspend(index);
}
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 84c8684..3808470 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -87,7 +87,8 @@
"idivt",
"vfpd32",
"lpae",
- "evtstrm"
+ "evtstrm",
+ NULL
};
static const char *const compat_hwcap2_str[] = {
@@ -216,23 +217,26 @@
info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1);
info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1);
- info->reg_id_dfr0 = read_cpuid(ID_DFR0_EL1);
- info->reg_id_isar0 = read_cpuid(ID_ISAR0_EL1);
- info->reg_id_isar1 = read_cpuid(ID_ISAR1_EL1);
- info->reg_id_isar2 = read_cpuid(ID_ISAR2_EL1);
- info->reg_id_isar3 = read_cpuid(ID_ISAR3_EL1);
- info->reg_id_isar4 = read_cpuid(ID_ISAR4_EL1);
- info->reg_id_isar5 = read_cpuid(ID_ISAR5_EL1);
- info->reg_id_mmfr0 = read_cpuid(ID_MMFR0_EL1);
- info->reg_id_mmfr1 = read_cpuid(ID_MMFR1_EL1);
- info->reg_id_mmfr2 = read_cpuid(ID_MMFR2_EL1);
- info->reg_id_mmfr3 = read_cpuid(ID_MMFR3_EL1);
- info->reg_id_pfr0 = read_cpuid(ID_PFR0_EL1);
- info->reg_id_pfr1 = read_cpuid(ID_PFR1_EL1);
+ /* Update the 32bit ID registers only if AArch32 is implemented */
+ if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) {
+ info->reg_id_dfr0 = read_cpuid(ID_DFR0_EL1);
+ info->reg_id_isar0 = read_cpuid(ID_ISAR0_EL1);
+ info->reg_id_isar1 = read_cpuid(ID_ISAR1_EL1);
+ info->reg_id_isar2 = read_cpuid(ID_ISAR2_EL1);
+ info->reg_id_isar3 = read_cpuid(ID_ISAR3_EL1);
+ info->reg_id_isar4 = read_cpuid(ID_ISAR4_EL1);
+ info->reg_id_isar5 = read_cpuid(ID_ISAR5_EL1);
+ info->reg_id_mmfr0 = read_cpuid(ID_MMFR0_EL1);
+ info->reg_id_mmfr1 = read_cpuid(ID_MMFR1_EL1);
+ info->reg_id_mmfr2 = read_cpuid(ID_MMFR2_EL1);
+ info->reg_id_mmfr3 = read_cpuid(ID_MMFR3_EL1);
+ info->reg_id_pfr0 = read_cpuid(ID_PFR0_EL1);
+ info->reg_id_pfr1 = read_cpuid(ID_PFR1_EL1);
- info->reg_mvfr0 = read_cpuid(MVFR0_EL1);
- info->reg_mvfr1 = read_cpuid(MVFR1_EL1);
- info->reg_mvfr2 = read_cpuid(MVFR2_EL1);
+ info->reg_mvfr0 = read_cpuid(MVFR0_EL1);
+ info->reg_mvfr1 = read_cpuid(MVFR1_EL1);
+ info->reg_mvfr2 = read_cpuid(MVFR2_EL1);
+ }
cpuinfo_detect_icache_policy(info);
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index c45f296..4fbf3c5 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -135,9 +135,8 @@
static int os_lock_notify(struct notifier_block *self,
unsigned long action, void *data)
{
- int cpu = (unsigned long)data;
if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE)
- smp_call_function_single(cpu, clear_os_lock, NULL, 1);
+ clear_os_lock(NULL);
return NOTIFY_OK;
}
diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S
index cae3112..e88c064 100644
--- a/arch/arm64/kernel/efi-entry.S
+++ b/arch/arm64/kernel/efi-entry.S
@@ -62,7 +62,7 @@
*/
mov x20, x0 // DTB address
ldr x0, [sp, #16] // relocated _text address
- movz x21, #:abs_g0:stext_offset
+ ldr w21, =stext_offset
add x21, x0, x21
/*
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index b6abc85..78f5248 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -17,22 +17,51 @@
#include <asm/efi.h>
+/*
+ * Only regions of type EFI_RUNTIME_SERVICES_CODE need to be
+ * executable; everything else can be mapped with the XN bits
+ * set. Also take the new (optional) RO/XP bits into account.
+ */
+static __init pteval_t create_mapping_protection(efi_memory_desc_t *md)
+{
+ u64 attr = md->attribute;
+ u32 type = md->type;
+
+ if (type == EFI_MEMORY_MAPPED_IO)
+ return PROT_DEVICE_nGnRE;
+
+ if (WARN_ONCE(!PAGE_ALIGNED(md->phys_addr),
+ "UEFI Runtime regions are not aligned to 64 KB -- buggy firmware?"))
+ /*
+ * If the region is not aligned to the page size of the OS, we
+		 * cannot use strict permissions, since that would also affect
+ * the mapping attributes of the adjacent regions.
+ */
+ return pgprot_val(PAGE_KERNEL_EXEC);
+
+ /* R-- */
+ if ((attr & (EFI_MEMORY_XP | EFI_MEMORY_RO)) ==
+ (EFI_MEMORY_XP | EFI_MEMORY_RO))
+ return pgprot_val(PAGE_KERNEL_RO);
+
+ /* R-X */
+ if (attr & EFI_MEMORY_RO)
+ return pgprot_val(PAGE_KERNEL_ROX);
+
+ /* RW- */
+ if (attr & EFI_MEMORY_XP || type != EFI_RUNTIME_SERVICES_CODE)
+ return pgprot_val(PAGE_KERNEL);
+
+ /* RWX */
+ return pgprot_val(PAGE_KERNEL_EXEC);
+}
+
+/* we will fill this structure from the stub, so don't put it in .bss */
+struct screen_info screen_info __section(.data);
+
int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md)
{
- pteval_t prot_val;
-
- /*
- * Only regions of type EFI_RUNTIME_SERVICES_CODE need to be
- * executable, everything else can be mapped with the XN bits
- * set.
- */
- if ((md->attribute & EFI_MEMORY_WB) == 0)
- prot_val = PROT_DEVICE_nGnRE;
- else if (md->type == EFI_RUNTIME_SERVICES_CODE ||
- !PAGE_ALIGNED(md->phys_addr))
- prot_val = pgprot_val(PAGE_KERNEL_EXEC);
- else
- prot_val = pgprot_val(PAGE_KERNEL);
+ pteval_t prot_val = create_mapping_protection(md);
create_pgd_mapping(mm, md->phys_addr, md->virt_addr,
md->num_pages << EFI_PAGE_SHIFT,
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 85da0f5..2c6e598 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -25,6 +25,7 @@
#include <linux/irqchip/arm-gic-v3.h>
#include <asm/assembler.h>
+#include <asm/boot.h>
#include <asm/ptrace.h>
#include <asm/asm-offsets.h>
#include <asm/cache.h>
@@ -51,9 +52,6 @@
#error TEXT_OFFSET must be less than 2MB
#endif
-#define KERNEL_START _text
-#define KERNEL_END _end
-
/*
* Kernel startup entry point.
* ---------------------------
@@ -102,8 +100,6 @@
#endif
#ifdef CONFIG_EFI
- .globl __efistub_stext_offset
- .set __efistub_stext_offset, stext - _head
.align 3
pe_header:
.ascii "PE"
@@ -123,11 +119,11 @@
.short 0x20b // PE32+ format
.byte 0x02 // MajorLinkerVersion
.byte 0x14 // MinorLinkerVersion
- .long _end - stext // SizeOfCode
+ .long _end - efi_header_end // SizeOfCode
.long 0 // SizeOfInitializedData
.long 0 // SizeOfUninitializedData
.long __efistub_entry - _head // AddressOfEntryPoint
- .long __efistub_stext_offset // BaseOfCode
+ .long efi_header_end - _head // BaseOfCode
extra_header_fields:
.quad 0 // ImageBase
@@ -144,7 +140,7 @@
.long _end - _head // SizeOfImage
// Everything before the kernel image is considered part of the header
- .long __efistub_stext_offset // SizeOfHeaders
+ .long efi_header_end - _head // SizeOfHeaders
.long 0 // CheckSum
.short 0xa // Subsystem (EFI application)
.short 0 // DllCharacteristics
@@ -188,10 +184,10 @@
.byte 0
.byte 0
.byte 0 // end of 0 padding of section name
- .long _end - stext // VirtualSize
- .long __efistub_stext_offset // VirtualAddress
- .long _edata - stext // SizeOfRawData
- .long __efistub_stext_offset // PointerToRawData
+ .long _end - efi_header_end // VirtualSize
+ .long efi_header_end - _head // VirtualAddress
+ .long _edata - efi_header_end // SizeOfRawData
+ .long efi_header_end - _head // PointerToRawData
.long 0 // PointerToRelocations (0 for executables)
.long 0 // PointerToLineNumbers (0 for executables)
@@ -200,20 +196,23 @@
.long 0xe0500020 // Characteristics (section flags)
/*
- * EFI will load stext onwards at the 4k section alignment
+ * EFI will load .text onwards at the 4k section alignment
* described in the PE/COFF header. To ensure that instruction
* sequences using an adrp and a :lo12: immediate will function
- * correctly at this alignment, we must ensure that stext is
+ * correctly at this alignment, we must ensure that .text is
* placed at a 4k boundary in the Image to begin with.
*/
.align 12
+efi_header_end:
#endif
+ __INIT
+
ENTRY(stext)
bl preserve_boot_args
bl el2_setup // Drop to EL1, w20=cpu_boot_mode
- mov x23, xzr // KASLR offset, defaults to 0
adrp x24, __PHYS_OFFSET
+ and x23, x24, MIN_KIMG_ALIGN - 1 // KASLR offset, defaults to 0
bl set_cpu_boot_mode_flag
bl __create_page_tables // x25=TTBR0, x26=TTBR1
/*
@@ -222,13 +221,11 @@
* On return, the CPU will be ready for the MMU to be turned on and
* the TCR will have been set.
*/
- ldr x27, 0f // address to jump to after
+ bl __cpu_setup // initialise processor
+ adr_l x27, __primary_switch // address to jump to after
// MMU has been enabled
- adr_l lr, __enable_mmu // return (PIC) address
- b __cpu_setup // initialise processor
+ b __enable_mmu
ENDPROC(stext)
- .align 3
-0: .quad __mmap_switched - (_head - TEXT_OFFSET) + KIMAGE_VADDR
/*
* Preserve the arguments passed by the bootloader in x0 .. x3
@@ -338,7 +335,7 @@
cmp x0, x6
b.lo 1b
- ldr x7, =SWAPPER_MM_MMUFLAGS
+ mov x7, SWAPPER_MM_MMUFLAGS
/*
* Create the identity mapping.
@@ -394,12 +391,13 @@
* Map the kernel image (starting with PHYS_OFFSET).
*/
mov x0, x26 // swapper_pg_dir
- ldr x5, =KIMAGE_VADDR
+ mov_q x5, KIMAGE_VADDR + TEXT_OFFSET // compile time __va(_text)
add x5, x5, x23 // add KASLR displacement
create_pgd_entry x0, x5, x3, x6
- ldr w6, kernel_img_size
- add x6, x6, x5
- mov x3, x24 // phys offset
+ adrp x6, _end // runtime __pa(_end)
+ adrp x3, _text // runtime __pa(_text)
+ sub x6, x6, x3 // _end - _text
+ add x6, x6, x5 // runtime __va(_end)
create_block_map x0, x7, x3, x5, x6
/*
@@ -414,16 +412,13 @@
ret x28
ENDPROC(__create_page_tables)
-
-kernel_img_size:
- .long _end - (_head - TEXT_OFFSET)
.ltorg
/*
* The following fragment of code is executed with the MMU enabled.
*/
.set initial_sp, init_thread_union + THREAD_START_SP
-__mmap_switched:
+__primary_switched:
mov x28, lr // preserve LR
adr_l x8, vectors // load VBAR_EL1 with virtual
msr vbar_el1, x8 // vector table address
@@ -437,44 +432,6 @@
bl __pi_memset
dsb ishst // Make zero page visible to PTW
-#ifdef CONFIG_RELOCATABLE
-
- /*
- * Iterate over each entry in the relocation table, and apply the
- * relocations in place.
- */
- adr_l x8, __dynsym_start // start of symbol table
- adr_l x9, __reloc_start // start of reloc table
- adr_l x10, __reloc_end // end of reloc table
-
-0: cmp x9, x10
- b.hs 2f
- ldp x11, x12, [x9], #24
- ldr x13, [x9, #-8]
- cmp w12, #R_AARCH64_RELATIVE
- b.ne 1f
- add x13, x13, x23 // relocate
- str x13, [x11, x23]
- b 0b
-
-1: cmp w12, #R_AARCH64_ABS64
- b.ne 0b
- add x12, x12, x12, lsl #1 // symtab offset: 24x top word
- add x12, x8, x12, lsr #(32 - 3) // ... shifted into bottom word
- ldrsh w14, [x12, #6] // Elf64_Sym::st_shndx
- ldr x15, [x12, #8] // Elf64_Sym::st_value
- cmp w14, #-0xf // SHN_ABS (0xfff1) ?
- add x14, x15, x23 // relocate
- csel x15, x14, x15, ne
- add x15, x13, x15
- str x15, [x11, x23]
- b 0b
-
-2: adr_l x8, kimage_vaddr // make relocated kimage_vaddr
- dc cvac, x8 // value visible to secondaries
- dsb sy // with MMU off
-#endif
-
adr_l sp, initial_sp, x4
mov x4, sp
and x4, x4, #~(THREAD_SIZE - 1)
@@ -490,17 +447,19 @@
bl kasan_early_init
#endif
#ifdef CONFIG_RANDOMIZE_BASE
- cbnz x23, 0f // already running randomized?
+ tst x23, ~(MIN_KIMG_ALIGN - 1) // already running randomized?
+ b.ne 0f
mov x0, x21 // pass FDT address in x0
+ mov x1, x23 // pass modulo offset in x1
bl kaslr_early_init // parse FDT for KASLR options
cbz x0, 0f // KASLR disabled? just proceed
- mov x23, x0 // record KASLR offset
+ orr x23, x23, x0 // record KASLR offset
ret x28 // we must enable KASLR, return
// to __enable_mmu()
0:
#endif
b start_kernel
-ENDPROC(__mmap_switched)
+ENDPROC(__primary_switched)
/*
* end early head section, begin head code that is also used for
@@ -650,7 +609,7 @@
* Sets the __boot_cpu_mode flag depending on the CPU boot mode passed
* in x20. See arch/arm64/include/asm/virt.h for more info.
*/
-ENTRY(set_cpu_boot_mode_flag)
+set_cpu_boot_mode_flag:
adr_l x1, __boot_cpu_mode
cmp w20, #BOOT_CPU_MODE_EL2
b.ne 1f
@@ -683,7 +642,7 @@
bl el2_setup // Drop to EL1, w20=cpu_boot_mode
bl set_cpu_boot_mode_flag
mrs x0, mpidr_el1
- ldr x1, =MPIDR_HWID_BITMASK
+ mov_q x1, MPIDR_HWID_BITMASK
and x0, x0, x1
adr_l x3, secondary_holding_pen_release
pen: ldr x4, [x3]
@@ -703,7 +662,7 @@
b secondary_startup
ENDPROC(secondary_entry)
-ENTRY(secondary_startup)
+secondary_startup:
/*
* Common entry point for secondary CPUs.
*/
@@ -711,14 +670,11 @@
adrp x26, swapper_pg_dir
bl __cpu_setup // initialise processor
- ldr x8, kimage_vaddr
- ldr w9, 0f
- sub x27, x8, w9, sxtw // address to jump to after enabling the MMU
+ adr_l x27, __secondary_switch // address to jump to after enabling the MMU
b __enable_mmu
ENDPROC(secondary_startup)
-0: .long (_text - TEXT_OFFSET) - __secondary_switched
-ENTRY(__secondary_switched)
+__secondary_switched:
adr_l x5, vectors
msr vbar_el1, x5
isb
@@ -768,7 +724,7 @@
* If it isn't, park the CPU
*/
.section ".idmap.text", "ax"
-__enable_mmu:
+ENTRY(__enable_mmu)
mrs x22, sctlr_el1 // preserve old SCTLR_EL1 value
mrs x1, ID_AA64MMFR0_EL1
ubfx x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4
@@ -806,7 +762,6 @@
ic iallu // flush instructions fetched
dsb nsh // via old mapping
isb
- add x27, x27, x23 // relocated __mmap_switched
#endif
br x27
ENDPROC(__enable_mmu)
@@ -819,3 +774,53 @@
wfi
b 1b
ENDPROC(__no_granule_support)
+
+__primary_switch:
+#ifdef CONFIG_RELOCATABLE
+ /*
+ * Iterate over each entry in the relocation table, and apply the
+ * relocations in place.
+ */
+ ldr w8, =__dynsym_offset // offset to symbol table
+ ldr w9, =__rela_offset // offset to reloc table
+ ldr w10, =__rela_size // size of reloc table
+
+ mov_q x11, KIMAGE_VADDR // default virtual offset
+ add x11, x11, x23 // actual virtual offset
+ add x8, x8, x11 // __va(.dynsym)
+ add x9, x9, x11 // __va(.rela)
+ add x10, x9, x10 // __va(.rela) + sizeof(.rela)
+
+0: cmp x9, x10
+ b.hs 2f
+ ldp x11, x12, [x9], #24
+ ldr x13, [x9, #-8]
+ cmp w12, #R_AARCH64_RELATIVE
+ b.ne 1f
+ add x13, x13, x23 // relocate
+ str x13, [x11, x23]
+ b 0b
+
+1: cmp w12, #R_AARCH64_ABS64
+ b.ne 0b
+ add x12, x12, x12, lsl #1 // symtab offset: 24x top word
+ add x12, x8, x12, lsr #(32 - 3) // ... shifted into bottom word
+ ldrsh w14, [x12, #6] // Elf64_Sym::st_shndx
+ ldr x15, [x12, #8] // Elf64_Sym::st_value
+ cmp w14, #-0xf // SHN_ABS (0xfff1) ?
+ add x14, x15, x23 // relocate
+ csel x15, x14, x15, ne
+ add x15, x13, x15
+ str x15, [x11, x23]
+ b 0b
+
+2:
+#endif
+ ldr x8, =__primary_switched
+ br x8
+ENDPROC(__primary_switch)
+
+__secondary_switch:
+ ldr x8, =__secondary_switched
+ br x8
+ENDPROC(__secondary_switch)
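
A rough C rendering of the relocation loop in __primary_switch above may be
useful; this is a sketch only (variable names are illustrative, the
Elf64_Rela layout and ELF64_R_TYPE() come from the standard ELF headers, not
from this patch, and the ABS64 path is omitted):

	Elf64_Rela *rela = rela_start;		/* x9:  __va(.rela)      */
	Elf64_Rela *end  = rela_end;		/* x10: end of the table */
	u64 off = kaslr_offset;			/* x23                   */

	for (; rela < end; rela++) {
		if (ELF64_R_TYPE(rela->r_info) != R_AARCH64_RELATIVE)
			continue;
		/* patch the relocated location with the relocated value */
		*(u64 *)(rela->r_offset + off) = rela->r_addend + off;
	}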
diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S
new file mode 100644
index 0000000..46f29b6
--- /dev/null
+++ b/arch/arm64/kernel/hibernate-asm.S
@@ -0,0 +1,176 @@
+/*
+ * Hibernate low-level support
+ *
+ * Copyright (C) 2016 ARM Ltd.
+ * Author: James Morse <james.morse@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/linkage.h>
+#include <linux/errno.h>
+
+#include <asm/asm-offsets.h>
+#include <asm/assembler.h>
+#include <asm/cputype.h>
+#include <asm/memory.h>
+#include <asm/page.h>
+#include <asm/virt.h>
+
+/*
+ * To prevent the possibility of old and new partial table walks being visible
+ * in the tlb, switch the ttbr to a zero page when we invalidate the old
+ * records; see D4.7.1 'General TLB maintenance requirements' in ARM DDI 0487A.i.
+ * Even switching to our copied tables will cause a changed output address at
+ * each stage of the walk.
+ */
+.macro break_before_make_ttbr_switch zero_page, page_table
+ msr ttbr1_el1, \zero_page
+ isb
+ tlbi vmalle1is
+ dsb ish
+ msr ttbr1_el1, \page_table
+ isb
+.endm
+
+
+/*
+ * Resume from hibernate
+ *
+ * Loads temporary page tables then restores the memory image.
+ * Finally branches to cpu_resume() to restore the state saved by
+ * swsusp_arch_suspend().
+ *
+ * Because this code has to be copied to a 'safe' page, it can't call out to
+ * other functions by PC-relative address. Also remember that it may be
+ * mid-way through over-writing other functions. For this reason it contains
+ * code from flush_icache_range() and uses the copy_page() macro.
+ *
+ * This 'safe' page is mapped via ttbr0, and executed from there. This function
+ * switches to a copy of the linear map in ttbr1, performs the restore, then
+ * switches ttbr1 to the original kernel's swapper_pg_dir.
+ *
+ * All of memory gets written to, including code. We need to clean the kernel
+ * text to the Point of Coherence (PoC) before secondary cores can be booted.
+ * Because the kernel modules and executable pages mapped to user space are
+ * also written as data, we clean all pages we touch to the Point of
+ * Unification (PoU).
+ *
+ * x0: physical address of temporary page tables
+ * x1: physical address of swapper page tables
+ * x2: address of cpu_resume
+ * x3: linear map address of restore_pblist in the current kernel
+ * x4: physical address of __hyp_stub_vectors, or 0
+ * x5: physical address of a zero page that remains zero after resume
+ */
+.pushsection ".hibernate_exit.text", "ax"
+ENTRY(swsusp_arch_suspend_exit)
+ /*
+ * We execute from ttbr0, change ttbr1 to our copied linear map tables
+ * with a break-before-make via the zero page
+ */
+ break_before_make_ttbr_switch x5, x0
+
+ mov x21, x1
+ mov x30, x2
+ mov x24, x4
+ mov x25, x5
+
+ /* walk the restore_pblist and use copy_page() to over-write memory */
+ mov x19, x3
+
+1: ldr x10, [x19, #HIBERN_PBE_ORIG]
+ mov x0, x10
+ ldr x1, [x19, #HIBERN_PBE_ADDR]
+
+ copy_page x0, x1, x2, x3, x4, x5, x6, x7, x8, x9
+
+ add x1, x10, #PAGE_SIZE
+ /* Clean the copied page to PoU - based on flush_icache_range() */
+ dcache_line_size x2, x3
+ sub x3, x2, #1
+ bic x4, x10, x3
+2: dc cvau, x4 /* clean D line / unified line */
+ add x4, x4, x2
+ cmp x4, x1
+ b.lo 2b
+
+ ldr x19, [x19, #HIBERN_PBE_NEXT]
+ cbnz x19, 1b
+ dsb ish /* wait for PoU cleaning to finish */
+
+ /* switch to the restored kernel's page tables */
+ break_before_make_ttbr_switch x25, x21
+
+ ic ialluis
+ dsb ish
+ isb
+
+ cbz x24, 3f /* Do we need to re-initialise EL2? */
+ hvc #0
+3: ret
+
+ .ltorg
+ENDPROC(swsusp_arch_suspend_exit)
+
+/*
+ * Restore the hyp stub.
+ * This must be done before the hibernate page is unmapped by _cpu_resume(),
+ * but happens before any of the hyp-stub's code is cleaned to PoC.
+ *
+ * x24: The physical address of __hyp_stub_vectors
+ */
+el1_sync:
+ msr vbar_el2, x24
+ eret
+ENDPROC(el1_sync)
+
+.macro invalid_vector label
+\label:
+ b \label
+ENDPROC(\label)
+.endm
+
+ invalid_vector el2_sync_invalid
+ invalid_vector el2_irq_invalid
+ invalid_vector el2_fiq_invalid
+ invalid_vector el2_error_invalid
+ invalid_vector el1_sync_invalid
+ invalid_vector el1_irq_invalid
+ invalid_vector el1_fiq_invalid
+ invalid_vector el1_error_invalid
+
+/* el2 vectors - switch el2 here while we restore the memory image. */
+ .align 11
+ENTRY(hibernate_el2_vectors)
+ ventry el2_sync_invalid // Synchronous EL2t
+ ventry el2_irq_invalid // IRQ EL2t
+ ventry el2_fiq_invalid // FIQ EL2t
+ ventry el2_error_invalid // Error EL2t
+
+ ventry el2_sync_invalid // Synchronous EL2h
+ ventry el2_irq_invalid // IRQ EL2h
+ ventry el2_fiq_invalid // FIQ EL2h
+ ventry el2_error_invalid // Error EL2h
+
+ ventry el1_sync // Synchronous 64-bit EL1
+ ventry el1_irq_invalid // IRQ 64-bit EL1
+ ventry el1_fiq_invalid // FIQ 64-bit EL1
+ ventry el1_error_invalid // Error 64-bit EL1
+
+ ventry el1_sync_invalid // Synchronous 32-bit EL1
+ ventry el1_irq_invalid // IRQ 32-bit EL1
+ ventry el1_fiq_invalid // FIQ 32-bit EL1
+ ventry el1_error_invalid // Error 32-bit EL1
+END(hibernate_el2_vectors)
+
+.popsection
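
The break_before_make_ttbr_switch macro above encodes the architectural
break-before-make requirement for TTBR changes. A C-level sketch of the same
sequence (the helper names here are hypothetical, not kernel APIs):

	write_ttbr1(zero_page_pa);	/* "break": msr ttbr1_el1, \zero_page */
	isb();				/* synchronise the register write     */
	flush_tlb_all_inner();		/* tlbi vmalle1is; dsb ish            */
	write_ttbr1(page_table_pa);	/* "make": msr ttbr1_el1, \page_table */
	isb();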
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
new file mode 100644
index 0000000..f8df75d
--- /dev/null
+++ b/arch/arm64/kernel/hibernate.c
@@ -0,0 +1,487 @@
+/*
+ * Hibernate support specific for ARM64
+ *
+ * Derived from work on ARM hibernation support by:
+ *
+ * Ubuntu project, hibernation support for mach-dove
+ * Copyright (C) 2010 Nokia Corporation (Hiroshi Doyu)
+ * Copyright (C) 2010 Texas Instruments, Inc. (Teerth Reddy et al.)
+ * https://lkml.org/lkml/2010/6/18/4
+ * https://lists.linux-foundation.org/pipermail/linux-pm/2010-June/027422.html
+ * https://patchwork.kernel.org/patch/96442/
+ *
+ * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
+ *
+ * License terms: GNU General Public License (GPL) version 2
+ */
+#define pr_fmt(x) "hibernate: " x
+#include <linux/kvm_host.h>
+#include <linux/mm.h>
+#include <linux/notifier.h>
+#include <linux/pm.h>
+#include <linux/sched.h>
+#include <linux/suspend.h>
+#include <linux/utsname.h>
+#include <linux/version.h>
+
+#include <asm/barrier.h>
+#include <asm/cacheflush.h>
+#include <asm/irqflags.h>
+#include <asm/memory.h>
+#include <asm/mmu_context.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/pgtable-hwdef.h>
+#include <asm/sections.h>
+#include <asm/suspend.h>
+#include <asm/virt.h>
+
+/*
+ * Hibernate core relies on this value being 0 on resume, and marks it
+ * __nosavedata assuming it will keep the resume kernel's '0' value. This
+ * doesn't happen with KASLR.
+ *
+ * defined as "__visible int in_suspend __nosavedata" in
+ * kernel/power/hibernate.c
+ */
+extern int in_suspend;
+
+/* Find a symbol's alias in the linear map */
+#define LMADDR(x) phys_to_virt(virt_to_phys(x))
+
+/* Do we need to reset el2? */
+#define el2_reset_needed() (is_hyp_mode_available() && !is_kernel_in_hyp_mode())
+
+/*
+ * Start/end of the hibernate exit code, this must be copied to a 'safe'
+ * location in memory, and executed from there.
+ */
+extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[];
+
+/* temporary el2 vectors in the __hibernate_exit_text section. */
+extern char hibernate_el2_vectors[];
+
+/* hyp-stub vectors, used to restore el2 during resume from hibernate. */
+extern char __hyp_stub_vectors[];
+
+/*
+ * Values that may not change over hibernate/resume. We put the build number
+ * and date in here so that we guarantee not to resume with a different
+ * kernel.
+ */
+struct arch_hibernate_hdr_invariants {
+ char uts_version[__NEW_UTS_LEN + 1];
+};
+
+/* These values need to be known across a hibernate/restore. */
+static struct arch_hibernate_hdr {
+ struct arch_hibernate_hdr_invariants invariants;
+
+ /* These are needed to find the relocated kernel if built with kaslr */
+ phys_addr_t ttbr1_el1;
+ void (*reenter_kernel)(void);
+
+ /*
+ * We need to know where the __hyp_stub_vectors are after restore to
+ * re-configure el2.
+ */
+ phys_addr_t __hyp_stub_vectors;
+} resume_hdr;
+
+static inline void arch_hdr_invariants(struct arch_hibernate_hdr_invariants *i)
+{
+ memset(i, 0, sizeof(*i));
+ memcpy(i->uts_version, init_utsname()->version, sizeof(i->uts_version));
+}
+
+int pfn_is_nosave(unsigned long pfn)
+{
+ unsigned long nosave_begin_pfn = virt_to_pfn(&__nosave_begin);
+ unsigned long nosave_end_pfn = virt_to_pfn(&__nosave_end - 1);
+
+ return (pfn >= nosave_begin_pfn) && (pfn <= nosave_end_pfn);
+}
+
+void notrace save_processor_state(void)
+{
+ WARN_ON(num_online_cpus() != 1);
+}
+
+void notrace restore_processor_state(void)
+{
+}
+
+int arch_hibernation_header_save(void *addr, unsigned int max_size)
+{
+ struct arch_hibernate_hdr *hdr = addr;
+
+ if (max_size < sizeof(*hdr))
+ return -EOVERFLOW;
+
+ arch_hdr_invariants(&hdr->invariants);
+ hdr->ttbr1_el1 = virt_to_phys(swapper_pg_dir);
+ hdr->reenter_kernel = _cpu_resume;
+
+ /* We can't use __hyp_get_vectors() because kvm may still be loaded */
+ if (el2_reset_needed())
+ hdr->__hyp_stub_vectors = virt_to_phys(__hyp_stub_vectors);
+ else
+ hdr->__hyp_stub_vectors = 0;
+
+ return 0;
+}
+EXPORT_SYMBOL(arch_hibernation_header_save);
+
+int arch_hibernation_header_restore(void *addr)
+{
+ struct arch_hibernate_hdr_invariants invariants;
+ struct arch_hibernate_hdr *hdr = addr;
+
+ arch_hdr_invariants(&invariants);
+ if (memcmp(&hdr->invariants, &invariants, sizeof(invariants))) {
+ pr_crit("Hibernate image not generated by this kernel!\n");
+ return -EINVAL;
+ }
+
+ resume_hdr = *hdr;
+
+ return 0;
+}
+EXPORT_SYMBOL(arch_hibernation_header_restore);
+
+/*
+ * Copies length bytes, starting at src_start, into a new page, performs
+ * cache maintenance, then maps the page at the specified low address as
+ * executable.
+ *
+ * This is used by hibernate to copy the code it needs to execute when
+ * overwriting the kernel text. This function generates a new set of page
+ * tables, which it loads into ttbr0.
+ *
+ * Length is provided as we probably only want 4K of data, even on a 64K
+ * page system.
+ */
+static int create_safe_exec_page(void *src_start, size_t length,
+ unsigned long dst_addr,
+ phys_addr_t *phys_dst_addr,
+ void *(*allocator)(gfp_t mask),
+ gfp_t mask)
+{
+ int rc = 0;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+ unsigned long dst = (unsigned long)allocator(mask);
+
+ if (!dst) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ memcpy((void *)dst, src_start, length);
+ flush_icache_range(dst, dst + length);
+
+ pgd = pgd_offset_raw(allocator(mask), dst_addr);
+ if (pgd_none(*pgd)) {
+ pud = allocator(mask);
+ if (!pud) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ pgd_populate(&init_mm, pgd, pud);
+ }
+
+ pud = pud_offset(pgd, dst_addr);
+ if (pud_none(*pud)) {
+ pmd = allocator(mask);
+ if (!pmd) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ pud_populate(&init_mm, pud, pmd);
+ }
+
+ pmd = pmd_offset(pud, dst_addr);
+ if (pmd_none(*pmd)) {
+ pte = allocator(mask);
+ if (!pte) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ pmd_populate_kernel(&init_mm, pmd, pte);
+ }
+
+ pte = pte_offset_kernel(pmd, dst_addr);
+ set_pte(pte, __pte(virt_to_phys((void *)dst) |
+ pgprot_val(PAGE_KERNEL_EXEC)));
+
+ /* Load our new page tables */
+ asm volatile("msr ttbr0_el1, %0;"
+ "isb;"
+ "tlbi vmalle1is;"
+ "dsb ish;"
+ "isb" : : "r"(virt_to_phys(pgd)));
+
+ *phys_dst_addr = virt_to_phys((void *)dst);
+
+out:
+ return rc;
+}
+
+
+int swsusp_arch_suspend(void)
+{
+ int ret = 0;
+ unsigned long flags;
+ struct sleep_stack_data state;
+
+ local_dbg_save(flags);
+
+ if (__cpu_suspend_enter(&state)) {
+ ret = swsusp_save();
+ } else {
+ /* Clean kernel to PoC for secondary core startup */
+ __flush_dcache_area(LMADDR(KERNEL_START), KERNEL_END - KERNEL_START);
+
+ /*
+ * Tell the hibernation core that we've just restored
+ * the memory
+ */
+ in_suspend = 0;
+
+ __cpu_suspend_exit();
+ }
+
+ local_dbg_restore(flags);
+
+ return ret;
+}
+
+static int copy_pte(pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long start,
+ unsigned long end)
+{
+ pte_t *src_pte;
+ pte_t *dst_pte;
+ unsigned long addr = start;
+
+ dst_pte = (pte_t *)get_safe_page(GFP_ATOMIC);
+ if (!dst_pte)
+ return -ENOMEM;
+ pmd_populate_kernel(&init_mm, dst_pmd, dst_pte);
+ dst_pte = pte_offset_kernel(dst_pmd, start);
+
+ src_pte = pte_offset_kernel(src_pmd, start);
+ do {
+ if (!pte_none(*src_pte))
+ /*
+ * Resume will overwrite areas that may be marked
+ * read only (code, rodata). Clear the RDONLY bit from
+ * the temporary mappings we use during restore.
+ */
+ set_pte(dst_pte, __pte(pte_val(*src_pte) & ~PTE_RDONLY));
+ } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
+
+ return 0;
+}
+
+static int copy_pmd(pud_t *dst_pud, pud_t *src_pud, unsigned long start,
+ unsigned long end)
+{
+ pmd_t *src_pmd;
+ pmd_t *dst_pmd;
+ unsigned long next;
+ unsigned long addr = start;
+
+ if (pud_none(*dst_pud)) {
+ dst_pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
+ if (!dst_pmd)
+ return -ENOMEM;
+ pud_populate(&init_mm, dst_pud, dst_pmd);
+ }
+ dst_pmd = pmd_offset(dst_pud, start);
+
+ src_pmd = pmd_offset(src_pud, start);
+ do {
+ next = pmd_addr_end(addr, end);
+ if (pmd_none(*src_pmd))
+ continue;
+ if (pmd_table(*src_pmd)) {
+ if (copy_pte(dst_pmd, src_pmd, addr, next))
+ return -ENOMEM;
+ } else {
+ set_pmd(dst_pmd,
+ __pmd(pmd_val(*src_pmd) & ~PMD_SECT_RDONLY));
+ }
+ } while (dst_pmd++, src_pmd++, addr = next, addr != end);
+
+ return 0;
+}
+
+static int copy_pud(pgd_t *dst_pgd, pgd_t *src_pgd, unsigned long start,
+ unsigned long end)
+{
+ pud_t *dst_pud;
+ pud_t *src_pud;
+ unsigned long next;
+ unsigned long addr = start;
+
+ if (pgd_none(*dst_pgd)) {
+ dst_pud = (pud_t *)get_safe_page(GFP_ATOMIC);
+ if (!dst_pud)
+ return -ENOMEM;
+ pgd_populate(&init_mm, dst_pgd, dst_pud);
+ }
+ dst_pud = pud_offset(dst_pgd, start);
+
+ src_pud = pud_offset(src_pgd, start);
+ do {
+ next = pud_addr_end(addr, end);
+ if (pud_none(*src_pud))
+ continue;
+ if (pud_table(*(src_pud))) {
+ if (copy_pmd(dst_pud, src_pud, addr, next))
+ return -ENOMEM;
+ } else {
+ set_pud(dst_pud,
+ __pud(pud_val(*src_pud) & ~PMD_SECT_RDONLY));
+ }
+ } while (dst_pud++, src_pud++, addr = next, addr != end);
+
+ return 0;
+}
+
+static int copy_page_tables(pgd_t *dst_pgd, unsigned long start,
+ unsigned long end)
+{
+ unsigned long next;
+ unsigned long addr = start;
+ pgd_t *src_pgd = pgd_offset_k(start);
+
+ dst_pgd = pgd_offset_raw(dst_pgd, start);
+ do {
+ next = pgd_addr_end(addr, end);
+ if (pgd_none(*src_pgd))
+ continue;
+ if (copy_pud(dst_pgd, src_pgd, addr, next))
+ return -ENOMEM;
+ } while (dst_pgd++, src_pgd++, addr = next, addr != end);
+
+ return 0;
+}
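+
+/*
+ * Note (a sketch of the semantics, not new behaviour): the caller below
+ * passes (PAGE_OFFSET, 0); because pgd_addr_end() handles the wrap-around,
+ * an 'end' of 0 walks from PAGE_OFFSET to the top of the address space,
+ * i.e. exactly the linear map.
+ */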
+
+/*
+ * Set up, then resume from, the hibernate image using swsusp_arch_suspend_exit().
+ *
+ * Memory allocated by get_safe_page() will be dealt with by the hibernate
+ * code, so we don't need to free it here.
+ */
+int swsusp_arch_resume(void)
+{
+ int rc = 0;
+ void *zero_page;
+ size_t exit_size;
+ pgd_t *tmp_pg_dir;
+ void *lm_restore_pblist;
+ phys_addr_t phys_hibernate_exit;
+ void __noreturn (*hibernate_exit)(phys_addr_t, phys_addr_t, void *,
+ void *, phys_addr_t, phys_addr_t);
+
+ /*
+ * Locate the exit code in the bottom-but-one page, so that *NULL
+ * still has disastrous effects.
+ */
+ hibernate_exit = (void *)PAGE_SIZE;
+ exit_size = __hibernate_exit_text_end - __hibernate_exit_text_start;
+ /*
+ * Copy swsusp_arch_suspend_exit() to a safe page. This will generate
+ * a new set of ttbr0 page tables and load them.
+ */
+ rc = create_safe_exec_page(__hibernate_exit_text_start, exit_size,
+ (unsigned long)hibernate_exit,
+ &phys_hibernate_exit,
+ (void *)get_safe_page, GFP_ATOMIC);
+ if (rc) {
+ pr_err("Failed to create safe executable page for hibernate_exit code.");
+ goto out;
+ }
+
+ /*
+ * The hibernate exit text contains a set of el2 vectors that will
+ * be executed at el2 with the mmu off in order to reload the hyp-stub.
+ */
+ __flush_dcache_area(hibernate_exit, exit_size);
+
+ /*
+ * Restoring the memory image will overwrite the ttbr1 page tables.
+ * Create a second copy of just the linear map, and use this when
+ * restoring.
+ */
+ tmp_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC);
+ if (!tmp_pg_dir) {
+ pr_err("Failed to allocate memory for temporary page tables.");
+ rc = -ENOMEM;
+ goto out;
+ }
+ rc = copy_page_tables(tmp_pg_dir, PAGE_OFFSET, 0);
+ if (rc)
+ goto out;
+
+ /*
+ * Since we only copied the linear map, we need to find restore_pblist's
+ * linear map address.
+ */
+ lm_restore_pblist = LMADDR(restore_pblist);
+
+ /*
+ * KASLR will cause the el2 vectors to be in a different location in
+ * the resumed kernel. Load hibernate's temporary copy into el2.
+ *
+ * We can skip this step if we booted at EL1, or are running with VHE.
+ */
+ if (el2_reset_needed()) {
+ phys_addr_t el2_vectors = phys_hibernate_exit; /* base */
+ el2_vectors += hibernate_el2_vectors -
+ __hibernate_exit_text_start; /* offset */
+
+ __hyp_set_vectors(el2_vectors);
+ }
+
+ /*
+ * We need a zero page that is zero before & after resume in order to
+ * break before make on the ttbr1 page tables.
+ */
+ zero_page = (void *)get_safe_page(GFP_ATOMIC);
+
+ hibernate_exit(virt_to_phys(tmp_pg_dir), resume_hdr.ttbr1_el1,
+ resume_hdr.reenter_kernel, lm_restore_pblist,
+ resume_hdr.__hyp_stub_vectors, virt_to_phys(zero_page));
+
+out:
+ return rc;
+}
+
+static int check_boot_cpu_online_pm_callback(struct notifier_block *nb,
+ unsigned long action, void *ptr)
+{
+ if (action == PM_HIBERNATION_PREPARE &&
+ cpumask_first(cpu_online_mask) != 0) {
+ pr_warn("CPU0 is offline.\n");
+ return notifier_from_errno(-ENODEV);
+ }
+
+ return NOTIFY_OK;
+}
+
+static int __init check_boot_cpu_online_init(void)
+{
+ /*
+ * Set this pm_notifier callback with a lower priority than
+ * cpu_hotplug_pm_callback, so that cpu_hotplug_pm_callback will be
+ * called earlier to disable cpu hotplug before the cpu online check.
+ */
+ pm_notifier(check_boot_cpu_online_pm_callback, -INT_MAX);
+
+ return 0;
+}
+core_initcall(check_boot_cpu_online_init);
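
One helper in the file above is worth a worked illustration: LMADDR(). On
this arm64 vintage, virt_to_phys() accepts both kernel-image and linear-map
addresses while phys_to_virt() always returns the linear-map alias (an
assumption based on the standard helpers of this era), so, as a sketch:

	/* linear-map alias of the image symbol _text */
	void *lm_text = LMADDR(KERNEL_START);
	/* == phys_to_virt(virt_to_phys(_text)); this is what
	 * swsusp_arch_suspend() uses to clean the kernel text by its
	 * linear-map address. */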
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index b45c95d..ce21aa8 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -616,7 +616,7 @@
perf_bp_event(bp, regs);
/* Do we need to handle the stepping? */
- if (!bp->overflow_handler)
+ if (is_default_overflow_handler(bp))
step = 1;
unlock:
rcu_read_unlock();
@@ -712,7 +712,7 @@
perf_bp_event(wp, regs);
/* Do we need to handle the stepping? */
- if (!wp->overflow_handler)
+ if (is_default_overflow_handler(wp))
step = 1;
unlock:
@@ -886,9 +886,11 @@
unsigned long action,
void *hcpu)
{
- int cpu = (long)hcpu;
- if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE)
- smp_call_function_single(cpu, hw_breakpoint_reset, NULL, 1);
+ if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE) {
+ local_irq_disable();
+ hw_breakpoint_reset(NULL);
+ local_irq_enable();
+ }
return NOTIFY_OK;
}
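
For reference, the is_default_overflow_handler() helper adopted in the two
hunks above boils down to a comparison against perf's default output
handler -- roughly (a sketch, not the exact <linux/perf_event.h> definition):

	static inline bool is_default_overflow_handler(struct perf_event *bp)
	{
		/* true when no custom overflow handler was installed */
		return bp->overflow_handler == perf_event_output;
	}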
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index a272f33..8727f44 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -22,6 +22,8 @@
#include <linux/irqchip/arm-gic-v3.h>
#include <asm/assembler.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_asm.h>
#include <asm/ptrace.h>
#include <asm/virt.h>
@@ -53,15 +55,26 @@
.align 11
el1_sync:
- mrs x1, esr_el2
- lsr x1, x1, #26
- cmp x1, #0x16
- b.ne 2f // Not an HVC trap
- cbz x0, 1f
- msr vbar_el2, x0 // Set vbar_el2
- b 2f
-1: mrs x0, vbar_el2 // Return vbar_el2
-2: eret
+ mrs x30, esr_el2
+ lsr x30, x30, #ESR_ELx_EC_SHIFT
+
+ cmp x30, #ESR_ELx_EC_HVC64
+ b.ne 9f // Not an HVC trap
+
+ cmp x0, #HVC_GET_VECTORS
+ b.ne 1f
+ mrs x0, vbar_el2
+ b 9f
+
+1: cmp x0, #HVC_SET_VECTORS
+ b.ne 2f
+ msr vbar_el2, x1
+ b 9f
+
+ /* Someone called kvm_call_hyp() against the hyp-stub... */
+2: mov x0, #ARM_EXCEPTION_HYP_GONE
+
+9: eret
ENDPROC(el1_sync)
.macro invalid_vector label
@@ -101,10 +114,18 @@
*/
ENTRY(__hyp_get_vectors)
- mov x0, xzr
- // fall through
-ENTRY(__hyp_set_vectors)
+ str lr, [sp, #-16]!
+ mov x0, #HVC_GET_VECTORS
hvc #0
+ ldr lr, [sp], #16
ret
ENDPROC(__hyp_get_vectors)
+
+ENTRY(__hyp_set_vectors)
+ str lr, [sp, #-16]!
+ mov x1, x0
+ mov x0, #HVC_SET_VECTORS
+ hvc #0
+ ldr lr, [sp], #16
+ ret
ENDPROC(__hyp_set_vectors)
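
The reworked el1_sync handler above defines a small protocol over "hvc #0";
summarised as pseudo-C (a sketch of the register convention only):

	switch (x0) {
	case HVC_GET_VECTORS:		/* return VBAR_EL2 in x0         */
		x0 = read_sysreg(vbar_el2);
		break;
	case HVC_SET_VECTORS:		/* install x1 as the EL2 vectors */
		write_sysreg(x1, vbar_el2);
		break;
	default:			/* stale kvm_call_hyp() caller   */
		x0 = ARM_EXCEPTION_HYP_GONE;
	}
	/* eret */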
diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h
index 5e360ce..c7fcb23 100644
--- a/arch/arm64/kernel/image.h
+++ b/arch/arm64/kernel/image.h
@@ -73,6 +73,8 @@
#ifdef CONFIG_EFI
+__efistub_stext_offset = stext - _text;
+
/*
* Prevent the symbol aliases below from being emitted into the kallsyms
* table, by forcing them to be absolute symbols (which are conveniently
@@ -112,6 +114,7 @@
__efistub__text = KALLSYMS_HIDE(_text);
__efistub__end = KALLSYMS_HIDE(_end);
__efistub__edata = KALLSYMS_HIDE(_edata);
+__efistub_screen_info = KALLSYMS_HIDE(screen_info);
#endif
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index 7371455..368c082 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -96,7 +96,7 @@
if (module && IS_ENABLED(CONFIG_DEBUG_SET_MODULE_RONX))
page = vmalloc_to_page(addr);
else if (!module && IS_ENABLED(CONFIG_DEBUG_RODATA))
- page = virt_to_page(addr);
+ page = pfn_to_page(PHYS_PFN(__pa(addr)));
else
return addr;
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index 5829839..b054691 100644
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -74,7 +74,7 @@
* containing function pointers) to be reinitialized, and zero-initialized
* .bss variables will be reset to 0.
*/
-u64 __init kaslr_early_init(u64 dt_phys)
+u64 __init kaslr_early_init(u64 dt_phys, u64 modulo_offset)
{
void *fdt;
u64 seed, offset, mask, module_range;
@@ -132,8 +132,8 @@
* boundary (for 4KB/16KB/64KB granule kernels, respectively). If this
* happens, increase the KASLR offset by the size of the kernel image.
*/
- if ((((u64)_text + offset) >> SWAPPER_TABLE_SHIFT) !=
- (((u64)_end + offset) >> SWAPPER_TABLE_SHIFT))
+ if ((((u64)_text + offset + modulo_offset) >> SWAPPER_TABLE_SHIFT) !=
+ (((u64)_end + offset + modulo_offset) >> SWAPPER_TABLE_SHIFT))
offset = (offset + (u64)(_end - _text)) & mask;
if (IS_ENABLED(CONFIG_KASAN))
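
To make the boundary check above concrete: with a 4KB granule,
SWAPPER_TABLE_SHIFT is 30 (assuming the usual kernel-pgtable.h values, i.e. a
1GB table span), so the test asks whether _text and _end still land in the
same 1GB-aligned region once both the random offset and the modulo offset are
applied. A sketch with hypothetical numbers:

	/* 4KB granule, illustrative values only */
	(_text + offset + modulo_offset) >> 30	/* == 2              */
	(_end  + offset + modulo_offset) >> 30	/* == 3: it straddles */
	offset = (offset + (u64)(_end - _text)) & mask;	/* so bump it */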
diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c
index c72de66..3c4e308 100644
--- a/arch/arm64/kernel/pci.c
+++ b/arch/arm64/kernel/pci.c
@@ -74,6 +74,16 @@
return -ENXIO;
}
+#ifdef CONFIG_NUMA
+
+int pcibus_to_node(struct pci_bus *bus)
+{
+ return dev_to_node(&bus->dev);
+}
+EXPORT_SYMBOL(pcibus_to_node);
+
+#endif
+
#ifdef CONFIG_ACPI
/* Root bridge scanning */
struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c
index ff46654..32c3c6e 100644
--- a/arch/arm64/kernel/perf_callchain.c
+++ b/arch/arm64/kernel/perf_callchain.c
@@ -122,7 +122,7 @@
tail = (struct frame_tail __user *)regs->regs[29];
- while (entry->nr < PERF_MAX_STACK_DEPTH &&
+ while (entry->nr < sysctl_perf_event_max_stack &&
tail && !((unsigned long)tail & 0xf))
tail = user_backtrace(tail, entry);
} else {
@@ -132,7 +132,7 @@
tail = (struct compat_frame_tail __user *)regs->compat_fp - 1;
- while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
+ while ((entry->nr < sysctl_perf_event_max_stack) &&
tail && !((unsigned long)tail & 0x3))
tail = compat_user_backtrace(tail, entry);
#endif
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index f419a7c..838ccf1 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -21,6 +21,7 @@
#include <asm/irq_regs.h>
#include <asm/perf_event.h>
+#include <asm/sysreg.h>
#include <asm/virt.h>
#include <linux/of.h>
@@ -33,43 +34,43 @@
*/
/* Required events. */
-#define ARMV8_PMUV3_PERFCTR_PMNC_SW_INCR 0x00
-#define ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL 0x03
-#define ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS 0x04
-#define ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED 0x10
-#define ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES 0x11
-#define ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED 0x12
+#define ARMV8_PMUV3_PERFCTR_SW_INCR 0x00
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL 0x03
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE 0x04
+#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED 0x10
+#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES 0x11
+#define ARMV8_PMUV3_PERFCTR_BR_PRED 0x12
/* At least one of the following is required. */
-#define ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED 0x08
-#define ARMV8_PMUV3_PERFCTR_OP_SPEC 0x1B
+#define ARMV8_PMUV3_PERFCTR_INST_RETIRED 0x08
+#define ARMV8_PMUV3_PERFCTR_INST_SPEC 0x1B
/* Common architectural events. */
-#define ARMV8_PMUV3_PERFCTR_MEM_READ 0x06
-#define ARMV8_PMUV3_PERFCTR_MEM_WRITE 0x07
+#define ARMV8_PMUV3_PERFCTR_LD_RETIRED 0x06
+#define ARMV8_PMUV3_PERFCTR_ST_RETIRED 0x07
#define ARMV8_PMUV3_PERFCTR_EXC_TAKEN 0x09
-#define ARMV8_PMUV3_PERFCTR_EXC_EXECUTED 0x0A
-#define ARMV8_PMUV3_PERFCTR_CID_WRITE 0x0B
-#define ARMV8_PMUV3_PERFCTR_PC_WRITE 0x0C
-#define ARMV8_PMUV3_PERFCTR_PC_IMM_BRANCH 0x0D
-#define ARMV8_PMUV3_PERFCTR_PC_PROC_RETURN 0x0E
-#define ARMV8_PMUV3_PERFCTR_MEM_UNALIGNED_ACCESS 0x0F
-#define ARMV8_PMUV3_PERFCTR_TTBR_WRITE 0x1C
+#define ARMV8_PMUV3_PERFCTR_EXC_RETURN 0x0A
+#define ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED 0x0B
+#define ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED 0x0C
+#define ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED 0x0D
+#define ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED 0x0E
+#define ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED 0x0F
+#define ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED 0x1C
#define ARMV8_PMUV3_PERFCTR_CHAIN 0x1E
#define ARMV8_PMUV3_PERFCTR_BR_RETIRED 0x21
/* Common microarchitectural events. */
-#define ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL 0x01
-#define ARMV8_PMUV3_PERFCTR_ITLB_REFILL 0x02
-#define ARMV8_PMUV3_PERFCTR_DTLB_REFILL 0x05
+#define ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL 0x01
+#define ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL 0x02
+#define ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL 0x05
#define ARMV8_PMUV3_PERFCTR_MEM_ACCESS 0x13
-#define ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS 0x14
-#define ARMV8_PMUV3_PERFCTR_L1_DCACHE_WB 0x15
-#define ARMV8_PMUV3_PERFCTR_L2_CACHE_ACCESS 0x16
-#define ARMV8_PMUV3_PERFCTR_L2_CACHE_REFILL 0x17
-#define ARMV8_PMUV3_PERFCTR_L2_CACHE_WB 0x18
+#define ARMV8_PMUV3_PERFCTR_L1I_CACHE 0x14
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB 0x15
+#define ARMV8_PMUV3_PERFCTR_L2D_CACHE 0x16
+#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL 0x17
+#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB 0x18
#define ARMV8_PMUV3_PERFCTR_BUS_ACCESS 0x19
-#define ARMV8_PMUV3_PERFCTR_MEM_ERROR 0x1A
+#define ARMV8_PMUV3_PERFCTR_MEMORY_ERROR 0x1A
#define ARMV8_PMUV3_PERFCTR_BUS_CYCLES 0x1D
#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE 0x1F
#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE 0x20
@@ -85,89 +86,182 @@
#define ARMV8_PMUV3_PERFCTR_L3D_CACHE 0x2B
#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB 0x2C
#define ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL 0x2D
-#define ARMV8_PMUV3_PERFCTR_L21_TLB_REFILL 0x2E
+#define ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL 0x2E
#define ARMV8_PMUV3_PERFCTR_L2D_TLB 0x2F
-#define ARMV8_PMUV3_PERFCTR_L21_TLB 0x30
+#define ARMV8_PMUV3_PERFCTR_L2I_TLB 0x30
-/* ARMv8 implementation defined event types. */
-#define ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_LD 0x40
-#define ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_ST 0x41
-#define ARMV8_IMPDEF_PERFCTR_L1_DCACHE_REFILL_LD 0x42
-#define ARMV8_IMPDEF_PERFCTR_L1_DCACHE_REFILL_ST 0x43
-#define ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_LD 0x4C
-#define ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_ST 0x4D
-#define ARMV8_IMPDEF_PERFCTR_DTLB_ACCESS_LD 0x4E
-#define ARMV8_IMPDEF_PERFCTR_DTLB_ACCESS_ST 0x4F
+/* ARMv8 recommended implementation defined event types */
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD 0x40
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR 0x41
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD 0x42
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR 0x43
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_INNER 0x44
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_OUTER 0x45
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_VICTIM 0x46
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_CLEAN 0x47
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_INVAL 0x48
+
+#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD 0x4C
+#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR 0x4D
+#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD 0x4E
+#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR 0x4F
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_RD 0x50
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WR 0x51
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_RD 0x52
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_WR 0x53
+
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_VICTIM 0x56
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_CLEAN 0x57
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_INVAL 0x58
+
+#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_RD 0x5C
+#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_WR 0x5D
+#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_RD 0x5E
+#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_WR 0x5F
+
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD 0x60
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR 0x61
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_SHARED 0x62
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NOT_SHARED 0x63
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NORMAL 0x64
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_PERIPH 0x65
+
+#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_RD 0x66
+#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_WR 0x67
+#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LD_SPEC 0x68
+#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_ST_SPEC 0x69
+#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LDST_SPEC 0x6A
+
+#define ARMV8_IMPDEF_PERFCTR_LDREX_SPEC 0x6C
+#define ARMV8_IMPDEF_PERFCTR_STREX_PASS_SPEC 0x6D
+#define ARMV8_IMPDEF_PERFCTR_STREX_FAIL_SPEC 0x6E
+#define ARMV8_IMPDEF_PERFCTR_STREX_SPEC 0x6F
+#define ARMV8_IMPDEF_PERFCTR_LD_SPEC 0x70
+#define ARMV8_IMPDEF_PERFCTR_ST_SPEC 0x71
+#define ARMV8_IMPDEF_PERFCTR_LDST_SPEC 0x72
+#define ARMV8_IMPDEF_PERFCTR_DP_SPEC 0x73
+#define ARMV8_IMPDEF_PERFCTR_ASE_SPEC 0x74
+#define ARMV8_IMPDEF_PERFCTR_VFP_SPEC 0x75
+#define ARMV8_IMPDEF_PERFCTR_PC_WRITE_SPEC 0x76
+#define ARMV8_IMPDEF_PERFCTR_CRYPTO_SPEC 0x77
+#define ARMV8_IMPDEF_PERFCTR_BR_IMMED_SPEC 0x78
+#define ARMV8_IMPDEF_PERFCTR_BR_RETURN_SPEC 0x79
+#define ARMV8_IMPDEF_PERFCTR_BR_INDIRECT_SPEC 0x7A
+
+#define ARMV8_IMPDEF_PERFCTR_ISB_SPEC 0x7C
+#define ARMV8_IMPDEF_PERFCTR_DSB_SPEC 0x7D
+#define ARMV8_IMPDEF_PERFCTR_DMB_SPEC 0x7E
+
+#define ARMV8_IMPDEF_PERFCTR_EXC_UNDEF 0x81
+#define ARMV8_IMPDEF_PERFCTR_EXC_SVC 0x82
+#define ARMV8_IMPDEF_PERFCTR_EXC_PABORT 0x83
+#define ARMV8_IMPDEF_PERFCTR_EXC_DABORT 0x84
+
+#define ARMV8_IMPDEF_PERFCTR_EXC_IRQ 0x86
+#define ARMV8_IMPDEF_PERFCTR_EXC_FIQ 0x87
+#define ARMV8_IMPDEF_PERFCTR_EXC_SMC 0x88
+
+#define ARMV8_IMPDEF_PERFCTR_EXC_HVC 0x8A
+#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_PABORT 0x8B
+#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_DABORT 0x8C
+#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_OTHER 0x8D
+#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_IRQ 0x8E
+#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_FIQ 0x8F
+#define ARMV8_IMPDEF_PERFCTR_RC_LD_SPEC 0x90
+#define ARMV8_IMPDEF_PERFCTR_RC_ST_SPEC 0x91
+
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_RD 0xA0
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WR 0xA1
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_RD 0xA2
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_WR 0xA3
+
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_VICTIM 0xA6
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_CLEAN 0xA7
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_INVAL 0xA8
/* ARMv8 Cortex-A53 specific event types. */
-#define ARMV8_A53_PERFCTR_PREFETCH_LINEFILL 0xC2
+#define ARMV8_A53_PERFCTR_PREF_LINEFILL 0xC2
/* ARMv8 Cavium ThunderX specific event types. */
-#define ARMV8_THUNDER_PERFCTR_L1_DCACHE_MISS_ST 0xE9
-#define ARMV8_THUNDER_PERFCTR_L1_DCACHE_PREF_ACCESS 0xEA
-#define ARMV8_THUNDER_PERFCTR_L1_DCACHE_PREF_MISS 0xEB
-#define ARMV8_THUNDER_PERFCTR_L1_ICACHE_PREF_ACCESS 0xEC
-#define ARMV8_THUNDER_PERFCTR_L1_ICACHE_PREF_MISS 0xED
+#define ARMV8_THUNDER_PERFCTR_L1D_CACHE_MISS_ST 0xE9
+#define ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_ACCESS 0xEA
+#define ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_MISS 0xEB
+#define ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_ACCESS 0xEC
+#define ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_MISS 0xED
/* PMUv3 HW events mapping. */
static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = {
PERF_MAP_ALL_UNSUPPORTED,
- [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES,
- [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED,
- [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
- [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
- [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+ [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CPU_CYCLES,
+ [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INST_RETIRED,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE,
+ [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
+ [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
};
/* ARM Cortex-A53 HW events mapping. */
static const unsigned armv8_a53_perf_map[PERF_COUNT_HW_MAX] = {
PERF_MAP_ALL_UNSUPPORTED,
- [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES,
- [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED,
- [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
- [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_PC_WRITE,
- [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+ [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CPU_CYCLES,
+ [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INST_RETIRED,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE,
+ [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED,
+ [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
[PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES,
};
/* ARM Cortex-A57 and Cortex-A72 events mapping. */
static const unsigned armv8_a57_perf_map[PERF_COUNT_HW_MAX] = {
PERF_MAP_ALL_UNSUPPORTED,
- [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES,
- [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED,
- [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
- [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
- [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+ [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CPU_CYCLES,
+ [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INST_RETIRED,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE,
+ [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
+ [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
[PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES,
};
static const unsigned armv8_thunder_perf_map[PERF_COUNT_HW_MAX] = {
PERF_MAP_ALL_UNSUPPORTED,
- [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES,
- [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED,
- [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
- [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_PC_WRITE,
- [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+ [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CPU_CYCLES,
+ [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INST_RETIRED,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE,
+ [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED,
+ [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV8_PMUV3_PERFCTR_STALL_FRONTEND,
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV8_PMUV3_PERFCTR_STALL_BACKEND,
};
+/* Broadcom Vulcan events mapping */
+static const unsigned armv8_vulcan_perf_map[PERF_COUNT_HW_MAX] = {
+ PERF_MAP_ALL_UNSUPPORTED,
+ [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CPU_CYCLES,
+ [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INST_RETIRED,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE,
+ [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_BR_RETIRED,
+ [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
+ [PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV8_PMUV3_PERFCTR_STALL_FRONTEND,
+ [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV8_PMUV3_PERFCTR_STALL_BACKEND,
+};
+
static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
PERF_CACHE_MAP_ALL_UNSUPPORTED,
- [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
- [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
- [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
- [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
- [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
- [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
- [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
- [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
};
static const unsigned armv8_a53_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
@@ -175,21 +269,21 @@
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
PERF_CACHE_MAP_ALL_UNSUPPORTED,
- [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
- [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
- [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
- [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
- [C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_A53_PERFCTR_PREFETCH_LINEFILL,
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
+ [C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_A53_PERFCTR_PREF_LINEFILL,
- [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS,
- [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL,
+ [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE,
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL,
- [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_ITLB_REFILL,
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL,
- [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
- [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
- [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
- [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
};
static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
@@ -197,23 +291,23 @@
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
PERF_CACHE_MAP_ALL_UNSUPPORTED,
- [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_LD,
- [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_REFILL_LD,
- [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_ST,
- [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_REFILL_ST,
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR,
- [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS,
- [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL,
+ [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE,
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL,
- [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_LD,
- [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_ST,
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR,
- [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_ITLB_REFILL,
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL,
- [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
- [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
- [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
- [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
};
static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
@@ -221,67 +315,108 @@
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
PERF_CACHE_MAP_ALL_UNSUPPORTED,
- [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_LD,
- [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_REFILL_LD,
- [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1_DCACHE_ACCESS_ST,
- [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1_DCACHE_MISS_ST,
- [C(L1D)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1_DCACHE_PREF_ACCESS,
- [C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1_DCACHE_PREF_MISS,
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1D_CACHE_MISS_ST,
+ [C(L1D)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_ACCESS,
+ [C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1D_CACHE_PREF_MISS,
- [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS,
- [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL,
- [C(L1I)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1_ICACHE_PREF_ACCESS,
- [C(L1I)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1_ICACHE_PREF_MISS,
+ [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE,
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL,
+ [C(L1I)][C(OP_PREFETCH)][C(RESULT_ACCESS)] = ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_ACCESS,
+ [C(L1I)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_MISS,
- [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_DTLB_ACCESS_LD,
- [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_LD,
- [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_DTLB_ACCESS_ST,
- [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_DTLB_REFILL_ST,
+ [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD,
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR,
- [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_ITLB_REFILL,
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL,
- [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
- [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
- [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
- [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
};
+static const unsigned armv8_vulcan_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+ [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD,
+ [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR,
+ [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR,
+
+ [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE,
+ [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL,
+
+ [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL,
+ [C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1I_TLB,
+
+ [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR,
+ [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD,
+ [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR,
+
+ [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED,
+ [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_BR_PRED,
+ [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
+
+ [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
+ [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
+};
+
+static ssize_t
+armv8pmu_events_sysfs_show(struct device *dev,
+ struct device_attribute *attr, char *page)
+{
+ struct perf_pmu_events_attr *pmu_attr;
+
+ pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+
+ return sprintf(page, "event=0x%03llx\n", pmu_attr->id);
+}
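+
+/*
+ * Sketch of the resulting sysfs output (the device name is an assumption;
+ * the path follows the generic perf event_source layout):
+ *
+ *   # cat /sys/bus/event_source/devices/armv8_pmuv3/events/cpu_cycles
+ *   event=0x011
+ *
+ * i.e. ARMV8_PMUV3_PERFCTR_CPU_CYCLES (0x11) printed via "event=0x%03llx".
+ */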
+
#define ARMV8_EVENT_ATTR_RESOLVE(m) #m
#define ARMV8_EVENT_ATTR(name, config) \
- PMU_EVENT_ATTR_STRING(name, armv8_event_attr_##name, \
- "event=" ARMV8_EVENT_ATTR_RESOLVE(config))
+ PMU_EVENT_ATTR(name, armv8_event_attr_##name, \
+ config, armv8pmu_events_sysfs_show)
-ARMV8_EVENT_ATTR(sw_incr, ARMV8_PMUV3_PERFCTR_PMNC_SW_INCR);
-ARMV8_EVENT_ATTR(l1i_cache_refill, ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL);
-ARMV8_EVENT_ATTR(l1i_tlb_refill, ARMV8_PMUV3_PERFCTR_ITLB_REFILL);
-ARMV8_EVENT_ATTR(l1d_cache_refill, ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL);
-ARMV8_EVENT_ATTR(l1d_cache, ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS);
-ARMV8_EVENT_ATTR(l1d_tlb_refill, ARMV8_PMUV3_PERFCTR_DTLB_REFILL);
-ARMV8_EVENT_ATTR(ld_retired, ARMV8_PMUV3_PERFCTR_MEM_READ);
-ARMV8_EVENT_ATTR(st_retired, ARMV8_PMUV3_PERFCTR_MEM_WRITE);
-ARMV8_EVENT_ATTR(inst_retired, ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED);
+ARMV8_EVENT_ATTR(sw_incr, ARMV8_PMUV3_PERFCTR_SW_INCR);
+ARMV8_EVENT_ATTR(l1i_cache_refill, ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL);
+ARMV8_EVENT_ATTR(l1i_tlb_refill, ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL);
+ARMV8_EVENT_ATTR(l1d_cache_refill, ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL);
+ARMV8_EVENT_ATTR(l1d_cache, ARMV8_PMUV3_PERFCTR_L1D_CACHE);
+ARMV8_EVENT_ATTR(l1d_tlb_refill, ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL);
+ARMV8_EVENT_ATTR(ld_retired, ARMV8_PMUV3_PERFCTR_LD_RETIRED);
+ARMV8_EVENT_ATTR(st_retired, ARMV8_PMUV3_PERFCTR_ST_RETIRED);
+ARMV8_EVENT_ATTR(inst_retired, ARMV8_PMUV3_PERFCTR_INST_RETIRED);
ARMV8_EVENT_ATTR(exc_taken, ARMV8_PMUV3_PERFCTR_EXC_TAKEN);
-ARMV8_EVENT_ATTR(exc_return, ARMV8_PMUV3_PERFCTR_EXC_EXECUTED);
-ARMV8_EVENT_ATTR(cid_write_retired, ARMV8_PMUV3_PERFCTR_CID_WRITE);
-ARMV8_EVENT_ATTR(pc_write_retired, ARMV8_PMUV3_PERFCTR_PC_WRITE);
-ARMV8_EVENT_ATTR(br_immed_retired, ARMV8_PMUV3_PERFCTR_PC_IMM_BRANCH);
-ARMV8_EVENT_ATTR(br_return_retired, ARMV8_PMUV3_PERFCTR_PC_PROC_RETURN);
-ARMV8_EVENT_ATTR(unaligned_ldst_retired, ARMV8_PMUV3_PERFCTR_MEM_UNALIGNED_ACCESS);
-ARMV8_EVENT_ATTR(br_mis_pred, ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED);
-ARMV8_EVENT_ATTR(cpu_cycles, ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES);
-ARMV8_EVENT_ATTR(br_pred, ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED);
+ARMV8_EVENT_ATTR(exc_return, ARMV8_PMUV3_PERFCTR_EXC_RETURN);
+ARMV8_EVENT_ATTR(cid_write_retired, ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED);
+ARMV8_EVENT_ATTR(pc_write_retired, ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED);
+ARMV8_EVENT_ATTR(br_immed_retired, ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED);
+ARMV8_EVENT_ATTR(br_return_retired, ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED);
+ARMV8_EVENT_ATTR(unaligned_ldst_retired, ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED);
+ARMV8_EVENT_ATTR(br_mis_pred, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED);
+ARMV8_EVENT_ATTR(cpu_cycles, ARMV8_PMUV3_PERFCTR_CPU_CYCLES);
+ARMV8_EVENT_ATTR(br_pred, ARMV8_PMUV3_PERFCTR_BR_PRED);
ARMV8_EVENT_ATTR(mem_access, ARMV8_PMUV3_PERFCTR_MEM_ACCESS);
-ARMV8_EVENT_ATTR(l1i_cache, ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS);
-ARMV8_EVENT_ATTR(l1d_cache_wb, ARMV8_PMUV3_PERFCTR_L1_DCACHE_WB);
-ARMV8_EVENT_ATTR(l2d_cache, ARMV8_PMUV3_PERFCTR_L2_CACHE_ACCESS);
-ARMV8_EVENT_ATTR(l2d_cache_refill, ARMV8_PMUV3_PERFCTR_L2_CACHE_REFILL);
-ARMV8_EVENT_ATTR(l2d_cache_wb, ARMV8_PMUV3_PERFCTR_L2_CACHE_WB);
+ARMV8_EVENT_ATTR(l1i_cache, ARMV8_PMUV3_PERFCTR_L1I_CACHE);
+ARMV8_EVENT_ATTR(l1d_cache_wb, ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB);
+ARMV8_EVENT_ATTR(l2d_cache, ARMV8_PMUV3_PERFCTR_L2D_CACHE);
+ARMV8_EVENT_ATTR(l2d_cache_refill, ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL);
+ARMV8_EVENT_ATTR(l2d_cache_wb, ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB);
ARMV8_EVENT_ATTR(bus_access, ARMV8_PMUV3_PERFCTR_BUS_ACCESS);
-ARMV8_EVENT_ATTR(memory_error, ARMV8_PMUV3_PERFCTR_MEM_ERROR);
-ARMV8_EVENT_ATTR(inst_spec, ARMV8_PMUV3_PERFCTR_OP_SPEC);
-ARMV8_EVENT_ATTR(ttbr_write_retired, ARMV8_PMUV3_PERFCTR_TTBR_WRITE);
+ARMV8_EVENT_ATTR(memory_error, ARMV8_PMUV3_PERFCTR_MEMORY_ERROR);
+ARMV8_EVENT_ATTR(inst_spec, ARMV8_PMUV3_PERFCTR_INST_SPEC);
+ARMV8_EVENT_ATTR(ttbr_write_retired, ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED);
ARMV8_EVENT_ATTR(bus_cycles, ARMV8_PMUV3_PERFCTR_BUS_CYCLES);
-ARMV8_EVENT_ATTR(chain, ARMV8_PMUV3_PERFCTR_CHAIN);
+/* Don't expose the chain event in /sys, since it's useless in isolation */
ARMV8_EVENT_ATTR(l1d_cache_allocate, ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE);
ARMV8_EVENT_ATTR(l2d_cache_allocate, ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE);
ARMV8_EVENT_ATTR(br_retired, ARMV8_PMUV3_PERFCTR_BR_RETIRED);
@@ -297,9 +432,9 @@
ARMV8_EVENT_ATTR(l3d_cache, ARMV8_PMUV3_PERFCTR_L3D_CACHE);
ARMV8_EVENT_ATTR(l3d_cache_wb, ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB);
ARMV8_EVENT_ATTR(l2d_tlb_refill, ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL);
-ARMV8_EVENT_ATTR(l21_tlb_refill, ARMV8_PMUV3_PERFCTR_L21_TLB_REFILL);
+ARMV8_EVENT_ATTR(l2i_tlb_refill, ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL);
ARMV8_EVENT_ATTR(l2d_tlb, ARMV8_PMUV3_PERFCTR_L2D_TLB);
-ARMV8_EVENT_ATTR(l21_tlb, ARMV8_PMUV3_PERFCTR_L21_TLB);
+ARMV8_EVENT_ATTR(l2i_tlb, ARMV8_PMUV3_PERFCTR_L2I_TLB);
static struct attribute *armv8_pmuv3_event_attrs[] = {
&armv8_event_attr_sw_incr.attr.attr,
@@ -332,7 +467,6 @@
&armv8_event_attr_inst_spec.attr.attr,
&armv8_event_attr_ttbr_write_retired.attr.attr,
&armv8_event_attr_bus_cycles.attr.attr,
- &armv8_event_attr_chain.attr.attr,
&armv8_event_attr_l1d_cache_allocate.attr.attr,
&armv8_event_attr_l2d_cache_allocate.attr.attr,
&armv8_event_attr_br_retired.attr.attr,
@@ -348,15 +482,33 @@
&armv8_event_attr_l3d_cache.attr.attr,
&armv8_event_attr_l3d_cache_wb.attr.attr,
&armv8_event_attr_l2d_tlb_refill.attr.attr,
- &armv8_event_attr_l21_tlb_refill.attr.attr,
+ &armv8_event_attr_l2i_tlb_refill.attr.attr,
&armv8_event_attr_l2d_tlb.attr.attr,
- &armv8_event_attr_l21_tlb.attr.attr,
+ &armv8_event_attr_l2i_tlb.attr.attr,
NULL,
};
+static umode_t
+armv8pmu_event_attr_is_visible(struct kobject *kobj,
+ struct attribute *attr, int unused)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct pmu *pmu = dev_get_drvdata(dev);
+ struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
+ struct perf_pmu_events_attr *pmu_attr;
+
+ pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr.attr);
+
+ if (test_bit(pmu_attr->id, cpu_pmu->pmceid_bitmap))
+ return attr->mode;
+
+ return 0;
+}
+
static struct attribute_group armv8_pmuv3_events_attr_group = {
.name = "events",
.attrs = armv8_pmuv3_event_attrs,
+ .is_visible = armv8pmu_event_attr_is_visible,
};
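The .is_visible hook above is consulted by the sysfs core once per attribute when the events group is registered; returning 0 hides the file entirely. A minimal sketch of that contract, assuming a simplified core loop (add_file() is a hypothetical stand-in for the real sysfs helper):

        /* Hedged sketch of how a group's .is_visible gates each attribute;
         * add_file() is hypothetical, only the contract matters here. */
        static void create_group_files(struct kobject *kobj,
                                       const struct attribute_group *grp)
        {
                int i;

                for (i = 0; grp->attrs[i]; i++) {
                        umode_t mode = grp->is_visible ?
                                grp->is_visible(kobj, grp->attrs[i], i) :
                                grp->attrs[i]->mode;

                        if (mode)       /* 0 => event not advertised in /sys */
                                add_file(kobj, grp->attrs[i], mode);
                }
        }

With the PMCEID-based filter above, only events the CPU actually implements appear under the PMU's events/ directory.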
PMU_FORMAT_ATTR(event, "config:0-9");
@@ -397,16 +549,14 @@
static inline u32 armv8pmu_pmcr_read(void)
{
- u32 val;
- asm volatile("mrs %0, pmcr_el0" : "=r" (val));
- return val;
+ return read_sysreg(pmcr_el0);
}
static inline void armv8pmu_pmcr_write(u32 val)
{
val &= ARMV8_PMU_PMCR_MASK;
isb();
- asm volatile("msr pmcr_el0, %0" :: "r" (val));
+ write_sysreg(val, pmcr_el0);
}
static inline int armv8pmu_has_overflowed(u32 pmovsr)
@@ -428,7 +578,7 @@
static inline int armv8pmu_select_counter(int idx)
{
u32 counter = ARMV8_IDX_TO_COUNTER(idx);
- asm volatile("msr pmselr_el0, %0" :: "r" (counter));
+ write_sysreg(counter, pmselr_el0);
isb();
return idx;
@@ -445,9 +595,9 @@
pr_err("CPU%u reading wrong counter %d\n",
smp_processor_id(), idx);
else if (idx == ARMV8_IDX_CYCLE_COUNTER)
- asm volatile("mrs %0, pmccntr_el0" : "=r" (value));
+ value = read_sysreg(pmccntr_el0);
else if (armv8pmu_select_counter(idx) == idx)
- asm volatile("mrs %0, pmxevcntr_el0" : "=r" (value));
+ value = read_sysreg(pmxevcntr_el0);
return value;
}
@@ -469,47 +619,47 @@
*/
u64 value64 = 0xffffffff00000000ULL | value;
- asm volatile("msr pmccntr_el0, %0" :: "r" (value64));
+ write_sysreg(value64, pmccntr_el0);
} else if (armv8pmu_select_counter(idx) == idx)
- asm volatile("msr pmxevcntr_el0, %0" :: "r" (value));
+ write_sysreg(value, pmxevcntr_el0);
}
static inline void armv8pmu_write_evtype(int idx, u32 val)
{
if (armv8pmu_select_counter(idx) == idx) {
val &= ARMV8_PMU_EVTYPE_MASK;
- asm volatile("msr pmxevtyper_el0, %0" :: "r" (val));
+ write_sysreg(val, pmxevtyper_el0);
}
}
static inline int armv8pmu_enable_counter(int idx)
{
u32 counter = ARMV8_IDX_TO_COUNTER(idx);
- asm volatile("msr pmcntenset_el0, %0" :: "r" (BIT(counter)));
+ write_sysreg(BIT(counter), pmcntenset_el0);
return idx;
}
static inline int armv8pmu_disable_counter(int idx)
{
u32 counter = ARMV8_IDX_TO_COUNTER(idx);
- asm volatile("msr pmcntenclr_el0, %0" :: "r" (BIT(counter)));
+ write_sysreg(BIT(counter), pmcntenclr_el0);
return idx;
}
static inline int armv8pmu_enable_intens(int idx)
{
u32 counter = ARMV8_IDX_TO_COUNTER(idx);
- asm volatile("msr pmintenset_el1, %0" :: "r" (BIT(counter)));
+ write_sysreg(BIT(counter), pmintenset_el1);
return idx;
}
static inline int armv8pmu_disable_intens(int idx)
{
u32 counter = ARMV8_IDX_TO_COUNTER(idx);
- asm volatile("msr pmintenclr_el1, %0" :: "r" (BIT(counter)));
+ write_sysreg(BIT(counter), pmintenclr_el1);
isb();
/* Clear the overflow flag in case an interrupt is pending. */
- asm volatile("msr pmovsclr_el0, %0" :: "r" (BIT(counter)));
+ write_sysreg(BIT(counter), pmovsclr_el0);
isb();
return idx;
@@ -520,11 +670,11 @@
u32 value;
/* Read */
- asm volatile("mrs %0, pmovsclr_el0" : "=r" (value));
+ value = read_sysreg(pmovsclr_el0);
/* Write to clear flags */
value &= ARMV8_PMU_OVSR_MASK;
- asm volatile("msr pmovsclr_el0, %0" :: "r" (value));
+ write_sysreg(value, pmovsclr_el0);
return value;
}
@@ -685,7 +835,7 @@
unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT;
/* Always place a cycle counter into the cycle counter. */
- if (evtype == ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES) {
+ if (evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) {
if (test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask))
return -EAGAIN;
@@ -781,22 +931,38 @@
ARMV8_PMU_EVTYPE_EVENT);
}
-static void armv8pmu_read_num_pmnc_events(void *info)
+static int armv8_vulcan_map_event(struct perf_event *event)
{
- int *nb_cnt = info;
-
- /* Read the nb of CNTx counters supported from PMNC */
- *nb_cnt = (armv8pmu_pmcr_read() >> ARMV8_PMU_PMCR_N_SHIFT) & ARMV8_PMU_PMCR_N_MASK;
-
- /* Add the CPU cycles counter */
- *nb_cnt += 1;
+ return armpmu_map_event(event, &armv8_vulcan_perf_map,
+ &armv8_vulcan_perf_cache_map,
+ ARMV8_PMU_EVTYPE_EVENT);
}
-static int armv8pmu_probe_num_events(struct arm_pmu *arm_pmu)
+static void __armv8pmu_probe_pmu(void *info)
{
- return smp_call_function_any(&arm_pmu->supported_cpus,
- armv8pmu_read_num_pmnc_events,
- &arm_pmu->num_events, 1);
+ struct arm_pmu *cpu_pmu = info;
+ u32 pmceid[2];
+
+ /* Read the nb of CNTx counters supported from PMNC */
+ cpu_pmu->num_events = (armv8pmu_pmcr_read() >> ARMV8_PMU_PMCR_N_SHIFT)
+ & ARMV8_PMU_PMCR_N_MASK;
+
+ /* Add the CPU cycles counter */
+ cpu_pmu->num_events += 1;
+
+ pmceid[0] = read_sysreg(pmceid0_el0);
+ pmceid[1] = read_sysreg(pmceid1_el0);
+
+ bitmap_from_u32array(cpu_pmu->pmceid_bitmap,
+ ARMV8_PMUV3_MAX_COMMON_EVENTS, pmceid,
+ ARRAY_SIZE(pmceid));
+}
+
+static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu)
+{
+ return smp_call_function_any(&cpu_pmu->supported_cpus,
+ __armv8pmu_probe_pmu,
+ cpu_pmu, 1);
}
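The probe packs the two 32-bit PMCEID registers into a single bitmap so the is_visible callback can test one bit per architected common event. A hedged sketch of that packing, with invented register values:

        /* PMCEID0_EL0 covers events 0..31 and PMCEID1_EL0 events 32..63;
         * the register values below are invented for illustration. */
        static bool sketch_pmceid_lookup(void)
        {
                DECLARE_BITMAP(pmceid_bitmap, 64);
                u32 pmceid[2] = { 0x000000ffu, 0x00000000u };

                bitmap_from_u32array(pmceid_bitmap, 64, pmceid,
                                     ARRAY_SIZE(pmceid));

                /* bit n set <=> common event n is implemented */
                return test_bit(ARMV8_PMUV3_PERFCTR_SW_INCR, pmceid_bitmap);
        }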
static void armv8_pmu_init(struct arm_pmu *cpu_pmu)
@@ -819,7 +985,8 @@
armv8_pmu_init(cpu_pmu);
cpu_pmu->name = "armv8_pmuv3";
cpu_pmu->map_event = armv8_pmuv3_map_event;
- return armv8pmu_probe_num_events(cpu_pmu);
+ cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups;
+ return armv8pmu_probe_pmu(cpu_pmu);
}
static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu)
@@ -828,7 +995,7 @@
cpu_pmu->name = "armv8_cortex_a53";
cpu_pmu->map_event = armv8_a53_map_event;
cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups;
- return armv8pmu_probe_num_events(cpu_pmu);
+ return armv8pmu_probe_pmu(cpu_pmu);
}
static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu)
@@ -837,7 +1004,7 @@
cpu_pmu->name = "armv8_cortex_a57";
cpu_pmu->map_event = armv8_a57_map_event;
cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups;
- return armv8pmu_probe_num_events(cpu_pmu);
+ return armv8pmu_probe_pmu(cpu_pmu);
}
static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu)
@@ -846,7 +1013,7 @@
cpu_pmu->name = "armv8_cortex_a72";
cpu_pmu->map_event = armv8_a57_map_event;
cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups;
- return armv8pmu_probe_num_events(cpu_pmu);
+ return armv8pmu_probe_pmu(cpu_pmu);
}
static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu)
@@ -855,7 +1022,16 @@
cpu_pmu->name = "armv8_cavium_thunder";
cpu_pmu->map_event = armv8_thunder_map_event;
cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups;
- return armv8pmu_probe_num_events(cpu_pmu);
+ return armv8pmu_probe_pmu(cpu_pmu);
+}
+
+static int armv8_vulcan_pmu_init(struct arm_pmu *cpu_pmu)
+{
+ armv8_pmu_init(cpu_pmu);
+ cpu_pmu->name = "armv8_brcm_vulcan";
+ cpu_pmu->map_event = armv8_vulcan_map_event;
+ cpu_pmu->pmu.attr_groups = armv8_pmuv3_attr_groups;
+ return armv8pmu_probe_pmu(cpu_pmu);
}
static const struct of_device_id armv8_pmu_of_device_ids[] = {
@@ -864,6 +1040,7 @@
{.compatible = "arm,cortex-a57-pmu", .data = armv8_a57_pmu_init},
{.compatible = "arm,cortex-a72-pmu", .data = armv8_a72_pmu_init},
{.compatible = "cavium,thunder-pmu", .data = armv8_thunder_pmu_init},
+ {.compatible = "brcm,vulcan-pmu", .data = armv8_vulcan_pmu_init},
{},
};
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 8062482..48eea68 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -265,9 +265,6 @@
if (stack_start) {
if (is_compat_thread(task_thread_info(p)))
childregs->compat_sp = stack_start;
- /* 16-byte aligned stack mandatory on AArch64 */
- else if (stack_start & 15)
- return -EINVAL;
else
childregs->sp = stack_start;
}
@@ -382,13 +379,14 @@
return sp & ~0xf;
}
-static unsigned long randomize_base(unsigned long base)
-{
- unsigned long range_end = base + (STACK_RND_MASK << PAGE_SHIFT) + 1;
- return randomize_range(base, range_end, 0) ? : base;
-}
-
unsigned long arch_randomize_brk(struct mm_struct *mm)
{
- return randomize_base(mm->brk);
+ unsigned long range_end = mm->brk;
+
+ if (is_compat_task())
+ range_end += 0x02000000;
+ else
+ range_end += 0x40000000;
+
+ return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}
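For scale: 0x02000000 is 32 MiB and 0x40000000 is 1 GiB, so compat tasks draw their brk offset from a much smaller window than native ones. A hedged restatement of the contract relied on here:

        /* Sketch only: randomize_range(start, end, 0) returns a page-aligned
         * address within the window, or 0 on failure -- hence the '?:'
         * fallback keeps brk unrandomized rather than setting it to 0. */
        static unsigned long sketch_brk(struct mm_struct *mm, bool compat)
        {
                unsigned long window = compat ? SZ_32M : SZ_1G;

                return randomize_range(mm->brk, mm->brk + window, 0) ? : mm->brk;
        }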
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 9dc6776..3279def 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -53,6 +53,7 @@
#include <asm/cpufeature.h>
#include <asm/cpu_ops.h>
#include <asm/kasan.h>
+#include <asm/numa.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/smp_plat.h>
@@ -175,7 +176,6 @@
*/
if (mpidr_hash_size() > 4 * num_possible_cpus())
pr_warn("Large number of MPIDR hash buckets detected\n");
- __flush_dcache_area(&mpidr_hash, sizeof(struct mpidr_hash));
}
static void __init setup_machine_fdt(phys_addr_t dt_phys)
@@ -224,69 +224,6 @@
}
}
-#ifdef CONFIG_BLK_DEV_INITRD
-/*
- * Relocate initrd if it is not completely within the linear mapping.
- * This would be the case if mem= cuts out all or part of it.
- */
-static void __init relocate_initrd(void)
-{
- phys_addr_t orig_start = __virt_to_phys(initrd_start);
- phys_addr_t orig_end = __virt_to_phys(initrd_end);
- phys_addr_t ram_end = memblock_end_of_DRAM();
- phys_addr_t new_start;
- unsigned long size, to_free = 0;
- void *dest;
-
- if (orig_end <= ram_end)
- return;
-
- /*
- * Any of the original initrd which overlaps the linear map should
- * be freed after relocating.
- */
- if (orig_start < ram_end)
- to_free = ram_end - orig_start;
-
- size = orig_end - orig_start;
- if (!size)
- return;
-
- /* initrd needs to be relocated completely inside linear mapping */
- new_start = memblock_find_in_range(0, PFN_PHYS(max_pfn),
- size, PAGE_SIZE);
- if (!new_start)
- panic("Cannot relocate initrd of size %ld\n", size);
- memblock_reserve(new_start, size);
-
- initrd_start = __phys_to_virt(new_start);
- initrd_end = initrd_start + size;
-
- pr_info("Moving initrd from [%llx-%llx] to [%llx-%llx]\n",
- orig_start, orig_start + size - 1,
- new_start, new_start + size - 1);
-
- dest = (void *)initrd_start;
-
- if (to_free) {
- memcpy(dest, (void *)__phys_to_virt(orig_start), to_free);
- dest += to_free;
- }
-
- copy_from_early_mem(dest, orig_start + to_free, size - to_free);
-
- if (to_free) {
- pr_info("Freeing original RAMDISK from [%llx-%llx]\n",
- orig_start, orig_start + to_free - 1);
- memblock_free(orig_start, to_free);
- }
-}
-#else
-static inline void __init relocate_initrd(void)
-{
-}
-#endif
-
u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID };
void __init setup_arch(char **cmdline_p)
@@ -327,7 +264,11 @@
acpi_boot_table_init();
paging_init();
- relocate_initrd();
+
+ if (acpi_disabled)
+ unflatten_device_tree();
+
+ bootmem_init();
kasan_init();
@@ -335,12 +276,11 @@
early_ioremap_reset();
- if (acpi_disabled) {
- unflatten_device_tree();
+ if (acpi_disabled)
psci_dt_init();
- } else {
+ else
psci_acpi_init();
- }
+
xen_early_init();
cpu_read_bootcpu_ops();
@@ -379,6 +319,9 @@
{
int i;
+ for_each_online_node(i)
+ register_one_node(i);
+
for_each_possible_cpu(i) {
struct cpu *cpu = &per_cpu(cpu_data.cpu, i);
cpu->hotpluggable = 1;
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index fd10eb6..9a3aec9 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -49,39 +49,32 @@
orr \dst, \dst, \mask // dst|=(aff3>>rs3)
.endm
/*
- * Save CPU state for a suspend and execute the suspend finisher.
- * On success it will return 0 through cpu_resume - ie through a CPU
- * soft/hard reboot from the reset vector.
- * On failure it returns the suspend finisher return value or force
- * -EOPNOTSUPP if the finisher erroneously returns 0 (the suspend finisher
- * is not allowed to return, if it does this must be considered failure).
- * It saves callee registers, and allocates space on the kernel stack
- * to save the CPU specific registers + some other data for resume.
+ * Save CPU state in the provided sleep_stack_data area, and publish its
+ * location for cpu_resume()'s use in sleep_save_stash.
*
- * x0 = suspend finisher argument
- * x1 = suspend finisher function pointer
+ * cpu_resume() will restore this saved state, and return. Because the
+ * link-register is saved and restored, it will appear to return from this
+ * function. So that the caller can tell the suspend/resume paths apart,
+ * __cpu_suspend_enter() will always return a non-zero value, whereas the
+ * path through cpu_resume() will return 0.
+ *
+ * x0 = struct sleep_stack_data area
*/
ENTRY(__cpu_suspend_enter)
- stp x29, lr, [sp, #-96]!
- stp x19, x20, [sp,#16]
- stp x21, x22, [sp,#32]
- stp x23, x24, [sp,#48]
- stp x25, x26, [sp,#64]
- stp x27, x28, [sp,#80]
- /*
- * Stash suspend finisher and its argument in x20 and x19
- */
- mov x19, x0
- mov x20, x1
+ stp x29, lr, [x0, #SLEEP_STACK_DATA_CALLEE_REGS]
+ stp x19, x20, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+16]
+ stp x21, x22, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+32]
+ stp x23, x24, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+48]
+ stp x25, x26, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+64]
+ stp x27, x28, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+80]
+
+ /* save the sp in cpu_suspend_ctx */
mov x2, sp
- sub sp, sp, #CPU_SUSPEND_SZ // allocate cpu_suspend_ctx
- mov x0, sp
- /*
- * x0 now points to struct cpu_suspend_ctx allocated on the stack
- */
- str x2, [x0, #CPU_CTX_SP]
- ldr x1, =sleep_save_sp
- ldr x1, [x1, #SLEEP_SAVE_SP_VIRT]
+ str x2, [x0, #SLEEP_STACK_DATA_SYSTEM_REGS + CPU_CTX_SP]
+
+ /* find the mpidr_hash */
+ ldr x1, =sleep_save_stash
+ ldr x1, [x1]
mrs x7, mpidr_el1
ldr x9, =mpidr_hash
ldr x10, [x9, #MPIDR_HASH_MASK]
@@ -93,74 +86,28 @@
ldp w5, w6, [x9, #(MPIDR_HASH_SHIFTS + 8)]
compute_mpidr_hash x8, x3, x4, x5, x6, x7, x10
add x1, x1, x8, lsl #3
- bl __cpu_suspend_save
- /*
- * Grab suspend finisher in x20 and its argument in x19
- */
- mov x0, x19
- mov x1, x20
- /*
- * We are ready for power down, fire off the suspend finisher
- * in x1, with argument in x0
- */
- blr x1
- /*
- * Never gets here, unless suspend finisher fails.
- * Successful cpu_suspend should return from cpu_resume, returning
- * through this code path is considered an error
- * If the return value is set to 0 force x0 = -EOPNOTSUPP
- * to make sure a proper error condition is propagated
- */
- cmp x0, #0
- mov x3, #-EOPNOTSUPP
- csel x0, x3, x0, eq
- add sp, sp, #CPU_SUSPEND_SZ // rewind stack pointer
- ldp x19, x20, [sp, #16]
- ldp x21, x22, [sp, #32]
- ldp x23, x24, [sp, #48]
- ldp x25, x26, [sp, #64]
- ldp x27, x28, [sp, #80]
- ldp x29, lr, [sp], #96
+
+ str x0, [x1]
+ add x0, x0, #SLEEP_STACK_DATA_SYSTEM_REGS
+ stp x29, lr, [sp, #-16]!
+ bl cpu_do_suspend
+ ldp x29, lr, [sp], #16
+ mov x0, #1
ret
ENDPROC(__cpu_suspend_enter)
.ltorg
-/*
- * x0 must contain the sctlr value retrieved from restored context
- */
- .pushsection ".idmap.text", "ax"
-ENTRY(cpu_resume_mmu)
- ldr x3, =cpu_resume_after_mmu
- msr sctlr_el1, x0 // restore sctlr_el1
- isb
- /*
- * Invalidate the local I-cache so that any instructions fetched
- * speculatively from the PoC are discarded, since they may have
- * been dynamically patched at the PoU.
- */
- ic iallu
- dsb nsh
- isb
- br x3 // global jump to virtual address
-ENDPROC(cpu_resume_mmu)
- .popsection
-cpu_resume_after_mmu:
-#ifdef CONFIG_KASAN
- mov x0, sp
- bl kasan_unpoison_remaining_stack
-#endif
- mov x0, #0 // return zero on success
- ldp x19, x20, [sp, #16]
- ldp x21, x22, [sp, #32]
- ldp x23, x24, [sp, #48]
- ldp x25, x26, [sp, #64]
- ldp x27, x28, [sp, #80]
- ldp x29, lr, [sp], #96
- ret
-ENDPROC(cpu_resume_after_mmu)
-
ENTRY(cpu_resume)
bl el2_setup // if in EL2 drop to EL1 cleanly
+ /* enable the MMU early - so we can access sleep_save_stash by va */
+ adr_l lr, __enable_mmu /* __cpu_setup will return here */
+ ldr x27, =_cpu_resume /* __enable_mmu will branch here */
+ adrp x25, idmap_pg_dir
+ adrp x26, swapper_pg_dir
+ b __cpu_setup
+ENDPROC(cpu_resume)
+
+ENTRY(_cpu_resume)
mrs x1, mpidr_el1
adrp x8, mpidr_hash
add x8, x8, #:lo12:mpidr_hash // x8 = struct mpidr_hash phys address
@@ -170,20 +117,32 @@
ldp w5, w6, [x8, #(MPIDR_HASH_SHIFTS + 8)]
compute_mpidr_hash x7, x3, x4, x5, x6, x1, x2
/* x7 contains hash index, let's use it to grab context pointer */
- ldr_l x0, sleep_save_sp + SLEEP_SAVE_SP_PHYS
+ ldr_l x0, sleep_save_stash
ldr x0, [x0, x7, lsl #3]
+ add x29, x0, #SLEEP_STACK_DATA_CALLEE_REGS
+ add x0, x0, #SLEEP_STACK_DATA_SYSTEM_REGS
/* load sp from context */
ldr x2, [x0, #CPU_CTX_SP]
- /* load physical address of identity map page table in x1 */
- adrp x1, idmap_pg_dir
mov sp, x2
/* save thread_info */
and x2, x2, #~(THREAD_SIZE - 1)
msr sp_el0, x2
/*
- * cpu_do_resume expects x0 to contain context physical address
- * pointer and x1 to contain physical address of 1:1 page tables
+ * cpu_do_resume expects x0 to contain context address pointer
*/
- bl cpu_do_resume // PC relative jump, MMU off
- b cpu_resume_mmu // Resume MMU, never returns
-ENDPROC(cpu_resume)
+ bl cpu_do_resume
+
+#ifdef CONFIG_KASAN
+ mov x0, sp
+ bl kasan_unpoison_remaining_stack
+#endif
+
+ ldp x19, x20, [x29, #16]
+ ldp x21, x22, [x29, #32]
+ ldp x23, x24, [x29, #48]
+ ldp x25, x26, [x29, #64]
+ ldp x27, x28, [x29, #80]
+ ldp x29, lr, [x29]
+ mov x0, #0
+ ret
+ENDPROC(_cpu_resume)
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index b2d5f4e..678e084 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -45,6 +45,7 @@
#include <asm/cputype.h>
#include <asm/cpu_ops.h>
#include <asm/mmu_context.h>
+#include <asm/numa.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/processor.h>
@@ -75,6 +76,43 @@
IPI_WAKEUP
};
+#ifdef CONFIG_ARM64_VHE
+
+/* Whether the boot CPU is running in HYP mode or not */
+static bool boot_cpu_hyp_mode;
+
+static inline void save_boot_cpu_run_el(void)
+{
+ boot_cpu_hyp_mode = is_kernel_in_hyp_mode();
+}
+
+static inline bool is_boot_cpu_in_hyp_mode(void)
+{
+ return boot_cpu_hyp_mode;
+}
+
+/*
+ * Verify that a secondary CPU is running the kernel at the same
+ * EL as that of the boot CPU.
+ */
+void verify_cpu_run_el(void)
+{
+ bool in_el2 = is_kernel_in_hyp_mode();
+ bool boot_cpu_el2 = is_boot_cpu_in_hyp_mode();
+
+ if (in_el2 ^ boot_cpu_el2) {
+ pr_crit("CPU%d: mismatched Exception Level(EL%d) with boot CPU(EL%d)\n",
+ smp_processor_id(),
+ in_el2 ? 2 : 1,
+ boot_cpu_el2 ? 2 : 1);
+ cpu_panic_kernel();
+ }
+}
+
+#else
+static inline void save_boot_cpu_run_el(void) {}
+#endif
+
#ifdef CONFIG_HOTPLUG_CPU
static int op_cpu_kill(unsigned int cpu);
#else
@@ -166,6 +204,7 @@
static void smp_store_cpu_info(unsigned int cpuid)
{
store_cpu_topology(cpuid);
+ numa_store_cpu_info(cpuid);
}
/*
@@ -225,8 +264,6 @@
pr_info("CPU%u: Booted secondary processor [%08x]\n",
cpu, read_cpuid_id());
update_cpu_boot_status(CPU_BOOT_SUCCESS);
- /* Make sure the status update is visible before we complete */
- smp_wmb();
set_cpu_online(cpu, true);
complete(&cpu_running);
@@ -401,6 +438,7 @@
void __init smp_prepare_boot_cpu(void)
{
cpuinfo_store_boot_cpu();
+ save_boot_cpu_run_el();
set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
}
@@ -595,6 +633,8 @@
pr_debug("cpu logical map 0x%llx\n", hwid);
cpu_logical_map(cpu_count) = hwid;
+
+ early_map_cpu_to_node(cpu_count, of_node_to_nid(dn));
next:
cpu_count++;
}
@@ -647,33 +687,18 @@
void __init smp_prepare_cpus(unsigned int max_cpus)
{
int err;
- unsigned int cpu, ncores = num_possible_cpus();
+ unsigned int cpu;
init_cpu_topology();
smp_store_cpu_info(smp_processor_id());
/*
- * are we trying to boot more cores than exist?
- */
- if (max_cpus > ncores)
- max_cpus = ncores;
-
- /* Don't bother if we're effectively UP */
- if (max_cpus <= 1)
- return;
-
- /*
* Initialise the present map (which describes the set of CPUs
* actually populated at the present time) and release the
* secondaries from the bootloader.
- *
- * Make sure we online at most (max_cpus - 1) additional CPUs.
*/
- max_cpus--;
for_each_possible_cpu(cpu) {
- if (max_cpus == 0)
- break;
if (cpu == smp_processor_id())
continue;
@@ -686,7 +711,6 @@
continue;
set_cpu_present(cpu, true);
- max_cpus--;
}
}
@@ -763,21 +787,11 @@
}
#endif
-static DEFINE_RAW_SPINLOCK(stop_lock);
-
/*
* ipi_cpu_stop - handle IPI from smp_send_stop()
*/
static void ipi_cpu_stop(unsigned int cpu)
{
- if (system_state == SYSTEM_BOOTING ||
- system_state == SYSTEM_RUNNING) {
- raw_spin_lock(&stop_lock);
- pr_crit("CPU%u: stopping\n", cpu);
- dump_stack();
- raw_spin_unlock(&stop_lock);
- }
-
set_cpu_online(cpu, false);
local_irq_disable();
@@ -872,6 +886,9 @@
cpumask_copy(&mask, cpu_online_mask);
cpumask_clear_cpu(smp_processor_id(), &mask);
+ if (system_state == SYSTEM_BOOTING ||
+ system_state == SYSTEM_RUNNING)
+ pr_crit("SMP: stopping secondary CPUs\n");
smp_cross_call(&mask, IPI_CPU_STOP);
}
@@ -881,7 +898,8 @@
udelay(1);
if (num_online_cpus() > 1)
- pr_warning("SMP: failed to stop secondary CPUs\n");
+ pr_warning("SMP: failed to stop secondary CPUs %*pbl\n",
+ cpumask_pr_args(cpu_online_mask));
}
/*
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index 6605539..b616e36 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -10,30 +10,11 @@
#include <asm/suspend.h>
#include <asm/tlbflush.h>
-extern int __cpu_suspend_enter(unsigned long arg, int (*fn)(unsigned long));
/*
- * This is called by __cpu_suspend_enter() to save the state, and do whatever
- * flushing is required to ensure that when the CPU goes to sleep we have
- * the necessary data available when the caches are not searched.
- *
- * ptr: CPU context virtual address
- * save_ptr: address of the location where the context physical address
- * must be saved
+ * This is allocated by cpu_suspend_init(), and used to store a pointer to
+ * the 'struct sleep_stack_data' that contains a particular CPU's state.
*/
-void notrace __cpu_suspend_save(struct cpu_suspend_ctx *ptr,
- phys_addr_t *save_ptr)
-{
- *save_ptr = virt_to_phys(ptr);
-
- cpu_do_suspend(ptr);
- /*
- * Only flush the context that must be retrieved with the MMU
- * off. VA primitives ensure the flush is applied to all
- * cache levels so context is pushed to DRAM.
- */
- __flush_dcache_area(ptr, sizeof(*ptr));
- __flush_dcache_area(save_ptr, sizeof(*save_ptr));
-}
+unsigned long *sleep_save_stash;
/*
* This hook is provided so that cpu_suspend code can restore HW
@@ -51,6 +32,30 @@
hw_breakpoint_restore = hw_bp_restore;
}
+void notrace __cpu_suspend_exit(void)
+{
+ /*
+ * We are resuming from reset with the idmap active in TTBR0_EL1.
+ * We must uninstall the idmap and restore the expected MMU
+ * state before we can possibly return to userspace.
+ */
+ cpu_uninstall_idmap();
+
+ /*
+ * Restore per-cpu offset before any kernel
+ * subsystem relying on it has a chance to run.
+ */
+ set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
+
+ /*
+ * Restore HW breakpoint registers to sane values
+ * before debug exceptions are possibly reenabled
+ * through local_dbg_restore.
+ */
+ if (hw_breakpoint_restore)
+ hw_breakpoint_restore(NULL);
+}
+
/*
* cpu_suspend
*
@@ -60,8 +65,9 @@
*/
int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
{
- int ret;
+ int ret = 0;
unsigned long flags;
+ struct sleep_stack_data state;
/*
* From this point debug exceptions are disabled to prevent
@@ -77,34 +83,21 @@
*/
pause_graph_tracing();
- /*
- * mm context saved on the stack, it will be restored when
- * the cpu comes out of reset through the identity mapped
- * page tables, so that the thread address space is properly
- * set-up on function return.
- */
- ret = __cpu_suspend_enter(arg, fn);
- if (ret == 0) {
- /*
- * We are resuming from reset with the idmap active in TTBR0_EL1.
- * We must uninstall the idmap and restore the expected MMU
- * state before we can possibly return to userspace.
- */
- cpu_uninstall_idmap();
+ if (__cpu_suspend_enter(&state)) {
+ /* Call the suspend finisher */
+ ret = fn(arg);
/*
- * Restore per-cpu offset before any kernel
- * subsystem relying on it has a chance to run.
+ * Never gets here, unless the suspend finisher fails.
+ * Successful cpu_suspend() should return from cpu_resume();
+ * returning through this code path is considered an error.
+ * If the return value is set to 0, force ret = -EOPNOTSUPP
+ * to make sure a proper error condition is propagated.
*/
- set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
-
- /*
- * Restore HW breakpoint registers to sane values
- * before debug exceptions are possibly reenabled
- * through local_dbg_restore.
- */
- if (hw_breakpoint_restore)
- hw_breakpoint_restore(NULL);
+ if (!ret)
+ ret = -EOPNOTSUPP;
+ } else {
+ __cpu_suspend_exit();
}
unpause_graph_tracing();
@@ -119,22 +112,15 @@
return ret;
}
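Under the reworked flow the finisher runs inside cpu_suspend() itself and must not return on success; the CPU comes back through cpu_resume() and cpu_suspend() returns 0. A hypothetical caller (firmware_enter_idle() is an invented helper):

        /* Hypothetical finisher: enters the low-power state via firmware
         * and only returns if that entry failed. */
        static int example_finisher(unsigned long state_idx)
        {
                return firmware_enter_idle(state_idx);  /* invented helper */
        }

        static int example_enter_idle(unsigned long state_idx)
        {
                int ret = cpu_suspend(state_idx, example_finisher);

                if (ret)        /* finisher failed, or returned erroneously */
                        pr_err("idle entry failed: %d\n", ret);
                return ret;
        }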
-struct sleep_save_sp sleep_save_sp;
-
static int __init cpu_suspend_init(void)
{
- void *ctx_ptr;
-
/* ctx_ptr is an array of physical addresses */
- ctx_ptr = kcalloc(mpidr_hash_size(), sizeof(phys_addr_t), GFP_KERNEL);
+ sleep_save_stash = kcalloc(mpidr_hash_size(), sizeof(*sleep_save_stash),
+ GFP_KERNEL);
- if (WARN_ON(!ctx_ptr))
+ if (WARN_ON(!sleep_save_stash))
return -ENOMEM;
- sleep_save_sp.save_ptr_stash = ctx_ptr;
- sleep_save_sp.save_ptr_stash_phys = virt_to_phys(ctx_ptr);
- __flush_dcache_area(&sleep_save_sp, sizeof(struct sleep_save_sp));
-
return 0;
}
early_initcall(cpu_suspend_init);
diff --git a/arch/arm64/kernel/sys.c b/arch/arm64/kernel/sys.c
index 75151aa..26fe8ea 100644
--- a/arch/arm64/kernel/sys.c
+++ b/arch/arm64/kernel/sys.c
@@ -25,6 +25,7 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/syscalls.h>
+#include <asm/cpufeature.h>
asmlinkage long sys_mmap(unsigned long addr, unsigned long len,
unsigned long prot, unsigned long flags,
@@ -36,11 +37,20 @@
return sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
}
+SYSCALL_DEFINE1(arm64_personality, unsigned int, personality)
+{
+ if (personality(personality) == PER_LINUX32 &&
+ !system_supports_32bit_el0())
+ return -EINVAL;
+ return sys_personality(personality);
+}
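personality() is the generic macro that masks the flag bits off the request, so the check still fires when callers OR flags into PER_LINUX32. A hedged example:

        /* Sketch: flag bits don't hide the base personality, so e.g.
         * (PER_LINUX32 | ADDR_NO_RANDOMIZE) is still rejected on a
         * 64-bit-only system. */
        static long sketch_personality_check(unsigned int req)
        {
                if (personality(req) == PER_LINUX32 &&
                    !system_supports_32bit_el0())
                        return -EINVAL;
                return 0;
        }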
+
/*
* Wrappers to pass the pt_regs argument.
*/
asmlinkage long sys_rt_sigreturn_wrapper(void);
#define sys_rt_sigreturn sys_rt_sigreturn_wrapper
+#define sys_personality sys_arm64_personality
#undef __SYSCALL
#define __SYSCALL(nr, sym) [nr] = sym,
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 97bc68f..64fc030 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -131,11 +131,11 @@
return -ENOMEM;
/* Grab the vDSO data page. */
- vdso_pagelist[0] = virt_to_page(vdso_data);
+ vdso_pagelist[0] = pfn_to_page(PHYS_PFN(__pa(vdso_data)));
/* Grab the vDSO code pages. */
for (i = 0; i < vdso_pages; i++)
- vdso_pagelist[i + 1] = virt_to_page(&vdso_start + i * PAGE_SIZE);
+ vdso_pagelist[i + 1] = pfn_to_page(PHYS_PFN(__pa(&vdso_start)) + i);
/* Populate the special mapping structures */
vdso_spec[0] = (struct vm_special_mapping) {
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 5a1939a..435e820 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -46,6 +46,16 @@
*(.idmap.text) \
VMLINUX_SYMBOL(__idmap_text_end) = .;
+#ifdef CONFIG_HIBERNATION
+#define HIBERNATE_TEXT \
+ . = ALIGN(SZ_4K); \
+ VMLINUX_SYMBOL(__hibernate_exit_text_start) = .;\
+ *(.hibernate_exit.text) \
+ VMLINUX_SYMBOL(__hibernate_exit_text_end) = .;
+#else
+#define HIBERNATE_TEXT
+#endif
+
/*
* The size of the PE/COFF section that covers the kernel image, which
* runs from stext to _edata, must be a round multiple of the PE/COFF
@@ -63,14 +73,19 @@
#endif
#if defined(CONFIG_DEBUG_ALIGN_RODATA)
-#define ALIGN_DEBUG_RO . = ALIGN(1<<SECTION_SHIFT);
-#define ALIGN_DEBUG_RO_MIN(min) ALIGN_DEBUG_RO
-#elif defined(CONFIG_DEBUG_RODATA)
-#define ALIGN_DEBUG_RO . = ALIGN(1<<PAGE_SHIFT);
-#define ALIGN_DEBUG_RO_MIN(min) ALIGN_DEBUG_RO
+/*
+ * 4 KB granule: 1 level 2 entry
+ * 16 KB granule: 128 level 3 entries, with contiguous bit
+ * 64 KB granule: 32 level 3 entries, with contiguous bit
+ */
+#define SEGMENT_ALIGN SZ_2M
#else
-#define ALIGN_DEBUG_RO
-#define ALIGN_DEBUG_RO_MIN(min) . = ALIGN(min);
+/*
+ * 4 KB granule: 16 level 3 entries, with contiguous bit
+ * 16 KB granule: 4 level 3 entries, without contiguous bit
+ * 64 KB granule: 1 level 3 entry
+ */
+#define SEGMENT_ALIGN SZ_64K
#endif
SECTIONS
@@ -96,7 +111,6 @@
_text = .;
HEAD_TEXT
}
- ALIGN_DEBUG_RO_MIN(PAGE_SIZE)
.text : { /* Real text segment */
_stext = .; /* Text and read-only data */
__exception_text_start = .;
@@ -109,18 +123,19 @@
LOCK_TEXT
HYPERVISOR_TEXT
IDMAP_TEXT
+ HIBERNATE_TEXT
*(.fixup)
*(.gnu.warning)
. = ALIGN(16);
*(.got) /* Global offset table */
}
- ALIGN_DEBUG_RO_MIN(PAGE_SIZE)
+ . = ALIGN(SEGMENT_ALIGN);
RO_DATA(PAGE_SIZE) /* everything from this point to */
EXCEPTION_TABLE(8) /* _etext will be marked RO NX */
NOTES
- ALIGN_DEBUG_RO_MIN(PAGE_SIZE)
+ . = ALIGN(SEGMENT_ALIGN);
_etext = .; /* End of text and rodata section */
__init_begin = .;
@@ -154,12 +169,9 @@
*(.altinstr_replacement)
}
.rela : ALIGN(8) {
- __reloc_start = .;
*(.rela .rela*)
- __reloc_end = .;
}
.dynsym : ALIGN(8) {
- __dynsym_start = .;
*(.dynsym)
}
.dynstr : {
@@ -169,7 +181,11 @@
*(.hash)
}
- . = ALIGN(PAGE_SIZE);
+ __rela_offset = ADDR(.rela) - KIMAGE_VADDR;
+ __rela_size = SIZEOF(.rela);
+ __dynsym_offset = ADDR(.dynsym) - KIMAGE_VADDR;
+
+ . = ALIGN(SEGMENT_ALIGN);
__init_end = .;
_data = .;
@@ -201,6 +217,10 @@
"HYP init code too big or misaligned")
ASSERT(__idmap_text_end - (__idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K,
"ID map text too big or misaligned")
+#ifdef CONFIG_HIBERNATION
+ASSERT(__hibernate_exit_text_end - (__hibernate_exit_text_start & ~(SZ_4K - 1))
+ <= SZ_4K, "Hibernate exit text too big or misaligned")
+#endif
/*
* If padding is applied before .head.text, virt<->phys conversions will fail.
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index eba89e4..3246c4a 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -186,6 +186,13 @@
exit_handler = kvm_get_exit_handler(vcpu);
return exit_handler(vcpu, run);
+ case ARM_EXCEPTION_HYP_GONE:
+ /*
+ * EL2 has been reset to the hyp-stub. This happens when a guest
+ * is pre-empted by kvm_reboot()'s shutdown call.
+ */
+ run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+ return 0;
default:
kvm_pr_unimpl("Unsupported exception type: %d",
exception_index);
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index 7d8747c..a873a6d 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -21,6 +21,7 @@
#include <asm/kvm_arm.h>
#include <asm/kvm_mmu.h>
#include <asm/pgtable-hwdef.h>
+#include <asm/sysreg.h>
.text
.pushsection .hyp.idmap.text, "ax"
@@ -103,8 +104,8 @@
dsb sy
mrs x4, sctlr_el2
- and x4, x4, #SCTLR_EL2_EE // preserve endianness of EL2
- ldr x5, =SCTLR_EL2_FLAGS
+ and x4, x4, #SCTLR_ELx_EE // preserve endianness of EL2
+ ldr x5, =SCTLR_ELx_FLAGS
orr x4, x4, x5
msr sctlr_el2, x4
isb
@@ -138,6 +139,49 @@
eret
ENDPROC(__kvm_hyp_init)
+ /*
+ * Reset kvm back to the hyp stub. This is the trampoline dance in
+ * reverse. If kvm used an extended idmap, __extended_idmap_trampoline
+ * calls this code directly in the idmap. In this case switching to the
+ * boot tables is a no-op.
+ *
+ * x0: HYP boot pgd
+ * x1: HYP phys_idmap_start
+ */
+ENTRY(__kvm_hyp_reset)
+ /* We're in trampoline code in VA, switch back to boot page tables */
+ msr ttbr0_el2, x0
+ isb
+
+ /* Ensure the PA branch doesn't find a stale TLB entry or stale code. */
+ ic iallu
+ tlbi alle2
+ dsb sy
+ isb
+
+ /* Branch into PA space */
+ adr x0, 1f
+ bfi x1, x0, #0, #PAGE_SHIFT
+ br x1
+
+ /* We're now in idmap, disable MMU */
+1: mrs x0, sctlr_el2
+ ldr x1, =SCTLR_ELx_FLAGS
+ bic x0, x0, x1 // Clear SCTLR_ELx.M and the other enable bits
+ msr sctlr_el2, x0
+ isb
+
+ /* Invalidate the old TLBs */
+ tlbi alle2
+ dsb sy
+
+ /* Install stub vectors */
+ adr_l x0, __hyp_stub_vectors
+ msr vbar_el2, x0
+
+ eret
+ENDPROC(__kvm_hyp_reset)
+
.ltorg
.popsection
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 48f19a3..7ce9315 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -35,16 +35,21 @@
* in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c). Return values are
* passed in x0.
*
- * A function pointer with a value of 0 has a special meaning, and is
- * used to implement __hyp_get_vectors in the same way as in
+ * A function pointer with a value less than 0xfff has a special meaning,
+ * and is used to implement __hyp_get_vectors in the same way as in
* arch/arm64/kernel/hyp_stub.S.
+ * HVC behaves as a 'bl' call and will clobber lr.
*/
ENTRY(__kvm_call_hyp)
-alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
+alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
+ str lr, [sp, #-16]!
hvc #0
+ ldr lr, [sp], #16
ret
alternative_else
b __vhe_hyp_call
nop
+ nop
+ nop
alternative_endif
ENDPROC(__kvm_call_hyp)
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index ce9e5e5..70254a6 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -164,3 +164,22 @@
eret
ENDPROC(__fpsimd_guest_restore)
+
+/*
+ * When using the extended idmap, we don't have a trampoline page we can use
+ * while we switch page tables during __kvm_hyp_reset. Accessing the idmap
+ * directly would be ideal, but if we're using the extended idmap then the
+ * idmap is located above HYP_PAGE_OFFSET, and the address will be masked by
+ * kvm_call_hyp using kern_hyp_va.
+ *
+ * x0: HYP boot pgd
+ * x1: HYP phys_idmap_start
+ */
+ENTRY(__extended_idmap_trampoline)
+ mov x4, x1
+ adr_l x3, __kvm_hyp_reset
+
+ /* insert __kvm_hyp_reset()'s offset into phys_idmap_start */
+ bfi x4, x3, #0, #PAGE_SHIFT
+ br x4
+ENDPROC(__extended_idmap_trampoline)
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index 3488894..2d87f36 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -42,19 +42,17 @@
* Shuffle the parameters before calling the function
* pointed to in x0. Assumes parameters in x[1,2,3].
*/
- sub sp, sp, #16
- str lr, [sp]
mov lr, x0
mov x0, x1
mov x1, x2
mov x2, x3
blr lr
- ldr lr, [sp]
- add sp, sp, #16
.endm
ENTRY(__vhe_hyp_call)
+ str lr, [sp, #-16]!
do_el2_call
+ ldr lr, [sp], #16
/*
* We used to rely on having an exception return to get
* an implicit isb. In the E2H case, we don't have it anymore.
@@ -84,8 +82,8 @@
/* Here, we're pretty sure the host called HVC. */
restore_x0_to_x3
- /* Check for __hyp_get_vectors */
- cbnz x0, 1f
+ cmp x0, #HVC_GET_VECTORS
+ b.ne 1f
mrs x0, vbar_el2
b 2f
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 9677bf0..b1ad730 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -29,7 +29,9 @@
#include <asm/cputype.h>
#include <asm/ptrace.h>
#include <asm/kvm_arm.h>
+#include <asm/kvm_asm.h>
#include <asm/kvm_coproc.h>
+#include <asm/kvm_mmu.h>
/*
* ARMv8 Reset Values
@@ -130,3 +132,31 @@
/* Reset timer */
return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq);
}
+
+extern char __hyp_idmap_text_start[];
+
+unsigned long kvm_hyp_reset_entry(void)
+{
+ if (!__kvm_cpu_uses_extended_idmap()) {
+ unsigned long offset;
+
+ /*
+ * Find the address of __kvm_hyp_reset() in the trampoline page.
+ * This is present in the running page tables, and the boot page
+ * tables, so we call the code here to start the trampoline
+ * dance in reverse.
+ */
+ offset = (unsigned long)__kvm_hyp_reset
+ - ((unsigned long)__hyp_idmap_text_start & PAGE_MASK);
+
+ return TRAMPOLINE_VA + offset;
+ } else {
+ /*
+ * KVM is running with merged page tables, which don't have the
+ * trampoline page mapped. We know the idmap is still mapped,
+ * but can't be called into directly. Use
+ * __extended_idmap_trampoline to do the call.
+ */
+ return (unsigned long)kvm_ksym_ref(__extended_idmap_trampoline);
+ }
+}
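A hedged view of the eventual call site: the returned address is jumped to through the HVC trampoline with the boot pgd and idmap start as arguments. The shape below is assumed from the shared KVM teardown path, not shown in this hunk:

        /* Assumed call site: reset EL2 back to the hyp-stub on shutdown. */
        static inline void __cpu_reset_hyp_mode(phys_addr_t boot_pgd_ptr,
                                                phys_addr_t phys_idmap_start)
        {
                kvm_call_hyp((void *)kvm_hyp_reset_entry(),
                             boot_pgd_ptr, phys_idmap_start);
        }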
diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile
index 57f57fd..54bb209 100644
--- a/arch/arm64/mm/Makefile
+++ b/arch/arm64/mm/Makefile
@@ -4,6 +4,7 @@
context.o proc.o pageattr.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_ARM64_PTDUMP) += dump.o
+obj-$(CONFIG_NUMA) += numa.o
obj-$(CONFIG_KASAN) += kasan_init.o
KASAN_SANITIZE_kasan_init.o := n
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 6df0706..50ff9ba 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -24,8 +24,6 @@
#include <asm/cpufeature.h>
#include <asm/alternative.h>
-#include "proc-macros.S"
-
/*
* flush_icache_range(start,end)
*
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index c90c3c5..b7b3978 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -75,8 +75,7 @@
*/
pr_crit("CPU%d: smaller ASID size(%u) than boot CPU (%u)\n",
smp_processor_id(), asid, asid_bits);
- update_cpu_boot_status(CPU_PANIC_KERNEL);
- cpu_park_loop();
+ cpu_panic_kernel();
}
}
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index a6e757c..fd8b942 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -804,57 +804,24 @@
static LIST_HEAD(iommu_dma_masters);
static DEFINE_MUTEX(iommu_dma_notifier_lock);
-/*
- * Temporarily "borrow" a domain feature flag to to tell if we had to resort
- * to creating our own domain here, in case we need to clean it up again.
- */
-#define __IOMMU_DOMAIN_FAKE_DEFAULT (1U << 31)
-
static bool do_iommu_attach(struct device *dev, const struct iommu_ops *ops,
u64 dma_base, u64 size)
{
struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
/*
- * Best case: The device is either part of a group which was
- * already attached to a domain in a previous call, or it's
- * been put in a default DMA domain by the IOMMU core.
+ * If the IOMMU driver has the DMA domain support that we require,
+ * then the IOMMU core will have already configured a group for this
+ * device, and allocated the default domain for that group.
*/
- if (!domain) {
- /*
- * Urgh. The IOMMU core isn't going to do default domains
- * for non-PCI devices anyway, until it has some means of
- * abstracting the entirely implementation-specific
- * sideband data/SoC topology/unicorn dust that may or
- * may not differentiate upstream masters.
- * So until then, HORRIBLE HACKS!
- */
- domain = ops->domain_alloc(IOMMU_DOMAIN_DMA);
- if (!domain)
- goto out_no_domain;
-
- domain->ops = ops;
- domain->type = IOMMU_DOMAIN_DMA | __IOMMU_DOMAIN_FAKE_DEFAULT;
-
- if (iommu_attach_device(domain, dev))
- goto out_put_domain;
+ if (!domain || iommu_dma_init_domain(domain, dma_base, size)) {
+ pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
+ dev_name(dev));
+ return false;
}
- if (iommu_dma_init_domain(domain, dma_base, size))
- goto out_detach;
-
dev->archdata.dma_ops = &iommu_dma_ops;
return true;
-
-out_detach:
- iommu_detach_device(domain, dev);
-out_put_domain:
- if (domain->type & __IOMMU_DOMAIN_FAKE_DEFAULT)
- iommu_domain_free(domain);
-out_no_domain:
- pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
- dev_name(dev));
- return false;
}
static void queue_iommu_attach(struct device *dev, const struct iommu_ops *ops,
@@ -933,6 +900,10 @@
ret = register_iommu_dma_ops_notifier(&platform_bus_type);
if (!ret)
ret = register_iommu_dma_ops_notifier(&amba_bustype);
+#ifdef CONFIG_PCI
+ if (!ret)
+ ret = register_iommu_dma_ops_notifier(&pci_bus_type);
+#endif
/* handle devices queued before this arch_initcall */
if (!ret)
@@ -967,11 +938,8 @@
{
struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
- if (domain) {
+ if (WARN_ON(domain))
iommu_detach_device(domain, dev);
- if (domain->type & __IOMMU_DOMAIN_FAKE_DEFAULT)
- iommu_domain_free(domain);
- }
dev->archdata.dma_ops = NULL;
}
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index f9271cb..8404190 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -23,6 +23,7 @@
#include <linux/seq_file.h>
#include <asm/fixmap.h>
+#include <asm/kasan.h>
#include <asm/memory.h>
#include <asm/pgtable.h>
#include <asm/pgtable-hwdef.h>
@@ -32,37 +33,25 @@
const char *name;
};
-enum address_markers_idx {
- MODULES_START_NR = 0,
- MODULES_END_NR,
- VMALLOC_START_NR,
- VMALLOC_END_NR,
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
- VMEMMAP_START_NR,
- VMEMMAP_END_NR,
+static const struct addr_marker address_markers[] = {
+#ifdef CONFIG_KASAN
+ { KASAN_SHADOW_START, "Kasan shadow start" },
+ { KASAN_SHADOW_END, "Kasan shadow end" },
#endif
- FIXADDR_START_NR,
- FIXADDR_END_NR,
- PCI_START_NR,
- PCI_END_NR,
- KERNEL_SPACE_NR,
-};
-
-static struct addr_marker address_markers[] = {
- { MODULES_VADDR, "Modules start" },
- { MODULES_END, "Modules end" },
- { VMALLOC_START, "vmalloc() Area" },
- { VMALLOC_END, "vmalloc() End" },
+ { MODULES_VADDR, "Modules start" },
+ { MODULES_END, "Modules end" },
+ { VMALLOC_START, "vmalloc() Area" },
+ { VMALLOC_END, "vmalloc() End" },
+ { FIXADDR_START, "Fixmap start" },
+ { FIXADDR_TOP, "Fixmap end" },
+ { PCI_IO_START, "PCI I/O start" },
+ { PCI_IO_END, "PCI I/O end" },
#ifdef CONFIG_SPARSEMEM_VMEMMAP
- { 0, "vmemmap start" },
- { 0, "vmemmap end" },
+ { VMEMMAP_START, "vmemmap start" },
+ { VMEMMAP_START + VMEMMAP_SIZE, "vmemmap end" },
#endif
- { FIXADDR_START, "Fixmap start" },
- { FIXADDR_TOP, "Fixmap end" },
- { PCI_IO_START, "PCI I/O start" },
- { PCI_IO_END, "PCI I/O end" },
- { PAGE_OFFSET, "Linear Mapping" },
- { -1, NULL },
+ { PAGE_OFFSET, "Linear Mapping" },
+ { -1, NULL },
};
/*
@@ -347,13 +336,6 @@
for (j = 0; j < pg_level[i].num; j++)
pg_level[i].mask |= pg_level[i].bits[j].mask;
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
- address_markers[VMEMMAP_START_NR].start_address =
- (unsigned long)virt_to_page(PAGE_OFFSET);
- address_markers[VMEMMAP_END_NR].start_address =
- (unsigned long)virt_to_page(high_memory);
-#endif
-
pe = debugfs_create_file("kernel_page_tables", 0400, NULL, NULL,
&ptdump_fops);
return pe ? 0 : -ENOMEM;
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 95df28b..5954881 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -81,6 +81,56 @@
printk("\n");
}
+#ifdef CONFIG_ARM64_HW_AFDBM
+/*
+ * This function sets the access flags (dirty, accessed), as well as write
+ * permission, and only to a more permissive setting.
+ *
+ * It needs to cope with hardware update of the accessed/dirty state by other
+ * agents in the system and can safely skip the __sync_icache_dcache() call as,
+ * like set_pte_at(), the PTE is never changed from no-exec to exec here.
+ *
+ * Returns whether or not the PTE actually changed.
+ */
+int ptep_set_access_flags(struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep,
+ pte_t entry, int dirty)
+{
+ pteval_t old_pteval;
+ unsigned int tmp;
+
+ if (pte_same(*ptep, entry))
+ return 0;
+
+ /* only preserve the access flags and write permission */
+ pte_val(entry) &= PTE_AF | PTE_WRITE | PTE_DIRTY;
+
+ /*
+ * PTE_RDONLY is cleared by default in the asm below, so set it back
+ * if necessary (read-only or clean PTE).
+ */
+ if (!pte_write(entry) || !dirty)
+ pte_val(entry) |= PTE_RDONLY;
+
+ /*
+ * Setting the flags must be done atomically to avoid racing with the
+ * hardware update of the access/dirty state.
+ */
+ asm volatile("// ptep_set_access_flags\n"
+ " prfm pstl1strm, %2\n"
+ "1: ldxr %0, %2\n"
+ " and %0, %0, %3 // clear PTE_RDONLY\n"
+ " orr %0, %0, %4 // set flags\n"
+ " stxr %w1, %0, %2\n"
+ " cbnz %w1, 1b\n"
+ : "=&r" (old_pteval), "=&r" (tmp), "+Q" (pte_val(*ptep))
+ : "L" (~PTE_RDONLY), "r" (pte_val(entry)));
+
+ flush_tlb_fix_spurious_fault(vma, address);
+ return 1;
+}
+#endif
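At the C level the LDXR/STXR loop is a compare-and-swap that merges the sanitised flags while allowing concurrent hardware access/dirty updates to win the race. An illustrative cmpxchg() sketch (not a drop-in replacement for the asm above):

        /* Illustrative only: the real code must use the asm form so the
         * exact bits that race with the MMU walker stay under control. */
        static void sketch_set_access_flags(pte_t *ptep, pte_t entry)
        {
                pteval_t old, new;

                do {
                        old = READ_ONCE(pte_val(*ptep));
                        /* clear PTE_RDONLY, then OR in the new flags */
                        new = (old & ~PTE_RDONLY) | pte_val(entry);
                } while (cmpxchg(&pte_val(*ptep), old, new) != old);
        }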
+
/*
* The kernel tried to access some page that wasn't present.
*/
@@ -212,10 +262,6 @@
tsk = current;
mm = tsk->mm;
- /* Enable interrupts if they were enabled in the parent context. */
- if (interrupts_enabled(regs))
- local_irq_enable();
-
/*
* If we're in an interrupt or have no user context, we must not take
* the fault.
@@ -555,20 +601,33 @@
{
const struct fault_info *inf = debug_fault_info + DBG_ESR_EVT(esr);
struct siginfo info;
+ int rv;
- if (!inf->fn(addr, esr, regs))
- return 1;
+ /*
+ * Tell lockdep we disabled irqs in entry.S. Do nothing if they were
+ * already disabled to preserve the last enabled/disabled addresses.
+ */
+ if (interrupts_enabled(regs))
+ trace_hardirqs_off();
- pr_alert("Unhandled debug exception: %s (0x%08x) at 0x%016lx\n",
- inf->name, esr, addr);
+ if (!inf->fn(addr, esr, regs)) {
+ rv = 1;
+ } else {
+ pr_alert("Unhandled debug exception: %s (0x%08x) at 0x%016lx\n",
+ inf->name, esr, addr);
- info.si_signo = inf->sig;
- info.si_errno = 0;
- info.si_code = inf->code;
- info.si_addr = (void __user *)addr;
- arm64_notify_die("", regs, &info, 0);
+ info.si_signo = inf->sig;
+ info.si_errno = 0;
+ info.si_code = inf->code;
+ info.si_addr = (void __user *)addr;
+ arm64_notify_die("", regs, &info, 0);
+ rv = 0;
+ }
- return 0;
+ if (interrupts_enabled(regs))
+ trace_hardirqs_on();
+
+ return rv;
}
#ifdef CONFIG_ARM64_PAN
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index ea989d8..d45f862 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -40,6 +40,7 @@
#include <asm/kasan.h>
#include <asm/kernel-pgtable.h>
#include <asm/memory.h>
+#include <asm/numa.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/sizes.h>
@@ -86,6 +87,21 @@
return min(offset + (1ULL << 32), memblock_end_of_DRAM());
}
+#ifdef CONFIG_NUMA
+
+static void __init zone_sizes_init(unsigned long min, unsigned long max)
+{
+ unsigned long max_zone_pfns[MAX_NR_ZONES] = {0};
+
+ if (IS_ENABLED(CONFIG_ZONE_DMA))
+ max_zone_pfns[ZONE_DMA] = PFN_DOWN(max_zone_dma_phys());
+ max_zone_pfns[ZONE_NORMAL] = max;
+
+ free_area_init_nodes(max_zone_pfns);
+}
+
+#else
+
static void __init zone_sizes_init(unsigned long min, unsigned long max)
{
struct memblock_region *reg;
@@ -126,6 +142,8 @@
free_area_init_node(0, zone_size, min, zhole_size);
}
+#endif /* CONFIG_NUMA */
+
#ifdef CONFIG_HAVE_ARCH_PFN_VALID
int pfn_valid(unsigned long pfn)
{
@@ -142,10 +160,15 @@
static void __init arm64_memory_present(void)
{
struct memblock_region *reg;
+ int nid = 0;
- for_each_memblock(memory, reg)
- memory_present(0, memblock_region_memory_base_pfn(reg),
- memblock_region_memory_end_pfn(reg));
+ for_each_memblock(memory, reg) {
+#ifdef CONFIG_NUMA
+ nid = reg->nid;
+#endif
+ memory_present(nid, memblock_region_memory_base_pfn(reg),
+ memblock_region_memory_end_pfn(reg));
+ }
}
#endif
@@ -190,8 +213,12 @@
*/
memblock_remove(max_t(u64, memstart_addr + linear_region_size, __pa(_end)),
ULLONG_MAX);
- if (memblock_end_of_DRAM() > linear_region_size)
- memblock_remove(0, memblock_end_of_DRAM() - linear_region_size);
+ if (memstart_addr + linear_region_size < memblock_end_of_DRAM()) {
+ /* ensure that memstart_addr remains sufficiently aligned */
+ memstart_addr = round_up(memblock_end_of_DRAM() - linear_region_size,
+ ARM64_MEMSTART_ALIGN);
+ memblock_remove(0, memstart_addr);
+ }
/*
* Apply the memory limit if it was set. Since the kernel may be loaded
@@ -203,6 +230,35 @@
memblock_add(__pa(_text), (u64)(_end - _text));
}
+ if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && initrd_start) {
+ /*
+ * Add back the memory we just removed if removing it has made
+ * the initrd inaccessible via the linear mapping.
+ * Otherwise, this is a no-op.
+ */
+ u64 base = initrd_start & PAGE_MASK;
+ u64 size = PAGE_ALIGN(initrd_end) - base;
+
+ /*
+ * We can only add back the initrd memory if we don't end up
+ * with more memory than we can address via the linear mapping.
+ * It is up to the bootloader to position the kernel and the
+ * initrd reasonably close to each other (i.e., within 32 GB of
+ * each other) so that all granule/#levels combinations can
+ * always access both.
+ */
+ if (WARN(base < memblock_start_of_DRAM() ||
+ base + size > memblock_start_of_DRAM() +
+ linear_region_size,
+ "initrd not fully accessible via the linear mapping -- please check your bootloader ...\n")) {
+ initrd_start = 0;
+ } else {
+ memblock_remove(base, size); /* clear MEMBLOCK_ flags */
+ memblock_add(base, size);
+ memblock_reserve(base, size);
+ }
+ }
+
if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
extern u16 memstart_offset_seed;
u64 range = linear_region_size -
@@ -245,7 +301,6 @@
dma_contiguous_reserve(arm64_dma_phys_limit);
memblock_allow_resize();
- memblock_dump_all();
}
void __init bootmem_init(void)
@@ -257,6 +312,9 @@
early_memtest(min << PAGE_SHIFT, max << PAGE_SHIFT);
+ max_pfn = max_low_pfn = max;
+
+ arm64_numa_init();
/*
* Sparsemem tries to allocate bootmem in memory_present(), so must be
* done after the fixed reservations.
@@ -267,7 +325,7 @@
zone_sizes_init(min, max);
high_memory = __va((max << PAGE_SHIFT) - 1) + 1;
- max_pfn = max_low_pfn = max;
+ memblock_dump_all();
}
#ifndef CONFIG_SPARSEMEM_VMEMMAP
@@ -371,26 +429,27 @@
MLM(MODULES_VADDR, MODULES_END));
pr_cont(" vmalloc : 0x%16lx - 0x%16lx (%6ld GB)\n",
MLG(VMALLOC_START, VMALLOC_END));
- pr_cont(" .text : 0x%p" " - 0x%p" " (%6ld KB)\n"
- " .rodata : 0x%p" " - 0x%p" " (%6ld KB)\n"
- " .init : 0x%p" " - 0x%p" " (%6ld KB)\n"
- " .data : 0x%p" " - 0x%p" " (%6ld KB)\n",
- MLK_ROUNDUP(_text, __start_rodata),
- MLK_ROUNDUP(__start_rodata, _etext),
- MLK_ROUNDUP(__init_begin, __init_end),
+ pr_cont(" .text : 0x%p" " - 0x%p" " (%6ld KB)\n",
+ MLK_ROUNDUP(_text, __start_rodata));
+ pr_cont(" .rodata : 0x%p" " - 0x%p" " (%6ld KB)\n",
+ MLK_ROUNDUP(__start_rodata, _etext));
+ pr_cont(" .init : 0x%p" " - 0x%p" " (%6ld KB)\n",
+ MLK_ROUNDUP(__init_begin, __init_end));
+ pr_cont(" .data : 0x%p" " - 0x%p" " (%6ld KB)\n",
MLK_ROUNDUP(_sdata, _edata));
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
- pr_cont(" vmemmap : 0x%16lx - 0x%16lx (%6ld GB maximum)\n"
- " 0x%16lx - 0x%16lx (%6ld MB actual)\n",
- MLG(VMEMMAP_START,
- VMEMMAP_START + VMEMMAP_SIZE),
- MLM((unsigned long)phys_to_page(memblock_start_of_DRAM()),
- (unsigned long)virt_to_page(high_memory)));
-#endif
+ pr_cont(" .bss : 0x%p" " - 0x%p" " (%6ld KB)\n",
+ MLK_ROUNDUP(__bss_start, __bss_stop));
pr_cont(" fixed : 0x%16lx - 0x%16lx (%6ld KB)\n",
MLK(FIXADDR_START, FIXADDR_TOP));
pr_cont(" PCI I/O : 0x%16lx - 0x%16lx (%6ld MB)\n",
MLM(PCI_IO_START, PCI_IO_END));
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+ pr_cont(" vmemmap : 0x%16lx - 0x%16lx (%6ld GB maximum)\n",
+ MLG(VMEMMAP_START, VMEMMAP_START + VMEMMAP_SIZE));
+ pr_cont(" 0x%16lx - 0x%16lx (%6ld MB actual)\n",
+ MLM((unsigned long)phys_to_page(memblock_start_of_DRAM()),
+ (unsigned long)virt_to_page(high_memory)));
+#endif
pr_cont(" memory : 0x%16lx - 0x%16lx (%6ld MB)\n",
MLM(__phys_to_virt(memblock_start_of_DRAM()),
(unsigned long)high_memory));
@@ -407,6 +466,12 @@
BUILD_BUG_ON(TASK_SIZE_32 > TASK_SIZE_64);
#endif
+ /*
+ * Make sure we chose the upper bound of sizeof(struct page)
+ * correctly.
+ */
+ BUILD_BUG_ON(sizeof(struct page) > (1 << STRUCT_PAGE_MAX_SHIFT));
+
if (PAGE_SIZE >= 16384 && get_num_physpages() <= 128) {
extern int sysctl_overcommit_memory;
/*
@@ -419,7 +484,8 @@
void free_initmem(void)
{
- free_initmem_default(0);
+ free_reserved_area(__va(__pa(__init_begin)), __va(__pa(__init_end)),
+ 0, "unused kernel");
fixup_init();
}
diff --git a/arch/arm64/mm/mm.h b/arch/arm64/mm/mm.h
index ef47d99..71fe9898 100644
--- a/arch/arm64/mm/mm.h
+++ b/arch/arm64/mm/mm.h
@@ -1,3 +1,2 @@
-extern void __init bootmem_init(void);
void fixup_init(void);
diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
index 232f787..01c1717 100644
--- a/arch/arm64/mm/mmap.c
+++ b/arch/arm64/mm/mmap.c
@@ -95,8 +95,6 @@
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
}
}
-EXPORT_SYMBOL_GPL(arch_pick_mmap_layout);
-
/*
* You really shouldn't be using read() or write() on /dev/mem. This might go
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index f3e5c74..0f85a46 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -385,7 +385,7 @@
static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end)
{
- unsigned long kernel_start = __pa(_stext);
+ unsigned long kernel_start = __pa(_text);
unsigned long kernel_end = __pa(_etext);
/*
@@ -417,7 +417,7 @@
early_pgtable_alloc);
/*
- * Map the linear alias of the [_stext, _etext) interval as
+ * Map the linear alias of the [_text, _etext) interval as
* read-only/non-executable. This makes the contents of the
* region accessible to subsystems such as hibernate, but
* protects it from inadvertent modification or execution.
@@ -449,8 +449,8 @@
{
unsigned long section_size;
- section_size = (unsigned long)__start_rodata - (unsigned long)_stext;
- create_mapping_late(__pa(_stext), (unsigned long)_stext,
+ section_size = (unsigned long)__start_rodata - (unsigned long)_text;
+ create_mapping_late(__pa(_text), (unsigned long)_text,
section_size, PAGE_KERNEL_ROX);
/*
* mark .rodata as read only. Use _etext rather than __end_rodata to
@@ -471,8 +471,8 @@
unmap_kernel_range((u64)__init_begin, (u64)(__init_end - __init_begin));
}
-static void __init map_kernel_chunk(pgd_t *pgd, void *va_start, void *va_end,
- pgprot_t prot, struct vm_struct *vma)
+static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end,
+ pgprot_t prot, struct vm_struct *vma)
{
phys_addr_t pa_start = __pa(va_start);
unsigned long size = va_end - va_start;
@@ -499,11 +499,11 @@
{
static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_init, vmlinux_data;
- map_kernel_chunk(pgd, _stext, __start_rodata, PAGE_KERNEL_EXEC, &vmlinux_text);
- map_kernel_chunk(pgd, __start_rodata, _etext, PAGE_KERNEL, &vmlinux_rodata);
- map_kernel_chunk(pgd, __init_begin, __init_end, PAGE_KERNEL_EXEC,
- &vmlinux_init);
- map_kernel_chunk(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data);
+ map_kernel_segment(pgd, _text, __start_rodata, PAGE_KERNEL_EXEC, &vmlinux_text);
+ map_kernel_segment(pgd, __start_rodata, _etext, PAGE_KERNEL, &vmlinux_rodata);
+ map_kernel_segment(pgd, __init_begin, __init_end, PAGE_KERNEL_EXEC,
+ &vmlinux_init);
+ map_kernel_segment(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data);
if (!pgd_val(*pgd_offset_raw(pgd, FIXADDR_START))) {
/*
@@ -564,8 +564,6 @@
*/
memblock_free(__pa(swapper_pg_dir) + PAGE_SIZE,
SWAPPER_DIR_SIZE - PAGE_SIZE);
-
- bootmem_init();
}
/*
diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
new file mode 100644
index 0000000..98dc104
--- /dev/null
+++ b/arch/arm64/mm/numa.c
@@ -0,0 +1,396 @@
+/*
+ * NUMA support, based on the x86 implementation.
+ *
+ * Copyright (C) 2015 Cavium Inc.
+ * Author: Ganapatrao Kulkarni <gkulkarni@cavium.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/bootmem.h>
+#include <linux/memblock.h>
+#include <linux/module.h>
+#include <linux/of.h>
+
+struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
+EXPORT_SYMBOL(node_data);
+nodemask_t numa_nodes_parsed __initdata;
+static int cpu_to_node_map[NR_CPUS] = { [0 ... NR_CPUS-1] = NUMA_NO_NODE };
+
+static int numa_distance_cnt;
+static u8 *numa_distance;
+static int numa_off;
+
+static __init int numa_parse_early_param(char *opt)
+{
+ if (!opt)
+ return -EINVAL;
+ if (!strncmp(opt, "off", 3)) {
+ pr_info("%s\n", "NUMA turned off");
+ numa_off = 1;
+ }
+ return 0;
+}
+early_param("numa", numa_parse_early_param);
+
+cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
+EXPORT_SYMBOL(node_to_cpumask_map);
+
+#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+
+/*
+ * Returns a pointer to the bitmask of CPUs on Node 'node'.
+ */
+const struct cpumask *cpumask_of_node(int node)
+{
+ if (WARN_ON(node >= nr_node_ids))
+ return cpu_none_mask;
+
+ if (WARN_ON(node_to_cpumask_map[node] == NULL))
+ return cpu_online_mask;
+
+ return node_to_cpumask_map[node];
+}
+EXPORT_SYMBOL(cpumask_of_node);
+
+#endif
+
+static void map_cpu_to_node(unsigned int cpu, int nid)
+{
+ set_cpu_numa_node(cpu, nid);
+ if (nid >= 0)
+ cpumask_set_cpu(cpu, node_to_cpumask_map[nid]);
+}
+
+void numa_clear_node(unsigned int cpu)
+{
+ int nid = cpu_to_node(cpu);
+
+ if (nid >= 0)
+ cpumask_clear_cpu(cpu, node_to_cpumask_map[nid]);
+ set_cpu_numa_node(cpu, NUMA_NO_NODE);
+}
+
+/*
+ * Allocate node_to_cpumask_map based on number of available nodes
+ * Requires node_possible_map to be valid.
+ *
+ * Note: cpumask_of_node() is not valid until after this is done.
+ * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.)
+ */
+static void __init setup_node_to_cpumask_map(void)
+{
+ unsigned int cpu;
+ int node;
+
+ /* setup nr_node_ids if not done yet */
+ if (nr_node_ids == MAX_NUMNODES)
+ setup_nr_node_ids();
+
+ /* allocate and clear the mapping */
+ for (node = 0; node < nr_node_ids; node++) {
+ alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]);
+ cpumask_clear(node_to_cpumask_map[node]);
+ }
+
+ for_each_possible_cpu(cpu)
+ set_cpu_numa_node(cpu, NUMA_NO_NODE);
+
+ /* cpumask_of_node() will now work */
+ pr_debug("NUMA: Node to cpumask map for %d nodes\n", nr_node_ids);
+}
+
+/*
+ * Set the CPU-to-node mapping for @cpu.
+ */
+void numa_store_cpu_info(unsigned int cpu)
+{
+ map_cpu_to_node(cpu, numa_off ? 0 : cpu_to_node_map[cpu]);
+}
+
+void __init early_map_cpu_to_node(unsigned int cpu, int nid)
+{
+ /* fallback to node 0 */
+ if (nid < 0 || nid >= MAX_NUMNODES)
+ nid = 0;
+
+ cpu_to_node_map[cpu] = nid;
+}
+
+/**
+ * numa_add_memblk - Set the node id for a memory block
+ * @nid: NUMA node ID of the new memblk
+ * @start: Start address of the new memblk
+ * @size: Size of the new memblk
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+int __init numa_add_memblk(int nid, u64 start, u64 size)
+{
+ int ret;
+
+ ret = memblock_set_node(start, size, &memblock.memory, nid);
+ if (ret < 0) {
+ pr_err("NUMA: memblock [0x%llx - 0x%llx] failed to add on node %d\n",
+ start, (start + size - 1), nid);
+ return ret;
+ }
+
+ node_set(nid, numa_nodes_parsed);
+ pr_info("NUMA: Adding memblock [0x%llx - 0x%llx] on node %d\n",
+ start, (start + size - 1), nid);
+ return ret;
+}
+
+/**
+ * Initialize NODE_DATA for a node, allocated from that node's local memory when possible
+ */
+static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
+{
+ const size_t nd_size = roundup(sizeof(pg_data_t), SMP_CACHE_BYTES);
+ u64 nd_pa;
+ void *nd;
+ int tnid;
+
+ pr_info("NUMA: Initmem setup node %d [mem %#010Lx-%#010Lx]\n",
+ nid, start_pfn << PAGE_SHIFT,
+ (end_pfn << PAGE_SHIFT) - 1);
+
+ nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
+ nd = __va(nd_pa);
+
+ /* report and initialize */
+ pr_info("NUMA: NODE_DATA [mem %#010Lx-%#010Lx]\n",
+ nd_pa, nd_pa + nd_size - 1);
+ tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
+ if (tnid != nid)
+ pr_info("NUMA: NODE_DATA(%d) on node %d\n", nid, tnid);
+
+ node_data[nid] = nd;
+ memset(NODE_DATA(nid), 0, sizeof(pg_data_t));
+ NODE_DATA(nid)->node_id = nid;
+ NODE_DATA(nid)->node_start_pfn = start_pfn;
+ NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
+}
+
+/**
+ * numa_free_distance - Free the current NUMA distance table.
+ */
+void __init numa_free_distance(void)
+{
+ size_t size;
+
+ if (!numa_distance)
+ return;
+
+ size = numa_distance_cnt * numa_distance_cnt *
+ sizeof(numa_distance[0]);
+
+ memblock_free(__pa(numa_distance), size);
+ numa_distance_cnt = 0;
+ numa_distance = NULL;
+}
+
+/**
+ * numa_alloc_distance - Create a new NUMA distance table.
+ */
+static int __init numa_alloc_distance(void)
+{
+ size_t size;
+ u64 phys;
+ int i, j;
+
+ size = nr_node_ids * nr_node_ids * sizeof(numa_distance[0]);
+ phys = memblock_find_in_range(0, PFN_PHYS(max_pfn),
+ size, PAGE_SIZE);
+ if (WARN_ON(!phys))
+ return -ENOMEM;
+
+ memblock_reserve(phys, size);
+
+ numa_distance = __va(phys);
+ numa_distance_cnt = nr_node_ids;
+
+ /* fill with the default distances */
+ for (i = 0; i < numa_distance_cnt; i++)
+ for (j = 0; j < numa_distance_cnt; j++)
+ numa_distance[i * numa_distance_cnt + j] = i == j ?
+ LOCAL_DISTANCE : REMOTE_DISTANCE;
+
+ pr_debug("NUMA: Initialized distance table, cnt=%d\n",
+ numa_distance_cnt);
+
+ return 0;
+}
+
+/**
+ * numa_set_distance - Set the distance between two NUMA nodes
+ * @from: the 'from' node to set distance
+ * @to: the 'to' node to set distance
+ * @distance: NUMA distance
+ *
+ * Set the distance from node @from to node @to to @distance.
+ * If the distance table doesn't exist, a warning is printed once.
+ *
+ * If @from or @to is higher than the highest known node or lower than zero,
+ * or if @distance doesn't make sense, the call is ignored.
+ */
+void __init numa_set_distance(int from, int to, int distance)
+{
+ if (!numa_distance) {
+ pr_warn_once("NUMA: Warning: distance table not allocated yet\n");
+ return;
+ }
+
+ if (from >= numa_distance_cnt || to >= numa_distance_cnt ||
+ from < 0 || to < 0) {
+ pr_warn_once("NUMA: Warning: node ids are out of bounds, from=%d to=%d distance=%d\n",
+ from, to, distance);
+ return;
+ }
+
+ if ((u8)distance != distance ||
+ (from == to && distance != LOCAL_DISTANCE)) {
+ pr_warn_once("NUMA: Warning: invalid distance parameter, from=%d to=%d distance=%d\n",
+ from, to, distance);
+ return;
+ }
+
+ numa_distance[from * numa_distance_cnt + to] = distance;
+}
+
+/**
+ * __node_distance - Return the NUMA distance from node @from to node @to.
+ */
+int __node_distance(int from, int to)
+{
+ if (from >= numa_distance_cnt || to >= numa_distance_cnt)
+ return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE;
+ return numa_distance[from * numa_distance_cnt + to];
+}
+EXPORT_SYMBOL(__node_distance);
+
+static int __init numa_register_nodes(void)
+{
+ int nid;
+ struct memblock_region *mblk;
+
+ /* Check that a valid node id is set for every memblk */
+ for_each_memblock(memory, mblk)
+ if (mblk->nid == NUMA_NO_NODE || mblk->nid >= MAX_NUMNODES) {
+ pr_warn("NUMA: Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n",
+ mblk->nid, mblk->base,
+ mblk->base + mblk->size - 1);
+ return -EINVAL;
+ }
+
+ /* Finally register nodes. */
+ for_each_node_mask(nid, numa_nodes_parsed) {
+ unsigned long start_pfn, end_pfn;
+
+ get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
+ setup_node_data(nid, start_pfn, end_pfn);
+ node_set_online(nid);
+ }
+
+ /* Set node_possible_map to the nodes actually parsed */
+ node_possible_map = numa_nodes_parsed;
+
+ return 0;
+}
+
+static int __init numa_init(int (*init_func)(void))
+{
+ int ret;
+
+ nodes_clear(numa_nodes_parsed);
+ nodes_clear(node_possible_map);
+ nodes_clear(node_online_map);
+ numa_free_distance();
+
+ ret = numa_alloc_distance();
+ if (ret < 0)
+ return ret;
+
+ ret = init_func();
+ if (ret < 0)
+ return ret;
+
+ if (nodes_empty(numa_nodes_parsed))
+ return -EINVAL;
+
+ ret = numa_register_nodes();
+ if (ret < 0)
+ return ret;
+
+ setup_node_to_cpumask_map();
+
+ /* init boot processor */
+ cpu_to_node_map[0] = 0;
+ map_cpu_to_node(0, 0);
+
+ return 0;
+}
+
+/**
+ * dummy_numa_init - Fallback dummy NUMA init
+ *
+ * Used if there's no underlying NUMA architecture, NUMA initialization
+ * fails, or NUMA is disabled on the command line.
+ *
+ * Must online at least one node (node 0) and add memory blocks that cover all
+ * allowed memory. It is unlikely that this function fails.
+ */
+static int __init dummy_numa_init(void)
+{
+ int ret;
+ struct memblock_region *mblk;
+
+ pr_info("No NUMA configuration found\n");
+ pr_info("NUMA: Faking a node at [mem %#018Lx-%#018Lx]\n",
+ 0LLU, PFN_PHYS(max_pfn) - 1);
+
+ for_each_memblock(memory, mblk) {
+ ret = numa_add_memblk(0, mblk->base, mblk->size);
+ if (!ret)
+ continue;
+
+ pr_err("NUMA init failed\n");
+ return ret;
+ }
+
+ numa_off = 1;
+ return 0;
+}
+
+/**
+ * arm64_numa_init - Initialize NUMA
+ *
+ * Try each configured NUMA initialization method until one succeeds. The
+ * last fallback is a dummy single-node configuration encompassing the whole memory.
+ */
+void __init arm64_numa_init(void)
+{
+ if (!numa_off) {
+ if (!numa_init(of_numa_init))
+ return;
+ }
+
+ numa_init(dummy_numa_init);
+}
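
The distance bookkeeping above stores the node-to-node matrix as a flat
array of bytes indexed as from * numa_distance_cnt + to. A minimal
user-space sketch of the same scheme (the demo_* names are illustrative,
not kernel symbols; a standalone model rather than kernel code):

    #include <stdio.h>
    #include <stdlib.h>

    #define LOCAL_DISTANCE  10
    #define REMOTE_DISTANCE 20

    static unsigned char *demo_distance;
    static int demo_cnt;

    /* Mirror of numa_alloc_distance(): a flat cnt*cnt byte matrix with
     * the local distance on the diagonal, the remote one elsewhere. */
    static int demo_alloc_distance(int nr_nodes)
    {
        int i, j;

        demo_distance = malloc((size_t)nr_nodes * nr_nodes);
        if (!demo_distance)
            return -1;
        demo_cnt = nr_nodes;
        for (i = 0; i < demo_cnt; i++)
            for (j = 0; j < demo_cnt; j++)
                demo_distance[i * demo_cnt + j] =
                    i == j ? LOCAL_DISTANCE : REMOTE_DISTANCE;
        return 0;
    }

    /* Mirror of numa_set_distance(): bounds check, the value must fit
     * in a u8, and the diagonal must stay LOCAL_DISTANCE. */
    static void demo_set_distance(int from, int to, int distance)
    {
        if (from < 0 || to < 0 || from >= demo_cnt || to >= demo_cnt)
            return;
        if ((unsigned char)distance != distance ||
            (from == to && distance != LOCAL_DISTANCE))
            return;
        demo_distance[from * demo_cnt + to] = distance;
    }

    int main(void)
    {
        if (demo_alloc_distance(2))
            return 1;
        demo_set_distance(0, 1, 40);
        demo_set_distance(0, 0, 40);    /* rejected: diagonal */
        printf("0->1=%d 0->0=%d\n",     /* prints "0->1=40 0->0=10" */
               demo_distance[0 * demo_cnt + 1],
               demo_distance[0 * demo_cnt + 0]);
        return 0;
    }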
diff --git a/arch/arm64/mm/proc-macros.S b/arch/arm64/mm/proc-macros.S
deleted file mode 100644
index e6a30e1..0000000
--- a/arch/arm64/mm/proc-macros.S
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Based on arch/arm/mm/proc-macros.S
- *
- * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <asm/asm-offsets.h>
-#include <asm/thread_info.h>
-
-/*
- * vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm)
- */
- .macro vma_vm_mm, rd, rn
- ldr \rd, [\rn, #VMA_VM_MM]
- .endm
-
-/*
- * mmid - get context id from mm pointer (mm->context.id)
- */
- .macro mmid, rd, rn
- ldr \rd, [\rn, #MM_CONTEXT_ID]
- .endm
-
-/*
- * dcache_line_size - get the minimum D-cache line size from the CTR register.
- */
- .macro dcache_line_size, reg, tmp
- mrs \tmp, ctr_el0 // read CTR
- ubfm \tmp, \tmp, #16, #19 // cache line size encoding
- mov \reg, #4 // bytes per word
- lsl \reg, \reg, \tmp // actual cache line size
- .endm
-
-/*
- * icache_line_size - get the minimum I-cache line size from the CTR register.
- */
- .macro icache_line_size, reg, tmp
- mrs \tmp, ctr_el0 // read CTR
- and \tmp, \tmp, #0xf // cache line size encoding
- mov \reg, #4 // bytes per word
- lsl \reg, \reg, \tmp // actual cache line size
- .endm
-
-/*
- * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map
- */
- .macro tcr_set_idmap_t0sz, valreg, tmpreg
-#ifndef CONFIG_ARM64_VA_BITS_48
- ldr_l \tmpreg, idmap_t0sz
- bfi \valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH
-#endif
- .endm
-
-/*
- * Macro to perform a data cache maintenance for the interval
- * [kaddr, kaddr + size)
- *
- * op: operation passed to dc instruction
- * domain: domain used in dsb instruction
- * kaddr: starting virtual address of the region
- * size: size of the region
- * Corrupts: kaddr, size, tmp1, tmp2
- */
- .macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2
- dcache_line_size \tmp1, \tmp2
- add \size, \kaddr, \size
- sub \tmp2, \tmp1, #1
- bic \kaddr, \kaddr, \tmp2
-9998: dc \op, \kaddr
- add \kaddr, \kaddr, \tmp1
- cmp \kaddr, \size
- b.lo 9998b
- dsb \domain
- .endm
-
-/*
- * reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present
- */
- .macro reset_pmuserenr_el0, tmpreg
- mrs \tmpreg, id_aa64dfr0_el1 // Check ID_AA64DFR0_EL1 PMUVer
- sbfx \tmpreg, \tmpreg, #8, #4
- cmp \tmpreg, #1 // Skip if no PMU present
- b.lt 9000f
- msr pmuserenr_el0, xzr // Disable PMU access from EL0
-9000:
- .endm
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 543f519..c431787 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -23,13 +23,11 @@
#include <asm/assembler.h>
#include <asm/asm-offsets.h>
#include <asm/hwcap.h>
-#include <asm/pgtable-hwdef.h>
#include <asm/pgtable.h>
+#include <asm/pgtable-hwdef.h>
#include <asm/cpufeature.h>
#include <asm/alternative.h>
-#include "proc-macros.S"
-
#ifdef CONFIG_ARM64_64K_PAGES
#define TCR_TG_FLAGS TCR_TG0_64K | TCR_TG1_64K
#elif defined(CONFIG_ARM64_16K_PAGES)
@@ -66,62 +64,50 @@
mrs x2, tpidr_el0
mrs x3, tpidrro_el0
mrs x4, contextidr_el1
- mrs x5, mair_el1
- mrs x6, cpacr_el1
- mrs x7, ttbr1_el1
- mrs x8, tcr_el1
- mrs x9, vbar_el1
- mrs x10, mdscr_el1
- mrs x11, oslsr_el1
- mrs x12, sctlr_el1
+ mrs x5, cpacr_el1
+ mrs x6, tcr_el1
+ mrs x7, vbar_el1
+ mrs x8, mdscr_el1
+ mrs x9, oslsr_el1
+ mrs x10, sctlr_el1
stp x2, x3, [x0]
- stp x4, x5, [x0, #16]
- stp x6, x7, [x0, #32]
- stp x8, x9, [x0, #48]
- stp x10, x11, [x0, #64]
- str x12, [x0, #80]
+ stp x4, xzr, [x0, #16]
+ stp x5, x6, [x0, #32]
+ stp x7, x8, [x0, #48]
+ stp x9, x10, [x0, #64]
ret
ENDPROC(cpu_do_suspend)
/**
* cpu_do_resume - restore CPU register context
*
- * x0: Physical address of context pointer
- * x1: ttbr0_el1 to be restored
- *
- * Returns:
- * sctlr_el1 value in x0
+ * x0: Address of context pointer
*/
ENTRY(cpu_do_resume)
- /*
- * Invalidate local tlb entries before turning on MMU
- */
- tlbi vmalle1
ldp x2, x3, [x0]
ldp x4, x5, [x0, #16]
- ldp x6, x7, [x0, #32]
- ldp x8, x9, [x0, #48]
- ldp x10, x11, [x0, #64]
- ldr x12, [x0, #80]
+ ldp x6, x8, [x0, #32]
+ ldp x9, x10, [x0, #48]
+ ldp x11, x12, [x0, #64]
msr tpidr_el0, x2
msr tpidrro_el0, x3
msr contextidr_el1, x4
- msr mair_el1, x5
msr cpacr_el1, x6
- msr ttbr0_el1, x1
- msr ttbr1_el1, x7
- tcr_set_idmap_t0sz x8, x7
+
+ /* Don't change t0sz here, mask those bits when restoring */
+ mrs x5, tcr_el1
+ bfi x8, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH
+
msr tcr_el1, x8
msr vbar_el1, x9
msr mdscr_el1, x10
+ msr sctlr_el1, x12
/*
* Restore oslsr_el1 by writing oslar_el1
*/
ubfx x11, x11, #1, #1
msr oslar_el1, x11
reset_pmuserenr_el0 x0 // Disable PMU access from EL0
- mov x0, x12
- dsb nsh // Make sure local tlb invalidation completed
isb
ret
ENDPROC(cpu_do_resume)
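
The suspend/resume rework above drops mair_el1 and ttbr1_el1 from the
saved context and, instead of calling tcr_set_idmap_t0sz on resume,
splices the live T0SZ field into the saved TCR_EL1 value with a bfi
before writing it back. A small sketch of what that bitfield insert
computes (offset/width per arm64's pgtable-hwdef.h, where
TCR_T0SZ_OFFSET is 0 and TCR_TxSZ_WIDTH is 6):

    #include <stdint.h>
    #include <stdio.h>

    #define TCR_T0SZ_OFFSET 0
    #define TCR_TxSZ_WIDTH  6

    /* C rendering of "bfi dst, src, #off, #width": replace width bits
     * of dst starting at off with the low width bits of src. */
    static uint64_t bfi(uint64_t dst, uint64_t src,
                        unsigned int off, unsigned int width)
    {
        uint64_t mask = ((UINT64_C(1) << width) - 1) << off;

        return (dst & ~mask) | ((src << off) & mask);
    }

    int main(void)
    {
        uint64_t saved_tcr = 0xb5500020ULL; /* stale T0SZ = 32 */
        uint64_t live_tcr  = 25;            /* current T0SZ = 25 */

        /* The restored TCR keeps everything from the saved copy
         * except T0SZ, which is taken from the live register:
         * prints 0xb5500019. */
        printf("%#llx\n", (unsigned long long)
               bfi(saved_tcr, live_tcr, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH));
        return 0;
    }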
diff --git a/arch/ia64/include/asm/iommu.h b/arch/ia64/include/asm/iommu.h
index 105c93b..1d12129 100644
--- a/arch/ia64/include/asm/iommu.h
+++ b/arch/ia64/include/asm/iommu.h
@@ -1,7 +1,6 @@
#ifndef _ASM_IA64_IOMMU_H
#define _ASM_IA64_IOMMU_H 1
-#define cpu_has_x2apic 0
/* 10 seconds */
#define DMAR_OPERATION_TIMEOUT (((cycles_t) local_cpu_data->itc_freq)*10)
diff --git a/arch/ia64/include/asm/rwsem.h b/arch/ia64/include/asm/rwsem.h
index ce11247..8b23e07 100644
--- a/arch/ia64/include/asm/rwsem.h
+++ b/arch/ia64/include/asm/rwsem.h
@@ -49,8 +49,8 @@
/*
* lock for writing
*/
-static inline void
-__down_write (struct rw_semaphore *sem)
+static inline long
+___down_write (struct rw_semaphore *sem)
{
long old, new;
@@ -59,10 +59,26 @@
new = old + RWSEM_ACTIVE_WRITE_BIAS;
} while (cmpxchg_acq(&sem->count, old, new) != old);
- if (old != 0)
+ return old;
+}
+
+static inline void
+__down_write (struct rw_semaphore *sem)
+{
+ if (___down_write(sem))
rwsem_down_write_failed(sem);
}
+static inline int
+__down_write_killable (struct rw_semaphore *sem)
+{
+ if (___down_write(sem))
+ if (IS_ERR(rwsem_down_write_failed_killable(sem)))
+ return -EINTR;
+
+ return 0;
+}
+
/*
* unlock after reading
*/
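
The ia64 hunk above, like the s390 hunk further down, refactors the
write-lock fast path into a shared ___down_write() helper that returns
the old count, so the new killable variant can reuse it and turn a
fatal-signal wakeup into -EINTR. A toy, single-threaded model of the
control flow (toy_* names are illustrative; the arch atomics and the
sleeping slow paths are stubbed out):

    #include <errno.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct toy_rwsem { long count; };

    /* Stands in for the arch-specific cmpxchg/fetch-add sequence;
     * returns the old count, nonzero meaning the lock was contended. */
    static long toy_fastpath(struct toy_rwsem *sem)
    {
        long old = sem->count;

        sem->count = old + 1;
        return old;
    }

    static void toy_slowpath(struct toy_rwsem *sem)
    {
        /* rwsem_down_write_failed(): would sleep uninterruptibly. */
        (void)sem;
    }

    static bool toy_slowpath_killable(struct toy_rwsem *sem)
    {
        /* rwsem_down_write_failed_killable(): returns an ERR_PTR when
         * the sleeper is woken by a fatal signal (stubbed as "no"). */
        (void)sem;
        return false;
    }

    static void toy_down_write(struct toy_rwsem *sem)
    {
        if (toy_fastpath(sem))
            toy_slowpath(sem);
    }

    static int toy_down_write_killable(struct toy_rwsem *sem)
    {
        if (toy_fastpath(sem))
            if (toy_slowpath_killable(sem))
                return -EINTR;
        return 0;
    }

    int main(void)
    {
        struct toy_rwsem sem = { 0 };

        toy_down_write(&sem);              /* uncontended: fast path */
        printf("%d\n", toy_down_write_killable(&sem)); /* 0 here */
        return 0;
    }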
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index 300dac3..bf0865c 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -531,8 +531,6 @@
efi.systab->hdr.revision >> 16,
efi.systab->hdr.revision & 0xffff, vendor);
- set_bit(EFI_SYSTEM_TABLES, &efi.flags);
-
palo_phys = EFI_INVALID_TABLE_ADDR;
if (efi_config_init(arch_tables) != 0)
diff --git a/arch/m68k/bvme6000/rtc.c b/arch/m68k/bvme6000/rtc.c
index cf12a17..f7984f4 100644
--- a/arch/m68k/bvme6000/rtc.c
+++ b/arch/m68k/bvme6000/rtc.c
@@ -15,7 +15,7 @@
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/module.h>
-#include <linux/mc146818rtc.h> /* For struct rtc_time and ioctls, etc */
+#include <linux/rtc.h> /* For struct rtc_time and ioctls, etc */
#include <linux/bcd.h>
#include <asm/bvme6000hw.h>
diff --git a/arch/m68k/mvme16x/rtc.c b/arch/m68k/mvme16x/rtc.c
index 1755e2f..1cdc732 100644
--- a/arch/m68k/mvme16x/rtc.c
+++ b/arch/m68k/mvme16x/rtc.c
@@ -14,7 +14,7 @@
#include <linux/fcntl.h>
#include <linux/init.h>
#include <linux/poll.h>
-#include <linux/mc146818rtc.h> /* For struct rtc_time and ioctls, etc */
+#include <linux/rtc.h> /* For struct rtc_time and ioctls, etc */
#include <linux/bcd.h>
#include <asm/mvme16xhw.h>
diff --git a/arch/metag/include/asm/atomic_lnkget.h b/arch/metag/include/asm/atomic_lnkget.h
index a625818..88fa25f 100644
--- a/arch/metag/include/asm/atomic_lnkget.h
+++ b/arch/metag/include/asm/atomic_lnkget.h
@@ -61,7 +61,7 @@
" CMPT %0, #HI(0x02000000)\n" \
" BNZ 1b\n" \
: "=&d" (temp), "=&da" (result) \
- : "da" (&v->counter), "bd" (i) \
+ : "da" (&v->counter), "br" (i) \
: "cc"); \
\
smp_mb(); \
diff --git a/arch/metag/kernel/ftrace.c b/arch/metag/kernel/ftrace.c
index ac8c039..f7b23d3 100644
--- a/arch/metag/kernel/ftrace.c
+++ b/arch/metag/kernel/ftrace.c
@@ -115,7 +115,6 @@
return ftrace_modify_code(ip, old, new);
}
-/* run from kstop_machine */
int __init ftrace_dyn_arch_init(void)
{
return 0;
diff --git a/arch/metag/kernel/perf/perf_event.c b/arch/metag/kernel/perf/perf_event.c
index 2478ec6..33a365f 100644
--- a/arch/metag/kernel/perf/perf_event.c
+++ b/arch/metag/kernel/perf/perf_event.c
@@ -618,6 +618,8 @@
/* Check for a core internal or performance channel event. */
if (tmp) {
+ /* PERF_ICORE/PERF_CHAN only exist since Meta2 */
+#ifdef METAC_2_1
void *perf_addr;
/*
@@ -640,6 +642,7 @@
if (perf_addr)
metag_out32((config & 0x0f), perf_addr);
+#endif
/*
* Now we use the high nibble as the performance event to
diff --git a/arch/metag/kernel/perf_callchain.c b/arch/metag/kernel/perf_callchain.c
index 3156334..252abc1 100644
--- a/arch/metag/kernel/perf_callchain.c
+++ b/arch/metag/kernel/perf_callchain.c
@@ -65,7 +65,7 @@
--frame;
- while ((entry->nr < PERF_MAX_STACK_DEPTH) && frame)
+ while ((entry->nr < sysctl_perf_event_max_stack) && frame)
frame = user_backtrace(frame, entry);
}
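
This metag hunk and the mips, powerpc and sparc hunks below all make the
same substitution: the compile-time PERF_MAX_STACK_DEPTH bound on
callchain sampling becomes the runtime sysctl_perf_event_max_stack,
tunable via the kernel.perf_event_max_stack sysctl. The walk shape is
unchanged; a standalone toy version (fake frame chain, not a real
unwinder):

    #include <stdio.h>

    /* Stand-in for the new runtime limit; in the kernel this is set
     * through /proc/sys/kernel/perf_event_max_stack. */
    static int sysctl_perf_event_max_stack = 127;

    struct toy_entry {
        int nr;
        unsigned long ip[1024];
    };

    static void toy_store(struct toy_entry *e, unsigned long ip)
    {
        e->ip[e->nr++] = ip;
    }

    /* The recurring pattern: keep unwinding while frames remain AND
     * the runtime limit has not been hit (formerly the compile-time
     * PERF_MAX_STACK_DEPTH). */
    static void toy_callchain(struct toy_entry *e, unsigned long pc)
    {
        while (pc && e->nr < sysctl_perf_event_max_stack) {
            toy_store(e, pc);
            pc >>= 1;    /* fake "caller of pc" for the demo */
        }
    }

    int main(void)
    {
        struct toy_entry e = { 0 };

        toy_callchain(&e, 1ul << 20);
        printf("captured %d frames\n", e.nr);    /* 21, under the limit */
        return 0;
    }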
diff --git a/arch/mips/kernel/perf_event.c b/arch/mips/kernel/perf_event.c
index c1cf9c6..5021c54 100644
--- a/arch/mips/kernel/perf_event.c
+++ b/arch/mips/kernel/perf_event.c
@@ -35,7 +35,7 @@
addr = *sp++;
if (__kernel_text_address(addr)) {
perf_callchain_store(entry, addr);
- if (entry->nr >= PERF_MAX_STACK_DEPTH)
+ if (entry->nr >= sysctl_perf_event_max_stack)
break;
}
}
@@ -59,7 +59,7 @@
}
do {
perf_callchain_store(entry, pc);
- if (entry->nr >= PERF_MAX_STACK_DEPTH)
+ if (entry->nr >= sysctl_perf_event_max_stack)
break;
pc = unwind_stack(current, &sp, pc, &ra);
} while (pc);
diff --git a/arch/nios2/include/asm/io.h b/arch/nios2/include/asm/io.h
index c5a62da..ce072ba 100644
--- a/arch/nios2/include/asm/io.h
+++ b/arch/nios2/include/asm/io.h
@@ -50,7 +50,6 @@
/* Pages to physical address... */
#define page_to_phys(page) virt_to_phys(page_to_virt(page))
-#define page_to_bus(page) page_to_virt(page)
/* Macros used for converting between virtual and physical mappings. */
#define phys_to_virt(vaddr) \
diff --git a/arch/nios2/include/asm/page.h b/arch/nios2/include/asm/page.h
index 4b32d6f..c1683f5 100644
--- a/arch/nios2/include/asm/page.h
+++ b/arch/nios2/include/asm/page.h
@@ -84,7 +84,7 @@
((void *)((unsigned long)(x) + PAGE_OFFSET - PHYS_OFFSET))
#define page_to_virt(page) \
- ((((page) - mem_map) << PAGE_SHIFT) + PAGE_OFFSET)
+ ((void *)(((page) - mem_map) << PAGE_SHIFT) + PAGE_OFFSET)
# define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
# define pfn_valid(pfn) ((pfn) >= ARCH_PFN_OFFSET && \
diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h
index a213e8c..298393c 100644
--- a/arch/nios2/include/asm/pgtable.h
+++ b/arch/nios2/include/asm/pgtable.h
@@ -209,7 +209,7 @@
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pteval)
{
- unsigned long paddr = page_to_virt(pte_page(pteval));
+ unsigned long paddr = (unsigned long)page_to_virt(pte_page(pteval));
flush_dcache_range(paddr, paddr + PAGE_SIZE);
set_pte(ptep, pteval);
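
Taken together, the two nios2 hunks change page_to_virt() from an
integer-valued macro into a pointer-valued one, which is why set_pte_at()
above now casts the result back to unsigned long (openrisc simply drops
its unused integer variant below). A compressed, host-side illustration
of the type change (toy constants; the new form relies on GNU
void-pointer arithmetic, as the kernel does):

    #include <stdio.h>

    #define PAGE_SHIFT  12
    #define PAGE_OFFSET 0xC0000000UL

    struct page { int flags; };
    static struct page mem_map[16];

    /* Old form: plain integer arithmetic, yields an unsigned long. */
    #define page_to_virt_old(page) \
        ((((page) - mem_map) << PAGE_SHIFT) + PAGE_OFFSET)

    /* New form: the cast makes the whole expression pointer-typed
     * (byte-wise void * arithmetic is a GNU C extension). */
    #define page_to_virt_new(page) \
        ((void *)(((page) - mem_map) << PAGE_SHIFT) + PAGE_OFFSET)

    int main(void)
    {
        struct page *pg = &mem_map[3];
        unsigned long v_old = page_to_virt_old(pg);
        void *v_new = page_to_virt_new(pg);

        /* Same address either way; pointer users drop their casts,
         * integer users such as set_pte_at() gain one. */
        printf("%#lx %p\n", v_old, v_new);
        return 0;
    }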
diff --git a/arch/openrisc/include/asm/page.h b/arch/openrisc/include/asm/page.h
index e613d36..35bcb7c 100644
--- a/arch/openrisc/include/asm/page.h
+++ b/arch/openrisc/include/asm/page.h
@@ -81,8 +81,6 @@
#define virt_to_page(addr) \
(mem_map + (((unsigned long)(addr)-PAGE_OFFSET) >> PAGE_SHIFT))
-#define page_to_virt(page) \
- ((((page) - mem_map) << PAGE_SHIFT) + PAGE_OFFSET)
#define page_to_phys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT)
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 8cac1eb..55c924b 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -565,7 +565,7 @@
smp_ops->give_timebase();
/* Wait until cpu puts itself in the online & active maps */
- while (!cpu_online(cpu) || !cpu_active(cpu))
+ while (!cpu_online(cpu))
cpu_relax();
return 0;
diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c
index e04a675..22d9015 100644
--- a/arch/powerpc/perf/callchain.c
+++ b/arch/powerpc/perf/callchain.c
@@ -247,7 +247,7 @@
sp = regs->gpr[1];
perf_callchain_store(entry, next_ip);
- while (entry->nr < PERF_MAX_STACK_DEPTH) {
+ while (entry->nr < sysctl_perf_event_max_stack) {
fp = (unsigned long __user *) sp;
if (!valid_user_sp(sp, 1) || read_user_stack_64(fp, &next_sp))
return;
@@ -453,7 +453,7 @@
sp = regs->gpr[1];
perf_callchain_store(entry, next_ip);
- while (entry->nr < PERF_MAX_STACK_DEPTH) {
+ while (entry->nr < sysctl_perf_event_max_stack) {
fp = (unsigned int __user *) (unsigned long) sp;
if (!valid_user_sp(sp, 0) || read_user_stack_32(fp, &next_sp))
return;
diff --git a/arch/s390/include/asm/rwsem.h b/arch/s390/include/asm/rwsem.h
index fead491..c75e447 100644
--- a/arch/s390/include/asm/rwsem.h
+++ b/arch/s390/include/asm/rwsem.h
@@ -90,7 +90,7 @@
/*
* lock for writing
*/
-static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
+static inline long ___down_write(struct rw_semaphore *sem)
{
signed long old, new, tmp;
@@ -104,13 +104,23 @@
: "=&d" (old), "=&d" (new), "=Q" (sem->count)
: "Q" (sem->count), "m" (tmp)
: "cc", "memory");
- if (old != 0)
- rwsem_down_write_failed(sem);
+
+ return old;
}
static inline void __down_write(struct rw_semaphore *sem)
{
- __down_write_nested(sem, 0);
+ if (___down_write(sem))
+ rwsem_down_write_failed(sem);
+}
+
+static inline int __down_write_killable(struct rw_semaphore *sem)
+{
+ if (___down_write(sem))
+ if (IS_ERR(rwsem_down_write_failed_killable(sem)))
+ return -EINTR;
+
+ return 0;
}
/*
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 40a6b4f..7b89a75 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -832,7 +832,7 @@
pcpu_attach_task(pcpu, tidle);
pcpu_start_fn(pcpu, smp_start_secondary, NULL);
/* Wait until cpu puts itself in the online & active maps */
- while (!cpu_online(cpu) || !cpu_active(cpu))
+ while (!cpu_online(cpu))
cpu_relax();
return 0;
}
diff --git a/arch/sh/boards/board-sh7757lcr.c b/arch/sh/boards/board-sh7757lcr.c
index 324599b..0104c81 100644
--- a/arch/sh/boards/board-sh7757lcr.c
+++ b/arch/sh/boards/board-sh7757lcr.c
@@ -20,7 +20,6 @@
#include <linux/mfd/tmio.h>
#include <linux/mmc/host.h>
#include <linux/mmc/sh_mmcif.h>
-#include <linux/mmc/sh_mobile_sdhi.h>
#include <linux/sh_eth.h>
#include <linux/sh_intc.h>
#include <linux/usb/renesas_usbhs.h>
diff --git a/arch/sh/boards/mach-ap325rxa/setup.c b/arch/sh/boards/mach-ap325rxa/setup.c
index 62c3b81..de8393c 100644
--- a/arch/sh/boards/mach-ap325rxa/setup.c
+++ b/arch/sh/boards/mach-ap325rxa/setup.c
@@ -15,7 +15,6 @@
#include <linux/interrupt.h>
#include <linux/platform_device.h>
#include <linux/mmc/host.h>
-#include <linux/mmc/sh_mobile_sdhi.h>
#include <linux/mtd/physmap.h>
#include <linux/mtd/sh_flctl.h>
#include <linux/mfd/tmio.h>
diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c
index a9c0c07..6d61279 100644
--- a/arch/sh/boards/mach-ecovec24/setup.c
+++ b/arch/sh/boards/mach-ecovec24/setup.c
@@ -13,7 +13,6 @@
#include <linux/platform_device.h>
#include <linux/mmc/host.h>
#include <linux/mmc/sh_mmcif.h>
-#include <linux/mmc/sh_mobile_sdhi.h>
#include <linux/mtd/physmap.h>
#include <linux/mfd/tmio.h>
#include <linux/gpio.h>
diff --git a/arch/sh/boards/mach-kfr2r09/setup.c b/arch/sh/boards/mach-kfr2r09/setup.c
index 6bd9230..5deb2d8 100644
--- a/arch/sh/boards/mach-kfr2r09/setup.c
+++ b/arch/sh/boards/mach-kfr2r09/setup.c
@@ -11,7 +11,6 @@
#include <linux/platform_device.h>
#include <linux/interrupt.h>
#include <linux/mmc/host.h>
-#include <linux/mmc/sh_mobile_sdhi.h>
#include <linux/mfd/tmio.h>
#include <linux/mtd/physmap.h>
#include <linux/mtd/onenand.h>
diff --git a/arch/sh/boards/mach-migor/setup.c b/arch/sh/boards/mach-migor/setup.c
index 7a04da3..5de60a7 100644
--- a/arch/sh/boards/mach-migor/setup.c
+++ b/arch/sh/boards/mach-migor/setup.c
@@ -13,7 +13,6 @@
#include <linux/input.h>
#include <linux/input/sh_keysc.h>
#include <linux/mmc/host.h>
-#include <linux/mmc/sh_mobile_sdhi.h>
#include <linux/mtd/physmap.h>
#include <linux/mfd/tmio.h>
#include <linux/mtd/nand.h>
diff --git a/arch/sh/boards/mach-se/7724/setup.c b/arch/sh/boards/mach-se/7724/setup.c
index e0e1df1..f1fecd3 100644
--- a/arch/sh/boards/mach-se/7724/setup.c
+++ b/arch/sh/boards/mach-se/7724/setup.c
@@ -15,7 +15,6 @@
#include <linux/interrupt.h>
#include <linux/platform_device.h>
#include <linux/mmc/host.h>
-#include <linux/mmc/sh_mobile_sdhi.h>
#include <linux/mfd/tmio.h>
#include <linux/mtd/physmap.h>
#include <linux/delay.h>
diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild
index a319745..751c337 100644
--- a/arch/sh/include/asm/Kbuild
+++ b/arch/sh/include/asm/Kbuild
@@ -26,6 +26,7 @@
generic-y += poll.h
generic-y += preempt.h
generic-y += resource.h
+generic-y += rwsem.h
generic-y += sembuf.h
generic-y += serial.h
generic-y += shmbuf.h
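
Dropping the hand-rolled sh rwsem.h (and the sparc one below) in favor
of generic-y means Kbuild emits a one-line wrapper at build time,
arch/$(ARCH)/include/generated/asm/rwsem.h, of roughly this form:

    #include <asm-generic/rwsem.h>

so both architectures now share the asm-generic implementation instead
of carrying their own atomics.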
diff --git a/arch/sh/include/asm/rwsem.h b/arch/sh/include/asm/rwsem.h
deleted file mode 100644
index edab572..0000000
--- a/arch/sh/include/asm/rwsem.h
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * include/asm-sh/rwsem.h: R/W semaphores for SH using the stuff
- * in lib/rwsem.c.
- */
-
-#ifndef _ASM_SH_RWSEM_H
-#define _ASM_SH_RWSEM_H
-
-#ifndef _LINUX_RWSEM_H
-#error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead"
-#endif
-
-#ifdef __KERNEL__
-
-#define RWSEM_UNLOCKED_VALUE 0x00000000
-#define RWSEM_ACTIVE_BIAS 0x00000001
-#define RWSEM_ACTIVE_MASK 0x0000ffff
-#define RWSEM_WAITING_BIAS (-0x00010000)
-#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
-#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
-
-/*
- * lock for reading
- */
-static inline void __down_read(struct rw_semaphore *sem)
-{
- if (atomic_inc_return((atomic_t *)(&sem->count)) > 0)
- smp_wmb();
- else
- rwsem_down_read_failed(sem);
-}
-
-static inline int __down_read_trylock(struct rw_semaphore *sem)
-{
- int tmp;
-
- while ((tmp = sem->count) >= 0) {
- if (tmp == cmpxchg(&sem->count, tmp,
- tmp + RWSEM_ACTIVE_READ_BIAS)) {
- smp_wmb();
- return 1;
- }
- }
- return 0;
-}
-
-/*
- * lock for writing
- */
-static inline void __down_write(struct rw_semaphore *sem)
-{
- int tmp;
-
- tmp = atomic_add_return(RWSEM_ACTIVE_WRITE_BIAS,
- (atomic_t *)(&sem->count));
- if (tmp == RWSEM_ACTIVE_WRITE_BIAS)
- smp_wmb();
- else
- rwsem_down_write_failed(sem);
-}
-
-static inline int __down_write_trylock(struct rw_semaphore *sem)
-{
- int tmp;
-
- tmp = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
- RWSEM_ACTIVE_WRITE_BIAS);
- smp_wmb();
- return tmp == RWSEM_UNLOCKED_VALUE;
-}
-
-/*
- * unlock after reading
- */
-static inline void __up_read(struct rw_semaphore *sem)
-{
- int tmp;
-
- smp_wmb();
- tmp = atomic_dec_return((atomic_t *)(&sem->count));
- if (tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0)
- rwsem_wake(sem);
-}
-
-/*
- * unlock after writing
- */
-static inline void __up_write(struct rw_semaphore *sem)
-{
- smp_wmb();
- if (atomic_sub_return(RWSEM_ACTIVE_WRITE_BIAS,
- (atomic_t *)(&sem->count)) < 0)
- rwsem_wake(sem);
-}
-
-/*
- * implement atomic add functionality
- */
-static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem)
-{
- atomic_add(delta, (atomic_t *)(&sem->count));
-}
-
-/*
- * downgrade write lock to read lock
- */
-static inline void __downgrade_write(struct rw_semaphore *sem)
-{
- int tmp;
-
- smp_wmb();
- tmp = atomic_add_return(-RWSEM_WAITING_BIAS, (atomic_t *)(&sem->count));
- if (tmp < 0)
- rwsem_downgrade_wake(sem);
-}
-
-static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
-{
- __down_write(sem);
-}
-
-/*
- * implement exchange and add functionality
- */
-static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
-{
- smp_mb();
- return atomic_add_return(delta, (atomic_t *)(&sem->count));
-}
-
-#endif /* __KERNEL__ */
-#endif /* _ASM_SH_RWSEM_H */
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index e9286188..6024c26 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -16,6 +16,7 @@
generic-y += module.h
generic-y += mutex.h
generic-y += preempt.h
+generic-y += rwsem.h
generic-y += serial.h
generic-y += trace_clock.h
generic-y += types.h
diff --git a/arch/sparc/include/asm/rwsem.h b/arch/sparc/include/asm/rwsem.h
deleted file mode 100644
index 069bf4d..0000000
--- a/arch/sparc/include/asm/rwsem.h
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * rwsem.h: R/W semaphores implemented using CAS
- *
- * Written by David S. Miller (davem@redhat.com), 2001.
- * Derived from asm-i386/rwsem.h
- */
-#ifndef _SPARC64_RWSEM_H
-#define _SPARC64_RWSEM_H
-
-#ifndef _LINUX_RWSEM_H
-#error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead"
-#endif
-
-#ifdef __KERNEL__
-
-#define RWSEM_UNLOCKED_VALUE 0x00000000L
-#define RWSEM_ACTIVE_BIAS 0x00000001L
-#define RWSEM_ACTIVE_MASK 0xffffffffL
-#define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1)
-#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
-#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
-
-/*
- * lock for reading
- */
-static inline void __down_read(struct rw_semaphore *sem)
-{
- if (unlikely(atomic64_inc_return((atomic64_t *)(&sem->count)) <= 0L))
- rwsem_down_read_failed(sem);
-}
-
-static inline int __down_read_trylock(struct rw_semaphore *sem)
-{
- long tmp;
-
- while ((tmp = sem->count) >= 0L) {
- if (tmp == cmpxchg(&sem->count, tmp,
- tmp + RWSEM_ACTIVE_READ_BIAS)) {
- return 1;
- }
- }
- return 0;
-}
-
-/*
- * lock for writing
- */
-static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
-{
- long tmp;
-
- tmp = atomic64_add_return(RWSEM_ACTIVE_WRITE_BIAS,
- (atomic64_t *)(&sem->count));
- if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS))
- rwsem_down_write_failed(sem);
-}
-
-static inline void __down_write(struct rw_semaphore *sem)
-{
- __down_write_nested(sem, 0);
-}
-
-static inline int __down_write_trylock(struct rw_semaphore *sem)
-{
- long tmp;
-
- tmp = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
- RWSEM_ACTIVE_WRITE_BIAS);
- return tmp == RWSEM_UNLOCKED_VALUE;
-}
-
-/*
- * unlock after reading
- */
-static inline void __up_read(struct rw_semaphore *sem)
-{
- long tmp;
-
- tmp = atomic64_dec_return((atomic64_t *)(&sem->count));
- if (unlikely(tmp < -1L && (tmp & RWSEM_ACTIVE_MASK) == 0L))
- rwsem_wake(sem);
-}
-
-/*
- * unlock after writing
- */
-static inline void __up_write(struct rw_semaphore *sem)
-{
- if (unlikely(atomic64_sub_return(RWSEM_ACTIVE_WRITE_BIAS,
- (atomic64_t *)(&sem->count)) < 0L))
- rwsem_wake(sem);
-}
-
-/*
- * implement atomic add functionality
- */
-static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem)
-{
- atomic64_add(delta, (atomic64_t *)(&sem->count));
-}
-
-/*
- * downgrade write lock to read lock
- */
-static inline void __downgrade_write(struct rw_semaphore *sem)
-{
- long tmp;
-
- tmp = atomic64_add_return(-RWSEM_WAITING_BIAS, (atomic64_t *)(&sem->count));
- if (tmp < 0L)
- rwsem_downgrade_wake(sem);
-}
-
-/*
- * implement exchange and add functionality
- */
-static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
-{
- return atomic64_add_return(delta, (atomic64_t *)(&sem->count));
-}
-
-#endif /* __KERNEL__ */
-
-#endif /* _SPARC64_RWSEM_H */
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 6596f66..a4b8b5a 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -1756,7 +1756,7 @@
}
}
#endif
- } while (entry->nr < PERF_MAX_STACK_DEPTH);
+ } while (entry->nr < sysctl_perf_event_max_stack);
}
static inline int
@@ -1790,7 +1790,7 @@
pc = sf.callers_pc;
ufp = (unsigned long)sf.fp + STACK_BIAS;
perf_callchain_store(entry, pc);
- } while (entry->nr < PERF_MAX_STACK_DEPTH);
+ } while (entry->nr < sysctl_perf_event_max_stack);
}
static void perf_callchain_user_32(struct perf_callchain_entry *entry,
@@ -1822,7 +1822,7 @@
ufp = (unsigned long)sf.fp;
}
perf_callchain_store(entry, pc);
- } while (entry->nr < PERF_MAX_STACK_DEPTH);
+ } while (entry->nr < sysctl_perf_event_max_stack);
}
void
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2dc18605..7bb1574 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -164,10 +164,6 @@
def_bool y
depends on KPROBES || PERF_EVENTS || UPROBES
-config PERF_EVENTS_INTEL_UNCORE
- def_bool y
- depends on PERF_EVENTS && CPU_SUP_INTEL && PCI
-
config OUTPUT_FORMAT
string
default "elf32-i386" if X86_32
@@ -1046,6 +1042,8 @@
def_bool y
depends on X86_MCE_INTEL
+source "arch/x86/events/Kconfig"
+
config X86_LEGACY_VM86
bool "Legacy VM86 support"
default n
@@ -1210,15 +1208,6 @@
def_bool y
depends on MICROCODE
-config PERF_EVENTS_AMD_POWER
- depends on PERF_EVENTS && CPU_SUP_AMD
- tristate "AMD Processor Power Reporting Mechanism"
- ---help---
- Provide power reporting mechanism support for AMD processors.
- Currently, it leverages X86_FEATURE_ACC_POWER
- (CPUID Fn8000_0007_EDX[12]) interface to calculate the
- average power consumption on Family 15h processors.
-
config X86_MSR
tristate "/dev/cpu/*/msr - Model-specific register support"
---help---
@@ -1932,54 +1921,38 @@
(CONFIG_PHYSICAL_START) is used as the minimum location.
config RANDOMIZE_BASE
- bool "Randomize the address of the kernel image"
+ bool "Randomize the address of the kernel image (KASLR)"
depends on RELOCATABLE
default n
---help---
- Randomizes the physical and virtual address at which the
- kernel image is decompressed, as a security feature that
- deters exploit attempts relying on knowledge of the location
- of kernel internals.
+ In support of Kernel Address Space Layout Randomization (KASLR),
+ this randomizes the physical address at which the kernel image
+ is decompressed and the virtual address where the kernel
+ image is mapped, as a security feature that deters exploit
+ attempts relying on knowledge of the location of kernel
+ code internals.
- Entropy is generated using the RDRAND instruction if it is
- supported. If RDTSC is supported, it is used as well. If
- neither RDRAND nor RDTSC are supported, then randomness is
- read from the i8254 timer.
+ The kernel physical and virtual address can be randomized
+ from 16MB up to 1GB on 64-bit and 512MB on 32-bit. (Note that
+ using RANDOMIZE_BASE reduces the memory space available to
+ kernel modules from 1.5GB to 1GB.)
- The kernel will be offset by up to RANDOMIZE_BASE_MAX_OFFSET,
- and aligned according to PHYSICAL_ALIGN. Since the kernel is
- built using 2GiB addressing, and PHYSICAL_ALIGN must be at a
- minimum of 2MiB, only 10 bits of entropy is theoretically
- possible. At best, due to page table layouts, 64-bit can use
- 9 bits of entropy and 32-bit uses 8 bits.
+ Entropy is generated using the RDRAND instruction if it is
+ supported. If RDTSC is supported, its value is mixed into
+ the entropy pool as well. If neither RDRAND nor RDTSC are
+ supported, then entropy is read from the i8254 timer.
- If unsure, say N.
+ Since the kernel is built using 2GB addressing, and
+ PHYSICAL_ALIGN must be at a minimum of 2MB, only 10 bits of
+ entropy is theoretically possible. Currently, with the
+ default value for PHYSICAL_ALIGN and due to page table
+ layouts, 64-bit uses 9 bits of entropy and 32-bit uses 8 bits.
-config RANDOMIZE_BASE_MAX_OFFSET
- hex "Maximum kASLR offset allowed" if EXPERT
- depends on RANDOMIZE_BASE
- range 0x0 0x20000000 if X86_32
- default "0x20000000" if X86_32
- range 0x0 0x40000000 if X86_64
- default "0x40000000" if X86_64
- ---help---
- The lesser of RANDOMIZE_BASE_MAX_OFFSET and available physical
- memory is used to determine the maximal offset in bytes that will
- be applied to the kernel when kernel Address Space Layout
- Randomization (kASLR) is active. This must be a multiple of
- PHYSICAL_ALIGN.
+ If CONFIG_HIBERNATION is also enabled, KASLR is disabled at boot
+ time. To enable it, boot with "kaslr" on the kernel command
+ line (which will also disable hibernation).
- On 32-bit this is limited to 512MiB by page table layouts. The
- default is 512MiB.
-
- On 64-bit this is limited by how the kernel fixmap page table is
- positioned, so this cannot be larger than 1GiB currently. Without
- RANDOMIZE_BASE, there is a 512MiB to 1.5GiB split between kernel
- and modules. When RANDOMIZE_BASE_MAX_OFFSET is above 512MiB, the
- modules area will shrink to compensate, up to the current maximum
- 1GiB to 1GiB split. The default is 1GiB.
-
- If unsure, leave at the default value.
+ If unsure, say N.
# Relocation on x86 needs some additional build support
config X86_NEED_RELOCS
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 4086abc..6fce7f0 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -208,7 +208,8 @@
head-y := arch/x86/kernel/head_$(BITS).o
head-y += arch/x86/kernel/head$(BITS).o
-head-y += arch/x86/kernel/head.o
+head-y += arch/x86/kernel/ebda.o
+head-y += arch/x86/kernel/platform-quirks.o
libs-y += arch/x86/lib/
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index b1ef9e4..700a9c6 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -86,16 +86,7 @@
SETUP_OBJS = $(addprefix $(obj)/,$(setup-y))
-sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(_text\|_end\)$$/\#define VO_\2 0x\1/p'
-
-quiet_cmd_voffset = VOFFSET $@
- cmd_voffset = $(NM) $< | sed -n $(sed-voffset) > $@
-
-targets += voffset.h
-$(obj)/voffset.h: vmlinux FORCE
- $(call if_changed,voffset)
-
-sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|_end\|z_.*\)$$/\#define ZO_\2 0x\1/p'
+sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|_end\|_ehead\|_text\|z_.*\)$$/\#define ZO_\2 0x\1/p'
quiet_cmd_zoffset = ZOFFSET $@
cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@
@@ -106,7 +97,7 @@
AFLAGS_header.o += -I$(obj)
-$(obj)/header.o: $(obj)/voffset.h $(obj)/zoffset.h
+$(obj)/header.o: $(obj)/zoffset.h
LDFLAGS_setup.elf := -T
$(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 8774cb2..cfdd8c3 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -57,12 +57,27 @@
hostprogs-y := mkpiggy
HOST_EXTRACFLAGS += -I$(srctree)/tools/include
+sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(_text\|__bss_start\|_end\)$$/\#define VO_\2 _AC(0x\1,UL)/p'
+
+quiet_cmd_voffset = VOFFSET $@
+ cmd_voffset = $(NM) $< | sed -n $(sed-voffset) > $@
+
+targets += ../voffset.h
+
+$(obj)/../voffset.h: vmlinux FORCE
+ $(call if_changed,voffset)
+
+$(obj)/misc.o: $(obj)/../voffset.h
+
vmlinux-objs-y := $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
- $(obj)/string.o $(obj)/cmdline.o \
+ $(obj)/string.o $(obj)/cmdline.o $(obj)/error.o \
$(obj)/piggy.o $(obj)/cpuflags.o
vmlinux-objs-$(CONFIG_EARLY_PRINTK) += $(obj)/early_serial_console.o
-vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/aslr.o
+vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o
+ifdef CONFIG_X86_64
+ vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/pagetable.o
+endif
$(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
@@ -109,10 +124,8 @@
suffix-$(CONFIG_KERNEL_LZO) := lzo
suffix-$(CONFIG_KERNEL_LZ4) := lz4
-RUN_SIZE = $(shell $(OBJDUMP) -h vmlinux | \
- $(CONFIG_SHELL) $(srctree)/arch/x86/tools/calc_run_size.sh)
quiet_cmd_mkpiggy = MKPIGGY $@
- cmd_mkpiggy = $(obj)/mkpiggy $< $(RUN_SIZE) > $@ || ( rm -f $@ ; false )
+ cmd_mkpiggy = $(obj)/mkpiggy $< > $@ || ( rm -f $@ ; false )
targets += piggy.S
$(obj)/piggy.S: $(obj)/vmlinux.bin.$(suffix-y) $(obj)/mkpiggy FORCE
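
The voffset.h generation moves from boot/ into boot/compressed/ (misc.o,
its only consumer) and now also captures __bss_start, wrapping each
value in _AC() so it is usable from both C and assembly. The sed rule
above turns nm output into a header of roughly this shape (addresses
purely illustrative):

    #define VO__text _AC(0xffffffff81000000,UL)
    #define VO___bss_start _AC(0xffffffff81f2a000,UL)
    #define VO__end _AC(0xffffffff82060000,UL)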
diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c
deleted file mode 100644
index 6a9b96b..0000000
--- a/arch/x86/boot/compressed/aslr.c
+++ /dev/null
@@ -1,339 +0,0 @@
-#include "misc.h"
-
-#include <asm/msr.h>
-#include <asm/archrandom.h>
-#include <asm/e820.h>
-
-#include <generated/compile.h>
-#include <linux/module.h>
-#include <linux/uts.h>
-#include <linux/utsname.h>
-#include <generated/utsrelease.h>
-
-/* Simplified build-specific string for starting entropy. */
-static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
- LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
-
-#define I8254_PORT_CONTROL 0x43
-#define I8254_PORT_COUNTER0 0x40
-#define I8254_CMD_READBACK 0xC0
-#define I8254_SELECT_COUNTER0 0x02
-#define I8254_STATUS_NOTREADY 0x40
-static inline u16 i8254(void)
-{
- u16 status, timer;
-
- do {
- outb(I8254_PORT_CONTROL,
- I8254_CMD_READBACK | I8254_SELECT_COUNTER0);
- status = inb(I8254_PORT_COUNTER0);
- timer = inb(I8254_PORT_COUNTER0);
- timer |= inb(I8254_PORT_COUNTER0) << 8;
- } while (status & I8254_STATUS_NOTREADY);
-
- return timer;
-}
-
-static unsigned long rotate_xor(unsigned long hash, const void *area,
- size_t size)
-{
- size_t i;
- unsigned long *ptr = (unsigned long *)area;
-
- for (i = 0; i < size / sizeof(hash); i++) {
- /* Rotate by odd number of bits and XOR. */
- hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7);
- hash ^= ptr[i];
- }
-
- return hash;
-}
-
-/* Attempt to create a simple but unpredictable starting entropy. */
-static unsigned long get_random_boot(void)
-{
- unsigned long hash = 0;
-
- hash = rotate_xor(hash, build_str, sizeof(build_str));
- hash = rotate_xor(hash, real_mode, sizeof(*real_mode));
-
- return hash;
-}
-
-static unsigned long get_random_long(void)
-{
-#ifdef CONFIG_X86_64
- const unsigned long mix_const = 0x5d6008cbf3848dd3UL;
-#else
- const unsigned long mix_const = 0x3f39e593UL;
-#endif
- unsigned long raw, random = get_random_boot();
- bool use_i8254 = true;
-
- debug_putstr("KASLR using");
-
- if (has_cpuflag(X86_FEATURE_RDRAND)) {
- debug_putstr(" RDRAND");
- if (rdrand_long(&raw)) {
- random ^= raw;
- use_i8254 = false;
- }
- }
-
- if (has_cpuflag(X86_FEATURE_TSC)) {
- debug_putstr(" RDTSC");
- raw = rdtsc();
-
- random ^= raw;
- use_i8254 = false;
- }
-
- if (use_i8254) {
- debug_putstr(" i8254");
- random ^= i8254();
- }
-
- /* Circular multiply for better bit diffusion */
- asm("mul %3"
- : "=a" (random), "=d" (raw)
- : "a" (random), "rm" (mix_const));
- random += raw;
-
- debug_putstr("...\n");
-
- return random;
-}
-
-struct mem_vector {
- unsigned long start;
- unsigned long size;
-};
-
-#define MEM_AVOID_MAX 5
-static struct mem_vector mem_avoid[MEM_AVOID_MAX];
-
-static bool mem_contains(struct mem_vector *region, struct mem_vector *item)
-{
- /* Item at least partially before region. */
- if (item->start < region->start)
- return false;
- /* Item at least partially after region. */
- if (item->start + item->size > region->start + region->size)
- return false;
- return true;
-}
-
-static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two)
-{
- /* Item one is entirely before item two. */
- if (one->start + one->size <= two->start)
- return false;
- /* Item one is entirely after item two. */
- if (one->start >= two->start + two->size)
- return false;
- return true;
-}
-
-static void mem_avoid_init(unsigned long input, unsigned long input_size,
- unsigned long output, unsigned long output_size)
-{
- u64 initrd_start, initrd_size;
- u64 cmd_line, cmd_line_size;
- unsigned long unsafe, unsafe_len;
- char *ptr;
-
- /*
- * Avoid the region that is unsafe to overlap during
- * decompression (see calculations at top of misc.c).
- */
- unsafe_len = (output_size >> 12) + 32768 + 18;
- unsafe = (unsigned long)input + input_size - unsafe_len;
- mem_avoid[0].start = unsafe;
- mem_avoid[0].size = unsafe_len;
-
- /* Avoid initrd. */
- initrd_start = (u64)real_mode->ext_ramdisk_image << 32;
- initrd_start |= real_mode->hdr.ramdisk_image;
- initrd_size = (u64)real_mode->ext_ramdisk_size << 32;
- initrd_size |= real_mode->hdr.ramdisk_size;
- mem_avoid[1].start = initrd_start;
- mem_avoid[1].size = initrd_size;
-
- /* Avoid kernel command line. */
- cmd_line = (u64)real_mode->ext_cmd_line_ptr << 32;
- cmd_line |= real_mode->hdr.cmd_line_ptr;
- /* Calculate size of cmd_line. */
- ptr = (char *)(unsigned long)cmd_line;
- for (cmd_line_size = 0; ptr[cmd_line_size++]; )
- ;
- mem_avoid[2].start = cmd_line;
- mem_avoid[2].size = cmd_line_size;
-
- /* Avoid heap memory. */
- mem_avoid[3].start = (unsigned long)free_mem_ptr;
- mem_avoid[3].size = BOOT_HEAP_SIZE;
-
- /* Avoid stack memory. */
- mem_avoid[4].start = (unsigned long)free_mem_end_ptr;
- mem_avoid[4].size = BOOT_STACK_SIZE;
-}
-
-/* Does this memory vector overlap a known avoided area? */
-static bool mem_avoid_overlap(struct mem_vector *img)
-{
- int i;
- struct setup_data *ptr;
-
- for (i = 0; i < MEM_AVOID_MAX; i++) {
- if (mem_overlaps(img, &mem_avoid[i]))
- return true;
- }
-
- /* Avoid all entries in the setup_data linked list. */
- ptr = (struct setup_data *)(unsigned long)real_mode->hdr.setup_data;
- while (ptr) {
- struct mem_vector avoid;
-
- avoid.start = (unsigned long)ptr;
- avoid.size = sizeof(*ptr) + ptr->len;
-
- if (mem_overlaps(img, &avoid))
- return true;
-
- ptr = (struct setup_data *)(unsigned long)ptr->next;
- }
-
- return false;
-}
-
-static unsigned long slots[CONFIG_RANDOMIZE_BASE_MAX_OFFSET /
- CONFIG_PHYSICAL_ALIGN];
-static unsigned long slot_max;
-
-static void slots_append(unsigned long addr)
-{
- /* Overflowing the slots list should be impossible. */
- if (slot_max >= CONFIG_RANDOMIZE_BASE_MAX_OFFSET /
- CONFIG_PHYSICAL_ALIGN)
- return;
-
- slots[slot_max++] = addr;
-}
-
-static unsigned long slots_fetch_random(void)
-{
- /* Handle case of no slots stored. */
- if (slot_max == 0)
- return 0;
-
- return slots[get_random_long() % slot_max];
-}
-
-static void process_e820_entry(struct e820entry *entry,
- unsigned long minimum,
- unsigned long image_size)
-{
- struct mem_vector region, img;
-
- /* Skip non-RAM entries. */
- if (entry->type != E820_RAM)
- return;
-
- /* Ignore entries entirely above our maximum. */
- if (entry->addr >= CONFIG_RANDOMIZE_BASE_MAX_OFFSET)
- return;
-
- /* Ignore entries entirely below our minimum. */
- if (entry->addr + entry->size < minimum)
- return;
-
- region.start = entry->addr;
- region.size = entry->size;
-
- /* Potentially raise address to minimum location. */
- if (region.start < minimum)
- region.start = minimum;
-
- /* Potentially raise address to meet alignment requirements. */
- region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN);
-
- /* Did we raise the address above the bounds of this e820 region? */
- if (region.start > entry->addr + entry->size)
- return;
-
- /* Reduce size by any delta from the original address. */
- region.size -= region.start - entry->addr;
-
- /* Reduce maximum size to fit end of image within maximum limit. */
- if (region.start + region.size > CONFIG_RANDOMIZE_BASE_MAX_OFFSET)
- region.size = CONFIG_RANDOMIZE_BASE_MAX_OFFSET - region.start;
-
- /* Walk each aligned slot and check for avoided areas. */
- for (img.start = region.start, img.size = image_size ;
- mem_contains(&region, &img) ;
- img.start += CONFIG_PHYSICAL_ALIGN) {
- if (mem_avoid_overlap(&img))
- continue;
- slots_append(img.start);
- }
-}
-
-static unsigned long find_random_addr(unsigned long minimum,
- unsigned long size)
-{
- int i;
- unsigned long addr;
-
- /* Make sure minimum is aligned. */
- minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN);
-
- /* Verify potential e820 positions, appending to slots list. */
- for (i = 0; i < real_mode->e820_entries; i++) {
- process_e820_entry(&real_mode->e820_map[i], minimum, size);
- }
-
- return slots_fetch_random();
-}
-
-unsigned char *choose_kernel_location(struct boot_params *boot_params,
- unsigned char *input,
- unsigned long input_size,
- unsigned char *output,
- unsigned long output_size)
-{
- unsigned long choice = (unsigned long)output;
- unsigned long random;
-
-#ifdef CONFIG_HIBERNATION
- if (!cmdline_find_option_bool("kaslr")) {
- debug_putstr("KASLR disabled by default...\n");
- goto out;
- }
-#else
- if (cmdline_find_option_bool("nokaslr")) {
- debug_putstr("KASLR disabled by cmdline...\n");
- goto out;
- }
-#endif
-
- boot_params->hdr.loadflags |= KASLR_FLAG;
-
- /* Record the various known unsafe memory ranges. */
- mem_avoid_init((unsigned long)input, input_size,
- (unsigned long)output, output_size);
-
- /* Walk e820 and find a random address. */
- random = find_random_addr(choice, output_size);
- if (!random) {
- debug_putstr("KASLR could not find suitable E820 region...\n");
- goto out;
- }
-
- /* Always enforce the minimum. */
- if (random < choice)
- goto out;
-
- choice = random;
-out:
- return (unsigned char *)choice;
-}
diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c
index b68e303..73ccf63 100644
--- a/arch/x86/boot/compressed/cmdline.c
+++ b/arch/x86/boot/compressed/cmdline.c
@@ -15,9 +15,9 @@
#include "../cmdline.c"
static unsigned long get_cmd_line_ptr(void)
{
- unsigned long cmd_line_ptr = real_mode->hdr.cmd_line_ptr;
+ unsigned long cmd_line_ptr = boot_params->hdr.cmd_line_ptr;
- cmd_line_ptr |= (u64)real_mode->ext_cmd_line_ptr << 32;
+ cmd_line_ptr |= (u64)boot_params->ext_cmd_line_ptr << 32;
return cmd_line_ptr;
}
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 583d539..52fef60 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -571,312 +571,6 @@
efi_call_early(free_pool, pci_handle);
}
-static void
-setup_pixel_info(struct screen_info *si, u32 pixels_per_scan_line,
- struct efi_pixel_bitmask pixel_info, int pixel_format)
-{
- if (pixel_format == PIXEL_RGB_RESERVED_8BIT_PER_COLOR) {
- si->lfb_depth = 32;
- si->lfb_linelength = pixels_per_scan_line * 4;
- si->red_size = 8;
- si->red_pos = 0;
- si->green_size = 8;
- si->green_pos = 8;
- si->blue_size = 8;
- si->blue_pos = 16;
- si->rsvd_size = 8;
- si->rsvd_pos = 24;
- } else if (pixel_format == PIXEL_BGR_RESERVED_8BIT_PER_COLOR) {
- si->lfb_depth = 32;
- si->lfb_linelength = pixels_per_scan_line * 4;
- si->red_size = 8;
- si->red_pos = 16;
- si->green_size = 8;
- si->green_pos = 8;
- si->blue_size = 8;
- si->blue_pos = 0;
- si->rsvd_size = 8;
- si->rsvd_pos = 24;
- } else if (pixel_format == PIXEL_BIT_MASK) {
- find_bits(pixel_info.red_mask, &si->red_pos, &si->red_size);
- find_bits(pixel_info.green_mask, &si->green_pos,
- &si->green_size);
- find_bits(pixel_info.blue_mask, &si->blue_pos, &si->blue_size);
- find_bits(pixel_info.reserved_mask, &si->rsvd_pos,
- &si->rsvd_size);
- si->lfb_depth = si->red_size + si->green_size +
- si->blue_size + si->rsvd_size;
- si->lfb_linelength = (pixels_per_scan_line * si->lfb_depth) / 8;
- } else {
- si->lfb_depth = 4;
- si->lfb_linelength = si->lfb_width / 2;
- si->red_size = 0;
- si->red_pos = 0;
- si->green_size = 0;
- si->green_pos = 0;
- si->blue_size = 0;
- si->blue_pos = 0;
- si->rsvd_size = 0;
- si->rsvd_pos = 0;
- }
-}
-
-static efi_status_t
-__gop_query32(struct efi_graphics_output_protocol_32 *gop32,
- struct efi_graphics_output_mode_info **info,
- unsigned long *size, u64 *fb_base)
-{
- struct efi_graphics_output_protocol_mode_32 *mode;
- efi_status_t status;
- unsigned long m;
-
- m = gop32->mode;
- mode = (struct efi_graphics_output_protocol_mode_32 *)m;
-
- status = efi_early->call(gop32->query_mode, gop32,
- mode->mode, size, info);
- if (status != EFI_SUCCESS)
- return status;
-
- *fb_base = mode->frame_buffer_base;
- return status;
-}
-
-static efi_status_t
-setup_gop32(struct screen_info *si, efi_guid_t *proto,
- unsigned long size, void **gop_handle)
-{
- struct efi_graphics_output_protocol_32 *gop32, *first_gop;
- unsigned long nr_gops;
- u16 width, height;
- u32 pixels_per_scan_line;
- u32 ext_lfb_base;
- u64 fb_base;
- struct efi_pixel_bitmask pixel_info;
- int pixel_format;
- efi_status_t status;
- u32 *handles = (u32 *)(unsigned long)gop_handle;
- int i;
-
- first_gop = NULL;
- gop32 = NULL;
-
- nr_gops = size / sizeof(u32);
- for (i = 0; i < nr_gops; i++) {
- struct efi_graphics_output_mode_info *info = NULL;
- efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID;
- bool conout_found = false;
- void *dummy = NULL;
- u32 h = handles[i];
- u64 current_fb_base;
-
- status = efi_call_early(handle_protocol, h,
- proto, (void **)&gop32);
- if (status != EFI_SUCCESS)
- continue;
-
- status = efi_call_early(handle_protocol, h,
- &conout_proto, &dummy);
- if (status == EFI_SUCCESS)
- conout_found = true;
-
- status = __gop_query32(gop32, &info, &size, &current_fb_base);
- if (status == EFI_SUCCESS && (!first_gop || conout_found)) {
- /*
- * Systems that use the UEFI Console Splitter may
- * provide multiple GOP devices, not all of which are
- * backed by real hardware. The workaround is to search
- * for a GOP implementing the ConOut protocol, and if
- * one isn't found, to just fall back to the first GOP.
- */
- width = info->horizontal_resolution;
- height = info->vertical_resolution;
- pixel_format = info->pixel_format;
- pixel_info = info->pixel_information;
- pixels_per_scan_line = info->pixels_per_scan_line;
- fb_base = current_fb_base;
-
- /*
- * Once we've found a GOP supporting ConOut,
- * don't bother looking any further.
- */
- first_gop = gop32;
- if (conout_found)
- break;
- }
- }
-
- /* Did we find any GOPs? */
- if (!first_gop)
- goto out;
-
- /* EFI framebuffer */
- si->orig_video_isVGA = VIDEO_TYPE_EFI;
-
- si->lfb_width = width;
- si->lfb_height = height;
- si->lfb_base = fb_base;
-
- ext_lfb_base = (u64)(unsigned long)fb_base >> 32;
- if (ext_lfb_base) {
- si->capabilities |= VIDEO_CAPABILITY_64BIT_BASE;
- si->ext_lfb_base = ext_lfb_base;
- }
-
- si->pages = 1;
-
- setup_pixel_info(si, pixels_per_scan_line, pixel_info, pixel_format);
-
- si->lfb_size = si->lfb_linelength * si->lfb_height;
-
- si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS;
-out:
- return status;
-}
-
-static efi_status_t
-__gop_query64(struct efi_graphics_output_protocol_64 *gop64,
- struct efi_graphics_output_mode_info **info,
- unsigned long *size, u64 *fb_base)
-{
- struct efi_graphics_output_protocol_mode_64 *mode;
- efi_status_t status;
- unsigned long m;
-
- m = gop64->mode;
- mode = (struct efi_graphics_output_protocol_mode_64 *)m;
-
- status = efi_early->call(gop64->query_mode, gop64,
- mode->mode, size, info);
- if (status != EFI_SUCCESS)
- return status;
-
- *fb_base = mode->frame_buffer_base;
- return status;
-}
-
-static efi_status_t
-setup_gop64(struct screen_info *si, efi_guid_t *proto,
- unsigned long size, void **gop_handle)
-{
- struct efi_graphics_output_protocol_64 *gop64, *first_gop;
- unsigned long nr_gops;
- u16 width, height;
- u32 pixels_per_scan_line;
- u32 ext_lfb_base;
- u64 fb_base;
- struct efi_pixel_bitmask pixel_info;
- int pixel_format;
- efi_status_t status;
- u64 *handles = (u64 *)(unsigned long)gop_handle;
- int i;
-
- first_gop = NULL;
- gop64 = NULL;
-
- nr_gops = size / sizeof(u64);
- for (i = 0; i < nr_gops; i++) {
- struct efi_graphics_output_mode_info *info = NULL;
- efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID;
- bool conout_found = false;
- void *dummy = NULL;
- u64 h = handles[i];
- u64 current_fb_base;
-
- status = efi_call_early(handle_protocol, h,
- proto, (void **)&gop64);
- if (status != EFI_SUCCESS)
- continue;
-
- status = efi_call_early(handle_protocol, h,
- &conout_proto, &dummy);
- if (status == EFI_SUCCESS)
- conout_found = true;
-
- status = __gop_query64(gop64, &info, &size, &current_fb_base);
- if (status == EFI_SUCCESS && (!first_gop || conout_found)) {
- /*
- * Systems that use the UEFI Console Splitter may
- * provide multiple GOP devices, not all of which are
- * backed by real hardware. The workaround is to search
- * for a GOP implementing the ConOut protocol, and if
- * one isn't found, to just fall back to the first GOP.
- */
- width = info->horizontal_resolution;
- height = info->vertical_resolution;
- pixel_format = info->pixel_format;
- pixel_info = info->pixel_information;
- pixels_per_scan_line = info->pixels_per_scan_line;
- fb_base = current_fb_base;
-
- /*
- * Once we've found a GOP supporting ConOut,
- * don't bother looking any further.
- */
- first_gop = gop64;
- if (conout_found)
- break;
- }
- }
-
- /* Did we find any GOPs? */
- if (!first_gop)
- goto out;
-
- /* EFI framebuffer */
- si->orig_video_isVGA = VIDEO_TYPE_EFI;
-
- si->lfb_width = width;
- si->lfb_height = height;
- si->lfb_base = fb_base;
-
- ext_lfb_base = (u64)(unsigned long)fb_base >> 32;
- if (ext_lfb_base) {
- si->capabilities |= VIDEO_CAPABILITY_64BIT_BASE;
- si->ext_lfb_base = ext_lfb_base;
- }
-
- si->pages = 1;
-
- setup_pixel_info(si, pixels_per_scan_line, pixel_info, pixel_format);
-
- si->lfb_size = si->lfb_linelength * si->lfb_height;
-
- si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS;
-out:
- return status;
-}
-
-/*
- * See if we have Graphics Output Protocol
- */
-static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto,
- unsigned long size)
-{
- efi_status_t status;
- void **gop_handle = NULL;
-
- status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
- size, (void **)&gop_handle);
- if (status != EFI_SUCCESS)
- return status;
-
- status = efi_call_early(locate_handle,
- EFI_LOCATE_BY_PROTOCOL,
- proto, NULL, &size, gop_handle);
- if (status != EFI_SUCCESS)
- goto free_handle;
-
- if (efi_early->is64)
- status = setup_gop64(si, proto, size, gop_handle);
- else
- status = setup_gop32(si, proto, size, gop_handle);
-
-free_handle:
- efi_call_early(free_pool, gop_handle);
- return status;
-}
-
static efi_status_t
setup_uga32(void **uga_handle, unsigned long size, u32 *width, u32 *height)
{
@@ -1038,7 +732,7 @@
EFI_LOCATE_BY_PROTOCOL,
&graphics_proto, NULL, &size, gop_handle);
if (status == EFI_BUFFER_TOO_SMALL)
- status = setup_gop(si, &graphics_proto, size);
+ status = efi_setup_gop(NULL, si, &graphics_proto, size);
if (status != EFI_SUCCESS) {
size = 0;
diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h
index d487e72..c0223f1 100644
--- a/arch/x86/boot/compressed/eboot.h
+++ b/arch/x86/boot/compressed/eboot.h
@@ -11,80 +11,6 @@
#define DESC_TYPE_CODE_DATA (1 << 0)
-#define EFI_CONSOLE_OUT_DEVICE_GUID \
- EFI_GUID(0xd3b36f2c, 0xd551, 0x11d4, 0x9a, 0x46, 0x0, 0x90, 0x27, \
- 0x3f, 0xc1, 0x4d)
-
-#define PIXEL_RGB_RESERVED_8BIT_PER_COLOR 0
-#define PIXEL_BGR_RESERVED_8BIT_PER_COLOR 1
-#define PIXEL_BIT_MASK 2
-#define PIXEL_BLT_ONLY 3
-#define PIXEL_FORMAT_MAX 4
-
-struct efi_pixel_bitmask {
- u32 red_mask;
- u32 green_mask;
- u32 blue_mask;
- u32 reserved_mask;
-};
-
-struct efi_graphics_output_mode_info {
- u32 version;
- u32 horizontal_resolution;
- u32 vertical_resolution;
- int pixel_format;
- struct efi_pixel_bitmask pixel_information;
- u32 pixels_per_scan_line;
-} __packed;
-
-struct efi_graphics_output_protocol_mode_32 {
- u32 max_mode;
- u32 mode;
- u32 info;
- u32 size_of_info;
- u64 frame_buffer_base;
- u32 frame_buffer_size;
-} __packed;
-
-struct efi_graphics_output_protocol_mode_64 {
- u32 max_mode;
- u32 mode;
- u64 info;
- u64 size_of_info;
- u64 frame_buffer_base;
- u64 frame_buffer_size;
-} __packed;
-
-struct efi_graphics_output_protocol_mode {
- u32 max_mode;
- u32 mode;
- unsigned long info;
- unsigned long size_of_info;
- u64 frame_buffer_base;
- unsigned long frame_buffer_size;
-} __packed;
-
-struct efi_graphics_output_protocol_32 {
- u32 query_mode;
- u32 set_mode;
- u32 blt;
- u32 mode;
-};
-
-struct efi_graphics_output_protocol_64 {
- u64 query_mode;
- u64 set_mode;
- u64 blt;
- u64 mode;
-};
-
-struct efi_graphics_output_protocol {
- void *query_mode;
- unsigned long set_mode;
- unsigned long blt;
- struct efi_graphics_output_protocol_mode *mode;
-};
-
struct efi_uga_draw_protocol_32 {
u32 get_mode;
u32 set_mode;
diff --git a/arch/x86/boot/compressed/error.c b/arch/x86/boot/compressed/error.c
new file mode 100644
index 0000000..6248740
--- /dev/null
+++ b/arch/x86/boot/compressed/error.c
@@ -0,0 +1,22 @@
+/*
+ * Callers outside of misc.c need access to the error reporting routines,
+ * but the *_putstr() functions need to stay in misc.c because of how
+ * memcpy() and memmove() are defined for the compressed boot environment.
+ */
+#include "misc.h"
+
+void warn(char *m)
+{
+ error_putstr("\n\n");
+ error_putstr(m);
+ error_putstr("\n\n");
+}
+
+void error(char *m)
+{
+ warn(m);
+ error_putstr(" -- System halted");
+
+ while (1)
+ asm("hlt");
+}
diff --git a/arch/x86/boot/compressed/error.h b/arch/x86/boot/compressed/error.h
new file mode 100644
index 0000000..2e59dac
--- /dev/null
+++ b/arch/x86/boot/compressed/error.h
@@ -0,0 +1,7 @@
+#ifndef BOOT_COMPRESSED_ERROR_H
+#define BOOT_COMPRESSED_ERROR_H
+
+void warn(char *m);
+void error(char *m);
+
+#endif /* BOOT_COMPRESSED_ERROR_H */
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 0256064..1038524 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -176,7 +176,9 @@
1:
/* Target address to relocate to for decompression */
- addl $z_extract_offset, %ebx
+ movl BP_init_size(%esi), %eax
+ subl $_end, %eax
+ addl %eax, %ebx
/* Set up the stack */
leal boot_stack_end(%ebx), %esp
@@ -233,24 +235,28 @@
2:
/*
- * Do the decompression, and jump to the new kernel..
+ * Do the extraction, and jump to the new kernel..
*/
- /* push arguments for decompress_kernel: */
- pushl $z_run_size /* size of kernel with .bss and .brk */
+ /* push arguments for extract_kernel: */
pushl $z_output_len /* decompressed length, end of relocs */
- leal z_extract_offset_negative(%ebx), %ebp
+
+ movl BP_init_size(%esi), %eax
+ subl $_end, %eax
+ movl %ebx, %ebp
+ subl %eax, %ebp
pushl %ebp /* output address */
+
pushl $z_input_len /* input_len */
leal input_data(%ebx), %eax
pushl %eax /* input_data */
leal boot_heap(%ebx), %eax
pushl %eax /* heap area */
pushl %esi /* real mode pointer */
- call decompress_kernel /* returns kernel location in %eax */
- addl $28, %esp
+ call extract_kernel /* returns kernel location in %eax */
+ addl $24, %esp
/*
- * Jump to the decompressed kernel.
+ * Jump to the extracted kernel.
*/
xorl %ebx, %ebx
jmp *%eax
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 86558a1..0d80a7a 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -110,7 +110,9 @@
1:
/* Target address to relocate to for decompression */
- addl $z_extract_offset, %ebx
+ movl BP_init_size(%esi), %eax
+ subl $_end, %eax
+ addl %eax, %ebx
/*
* Prepare for entering 64 bit mode
@@ -132,7 +134,7 @@
/* Initialize Page tables to 0 */
leal pgtable(%ebx), %edi
xorl %eax, %eax
- movl $((4096*6)/4), %ecx
+ movl $(BOOT_INIT_PGT_SIZE/4), %ecx
rep stosl
/* Build Level 4 */
@@ -338,7 +340,9 @@
1:
/* Target address to relocate to for decompression */
- leaq z_extract_offset(%rbp), %rbx
+ movl BP_init_size(%rsi), %ebx
+ subl $_end, %ebx
+ addq %rbp, %rbx
/* Set up the stack */
leaq boot_stack_end(%rbx), %rsp
@@ -408,19 +412,16 @@
2:
/*
- * Do the decompression, and jump to the new kernel..
+ * Do the extraction, and jump to the new kernel..
*/
pushq %rsi /* Save the real mode argument */
- movq $z_run_size, %r9 /* size of kernel with .bss and .brk */
- pushq %r9
movq %rsi, %rdi /* real mode address */
leaq boot_heap(%rip), %rsi /* malloc area for uncompression */
leaq input_data(%rip), %rdx /* input_data */
movl $z_input_len, %ecx /* input_len */
movq %rbp, %r8 /* output target address */
movq $z_output_len, %r9 /* decompressed length, end of relocs */
- call decompress_kernel /* returns kernel location in %rax */
- popq %r9
+ call extract_kernel /* returns kernel location in %rax */
popq %rsi
/*
@@ -485,4 +486,4 @@
.section ".pgtable","a",@nobits
.balign 4096
pgtable:
- .fill 6*4096, 1, 0
+ .fill BOOT_PGT_SIZE, 1, 0
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
new file mode 100644
index 0000000..cfeb025
--- /dev/null
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -0,0 +1,512 @@
+/*
+ * kaslr.c
+ *
+ * This contains the routines needed to generate a reasonable level of
+ * entropy to choose a randomized kernel base address offset in support
+ * of Kernel Address Space Layout Randomization (KASLR). Additionally
+ * handles walking the physical memory maps (and tracking memory regions
+ * to avoid) in order to select a physical memory location that can
+ * contain the entire properly aligned running kernel image.
+ *
+ */
+#include "misc.h"
+#include "error.h"
+
+#include <asm/msr.h>
+#include <asm/archrandom.h>
+#include <asm/e820.h>
+
+#include <generated/compile.h>
+#include <linux/module.h>
+#include <linux/uts.h>
+#include <linux/utsname.h>
+#include <generated/utsrelease.h>
+
+/* Simplified build-specific string for starting entropy. */
+static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
+ LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
+
+#define I8254_PORT_CONTROL 0x43
+#define I8254_PORT_COUNTER0 0x40
+#define I8254_CMD_READBACK 0xC0
+#define I8254_SELECT_COUNTER0 0x02
+#define I8254_STATUS_NOTREADY 0x40
+static inline u16 i8254(void)
+{
+ u16 status, timer;
+
+ do {
+ outb(I8254_PORT_CONTROL,
+ I8254_CMD_READBACK | I8254_SELECT_COUNTER0);
+ status = inb(I8254_PORT_COUNTER0);
+ timer = inb(I8254_PORT_COUNTER0);
+ timer |= inb(I8254_PORT_COUNTER0) << 8;
+ } while (status & I8254_STATUS_NOTREADY);
+
+ return timer;
+}
+
+static unsigned long rotate_xor(unsigned long hash, const void *area,
+ size_t size)
+{
+ size_t i;
+ unsigned long *ptr = (unsigned long *)area;
+
+ for (i = 0; i < size / sizeof(hash); i++) {
+ /* Rotate by odd number of bits and XOR. */
+ hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7);
+ hash ^= ptr[i];
+ }
+
+ return hash;
+}
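The shift pair in rotate_xor() is easy to misread: on a 64-bit build it is simply a right-rotation by 7 bits (an odd count, so repeated application visits every bit position). A minimal restatement of the loop body, with the 64-bit constant spelled out for illustration only:

	hash = (hash << 57) | (hash >> 7);	/* rotate right by 7 bits */
	hash ^= ptr[i];				/* fold in the next word */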
+
+/* Attempt to create a simple but unpredictable starting entropy. */
+static unsigned long get_random_boot(void)
+{
+ unsigned long hash = 0;
+
+ hash = rotate_xor(hash, build_str, sizeof(build_str));
+ hash = rotate_xor(hash, boot_params, sizeof(*boot_params));
+
+ return hash;
+}
+
+static unsigned long get_random_long(const char *purpose)
+{
+#ifdef CONFIG_X86_64
+ const unsigned long mix_const = 0x5d6008cbf3848dd3UL;
+#else
+ const unsigned long mix_const = 0x3f39e593UL;
+#endif
+ unsigned long raw, random = get_random_boot();
+ bool use_i8254 = true;
+
+ debug_putstr(purpose);
+ debug_putstr(" KASLR using");
+
+ if (has_cpuflag(X86_FEATURE_RDRAND)) {
+ debug_putstr(" RDRAND");
+ if (rdrand_long(&raw)) {
+ random ^= raw;
+ use_i8254 = false;
+ }
+ }
+
+ if (has_cpuflag(X86_FEATURE_TSC)) {
+ debug_putstr(" RDTSC");
+ raw = rdtsc();
+
+ random ^= raw;
+ use_i8254 = false;
+ }
+
+ if (use_i8254) {
+ debug_putstr(" i8254");
+ random ^= i8254();
+ }
+
+ /* Circular multiply for better bit diffusion */
+ asm("mul %3"
+ : "=a" (random), "=d" (raw)
+ : "a" (random), "rm" (mix_const));
+ random += raw;
+
+ debug_putstr("...\n");
+
+ return random;
+}
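On x86_64 the MUL in the asm statement above produces a 128-bit product in RDX:RAX, which the output constraints capture as raw (high half) and random (low half) before the two are summed. A hedged C sketch of the same mixing step, assuming a compiler with __int128 support; the helper name mix64() is invented for illustration:

	#include <stdint.h>

	static uint64_t mix64(uint64_t random, uint64_t mix_const)
	{
		/* full 128-bit product of the two 64-bit inputs */
		unsigned __int128 product = (unsigned __int128)random * mix_const;

		/* low half ("=a") plus high half ("=d"), as in the asm */
		return (uint64_t)product + (uint64_t)(product >> 64);
	}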
+
+struct mem_vector {
+ unsigned long start;
+ unsigned long size;
+};
+
+enum mem_avoid_index {
+ MEM_AVOID_ZO_RANGE = 0,
+ MEM_AVOID_INITRD,
+ MEM_AVOID_CMDLINE,
+ MEM_AVOID_BOOTPARAMS,
+ MEM_AVOID_MAX,
+};
+
+static struct mem_vector mem_avoid[MEM_AVOID_MAX];
+
+static bool mem_contains(struct mem_vector *region, struct mem_vector *item)
+{
+ /* Item at least partially before region. */
+ if (item->start < region->start)
+ return false;
+ /* Item at least partially after region. */
+ if (item->start + item->size > region->start + region->size)
+ return false;
+ return true;
+}
+
+static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two)
+{
+ /* Item one is entirely before item two. */
+ if (one->start + one->size <= two->start)
+ return false;
+ /* Item one is entirely after item two. */
+ if (one->start >= two->start + two->size)
+ return false;
+ return true;
+}
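Both helpers treat a struct mem_vector as the half-open interval [start, start + size). A quick illustration with hypothetical values:

	/* one = [0x100000, 0x200000), two = [0x1f0000, 0x300000) */
	struct mem_vector one = { .start = 0x100000, .size = 0x100000 };
	struct mem_vector two = { .start = 0x1f0000, .size = 0x110000 };

	mem_overlaps(&one, &two);	/* true: one ends after two begins */
	mem_contains(&one, &two);	/* false: two runs past the end of one */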
+
+/*
+ * In theory, KASLR can put the kernel anywhere in the range of [16M, 64T).
+ * The mem_avoid array is used to store the ranges that need to be avoided
+ * when KASLR searches for an appropriate random address. We must avoid any
+ * regions that are unsafe to overlap with during decompression, and other
+ * things like the initrd, cmdline and boot_params. This comment seeks to
+ * explain mem_avoid as clearly as possible since incorrect mem_avoid
+ * memory ranges lead to really hard to debug boot failures.
+ *
+ * The initrd, cmdline, and boot_params are trivial to identify for
+ * avoiding. They are MEM_AVOID_INITRD, MEM_AVOID_CMDLINE, and
+ * MEM_AVOID_BOOTPARAMS respectively below.
+ *
+ * What is less obvious is how to avoid the range of memory that is used
+ * during decompression (MEM_AVOID_ZO_RANGE below). This range must cover
+ * the compressed kernel (ZO) and its run space, which is used to extract
+ * the uncompressed kernel (VO) and relocs.
+ *
+ * ZO's full run size sits against the end of the decompression buffer, so
+ * we can calculate where text, data, bss, etc of ZO are positioned more
+ * easily.
+ *
+ * For additional background, the decompression calculations can be found
+ * in header.S, and the memory diagram is based on the one found in misc.c.
+ *
+ * The following conditions are already enforced by the image layouts and
+ * associated code:
+ * - input + input_size >= output + output_size
+ * - kernel_total_size <= init_size
+ * - kernel_total_size <= output_size (see Note below)
+ * - output + init_size >= output + output_size
+ *
+ * (Note that kernel_total_size and output_size have no fundamental
+ * relationship, but output_size is passed to choose_random_location
+ * as a maximum of the two. The diagram is showing a case where
+ * kernel_total_size is larger than output_size, but this case is
+ * handled by bumping output_size.)
+ *
+ * The above conditions can be illustrated by a diagram:
+ *
+ * 0 output input input+input_size output+init_size
+ * | | | | |
+ * | | | | |
+ * |-----|--------|--------|--------------|-----------|--|-------------|
+ * | | |
+ * | | |
+ * output+init_size-ZO_INIT_SIZE output+output_size output+kernel_total_size
+ *
+ * [output, output+init_size) is the entire memory range used for
+ * extracting the compressed image.
+ *
+ * [output, output+kernel_total_size) is the range needed for the
+ * uncompressed kernel (VO) and its run size (bss, brk, etc).
+ *
+ * [output, output+output_size) is VO plus relocs (i.e. the entire
+ * uncompressed payload contained by ZO). This is the area of the buffer
+ * written to during decompression.
+ *
+ * [output+init_size-ZO_INIT_SIZE, output+init_size) is the worst-case
+ * range of the copied ZO and decompression code. (i.e. the range
+ * covered backwards of size ZO_INIT_SIZE, starting from output+init_size.)
+ *
+ * [input, input+input_size) is the original copied compressed image (ZO)
+ * (i.e. it does not include its run size). This range must be avoided
+ * because it contains the data used for decompression.
+ *
+ * [input+input_size, output+init_size) is [_text, _end) for ZO. This
+ * range includes ZO's heap and stack, and must be avoided since it
+ * performs the decompression.
+ *
+ * Since the above two ranges need to be avoided and they are adjacent,
+ * they can be merged, resulting in: [input, output+init_size) which
+ * becomes the MEM_AVOID_ZO_RANGE below.
+ */
+static void mem_avoid_init(unsigned long input, unsigned long input_size,
+ unsigned long output)
+{
+ unsigned long init_size = boot_params->hdr.init_size;
+ u64 initrd_start, initrd_size;
+ u64 cmd_line, cmd_line_size;
+ char *ptr;
+
+ /*
+ * Avoid the region that is unsafe to overlap during
+ * decompression.
+ */
+ mem_avoid[MEM_AVOID_ZO_RANGE].start = input;
+ mem_avoid[MEM_AVOID_ZO_RANGE].size = (output + init_size) - input;
+ add_identity_map(mem_avoid[MEM_AVOID_ZO_RANGE].start,
+ mem_avoid[MEM_AVOID_ZO_RANGE].size);
+
+ /* Avoid initrd. */
+ initrd_start = (u64)boot_params->ext_ramdisk_image << 32;
+ initrd_start |= boot_params->hdr.ramdisk_image;
+ initrd_size = (u64)boot_params->ext_ramdisk_size << 32;
+ initrd_size |= boot_params->hdr.ramdisk_size;
+ mem_avoid[MEM_AVOID_INITRD].start = initrd_start;
+ mem_avoid[MEM_AVOID_INITRD].size = initrd_size;
+ /* No need to set mapping for initrd, it will be handled in VO. */
+
+ /* Avoid kernel command line. */
+ cmd_line = (u64)boot_params->ext_cmd_line_ptr << 32;
+ cmd_line |= boot_params->hdr.cmd_line_ptr;
+ /* Calculate size of cmd_line. */
+ ptr = (char *)(unsigned long)cmd_line;
+ for (cmd_line_size = 0; ptr[cmd_line_size++]; )
+ ;
+ mem_avoid[MEM_AVOID_CMDLINE].start = cmd_line;
+ mem_avoid[MEM_AVOID_CMDLINE].size = cmd_line_size;
+ add_identity_map(mem_avoid[MEM_AVOID_CMDLINE].start,
+ mem_avoid[MEM_AVOID_CMDLINE].size);
+
+ /* Avoid boot parameters. */
+ mem_avoid[MEM_AVOID_BOOTPARAMS].start = (unsigned long)boot_params;
+ mem_avoid[MEM_AVOID_BOOTPARAMS].size = sizeof(*boot_params);
+ add_identity_map(mem_avoid[MEM_AVOID_BOOTPARAMS].start,
+ mem_avoid[MEM_AVOID_BOOTPARAMS].size);
+
+ /* We don't need to set a mapping for setup_data. */
+
+#ifdef CONFIG_X86_VERBOSE_BOOTUP
+ /* Make sure video RAM can be used. */
+ add_identity_map(0, PMD_SIZE);
+#endif
+}
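To make the merged MEM_AVOID_ZO_RANGE concrete, a worked example with purely hypothetical values:

	/*
	 * With output = 0x1000000, init_size = 0xa00000 and the copied
	 * ZO at input = 0x1800000:
	 *
	 *	start = input			      = 0x1800000
	 *	size  = (output + init_size) - input  = 0x0200000
	 *
	 * i.e. the avoided range is [0x1800000, 0x1a00000), exactly
	 * [input, output + init_size) as described above.
	 */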
+
+/*
+ * Does this memory vector overlap a known avoided area? If so, record the
+ * overlap region with the lowest address.
+ */
+static bool mem_avoid_overlap(struct mem_vector *img,
+ struct mem_vector *overlap)
+{
+ int i;
+ struct setup_data *ptr;
+ unsigned long earliest = img->start + img->size;
+ bool is_overlapping = false;
+
+ for (i = 0; i < MEM_AVOID_MAX; i++) {
+ if (mem_overlaps(img, &mem_avoid[i]) &&
+ mem_avoid[i].start < earliest) {
+ *overlap = mem_avoid[i];
+ earliest = overlap->start;
+ is_overlapping = true;
+ }
+ }
+
+ /* Avoid all entries in the setup_data linked list. */
+ ptr = (struct setup_data *)(unsigned long)boot_params->hdr.setup_data;
+ while (ptr) {
+ struct mem_vector avoid;
+
+ avoid.start = (unsigned long)ptr;
+ avoid.size = sizeof(*ptr) + ptr->len;
+
+ if (mem_overlaps(img, &avoid) && (avoid.start < earliest)) {
+ *overlap = avoid;
+ earliest = overlap->start;
+ is_overlapping = true;
+ }
+
+ ptr = (struct setup_data *)(unsigned long)ptr->next;
+ }
+
+ return is_overlapping;
+}
+
+static unsigned long slots[KERNEL_IMAGE_SIZE / CONFIG_PHYSICAL_ALIGN];
+
+struct slot_area {
+ unsigned long addr;
+ int num;
+};
+
+#define MAX_SLOT_AREA 100
+
+static struct slot_area slot_areas[MAX_SLOT_AREA];
+
+static unsigned long slot_max;
+
+static unsigned long slot_area_index;
+
+static void store_slot_info(struct mem_vector *region, unsigned long image_size)
+{
+ struct slot_area slot_area;
+
+ if (slot_area_index == MAX_SLOT_AREA)
+ return;
+
+ slot_area.addr = region->start;
+ slot_area.num = (region->size - image_size) /
+ CONFIG_PHYSICAL_ALIGN + 1;
+
+ if (slot_area.num > 0) {
+ slot_areas[slot_area_index++] = slot_area;
+ slot_max += slot_area.num;
+ }
+}
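The slot count is one for each aligned start offset that still leaves room for the image. With hypothetical numbers, a 16M region, an 8M image and CONFIG_PHYSICAL_ALIGN = 2M give:

	/*
	 * num = (16M - 8M) / 2M + 1 = 5
	 *
	 * i.e. start offsets 0M, 2M, 4M, 6M and 8M into the region all
	 * leave room for the full image.
	 */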
+
+static void slots_append(unsigned long addr)
+{
+ /* Overflowing the slots list should be impossible. */
+ if (slot_max >= KERNEL_IMAGE_SIZE / CONFIG_PHYSICAL_ALIGN)
+ return;
+
+ slots[slot_max++] = addr;
+}
+
+static unsigned long slots_fetch_random(void)
+{
+ /* Handle case of no slots stored. */
+ if (slot_max == 0)
+ return 0;
+
+ return slots[get_random_long("Physical") % slot_max];
+}
+
+static void process_e820_entry(struct e820entry *entry,
+ unsigned long minimum,
+ unsigned long image_size)
+{
+ struct mem_vector region, img, overlap;
+
+ /* Skip non-RAM entries. */
+ if (entry->type != E820_RAM)
+ return;
+
+ /* Ignore entries entirely above our maximum. */
+ if (entry->addr >= KERNEL_IMAGE_SIZE)
+ return;
+
+ /* Ignore entries entirely below our minimum. */
+ if (entry->addr + entry->size < minimum)
+ return;
+
+ region.start = entry->addr;
+ region.size = entry->size;
+
+ /* Potentially raise address to minimum location. */
+ if (region.start < minimum)
+ region.start = minimum;
+
+ /* Potentially raise address to meet alignment requirements. */
+ region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN);
+
+ /* Did we raise the address above the bounds of this e820 region? */
+ if (region.start > entry->addr + entry->size)
+ return;
+
+ /* Reduce size by any delta from the original address. */
+ region.size -= region.start - entry->addr;
+
+ /* Reduce maximum size to fit end of image within maximum limit. */
+ if (region.start + region.size > KERNEL_IMAGE_SIZE)
+ region.size = KERNEL_IMAGE_SIZE - region.start;
+
+ /* Walk each aligned slot and check for avoided areas. */
+ for (img.start = region.start, img.size = image_size ;
+ mem_contains(&region, &img) ;
+ img.start += CONFIG_PHYSICAL_ALIGN) {
+ if (mem_avoid_overlap(&img, &overlap))
+ continue;
+ slots_append(img.start);
+ }
+}
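The clamping steps above can be traced with a hypothetical e820 entry: addr = 0x0fff000, size = 0x1001000 (RAM), with minimum = 0x1000000 and CONFIG_PHYSICAL_ALIGN = 0x200000:

	/*
	 * region.start = ALIGN(max(0x0fff000, 0x1000000)) = 0x1000000
	 * region.size  = 0x1001000 - (0x1000000 - 0x0fff000) = 0x1000000
	 *
	 * so candidate slots are tried at 0x1000000, 0x1200000, ... for
	 * as long as the image still fits inside [0x1000000, 0x2000000).
	 */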
+
+static unsigned long find_random_phys_addr(unsigned long minimum,
+ unsigned long image_size)
+{
+ int i;
+ unsigned long addr;
+
+ /* Make sure minimum is aligned. */
+ minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN);
+
+ /* Verify potential e820 positions, appending to slots list. */
+ for (i = 0; i < boot_params->e820_entries; i++) {
+ process_e820_entry(&boot_params->e820_map[i], minimum,
+ image_size);
+ }
+
+ return slots_fetch_random();
+}
+
+static unsigned long find_random_virt_addr(unsigned long minimum,
+ unsigned long image_size)
+{
+ unsigned long slots, random_addr;
+
+ /* Make sure minimum is aligned. */
+ minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN);
+ /* Align image_size for easy slot calculations. */
+ image_size = ALIGN(image_size, CONFIG_PHYSICAL_ALIGN);
+
+ /*
+ * How many CONFIG_PHYSICAL_ALIGN-sized slots can hold
+ * image_size within the range from minimum to
+ * KERNEL_IMAGE_SIZE?
+ */
+ slots = (KERNEL_IMAGE_SIZE - minimum - image_size) /
+ CONFIG_PHYSICAL_ALIGN + 1;
+
+ random_addr = get_random_long("Virtual") % slots;
+
+ return random_addr * CONFIG_PHYSICAL_ALIGN + minimum;
+}
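The same arithmetic in worked form, again with hypothetical values: KERNEL_IMAGE_SIZE = 1024M, minimum = 16M, image_size = 32M and CONFIG_PHYSICAL_ALIGN = 2M give:

	/*
	 * slots = (1024M - 16M - 32M) / 2M + 1 = 489
	 *
	 * The highest candidate, 16M + 488 * 2M = 992M, places the end
	 * of the image exactly at the 1024M limit.
	 */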
+
+/*
+ * Since this function works with addresses numerically rather than
+ * through pointers, it takes the input and output as 'unsigned long'.
+ */
+unsigned char *choose_random_location(unsigned long input,
+ unsigned long input_size,
+ unsigned long output,
+ unsigned long output_size)
+{
+ unsigned long choice = output;
+ unsigned long random_addr;
+
+#ifdef CONFIG_HIBERNATION
+ if (!cmdline_find_option_bool("kaslr")) {
+ warn("KASLR disabled: 'kaslr' not on cmdline (hibernation selected).");
+ goto out;
+ }
+#else
+ if (cmdline_find_option_bool("nokaslr")) {
+ warn("KASLR disabled: 'nokaslr' on cmdline.");
+ goto out;
+ }
+#endif
+
+ boot_params->hdr.loadflags |= KASLR_FLAG;
+
+ /* Record the various known unsafe memory ranges. */
+ mem_avoid_init(input, input_size, output);
+
+ /* Walk e820 and find a random address. */
+ random_addr = find_random_phys_addr(output, output_size);
+ if (!random_addr) {
+ warn("KASLR disabled: could not find suitable E820 region!");
+ goto out;
+ }
+
+ /* Always enforce the minimum. */
+ if (random_addr < choice)
+ goto out;
+
+ choice = random_addr;
+
+ add_identity_map(choice, output_size);
+
+ /* This actually loads the identity pagetable on x86_64. */
+ finalize_identity_maps();
+out:
+ return (unsigned char *)choice;
+}
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 79dac17..f14db4e 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -1,8 +1,10 @@
/*
* misc.c
*
- * This is a collection of several routines from gzip-1.0.3
- * adapted for Linux.
+ * This is a collection of several routines used to extract the kernel
+ * which includes KASLR relocation, decompression, ELF parsing, and
+ * relocation processing. Additionally included are the screen and serial
+ * output functions and related debugging support functions.
*
* malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994
* puts by Nick Holloway 1993, better puts by Martin Mares 1995
@@ -10,111 +12,37 @@
*/
#include "misc.h"
+#include "error.h"
#include "../string.h"
-
-/* WARNING!!
- * This code is compiled with -fPIC and it is relocated dynamically
- * at run time, but no relocation processing is performed.
- * This means that it is not safe to place pointers in static structures.
- */
+#include "../voffset.h"
/*
- * Getting to provable safe in place decompression is hard.
- * Worst case behaviours need to be analyzed.
- * Background information:
- *
- * The file layout is:
- * magic[2]
- * method[1]
- * flags[1]
- * timestamp[4]
- * extraflags[1]
- * os[1]
- * compressed data blocks[N]
- * crc[4] orig_len[4]
- *
- * resulting in 18 bytes of non compressed data overhead.
- *
- * Files divided into blocks
- * 1 bit (last block flag)
- * 2 bits (block type)
- *
- * 1 block occurs every 32K -1 bytes or when there 50% compression
- * has been achieved. The smallest block type encoding is always used.
- *
- * stored:
- * 32 bits length in bytes.
- *
- * fixed:
- * magic fixed tree.
- * symbols.
- *
- * dynamic:
- * dynamic tree encoding.
- * symbols.
- *
- *
- * The buffer for decompression in place is the length of the
- * uncompressed data, plus a small amount extra to keep the algorithm safe.
- * The compressed data is placed at the end of the buffer. The output
- * pointer is placed at the start of the buffer and the input pointer
- * is placed where the compressed data starts. Problems will occur
- * when the output pointer overruns the input pointer.
- *
- * The output pointer can only overrun the input pointer if the input
- * pointer is moving faster than the output pointer. A condition only
- * triggered by data whose compressed form is larger than the uncompressed
- * form.
- *
- * The worst case at the block level is a growth of the compressed data
- * of 5 bytes per 32767 bytes.
- *
- * The worst case internal to a compressed block is very hard to figure.
- * The worst case can at least be boundined by having one bit that represents
- * 32764 bytes and then all of the rest of the bytes representing the very
- * very last byte.
- *
- * All of which is enough to compute an amount of extra data that is required
- * to be safe. To avoid problems at the block level allocating 5 extra bytes
- * per 32767 bytes of data is sufficient. To avoind problems internal to a
- * block adding an extra 32767 bytes (the worst case uncompressed block size)
- * is sufficient, to ensure that in the worst case the decompressed data for
- * block will stop the byte before the compressed data for a block begins.
- * To avoid problems with the compressed data's meta information an extra 18
- * bytes are needed. Leading to the formula:
- *
- * extra_bytes = (uncompressed_size >> 12) + 32768 + 18 + decompressor_size.
- *
- * Adding 8 bytes per 32K is a bit excessive but much easier to calculate.
- * Adding 32768 instead of 32767 just makes for round numbers.
- * Adding the decompressor_size is necessary as it musht live after all
- * of the data as well. Last I measured the decompressor is about 14K.
- * 10K of actual data and 4K of bss.
- *
+ * WARNING!!
+ * This code is compiled with -fPIC and it is relocated dynamically at
+ * run time, but no relocation processing is performed. This means that
+ * it is not safe to place pointers in static structures.
*/
-/*
- * gzip declarations
- */
+/* Macros used by the included decompressor code below. */
#define STATIC static
-#undef memcpy
-
/*
- * Use a normal definition of memset() from string.c. There are already
+ * Use normal definitions of mem*() from string.c. There are already
* included header files which expect a definition of memset() and by
* the time we define memset macro, it is too late.
*/
+#undef memcpy
#undef memset
#define memzero(s, n) memset((s), 0, (n))
+#define memmove memmove
-
-static void error(char *m);
+/* Functions used by the included decompressor code below. */
+void *memmove(void *dest, const void *src, size_t n);
/*
* This is set up by the setup-routine at boot-time
*/
-struct boot_params *real_mode; /* Pointer to real-mode data */
+struct boot_params *boot_params;
memptr free_mem_ptr;
memptr free_mem_end_ptr;
@@ -146,12 +74,16 @@
#ifdef CONFIG_KERNEL_LZ4
#include "../../../../lib/decompress_unlz4.c"
#endif
+/*
+ * NOTE: When adding a new decompressor, please update the analysis in
+ * ../header.S.
+ */
static void scroll(void)
{
int i;
- memcpy(vidmem, vidmem + cols * 2, (lines - 1) * cols * 2);
+ memmove(vidmem, vidmem + cols * 2, (lines - 1) * cols * 2);
for (i = (lines - 1) * cols * 2; i < lines * cols * 2; i += 2)
vidmem[i] = ' ';
}
@@ -184,12 +116,12 @@
}
}
- if (real_mode->screen_info.orig_video_mode == 0 &&
+ if (boot_params->screen_info.orig_video_mode == 0 &&
lines == 0 && cols == 0)
return;
- x = real_mode->screen_info.orig_x;
- y = real_mode->screen_info.orig_y;
+ x = boot_params->screen_info.orig_x;
+ y = boot_params->screen_info.orig_y;
while ((c = *s++) != '\0') {
if (c == '\n') {
@@ -210,8 +142,8 @@
}
}
- real_mode->screen_info.orig_x = x;
- real_mode->screen_info.orig_y = y;
+ boot_params->screen_info.orig_x = x;
+ boot_params->screen_info.orig_y = y;
pos = (x + cols * y) * 2; /* Update cursor position */
outb(14, vidport);
@@ -237,23 +169,13 @@
}
}
-static void error(char *x)
-{
- error_putstr("\n\n");
- error_putstr(x);
- error_putstr("\n\n -- System halted");
-
- while (1)
- asm("hlt");
-}
-
#if CONFIG_X86_NEED_RELOCS
static void handle_relocations(void *output, unsigned long output_len)
{
int *reloc;
unsigned long delta, map, ptr;
unsigned long min_addr = (unsigned long)output;
- unsigned long max_addr = min_addr + output_len;
+ unsigned long max_addr = min_addr + (VO___bss_start - VO__text);
/*
* Calculate the delta between where vmlinux was linked to load
@@ -295,7 +217,7 @@
* So we work backwards from the end of the decompressed image.
*/
for (reloc = output + output_len - sizeof(*reloc); *reloc; reloc--) {
- int extended = *reloc;
+ long extended = *reloc;
extended += map;
ptr = (unsigned long)extended;
@@ -372,9 +294,7 @@
#else
dest = (void *)(phdr->p_paddr);
#endif
- memcpy(dest,
- output + phdr->p_offset,
- phdr->p_filesz);
+ memmove(dest, output + phdr->p_offset, phdr->p_filesz);
break;
default: /* Ignore other PT_* */ break;
}
@@ -383,23 +303,41 @@
free(phdrs);
}
-asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
+/*
+ * The compressed kernel image (ZO) has been moved so that its position
+ * is against the end of the buffer used to hold the uncompressed kernel
+ * image (VO) and the execution environment (.bss, .brk), which makes sure
+ * there is room to do the in-place decompression. (See header.S for the
+ * calculations.)
+ *
+ * |-----compressed kernel image------|
+ * V V
+ * 0 extract_offset +INIT_SIZE
+ * |-----------|---------------|-------------------------|--------|
+ * | | | |
+ * VO__text startup_32 of ZO VO__end ZO__end
+ * ^ ^
+ * |-------uncompressed kernel image---------|
+ *
+ */
+asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
unsigned char *input_data,
unsigned long input_len,
unsigned char *output,
- unsigned long output_len,
- unsigned long run_size)
+ unsigned long output_len)
{
+ const unsigned long kernel_total_size = VO__end - VO__text;
unsigned char *output_orig = output;
- real_mode = rmode;
+ /* Retain x86 boot parameters pointer passed from startup_32/64. */
+ boot_params = rmode;
- /* Clear it for solely in-kernel use */
- real_mode->hdr.loadflags &= ~KASLR_FLAG;
+ /* Clear flags intended for solely in-kernel use. */
+ boot_params->hdr.loadflags &= ~KASLR_FLAG;
- sanitize_boot_params(real_mode);
+ sanitize_boot_params(boot_params);
- if (real_mode->screen_info.orig_video_mode == 7) {
+ if (boot_params->screen_info.orig_video_mode == 7) {
vidmem = (char *) 0xb0000;
vidport = 0x3b4;
} else {
@@ -407,11 +345,11 @@
vidport = 0x3d4;
}
- lines = real_mode->screen_info.orig_video_lines;
- cols = real_mode->screen_info.orig_video_cols;
+ lines = boot_params->screen_info.orig_video_lines;
+ cols = boot_params->screen_info.orig_video_cols;
console_init();
- debug_putstr("early console in decompress_kernel\n");
+ debug_putstr("early console in extract_kernel\n");
free_mem_ptr = heap; /* Heap */
free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
@@ -421,16 +359,16 @@
debug_putaddr(input_len);
debug_putaddr(output);
debug_putaddr(output_len);
- debug_putaddr(run_size);
+ debug_putaddr(kernel_total_size);
/*
* The memory hole needed for the kernel is the larger of either
* the entire decompressed kernel plus relocation table, or the
* entire decompressed kernel plus .bss and .brk sections.
*/
- output = choose_kernel_location(real_mode, input_data, input_len, output,
- output_len > run_size ? output_len
- : run_size);
+ output = choose_random_location((unsigned long)input_data, input_len,
+ (unsigned long)output,
+ max(output_len, kernel_total_size));
/* Validate memory location choices. */
if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1))
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 3783dc3..b6fec1f 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -32,7 +32,7 @@
/* misc.c */
extern memptr free_mem_ptr;
extern memptr free_mem_end_ptr;
-extern struct boot_params *real_mode; /* Pointer to real-mode data */
+extern struct boot_params *boot_params;
void __putstr(const char *s);
void __puthex(unsigned long value);
#define error_putstr(__x) __putstr(__x)
@@ -66,26 +66,35 @@
#if CONFIG_RANDOMIZE_BASE
-/* aslr.c */
-unsigned char *choose_kernel_location(struct boot_params *boot_params,
- unsigned char *input,
+/* kaslr.c */
+unsigned char *choose_random_location(unsigned long input_ptr,
unsigned long input_size,
- unsigned char *output,
+ unsigned long output_ptr,
unsigned long output_size);
/* cpuflags.c */
bool has_cpuflag(int flag);
#else
static inline
-unsigned char *choose_kernel_location(struct boot_params *boot_params,
- unsigned char *input,
+unsigned char *choose_random_location(unsigned long input_ptr,
unsigned long input_size,
- unsigned char *output,
+ unsigned long output_ptr,
unsigned long output_size)
{
- return output;
+ return (unsigned char *)output_ptr;
}
#endif
+#ifdef CONFIG_X86_64
+void add_identity_map(unsigned long start, unsigned long size);
+void finalize_identity_maps(void);
+extern unsigned char _pgtable[];
+#else
+static inline void add_identity_map(unsigned long start, unsigned long size)
+{ }
+static inline void finalize_identity_maps(void)
+{ }
+#endif
+
#ifdef CONFIG_EARLY_PRINTK
/* early_serial_console.c */
extern int early_serial_base;
diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c
index d8222f2..72bad2c 100644
--- a/arch/x86/boot/compressed/mkpiggy.c
+++ b/arch/x86/boot/compressed/mkpiggy.c
@@ -18,11 +18,10 @@
*
* H. Peter Anvin <hpa@linux.intel.com>
*
- * ----------------------------------------------------------------------- */
-
-/*
- * Compute the desired load offset from a compressed program; outputs
- * a small assembly wrapper with the appropriate symbols defined.
+ * -----------------------------------------------------------------------
+ *
+ * Outputs a small assembly wrapper with the appropriate symbols defined.
+ *
*/
#include <stdlib.h>
@@ -35,14 +34,11 @@
{
uint32_t olen;
long ilen;
- unsigned long offs;
- unsigned long run_size;
FILE *f = NULL;
int retval = 1;
- if (argc < 3) {
- fprintf(stderr, "Usage: %s compressed_file run_size\n",
- argv[0]);
+ if (argc < 2) {
+ fprintf(stderr, "Usage: %s compressed_file\n", argv[0]);
goto bail;
}
@@ -67,29 +63,11 @@
ilen = ftell(f);
olen = get_unaligned_le32(&olen);
- /*
- * Now we have the input (compressed) and output (uncompressed)
- * sizes, compute the necessary decompression offset...
- */
-
- offs = (olen > ilen) ? olen - ilen : 0;
- offs += olen >> 12; /* Add 8 bytes for each 32K block */
- offs += 64*1024 + 128; /* Add 64K + 128 bytes slack */
- offs = (offs+4095) & ~4095; /* Round to a 4K boundary */
- run_size = atoi(argv[2]);
-
printf(".section \".rodata..compressed\",\"a\",@progbits\n");
printf(".globl z_input_len\n");
printf("z_input_len = %lu\n", ilen);
printf(".globl z_output_len\n");
printf("z_output_len = %lu\n", (unsigned long)olen);
- printf(".globl z_extract_offset\n");
- printf("z_extract_offset = 0x%lx\n", offs);
- /* z_extract_offset_negative allows simplification of head_32.S */
- printf(".globl z_extract_offset_negative\n");
- printf("z_extract_offset_negative = -0x%lx\n", offs);
- printf(".globl z_run_size\n");
- printf("z_run_size = %lu\n", run_size);
printf(".globl input_data, input_data_end\n");
printf("input_data:\n");
diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c
new file mode 100644
index 0000000..34b95df
--- /dev/null
+++ b/arch/x86/boot/compressed/pagetable.c
@@ -0,0 +1,129 @@
+/*
+ * This code is used on x86_64 to create page table identity mappings on
+ * demand by building up a new set of page tables (or appending to the
+ * existing ones), and then switching over to them when ready.
+ */
+
+/*
+ * Since we're dealing with identity mappings, physical and virtual
+ * addresses are the same, so override these defines which are ultimately
+ * used by the headers in misc.h.
+ */
+#define __pa(x) ((unsigned long)(x))
+#define __va(x) ((void *)((unsigned long)(x)))
+
+#include "misc.h"
+
+/* These actually do the work of building the kernel identity maps. */
+#include <asm/init.h>
+#include <asm/pgtable.h>
+#include "../../mm/ident_map.c"
+
+/* Used by pgtable.h asm code to force instruction serialization. */
+unsigned long __force_order;
+
+/* Used to track our page table allocation area. */
+struct alloc_pgt_data {
+ unsigned char *pgt_buf;
+ unsigned long pgt_buf_size;
+ unsigned long pgt_buf_offset;
+};
+
+/*
+ * Allocates space for a page table entry, using struct alloc_pgt_data
+ * above. Besides the local callers, this is used as the allocation
+ * callback in mapping_info below.
+ */
+static void *alloc_pgt_page(void *context)
+{
+ struct alloc_pgt_data *pages = (struct alloc_pgt_data *)context;
+ unsigned char *entry;
+
+ /* Validate there is space available for a new page. */
+ if (pages->pgt_buf_offset >= pages->pgt_buf_size) {
+ debug_putstr("out of pgt_buf in " __FILE__ "!?\n");
+ debug_putaddr(pages->pgt_buf_offset);
+ debug_putaddr(pages->pgt_buf_size);
+ return NULL;
+ }
+
+ entry = pages->pgt_buf + pages->pgt_buf_offset;
+ pages->pgt_buf_offset += PAGE_SIZE;
+
+ return entry;
+}
+
+/* Used to track our allocated page tables. */
+static struct alloc_pgt_data pgt_data;
+
+/* The top level page table entry pointer. */
+static unsigned long level4p;
+
+/* Locates and clears a region for a new top level page table. */
+static void prepare_level4(void)
+{
+ /*
+ * It should be impossible for this not to already be true,
+ * but since calling this a second time would rewind the other
+ * counters, let's just make sure this is reset too.
+ */
+ pgt_data.pgt_buf_offset = 0;
+
+ /*
+ * If we came here via startup_32(), cr3 will be _pgtable already
+ * and we must append to the existing area instead of entirely
+ * overwriting it.
+ */
+ level4p = read_cr3();
+ if (level4p == (unsigned long)_pgtable) {
+ debug_putstr("booted via startup_32()\n");
+ pgt_data.pgt_buf = _pgtable + BOOT_INIT_PGT_SIZE;
+ pgt_data.pgt_buf_size = BOOT_PGT_SIZE - BOOT_INIT_PGT_SIZE;
+ memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size);
+ } else {
+ debug_putstr("booted via startup_64()\n");
+ pgt_data.pgt_buf = _pgtable;
+ pgt_data.pgt_buf_size = BOOT_PGT_SIZE;
+ memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size);
+ level4p = (unsigned long)alloc_pgt_page(&pgt_data);
+ }
+}
+
+/*
+ * Adds the specified range to what will become the new identity mappings.
+ * Once all ranges have been added, the new mapping is activated by calling
+ * finalize_identity_maps() below.
+ */
+void add_identity_map(unsigned long start, unsigned long size)
+{
+ struct x86_mapping_info mapping_info = {
+ .alloc_pgt_page = alloc_pgt_page,
+ .context = &pgt_data,
+ .pmd_flag = __PAGE_KERNEL_LARGE_EXEC,
+ };
+ unsigned long end = start + size;
+
+ /* Make sure we have a top level page table ready to use. */
+ if (!level4p)
+ prepare_level4();
+
+ /* Align boundary to 2M. */
+ start = round_down(start, PMD_SIZE);
+ end = round_up(end, PMD_SIZE);
+ if (start >= end)
+ return;
+
+ /* Build the mapping. */
+ kernel_ident_mapping_init(&mapping_info, (pgd_t *)level4p,
+ start, end);
+}
+
+/*
+ * This switches the page tables to the new level4 that has been built
+ * via calls to add_identity_map() above. If booted via startup_32(),
+ * this is effectively a no-op.
+ */
+void finalize_identity_maps(void)
+{
+ write_cr3(level4p);
+}
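The PMD_SIZE rounding in add_identity_map() means a small request can map far more than it asks for. A worked example with a hypothetical range:

	/*
	 * For the 8K request [0x1234567, 0x1236567):
	 *
	 *	start = round_down(0x1234567, PMD_SIZE) = 0x1200000
	 *	end   = round_up(0x1236567, PMD_SIZE)   = 0x1400000
	 *
	 * so a single 2M page at 0x1200000 covers the whole request.
	 */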
diff --git a/arch/x86/boot/compressed/string.c b/arch/x86/boot/compressed/string.c
index 00e788b..cea140c 100644
--- a/arch/x86/boot/compressed/string.c
+++ b/arch/x86/boot/compressed/string.c
@@ -1,7 +1,16 @@
+/*
+ * This provides an optimized implementation of memcpy, and a simplified
+ * implementation of memset and memmove. These are used here because the
+ * standard kernel runtime versions are not yet available and we don't
+ * trust the gcc built-in implementations as they may do unexpected things
+ * (e.g. FPU ops) in the minimal decompression stub execution environment.
+ */
+#include "error.h"
+
#include "../string.c"
#ifdef CONFIG_X86_32
-void *memcpy(void *dest, const void *src, size_t n)
+static void *__memcpy(void *dest, const void *src, size_t n)
{
int d0, d1, d2;
asm volatile(
@@ -15,7 +24,7 @@
return dest;
}
#else
-void *memcpy(void *dest, const void *src, size_t n)
+static void *__memcpy(void *dest, const void *src, size_t n)
{
long d0, d1, d2;
asm volatile(
@@ -39,3 +48,27 @@
ss[i] = c;
return s;
}
+
+void *memmove(void *dest, const void *src, size_t n)
+{
+ unsigned char *d = dest;
+ const unsigned char *s = src;
+
+ if (d <= s || d - s >= n)
+ return __memcpy(dest, src, n);
+
+ while (n-- > 0)
+ d[n] = s[n];
+
+ return dest;
+}
+
+/* Detect and warn about potential overlaps, but handle them with memmove. */
+void *memcpy(void *dest, const void *src, size_t n)
+{
+ if (dest > src && dest - src < n) {
+ warn("Avoiding potentially unsafe overlapping memcpy()!");
+ return memmove(dest, src, n);
+ }
+ return __memcpy(dest, src, n);
+}
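A minimal sketch of the case the overlap check catches, using a hypothetical buffer:

	char buf[8] = "abcdef";

	/*
	 * dest > src and dest - src < n, so a forward copy would re-read
	 * bytes it had already overwritten; memcpy() warns and the
	 * backward-copying memmove() path runs instead, leaving "ababcd".
	 */
	memcpy(buf + 2, buf, 4);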
diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S
index 34d047c..e24e0a0 100644
--- a/arch/x86/boot/compressed/vmlinux.lds.S
+++ b/arch/x86/boot/compressed/vmlinux.lds.S
@@ -70,5 +70,6 @@
_epgtable = . ;
}
#endif
+ . = ALIGN(PAGE_SIZE); /* keep ZO size page aligned */
_end = .;
}
diff --git a/arch/x86/boot/early_serial_console.c b/arch/x86/boot/early_serial_console.c
index 45a0768..f0b8d6d 100644
--- a/arch/x86/boot/early_serial_console.c
+++ b/arch/x86/boot/early_serial_console.c
@@ -1,3 +1,7 @@
+/*
+ * Serial port routines for use during early boot reporting. This code is
+ * included from both the compressed kernel and the regular kernel.
+ */
#include "boot.h"
#define DEFAULT_SERIAL_PORT 0x3f8 /* ttyS0 */
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 6236b9e..3dd5be3 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -440,13 +440,116 @@
pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr
-#define ZO_INIT_SIZE (ZO__end - ZO_startup_32 + ZO_z_extract_offset)
+#
+# Getting to provably safe in-place decompression is hard. Worst case
+# behaviours need to be analyzed. Here let's take the decompression of
+# a gzip-compressed kernel as an example, to illustrate it:
+#
+# The file layout of gzip compressed kernel is:
+#
+# magic[2]
+# method[1]
+# flags[1]
+# timestamp[4]
+# extraflags[1]
+# os[1]
+# compressed data blocks[N]
+# crc[4] orig_len[4]
+#
+# ... resulting in +18 bytes overhead of uncompressed data.
+#
+# (For more information, please refer to RFC 1951 and RFC 1952.)
+#
+# Files divided into blocks
+# 1 bit (last block flag)
+# 2 bits (block type)
+#
+# 1 block occurs every 32K - 1 bytes or when 50% compression
+# has been achieved. The smallest block type encoding is always used.
+#
+# stored:
+# 32 bits length in bytes.
+#
+# fixed:
+# magic fixed tree.
+# symbols.
+#
+# dynamic:
+# dynamic tree encoding.
+# symbols.
+#
+#
+# The buffer for decompression in place is the length of the uncompressed
+# data, plus a small amount extra to keep the algorithm safe. The
+# compressed data is placed at the end of the buffer. The output pointer
+# is placed at the start of the buffer and the input pointer is placed
+# where the compressed data starts. Problems will occur when the output
+# pointer overruns the input pointer.
+#
+# The output pointer can only overrun the input pointer if the input
+# pointer is moving faster than the output pointer. A condition only
+# triggered by data whose compressed form is larger than the uncompressed
+# form.
+#
+# The worst case at the block level is a growth of the compressed data
+# of 5 bytes per 32767 bytes.
+#
+# The worst case internal to a compressed block is very hard to figure.
+# The worst case can at least be bounded by having one bit that represents
+# 32764 bytes and then all of the rest of the bytes representing the very
+# very last byte.
+#
+# All of which is enough to compute an amount of extra data that is required
+# to be safe. To avoid problems at the block level allocating 5 extra bytes
+# per 32767 bytes of data is sufficient. To avoid problems internal to a
+# block adding an extra 32767 bytes (the worst case uncompressed block size)
+# is sufficient, to ensure that in the worst case the decompressed data for
+# block will stop the byte before the compressed data for a block begins.
+# To avoid problems with the compressed data's meta information an extra 18
+# bytes are needed. Leading to the formula:
+#
+# extra_bytes = (uncompressed_size >> 12) + 32768 + 18
+#
+# Adding 8 bytes per 32K is a bit excessive but much easier to calculate.
+# Adding 32768 instead of 32767 just makes for round numbers.
+#
+# The above analysis is for decompressing a gzip-compressed kernel only.
+# Six different decompressors are supported altogether, and among them
+# xz stores data in chunks with a maximum chunk size of 64K. Hence the
+# safety margin should be updated to cover all decompressors so that we
+# don't need to deal with each of them separately. Please check the
+# description in lib/decompressor_xxx.c for specific information.
+#
+# extra_bytes = (uncompressed_size >> 12) + 65536 + 128
+
+#define ZO_z_extra_bytes ((ZO_z_output_len >> 12) + 65536 + 128)
+#if ZO_z_output_len > ZO_z_input_len
+# define ZO_z_extract_offset (ZO_z_output_len + ZO_z_extra_bytes - \
+ ZO_z_input_len)
+#else
+# define ZO_z_extract_offset ZO_z_extra_bytes
+#endif
+
+/*
+ * The extract_offset has to be bigger than the ZO head section. Otherwise,
+ * when the head code is running to move ZO to the end of the buffer, it will
+ * overwrite the head code itself.
+ */
+#if (ZO__ehead - ZO_startup_32) > ZO_z_extract_offset
+# define ZO_z_min_extract_offset ((ZO__ehead - ZO_startup_32 + 4095) & ~4095)
+#else
+# define ZO_z_min_extract_offset ((ZO_z_extract_offset + 4095) & ~4095)
+#endif
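To make the macro arithmetic concrete, here is a hypothetical sketch assuming a 7M compressed image (ZO_z_input_len) that decompresses to 28M (ZO_z_output_len):

	ZO_z_extra_bytes    = (28M >> 12) + 65536 + 128
	                    = 7168 + 65536 + 128 = 72832
	ZO_z_extract_offset = 28M + 72832 - 7M = 21M + 72832

Since ZO_z_output_len > ZO_z_input_len here, the compressed data is placed about 21M into the buffer, and ZO_z_min_extract_offset then rounds that up to a 4K boundary (provided it already exceeds the size of the ZO head section).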
+
+#define ZO_INIT_SIZE (ZO__end - ZO_startup_32 + ZO_z_min_extract_offset)
+
#define VO_INIT_SIZE (VO__end - VO__text)
#if ZO_INIT_SIZE > VO_INIT_SIZE
-#define INIT_SIZE ZO_INIT_SIZE
+# define INIT_SIZE ZO_INIT_SIZE
#else
-#define INIT_SIZE VO_INIT_SIZE
+# define INIT_SIZE VO_INIT_SIZE
#endif
+
init_size: .long INIT_SIZE # kernel initialization size
handover_offset: .long 0 # Filled in by build.c
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 4f404a6..0c8d796 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -173,6 +173,7 @@
CONFIG_NET_TULIP=y
CONFIG_E100=y
CONFIG_E1000=y
+CONFIG_E1000E=y
CONFIG_SKY2=y
CONFIG_FORCEDETH=y
CONFIG_8139TOO=y
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 064c7e2..5b7fa14 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -1477,7 +1477,7 @@
}
aesni_ctr_enc_tfm = aesni_ctr_enc;
#ifdef CONFIG_AS_AVX
- if (cpu_has_avx) {
+ if (boot_cpu_has(X86_FEATURE_AVX)) {
/* optimize performance of ctr mode encryption transform */
aesni_ctr_enc_tfm = aesni_ctr_enc_avx_tfm;
pr_info("AES CTR mode by8 optimization enabled\n");
diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c
index d844569..60907c1 100644
--- a/arch/x86/crypto/camellia_aesni_avx2_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c
@@ -562,7 +562,10 @@
{
const char *feature_name;
- if (!cpu_has_avx2 || !cpu_has_avx || !cpu_has_aes || !cpu_has_osxsave) {
+ if (!boot_cpu_has(X86_FEATURE_AVX) ||
+ !boot_cpu_has(X86_FEATURE_AVX2) ||
+ !boot_cpu_has(X86_FEATURE_AES) ||
+ !boot_cpu_has(X86_FEATURE_OSXSAVE)) {
pr_info("AVX2 or AES-NI instructions are not detected.\n");
return -ENODEV;
}
diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c
index 93d8f29..d96429d 100644
--- a/arch/x86/crypto/camellia_aesni_avx_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx_glue.c
@@ -554,7 +554,9 @@
{
const char *feature_name;
- if (!cpu_has_avx || !cpu_has_aes || !cpu_has_osxsave) {
+ if (!boot_cpu_has(X86_FEATURE_AVX) ||
+ !boot_cpu_has(X86_FEATURE_AES) ||
+ !boot_cpu_has(X86_FEATURE_OSXSAVE)) {
pr_info("AVX or AES-NI instructions are not detected.\n");
return -ENODEV;
}
diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c
index 8baaff5..2d5c2e0b 100644
--- a/arch/x86/crypto/chacha20_glue.c
+++ b/arch/x86/crypto/chacha20_glue.c
@@ -129,7 +129,8 @@
return -ENODEV;
#ifdef CONFIG_AS_AVX2
- chacha20_use_avx2 = cpu_has_avx && cpu_has_avx2 &&
+ chacha20_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
+ boot_cpu_has(X86_FEATURE_AVX2) &&
cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
#endif
return crypto_register_alg(&alg);
diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
index 4264a3d..e32142b 100644
--- a/arch/x86/crypto/poly1305_glue.c
+++ b/arch/x86/crypto/poly1305_glue.c
@@ -179,11 +179,12 @@
static int __init poly1305_simd_mod_init(void)
{
- if (!cpu_has_xmm2)
+ if (!boot_cpu_has(X86_FEATURE_XMM2))
return -ENODEV;
#ifdef CONFIG_AS_AVX2
- poly1305_use_avx2 = cpu_has_avx && cpu_has_avx2 &&
+ poly1305_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
+ boot_cpu_has(X86_FEATURE_AVX2) &&
cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
alg.descsize = sizeof(struct poly1305_simd_desc_ctx);
if (poly1305_use_avx2)
diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c
index 6d19834..870f6d8 100644
--- a/arch/x86/crypto/serpent_avx2_glue.c
+++ b/arch/x86/crypto/serpent_avx2_glue.c
@@ -538,7 +538,7 @@
{
const char *feature_name;
- if (!cpu_has_avx2 || !cpu_has_osxsave) {
+ if (!boot_cpu_has(X86_FEATURE_AVX2) || !boot_cpu_has(X86_FEATURE_OSXSAVE)) {
pr_info("AVX2 instructions are not detected.\n");
return -ENODEV;
}
diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c
index 8943407..644f97a 100644
--- a/arch/x86/crypto/serpent_sse2_glue.c
+++ b/arch/x86/crypto/serpent_sse2_glue.c
@@ -600,7 +600,7 @@
static int __init serpent_sse2_init(void)
{
- if (!cpu_has_xmm2) {
+ if (!boot_cpu_has(X86_FEATURE_XMM2)) {
printk(KERN_INFO "SSE2 instructions are not detected.\n");
return -ENODEV;
}
diff --git a/arch/x86/crypto/sha-mb/sha1_mb.c b/arch/x86/crypto/sha-mb/sha1_mb.c
index 081255c..9c5af33 100644
--- a/arch/x86/crypto/sha-mb/sha1_mb.c
+++ b/arch/x86/crypto/sha-mb/sha1_mb.c
@@ -102,14 +102,14 @@
static asmlinkage struct job_sha1* (*sha1_job_mgr_flush)(struct sha1_mb_mgr *state);
static asmlinkage struct job_sha1* (*sha1_job_mgr_get_comp_job)(struct sha1_mb_mgr *state);
-inline void sha1_init_digest(uint32_t *digest)
+static inline void sha1_init_digest(uint32_t *digest)
{
static const uint32_t initial_digest[SHA1_DIGEST_LENGTH] = {SHA1_H0,
SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 };
memcpy(digest, initial_digest, sizeof(initial_digest));
}
-inline uint32_t sha1_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2],
+static inline uint32_t sha1_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2],
uint32_t total_len)
{
uint32_t i = total_len & (SHA1_BLOCK_SIZE - 1);
diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c
index dd14616..1024e37 100644
--- a/arch/x86/crypto/sha1_ssse3_glue.c
+++ b/arch/x86/crypto/sha1_ssse3_glue.c
@@ -166,7 +166,7 @@
static bool avx_usable(void)
{
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
- if (cpu_has_avx)
+ if (boot_cpu_has(X86_FEATURE_AVX))
pr_info("AVX detected but unusable.\n");
return false;
}
diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c
index 5f4d608..3ae0f43 100644
--- a/arch/x86/crypto/sha256_ssse3_glue.c
+++ b/arch/x86/crypto/sha256_ssse3_glue.c
@@ -201,7 +201,7 @@
static bool avx_usable(void)
{
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
- if (cpu_has_avx)
+ if (boot_cpu_has(X86_FEATURE_AVX))
pr_info("AVX detected but unusable.\n");
return false;
}
diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c
index 34e5083..0b17c83 100644
--- a/arch/x86/crypto/sha512_ssse3_glue.c
+++ b/arch/x86/crypto/sha512_ssse3_glue.c
@@ -151,7 +151,7 @@
static bool avx_usable(void)
{
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
- if (cpu_has_avx)
+ if (boot_cpu_has(X86_FEATURE_AVX))
pr_info("AVX detected but unusable.\n");
return false;
}
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index e79d93d..ec138e5 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -191,7 +191,7 @@
long syscall_trace_enter(struct pt_regs *regs)
{
- u32 arch = is_ia32_task() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
+ u32 arch = in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
unsigned long phase1_result = syscall_trace_enter_phase1(regs, arch);
if (phase1_result == 0)
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 10868aa..983e5d3 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -207,10 +207,7 @@
ENTRY(ret_from_fork)
pushl %eax
call schedule_tail
- GET_THREAD_INFO(%ebp)
popl %eax
- pushl $0x0202 # Reset kernel eflags
- popfl
/* When we fork, we trace the syscall return in the child, too. */
movl %esp, %eax
@@ -221,10 +218,7 @@
ENTRY(ret_from_kernel_thread)
pushl %eax
call schedule_tail
- GET_THREAD_INFO(%ebp)
popl %eax
- pushl $0x0202 # Reset kernel eflags
- popfl
movl PT_EBP(%esp), %eax
call *PT_EBX(%esp)
movl $0, PT_EAX(%esp)
@@ -251,7 +245,6 @@
ret_from_exception:
preempt_stop(CLBR_ANY)
ret_from_intr:
- GET_THREAD_INFO(%ebp)
#ifdef CONFIG_VM86
movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS
movb PT_CS(%esp), %al
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 858b555..9ee0da1 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -372,9 +372,6 @@
ENTRY(ret_from_fork)
LOCK ; btr $TIF_FORK, TI_flags(%r8)
- pushq $0x0002
- popfq /* reset kernel eflags */
-
call schedule_tail /* rdi: 'prev' task parameter */
testb $3, CS(%rsp) /* from kernel_thread? */
@@ -781,19 +778,25 @@
pushfq
DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
SWAPGS
-gs_change:
+.Lgs_change:
movl %edi, %gs
-2: mfence /* workaround */
+2: ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE
SWAPGS
popfq
ret
END(native_load_gs_index)
- _ASM_EXTABLE(gs_change, bad_gs)
+ _ASM_EXTABLE(.Lgs_change, bad_gs)
.section .fixup, "ax"
/* running with kernelgs */
bad_gs:
SWAPGS /* switch back to user gs */
+.macro ZAP_GS
+ /* This can't be a string because the preprocessor needs to see it. */
+ movl $__USER_DS, %eax
+ movl %eax, %gs
+.endm
+ ALTERNATIVE "", "ZAP_GS", X86_BUG_NULL_SEG
xorl %eax, %eax
movl %eax, %gs
jmp 2b
@@ -1019,13 +1022,13 @@
movl %ecx, %eax /* zero extend */
cmpq %rax, RIP+8(%rsp)
je .Lbstep_iret
- cmpq $gs_change, RIP+8(%rsp)
+ cmpq $.Lgs_change, RIP+8(%rsp)
jne .Lerror_entry_done
/*
- * hack: gs_change can fail with user gsbase. If this happens, fix up
+ * hack: .Lgs_change can fail with user gsbase. If this happens, fix up
* gsbase and proceed. We'll fix up the exception and land in
- * gs_change's error handler with kernel gsbase.
+ * .Lgs_change's error handler with kernel gsbase.
*/
jmp .Lerror_entry_from_usermode_swapgs
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 847f2f0..e1721da 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -72,24 +72,23 @@
pushfq /* pt_regs->flags (except IF = 0) */
orl $X86_EFLAGS_IF, (%rsp) /* Fix saved flags */
pushq $__USER32_CS /* pt_regs->cs */
- xorq %r8,%r8
- pushq %r8 /* pt_regs->ip = 0 (placeholder) */
+ pushq $0 /* pt_regs->ip = 0 (placeholder) */
pushq %rax /* pt_regs->orig_ax */
pushq %rdi /* pt_regs->di */
pushq %rsi /* pt_regs->si */
pushq %rdx /* pt_regs->dx */
pushq %rcx /* pt_regs->cx */
pushq $-ENOSYS /* pt_regs->ax */
- pushq %r8 /* pt_regs->r8 = 0 */
- pushq %r8 /* pt_regs->r9 = 0 */
- pushq %r8 /* pt_regs->r10 = 0 */
- pushq %r8 /* pt_regs->r11 = 0 */
+ pushq $0 /* pt_regs->r8 = 0 */
+ pushq $0 /* pt_regs->r9 = 0 */
+ pushq $0 /* pt_regs->r10 = 0 */
+ pushq $0 /* pt_regs->r11 = 0 */
pushq %rbx /* pt_regs->rbx */
pushq %rbp /* pt_regs->rbp (will be overwritten) */
- pushq %r8 /* pt_regs->r12 = 0 */
- pushq %r8 /* pt_regs->r13 = 0 */
- pushq %r8 /* pt_regs->r14 = 0 */
- pushq %r8 /* pt_regs->r15 = 0 */
+ pushq $0 /* pt_regs->r12 = 0 */
+ pushq $0 /* pt_regs->r13 = 0 */
+ pushq $0 /* pt_regs->r14 = 0 */
+ pushq $0 /* pt_regs->r15 = 0 */
cld
/*
@@ -205,17 +204,16 @@
pushq %rdx /* pt_regs->dx */
pushq %rbp /* pt_regs->cx (stashed in bp) */
pushq $-ENOSYS /* pt_regs->ax */
- xorq %r8,%r8
- pushq %r8 /* pt_regs->r8 = 0 */
- pushq %r8 /* pt_regs->r9 = 0 */
- pushq %r8 /* pt_regs->r10 = 0 */
- pushq %r8 /* pt_regs->r11 = 0 */
+ pushq $0 /* pt_regs->r8 = 0 */
+ pushq $0 /* pt_regs->r9 = 0 */
+ pushq $0 /* pt_regs->r10 = 0 */
+ pushq $0 /* pt_regs->r11 = 0 */
pushq %rbx /* pt_regs->rbx */
pushq %rbp /* pt_regs->rbp (will be overwritten) */
- pushq %r8 /* pt_regs->r12 = 0 */
- pushq %r8 /* pt_regs->r13 = 0 */
- pushq %r8 /* pt_regs->r14 = 0 */
- pushq %r8 /* pt_regs->r15 = 0 */
+ pushq $0 /* pt_regs->r12 = 0 */
+ pushq $0 /* pt_regs->r13 = 0 */
+ pushq $0 /* pt_regs->r14 = 0 */
+ pushq $0 /* pt_regs->r15 = 0 */
/*
* User mode is traced as though IRQs are on, and SYSENTER
@@ -316,11 +314,10 @@
pushq %rdx /* pt_regs->dx */
pushq %rcx /* pt_regs->cx */
pushq $-ENOSYS /* pt_regs->ax */
- xorq %r8,%r8
- pushq %r8 /* pt_regs->r8 = 0 */
- pushq %r8 /* pt_regs->r9 = 0 */
- pushq %r8 /* pt_regs->r10 = 0 */
- pushq %r8 /* pt_regs->r11 = 0 */
+ pushq $0 /* pt_regs->r8 = 0 */
+ pushq $0 /* pt_regs->r9 = 0 */
+ pushq $0 /* pt_regs->r10 = 0 */
+ pushq $0 /* pt_regs->r11 = 0 */
pushq %rbx /* pt_regs->rbx */
pushq %rbp /* pt_regs->rbp */
pushq %r12 /* pt_regs->r12 */
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index cac6d17..555263e 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -374,3 +374,5 @@
543 x32 io_setup compat_sys_io_setup
544 x32 io_submit compat_sys_io_submit
545 x32 execveat compat_sys_execveat/ptregs
+546 x32 preadv2 compat_sys_preadv2
+547 x32 pwritev2 compat_sys_pwritev2
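
The x32 numbers 534 and 535 are already taken by preadv/pwritev, so the new entries use the next free slots, 546 and 547. The new rows only wire up the compat entry points; the syscall itself is reachable from ordinary user space. A minimal sketch, assuming a glibc new enough (2.26+) to provide the preadv2() wrapper and a readable file such as /etc/hostname:

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/uio.h>
    #include <unistd.h>

    int main(void)
    {
            char buf[128];
            struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
            int fd = open("/etc/hostname", O_RDONLY);

            if (fd < 0) {
                    perror("open");
                    return 1;
            }
            /* offset -1: read from the current file position; no RWF_* flags */
            ssize_t n = preadv2(fd, &iov, 1, -1, 0);
            if (n < 0)
                    perror("preadv2");
            else
                    printf("read %zd bytes\n", n);
            close(fd);
            return 0;
    }

On older glibc the same call can be issued as syscall(__NR_preadv2, fd, &iov, 1, 0, 0, 0), with the offset split into low/high halves.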
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index 03c3eb7..2f02d23 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -13,7 +13,6 @@
#include <uapi/linux/time.h>
#include <asm/vgtod.h>
-#include <asm/hpet.h>
#include <asm/vvar.h>
#include <asm/unistd.h>
#include <asm/msr.h>
@@ -28,16 +27,6 @@
extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
extern time_t __vdso_time(time_t *t);
-#ifdef CONFIG_HPET_TIMER
-extern u8 hpet_page
- __attribute__((visibility("hidden")));
-
-static notrace cycle_t vread_hpet(void)
-{
- return *(const volatile u32 *)(&hpet_page + HPET_COUNTER);
-}
-#endif
-
#ifdef CONFIG_PARAVIRT_CLOCK
extern u8 pvclock_page
__attribute__((visibility("hidden")));
@@ -195,10 +184,6 @@
if (gtod->vclock_mode == VCLOCK_TSC)
cycles = vread_tsc();
-#ifdef CONFIG_HPET_TIMER
- else if (gtod->vclock_mode == VCLOCK_HPET)
- cycles = vread_hpet();
-#endif
#ifdef CONFIG_PARAVIRT_CLOCK
else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
cycles = vread_pvclock(mode);
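
With the HPET path removed, __vdso_clock_gettime() answers from the TSC when vclock_mode is VCLOCK_TSC (or from the pvclock page under paravirt) and otherwise falls back to the real syscall. Whether the fast path is taken shows up from user space only as latency; a rough sketch (link with -lrt on older glibc):

    #include <stdio.h>
    #include <time.h>

    int main(void)
    {
            struct timespec a, b;
            int i;

            clock_gettime(CLOCK_MONOTONIC, &a);
            for (i = 0; i < 1000000; i++)
                    clock_gettime(CLOCK_MONOTONIC, &b);
            /* tens of ns per call usually means the vDSO path was taken */
            printf("%.1f ns per call\n",
                   ((b.tv_sec - a.tv_sec) * 1e9 + (b.tv_nsec - a.tv_nsec)) / 1e6);
            return 0;
    }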
diff --git a/arch/x86/entry/vdso/vdso-layout.lds.S b/arch/x86/entry/vdso/vdso-layout.lds.S
index 4158acc..a708aa9 100644
--- a/arch/x86/entry/vdso/vdso-layout.lds.S
+++ b/arch/x86/entry/vdso/vdso-layout.lds.S
@@ -25,7 +25,7 @@
* segment.
*/
- vvar_start = . - 3 * PAGE_SIZE;
+ vvar_start = . - 2 * PAGE_SIZE;
vvar_page = vvar_start;
/* Place all vvars at the offsets in asm/vvar.h. */
@@ -35,8 +35,7 @@
#undef __VVAR_KERNEL_LDS
#undef EMIT_VVAR
- hpet_page = vvar_start + PAGE_SIZE;
- pvclock_page = vvar_start + 2 * PAGE_SIZE;
+ pvclock_page = vvar_start + PAGE_SIZE;
. = SIZEOF_HEADERS;
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 10f7045..b3cf813 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -18,7 +18,6 @@
#include <asm/vdso.h>
#include <asm/vvar.h>
#include <asm/page.h>
-#include <asm/hpet.h>
#include <asm/desc.h>
#include <asm/cpufeature.h>
@@ -129,16 +128,6 @@
if (sym_offset == image->sym_vvar_page) {
ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address,
__pa_symbol(&__vvar_page) >> PAGE_SHIFT);
- } else if (sym_offset == image->sym_hpet_page) {
-#ifdef CONFIG_HPET_TIMER
- if (hpet_address && vclock_was_used(VCLOCK_HPET)) {
- ret = vm_insert_pfn_prot(
- vma,
- (unsigned long)vmf->virtual_address,
- hpet_address >> PAGE_SHIFT,
- pgprot_noncached(PAGE_READONLY));
- }
-#endif
} else if (sym_offset == image->sym_pvclock_page) {
struct pvclock_vsyscall_time_info *pvti =
pvclock_pvti_cpu0_va();
diff --git a/arch/x86/events/Kconfig b/arch/x86/events/Kconfig
new file mode 100644
index 0000000..98397db
--- /dev/null
+++ b/arch/x86/events/Kconfig
@@ -0,0 +1,36 @@
+menu "Performance monitoring"
+
+config PERF_EVENTS_INTEL_UNCORE
+ tristate "Intel uncore performance events"
+ depends on PERF_EVENTS && CPU_SUP_INTEL && PCI
+ default y
+ ---help---
+ Include support for Intel uncore performance events. These are
+ available on NehalemEX and more modern processors.
+
+config PERF_EVENTS_INTEL_RAPL
+ tristate "Intel rapl performance events"
+ depends on PERF_EVENTS && CPU_SUP_INTEL && PCI
+ default y
+ ---help---
+ Include support for Intel rapl performance events for power
+ monitoring on modern processors.
+
+config PERF_EVENTS_INTEL_CSTATE
+ tristate "Intel cstate performance events"
+ depends on PERF_EVENTS && CPU_SUP_INTEL && PCI
+ default y
+ ---help---
+ Include support for Intel cstate performance events for power
+ monitoring on modern processors.
+
+config PERF_EVENTS_AMD_POWER
+ depends on PERF_EVENTS && CPU_SUP_AMD
+ tristate "AMD Processor Power Reporting Mechanism"
+ ---help---
+ Provide power reporting mechanism support for AMD processors.
+ Currently, it leverages X86_FEATURE_ACC_POWER
+ (CPUID Fn8000_0007_EDX[12]) interface to calculate the
+ average power consumption on Family 15h processors.
+
+endmenu
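
The new tristate symbols make these side PMUs buildable as modules for the first time; the three Intel entries keep "default y", while AMD power reporting is opt-in. An illustrative .config fragment selecting everything modular:

    CONFIG_PERF_EVENTS_INTEL_UNCORE=m
    CONFIG_PERF_EVENTS_INTEL_RAPL=m
    CONFIG_PERF_EVENTS_INTEL_CSTATE=m
    CONFIG_PERF_EVENTS_AMD_POWER=m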
diff --git a/arch/x86/events/Makefile b/arch/x86/events/Makefile
index f59618a..1d392c3 100644
--- a/arch/x86/events/Makefile
+++ b/arch/x86/events/Makefile
@@ -6,9 +6,6 @@
ifdef CONFIG_AMD_IOMMU
obj-$(CONFIG_CPU_SUP_AMD) += amd/iommu.o
endif
-obj-$(CONFIG_CPU_SUP_INTEL) += intel/core.o intel/bts.o intel/cqm.o
-obj-$(CONFIG_CPU_SUP_INTEL) += intel/cstate.o intel/ds.o intel/knc.o
-obj-$(CONFIG_CPU_SUP_INTEL) += intel/lbr.o intel/p4.o intel/p6.o intel/pt.o
-obj-$(CONFIG_CPU_SUP_INTEL) += intel/rapl.o msr.o
-obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel/uncore.o intel/uncore_nhmex.o
-obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel/uncore_snb.o intel/uncore_snbep.o
+
+obj-$(CONFIG_CPU_SUP_INTEL) += msr.o
+obj-$(CONFIG_CPU_SUP_INTEL) += intel/
diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index 3db9569..98ac573 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -263,6 +263,7 @@
};
static struct pmu amd_nb_pmu = {
+ .task_ctx_nr = perf_invalid_context,
.attr_groups = amd_uncore_attr_groups,
.name = "amd_nb",
.event_init = amd_uncore_event_init,
@@ -274,6 +275,7 @@
};
static struct pmu amd_l2_pmu = {
+ .task_ctx_nr = perf_invalid_context,
.attr_groups = amd_uncore_attr_groups,
.name = "amd_l2",
.event_init = amd_uncore_event_init,
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 041e442..73a75aa 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -360,6 +360,9 @@
{
int i;
+ if (x86_pmu.lbr_pt_coexist)
+ return 0;
+
if (!atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what])) {
mutex_lock(&pmc_reserve_mutex);
for (i = 0; i < ARRAY_SIZE(x86_pmu.lbr_exclusive); i++) {
@@ -380,6 +383,9 @@
void x86_del_exclusive(unsigned int what)
{
+ if (x86_pmu.lbr_pt_coexist)
+ return;
+
atomic_dec(&x86_pmu.lbr_exclusive[what]);
atomic_dec(&active_events);
}
@@ -1518,7 +1524,7 @@
static void __init pmu_check_apic(void)
{
- if (cpu_has_apic)
+ if (boot_cpu_has(X86_FEATURE_APIC))
return;
x86_pmu.apic = 0;
@@ -2177,7 +2183,7 @@
* cap_user_time_zero doesn't make sense when we're using a different
* time base for the records.
*/
- if (event->clock == &local_clock) {
+ if (!event->attr.use_clockid) {
userpg->cap_user_time_zero = 1;
userpg->time_zero = data->cyc2ns_offset;
}
@@ -2277,7 +2283,7 @@
fp = compat_ptr(ss_base + regs->bp);
pagefault_disable();
- while (entry->nr < PERF_MAX_STACK_DEPTH) {
+ while (entry->nr < sysctl_perf_event_max_stack) {
unsigned long bytes;
frame.next_frame = 0;
frame.return_address = 0;
@@ -2337,7 +2343,7 @@
return;
pagefault_disable();
- while (entry->nr < PERF_MAX_STACK_DEPTH) {
+ while (entry->nr < sysctl_perf_event_max_stack) {
unsigned long bytes;
frame.next_frame = NULL;
frame.return_address = 0;
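
The compile-time PERF_MAX_STACK_DEPTH bound on unwound frames becomes the runtime limit sysctl_perf_event_max_stack here. Assuming the matching core change in this merge exposes it as kernel.perf_event_max_stack, the current value can be read back trivially:

    #include <stdio.h>

    int main(void)
    {
            FILE *f = fopen("/proc/sys/kernel/perf_event_max_stack", "r");
            int max;

            if (!f) {
                    perror("fopen");
                    return 1;
            }
            if (fscanf(f, "%d", &max) == 1)
                    printf("max perf callchain depth: %d\n", max);
            fclose(f);
            return 0;
    }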
diff --git a/arch/x86/events/intel/Makefile b/arch/x86/events/intel/Makefile
new file mode 100644
index 0000000..3660b2c
--- /dev/null
+++ b/arch/x86/events/intel/Makefile
@@ -0,0 +1,9 @@
+obj-$(CONFIG_CPU_SUP_INTEL) += core.o bts.o cqm.o
+obj-$(CONFIG_CPU_SUP_INTEL) += ds.o knc.o
+obj-$(CONFIG_CPU_SUP_INTEL) += lbr.o p4.o p6.o pt.o
+obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL) += intel-rapl.o
+intel-rapl-objs := rapl.o
+obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel-uncore.o
+intel-uncore-objs := uncore.o uncore_nhmex.o uncore_snb.o uncore_snbep.o
+obj-$(CONFIG_PERF_EVENTS_INTEL_CSTATE) += intel-cstate.o
+intel-cstate-objs := cstate.o
diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
index b99dc92..0a6e393 100644
--- a/arch/x86/events/intel/bts.c
+++ b/arch/x86/events/intel/bts.c
@@ -171,18 +171,6 @@
memset(page_address(phys->page) + index, 0, phys->size - index);
}
-static bool bts_buffer_is_full(struct bts_buffer *buf, struct bts_ctx *bts)
-{
- if (buf->snapshot)
- return false;
-
- if (local_read(&buf->data_size) >= bts->handle.size ||
- bts->handle.size - local_read(&buf->data_size) < BTS_RECORD_SIZE)
- return true;
-
- return false;
-}
-
static void bts_update(struct bts_ctx *bts)
{
int cpu = raw_smp_processor_id();
@@ -213,18 +201,15 @@
}
}
+static int
+bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle);
+
static void __bts_event_start(struct perf_event *event)
{
struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
struct bts_buffer *buf = perf_get_aux(&bts->handle);
u64 config = 0;
- if (!buf || bts_buffer_is_full(buf, bts))
- return;
-
- event->hw.itrace_started = 1;
- event->hw.state = 0;
-
if (!buf->snapshot)
config |= ARCH_PERFMON_EVENTSEL_INT;
if (!event->attr.exclude_kernel)
@@ -241,16 +226,41 @@
wmb();
intel_pmu_enable_bts(config);
+
}
static void bts_event_start(struct perf_event *event, int flags)
{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+ struct bts_buffer *buf;
+
+ buf = perf_aux_output_begin(&bts->handle, event);
+ if (!buf)
+ goto fail_stop;
+
+ if (bts_buffer_reset(buf, &bts->handle))
+ goto fail_end_stop;
+
+ bts->ds_back.bts_buffer_base = cpuc->ds->bts_buffer_base;
+ bts->ds_back.bts_absolute_maximum = cpuc->ds->bts_absolute_maximum;
+ bts->ds_back.bts_interrupt_threshold = cpuc->ds->bts_interrupt_threshold;
+
+ event->hw.itrace_started = 1;
+ event->hw.state = 0;
__bts_event_start(event);
/* PMI handler: this counter is running and likely generating PMIs */
ACCESS_ONCE(bts->started) = 1;
+
+ return;
+
+fail_end_stop:
+ perf_aux_output_end(&bts->handle, 0, false);
+
+fail_stop:
+ event->hw.state = PERF_HES_STOPPED;
}
static void __bts_event_stop(struct perf_event *event)
@@ -269,15 +279,32 @@
static void bts_event_stop(struct perf_event *event, int flags)
{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+ struct bts_buffer *buf = perf_get_aux(&bts->handle);
/* PMI handler: don't restart this counter */
ACCESS_ONCE(bts->started) = 0;
__bts_event_stop(event);
- if (flags & PERF_EF_UPDATE)
+ if (flags & PERF_EF_UPDATE) {
bts_update(bts);
+
+ if (buf) {
+ if (buf->snapshot)
+ bts->handle.head =
+ local_xchg(&buf->data_size,
+ buf->nr_pages << PAGE_SHIFT);
+ perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
+ !!local_xchg(&buf->lost, 0));
+ }
+
+ cpuc->ds->bts_index = bts->ds_back.bts_buffer_base;
+ cpuc->ds->bts_buffer_base = bts->ds_back.bts_buffer_base;
+ cpuc->ds->bts_absolute_maximum = bts->ds_back.bts_absolute_maximum;
+ cpuc->ds->bts_interrupt_threshold = bts->ds_back.bts_interrupt_threshold;
+ }
}
void intel_bts_enable_local(void)
@@ -417,34 +444,14 @@
static void bts_event_del(struct perf_event *event, int mode)
{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
- struct bts_buffer *buf = perf_get_aux(&bts->handle);
-
bts_event_stop(event, PERF_EF_UPDATE);
-
- if (buf) {
- if (buf->snapshot)
- bts->handle.head =
- local_xchg(&buf->data_size,
- buf->nr_pages << PAGE_SHIFT);
- perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
- !!local_xchg(&buf->lost, 0));
- }
-
- cpuc->ds->bts_index = bts->ds_back.bts_buffer_base;
- cpuc->ds->bts_buffer_base = bts->ds_back.bts_buffer_base;
- cpuc->ds->bts_absolute_maximum = bts->ds_back.bts_absolute_maximum;
- cpuc->ds->bts_interrupt_threshold = bts->ds_back.bts_interrupt_threshold;
}
static int bts_event_add(struct perf_event *event, int mode)
{
- struct bts_buffer *buf;
struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
- int ret = -EBUSY;
event->hw.state = PERF_HES_STOPPED;
@@ -454,26 +461,10 @@
if (bts->handle.event)
return -EBUSY;
- buf = perf_aux_output_begin(&bts->handle, event);
- if (!buf)
- return -EINVAL;
-
- ret = bts_buffer_reset(buf, &bts->handle);
- if (ret) {
- perf_aux_output_end(&bts->handle, 0, false);
- return ret;
- }
-
- bts->ds_back.bts_buffer_base = cpuc->ds->bts_buffer_base;
- bts->ds_back.bts_absolute_maximum = cpuc->ds->bts_absolute_maximum;
- bts->ds_back.bts_interrupt_threshold = cpuc->ds->bts_interrupt_threshold;
-
if (mode & PERF_EF_START) {
bts_event_start(event, 0);
- if (hwc->state & PERF_HES_STOPPED) {
- bts_event_del(event, 0);
- return -EBUSY;
- }
+ if (hwc->state & PERF_HES_STOPPED)
+ return -EINVAL;
}
return 0;
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 5210eaa..7c66695 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -1465,6 +1465,140 @@
},
};
+static struct extra_reg intel_glm_extra_regs[] __read_mostly = {
+ /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x760005ffbfull, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x360005ffbfull, RSP_1),
+ EVENT_EXTRA_END
+};
+
+#define GLM_DEMAND_DATA_RD BIT_ULL(0)
+#define GLM_DEMAND_RFO BIT_ULL(1)
+#define GLM_ANY_RESPONSE BIT_ULL(16)
+#define GLM_SNP_NONE_OR_MISS BIT_ULL(33)
+#define GLM_DEMAND_READ GLM_DEMAND_DATA_RD
+#define GLM_DEMAND_WRITE GLM_DEMAND_RFO
+#define GLM_DEMAND_PREFETCH (SNB_PF_DATA_RD|SNB_PF_RFO)
+#define GLM_LLC_ACCESS GLM_ANY_RESPONSE
+#define GLM_SNP_ANY (GLM_SNP_NONE_OR_MISS|SNB_NO_FWD|SNB_HITM)
+#define GLM_LLC_MISS (GLM_SNP_ANY|SNB_NON_DRAM)
+
+static __initconst const u64 glm_hw_cache_event_ids
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ [C(L1D)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */
+ [C(RESULT_MISS)] = 0x0,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */
+ [C(RESULT_MISS)] = 0x0,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = 0x0,
+ [C(RESULT_MISS)] = 0x0,
+ },
+ },
+ [C(L1I)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x0380, /* ICACHE.ACCESSES */
+ [C(RESULT_MISS)] = 0x0280, /* ICACHE.MISSES */
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = 0x0,
+ [C(RESULT_MISS)] = 0x0,
+ },
+ },
+ [C(LL)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x1b7, /* OFFCORE_RESPONSE */
+ [C(RESULT_MISS)] = 0x1b7, /* OFFCORE_RESPONSE */
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = 0x1b7, /* OFFCORE_RESPONSE */
+ [C(RESULT_MISS)] = 0x1b7, /* OFFCORE_RESPONSE */
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = 0x1b7, /* OFFCORE_RESPONSE */
+ [C(RESULT_MISS)] = 0x1b7, /* OFFCORE_RESPONSE */
+ },
+ },
+ [C(DTLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */
+ [C(RESULT_MISS)] = 0x0,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */
+ [C(RESULT_MISS)] = 0x0,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = 0x0,
+ [C(RESULT_MISS)] = 0x0,
+ },
+ },
+ [C(ITLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x00c0, /* INST_RETIRED.ANY_P */
+ [C(RESULT_MISS)] = 0x0481, /* ITLB.MISS */
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ },
+ [C(BPU)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
+ [C(RESULT_MISS)] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ },
+};
+
+static __initconst const u64 glm_hw_cache_extra_regs
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ [C(LL)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = GLM_DEMAND_READ|
+ GLM_LLC_ACCESS,
+ [C(RESULT_MISS)] = GLM_DEMAND_READ|
+ GLM_LLC_MISS,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = GLM_DEMAND_WRITE|
+ GLM_LLC_ACCESS,
+ [C(RESULT_MISS)] = GLM_DEMAND_WRITE|
+ GLM_LLC_MISS,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = GLM_DEMAND_PREFETCH|
+ GLM_LLC_ACCESS,
+ [C(RESULT_MISS)] = GLM_DEMAND_PREFETCH|
+ GLM_LLC_MISS,
+ },
+ },
+};
+
#define KNL_OT_L2_HITE BIT_ULL(19) /* Other Tile L2 Hit */
#define KNL_OT_L2_HITF BIT_ULL(20) /* Other Tile L2 Hit */
#define KNL_MCDRAM_LOCAL BIT_ULL(21)
@@ -3447,7 +3581,7 @@
memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
sizeof(hw_cache_extra_regs));
- intel_pmu_lbr_init_atom();
+ intel_pmu_lbr_init_slm();
x86_pmu.event_constraints = intel_slm_event_constraints;
x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
@@ -3456,6 +3590,30 @@
pr_cont("Silvermont events, ");
break;
+ case 92: /* 14nm Atom "Goldmont" */
+ case 95: /* 14nm Atom "Goldmont Denverton" */
+ memcpy(hw_cache_event_ids, glm_hw_cache_event_ids,
+ sizeof(hw_cache_event_ids));
+ memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs,
+ sizeof(hw_cache_extra_regs));
+
+ intel_pmu_lbr_init_skl();
+
+ x86_pmu.event_constraints = intel_slm_event_constraints;
+ x86_pmu.pebs_constraints = intel_glm_pebs_event_constraints;
+ x86_pmu.extra_regs = intel_glm_extra_regs;
+ /*
+ * It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS
+ * for precise cycles.
+ * :pp is identical to :ppp
+ */
+ x86_pmu.pebs_aliases = NULL;
+ x86_pmu.pebs_prec_dist = true;
+ x86_pmu.lbr_pt_coexist = true;
+ x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+ pr_cont("Goldmont events, ");
+ break;
+
case 37: /* 32nm Westmere */
case 44: /* 32nm Westmere-EP */
case 47: /* 32nm Westmere-EX */
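
Goldmont routes the generic last-level-cache events through OFFCORE_RESPONSE (event 0x1b7) plus the GLM_* demand/snoop masks programmed into the extra registers. User space never sees that plumbing; a sketch counting LLC read misses portably via perf_event_open(2), which the kernel then maps onto the tables above:

    #define _GNU_SOURCE
    #include <linux/perf_event.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    int main(void)
    {
            struct perf_event_attr attr;
            long long count;
            int fd;

            memset(&attr, 0, sizeof(attr));
            attr.type = PERF_TYPE_HW_CACHE;
            attr.size = sizeof(attr);
            attr.config = PERF_COUNT_HW_CACHE_LL |
                          (PERF_COUNT_HW_CACHE_OP_READ << 8) |
                          (PERF_COUNT_HW_CACHE_RESULT_MISS << 16);
            attr.disabled = 1;

            /* measure this thread (pid 0) on any CPU */
            fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
            if (fd < 0) {
                    perror("perf_event_open");
                    return 1;
            }
            ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
            /* the workload under test would run here */
            ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
            if (read(fd, &count, sizeof(count)) == sizeof(count))
                    printf("LLC read misses: %lld\n", count);
            close(fd);
            return 0;
    }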
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 7946c42..9ba4e41 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -91,6 +91,8 @@
#include <asm/cpu_device_id.h>
#include "../perf_event.h"
+MODULE_LICENSE("GPL");
+
#define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format) \
static ssize_t __cstate_##_var##_show(struct kobject *kobj, \
struct kobj_attribute *attr, \
@@ -106,22 +108,27 @@
struct device_attribute *attr,
char *buf);
+/* Model -> events mapping */
+struct cstate_model {
+ unsigned long core_events;
+ unsigned long pkg_events;
+ unsigned long quirks;
+};
+
+/* Quirk flags */
+#define SLM_PKG_C6_USE_C7_MSR (1UL << 0)
+
struct perf_cstate_msr {
u64 msr;
struct perf_pmu_events_attr *attr;
- bool (*test)(int idx);
};
/* cstate_core PMU */
-
static struct pmu cstate_core_pmu;
static bool has_cstate_core;
-enum perf_cstate_core_id {
- /*
- * cstate_core events
- */
+enum perf_cstate_core_events {
PERF_CSTATE_CORE_C1_RES = 0,
PERF_CSTATE_CORE_C3_RES,
PERF_CSTATE_CORE_C6_RES,
@@ -130,69 +137,16 @@
PERF_CSTATE_CORE_EVENT_MAX,
};
-bool test_core(int idx)
-{
- if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
- boot_cpu_data.x86 != 6)
- return false;
-
- switch (boot_cpu_data.x86_model) {
- case 30: /* 45nm Nehalem */
- case 26: /* 45nm Nehalem-EP */
- case 46: /* 45nm Nehalem-EX */
-
- case 37: /* 32nm Westmere */
- case 44: /* 32nm Westmere-EP */
- case 47: /* 32nm Westmere-EX */
- if (idx == PERF_CSTATE_CORE_C3_RES ||
- idx == PERF_CSTATE_CORE_C6_RES)
- return true;
- break;
- case 42: /* 32nm SandyBridge */
- case 45: /* 32nm SandyBridge-E/EN/EP */
-
- case 58: /* 22nm IvyBridge */
- case 62: /* 22nm IvyBridge-EP/EX */
-
- case 60: /* 22nm Haswell Core */
- case 63: /* 22nm Haswell Server */
- case 69: /* 22nm Haswell ULT */
- case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
-
- case 61: /* 14nm Broadwell Core-M */
- case 86: /* 14nm Broadwell Xeon D */
- case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
- case 79: /* 14nm Broadwell Server */
-
- case 78: /* 14nm Skylake Mobile */
- case 94: /* 14nm Skylake Desktop */
- if (idx == PERF_CSTATE_CORE_C3_RES ||
- idx == PERF_CSTATE_CORE_C6_RES ||
- idx == PERF_CSTATE_CORE_C7_RES)
- return true;
- break;
- case 55: /* 22nm Atom "Silvermont" */
- case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
- case 76: /* 14nm Atom "Airmont" */
- if (idx == PERF_CSTATE_CORE_C1_RES ||
- idx == PERF_CSTATE_CORE_C6_RES)
- return true;
- break;
- }
-
- return false;
-}
-
PMU_EVENT_ATTR_STRING(c1-residency, evattr_cstate_core_c1, "event=0x00");
PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_core_c3, "event=0x01");
PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_core_c6, "event=0x02");
PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_core_c7, "event=0x03");
static struct perf_cstate_msr core_msr[] = {
- [PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES, &evattr_cstate_core_c1, test_core, },
- [PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY, &evattr_cstate_core_c3, test_core, },
- [PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY, &evattr_cstate_core_c6, test_core, },
- [PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY, &evattr_cstate_core_c7, test_core, },
+ [PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES, &evattr_cstate_core_c1 },
+ [PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY, &evattr_cstate_core_c3 },
+ [PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY, &evattr_cstate_core_c6 },
+ [PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY, &evattr_cstate_core_c7 },
};
static struct attribute *core_events_attrs[PERF_CSTATE_CORE_EVENT_MAX + 1] = {
@@ -234,18 +188,11 @@
NULL,
};
-/* cstate_core PMU end */
-
-
/* cstate_pkg PMU */
-
static struct pmu cstate_pkg_pmu;
static bool has_cstate_pkg;
-enum perf_cstate_pkg_id {
- /*
- * cstate_pkg events
- */
+enum perf_cstate_pkg_events {
PERF_CSTATE_PKG_C2_RES = 0,
PERF_CSTATE_PKG_C3_RES,
PERF_CSTATE_PKG_C6_RES,
@@ -257,69 +204,6 @@
PERF_CSTATE_PKG_EVENT_MAX,
};
-bool test_pkg(int idx)
-{
- if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
- boot_cpu_data.x86 != 6)
- return false;
-
- switch (boot_cpu_data.x86_model) {
- case 30: /* 45nm Nehalem */
- case 26: /* 45nm Nehalem-EP */
- case 46: /* 45nm Nehalem-EX */
-
- case 37: /* 32nm Westmere */
- case 44: /* 32nm Westmere-EP */
- case 47: /* 32nm Westmere-EX */
- if (idx == PERF_CSTATE_CORE_C3_RES ||
- idx == PERF_CSTATE_CORE_C6_RES ||
- idx == PERF_CSTATE_CORE_C7_RES)
- return true;
- break;
- case 42: /* 32nm SandyBridge */
- case 45: /* 32nm SandyBridge-E/EN/EP */
-
- case 58: /* 22nm IvyBridge */
- case 62: /* 22nm IvyBridge-EP/EX */
-
- case 60: /* 22nm Haswell Core */
- case 63: /* 22nm Haswell Server */
- case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
-
- case 61: /* 14nm Broadwell Core-M */
- case 86: /* 14nm Broadwell Xeon D */
- case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
- case 79: /* 14nm Broadwell Server */
-
- case 78: /* 14nm Skylake Mobile */
- case 94: /* 14nm Skylake Desktop */
- if (idx == PERF_CSTATE_PKG_C2_RES ||
- idx == PERF_CSTATE_PKG_C3_RES ||
- idx == PERF_CSTATE_PKG_C6_RES ||
- idx == PERF_CSTATE_PKG_C7_RES)
- return true;
- break;
- case 55: /* 22nm Atom "Silvermont" */
- case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
- case 76: /* 14nm Atom "Airmont" */
- if (idx == PERF_CSTATE_CORE_C6_RES)
- return true;
- break;
- case 69: /* 22nm Haswell ULT */
- if (idx == PERF_CSTATE_PKG_C2_RES ||
- idx == PERF_CSTATE_PKG_C3_RES ||
- idx == PERF_CSTATE_PKG_C6_RES ||
- idx == PERF_CSTATE_PKG_C7_RES ||
- idx == PERF_CSTATE_PKG_C8_RES ||
- idx == PERF_CSTATE_PKG_C9_RES ||
- idx == PERF_CSTATE_PKG_C10_RES)
- return true;
- break;
- }
-
- return false;
-}
-
PMU_EVENT_ATTR_STRING(c2-residency, evattr_cstate_pkg_c2, "event=0x00");
PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_pkg_c3, "event=0x01");
PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_pkg_c6, "event=0x02");
@@ -329,13 +213,13 @@
PMU_EVENT_ATTR_STRING(c10-residency, evattr_cstate_pkg_c10, "event=0x06");
static struct perf_cstate_msr pkg_msr[] = {
- [PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY, &evattr_cstate_pkg_c2, test_pkg, },
- [PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY, &evattr_cstate_pkg_c3, test_pkg, },
- [PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY, &evattr_cstate_pkg_c6, test_pkg, },
- [PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY, &evattr_cstate_pkg_c7, test_pkg, },
- [PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY, &evattr_cstate_pkg_c8, test_pkg, },
- [PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY, &evattr_cstate_pkg_c9, test_pkg, },
- [PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY, &evattr_cstate_pkg_c10, test_pkg, },
+ [PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY, &evattr_cstate_pkg_c2 },
+ [PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY, &evattr_cstate_pkg_c3 },
+ [PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY, &evattr_cstate_pkg_c6 },
+ [PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY, &evattr_cstate_pkg_c7 },
+ [PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY, &evattr_cstate_pkg_c8 },
+ [PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY, &evattr_cstate_pkg_c9 },
+ [PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY, &evattr_cstate_pkg_c10 },
};
static struct attribute *pkg_events_attrs[PERF_CSTATE_PKG_EVENT_MAX + 1] = {
@@ -366,8 +250,6 @@
NULL,
};
-/* cstate_pkg PMU end*/
-
static ssize_t cstate_get_attr_cpumask(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -385,7 +267,7 @@
static int cstate_pmu_event_init(struct perf_event *event)
{
u64 cfg = event->attr.config;
- int ret = 0;
+ int cpu;
if (event->attr.type != event->pmu->type)
return -ENOENT;
@@ -400,26 +282,36 @@
event->attr.sample_period) /* no sampling */
return -EINVAL;
+ if (event->cpu < 0)
+ return -EINVAL;
+
if (event->pmu == &cstate_core_pmu) {
if (cfg >= PERF_CSTATE_CORE_EVENT_MAX)
return -EINVAL;
if (!core_msr[cfg].attr)
return -EINVAL;
event->hw.event_base = core_msr[cfg].msr;
+ cpu = cpumask_any_and(&cstate_core_cpu_mask,
+ topology_sibling_cpumask(event->cpu));
} else if (event->pmu == &cstate_pkg_pmu) {
if (cfg >= PERF_CSTATE_PKG_EVENT_MAX)
return -EINVAL;
if (!pkg_msr[cfg].attr)
return -EINVAL;
event->hw.event_base = pkg_msr[cfg].msr;
- } else
+ cpu = cpumask_any_and(&cstate_pkg_cpu_mask,
+ topology_core_cpumask(event->cpu));
+ } else {
return -ENOENT;
+ }
- /* must be done before validate_group */
+ if (cpu >= nr_cpu_ids)
+ return -ENODEV;
+
+ event->cpu = cpu;
event->hw.config = cfg;
event->hw.idx = -1;
-
- return ret;
+ return 0;
}
static inline u64 cstate_pmu_read_counter(struct perf_event *event)
@@ -469,172 +361,91 @@
return 0;
}
+/*
+ * Check if the exiting CPU is the designated reader. If so, migrate the
+ * events when there is a valid target available.
+ */
static void cstate_cpu_exit(int cpu)
{
- int i, id, target;
+ unsigned int target;
- /* cpu exit for cstate core */
- if (has_cstate_core) {
- id = topology_core_id(cpu);
- target = -1;
+ if (has_cstate_core &&
+ cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask)) {
- for_each_online_cpu(i) {
- if (i == cpu)
- continue;
- if (id == topology_core_id(i)) {
- target = i;
- break;
- }
- }
- if (cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask) && target >= 0)
+ target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
+ /* Migrate events if there is a valid target */
+ if (target < nr_cpu_ids) {
cpumask_set_cpu(target, &cstate_core_cpu_mask);
- WARN_ON(cpumask_empty(&cstate_core_cpu_mask));
- if (target >= 0)
perf_pmu_migrate_context(&cstate_core_pmu, cpu, target);
+ }
}
- /* cpu exit for cstate pkg */
- if (has_cstate_pkg) {
- id = topology_physical_package_id(cpu);
- target = -1;
+ if (has_cstate_pkg &&
+ cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask)) {
- for_each_online_cpu(i) {
- if (i == cpu)
- continue;
- if (id == topology_physical_package_id(i)) {
- target = i;
- break;
- }
- }
- if (cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask) && target >= 0)
+ target = cpumask_any_but(topology_core_cpumask(cpu), cpu);
+ /* Migrate events if there is a valid target */
+ if (target < nr_cpu_ids) {
cpumask_set_cpu(target, &cstate_pkg_cpu_mask);
- WARN_ON(cpumask_empty(&cstate_pkg_cpu_mask));
- if (target >= 0)
perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target);
+ }
}
}
static void cstate_cpu_init(int cpu)
{
- int i, id;
+ unsigned int target;
- /* cpu init for cstate core */
- if (has_cstate_core) {
- id = topology_core_id(cpu);
- for_each_cpu(i, &cstate_core_cpu_mask) {
- if (id == topology_core_id(i))
- break;
- }
- if (i >= nr_cpu_ids)
- cpumask_set_cpu(cpu, &cstate_core_cpu_mask);
- }
+ /*
+ * If this is the first online thread of that core, set it in
+ * the core cpu mask as the designated reader.
+ */
+ target = cpumask_any_and(&cstate_core_cpu_mask,
+ topology_sibling_cpumask(cpu));
- /* cpu init for cstate pkg */
- if (has_cstate_pkg) {
- id = topology_physical_package_id(cpu);
- for_each_cpu(i, &cstate_pkg_cpu_mask) {
- if (id == topology_physical_package_id(i))
- break;
- }
- if (i >= nr_cpu_ids)
- cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
- }
+ if (has_cstate_core && target >= nr_cpu_ids)
+ cpumask_set_cpu(cpu, &cstate_core_cpu_mask);
+
+ /*
+ * If this is the first online thread of that package, set it
+ * in the package cpu mask as the designated reader.
+ */
+ target = cpumask_any_and(&cstate_pkg_cpu_mask,
+ topology_core_cpumask(cpu));
+ if (has_cstate_pkg && target >= nr_cpu_ids)
+ cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);
}
static int cstate_cpu_notifier(struct notifier_block *self,
- unsigned long action, void *hcpu)
+ unsigned long action, void *hcpu)
{
unsigned int cpu = (long)hcpu;
switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_UP_PREPARE:
- break;
case CPU_STARTING:
cstate_cpu_init(cpu);
break;
- case CPU_UP_CANCELED:
- case CPU_DYING:
- break;
- case CPU_ONLINE:
- case CPU_DEAD:
- break;
case CPU_DOWN_PREPARE:
cstate_cpu_exit(cpu);
break;
default:
break;
}
-
return NOTIFY_OK;
}
-/*
- * Probe the cstate events and insert the available one into sysfs attrs
- * Return false if there is no available events.
- */
-static bool cstate_probe_msr(struct perf_cstate_msr *msr,
- struct attribute **events_attrs,
- int max_event_nr)
-{
- int i, j = 0;
- u64 val;
-
- /* Probe the cstate events. */
- for (i = 0; i < max_event_nr; i++) {
- if (!msr[i].test(i) || rdmsrl_safe(msr[i].msr, &val))
- msr[i].attr = NULL;
- }
-
- /* List remaining events in the sysfs attrs. */
- for (i = 0; i < max_event_nr; i++) {
- if (msr[i].attr)
- events_attrs[j++] = &msr[i].attr->attr.attr;
- }
- events_attrs[j] = NULL;
-
- return (j > 0) ? true : false;
-}
-
-static int __init cstate_init(void)
-{
- /* SLM has different MSR for PKG C6 */
- switch (boot_cpu_data.x86_model) {
- case 55:
- case 76:
- case 77:
- pkg_msr[PERF_CSTATE_PKG_C6_RES].msr = MSR_PKG_C7_RESIDENCY;
- }
-
- if (cstate_probe_msr(core_msr, core_events_attrs, PERF_CSTATE_CORE_EVENT_MAX))
- has_cstate_core = true;
-
- if (cstate_probe_msr(pkg_msr, pkg_events_attrs, PERF_CSTATE_PKG_EVENT_MAX))
- has_cstate_pkg = true;
-
- return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV;
-}
-
-static void __init cstate_cpumask_init(void)
-{
- int cpu;
-
- cpu_notifier_register_begin();
-
- for_each_online_cpu(cpu)
- cstate_cpu_init(cpu);
-
- __perf_cpu_notifier(cstate_cpu_notifier);
-
- cpu_notifier_register_done();
-}
+static struct notifier_block cstate_cpu_nb = {
+ .notifier_call = cstate_cpu_notifier,
+ .priority = CPU_PRI_PERF + 1,
+};
static struct pmu cstate_core_pmu = {
.attr_groups = core_attr_groups,
.name = "cstate_core",
.task_ctx_nr = perf_invalid_context,
.event_init = cstate_pmu_event_init,
- .add = cstate_pmu_event_add, /* must have */
- .del = cstate_pmu_event_del, /* must have */
+ .add = cstate_pmu_event_add,
+ .del = cstate_pmu_event_del,
.start = cstate_pmu_event_start,
.stop = cstate_pmu_event_stop,
.read = cstate_pmu_event_update,
@@ -646,49 +457,203 @@
.name = "cstate_pkg",
.task_ctx_nr = perf_invalid_context,
.event_init = cstate_pmu_event_init,
- .add = cstate_pmu_event_add, /* must have */
- .del = cstate_pmu_event_del, /* must have */
+ .add = cstate_pmu_event_add,
+ .del = cstate_pmu_event_del,
.start = cstate_pmu_event_start,
.stop = cstate_pmu_event_stop,
.read = cstate_pmu_event_update,
.capabilities = PERF_PMU_CAP_NO_INTERRUPT,
};
-static void __init cstate_pmus_register(void)
+static const struct cstate_model nhm_cstates __initconst = {
+ .core_events = BIT(PERF_CSTATE_CORE_C3_RES) |
+ BIT(PERF_CSTATE_CORE_C6_RES),
+
+ .pkg_events = BIT(PERF_CSTATE_PKG_C3_RES) |
+ BIT(PERF_CSTATE_PKG_C6_RES) |
+ BIT(PERF_CSTATE_PKG_C7_RES),
+};
+
+static const struct cstate_model snb_cstates __initconst = {
+ .core_events = BIT(PERF_CSTATE_CORE_C3_RES) |
+ BIT(PERF_CSTATE_CORE_C6_RES) |
+ BIT(PERF_CSTATE_CORE_C7_RES),
+
+ .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) |
+ BIT(PERF_CSTATE_PKG_C3_RES) |
+ BIT(PERF_CSTATE_PKG_C6_RES) |
+ BIT(PERF_CSTATE_PKG_C7_RES),
+};
+
+static const struct cstate_model hswult_cstates __initconst = {
+ .core_events = BIT(PERF_CSTATE_CORE_C3_RES) |
+ BIT(PERF_CSTATE_CORE_C6_RES) |
+ BIT(PERF_CSTATE_CORE_C7_RES),
+
+ .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) |
+ BIT(PERF_CSTATE_PKG_C3_RES) |
+ BIT(PERF_CSTATE_PKG_C6_RES) |
+ BIT(PERF_CSTATE_PKG_C7_RES) |
+ BIT(PERF_CSTATE_PKG_C8_RES) |
+ BIT(PERF_CSTATE_PKG_C9_RES) |
+ BIT(PERF_CSTATE_PKG_C10_RES),
+};
+
+static const struct cstate_model slm_cstates __initconst = {
+ .core_events = BIT(PERF_CSTATE_CORE_C1_RES) |
+ BIT(PERF_CSTATE_CORE_C6_RES),
+
+ .pkg_events = BIT(PERF_CSTATE_PKG_C6_RES),
+ .quirks = SLM_PKG_C6_USE_C7_MSR,
+};
+
+#define X86_CSTATES_MODEL(model, states) \
+ { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long) &(states) }
+
+static const struct x86_cpu_id intel_cstates_match[] __initconst = {
+ X86_CSTATES_MODEL(30, nhm_cstates), /* 45nm Nehalem */
+ X86_CSTATES_MODEL(26, nhm_cstates), /* 45nm Nehalem-EP */
+ X86_CSTATES_MODEL(46, nhm_cstates), /* 45nm Nehalem-EX */
+
+ X86_CSTATES_MODEL(37, nhm_cstates), /* 32nm Westmere */
+ X86_CSTATES_MODEL(44, nhm_cstates), /* 32nm Westmere-EP */
+ X86_CSTATES_MODEL(47, nhm_cstates), /* 32nm Westmere-EX */
+
+ X86_CSTATES_MODEL(42, snb_cstates), /* 32nm SandyBridge */
+ X86_CSTATES_MODEL(45, snb_cstates), /* 32nm SandyBridge-E/EN/EP */
+
+ X86_CSTATES_MODEL(58, snb_cstates), /* 22nm IvyBridge */
+ X86_CSTATES_MODEL(62, snb_cstates), /* 22nm IvyBridge-EP/EX */
+
+ X86_CSTATES_MODEL(60, snb_cstates), /* 22nm Haswell Core */
+ X86_CSTATES_MODEL(63, snb_cstates), /* 22nm Haswell Server */
+ X86_CSTATES_MODEL(70, snb_cstates), /* 22nm Haswell + GT3e */
+
+ X86_CSTATES_MODEL(69, hswult_cstates), /* 22nm Haswell ULT */
+
+ X86_CSTATES_MODEL(55, slm_cstates), /* 22nm Atom Silvermont */
+ X86_CSTATES_MODEL(77, slm_cstates), /* 22nm Atom Avoton/Rangely */
+ X86_CSTATES_MODEL(76, slm_cstates), /* 22nm Atom Airmont */
+
+ X86_CSTATES_MODEL(61, snb_cstates), /* 14nm Broadwell Core-M */
+ X86_CSTATES_MODEL(86, snb_cstates), /* 14nm Broadwell Xeon D */
+ X86_CSTATES_MODEL(71, snb_cstates), /* 14nm Broadwell + GT3e */
+ X86_CSTATES_MODEL(79, snb_cstates), /* 14nm Broadwell Server */
+
+ X86_CSTATES_MODEL(78, snb_cstates), /* 14nm Skylake Mobile */
+ X86_CSTATES_MODEL(94, snb_cstates), /* 14nm Skylake Desktop */
+ { },
+};
+MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
+
+/*
+ * Probe the cstate events and insert the available ones into the sysfs attrs.
+ * Return false if there are no available events.
+ */
+static bool __init cstate_probe_msr(const unsigned long evmsk, int max,
+ struct perf_cstate_msr *msr,
+ struct attribute **attrs)
{
- int err;
+ bool found = false;
+ unsigned int bit;
+ u64 val;
+
+ for (bit = 0; bit < max; bit++) {
+ if (test_bit(bit, &evmsk) && !rdmsrl_safe(msr[bit].msr, &val)) {
+ *attrs++ = &msr[bit].attr->attr.attr;
+ found = true;
+ } else {
+ msr[bit].attr = NULL;
+ }
+ }
+ *attrs = NULL;
+
+ return found;
+}
+
+static int __init cstate_probe(const struct cstate_model *cm)
+{
+ /* SLM has different MSR for PKG C6 */
+ if (cm->quirks & SLM_PKG_C6_USE_C7_MSR)
+ pkg_msr[PERF_CSTATE_PKG_C6_RES].msr = MSR_PKG_C7_RESIDENCY;
+
+ has_cstate_core = cstate_probe_msr(cm->core_events,
+ PERF_CSTATE_CORE_EVENT_MAX,
+ core_msr, core_events_attrs);
+
+ has_cstate_pkg = cstate_probe_msr(cm->pkg_events,
+ PERF_CSTATE_PKG_EVENT_MAX,
+ pkg_msr, pkg_events_attrs);
+
+ return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV;
+}
+
+static inline void cstate_cleanup(void)
+{
+ if (has_cstate_core)
+ perf_pmu_unregister(&cstate_core_pmu);
+
+ if (has_cstate_pkg)
+ perf_pmu_unregister(&cstate_pkg_pmu);
+}
+
+static int __init cstate_init(void)
+{
+ int cpu, err;
+
+ cpu_notifier_register_begin();
+ for_each_online_cpu(cpu)
+ cstate_cpu_init(cpu);
if (has_cstate_core) {
err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1);
- if (WARN_ON(err))
- pr_info("Failed to register PMU %s error %d\n",
- cstate_core_pmu.name, err);
+ if (err) {
+ has_cstate_core = false;
+ pr_info("Failed to register cstate core pmu\n");
+ goto out;
+ }
}
if (has_cstate_pkg) {
err = perf_pmu_register(&cstate_pkg_pmu, cstate_pkg_pmu.name, -1);
- if (WARN_ON(err))
- pr_info("Failed to register PMU %s error %d\n",
- cstate_pkg_pmu.name, err);
+ if (err) {
+ has_cstate_pkg = false;
+ pr_info("Failed to register cstate pkg pmu\n");
+ cstate_cleanup();
+ goto out;
+ }
}
+ __register_cpu_notifier(&cstate_cpu_nb);
+out:
+ cpu_notifier_register_done();
+ return err;
}
static int __init cstate_pmu_init(void)
{
+ const struct x86_cpu_id *id;
int err;
- if (cpu_has_hypervisor)
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
return -ENODEV;
- err = cstate_init();
+ id = x86_match_cpu(intel_cstates_match);
+ if (!id)
+ return -ENODEV;
+
+ err = cstate_probe((const struct cstate_model *) id->driver_data);
if (err)
return err;
- cstate_cpumask_init();
-
- cstate_pmus_register();
-
- return 0;
+ return cstate_init();
}
+module_init(cstate_pmu_init);
-device_initcall(cstate_pmu_init);
+static void __exit cstate_pmu_exit(void)
+{
+ cpu_notifier_register_begin();
+ __unregister_cpu_notifier(&cstate_cpu_nb);
+ cstate_cleanup();
+ cpu_notifier_register_done();
+}
+module_exit(cstate_pmu_exit);
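
Converted to a module, the driver exposes two dynamic PMUs, cstate_core and cstate_pkg, whose event encodings are the attribute strings above (c6-residency is event=0x02 on the core PMU). A sketch reading one residency delta over a second; system-wide events like this need root or a permissive kernel.perf_event_paranoid:

    #define _GNU_SOURCE
    #include <linux/perf_event.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    int main(void)
    {
            struct perf_event_attr attr;
            long long residency;
            FILE *f;
            int type, fd;

            /* dynamic PMU type, present only when the driver is loaded */
            f = fopen("/sys/bus/event_source/devices/cstate_core/type", "r");
            if (!f || fscanf(f, "%d", &type) != 1) {
                    fprintf(stderr, "cstate_core PMU not available\n");
                    return 1;
            }
            fclose(f);

            memset(&attr, 0, sizeof(attr));
            attr.type = type;
            attr.size = sizeof(attr);
            attr.config = 0x02;             /* c6-residency */

            /* CPU-wide counting event on CPU 0 (pid == -1, cpu == 0) */
            fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
            if (fd < 0) {
                    perror("perf_event_open");
                    return 1;
            }
            sleep(1);
            if (read(fd, &residency, sizeof(residency)) == sizeof(residency))
                    printf("CPU0 core C6 residency: %lld\n", residency);
            close(fd);
            return 0;
    }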
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 8584b90..7ce9f3f 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -645,6 +645,12 @@
EVENT_CONSTRAINT_END
};
+struct event_constraint intel_glm_pebs_event_constraints[] = {
+ /* Allow all events as PEBS with no flags */
+ INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
+ EVENT_CONSTRAINT_END
+};
+
struct event_constraint intel_nehalem_pebs_event_constraints[] = {
INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */
INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 1ca5d1e..9e2b40c 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -14,7 +14,8 @@
LBR_FORMAT_EIP_FLAGS = 0x03,
LBR_FORMAT_EIP_FLAGS2 = 0x04,
LBR_FORMAT_INFO = 0x05,
- LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_INFO,
+ LBR_FORMAT_TIME = 0x06,
+ LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_TIME,
};
static enum {
@@ -464,6 +465,16 @@
abort = !!(info & LBR_INFO_ABORT);
cycles = (info & LBR_INFO_CYCLES);
}
+
+ if (lbr_format == LBR_FORMAT_TIME) {
+ mis = !!(from & LBR_FROM_FLAG_MISPRED);
+ pred = !mis;
+ skip = 1;
+ cycles = ((to >> 48) & LBR_INFO_CYCLES);
+
+ to = (u64)((((s64)to) << 16) >> 16);
+ }
+
if (lbr_flags & LBR_EIP_FLAGS) {
mis = !!(from & LBR_FROM_FLAG_MISPRED);
pred = !mis;
@@ -1049,6 +1060,24 @@
pr_cont("8-deep LBR, ");
}
+/* slm */
+void __init intel_pmu_lbr_init_slm(void)
+{
+ x86_pmu.lbr_nr = 8;
+ x86_pmu.lbr_tos = MSR_LBR_TOS;
+ x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
+ x86_pmu.lbr_to = MSR_LBR_CORE_TO;
+
+ x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+ x86_pmu.lbr_sel_map = nhm_lbr_sel_map;
+
+ /*
+ * SW branch filter usage:
+ * - compensate for lack of HW filter
+ */
+ pr_cont("8-deep LBR, ");
+}
+
/* Knights Landing */
void intel_pmu_lbr_init_knl(void)
{
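
In the new LBR_FORMAT_TIME layout there is no separate LBR_INFO MSR: the TO register carries the cycle count in bits 63:48 and a 48-bit target address below it, which must be sign-extended back to canonical form. A freestanding sketch of exactly that unpacking (the shift pair performs the same sign extension as the kernel code above):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t to = 0x1234ffff81000000ULL;    /* made-up packed value */
            unsigned int cycles = (to >> 48) & 0xffff;
            /* shift the address up, then arithmetic-shift back down to
             * sign-extend the low 48 bits into a canonical address */
            uint64_t addr = (uint64_t)((int64_t)(to << 16) >> 16);

            printf("cycles=%u target=0x%016llx\n",
                   cycles, (unsigned long long)addr);
            return 0;
    }

For the sample value this prints cycles=4660 and target=0xffffffff81000000, a canonical kernel-space address.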
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 7377814..04bb5fb 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -67,11 +67,13 @@
PT_CAP(max_subleaf, 0, CR_EAX, 0xffffffff),
PT_CAP(cr3_filtering, 0, CR_EBX, BIT(0)),
PT_CAP(psb_cyc, 0, CR_EBX, BIT(1)),
+ PT_CAP(ip_filtering, 0, CR_EBX, BIT(2)),
PT_CAP(mtc, 0, CR_EBX, BIT(3)),
PT_CAP(topa_output, 0, CR_ECX, BIT(0)),
PT_CAP(topa_multiple_entries, 0, CR_ECX, BIT(1)),
PT_CAP(single_range_output, 0, CR_ECX, BIT(2)),
PT_CAP(payloads_lip, 0, CR_ECX, BIT(31)),
+ PT_CAP(num_address_ranges, 1, CR_EAX, 0x3),
PT_CAP(mtc_periods, 1, CR_EAX, 0xffff0000),
PT_CAP(cycle_thresholds, 1, CR_EBX, 0xffff),
PT_CAP(psb_periods, 1, CR_EBX, 0xffff0000),
@@ -125,9 +127,46 @@
.attrs = pt_formats_attr,
};
+static ssize_t
+pt_timing_attr_show(struct device *dev, struct device_attribute *attr,
+ char *page)
+{
+ struct perf_pmu_events_attr *pmu_attr =
+ container_of(attr, struct perf_pmu_events_attr, attr);
+
+ switch (pmu_attr->id) {
+ case 0:
+ return sprintf(page, "%lu\n", pt_pmu.max_nonturbo_ratio);
+ case 1:
+ return sprintf(page, "%u:%u\n",
+ pt_pmu.tsc_art_num,
+ pt_pmu.tsc_art_den);
+ default:
+ break;
+ }
+
+ return -EINVAL;
+}
+
+PMU_EVENT_ATTR(max_nonturbo_ratio, timing_attr_max_nonturbo_ratio, 0,
+ pt_timing_attr_show);
+PMU_EVENT_ATTR(tsc_art_ratio, timing_attr_tsc_art_ratio, 1,
+ pt_timing_attr_show);
+
+static struct attribute *pt_timing_attr[] = {
+ &timing_attr_max_nonturbo_ratio.attr.attr,
+ &timing_attr_tsc_art_ratio.attr.attr,
+ NULL,
+};
+
+static struct attribute_group pt_timing_group = {
+ .attrs = pt_timing_attr,
+};
+
static const struct attribute_group *pt_attr_groups[] = {
&pt_cap_group,
&pt_format_group,
+ &pt_timing_group,
NULL,
};
@@ -140,6 +179,23 @@
int ret;
long i;
+ rdmsrl(MSR_PLATFORM_INFO, reg);
+ pt_pmu.max_nonturbo_ratio = (reg & 0xff00) >> 8;
+
+ /*
+ * If available, read the TSC to core crystal clock ratio;
+ * otherwise, a numerator of zero stands for "not enumerated",
+ * as per the SDM.
+ */
+ if (boot_cpu_data.cpuid_level >= CPUID_TSC_LEAF) {
+ u32 eax, ebx, ecx, edx;
+
+ cpuid(CPUID_TSC_LEAF, &eax, &ebx, &ecx, &edx);
+
+ pt_pmu.tsc_art_num = ebx;
+ pt_pmu.tsc_art_den = eax;
+ }
+
if (boot_cpu_has(X86_FEATURE_VMX)) {
/*
* Intel SDM, 36.5 "Tracing post-VMXON" says that
@@ -263,6 +319,75 @@
* These all are cpu affine and operate on a local PT
*/
+/* Address ranges and their corresponding msr configuration registers */
+static const struct pt_address_range {
+ unsigned long msr_a;
+ unsigned long msr_b;
+ unsigned int reg_off;
+} pt_address_ranges[] = {
+ {
+ .msr_a = MSR_IA32_RTIT_ADDR0_A,
+ .msr_b = MSR_IA32_RTIT_ADDR0_B,
+ .reg_off = RTIT_CTL_ADDR0_OFFSET,
+ },
+ {
+ .msr_a = MSR_IA32_RTIT_ADDR1_A,
+ .msr_b = MSR_IA32_RTIT_ADDR1_B,
+ .reg_off = RTIT_CTL_ADDR1_OFFSET,
+ },
+ {
+ .msr_a = MSR_IA32_RTIT_ADDR2_A,
+ .msr_b = MSR_IA32_RTIT_ADDR2_B,
+ .reg_off = RTIT_CTL_ADDR2_OFFSET,
+ },
+ {
+ .msr_a = MSR_IA32_RTIT_ADDR3_A,
+ .msr_b = MSR_IA32_RTIT_ADDR3_B,
+ .reg_off = RTIT_CTL_ADDR3_OFFSET,
+ }
+};
+
+static u64 pt_config_filters(struct perf_event *event)
+{
+ struct pt_filters *filters = event->hw.addr_filters;
+ struct pt *pt = this_cpu_ptr(&pt_ctx);
+ unsigned int range = 0;
+ u64 rtit_ctl = 0;
+
+ if (!filters)
+ return 0;
+
+ perf_event_addr_filters_sync(event);
+
+ for (range = 0; range < filters->nr_filters; range++) {
+ struct pt_filter *filter = &filters->filter[range];
+
+ /*
+ * Note, if the range has zero start/end addresses due
+ * to its dynamic object not being loaded yet, we just
+ * go ahead and program a zeroed range, which will simply
+ * produce no data. Note^2: if executable code at 0x0
+ * is a concern, we can set up an "invalid" configuration
+ * such as msr_b < msr_a.
+ */
+
+ /* avoid redundant msr writes */
+ if (pt->filters.filter[range].msr_a != filter->msr_a) {
+ wrmsrl(pt_address_ranges[range].msr_a, filter->msr_a);
+ pt->filters.filter[range].msr_a = filter->msr_a;
+ }
+
+ if (pt->filters.filter[range].msr_b != filter->msr_b) {
+ wrmsrl(pt_address_ranges[range].msr_b, filter->msr_b);
+ pt->filters.filter[range].msr_b = filter->msr_b;
+ }
+
+ rtit_ctl |= filter->config << pt_address_ranges[range].reg_off;
+ }
+
+ return rtit_ctl;
+}
+
static void pt_config(struct perf_event *event)
{
u64 reg;
@@ -272,7 +397,8 @@
wrmsrl(MSR_IA32_RTIT_STATUS, 0);
}
- reg = RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN;
+ reg = pt_config_filters(event);
+ reg |= RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN;
if (!event->attr.exclude_kernel)
reg |= RTIT_CTL_OS;
@@ -921,24 +1047,80 @@
kfree(buf);
}
-/**
- * pt_buffer_is_full() - check if the buffer is full
- * @buf: PT buffer.
- * @pt: Per-cpu pt handle.
- *
- * If the user hasn't read data from the output region that aux_head
- * points to, the buffer is considered full: the user needs to read at
- * least this region and update aux_tail to point past it.
- */
-static bool pt_buffer_is_full(struct pt_buffer *buf, struct pt *pt)
+static int pt_addr_filters_init(struct perf_event *event)
{
- if (buf->snapshot)
- return false;
+ struct pt_filters *filters;
+ int node = event->cpu == -1 ? -1 : cpu_to_node(event->cpu);
- if (local_read(&buf->data_size) >= pt->handle.size)
- return true;
+ if (!pt_cap_get(PT_CAP_num_address_ranges))
+ return 0;
- return false;
+ filters = kzalloc_node(sizeof(struct pt_filters), GFP_KERNEL, node);
+ if (!filters)
+ return -ENOMEM;
+
+ if (event->parent)
+ memcpy(filters, event->parent->hw.addr_filters,
+ sizeof(*filters));
+
+ event->hw.addr_filters = filters;
+
+ return 0;
+}
+
+static void pt_addr_filters_fini(struct perf_event *event)
+{
+ kfree(event->hw.addr_filters);
+ event->hw.addr_filters = NULL;
+}
+
+static int pt_event_addr_filters_validate(struct list_head *filters)
+{
+ struct perf_addr_filter *filter;
+ int range = 0;
+
+ list_for_each_entry(filter, filters, entry) {
+ /* PT doesn't support single address triggers */
+ if (!filter->range)
+ return -EOPNOTSUPP;
+
+ if (!filter->inode && !kernel_ip(filter->offset))
+ return -EINVAL;
+
+ if (++range > pt_cap_get(PT_CAP_num_address_ranges))
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static void pt_event_addr_filters_sync(struct perf_event *event)
+{
+ struct perf_addr_filters_head *head = perf_event_addr_filters(event);
+ unsigned long msr_a, msr_b, *offs = event->addr_filters_offs;
+ struct pt_filters *filters = event->hw.addr_filters;
+ struct perf_addr_filter *filter;
+ int range = 0;
+
+ if (!filters)
+ return;
+
+ list_for_each_entry(filter, &head->list, entry) {
+ if (filter->inode && !offs[range]) {
+ msr_a = msr_b = 0;
+ } else {
+ /* apply the offset */
+ msr_a = filter->offset + offs[range];
+ msr_b = filter->size + msr_a;
+ }
+
+ filters->filter[range].msr_a = msr_a;
+ filters->filter[range].msr_b = msr_b;
+ filters->filter[range].config = filter->filter ? 1 : 2;
+ range++;
+ }
+
+ filters->nr_filters = range;
}
/**
@@ -955,7 +1137,7 @@
* after PT has been disabled by pt_event_stop(). Make sure we don't
* do anything (particularly, re-enable) for this event here.
*/
- if (!ACCESS_ONCE(pt->handle_nmi))
+ if (!READ_ONCE(pt->handle_nmi))
return;
/*
@@ -1040,23 +1222,36 @@
static void pt_event_start(struct perf_event *event, int mode)
{
+ struct hw_perf_event *hwc = &event->hw;
struct pt *pt = this_cpu_ptr(&pt_ctx);
- struct pt_buffer *buf = perf_get_aux(&pt->handle);
+ struct pt_buffer *buf;
if (READ_ONCE(pt->vmx_on))
return;
- if (!buf || pt_buffer_is_full(buf, pt)) {
- event->hw.state = PERF_HES_STOPPED;
- return;
+ buf = perf_aux_output_begin(&pt->handle, event);
+ if (!buf)
+ goto fail_stop;
+
+ pt_buffer_reset_offsets(buf, pt->handle.head);
+ if (!buf->snapshot) {
+ if (pt_buffer_reset_markers(buf, &pt->handle))
+ goto fail_end_stop;
}
- ACCESS_ONCE(pt->handle_nmi) = 1;
- event->hw.state = 0;
+ WRITE_ONCE(pt->handle_nmi, 1);
+ hwc->state = 0;
pt_config_buffer(buf->cur->table, buf->cur_idx,
buf->output_off);
pt_config(event);
+
+ return;
+
+fail_end_stop:
+ perf_aux_output_end(&pt->handle, 0, true);
+fail_stop:
+ hwc->state = PERF_HES_STOPPED;
}
static void pt_event_stop(struct perf_event *event, int mode)
@@ -1067,7 +1262,7 @@
* Protect against the PMI racing with disabling wrmsr,
* see comment in intel_pt_interrupt().
*/
- ACCESS_ONCE(pt->handle_nmi) = 0;
+ WRITE_ONCE(pt->handle_nmi, 0);
pt_config_stop(event);
@@ -1090,19 +1285,7 @@
pt_handle_status(pt);
pt_update_head(pt);
- }
-}
-static void pt_event_del(struct perf_event *event, int mode)
-{
- struct pt *pt = this_cpu_ptr(&pt_ctx);
- struct pt_buffer *buf;
-
- pt_event_stop(event, PERF_EF_UPDATE);
-
- buf = perf_get_aux(&pt->handle);
-
- if (buf) {
if (buf->snapshot)
pt->handle.head =
local_xchg(&buf->data_size,
@@ -1112,9 +1295,13 @@
}
}
+static void pt_event_del(struct perf_event *event, int mode)
+{
+ pt_event_stop(event, PERF_EF_UPDATE);
+}
+
static int pt_event_add(struct perf_event *event, int mode)
{
- struct pt_buffer *buf;
struct pt *pt = this_cpu_ptr(&pt_ctx);
struct hw_perf_event *hwc = &event->hw;
int ret = -EBUSY;
@@ -1122,34 +1309,18 @@
if (pt->handle.event)
goto fail;
- buf = perf_aux_output_begin(&pt->handle, event);
- ret = -EINVAL;
- if (!buf)
- goto fail_stop;
-
- pt_buffer_reset_offsets(buf, pt->handle.head);
- if (!buf->snapshot) {
- ret = pt_buffer_reset_markers(buf, &pt->handle);
- if (ret)
- goto fail_end_stop;
- }
-
if (mode & PERF_EF_START) {
pt_event_start(event, 0);
- ret = -EBUSY;
+ ret = -EINVAL;
if (hwc->state == PERF_HES_STOPPED)
- goto fail_end_stop;
+ goto fail;
} else {
hwc->state = PERF_HES_STOPPED;
}
- return 0;
-
-fail_end_stop:
- perf_aux_output_end(&pt->handle, 0, true);
-fail_stop:
- hwc->state = PERF_HES_STOPPED;
+ ret = 0;
fail:
+
return ret;
}
@@ -1159,6 +1330,7 @@
static void pt_event_destroy(struct perf_event *event)
{
+ pt_addr_filters_fini(event);
x86_del_exclusive(x86_lbr_exclusive_pt);
}
@@ -1173,6 +1345,11 @@
if (x86_add_exclusive(x86_lbr_exclusive_pt))
return -EBUSY;
+ if (pt_addr_filters_init(event)) {
+ x86_del_exclusive(x86_lbr_exclusive_pt);
+ return -ENOMEM;
+ }
+
event->destroy = pt_event_destroy;
return 0;
@@ -1192,7 +1369,7 @@
BUILD_BUG_ON(sizeof(struct topa) > PAGE_SIZE);
- if (!test_cpu_cap(&boot_cpu_data, X86_FEATURE_INTEL_PT))
+ if (!boot_cpu_has(X86_FEATURE_INTEL_PT))
return -ENODEV;
get_online_cpus();
@@ -1226,16 +1403,21 @@
PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_AUX_SW_DOUBLEBUF;
pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE;
- pt_pmu.pmu.attr_groups = pt_attr_groups;
- pt_pmu.pmu.task_ctx_nr = perf_sw_context;
- pt_pmu.pmu.event_init = pt_event_init;
- pt_pmu.pmu.add = pt_event_add;
- pt_pmu.pmu.del = pt_event_del;
- pt_pmu.pmu.start = pt_event_start;
- pt_pmu.pmu.stop = pt_event_stop;
- pt_pmu.pmu.read = pt_event_read;
- pt_pmu.pmu.setup_aux = pt_buffer_setup_aux;
- pt_pmu.pmu.free_aux = pt_buffer_free_aux;
+ pt_pmu.pmu.attr_groups = pt_attr_groups;
+ pt_pmu.pmu.task_ctx_nr = perf_sw_context;
+ pt_pmu.pmu.event_init = pt_event_init;
+ pt_pmu.pmu.add = pt_event_add;
+ pt_pmu.pmu.del = pt_event_del;
+ pt_pmu.pmu.start = pt_event_start;
+ pt_pmu.pmu.stop = pt_event_stop;
+ pt_pmu.pmu.read = pt_event_read;
+ pt_pmu.pmu.setup_aux = pt_buffer_setup_aux;
+ pt_pmu.pmu.free_aux = pt_buffer_free_aux;
+ pt_pmu.pmu.addr_filters_sync = pt_event_addr_filters_sync;
+ pt_pmu.pmu.addr_filters_validate = pt_event_addr_filters_validate;
+ pt_pmu.pmu.nr_addr_filters =
+ pt_cap_get(PT_CAP_num_address_ranges);
+
ret = perf_pmu_register(&pt_pmu.pmu, "intel_pt", -1);
return ret;
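
Beyond wiring up the PMU callbacks, the address filters become reachable through the existing PERF_EVENT_IOC_SET_FILTER ioctl: the core address-filter parser merged alongside this series accepts strings of the shape "filter <start>/<size>[@<object file>]" (plus stop-style actions), which are validated here against PT_CAP_num_address_ranges and rejected if they describe single-address triggers. Recent perf tooling exposes the same facility as a --filter option on intel_pt events; the exact grammar lives in the core perf change, not in this file.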
diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h
index 3abb5f5..efffa4a 100644
--- a/arch/x86/events/intel/pt.h
+++ b/arch/x86/events/intel/pt.h
@@ -20,6 +20,40 @@
#define __INTEL_PT_H__
/*
+ * PT MSR bit definitions
+ */
+#define RTIT_CTL_TRACEEN BIT(0)
+#define RTIT_CTL_CYCLEACC BIT(1)
+#define RTIT_CTL_OS BIT(2)
+#define RTIT_CTL_USR BIT(3)
+#define RTIT_CTL_CR3EN BIT(7)
+#define RTIT_CTL_TOPA BIT(8)
+#define RTIT_CTL_MTC_EN BIT(9)
+#define RTIT_CTL_TSC_EN BIT(10)
+#define RTIT_CTL_DISRETC BIT(11)
+#define RTIT_CTL_BRANCH_EN BIT(13)
+#define RTIT_CTL_MTC_RANGE_OFFSET 14
+#define RTIT_CTL_MTC_RANGE (0x0full << RTIT_CTL_MTC_RANGE_OFFSET)
+#define RTIT_CTL_CYC_THRESH_OFFSET 19
+#define RTIT_CTL_CYC_THRESH (0x0full << RTIT_CTL_CYC_THRESH_OFFSET)
+#define RTIT_CTL_PSB_FREQ_OFFSET 24
+#define RTIT_CTL_PSB_FREQ (0x0full << RTIT_CTL_PSB_FREQ_OFFSET)
+#define RTIT_CTL_ADDR0_OFFSET 32
+#define RTIT_CTL_ADDR0 (0x0full << RTIT_CTL_ADDR0_OFFSET)
+#define RTIT_CTL_ADDR1_OFFSET 36
+#define RTIT_CTL_ADDR1 (0x0full << RTIT_CTL_ADDR1_OFFSET)
+#define RTIT_CTL_ADDR2_OFFSET 40
+#define RTIT_CTL_ADDR2 (0x0full << RTIT_CTL_ADDR2_OFFSET)
+#define RTIT_CTL_ADDR3_OFFSET 44
+#define RTIT_CTL_ADDR3 (0x0full << RTIT_CTL_ADDR3_OFFSET)
+#define RTIT_STATUS_FILTEREN BIT(0)
+#define RTIT_STATUS_CONTEXTEN BIT(1)
+#define RTIT_STATUS_TRIGGEREN BIT(2)
+#define RTIT_STATUS_BUFFOVF BIT(3)
+#define RTIT_STATUS_ERROR BIT(4)
+#define RTIT_STATUS_STOPPED BIT(5)
+
+/*
* Single-entry ToPA: when this close to the region boundary, switch
* buffers to avoid losing data.
*/
@@ -48,15 +82,20 @@
#define PT_CPUID_LEAVES 2
#define PT_CPUID_REGS_NUM 4 /* number of registers (eax, ebx, ecx, edx) */
+/* TSC to Core Crystal Clock Ratio */
+#define CPUID_TSC_LEAF 0x15
+
enum pt_capabilities {
PT_CAP_max_subleaf = 0,
PT_CAP_cr3_filtering,
PT_CAP_psb_cyc,
+ PT_CAP_ip_filtering,
PT_CAP_mtc,
PT_CAP_topa_output,
PT_CAP_topa_multiple_entries,
PT_CAP_single_range_output,
PT_CAP_payloads_lip,
+ PT_CAP_num_address_ranges,
PT_CAP_mtc_periods,
PT_CAP_cycle_thresholds,
PT_CAP_psb_periods,
@@ -66,6 +105,9 @@
struct pmu pmu;
u32 caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
bool vmx;
+ unsigned long max_nonturbo_ratio;
+ unsigned int tsc_art_num;
+ unsigned int tsc_art_den;
};
/**
@@ -104,14 +146,40 @@
struct topa_entry *topa_index[0];
};
+#define PT_FILTERS_NUM 4
+
+/**
+ * struct pt_filter - IP range filter configuration
+ * @msr_a: range start, goes to RTIT_ADDRn_A
+ * @msr_b: range end, goes to RTIT_ADDRn_B
+ * @config: 4-bit field in RTIT_CTL
+ */
+struct pt_filter {
+ unsigned long msr_a;
+ unsigned long msr_b;
+ unsigned long config;
+};
+
+/**
+ * struct pt_filters - IP range filtering context
+ * @filter: filters defined for this context
+ * @nr_filters: number of defined filters in the @filter array
+ */
+struct pt_filters {
+ struct pt_filter filter[PT_FILTERS_NUM];
+ unsigned int nr_filters;
+};
+
/**
* struct pt - per-cpu pt context
* @handle: perf output handle
+ * @filters: last configured filters
* @handle_nmi: do handle PT PMI on this cpu, there's an active event
* @vmx_on: 1 if VMX is ON on this cpu
*/
struct pt {
struct perf_output_handle handle;
+ struct pt_filters filters;
int handle_nmi;
int vmx_on;
};
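
The four ADDRn config fields sit at 4-bit strides starting at bit 32 of RTIT_CTL, which is what lets pt_config_filters() OR each filter's config into place. A freestanding sketch composing those bits with the same constants (1 selects trace-on-range filtering, 2 a trace-stop trigger, per pt_event_addr_filters_sync() earlier in this patch):

    #include <stdint.h>
    #include <stdio.h>

    #define RTIT_CTL_ADDR0_OFFSET   32
    #define PT_FILTERS_NUM          4

    int main(void)
    {
            /* per-range config: 1 = trace inside range, 2 = trace-stop */
            uint64_t config[PT_FILTERS_NUM] = { 1, 2, 0, 0 };
            uint64_t rtit_ctl = 0;
            int range;

            for (range = 0; range < PT_FILTERS_NUM; range++)
                    rtit_ctl |= config[range] <<
                                (RTIT_CTL_ADDR0_OFFSET + 4 * range);

            printf("RTIT_CTL addr-config bits: 0x%016llx\n",
                   (unsigned long long)rtit_ctl);
            return 0;
    }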
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 1705c9d..99c4bab 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -27,10 +27,14 @@
* event: rapl_energy_dram
* perf code: 0x3
*
- * dram counter: consumption of the builtin-gpu domain (client only)
+ * gpu counter: consumption of the builtin-gpu domain (client only)
* event: rapl_energy_gpu
* perf code: 0x4
*
+ * psys counter: consumption of the builtin-psys domain (client only)
+ * event: rapl_energy_psys
+ * perf code: 0x5
+ *
* We manage those counters as free running (read-only). They may be
* used simultaneously by other tools, such as turbostat.
*
@@ -53,6 +57,8 @@
#include <asm/cpu_device_id.h>
#include "../perf_event.h"
+MODULE_LICENSE("GPL");
+
/*
* RAPL energy status counters
*/
@@ -64,13 +70,16 @@
#define INTEL_RAPL_RAM 0x3 /* pseudo-encoding */
#define RAPL_IDX_PP1_NRG_STAT 3 /* gpu */
#define INTEL_RAPL_PP1 0x4 /* pseudo-encoding */
+#define RAPL_IDX_PSYS_NRG_STAT 4 /* psys */
+#define INTEL_RAPL_PSYS 0x5 /* pseudo-encoding */
-#define NR_RAPL_DOMAINS 0x4
+#define NR_RAPL_DOMAINS 0x5
static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
"pp0-core",
"package",
"dram",
"pp1-gpu",
+ "psys",
};
/* Clients have PP0, PKG */
@@ -89,6 +98,13 @@
1<<RAPL_IDX_RAM_NRG_STAT|\
1<<RAPL_IDX_PP1_NRG_STAT)
+/* SKL clients have PP0, PKG, RAM, PP1, PSYS */
+#define RAPL_IDX_SKL_CLN (1<<RAPL_IDX_PP0_NRG_STAT|\
+ 1<<RAPL_IDX_PKG_NRG_STAT|\
+ 1<<RAPL_IDX_RAM_NRG_STAT|\
+ 1<<RAPL_IDX_PP1_NRG_STAT|\
+ 1<<RAPL_IDX_PSYS_NRG_STAT)
+
/* Knights Landing has PKG, RAM */
#define RAPL_IDX_KNL (1<<RAPL_IDX_PKG_NRG_STAT|\
1<<RAPL_IDX_RAM_NRG_STAT)
@@ -360,6 +376,10 @@
bit = RAPL_IDX_PP1_NRG_STAT;
msr = MSR_PP1_ENERGY_STATUS;
break;
+ case INTEL_RAPL_PSYS:
+ bit = RAPL_IDX_PSYS_NRG_STAT;
+ msr = MSR_PLATFORM_ENERGY_STATUS;
+ break;
default:
return -EINVAL;
}
@@ -414,11 +434,13 @@
RAPL_EVENT_ATTR_STR(energy-pkg , rapl_pkg, "event=0x02");
RAPL_EVENT_ATTR_STR(energy-ram , rapl_ram, "event=0x03");
RAPL_EVENT_ATTR_STR(energy-gpu , rapl_gpu, "event=0x04");
+RAPL_EVENT_ATTR_STR(energy-psys, rapl_psys, "event=0x05");
RAPL_EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules");
RAPL_EVENT_ATTR_STR(energy-pkg.unit , rapl_pkg_unit, "Joules");
RAPL_EVENT_ATTR_STR(energy-ram.unit , rapl_ram_unit, "Joules");
RAPL_EVENT_ATTR_STR(energy-gpu.unit , rapl_gpu_unit, "Joules");
+RAPL_EVENT_ATTR_STR(energy-psys.unit, rapl_psys_unit, "Joules");
/*
* we compute in 0.23 nJ increments regardless of MSR
@@ -427,6 +449,7 @@
RAPL_EVENT_ATTR_STR(energy-pkg.scale, rapl_pkg_scale, "2.3283064365386962890625e-10");
RAPL_EVENT_ATTR_STR(energy-ram.scale, rapl_ram_scale, "2.3283064365386962890625e-10");
RAPL_EVENT_ATTR_STR(energy-gpu.scale, rapl_gpu_scale, "2.3283064365386962890625e-10");
+RAPL_EVENT_ATTR_STR(energy-psys.scale, rapl_psys_scale, "2.3283064365386962890625e-10");
static struct attribute *rapl_events_srv_attr[] = {
EVENT_PTR(rapl_cores),
@@ -476,6 +499,27 @@
NULL,
};
+static struct attribute *rapl_events_skl_attr[] = {
+ EVENT_PTR(rapl_cores),
+ EVENT_PTR(rapl_pkg),
+ EVENT_PTR(rapl_gpu),
+ EVENT_PTR(rapl_ram),
+ EVENT_PTR(rapl_psys),
+
+ EVENT_PTR(rapl_cores_unit),
+ EVENT_PTR(rapl_pkg_unit),
+ EVENT_PTR(rapl_gpu_unit),
+ EVENT_PTR(rapl_ram_unit),
+ EVENT_PTR(rapl_psys_unit),
+
+ EVENT_PTR(rapl_cores_scale),
+ EVENT_PTR(rapl_pkg_scale),
+ EVENT_PTR(rapl_gpu_scale),
+ EVENT_PTR(rapl_ram_scale),
+ EVENT_PTR(rapl_psys_scale),
+ NULL,
+};
+
static struct attribute *rapl_events_knl_attr[] = {
EVENT_PTR(rapl_pkg),
EVENT_PTR(rapl_ram),
@@ -592,6 +636,11 @@
return NOTIFY_OK;
}
+static struct notifier_block rapl_cpu_nb = {
+ .notifier_call = rapl_cpu_notifier,
+ .priority = CPU_PRI_PERF + 1,
+};
+
static int rapl_check_hw_unit(bool apply_quirk)
{
u64 msr_rapl_power_unit_bits;
@@ -660,7 +709,7 @@
return 0;
}
-static void __init cleanup_rapl_pmus(void)
+static void cleanup_rapl_pmus(void)
{
int i;
@@ -691,52 +740,92 @@
return 0;
}
-static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
- [0] = { .vendor = X86_VENDOR_INTEL, .family = 6 },
- [1] = {},
+#define X86_RAPL_MODEL_MATCH(model, init) \
+ { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init }
+
+struct intel_rapl_init_fun {
+ bool apply_quirk;
+ int cntr_mask;
+ struct attribute **attrs;
};
+static const struct intel_rapl_init_fun snb_rapl_init __initconst = {
+ .apply_quirk = false,
+ .cntr_mask = RAPL_IDX_CLN,
+ .attrs = rapl_events_cln_attr,
+};
+
+static const struct intel_rapl_init_fun hsx_rapl_init __initconst = {
+ .apply_quirk = true,
+ .cntr_mask = RAPL_IDX_SRV,
+ .attrs = rapl_events_srv_attr,
+};
+
+static const struct intel_rapl_init_fun hsw_rapl_init __initconst = {
+ .apply_quirk = false,
+ .cntr_mask = RAPL_IDX_HSW,
+ .attrs = rapl_events_hsw_attr,
+};
+
+static const struct intel_rapl_init_fun snbep_rapl_init __initconst = {
+ .apply_quirk = false,
+ .cntr_mask = RAPL_IDX_SRV,
+ .attrs = rapl_events_srv_attr,
+};
+
+static const struct intel_rapl_init_fun knl_rapl_init __initconst = {
+ .apply_quirk = true,
+ .cntr_mask = RAPL_IDX_KNL,
+ .attrs = rapl_events_knl_attr,
+};
+
+static const struct intel_rapl_init_fun skl_rapl_init __initconst = {
+ .apply_quirk = false,
+ .cntr_mask = RAPL_IDX_SKL_CLN,
+ .attrs = rapl_events_skl_attr,
+};
+
+static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
+ X86_RAPL_MODEL_MATCH(42, snb_rapl_init), /* Sandy Bridge */
+ X86_RAPL_MODEL_MATCH(45, snbep_rapl_init), /* Sandy Bridge-EP */
+
+ X86_RAPL_MODEL_MATCH(58, snb_rapl_init), /* Ivy Bridge */
+ X86_RAPL_MODEL_MATCH(62, snbep_rapl_init), /* IvyTown */
+
+ X86_RAPL_MODEL_MATCH(60, hsw_rapl_init), /* Haswell */
+ X86_RAPL_MODEL_MATCH(63, hsx_rapl_init), /* Haswell-Server */
+ X86_RAPL_MODEL_MATCH(69, hsw_rapl_init), /* Haswell-Celeron */
+ X86_RAPL_MODEL_MATCH(70, hsw_rapl_init), /* Haswell GT3e */
+
+ X86_RAPL_MODEL_MATCH(61, hsw_rapl_init), /* Broadwell */
+ X86_RAPL_MODEL_MATCH(71, hsw_rapl_init), /* Broadwell-H */
+ X86_RAPL_MODEL_MATCH(79, hsx_rapl_init), /* Broadwell-Server */
+ X86_RAPL_MODEL_MATCH(86, hsx_rapl_init), /* Broadwell Xeon D */
+
+ X86_RAPL_MODEL_MATCH(87, knl_rapl_init), /* Knights Landing */
+
+ X86_RAPL_MODEL_MATCH(78, skl_rapl_init), /* Skylake */
+ X86_RAPL_MODEL_MATCH(94, skl_rapl_init), /* Skylake H/S */
+ {},
+};
+
+MODULE_DEVICE_TABLE(x86cpu, rapl_cpu_match);
+
static int __init rapl_pmu_init(void)
{
- bool apply_quirk = false;
+ const struct x86_cpu_id *id;
+ struct intel_rapl_init_fun *rapl_init;
+ bool apply_quirk;
int ret;
- if (!x86_match_cpu(rapl_cpu_match))
+ id = x86_match_cpu(rapl_cpu_match);
+ if (!id)
return -ENODEV;
- switch (boot_cpu_data.x86_model) {
- case 42: /* Sandy Bridge */
- case 58: /* Ivy Bridge */
- rapl_cntr_mask = RAPL_IDX_CLN;
- rapl_pmu_events_group.attrs = rapl_events_cln_attr;
- break;
- case 63: /* Haswell-Server */
- case 79: /* Broadwell-Server */
- apply_quirk = true;
- rapl_cntr_mask = RAPL_IDX_SRV;
- rapl_pmu_events_group.attrs = rapl_events_srv_attr;
- break;
- case 60: /* Haswell */
- case 69: /* Haswell-Celeron */
- case 70: /* Haswell GT3e */
- case 61: /* Broadwell */
- case 71: /* Broadwell-H */
- rapl_cntr_mask = RAPL_IDX_HSW;
- rapl_pmu_events_group.attrs = rapl_events_hsw_attr;
- break;
- case 45: /* Sandy Bridge-EP */
- case 62: /* IvyTown */
- rapl_cntr_mask = RAPL_IDX_SRV;
- rapl_pmu_events_group.attrs = rapl_events_srv_attr;
- break;
- case 87: /* Knights Landing */
- apply_quirk = true;
- rapl_cntr_mask = RAPL_IDX_KNL;
- rapl_pmu_events_group.attrs = rapl_events_knl_attr;
- break;
- default:
- return -ENODEV;
- }
+ rapl_init = (struct intel_rapl_init_fun *)id->driver_data;
+ apply_quirk = rapl_init->apply_quirk;
+ rapl_cntr_mask = rapl_init->cntr_mask;
+ rapl_pmu_events_group.attrs = rapl_init->attrs;
ret = rapl_check_hw_unit(apply_quirk);
if (ret)
@@ -756,7 +845,7 @@
if (ret)
goto out;
- __perf_cpu_notifier(rapl_cpu_notifier);
+ __register_cpu_notifier(&rapl_cpu_nb);
cpu_notifier_register_done();
rapl_advertise();
return 0;
@@ -767,4 +856,14 @@
cpu_notifier_register_done();
return ret;
}
-device_initcall(rapl_pmu_init);
+module_init(rapl_pmu_init);
+
+static void __exit intel_rapl_exit(void)
+{
+ cpu_notifier_register_begin();
+ __unregister_cpu_notifier(&rapl_cpu_nb);
+ perf_pmu_unregister(&rapl_pmus->pmu);
+ cleanup_rapl_pmus();
+ cpu_notifier_register_done();
+}
+module_exit(intel_rapl_exit);
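For reference, each X86_RAPL_MODEL_MATCH() entry is a positional initializer for struct x86_cpu_id; written out with designated initializers, one row of the table is roughly:

	/* X86_RAPL_MODEL_MATCH(42, snb_rapl_init) expands to: */
	{
		.vendor      = X86_VENDOR_INTEL,
		.family      = 6,			/* family 6 only */
		.model       = 42,			/* Sandy Bridge */
		.feature     = X86_FEATURE_ANY,		/* no feature gate */
		.driver_data = (kernel_ulong_t)&snb_rapl_init,
	},

x86_match_cpu() hands back the matching row, so ->driver_data replaces the old switch on boot_cpu_data.x86_model, and MODULE_DEVICE_TABLE(x86cpu, ...) exports the table so userspace can autoload the module on matching hardware. The uncore conversion below follows the same pattern.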
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 7012d18..16c1789 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1,3 +1,4 @@
+#include <asm/cpu_device_id.h>
#include "uncore.h"
static struct intel_uncore_type *empty_uncore[] = { NULL, };
@@ -21,6 +22,8 @@
struct event_constraint uncore_constraint_empty =
EVENT_CONSTRAINT(0, 0, 0);
+MODULE_LICENSE("GPL");
+
static int uncore_pcibus_to_physid(struct pci_bus *bus)
{
struct pci2phy_map *map;
@@ -754,7 +757,7 @@
pmu->registered = false;
}
-static void __init __uncore_exit_boxes(struct intel_uncore_type *type, int cpu)
+static void __uncore_exit_boxes(struct intel_uncore_type *type, int cpu)
{
struct intel_uncore_pmu *pmu = type->pmus;
struct intel_uncore_box *box;
@@ -770,7 +773,7 @@
}
}
-static void __init uncore_exit_boxes(void *dummy)
+static void uncore_exit_boxes(void *dummy)
{
struct intel_uncore_type **types;
@@ -787,7 +790,7 @@
kfree(pmu->boxes);
}
-static void __init uncore_type_exit(struct intel_uncore_type *type)
+static void uncore_type_exit(struct intel_uncore_type *type)
{
struct intel_uncore_pmu *pmu = type->pmus;
int i;
@@ -804,7 +807,7 @@
type->events_group = NULL;
}
-static void __init uncore_types_exit(struct intel_uncore_type **types)
+static void uncore_types_exit(struct intel_uncore_type **types)
{
for (; *types; types++)
uncore_type_exit(*types);
@@ -989,46 +992,6 @@
size_t size;
int ret;
- switch (boot_cpu_data.x86_model) {
- case 45: /* Sandy Bridge-EP */
- ret = snbep_uncore_pci_init();
- break;
- case 62: /* Ivy Bridge-EP */
- ret = ivbep_uncore_pci_init();
- break;
- case 63: /* Haswell-EP */
- ret = hswep_uncore_pci_init();
- break;
- case 79: /* BDX-EP */
- case 86: /* BDX-DE */
- ret = bdx_uncore_pci_init();
- break;
- case 42: /* Sandy Bridge */
- ret = snb_uncore_pci_init();
- break;
- case 58: /* Ivy Bridge */
- ret = ivb_uncore_pci_init();
- break;
- case 60: /* Haswell */
- case 69: /* Haswell Celeron */
- ret = hsw_uncore_pci_init();
- break;
- case 61: /* Broadwell */
- ret = bdw_uncore_pci_init();
- break;
- case 87: /* Knights Landing */
- ret = knl_uncore_pci_init();
- break;
- case 94: /* SkyLake */
- ret = skl_uncore_pci_init();
- break;
- default:
- return -ENODEV;
- }
-
- if (ret)
- return ret;
-
size = max_packages * sizeof(struct pci_extra_dev);
uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL);
if (!uncore_extra_pci_dev) {
@@ -1060,7 +1023,7 @@
return ret;
}
-static void __init uncore_pci_exit(void)
+static void uncore_pci_exit(void)
{
if (pcidrv_registered) {
pcidrv_registered = false;
@@ -1287,46 +1250,6 @@
{
int ret;
- switch (boot_cpu_data.x86_model) {
- case 26: /* Nehalem */
- case 30:
- case 37: /* Westmere */
- case 44:
- nhm_uncore_cpu_init();
- break;
- case 42: /* Sandy Bridge */
- case 58: /* Ivy Bridge */
- case 60: /* Haswell */
- case 69: /* Haswell */
- case 70: /* Haswell */
- case 61: /* Broadwell */
- case 71: /* Broadwell */
- snb_uncore_cpu_init();
- break;
- case 45: /* Sandy Bridge-EP */
- snbep_uncore_cpu_init();
- break;
- case 46: /* Nehalem-EX */
- case 47: /* Westmere-EX aka. Xeon E7 */
- nhmex_uncore_cpu_init();
- break;
- case 62: /* Ivy Bridge-EP */
- ivbep_uncore_cpu_init();
- break;
- case 63: /* Haswell-EP */
- hswep_uncore_cpu_init();
- break;
- case 79: /* BDX-EP */
- case 86: /* BDX-DE */
- bdx_uncore_cpu_init();
- break;
- case 87: /* Knights Landing */
- knl_uncore_cpu_init();
- break;
- default:
- return -ENODEV;
- }
-
ret = uncore_types_init(uncore_msr_uncores, true);
if (ret)
goto err;
@@ -1376,20 +1299,123 @@
return 0;
}
+#define X86_UNCORE_MODEL_MATCH(model, init) \
+ { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init }
+
+struct intel_uncore_init_fun {
+ void (*cpu_init)(void);
+ int (*pci_init)(void);
+};
+
+static const struct intel_uncore_init_fun nhm_uncore_init __initconst = {
+ .cpu_init = nhm_uncore_cpu_init,
+};
+
+static const struct intel_uncore_init_fun snb_uncore_init __initconst = {
+ .cpu_init = snb_uncore_cpu_init,
+ .pci_init = snb_uncore_pci_init,
+};
+
+static const struct intel_uncore_init_fun ivb_uncore_init __initconst = {
+ .cpu_init = snb_uncore_cpu_init,
+ .pci_init = ivb_uncore_pci_init,
+};
+
+static const struct intel_uncore_init_fun hsw_uncore_init __initconst = {
+ .cpu_init = snb_uncore_cpu_init,
+ .pci_init = hsw_uncore_pci_init,
+};
+
+static const struct intel_uncore_init_fun bdw_uncore_init __initconst = {
+ .cpu_init = snb_uncore_cpu_init,
+ .pci_init = bdw_uncore_pci_init,
+};
+
+static const struct intel_uncore_init_fun snbep_uncore_init __initconst = {
+ .cpu_init = snbep_uncore_cpu_init,
+ .pci_init = snbep_uncore_pci_init,
+};
+
+static const struct intel_uncore_init_fun nhmex_uncore_init __initconst = {
+ .cpu_init = nhmex_uncore_cpu_init,
+};
+
+static const struct intel_uncore_init_fun ivbep_uncore_init __initconst = {
+ .cpu_init = ivbep_uncore_cpu_init,
+ .pci_init = ivbep_uncore_pci_init,
+};
+
+static const struct intel_uncore_init_fun hswep_uncore_init __initconst = {
+ .cpu_init = hswep_uncore_cpu_init,
+ .pci_init = hswep_uncore_pci_init,
+};
+
+static const struct intel_uncore_init_fun bdx_uncore_init __initconst = {
+ .cpu_init = bdx_uncore_cpu_init,
+ .pci_init = bdx_uncore_pci_init,
+};
+
+static const struct intel_uncore_init_fun knl_uncore_init __initconst = {
+ .cpu_init = knl_uncore_cpu_init,
+ .pci_init = knl_uncore_pci_init,
+};
+
+static const struct intel_uncore_init_fun skl_uncore_init __initconst = {
+ .pci_init = skl_uncore_pci_init,
+};
+
+static const struct x86_cpu_id intel_uncore_match[] __initconst = {
+ X86_UNCORE_MODEL_MATCH(26, nhm_uncore_init), /* Nehalem */
+ X86_UNCORE_MODEL_MATCH(30, nhm_uncore_init),
+ X86_UNCORE_MODEL_MATCH(37, nhm_uncore_init), /* Westmere */
+ X86_UNCORE_MODEL_MATCH(44, nhm_uncore_init),
+ X86_UNCORE_MODEL_MATCH(42, snb_uncore_init), /* Sandy Bridge */
+ X86_UNCORE_MODEL_MATCH(58, ivb_uncore_init), /* Ivy Bridge */
+ X86_UNCORE_MODEL_MATCH(60, hsw_uncore_init), /* Haswell */
+ X86_UNCORE_MODEL_MATCH(69, hsw_uncore_init), /* Haswell Celeron */
+ X86_UNCORE_MODEL_MATCH(70, hsw_uncore_init), /* Haswell */
+ X86_UNCORE_MODEL_MATCH(61, bdw_uncore_init), /* Broadwell */
+ X86_UNCORE_MODEL_MATCH(71, bdw_uncore_init), /* Broadwell */
+ X86_UNCORE_MODEL_MATCH(45, snbep_uncore_init), /* Sandy Bridge-EP */
+ X86_UNCORE_MODEL_MATCH(46, nhmex_uncore_init), /* Nehalem-EX */
+ X86_UNCORE_MODEL_MATCH(47, nhmex_uncore_init), /* Westmere-EX aka. Xeon E7 */
+ X86_UNCORE_MODEL_MATCH(62, ivbep_uncore_init), /* Ivy Bridge-EP */
+ X86_UNCORE_MODEL_MATCH(63, hswep_uncore_init), /* Haswell-EP */
+ X86_UNCORE_MODEL_MATCH(79, bdx_uncore_init), /* BDX-EP */
+ X86_UNCORE_MODEL_MATCH(86, bdx_uncore_init), /* BDX-DE */
+ X86_UNCORE_MODEL_MATCH(87, knl_uncore_init), /* Knights Landing */
+ X86_UNCORE_MODEL_MATCH(94, skl_uncore_init), /* SkyLake */
+ {},
+};
+
+MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match);
+
static int __init intel_uncore_init(void)
{
- int pret, cret, ret;
+ const struct x86_cpu_id *id;
+ struct intel_uncore_init_fun *uncore_init;
+ int pret = 0, cret = 0, ret;
- if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+ id = x86_match_cpu(intel_uncore_match);
+ if (!id)
return -ENODEV;
- if (cpu_has_hypervisor)
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
return -ENODEV;
max_packages = topology_max_packages();
- pret = uncore_pci_init();
- cret = uncore_cpu_init();
+ uncore_init = (struct intel_uncore_init_fun *)id->driver_data;
+ if (uncore_init->pci_init) {
+ pret = uncore_init->pci_init();
+ if (!pret)
+ pret = uncore_pci_init();
+ }
+
+ if (uncore_init->cpu_init) {
+ uncore_init->cpu_init();
+ cret = uncore_cpu_init();
+ }
if (cret && pret)
return -ENODEV;
@@ -1409,4 +1435,14 @@
cpu_notifier_register_done();
return ret;
}
-device_initcall(intel_uncore_init);
+module_init(intel_uncore_init);
+
+static void __exit intel_uncore_exit(void)
+{
+ cpu_notifier_register_begin();
+ __unregister_cpu_notifier(&uncore_cpu_nb);
+ uncore_types_exit(uncore_msr_uncores);
+ uncore_pci_exit();
+ cpu_notifier_register_done();
+}
+module_exit(intel_uncore_exit);
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index 8bef19f..85ef3c2 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -6,6 +6,8 @@
PERF_MSR_MPERF = 2,
PERF_MSR_PPERF = 3,
PERF_MSR_SMI = 4,
+ PERF_MSR_PTSC = 5,
+ PERF_MSR_IRPERF = 6,
PERF_MSR_EVENT_MAX,
};
@@ -15,6 +17,16 @@
return boot_cpu_has(X86_FEATURE_APERFMPERF);
}
+static bool test_ptsc(int idx)
+{
+ return boot_cpu_has(X86_FEATURE_PTSC);
+}
+
+static bool test_irperf(int idx)
+{
+ return boot_cpu_has(X86_FEATURE_IRPERF);
+}
+
static bool test_intel(int idx)
{
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
@@ -69,18 +81,22 @@
bool (*test)(int idx);
};
-PMU_EVENT_ATTR_STRING(tsc, evattr_tsc, "event=0x00");
-PMU_EVENT_ATTR_STRING(aperf, evattr_aperf, "event=0x01");
-PMU_EVENT_ATTR_STRING(mperf, evattr_mperf, "event=0x02");
-PMU_EVENT_ATTR_STRING(pperf, evattr_pperf, "event=0x03");
-PMU_EVENT_ATTR_STRING(smi, evattr_smi, "event=0x04");
+PMU_EVENT_ATTR_STRING(tsc, evattr_tsc, "event=0x00");
+PMU_EVENT_ATTR_STRING(aperf, evattr_aperf, "event=0x01");
+PMU_EVENT_ATTR_STRING(mperf, evattr_mperf, "event=0x02");
+PMU_EVENT_ATTR_STRING(pperf, evattr_pperf, "event=0x03");
+PMU_EVENT_ATTR_STRING(smi, evattr_smi, "event=0x04");
+PMU_EVENT_ATTR_STRING(ptsc, evattr_ptsc, "event=0x05");
+PMU_EVENT_ATTR_STRING(irperf, evattr_irperf, "event=0x06");
static struct perf_msr msr[] = {
- [PERF_MSR_TSC] = { 0, &evattr_tsc, NULL, },
- [PERF_MSR_APERF] = { MSR_IA32_APERF, &evattr_aperf, test_aperfmperf, },
- [PERF_MSR_MPERF] = { MSR_IA32_MPERF, &evattr_mperf, test_aperfmperf, },
- [PERF_MSR_PPERF] = { MSR_PPERF, &evattr_pperf, test_intel, },
- [PERF_MSR_SMI] = { MSR_SMI_COUNT, &evattr_smi, test_intel, },
+ [PERF_MSR_TSC] = { 0, &evattr_tsc, NULL, },
+ [PERF_MSR_APERF] = { MSR_IA32_APERF, &evattr_aperf, test_aperfmperf, },
+ [PERF_MSR_MPERF] = { MSR_IA32_MPERF, &evattr_mperf, test_aperfmperf, },
+ [PERF_MSR_PPERF] = { MSR_PPERF, &evattr_pperf, test_intel, },
+ [PERF_MSR_SMI] = { MSR_SMI_COUNT, &evattr_smi, test_intel, },
+ [PERF_MSR_PTSC] = { MSR_F15H_PTSC, &evattr_ptsc, test_ptsc, },
+ [PERF_MSR_IRPERF] = { MSR_F17H_IRPERF, &evattr_irperf, test_irperf, },
};
static struct attribute *events_attrs[PERF_MSR_EVENT_MAX + 1] = {
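The ->test() hooks decide which of these events become visible on the running CPU. A hedged sketch of the kind of probe loop that consumes the table (it assumes the attr member is a struct perf_pmu_events_attr pointer as created by PMU_EVENT_ATTR_STRING; the file's real init code may differ in detail):

	/* sketch: hide events whose probe callback rejects this CPU */
	int i, j = 0;

	for (i = 0; i < PERF_MSR_EVENT_MAX; i++) {
		if (msr[i].test && !msr[i].test(i))
			continue;	/* e.g. no X86_FEATURE_PTSC */
		events_attrs[j++] = &msr[i].attr->attr.attr;
	}
	events_attrs[j] = NULL;	/* attribute lists are NULL-terminated */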
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index ad4dc7f..8bd764d 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -601,6 +601,7 @@
u64 lbr_sel_mask; /* LBR_SELECT valid bits */
const int *lbr_sel_map; /* lbr_select mappings */
bool lbr_double_abort; /* duplicated lbr aborts */
+ bool lbr_pt_coexist; /* LBR may coexist with PT */
/*
* Intel PT/LBR/BTS are exclusive
@@ -859,6 +860,8 @@
extern struct event_constraint intel_slm_pebs_event_constraints[];
+extern struct event_constraint intel_glm_pebs_event_constraints[];
+
extern struct event_constraint intel_nehalem_pebs_event_constraints[];
extern struct event_constraint intel_westmere_pebs_event_constraints[];
@@ -907,6 +910,8 @@
void intel_pmu_lbr_init_atom(void);
+void intel_pmu_lbr_init_slm(void);
+
void intel_pmu_lbr_init_snb(void);
void intel_pmu_lbr_init_hsw(void);
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 0552884..2f29f4e 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -357,7 +357,7 @@
put_user_ex(ptr_to_compat(&frame->uc), &frame->puc);
/* Create the ucontext. */
- if (cpu_has_xsave)
+ if (boot_cpu_has(X86_FEATURE_XSAVE))
put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
else
put_user_ex(0, &frame->uc.uc_flags);
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 99afb66..e77a644 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -1,11 +1,12 @@
#ifndef _ASM_X86_ALTERNATIVE_H
#define _ASM_X86_ALTERNATIVE_H
+#ifndef __ASSEMBLY__
+
#include <linux/types.h>
#include <linux/stddef.h>
#include <linux/stringify.h>
#include <asm/asm.h>
-#include <asm/ptrace.h>
/*
* Alternative inline assembly for SMP.
@@ -233,36 +234,6 @@
*/
#define ASM_NO_INPUT_CLOBBER(clbr...) "i" (0) : clbr
-struct paravirt_patch_site;
-#ifdef CONFIG_PARAVIRT
-void apply_paravirt(struct paravirt_patch_site *start,
- struct paravirt_patch_site *end);
-#else
-static inline void apply_paravirt(struct paravirt_patch_site *start,
- struct paravirt_patch_site *end)
-{}
-#define __parainstructions NULL
-#define __parainstructions_end NULL
-#endif
-
-extern void *text_poke_early(void *addr, const void *opcode, size_t len);
-
-/*
- * Clear and restore the kernel write-protection flag on the local CPU.
- * Allows the kernel to edit read-only pages.
- * Side-effect: any interrupt handler running between save and restore will have
- * the ability to write to read-only pages.
- *
- * Warning:
- * Code patching in the UP case is safe if NMIs and MCE handlers are stopped and
- * no thread can be preempted in the instructions being modified (no iret to an
- * invalid instruction possible) or if the instructions are changed from a
- * consistent state to another consistent state atomically.
- * On the local CPU you need to be protected against NMI or MCE handlers seeing an
- * inconsistent instruction while you patch.
- */
-extern void *text_poke(void *addr, const void *opcode, size_t len);
-extern int poke_int3_handler(struct pt_regs *regs);
-extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler);
+#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_ALTERNATIVE_H */
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 98f25bb..bc27611 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -239,10 +239,10 @@
extern void x2apic_setup(void);
static inline int x2apic_enabled(void)
{
- return cpu_has_x2apic && apic_is_x2apic_enabled();
+ return boot_cpu_has(X86_FEATURE_X2APIC) && apic_is_x2apic_enabled();
}
-#define x2apic_supported() (cpu_has_x2apic)
+#define x2apic_supported() (boot_cpu_has(X86_FEATURE_X2APIC))
#else /* !CONFIG_X86_X2APIC */
static inline void check_x2apic(void) { }
static inline void x2apic_setup(void) { }
diff --git a/arch/x86/include/asm/bios_ebda.h b/arch/x86/include/asm/bios_ebda.h
index aa6a317..2b00c77 100644
--- a/arch/x86/include/asm/bios_ebda.h
+++ b/arch/x86/include/asm/bios_ebda.h
@@ -17,27 +17,6 @@
return address; /* 0 means none */
}
-/*
- * Return the sanitized length of the EBDA in bytes, if it exists.
- */
-static inline unsigned int get_bios_ebda_length(void)
-{
- unsigned int address;
- unsigned int length;
-
- address = get_bios_ebda();
- if (!address)
- return 0;
-
- /* EBDA length is byte 0 of the EBDA (stored in KiB) */
- length = *(unsigned char *)phys_to_virt(address);
- length <<= 10;
-
- /* Trim the length if it extends beyond 640KiB */
- length = min_t(unsigned int, (640 * 1024) - address, length);
- return length;
-}
-
void reserve_ebda_region(void);
#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
index 6b8d6e8..abd06b1 100644
--- a/arch/x86/include/asm/boot.h
+++ b/arch/x86/include/asm/boot.h
@@ -12,29 +12,46 @@
/* Minimum kernel alignment, as a power of two */
#ifdef CONFIG_X86_64
-#define MIN_KERNEL_ALIGN_LG2 PMD_SHIFT
+# define MIN_KERNEL_ALIGN_LG2 PMD_SHIFT
#else
-#define MIN_KERNEL_ALIGN_LG2 (PAGE_SHIFT + THREAD_SIZE_ORDER)
+# define MIN_KERNEL_ALIGN_LG2 (PAGE_SHIFT + THREAD_SIZE_ORDER)
#endif
#define MIN_KERNEL_ALIGN (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2)
#if (CONFIG_PHYSICAL_ALIGN & (CONFIG_PHYSICAL_ALIGN-1)) || \
(CONFIG_PHYSICAL_ALIGN < MIN_KERNEL_ALIGN)
-#error "Invalid value for CONFIG_PHYSICAL_ALIGN"
+# error "Invalid value for CONFIG_PHYSICAL_ALIGN"
#endif
#ifdef CONFIG_KERNEL_BZIP2
-#define BOOT_HEAP_SIZE 0x400000
+# define BOOT_HEAP_SIZE 0x400000
#else /* !CONFIG_KERNEL_BZIP2 */
-
-#define BOOT_HEAP_SIZE 0x10000
-
-#endif /* !CONFIG_KERNEL_BZIP2 */
+# define BOOT_HEAP_SIZE 0x10000
+#endif
#ifdef CONFIG_X86_64
-#define BOOT_STACK_SIZE 0x4000
-#else
-#define BOOT_STACK_SIZE 0x1000
+# define BOOT_STACK_SIZE 0x4000
+
+# define BOOT_INIT_PGT_SIZE (6*4096)
+# ifdef CONFIG_RANDOMIZE_BASE
+/*
+ * Assuming all of these cross the 512GB boundary:
+ * 1 page for level4
+ * (2+2)*4 pages for kernel, param, cmd_line, and randomized kernel
+ * 2 pages for first 2M (video RAM: CONFIG_X86_VERBOSE_BOOTUP).
+ * Total is 19 pages.
+ */
+# ifdef CONFIG_X86_VERBOSE_BOOTUP
+# define BOOT_PGT_SIZE (19*4096)
+# else /* !CONFIG_X86_VERBOSE_BOOTUP */
+# define BOOT_PGT_SIZE (17*4096)
+# endif
+# else /* !CONFIG_RANDOMIZE_BASE */
+# define BOOT_PGT_SIZE BOOT_INIT_PGT_SIZE
+# endif
+
+#else /* !CONFIG_X86_64 */
+# define BOOT_STACK_SIZE 0x1000
#endif
#endif /* _ASM_X86_BOOT_H */
diff --git a/arch/x86/include/asm/clocksource.h b/arch/x86/include/asm/clocksource.h
index d194266..eae33c7 100644
--- a/arch/x86/include/asm/clocksource.h
+++ b/arch/x86/include/asm/clocksource.h
@@ -3,11 +3,10 @@
#ifndef _ASM_X86_CLOCKSOURCE_H
#define _ASM_X86_CLOCKSOURCE_H
-#define VCLOCK_NONE 0 /* No vDSO clock available. */
-#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */
-#define VCLOCK_HPET 2 /* vDSO should use vread_hpet. */
-#define VCLOCK_PVCLOCK 3 /* vDSO should use vread_pvclock. */
-#define VCLOCK_MAX 3
+#define VCLOCK_NONE 0 /* No vDSO clock available. */
+#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */
+#define VCLOCK_PVCLOCK 2 /* vDSO should use vread_pvclock. */
+#define VCLOCK_MAX 2
struct arch_clocksource_data {
int vclock_mode;
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index ebb102e..5a3b2c1 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -307,7 +307,7 @@
return (void __user *)round_down(sp - len, 16);
}
-static inline bool is_x32_task(void)
+static inline bool in_x32_syscall(void)
{
#ifdef CONFIG_X86_X32_ABI
if (task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT)
@@ -318,7 +318,7 @@
static inline bool in_compat_syscall(void)
{
- return is_ia32_task() || is_x32_task();
+ return in_ia32_syscall() || in_x32_syscall();
}
#define in_compat_syscall in_compat_syscall /* override the generic impl */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 3636ec0..25ebb54 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -27,6 +27,7 @@
CPUID_6_EAX,
CPUID_8000_000A_EDX,
CPUID_7_ECX,
+ CPUID_8000_0007_EBX,
};
#ifdef CONFIG_X86_FEATURE_NAMES
@@ -118,31 +119,6 @@
set_bit(bit, (unsigned long *)cpu_caps_set); \
} while (0)
-#define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU)
-#define cpu_has_pse boot_cpu_has(X86_FEATURE_PSE)
-#define cpu_has_tsc boot_cpu_has(X86_FEATURE_TSC)
-#define cpu_has_pge boot_cpu_has(X86_FEATURE_PGE)
-#define cpu_has_apic boot_cpu_has(X86_FEATURE_APIC)
-#define cpu_has_fxsr boot_cpu_has(X86_FEATURE_FXSR)
-#define cpu_has_xmm boot_cpu_has(X86_FEATURE_XMM)
-#define cpu_has_xmm2 boot_cpu_has(X86_FEATURE_XMM2)
-#define cpu_has_aes boot_cpu_has(X86_FEATURE_AES)
-#define cpu_has_avx boot_cpu_has(X86_FEATURE_AVX)
-#define cpu_has_avx2 boot_cpu_has(X86_FEATURE_AVX2)
-#define cpu_has_clflush boot_cpu_has(X86_FEATURE_CLFLUSH)
-#define cpu_has_gbpages boot_cpu_has(X86_FEATURE_GBPAGES)
-#define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
-#define cpu_has_pat boot_cpu_has(X86_FEATURE_PAT)
-#define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC)
-#define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE)
-#define cpu_has_xsaves boot_cpu_has(X86_FEATURE_XSAVES)
-#define cpu_has_osxsave boot_cpu_has(X86_FEATURE_OSXSAVE)
-#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR)
-/*
- * Do not add any more of those clumsy macros - use static_cpu_has() for
- * fast paths and boot_cpu_has() otherwise!
- */
-
#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
/*
* Static testing of CPU features. Used the same as boot_cpu_has().
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 8f9afef..4a41348 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -12,7 +12,7 @@
/*
* Defines x86 CPU feature bits
*/
-#define NCAPINTS 17 /* N 32-bit words worth of info */
+#define NCAPINTS 18 /* N 32-bit words worth of info */
#define NBUGINTS 1 /* N 32-bit bug flags */
/*
@@ -177,6 +177,7 @@
#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */
#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */
#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */
+#define X86_FEATURE_PTSC ( 6*32+27) /* performance time-stamp counter */
#define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */
#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */
@@ -250,6 +251,7 @@
/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */
+#define X86_FEATURE_IRPERF (13*32+1) /* Instructions Retired Count */
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
@@ -280,6 +282,11 @@
#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */
#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
+/* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */
+#define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */
+#define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */
+#define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */
+
/*
* BUG word(s)
*/
@@ -294,6 +301,9 @@
#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
+#define X86_BUG_NULL_SEG X86_BUG(9) /* Nulling a selector preserves the base */
+#define X86_BUG_SWAPGS_FENCE X86_BUG(10) /* SWAPGS without input dep on GS */
+
#ifdef CONFIG_X86_32
/*
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 53748c4..78d1e74 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -3,6 +3,7 @@
#include <asm/fpu/api.h>
#include <asm/pgtable.h>
+#include <asm/processor-flags.h>
#include <asm/tlb.h>
/*
@@ -28,33 +29,22 @@
#define MAX_CMDLINE_ADDRESS UINT_MAX
+#define ARCH_EFI_IRQ_FLAGS_MASK X86_EFLAGS_IF
+
#ifdef CONFIG_X86_32
-
extern unsigned long asmlinkage efi_call_phys(void *, ...);
+#define arch_efi_call_virt_setup() kernel_fpu_begin()
+#define arch_efi_call_virt_teardown() kernel_fpu_end()
+
/*
* Wrap all the virtual calls in a way that forces the parameters on the stack.
*/
-
-/* Use this macro if your virtual returns a non-void value */
-#define efi_call_virt(f, args...) \
+#define arch_efi_call_virt(f, args...) \
({ \
- efi_status_t __s; \
- kernel_fpu_begin(); \
- __s = ((efi_##f##_t __attribute__((regparm(0)))*) \
- efi.systab->runtime->f)(args); \
- kernel_fpu_end(); \
- __s; \
-})
-
-/* Use this macro if your virtual call does not return any value */
-#define __efi_call_virt(f, args...) \
-({ \
- kernel_fpu_begin(); \
((efi_##f##_t __attribute__((regparm(0)))*) \
efi.systab->runtime->f)(args); \
- kernel_fpu_end(); \
})
#define efi_ioremap(addr, size, type, attr) ioremap_cache(addr, size)
@@ -78,10 +68,8 @@
u64 phys_stack;
} __packed;
-#define efi_call_virt(f, ...) \
+#define arch_efi_call_virt_setup() \
({ \
- efi_status_t __s; \
- \
efi_sync_low_kernel_mappings(); \
preempt_disable(); \
__kernel_fpu_begin(); \
@@ -91,9 +79,13 @@
write_cr3((unsigned long)efi_scratch.efi_pgt); \
__flush_tlb_all(); \
} \
- \
- __s = efi_call((void *)efi.systab->runtime->f, __VA_ARGS__); \
- \
+})
+
+#define arch_efi_call_virt(f, args...) \
+ efi_call((void *)efi.systab->runtime->f, args) \
+
+#define arch_efi_call_virt_teardown() \
+({ \
if (efi_scratch.use_pgd) { \
write_cr3(efi_scratch.prev_cr3); \
__flush_tlb_all(); \
@@ -101,15 +93,8 @@
\
__kernel_fpu_end(); \
preempt_enable(); \
- __s; \
})
-/*
- * All X86_64 virt calls return non-void values. Thus, use non-void call for
- * virt calls that would be void on X86_32.
- */
-#define __efi_call_virt(f, args...) efi_call_virt(f, args)
-
extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size,
u32 type, u64 attribute);
@@ -180,6 +165,8 @@
extern struct console early_efi_console;
extern void parse_efi_setup(u64 phys_addr, u32 data_len);
+extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt);
+
#ifdef CONFIG_EFI_MIXED
extern void efi_thunk_runtime_setup(void);
extern efi_status_t efi_thunk_set_virtual_address_map(
@@ -225,6 +212,11 @@
#define efi_call_early(f, ...) \
__efi_early()->call(__efi_early()->f, __VA_ARGS__);
+#define __efi_call_early(f, ...) \
+ __efi_early()->call((unsigned long)f, __VA_ARGS__);
+
+#define efi_is_64bit() __efi_early()->is64
+
extern bool efi_reboot_required(void);
#else
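With the status handling moved out of the per-arch macros, a single generic wrapper can now compose the three pieces for every runtime service; a sketch of the shape the common EFI runtime-wrapper code can take:

	#define efi_call_virt(f, args...)				\
	({								\
		efi_status_t __s;					\
									\
		arch_efi_call_virt_setup();				\
		__s = arch_efi_call_virt(f, args);			\
		arch_efi_call_virt_teardown();				\
		__s;							\
	})

On 32-bit this expands to the kernel_fpu_begin()/end() bracket above; on 64-bit it brackets the call with the page-table switch and preemption handling.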
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 15340e3..fea7724 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -176,7 +176,7 @@
regs->si = regs->di = regs->bp = 0;
regs->r8 = regs->r9 = regs->r10 = regs->r11 = 0;
regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;
- t->fs = t->gs = 0;
+ t->fsbase = t->gsbase = 0;
t->fsindex = t->gsindex = 0;
t->ds = t->es = ds;
}
@@ -226,8 +226,8 @@
(pr_reg)[18] = (regs)->flags; \
(pr_reg)[19] = (regs)->sp; \
(pr_reg)[20] = (regs)->ss; \
- (pr_reg)[21] = current->thread.fs; \
- (pr_reg)[22] = current->thread.gs; \
+ (pr_reg)[21] = current->thread.fsbase; \
+ (pr_reg)[22] = current->thread.gsbase; \
asm("movl %%ds,%0" : "=r" (v)); (pr_reg)[23] = v; \
asm("movl %%es,%0" : "=r" (v)); (pr_reg)[24] = v; \
asm("movl %%fs,%0" : "=r" (v)); (pr_reg)[25] = v; \
diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h
index e6a8613..3a10616 100644
--- a/arch/x86/include/asm/hugetlb.h
+++ b/arch/x86/include/asm/hugetlb.h
@@ -4,7 +4,7 @@
#include <asm/page.h>
#include <asm-generic/hugetlb.h>
-#define hugepages_supported() cpu_has_pse
+#define hugepages_supported() boot_cpu_has(X86_FEATURE_PSE)
static inline int is_hugepage_only_range(struct mm_struct *mm,
unsigned long addr,
diff --git a/arch/x86/include/asm/irq_work.h b/arch/x86/include/asm/irq_work.h
index d0afb05..f706041 100644
--- a/arch/x86/include/asm/irq_work.h
+++ b/arch/x86/include/asm/irq_work.h
@@ -5,7 +5,7 @@
static inline bool arch_irq_work_has_interrupt(void)
{
- return cpu_has_apic;
+ return boot_cpu_has(X86_FEATURE_APIC);
}
#endif /* _ASM_IRQ_WORK_H */
diff --git a/arch/x86/include/asm/kgdb.h b/arch/x86/include/asm/kgdb.h
index 332f98c..22a8537 100644
--- a/arch/x86/include/asm/kgdb.h
+++ b/arch/x86/include/asm/kgdb.h
@@ -6,6 +6,8 @@
* Copyright (C) 2008 Wind River Systems, Inc.
*/
+#include <asm/ptrace.h>
+
/*
* BUFMAX defines the maximum number of characters in inbound/outbound
* buffers at least NUMREGBYTES*2 are needed for register packets
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index 79327e9..0ccb26d 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -8,40 +8,6 @@
#ifdef CONFIG_X86_32
#define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0)))
-
-/*
- * Make sure the compiler doesn't do anything stupid with the
- * arguments on the stack - they are owned by the *caller*, not
- * the callee. This just fools gcc into not spilling into them,
- * and keeps it from doing tailcall recursion and/or using the
- * stack slots for temporaries, since they are live and "used"
- * all the way to the end of the function.
- *
- * NOTE! On x86-64, all the arguments are in registers, so this
- * only matters on a 32-bit kernel.
- */
-#define asmlinkage_protect(n, ret, args...) \
- __asmlinkage_protect##n(ret, ##args)
-#define __asmlinkage_protect_n(ret, args...) \
- __asm__ __volatile__ ("" : "=r" (ret) : "0" (ret), ##args)
-#define __asmlinkage_protect0(ret) \
- __asmlinkage_protect_n(ret)
-#define __asmlinkage_protect1(ret, arg1) \
- __asmlinkage_protect_n(ret, "m" (arg1))
-#define __asmlinkage_protect2(ret, arg1, arg2) \
- __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2))
-#define __asmlinkage_protect3(ret, arg1, arg2, arg3) \
- __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3))
-#define __asmlinkage_protect4(ret, arg1, arg2, arg3, arg4) \
- __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \
- "m" (arg4))
-#define __asmlinkage_protect5(ret, arg1, arg2, arg3, arg4, arg5) \
- __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \
- "m" (arg4), "m" (arg5))
-#define __asmlinkage_protect6(ret, arg1, arg2, arg3, arg4, arg5, arg6) \
- __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \
- "m" (arg4), "m" (arg5), "m" (arg6))
-
#endif /* CONFIG_X86_32 */
#ifdef __ASSEMBLY__
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 92b6f65..8bf766e 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -104,13 +104,23 @@
#define MCE_LOG_SIGNATURE "MACHINECHECK"
/* AMD Scalable MCA */
+#define MSR_AMD64_SMCA_MC0_CTL 0xc0002000
+#define MSR_AMD64_SMCA_MC0_STATUS 0xc0002001
+#define MSR_AMD64_SMCA_MC0_ADDR 0xc0002002
#define MSR_AMD64_SMCA_MC0_MISC0 0xc0002003
#define MSR_AMD64_SMCA_MC0_CONFIG 0xc0002004
#define MSR_AMD64_SMCA_MC0_IPID 0xc0002005
+#define MSR_AMD64_SMCA_MC0_DESTAT 0xc0002008
+#define MSR_AMD64_SMCA_MC0_DEADDR 0xc0002009
#define MSR_AMD64_SMCA_MC0_MISC1 0xc000200a
+#define MSR_AMD64_SMCA_MCx_CTL(x) (MSR_AMD64_SMCA_MC0_CTL + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_STATUS(x) (MSR_AMD64_SMCA_MC0_STATUS + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_ADDR(x) (MSR_AMD64_SMCA_MC0_ADDR + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_MISC(x) (MSR_AMD64_SMCA_MC0_MISC0 + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_CONFIG(x) (MSR_AMD64_SMCA_MC0_CONFIG + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_IPID(x) (MSR_AMD64_SMCA_MC0_IPID + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_DESTAT(x) (MSR_AMD64_SMCA_MC0_DESTAT + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_DEADDR(x) (MSR_AMD64_SMCA_MC0_DEADDR + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_MISCy(x, y) ((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x)))
/*
@@ -168,9 +178,18 @@
__reserved_0 : 61;
};
+
+struct mca_msr_regs {
+ u32 (*ctl) (int bank);
+ u32 (*status) (int bank);
+ u32 (*addr) (int bank);
+ u32 (*misc) (int bank);
+};
+
extern struct mce_vendor_flags mce_flags;
extern struct mca_config mca_cfg;
+extern struct mca_msr_regs msr_ops;
extern void mce_register_decode_chain(struct notifier_block *nb);
extern void mce_unregister_decode_chain(struct notifier_block *nb);
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 8428002..39634819 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -115,103 +115,12 @@
destroy_context_ldt(mm);
}
-static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
- struct task_struct *tsk)
-{
- unsigned cpu = smp_processor_id();
+extern void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+ struct task_struct *tsk);
- if (likely(prev != next)) {
-#ifdef CONFIG_SMP
- this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
- this_cpu_write(cpu_tlbstate.active_mm, next);
-#endif
- cpumask_set_cpu(cpu, mm_cpumask(next));
-
- /*
- * Re-load page tables.
- *
- * This logic has an ordering constraint:
- *
- * CPU 0: Write to a PTE for 'next'
- * CPU 0: load bit 1 in mm_cpumask. if nonzero, send IPI.
- * CPU 1: set bit 1 in next's mm_cpumask
- * CPU 1: load from the PTE that CPU 0 writes (implicit)
- *
- * We need to prevent an outcome in which CPU 1 observes
- * the new PTE value and CPU 0 observes bit 1 clear in
- * mm_cpumask. (If that occurs, then the IPI will never
- * be sent, and CPU 0's TLB will contain a stale entry.)
- *
- * The bad outcome can occur if either CPU's load is
- * reordered before that CPU's store, so both CPUs must
- * execute full barriers to prevent this from happening.
- *
- * Thus, switch_mm needs a full barrier between the
- * store to mm_cpumask and any operation that could load
- * from next->pgd. TLB fills are special and can happen
- * due to instruction fetches or for no reason at all,
- * and neither LOCK nor MFENCE orders them.
- * Fortunately, load_cr3() is serializing and gives the
- * ordering guarantee we need.
- *
- */
- load_cr3(next->pgd);
-
- trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
-
- /* Stop flush ipis for the previous mm */
- cpumask_clear_cpu(cpu, mm_cpumask(prev));
-
- /* Load per-mm CR4 state */
- load_mm_cr4(next);
-
-#ifdef CONFIG_MODIFY_LDT_SYSCALL
- /*
- * Load the LDT, if the LDT is different.
- *
- * It's possible that prev->context.ldt doesn't match
- * the LDT register. This can happen if leave_mm(prev)
- * was called and then modify_ldt changed
- * prev->context.ldt but suppressed an IPI to this CPU.
- * In this case, prev->context.ldt != NULL, because we
- * never set context.ldt to NULL while the mm still
- * exists. That means that next->context.ldt !=
- * prev->context.ldt, because mms never share an LDT.
- */
- if (unlikely(prev->context.ldt != next->context.ldt))
- load_mm_ldt(next);
-#endif
- }
-#ifdef CONFIG_SMP
- else {
- this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
- BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next);
-
- if (!cpumask_test_cpu(cpu, mm_cpumask(next))) {
- /*
- * On established mms, the mm_cpumask is only changed
- * from irq context, from ptep_clear_flush() while in
- * lazy tlb mode, and here. Irqs are blocked during
- * schedule, protecting us from simultaneous changes.
- */
- cpumask_set_cpu(cpu, mm_cpumask(next));
-
- /*
- * We were in lazy tlb mode and leave_mm disabled
- * tlb flush IPI delivery. We must reload CR3
- * to make sure to use no freed page tables.
- *
- * As above, load_cr3() is serializing and orders TLB
- * fills with respect to the mm_cpumask write.
- */
- load_cr3(next->pgd);
- trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
- load_mm_cr4(next);
- load_mm_ldt(next);
- }
- }
-#endif
-}
+extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+ struct task_struct *tsk);
+#define switch_mm_irqs_off switch_mm_irqs_off
#define activate_mm(prev, next) \
do { \
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 5b3c9a5..5a73a9c 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -89,27 +89,16 @@
#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6
#define MSR_IA32_RTIT_CTL 0x00000570
-#define RTIT_CTL_TRACEEN BIT(0)
-#define RTIT_CTL_CYCLEACC BIT(1)
-#define RTIT_CTL_OS BIT(2)
-#define RTIT_CTL_USR BIT(3)
-#define RTIT_CTL_CR3EN BIT(7)
-#define RTIT_CTL_TOPA BIT(8)
-#define RTIT_CTL_MTC_EN BIT(9)
-#define RTIT_CTL_TSC_EN BIT(10)
-#define RTIT_CTL_DISRETC BIT(11)
-#define RTIT_CTL_BRANCH_EN BIT(13)
-#define RTIT_CTL_MTC_RANGE_OFFSET 14
-#define RTIT_CTL_MTC_RANGE (0x0full << RTIT_CTL_MTC_RANGE_OFFSET)
-#define RTIT_CTL_CYC_THRESH_OFFSET 19
-#define RTIT_CTL_CYC_THRESH (0x0full << RTIT_CTL_CYC_THRESH_OFFSET)
-#define RTIT_CTL_PSB_FREQ_OFFSET 24
-#define RTIT_CTL_PSB_FREQ (0x0full << RTIT_CTL_PSB_FREQ_OFFSET)
#define MSR_IA32_RTIT_STATUS 0x00000571
-#define RTIT_STATUS_CONTEXTEN BIT(1)
-#define RTIT_STATUS_TRIGGEREN BIT(2)
-#define RTIT_STATUS_ERROR BIT(4)
-#define RTIT_STATUS_STOPPED BIT(5)
+#define MSR_IA32_RTIT_ADDR0_A 0x00000580
+#define MSR_IA32_RTIT_ADDR0_B 0x00000581
+#define MSR_IA32_RTIT_ADDR1_A 0x00000582
+#define MSR_IA32_RTIT_ADDR1_B 0x00000583
+#define MSR_IA32_RTIT_ADDR2_A 0x00000584
+#define MSR_IA32_RTIT_ADDR2_B 0x00000585
+#define MSR_IA32_RTIT_ADDR3_A 0x00000586
+#define MSR_IA32_RTIT_ADDR3_B 0x00000587
#define MSR_IA32_RTIT_CR3_MATCH 0x00000572
#define MSR_IA32_RTIT_OUTPUT_BASE 0x00000560
#define MSR_IA32_RTIT_OUTPUT_MASK 0x00000561
@@ -205,6 +194,8 @@
#define MSR_CONFIG_TDP_CONTROL 0x0000064B
#define MSR_TURBO_ACTIVATION_RATIO 0x0000064C
+#define MSR_PLATFORM_ENERGY_STATUS 0x0000064D
+
#define MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658
#define MSR_PKG_ANY_CORE_C0_RES 0x00000659
#define MSR_PKG_ANY_GFXE_C0_RES 0x0000065A
@@ -315,6 +306,9 @@
#define MSR_AMD64_IBSOPDATA4 0xc001103d
#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
+/* Fam 17h MSRs */
+#define MSR_F17H_IRPERF 0xc00000e9
+
/* Fam 16h MSRs */
#define MSR_F16H_L2I_PERF_CTL 0xc0010230
#define MSR_F16H_L2I_PERF_CTR 0xc0010231
@@ -328,6 +322,7 @@
#define MSR_F15H_PERF_CTR 0xc0010201
#define MSR_F15H_NB_PERF_CTL 0xc0010240
#define MSR_F15H_NB_PERF_CTR 0xc0010241
+#define MSR_F15H_PTSC 0xc0010280
#define MSR_F15H_IC_CFG 0xc0011021
/* Fam 10h MSRs */
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 7a79ee2..7dc1d8f 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -84,7 +84,10 @@
{
DECLARE_ARGS(val, low, high);
- asm volatile("rdmsr" : EAX_EDX_RET(val, low, high) : "c" (msr));
+ asm volatile("1: rdmsr\n"
+ "2:\n"
+ _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_rdmsr_unsafe)
+ : EAX_EDX_RET(val, low, high) : "c" (msr));
if (msr_tracepoint_active(__tracepoint_read_msr))
do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), 0);
return EAX_EDX_VAL(val, low, high);
@@ -98,7 +101,10 @@
asm volatile("2: rdmsr ; xor %[err],%[err]\n"
"1:\n\t"
".section .fixup,\"ax\"\n\t"
- "3: mov %[fault],%[err] ; jmp 1b\n\t"
+ "3: mov %[fault],%[err]\n\t"
+ "xorl %%eax, %%eax\n\t"
+ "xorl %%edx, %%edx\n\t"
+ "jmp 1b\n\t"
".previous\n\t"
_ASM_EXTABLE(2b, 3b)
: [err] "=r" (*err), EAX_EDX_RET(val, low, high)
@@ -108,10 +114,14 @@
return EAX_EDX_VAL(val, low, high);
}
-static inline void native_write_msr(unsigned int msr,
- unsigned low, unsigned high)
+/* Can be uninlined because referenced by paravirt */
+notrace static inline void native_write_msr(unsigned int msr,
+ unsigned low, unsigned high)
{
- asm volatile("wrmsr" : : "c" (msr), "a"(low), "d" (high) : "memory");
+ asm volatile("1: wrmsr\n"
+ "2:\n"
+ _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_unsafe)
+ : : "c" (msr), "a"(low), "d" (high) : "memory");
if (msr_tracepoint_active(__tracepoint_read_msr))
do_trace_write_msr(msr, ((u64)high << 32 | low), 0);
}
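Seen from a caller, the practical difference is that the plain accessors no longer #GP on an unimplemented MSR but recover through the exception table (the fixup above zeroes EAX/EDX on a failed read), while the _safe variants keep reporting the fault. A usage sketch, with MSR_PLATFORM_ENERGY_STATUS standing in for any possibly-absent MSR:

	u64 energy;

	/* safe variant: a non-zero return means the MSR faulted */
	if (rdmsrl_safe(MSR_PLATFORM_ENERGY_STATUS, &energy))
		pr_warn("PSYS energy MSR not implemented here\n");

	/* plain variant: a bad MSR now warns once via the extable
	 * handler and yields 0 instead of killing the machine */
	rdmsrl(MSR_PLATFORM_ENERGY_STATUS, energy);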
diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h
index b94f6f6..dbff145 100644
--- a/arch/x86/include/asm/mtrr.h
+++ b/arch/x86/include/asm/mtrr.h
@@ -24,6 +24,7 @@
#define _ASM_X86_MTRR_H
#include <uapi/asm/mtrr.h>
+#include <asm/pat.h>
/*
@@ -83,9 +84,12 @@
static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi)
{
}
+static inline void mtrr_bp_init(void)
+{
+ pat_disable("MTRRs disabled, skipping PAT initialization too.");
+}
#define mtrr_ap_init() do {} while (0)
-#define mtrr_bp_init() do {} while (0)
#define set_mtrr_aps_delayed_init() do {} while (0)
#define mtrr_aps_init() do {} while (0)
#define mtrr_bp_restore() do {} while (0)
diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index 802dde3..cf8f619 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -37,7 +37,10 @@
alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
+#ifndef __pa
#define __pa(x) __phys_addr((unsigned long)(x))
+#endif
+
#define __pa_nodebug(x) __phys_addr_nodebug((unsigned long)(x))
/* __pa_symbol should be used for C visible symbols.
This seems to be the official gcc blessed way to do such arithmetic. */
@@ -51,7 +54,9 @@
#define __pa_symbol(x) \
__phys_addr_symbol(__phys_reloc_hide((unsigned long)(x)))
+#ifndef __va
#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
+#endif
#define __boot_va(x) __va(x)
#define __boot_pa(x) __pa(x)
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 4928cf0..d5c2f8b 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -47,12 +47,10 @@
* are fully set up. If kernel ASLR is configured, it can extend the
* kernel page table mapping, reducing the size of the modules area.
*/
-#define KERNEL_IMAGE_SIZE_DEFAULT (512 * 1024 * 1024)
-#if defined(CONFIG_RANDOMIZE_BASE) && \
- CONFIG_RANDOMIZE_BASE_MAX_OFFSET > KERNEL_IMAGE_SIZE_DEFAULT
-#define KERNEL_IMAGE_SIZE CONFIG_RANDOMIZE_BASE_MAX_OFFSET
+#if defined(CONFIG_RANDOMIZE_BASE)
+#define KERNEL_IMAGE_SIZE (1024 * 1024 * 1024)
#else
-#define KERNEL_IMAGE_SIZE KERNEL_IMAGE_SIZE_DEFAULT
+#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024)
#endif
#endif /* _ASM_X86_PAGE_64_DEFS_H */
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 601f1b8..2970d22 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -15,17 +15,6 @@
#include <linux/cpumask.h>
#include <asm/frame.h>
-static inline int paravirt_enabled(void)
-{
- return pv_info.paravirt_enabled;
-}
-
-static inline int paravirt_has_feature(unsigned int feature)
-{
- WARN_ON_ONCE(!pv_info.paravirt_enabled);
- return (pv_info.features & feature);
-}
-
static inline void load_sp0(struct tss_struct *tss,
struct thread_struct *thread)
{
@@ -130,21 +119,31 @@
#define get_kernel_rpl() (pv_info.kernel_rpl)
-static inline u64 paravirt_read_msr(unsigned msr, int *err)
+static inline u64 paravirt_read_msr(unsigned msr)
{
- return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err);
+ return PVOP_CALL1(u64, pv_cpu_ops.read_msr, msr);
}
-static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high)
+static inline void paravirt_write_msr(unsigned msr,
+ unsigned low, unsigned high)
{
- return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high);
+ return PVOP_VCALL3(pv_cpu_ops.write_msr, msr, low, high);
}
-/* These should all do BUG_ON(_err), but our headers are too tangled. */
+static inline u64 paravirt_read_msr_safe(unsigned msr, int *err)
+{
+ return PVOP_CALL2(u64, pv_cpu_ops.read_msr_safe, msr, err);
+}
+
+static inline int paravirt_write_msr_safe(unsigned msr,
+ unsigned low, unsigned high)
+{
+ return PVOP_CALL3(int, pv_cpu_ops.write_msr_safe, msr, low, high);
+}
+
#define rdmsr(msr, val1, val2) \
do { \
- int _err; \
- u64 _l = paravirt_read_msr(msr, &_err); \
+ u64 _l = paravirt_read_msr(msr); \
val1 = (u32)_l; \
val2 = _l >> 32; \
} while (0)
@@ -156,8 +155,7 @@
#define rdmsrl(msr, val) \
do { \
- int _err; \
- val = paravirt_read_msr(msr, &_err); \
+ val = paravirt_read_msr(msr); \
} while (0)
static inline void wrmsrl(unsigned msr, u64 val)
@@ -165,23 +163,23 @@
wrmsr(msr, (u32)val, (u32)(val>>32));
}
-#define wrmsr_safe(msr, a, b) paravirt_write_msr(msr, a, b)
+#define wrmsr_safe(msr, a, b) paravirt_write_msr_safe(msr, a, b)
/* rdmsr with exception handling */
-#define rdmsr_safe(msr, a, b) \
-({ \
- int _err; \
- u64 _l = paravirt_read_msr(msr, &_err); \
- (*a) = (u32)_l; \
- (*b) = _l >> 32; \
- _err; \
+#define rdmsr_safe(msr, a, b) \
+({ \
+ int _err; \
+ u64 _l = paravirt_read_msr_safe(msr, &_err); \
+ (*a) = (u32)_l; \
+ (*b) = _l >> 32; \
+ _err; \
})
static inline int rdmsrl_safe(unsigned msr, unsigned long long *p)
{
int err;
- *p = paravirt_read_msr(msr, &err);
+ *p = paravirt_read_msr_safe(msr, &err);
return err;
}
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index e8c2326..7fa9e77 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -69,15 +69,9 @@
u16 extra_user_64bit_cs; /* __USER_CS if none */
#endif
- int paravirt_enabled;
- unsigned int features; /* valid only if paravirt_enabled is set */
const char *name;
};
-#define paravirt_has(x) paravirt_has_feature(PV_SUPPORTED_##x)
-/* Supported features */
-#define PV_SUPPORTED_RTC (1<<0)
-
struct pv_init_ops {
/*
* Patch may replace one of the defined code sequences with
@@ -155,10 +149,16 @@
void (*cpuid)(unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx);
- /* MSR, PMC and TSR operations.
- err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */
- u64 (*read_msr)(unsigned int msr, int *err);
- int (*write_msr)(unsigned int msr, unsigned low, unsigned high);
+ /* Unsafe MSR operations. These will warn or panic on failure. */
+ u64 (*read_msr)(unsigned int msr);
+ void (*write_msr)(unsigned int msr, unsigned low, unsigned high);
+
+ /*
+ * Safe MSR operations.
+ * read sets err to 0 or -EIO. write returns 0 or -EIO.
+ */
+ u64 (*read_msr_safe)(unsigned int msr, int *err);
+ int (*write_msr_safe)(unsigned int msr, unsigned low, unsigned high);
u64 (*read_pmc)(int counter);
diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h
index ca6c228..0b1ff4c 100644
--- a/arch/x86/include/asm/pat.h
+++ b/arch/x86/include/asm/pat.h
@@ -5,8 +5,8 @@
#include <asm/pgtable_types.h>
bool pat_enabled(void);
+void pat_disable(const char *reason);
extern void pat_init(void);
-void pat_init_cache_modes(u64);
extern int reserve_memtype(u64 start, u64 end,
enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm);
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 97f3242..f86491a 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -183,7 +183,7 @@
static inline int has_transparent_hugepage(void)
{
- return cpu_has_pse;
+ return boot_cpu_has(X86_FEATURE_PSE);
}
#ifdef __HAVE_ARCH_PTE_DEVMAP
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 9264476..62c6cc3 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -388,9 +388,16 @@
unsigned long ip;
#endif
#ifdef CONFIG_X86_64
- unsigned long fs;
+ unsigned long fsbase;
+ unsigned long gsbase;
+#else
+ /*
+ * XXX: this could presumably be unsigned short. Alternatively,
+ * 32-bit kernels could be taught to use fsindex instead.
+ */
+ unsigned long fs;
+ unsigned long gs;
#endif
- unsigned long gs;
/* Save middle states of ptrace breakpoints */
struct perf_event *ptrace_bps[HBP_NUM];
@@ -473,8 +480,6 @@
#include <asm/paravirt.h>
#else
#define __cpuid native_cpuid
-#define paravirt_enabled() 0
-#define paravirt_has(x) 0
static inline void load_sp0(struct tss_struct *tss,
struct thread_struct *thread)
diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h
index ceec86eb..453744c 100644
--- a/arch/x86/include/asm/rwsem.h
+++ b/arch/x86/include/asm/rwsem.h
@@ -99,26 +99,36 @@
/*
* lock for writing
*/
-static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
-{
- long tmp;
- asm volatile("# beginning down_write\n\t"
- LOCK_PREFIX " xadd %1,(%2)\n\t"
- /* adds 0xffff0001, returns the old value */
- " test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t"
- /* was the active mask 0 before? */
- " jz 1f\n"
- " call call_rwsem_down_write_failed\n"
- "1:\n"
- "# ending down_write"
- : "+m" (sem->count), "=d" (tmp)
- : "a" (sem), "1" (RWSEM_ACTIVE_WRITE_BIAS)
- : "memory", "cc");
-}
+#define ____down_write(sem, slow_path) \
+({ \
+ long tmp; \
+ struct rw_semaphore* ret; \
+ asm volatile("# beginning down_write\n\t" \
+ LOCK_PREFIX " xadd %1,(%3)\n\t" \
+ /* adds 0xffff0001, returns the old value */ \
+ " test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t" \
+ /* was the active mask 0 before? */\
+ " jz 1f\n" \
+ " call " slow_path "\n" \
+ "1:\n" \
+ "# ending down_write" \
+ : "+m" (sem->count), "=d" (tmp), "=a" (ret) \
+ : "a" (sem), "1" (RWSEM_ACTIVE_WRITE_BIAS) \
+ : "memory", "cc"); \
+ ret; \
+})
static inline void __down_write(struct rw_semaphore *sem)
{
- __down_write_nested(sem, 0);
+ ____down_write(sem, "call_rwsem_down_write_failed");
+}
+
+static inline int __down_write_killable(struct rw_semaphore *sem)
+{
+ if (IS_ERR(____down_write(sem, "call_rwsem_down_write_failed_killable")))
+ return -EINTR;
+
+ return 0;
}
/*
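A caller-side usage sketch for the killable variant, assuming the generic down_write_killable() wrapper that sits on top of this arch helper:

	/* the write-lock attempt can now be aborted by a fatal signal */
	if (down_write_killable(&mm->mmap_sem))
		return -EINTR;

	/* ... modify the address space ... */

	up_write(&mm->mmap_sem);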
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index 7d5a192..1549caa0 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -2,6 +2,7 @@
#define _ASM_X86_SEGMENT_H
#include <linux/const.h>
+#include <asm/alternative.h>
/*
* Constructor for a conventional segment GDT (or LDT) entry.
@@ -207,13 +208,6 @@
#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8 + 3)
#define __PER_CPU_SEG (GDT_ENTRY_PER_CPU*8 + 3)
-/* TLS indexes for 64-bit - hardcoded in arch_prctl(): */
-#define FS_TLS 0
-#define GS_TLS 1
-
-#define GS_TLS_SEL ((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3)
-#define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3)
-
#endif
#ifndef CONFIG_PARAVIRT
@@ -249,10 +243,13 @@
#endif
/*
- * Load a segment. Fall back on loading the zero
- * segment if something goes wrong..
+ * Load a segment. Fall back on loading the zero segment if something goes
+ * wrong. This variant assumes that loading zero fully clears the segment.
+ * This is always the case on Intel CPUs and, even on 64-bit AMD CPUs, any
+ * failure to fully clear the cached descriptor is only observable for
+ * FS and GS.
*/
-#define loadsegment(seg, value) \
+#define __loadsegment_simple(seg, value) \
do { \
unsigned short __val = (value); \
\
@@ -269,6 +266,38 @@
: "+r" (__val) : : "memory"); \
} while (0)
+#define __loadsegment_ss(value) __loadsegment_simple(ss, (value))
+#define __loadsegment_ds(value) __loadsegment_simple(ds, (value))
+#define __loadsegment_es(value) __loadsegment_simple(es, (value))
+
+#ifdef CONFIG_X86_32
+
+/*
+ * On 32-bit systems, the hidden parts of FS and GS are unobservable if
+ * the selector is NULL, so there's no funny business here.
+ */
+#define __loadsegment_fs(value) __loadsegment_simple(fs, (value))
+#define __loadsegment_gs(value) __loadsegment_simple(gs, (value))
+
+#else
+
+static inline void __loadsegment_fs(unsigned short value)
+{
+ asm volatile(" \n"
+ "1: movw %0, %%fs \n"
+ "2: \n"
+
+ _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_clear_fs)
+
+ : : "rm" (value) : "memory");
+}
+
+/* __loadsegment_gs is intentionally undefined. Use load_gs_index instead. */
+
+#endif
+
+#define loadsegment(seg, value) __loadsegment_ ## seg (value)
+
/*
* Save a segment register away:
*/
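Because loadsegment() now token-pastes the register name, existing call sites compile unchanged while silently selecting the right implementation, e.g.:

	loadsegment(ds, __USER_DS);	/* -> __loadsegment_ds(), simple variant */
	loadsegment(fs, 0);		/* -> __loadsegment_fs(); on 64-bit this
					 * is the extable-backed variant above */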
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index 11af24e..ac1d5da 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -6,6 +6,7 @@
#define COMMAND_LINE_SIZE 2048
#include <linux/linkage.h>
+#include <asm/page_types.h>
#ifdef __i386__
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 751bf4b..8f321a1 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -39,8 +39,7 @@
*/ \
unsigned long ebx, ecx, edx, esi, edi; \
\
- asm volatile("pushfl\n\t" /* save flags */ \
- "pushl %%ebp\n\t" /* save EBP */ \
+ asm volatile("pushl %%ebp\n\t" /* save EBP */ \
"movl %%esp,%[prev_sp]\n\t" /* save ESP */ \
"movl %[next_sp],%%esp\n\t" /* restore ESP */ \
"movl $1f,%[prev_ip]\n\t" /* save EIP */ \
@@ -49,7 +48,6 @@
"jmp __switch_to\n" /* regparm call */ \
"1:\t" \
"popl %%ebp\n\t" /* restore EBP */ \
- "popfl\n" /* restore flags */ \
\
/* output parameters */ \
: [prev_sp] "=m" (prev->thread.sp), \
diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
new file mode 100644
index 0000000..9039506
--- /dev/null
+++ b/arch/x86/include/asm/text-patching.h
@@ -0,0 +1,40 @@
+#ifndef _ASM_X86_TEXT_PATCHING_H
+#define _ASM_X86_TEXT_PATCHING_H
+
+#include <linux/types.h>
+#include <linux/stddef.h>
+#include <asm/ptrace.h>
+
+struct paravirt_patch_site;
+#ifdef CONFIG_PARAVIRT
+void apply_paravirt(struct paravirt_patch_site *start,
+ struct paravirt_patch_site *end);
+#else
+static inline void apply_paravirt(struct paravirt_patch_site *start,
+ struct paravirt_patch_site *end)
+{}
+#define __parainstructions NULL
+#define __parainstructions_end NULL
+#endif
+
+extern void *text_poke_early(void *addr, const void *opcode, size_t len);
+
+/*
+ * Clear and restore the kernel write-protection flag on the local CPU.
+ * Allows the kernel to edit read-only pages.
+ * Side-effect: any interrupt handler running between save and restore will have
+ * the ability to write to read-only pages.
+ *
+ * Warning:
+ * Code patching in the UP case is safe if NMIs and MCE handlers are stopped and
+ * no thread can be preempted in the instructions being modified (no iret to an
+ * invalid instruction possible) or if the instructions are changed from a
+ * consistent state to another consistent state atomically.
+ * On the local CPU you need to be protected against NMI or MCE handlers seeing an
+ * inconsistent instruction while you patch.
+ */
+extern void *text_poke(void *addr, const void *opcode, size_t len);
+extern int poke_int3_handler(struct pt_regs *regs);
+extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler);
+
+#endif /* _ASM_X86_TEXT_PATCHING_H */
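A hedged kernel-context sketch of using this interface (not runnable standalone; the wrapper name and arguments are illustrative):

    #include <asm/text-patching.h>

    /* Patch one instruction in live kernel text via the int3 protocol:
     * text_poke_bp() writes an int3 at addr, syncs cores, writes the tail
     * of the new instruction, syncs again, then restores the first byte.
     * poke_int3_handler() diverts any CPU that hits the temporary int3 to
     * the handler address passed as the last argument. */
    static void example_patch(void *addr, const void *new_insn, size_t len)
    {
            text_poke_bp(addr, new_insn, len, (char *)addr + len);
    }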
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index ffae84d..30c133a 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -255,7 +255,7 @@
return true;
}
-static inline bool is_ia32_task(void)
+static inline bool in_ia32_syscall(void)
{
#ifdef CONFIG_X86_32
return true;
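A minimal kernel-context sketch of the renamed predicate; the two structures are hypothetical stand-ins for a layout that differs between the 32-bit and 64-bit ABIs:

    #include <linux/types.h>
    #include <linux/thread_info.h>

    struct example { long a, b; };          /* hypothetical 64-bit layout */
    struct compat_example { int a, b; };    /* hypothetical 32-bit layout */

    static size_t example_abi_size(void)
    {
            if (in_ia32_syscall())          /* current syscall is 32-bit */
                    return sizeof(struct compat_example);
            return sizeof(struct example);
    }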
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 1fde8d5..4e5be94 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -181,7 +181,7 @@
static inline void __flush_tlb_all(void)
{
- if (cpu_has_pge)
+ if (static_cpu_has(X86_FEATURE_PGE))
__flush_tlb_global();
else
__flush_tlb();
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 174c421..7428697 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -22,7 +22,7 @@
static inline cycles_t get_cycles(void)
{
#ifndef CONFIG_X86_TSC
- if (!cpu_has_tsc)
+ if (!boot_cpu_has(X86_FEATURE_TSC))
return 0;
#endif
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 2e7513d..12f9653 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -118,7 +118,7 @@
extern int fixup_exception(struct pt_regs *regs, int trapnr);
extern bool ex_has_fault_handler(unsigned long ip);
-extern int early_fixup_exception(unsigned long *ip);
+extern void early_fixup_exception(struct pt_regs *regs, int trapnr);
/*
* These are the main single-value transfer routines. They automatically
diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h
index 71605c7..c852590 100644
--- a/arch/x86/include/asm/uv/bios.h
+++ b/arch/x86/include/asm/uv/bios.h
@@ -51,15 +51,66 @@
BIOS_STATUS_UNAVAIL = -EBUSY
};
+/* Address map parameters */
+struct uv_gam_parameters {
+ u64 mmr_base;
+ u64 gru_base;
+ u8 mmr_shift; /* Convert PNode to MMR space offset */
+ u8 gru_shift; /* Convert PNode to GRU space offset */
+ u8 gpa_shift; /* Size of offset field in GRU phys addr */
+ u8 unused1;
+};
+
+/* UV_TABLE_GAM_RANGE_ENTRY values */
+#define UV_GAM_RANGE_TYPE_UNUSED 0 /* End of table */
+#define UV_GAM_RANGE_TYPE_RAM 1 /* Normal RAM */
+#define UV_GAM_RANGE_TYPE_NVRAM 2 /* Non-volatile memory */
+#define UV_GAM_RANGE_TYPE_NV_WINDOW 3 /* NVMDIMM block window */
+#define UV_GAM_RANGE_TYPE_NV_MAILBOX 4 /* NVMDIMM mailbox */
+#define UV_GAM_RANGE_TYPE_HOLE 5 /* Unused address range */
+#define UV_GAM_RANGE_TYPE_MAX 6
+
+/* The structure stores PA bits 56:26, for 64MB granularity */
+#define UV_GAM_RANGE_SHFT 26 /* 64MB */
+
+struct uv_gam_range_entry {
+ char type; /* Entry type: GAM_RANGE_TYPE_UNUSED, etc. */
+ char unused1;
+ u16 nasid; /* HNasid */
+ u16 sockid; /* Socket ID, high bits of APIC ID */
+ u16 pnode; /* Index to MMR and GRU spaces */
+ u32 pxm; /* ACPI proximity domain number */
+ u32 limit; /* PA bits 56:26 (UV_GAM_RANGE_SHFT) */
+};
+
+#define UV_SYSTAB_SIG "UVST"
+#define UV_SYSTAB_VERSION_1 1 /* UV1/2/3 BIOS version */
+#define UV_SYSTAB_VERSION_UV4 0x400 /* UV4 BIOS base version */
+#define UV_SYSTAB_VERSION_UV4_1 0x401 /* + gpa_shift */
+#define UV_SYSTAB_VERSION_UV4_2 0x402 /* + TYPE_NVRAM/WINDOW/MBOX */
+#define UV_SYSTAB_VERSION_UV4_LATEST UV_SYSTAB_VERSION_UV4_2
+
+#define UV_SYSTAB_TYPE_UNUSED 0 /* End of table (offset == 0) */
+#define UV_SYSTAB_TYPE_GAM_PARAMS 1 /* GAM PARAM conversions */
+#define UV_SYSTAB_TYPE_GAM_RNG_TBL 2 /* GAM entry table */
+#define UV_SYSTAB_TYPE_MAX 3
+
/*
* The UV system table describes specific firmware
* capabilities available to the Linux kernel at runtime.
*/
struct uv_systab {
- char signature[4]; /* must be "UVST" */
+ char signature[4]; /* must be UV_SYSTAB_SIG */
u32 revision; /* distinguish different firmware revs */
u64 function; /* BIOS runtime callback function ptr */
+ u32 size; /* systab size (starting with _VERSION_UV4) */
+ struct {
+ u32 type:8; /* type of entry */
+ u32 offset:24; /* byte offset from struct start to entry */
+ } entry[1]; /* additional entries follow */
};
+extern struct uv_systab *uv_systab;
+/* (... end of definitions from UV BIOS ...) */
enum {
BIOS_FREQ_BASE_PLATFORM = 0,
@@ -99,7 +150,11 @@
extern s64 uv_bios_reserved_page_pa(u64, u64 *, u64 *, u64 *);
extern int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus);
+#ifdef CONFIG_EFI
extern void uv_bios_init(void);
+#else
+static inline void uv_bios_init(void) { }
+#endif
extern unsigned long sn_rtc_cycles_per_second;
extern int uv_type;
@@ -107,7 +162,7 @@
extern long sn_coherency_id;
extern long sn_region_size;
extern long system_serial_number;
-#define partition_coherence_id() (sn_coherency_id)
+#define uv_partition_coherence_id() (sn_coherency_id)
extern struct kobject *sgi_uv_kobj; /* /sys/firmware/sgi_uv */
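The entry[] list makes the systab self-describing: each element carries an 8-bit type and a 24-bit byte offset from the start of the table, terminated by UV_SYSTAB_TYPE_UNUSED. A self-contained userspace model of the walk (struct name, offsets, and values are illustrative):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    struct model_systab {
            char signature[4];
            uint32_t revision;
            uint64_t function;
            uint32_t size;
            struct {
                    uint32_t type:8;
                    uint32_t offset:24;
            } entry[1];                     /* further entries follow */
    };

    int main(void)
    {
            static uint64_t raw[8];         /* 64 bytes, 8-byte aligned */
            struct model_systab *st = (struct model_systab *)raw;
            int i;

            memcpy(st->signature, "UVST", 4);
            st->entry[0].type = 2;          /* UV_SYSTAB_TYPE_GAM_RNG_TBL */
            st->entry[0].offset = 32;       /* illustrative offset */
            st->entry[1].type = 0;          /* UV_SYSTAB_TYPE_UNUSED: end */

            for (i = 0; st->entry[i].type && st->entry[i].offset; i++)
                    printf("entry %d: type %u at offset %u\n",
                           i, st->entry[i].type, st->entry[i].offset);
            return 0;
    }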
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index fc808b8..cc44d92 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -598,7 +598,7 @@
int timeout_tries;
int ipi_attempts;
int conseccompletes;
- short nobau;
+ bool nobau;
short baudisabled;
short cpu;
short osnode;
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index ea707478..097b80c 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -16,9 +16,11 @@
#include <linux/percpu.h>
#include <linux/timer.h>
#include <linux/io.h>
+#include <linux/topology.h>
#include <asm/types.h>
#include <asm/percpu.h>
#include <asm/uv/uv_mmrs.h>
+#include <asm/uv/bios.h>
#include <asm/irq_vectors.h>
#include <asm/io_apic.h>
@@ -103,7 +105,6 @@
* processor APICID register.
*/
-
/*
* Maximum number of bricks in all partitions and in all coherency domains.
* This is the total number of bricks accessible in the numalink fabric. It
@@ -127,6 +128,7 @@
*/
#define UV_MAX_NASID_VALUE (UV_MAX_NUMALINK_BLADES * 2)
+/* System Controller Interface Reg info */
struct uv_scir_s {
struct timer_list timer;
unsigned long offset;
@@ -137,71 +139,173 @@
unsigned char enabled;
};
+/* GAM (globally addressed memory) range table */
+struct uv_gam_range_s {
+ u32 limit; /* PA bits 56:26 (GAM_RANGE_SHFT) */
+ u16 nasid; /* node's global physical address */
+ s8 base; /* entry index of node's base addr */
+ u8 reserved;
+};
+
/*
* The following defines attributes of the HUB chip. These attributes are
- * frequently referenced and are kept in the per-cpu data areas of each cpu.
- * They are kept together in a struct to minimize cache misses.
+ * frequently referenced and are kept in a common per hub struct.
+ * After setup, the struct is read only, so it should be readily
+ * available in the L3 cache on the cpu socket for the node.
*/
struct uv_hub_info_s {
unsigned long global_mmr_base;
+ unsigned long global_mmr_shift;
unsigned long gpa_mask;
- unsigned int gnode_extra;
+ unsigned short *socket_to_node;
+ unsigned short *socket_to_pnode;
+ unsigned short *pnode_to_socket;
+ struct uv_gam_range_s *gr_table;
+ unsigned short min_socket;
+ unsigned short min_pnode;
+ unsigned char m_val;
+ unsigned char n_val;
+ unsigned char gr_table_len;
unsigned char hub_revision;
unsigned char apic_pnode_shift;
+ unsigned char gpa_shift;
unsigned char m_shift;
unsigned char n_lshift;
+ unsigned int gnode_extra;
unsigned long gnode_upper;
unsigned long lowmem_remap_top;
unsigned long lowmem_remap_base;
+ unsigned long global_gru_base;
+ unsigned long global_gru_shift;
unsigned short pnode;
unsigned short pnode_mask;
unsigned short coherency_domain_number;
unsigned short numa_blade_id;
- unsigned char blade_processor_id;
- unsigned char m_val;
- unsigned char n_val;
- struct uv_scir_s scir;
+ unsigned short nr_possible_cpus;
+ unsigned short nr_online_cpus;
+ short memory_nid;
};
-DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
-#define uv_hub_info this_cpu_ptr(&__uv_hub_info)
-#define uv_cpu_hub_info(cpu) (&per_cpu(__uv_hub_info, cpu))
+/* CPU specific info with a pointer to the hub common info struct */
+struct uv_cpu_info_s {
+ void *p_uv_hub_info;
+ unsigned char blade_cpu_id;
+ struct uv_scir_s scir;
+};
+DECLARE_PER_CPU(struct uv_cpu_info_s, __uv_cpu_info);
+
+#define uv_cpu_info this_cpu_ptr(&__uv_cpu_info)
+#define uv_cpu_info_per(cpu) (&per_cpu(__uv_cpu_info, cpu))
+
+#define uv_scir_info (&uv_cpu_info->scir)
+#define uv_cpu_scir_info(cpu) (&uv_cpu_info_per(cpu)->scir)
+
+/* Node specific hub common info struct */
+extern void **__uv_hub_info_list;
+static inline struct uv_hub_info_s *uv_hub_info_list(int node)
+{
+ return (struct uv_hub_info_s *)__uv_hub_info_list[node];
+}
+
+static inline struct uv_hub_info_s *_uv_hub_info(void)
+{
+ return (struct uv_hub_info_s *)uv_cpu_info->p_uv_hub_info;
+}
+#define uv_hub_info _uv_hub_info()
+
+static inline struct uv_hub_info_s *uv_cpu_hub_info(int cpu)
+{
+ return (struct uv_hub_info_s *)uv_cpu_info_per(cpu)->p_uv_hub_info;
+}
+
+#define UV_HUB_INFO_VERSION 0x7150
+extern int uv_hub_info_version(void);
+static inline int uv_hub_info_check(int version)
+{
+ if (uv_hub_info_version() == version)
+ return 0;
+
+ pr_crit("UV: uv_hub_info version(%x) mismatch, expecting(%x)\n",
+ uv_hub_info_version(), version);
+
+ BUG(); /* Catastrophic - cannot continue on unknown UV system */
+}
+#define _uv_hub_info_check() uv_hub_info_check(UV_HUB_INFO_VERSION)
/*
- * Hub revisions less than UV2_HUB_REVISION_BASE are UV1 hubs. All UV2
- * hubs have revision numbers greater than or equal to UV2_HUB_REVISION_BASE.
+ * HUB revision ranges for each UV HUB architecture.
* This is a software convention - NOT the hardware revision numbers in
* the hub chip.
*/
#define UV1_HUB_REVISION_BASE 1
#define UV2_HUB_REVISION_BASE 3
#define UV3_HUB_REVISION_BASE 5
+#define UV4_HUB_REVISION_BASE 7
+#ifdef UV1_HUB_IS_SUPPORTED
static inline int is_uv1_hub(void)
{
return uv_hub_info->hub_revision < UV2_HUB_REVISION_BASE;
}
+#else
+static inline int is_uv1_hub(void)
+{
+ return 0;
+}
+#endif
+#ifdef UV2_HUB_IS_SUPPORTED
static inline int is_uv2_hub(void)
{
return ((uv_hub_info->hub_revision >= UV2_HUB_REVISION_BASE) &&
(uv_hub_info->hub_revision < UV3_HUB_REVISION_BASE));
}
+#else
+static inline int is_uv2_hub(void)
+{
+ return 0;
+}
+#endif
+#ifdef UV3_HUB_IS_SUPPORTED
static inline int is_uv3_hub(void)
{
- return uv_hub_info->hub_revision >= UV3_HUB_REVISION_BASE;
+ return ((uv_hub_info->hub_revision >= UV3_HUB_REVISION_BASE) &&
+ (uv_hub_info->hub_revision < UV4_HUB_REVISION_BASE));
+}
+#else
+static inline int is_uv3_hub(void)
+{
+ return 0;
+}
+#endif
+
+#ifdef UV4_HUB_IS_SUPPORTED
+static inline int is_uv4_hub(void)
+{
+ return uv_hub_info->hub_revision >= UV4_HUB_REVISION_BASE;
+}
+#else
+static inline int is_uv4_hub(void)
+{
+ return 0;
+}
+#endif
+
+static inline int is_uvx_hub(void)
+{
+ if (uv_hub_info->hub_revision >= UV2_HUB_REVISION_BASE)
+ return uv_hub_info->hub_revision;
+
+ return 0;
}
static inline int is_uv_hub(void)
{
+#ifdef UV1_HUB_IS_SUPPORTED
return uv_hub_info->hub_revision;
-}
-
-/* code common to uv2 and uv3 only */
-static inline int is_uvx_hub(void)
-{
- return uv_hub_info->hub_revision >= UV2_HUB_REVISION_BASE;
+#endif
+ return is_uvx_hub();
}
union uvh_apicid {
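Because an unsupported generation's predicate is a constant 0, the compiler can discard its branches entirely. A self-contained model of this compile-out pattern (values illustrative):

    #include <stdio.h>

    /* With UV1 support disabled, the predicate folds to constant 0 and
     * the UV1 branch below is dead code. */
    /* #define UV1_HUB_IS_SUPPORTED 1 */

    static int hub_revision = 7;            /* pretend this is a UV4 hub */

    static int is_uv1_hub(void)
    {
    #ifdef UV1_HUB_IS_SUPPORTED
            return hub_revision < 3;
    #else
            return 0;
    #endif
    }

    int main(void)
    {
            printf("hub_revision = %d\n", hub_revision);
            if (is_uv1_hub())
                    printf("UV1 path\n");   /* eliminated when unsupported */
            else
                    printf("UV2+ path\n");
            return 0;
    }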
@@ -243,24 +347,42 @@
#define UV3_LOCAL_MMR_SIZE (32UL * 1024 * 1024)
#define UV3_GLOBAL_MMR32_SIZE (32UL * 1024 * 1024)
-#define UV_LOCAL_MMR_BASE (is_uv1_hub() ? UV1_LOCAL_MMR_BASE : \
- (is_uv2_hub() ? UV2_LOCAL_MMR_BASE : \
- UV3_LOCAL_MMR_BASE))
-#define UV_GLOBAL_MMR32_BASE (is_uv1_hub() ? UV1_GLOBAL_MMR32_BASE :\
- (is_uv2_hub() ? UV2_GLOBAL_MMR32_BASE :\
- UV3_GLOBAL_MMR32_BASE))
-#define UV_LOCAL_MMR_SIZE (is_uv1_hub() ? UV1_LOCAL_MMR_SIZE : \
- (is_uv2_hub() ? UV2_LOCAL_MMR_SIZE : \
- UV3_LOCAL_MMR_SIZE))
-#define UV_GLOBAL_MMR32_SIZE (is_uv1_hub() ? UV1_GLOBAL_MMR32_SIZE :\
- (is_uv2_hub() ? UV2_GLOBAL_MMR32_SIZE :\
- UV3_GLOBAL_MMR32_SIZE))
+#define UV4_LOCAL_MMR_BASE 0xfa000000UL
+#define UV4_GLOBAL_MMR32_BASE 0xfc000000UL
+#define UV4_LOCAL_MMR_SIZE (32UL * 1024 * 1024)
+#define UV4_GLOBAL_MMR32_SIZE (16UL * 1024 * 1024)
+
+#define UV_LOCAL_MMR_BASE ( \
+ is_uv1_hub() ? UV1_LOCAL_MMR_BASE : \
+ is_uv2_hub() ? UV2_LOCAL_MMR_BASE : \
+ is_uv3_hub() ? UV3_LOCAL_MMR_BASE : \
+ /*is_uv4_hub*/ UV4_LOCAL_MMR_BASE)
+
+#define UV_GLOBAL_MMR32_BASE ( \
+ is_uv1_hub() ? UV1_GLOBAL_MMR32_BASE : \
+ is_uv2_hub() ? UV2_GLOBAL_MMR32_BASE : \
+ is_uv3_hub() ? UV3_GLOBAL_MMR32_BASE : \
+ /*is_uv4_hub*/ UV4_GLOBAL_MMR32_BASE)
+
+#define UV_LOCAL_MMR_SIZE ( \
+ is_uv1_hub() ? UV1_LOCAL_MMR_SIZE : \
+ is_uv2_hub() ? UV2_LOCAL_MMR_SIZE : \
+ is_uv3_hub() ? UV3_LOCAL_MMR_SIZE : \
+ /*is_uv4_hub*/ UV4_LOCAL_MMR_SIZE)
+
+#define UV_GLOBAL_MMR32_SIZE ( \
+ is_uv1_hub() ? UV1_GLOBAL_MMR32_SIZE : \
+ is_uv2_hub() ? UV2_GLOBAL_MMR32_SIZE : \
+ is_uv3_hub() ? UV3_GLOBAL_MMR32_SIZE : \
+ /*is_uv4_hub*/ UV4_GLOBAL_MMR32_SIZE)
+
#define UV_GLOBAL_MMR64_BASE (uv_hub_info->global_mmr_base)
#define UV_GLOBAL_GRU_MMR_BASE 0x4000000
#define UV_GLOBAL_MMR32_PNODE_SHIFT 15
-#define UV_GLOBAL_MMR64_PNODE_SHIFT 26
+#define _UV_GLOBAL_MMR64_PNODE_SHIFT 26
+#define UV_GLOBAL_MMR64_PNODE_SHIFT (uv_hub_info->global_mmr_shift)
#define UV_GLOBAL_MMR32_PNODE_BITS(p) ((p) << (UV_GLOBAL_MMR32_PNODE_SHIFT))
@@ -307,18 +429,74 @@
* between socket virtual and socket physical addresses.
*/
+/* global bits offset - number of local address bits in gpa for this UV arch */
+static inline unsigned int uv_gpa_shift(void)
+{
+ return uv_hub_info->gpa_shift;
+}
+#define _uv_gpa_shift
+
+/* Find node that has the address range that contains global address */
+static inline struct uv_gam_range_s *uv_gam_range(unsigned long pa)
+{
+ struct uv_gam_range_s *gr = uv_hub_info->gr_table;
+ unsigned long pal = (pa & uv_hub_info->gpa_mask) >> UV_GAM_RANGE_SHFT;
+ int i, num = uv_hub_info->gr_table_len;
+
+ if (gr) {
+ for (i = 0; i < num; i++, gr++) {
+ if (pal < gr->limit)
+ return gr;
+ }
+ }
+ pr_crit("UV: GAM Range for 0x%lx not found at %p!\n", pa, gr);
+ BUG();
+}
+
+/* Return base address of node that contains global address */
+static inline unsigned long uv_gam_range_base(unsigned long pa)
+{
+ struct uv_gam_range_s *gr = uv_gam_range(pa);
+ int base = gr->base;
+
+ if (base < 0)
+ return 0UL;
+
+ return uv_hub_info->gr_table[base].limit;
+}
+
+/* socket phys RAM --> UV global NASID (UV4+) */
+static inline unsigned long uv_soc_phys_ram_to_nasid(unsigned long paddr)
+{
+ return uv_gam_range(paddr)->nasid;
+}
+#define _uv_soc_phys_ram_to_nasid
+
+/* socket virtual --> UV global NASID (UV4+) */
+static inline unsigned long uv_gpa_nasid(void *v)
+{
+ return uv_soc_phys_ram_to_nasid(__pa(v));
+}
+
/* socket phys RAM --> UV global physical address */
static inline unsigned long uv_soc_phys_ram_to_gpa(unsigned long paddr)
{
+ unsigned int m_val = uv_hub_info->m_val;
+
if (paddr < uv_hub_info->lowmem_remap_top)
paddr |= uv_hub_info->lowmem_remap_base;
paddr |= uv_hub_info->gnode_upper;
- paddr = ((paddr << uv_hub_info->m_shift) >> uv_hub_info->m_shift) |
- ((paddr >> uv_hub_info->m_val) << uv_hub_info->n_lshift);
+ if (m_val)
+ paddr = ((paddr << uv_hub_info->m_shift)
+ >> uv_hub_info->m_shift) |
+ ((paddr >> uv_hub_info->m_val)
+ << uv_hub_info->n_lshift);
+ else
+ paddr |= uv_soc_phys_ram_to_nasid(paddr)
+ << uv_hub_info->gpa_shift;
return paddr;
}
-
/* socket virtual --> UV global physical address */
static inline unsigned long uv_gpa(void *v)
{
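The gr_table lookup above is a linear scan over limits sorted ascending: an address belongs to the first entry whose limit it falls below. A self-contained userspace model with illustrative values:

    #include <stdint.h>
    #include <stdio.h>

    /* Limits are in units of 64MB (pa >> UV_GAM_RANGE_SHFT). */
    struct gam_range {
            uint32_t limit;
            uint16_t nasid;
    };

    static const struct gam_range table[] = {
            { .limit = 0x100, .nasid = 0 }, /* first 16GB of the map */
            { .limit = 0x200, .nasid = 2 }, /* next 16GB */
    };

    static int lookup_nasid(uint64_t pa)
    {
            uint64_t pal = pa >> 26;        /* UV_GAM_RANGE_SHFT */
            unsigned int i;

            for (i = 0; i < sizeof(table) / sizeof(table[0]); i++)
                    if (pal < table[i].limit)
                            return table[i].nasid;
            return -1;                      /* the kernel would BUG() here */
    }

    int main(void)
    {
            uint64_t pa = 0x180ULL << 26;   /* lands in the second range */

            printf("nasid = %d\n", lookup_nasid(pa));
            return 0;
    }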
@@ -338,54 +516,89 @@
unsigned long paddr;
unsigned long remap_base = uv_hub_info->lowmem_remap_base;
unsigned long remap_top = uv_hub_info->lowmem_remap_top;
+ unsigned int m_val = uv_hub_info->m_val;
- gpa = ((gpa << uv_hub_info->m_shift) >> uv_hub_info->m_shift) |
- ((gpa >> uv_hub_info->n_lshift) << uv_hub_info->m_val);
+ if (m_val)
+ gpa = ((gpa << uv_hub_info->m_shift) >> uv_hub_info->m_shift) |
+ ((gpa >> uv_hub_info->n_lshift) << uv_hub_info->m_val);
+
paddr = gpa & uv_hub_info->gpa_mask;
if (paddr >= remap_base && paddr < remap_base + remap_top)
paddr -= remap_base;
return paddr;
}
-
-/* gpa -> pnode */
+/* gpa -> gnode */
static inline unsigned long uv_gpa_to_gnode(unsigned long gpa)
{
- return gpa >> uv_hub_info->n_lshift;
+ unsigned int n_lshift = uv_hub_info->n_lshift;
+
+ if (n_lshift)
+ return gpa >> n_lshift;
+
+ return uv_gam_range(gpa)->nasid >> 1;
}
/* gpa -> pnode */
static inline int uv_gpa_to_pnode(unsigned long gpa)
{
- unsigned long n_mask = (1UL << uv_hub_info->n_val) - 1;
-
- return uv_gpa_to_gnode(gpa) & n_mask;
+ return uv_gpa_to_gnode(gpa) & uv_hub_info->pnode_mask;
}
-/* gpa -> node offset*/
+/* gpa -> node offset */
static inline unsigned long uv_gpa_to_offset(unsigned long gpa)
{
- return (gpa << uv_hub_info->m_shift) >> uv_hub_info->m_shift;
+ unsigned int m_shift = uv_hub_info->m_shift;
+
+ if (m_shift)
+ return (gpa << m_shift) >> m_shift;
+
+ return (gpa & uv_hub_info->gpa_mask) - uv_gam_range_base(gpa);
+}
+
+/* Convert socket to node */
+static inline int _uv_socket_to_node(int socket, unsigned short *s2nid)
+{
+ return s2nid ? s2nid[socket - uv_hub_info->min_socket] : socket;
+}
+
+static inline int uv_socket_to_node(int socket)
+{
+ return _uv_socket_to_node(socket, uv_hub_info->socket_to_node);
}
/* pnode, offset --> socket virtual */
static inline void *uv_pnode_offset_to_vaddr(int pnode, unsigned long offset)
{
- return __va(((unsigned long)pnode << uv_hub_info->m_val) | offset);
+ unsigned int m_val = uv_hub_info->m_val;
+ unsigned long base;
+ unsigned short sockid, node, *p2s;
+
+ if (m_val)
+ return __va(((unsigned long)pnode << m_val) | offset);
+
+ p2s = uv_hub_info->pnode_to_socket;
+ sockid = p2s ? p2s[pnode - uv_hub_info->min_pnode] : pnode;
+ node = uv_socket_to_node(sockid);
+
+ /* limit address of previous socket is our base, except node 0 is 0 */
+ if (!node)
+ return __va((unsigned long)offset);
+
+ base = (unsigned long)(uv_hub_info->gr_table[node - 1].limit);
+ return __va(base << UV_GAM_RANGE_SHFT | offset);
}
-
-/*
- * Extract a PNODE from an APICID (full apicid, not processor subset)
- */
+/* Extract/Convert a PNODE from an APICID (full apicid, not processor subset) */
static inline int uv_apicid_to_pnode(int apicid)
{
- return (apicid >> uv_hub_info->apic_pnode_shift);
+ int pnode = apicid >> uv_hub_info->apic_pnode_shift;
+ unsigned short *s2pn = uv_hub_info->socket_to_pnode;
+
+ return s2pn ? s2pn[pnode - uv_hub_info->min_socket] : pnode;
}
-/*
- * Convert an apicid to the socket number on the blade
- */
+/* Convert an apicid to the socket number on the blade */
static inline int uv_apicid_to_socket(int apicid)
{
if (is_uv1_hub())
@@ -434,16 +647,6 @@
return readq(uv_global_mmr64_address(pnode, offset));
}
-/*
- * Global MMR space addresses when referenced by the GRU. (GRU does
- * NOT use socket addressing).
- */
-static inline unsigned long uv_global_gru_mmr_address(int pnode, unsigned long offset)
-{
- return UV_GLOBAL_GRU_MMR_BASE | offset |
- ((unsigned long)pnode << uv_hub_info->m_val);
-}
-
static inline void uv_write_global_mmr8(int pnode, unsigned long offset, unsigned char val)
{
writeb(val, uv_global_mmr64_address(pnode, offset));
@@ -483,27 +686,23 @@
writeb(val, uv_local_mmr_address(offset));
}
-/*
- * Structures and definitions for converting between cpu, node, pnode, and blade
- * numbers.
- */
-struct uv_blade_info {
- unsigned short nr_possible_cpus;
- unsigned short nr_online_cpus;
- unsigned short pnode;
- short memory_nid;
- spinlock_t nmi_lock; /* obsolete, see uv_hub_nmi */
- unsigned long nmi_count; /* obsolete, see uv_hub_nmi */
-};
-extern struct uv_blade_info *uv_blade_info;
-extern short *uv_node_to_blade;
-extern short *uv_cpu_to_blade;
-extern short uv_possible_blades;
-
/* Blade-local cpu number of current cpu. Numbered 0 .. <# cpus on the blade> */
static inline int uv_blade_processor_id(void)
{
- return uv_hub_info->blade_processor_id;
+ return uv_cpu_info->blade_cpu_id;
+}
+
+/* Blade-local cpu number of cpu N. Numbered 0 .. <# cpus on the blade> */
+static inline int uv_cpu_blade_processor_id(int cpu)
+{
+ return uv_cpu_info_per(cpu)->blade_cpu_id;
+}
+#define _uv_cpu_blade_processor_id 1 /* indicate function available */
+
+/* Blade number to Node number (UV1..UV4 is 1:1) */
+static inline int uv_blade_to_node(int blade)
+{
+ return blade;
}
/* Blade number of current cpu. Numbered 0 .. <#blades -1> */
@@ -512,55 +711,60 @@
return uv_hub_info->numa_blade_id;
}
+/*
+ * Convert linux node number to the UV blade number.
+ * .. Currently for UV1 thru UV4 the node and the blade are identical.
+ * .. If this changes then you MUST check references to this function!
+ */
+static inline int uv_node_to_blade_id(int nid)
+{
+ return nid;
+}
+
/* Convert a cpu number to the UV blade number */
static inline int uv_cpu_to_blade_id(int cpu)
{
- return uv_cpu_to_blade[cpu];
-}
-
-/* Convert linux node number to the UV blade number */
-static inline int uv_node_to_blade_id(int nid)
-{
- return uv_node_to_blade[nid];
+ return uv_node_to_blade_id(cpu_to_node(cpu));
}
/* Convert a blade id to the PNODE of the blade */
static inline int uv_blade_to_pnode(int bid)
{
- return uv_blade_info[bid].pnode;
+ return uv_hub_info_list(uv_blade_to_node(bid))->pnode;
}
/* Nid of memory node on blade. -1 if no blade-local memory */
static inline int uv_blade_to_memory_nid(int bid)
{
- return uv_blade_info[bid].memory_nid;
+ return uv_hub_info_list(uv_blade_to_node(bid))->memory_nid;
}
/* Determine the number of possible cpus on a blade */
static inline int uv_blade_nr_possible_cpus(int bid)
{
- return uv_blade_info[bid].nr_possible_cpus;
+ return uv_hub_info_list(uv_blade_to_node(bid))->nr_possible_cpus;
}
/* Determine the number of online cpus on a blade */
static inline int uv_blade_nr_online_cpus(int bid)
{
- return uv_blade_info[bid].nr_online_cpus;
+ return uv_hub_info_list(uv_blade_to_node(bid))->nr_online_cpus;
}
/* Convert a cpu id to the PNODE of the blade containing the cpu */
static inline int uv_cpu_to_pnode(int cpu)
{
- return uv_blade_info[uv_cpu_to_blade_id(cpu)].pnode;
+ return uv_cpu_hub_info(cpu)->pnode;
}
/* Convert a linux node number to the PNODE of the blade */
static inline int uv_node_to_pnode(int nid)
{
- return uv_blade_info[uv_node_to_blade_id(nid)].pnode;
+ return uv_hub_info_list(nid)->pnode;
}
/* Maximum possible number of blades */
+extern short uv_possible_blades;
static inline int uv_num_possible_blades(void)
{
return uv_possible_blades;
@@ -578,9 +782,7 @@
/* Newer SMM NMI handler, not present in all systems */
#define UVH_NMI_MMRX UVH_EVENT_OCCURRED0
#define UVH_NMI_MMRX_CLEAR UVH_EVENT_OCCURRED0_ALIAS
-#define UVH_NMI_MMRX_SHIFT (is_uv1_hub() ? \
- UV1H_EVENT_OCCURRED0_EXTIO_INT0_SHFT :\
- UVXH_EVENT_OCCURRED0_EXTIO_INT0_SHFT)
+#define UVH_NMI_MMRX_SHIFT UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT
#define UVH_NMI_MMRX_TYPE "EXTIO_INT0"
/* Non-zero indicates newer SMM NMI handler present */
@@ -622,9 +824,9 @@
/* Update SCIR state */
static inline void uv_set_scir_bits(unsigned char value)
{
- if (uv_hub_info->scir.state != value) {
- uv_hub_info->scir.state = value;
- uv_write_local_mmr8(uv_hub_info->scir.offset, value);
+ if (uv_scir_info->state != value) {
+ uv_scir_info->state = value;
+ uv_write_local_mmr8(uv_scir_info->offset, value);
}
}
@@ -635,10 +837,10 @@
static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value)
{
- if (uv_cpu_hub_info(cpu)->scir.state != value) {
+ if (uv_cpu_scir_info(cpu)->state != value) {
uv_write_global_mmr8(uv_cpu_to_pnode(cpu),
- uv_cpu_hub_info(cpu)->scir.offset, value);
- uv_cpu_hub_info(cpu)->scir.state = value;
+ uv_cpu_scir_info(cpu)->offset, value);
+ uv_cpu_scir_info(cpu)->state = value;
}
}
@@ -666,10 +868,7 @@
/*
* Get the minimum revision number of the hub chips within the partition.
- * 1 - UV1 rev 1.0 initial silicon
- * 2 - UV1 rev 2.0 production silicon
- * 3 - UV2 rev 1.0 initial silicon
- * 5 - UV3 rev 1.0 initial silicon
+ * (See UVx_HUB_REVISION_BASE above for specific values.)
*/
static inline int uv_get_min_hub_revision_id(void)
{
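Taking a step back, the old blade-indexed arrays (uv_blade_info[], uv_node_to_blade[], uv_cpu_to_blade[]) are replaced throughout by one read-mostly info struct per node reached through a pointer table. A tiny self-contained model of that indirection (names and values illustrative):

    #include <stdio.h>

    struct hub_info {
            unsigned short pnode;
            short memory_nid;
    };

    /* One struct per node, as __uv_hub_info_list in the kernel. */
    static struct hub_info *hub_info_list[2];

    static struct hub_info *model_hub_info(int node)
    {
            return hub_info_list[node];
    }

    int main(void)
    {
            struct hub_info n0 = { .pnode = 0, .memory_nid = 0 };
            struct hub_info n1 = { .pnode = 4, .memory_nid = 1 };

            hub_info_list[0] = &n0;
            hub_info_list[1] = &n1;
            printf("node 1: pnode %u, memory nid %d\n",
                   model_hub_info(1)->pnode, model_hub_info(1)->memory_nid);
            return 0;
    }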
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h
index ddd8db6..548d684 100644
--- a/arch/x86/include/asm/uv/uv_mmrs.h
+++ b/arch/x86/include/asm/uv/uv_mmrs.h
@@ -5,7 +5,7 @@
*
* SGI UV MMR definitions
*
- * Copyright (C) 2007-2014 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2007-2016 Silicon Graphics, Inc. All rights reserved.
*/
#ifndef _ASM_X86_UV_UV_MMRS_H
@@ -18,10 +18,11 @@
* grouped by architecture types.
*
* UVH - definitions common to all UV hub types.
- * UVXH - definitions common to all UV eXtended hub types (currently 2 & 3).
+ * UVXH - definitions common to all UV eXtended hub types (currently 2, 3, 4).
* UV1H - definitions specific to UV type 1 hub.
* UV2H - definitions specific to UV type 2 hub.
* UV3H - definitions specific to UV type 3 hub.
+ * UV4H - definitions specific to UV type 4 hub.
*
 * So in general, MMR addresses and structures are identical on all hub types.
* These MMRs are identified as:
@@ -32,19 +33,25 @@
* } s;
* };
*
- * If the MMR exists on all hub types but have different addresses:
+ * If the MMR exists on all hub types but has different addresses,
+ * use a conditional operator to define the value at runtime.
* #define UV1Hxxx a
* #define UV2Hxxx b
* #define UV3Hxxx c
+ * #define UV4Hxxx d
* #define UVHxxx (is_uv1_hub() ? UV1Hxxx :
* (is_uv2_hub() ? UV2Hxxx :
- * UV3Hxxx))
+ * (is_uv3_hub() ? UV3Hxxx :
+ * UV4Hxxx))
*
- * If the MMR exists on all hub types > 1 but have different addresses:
+ * If the MMR exists on all hub types > 1 but has different addresses, the
+ * variation using "UVX" as the prefix exists.
* #define UV2Hxxx b
* #define UV3Hxxx c
- * #define UVXHxxx (is_uv2_hub() ? UV2Hxxx :
- * UV3Hxxx))
+ * #define UV4Hxxx d
+ * #define UVXHxxx (is_uv2_hub() ? UV2Hxxx :
+ * (is_uv3_hub() ? UV3Hxxx :
+ * UV4Hxxx))
*
* union uvh_xxx {
* unsigned long v;
@@ -56,6 +63,8 @@
* } s2;
* struct uv3h_xxx_s { # Full UV3 definition (*)
* } s3;
+ * struct uv4h_xxx_s { # Full UV4 definition (*)
+ * } s4;
* };
* (* - if present and different than the common struct)
*
@@ -73,7 +82,7 @@
* } sn;
* };
*
- * (GEN Flags: mflags_opt= undefs=0 UV23=UVXH)
+ * (GEN Flags: mflags_opt= undefs=function UV234=UVXH)
*/
#define UV_MMR_ENABLE (1UL << 63)
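A self-contained model of the runtime-dispatch define pattern described in the comment above (predicates and values are illustrative):

    #include <stdio.h>

    /* One UVH_* name resolves to a per-generation value via chained
     * conditionals over the hub predicates. */
    static int is_uv1_hub(void) { return 0; }
    static int is_uv2_hub(void) { return 0; }
    static int is_uv3_hub(void) { return 1; }

    #define UV1H_EXAMPLE_32 0x440
    #define UV2H_EXAMPLE_32 0x440
    #define UV3H_EXAMPLE_32 0x440
    #define UV4H_EXAMPLE_32 0x360
    #define UVH_EXAMPLE_32 (                        \
            is_uv1_hub() ? UV1H_EXAMPLE_32 :        \
            is_uv2_hub() ? UV2H_EXAMPLE_32 :        \
            is_uv3_hub() ? UV3H_EXAMPLE_32 :        \
            /*is_uv4_hub*/ UV4H_EXAMPLE_32)

    int main(void)
    {
            printf("UVH_EXAMPLE_32 = %#x\n", UVH_EXAMPLE_32); /* 0x440 */
            return 0;
    }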
@@ -83,20 +92,36 @@
#define UV2_HUB_PART_NUMBER_X 0x1111
#define UV3_HUB_PART_NUMBER 0x9578
#define UV3_HUB_PART_NUMBER_X 0x4321
+#define UV4_HUB_PART_NUMBER 0x99a1
/* Compat: Indicate which UV Hubs are supported. */
+#define UV1_HUB_IS_SUPPORTED 1
#define UV2_HUB_IS_SUPPORTED 1
#define UV3_HUB_IS_SUPPORTED 1
+#define UV4_HUB_IS_SUPPORTED 1
+
+/* Error function to catch undefined references */
+extern unsigned long uv_undefined(char *str);
/* ========================================================================= */
/* UVH_BAU_DATA_BROADCAST */
/* ========================================================================= */
#define UVH_BAU_DATA_BROADCAST 0x61688UL
-#define UVH_BAU_DATA_BROADCAST_32 0x440
+
+#define UV1H_BAU_DATA_BROADCAST_32 0x440
+#define UV2H_BAU_DATA_BROADCAST_32 0x440
+#define UV3H_BAU_DATA_BROADCAST_32 0x440
+#define UV4H_BAU_DATA_BROADCAST_32 0x360
+#define UVH_BAU_DATA_BROADCAST_32 ( \
+ is_uv1_hub() ? UV1H_BAU_DATA_BROADCAST_32 : \
+ is_uv2_hub() ? UV2H_BAU_DATA_BROADCAST_32 : \
+ is_uv3_hub() ? UV3H_BAU_DATA_BROADCAST_32 : \
+ /*is_uv4_hub*/ UV4H_BAU_DATA_BROADCAST_32)
#define UVH_BAU_DATA_BROADCAST_ENABLE_SHFT 0
#define UVH_BAU_DATA_BROADCAST_ENABLE_MASK 0x0000000000000001UL
+
union uvh_bau_data_broadcast_u {
unsigned long v;
struct uvh_bau_data_broadcast_s {
@@ -109,7 +134,16 @@
/* UVH_BAU_DATA_CONFIG */
/* ========================================================================= */
#define UVH_BAU_DATA_CONFIG 0x61680UL
-#define UVH_BAU_DATA_CONFIG_32 0x438
+
+#define UV1H_BAU_DATA_CONFIG_32 0x438
+#define UV2H_BAU_DATA_CONFIG_32 0x438
+#define UV3H_BAU_DATA_CONFIG_32 0x438
+#define UV4H_BAU_DATA_CONFIG_32 0x358
+#define UVH_BAU_DATA_CONFIG_32 ( \
+ is_uv1_hub() ? UV1H_BAU_DATA_CONFIG_32 : \
+ is_uv2_hub() ? UV2H_BAU_DATA_CONFIG_32 : \
+ is_uv3_hub() ? UV3H_BAU_DATA_CONFIG_32 : \
+ /*is_uv4_hub*/ UV4H_BAU_DATA_CONFIG_32)
#define UVH_BAU_DATA_CONFIG_VECTOR_SHFT 0
#define UVH_BAU_DATA_CONFIG_DM_SHFT 8
@@ -128,6 +162,7 @@
#define UVH_BAU_DATA_CONFIG_M_MASK 0x0000000000010000UL
#define UVH_BAU_DATA_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
+
union uvh_bau_data_config_u {
unsigned long v;
struct uvh_bau_data_config_s {
@@ -266,7 +301,6 @@
#define UV1H_EVENT_OCCURRED0_BAU_DATA_MASK 0x0080000000000000UL
#define UV1H_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_MASK 0x0100000000000000UL
-#define UVXH_EVENT_OCCURRED0_QP_HCERR_SHFT 1
#define UVXH_EVENT_OCCURRED0_RH_HCERR_SHFT 2
#define UVXH_EVENT_OCCURRED0_LH0_HCERR_SHFT 3
#define UVXH_EVENT_OCCURRED0_LH1_HCERR_SHFT 4
@@ -275,55 +309,11 @@
#define UVXH_EVENT_OCCURRED0_NI0_HCERR_SHFT 7
#define UVXH_EVENT_OCCURRED0_NI1_HCERR_SHFT 8
#define UVXH_EVENT_OCCURRED0_LB_AOERR0_SHFT 9
-#define UVXH_EVENT_OCCURRED0_QP_AOERR0_SHFT 10
#define UVXH_EVENT_OCCURRED0_LH0_AOERR0_SHFT 12
#define UVXH_EVENT_OCCURRED0_LH1_AOERR0_SHFT 13
#define UVXH_EVENT_OCCURRED0_GR0_AOERR0_SHFT 14
#define UVXH_EVENT_OCCURRED0_GR1_AOERR0_SHFT 15
#define UVXH_EVENT_OCCURRED0_XB_AOERR0_SHFT 16
-#define UVXH_EVENT_OCCURRED0_RT_AOERR0_SHFT 17
-#define UVXH_EVENT_OCCURRED0_NI0_AOERR0_SHFT 18
-#define UVXH_EVENT_OCCURRED0_NI1_AOERR0_SHFT 19
-#define UVXH_EVENT_OCCURRED0_LB_AOERR1_SHFT 20
-#define UVXH_EVENT_OCCURRED0_QP_AOERR1_SHFT 21
-#define UVXH_EVENT_OCCURRED0_RH_AOERR1_SHFT 22
-#define UVXH_EVENT_OCCURRED0_LH0_AOERR1_SHFT 23
-#define UVXH_EVENT_OCCURRED0_LH1_AOERR1_SHFT 24
-#define UVXH_EVENT_OCCURRED0_GR0_AOERR1_SHFT 25
-#define UVXH_EVENT_OCCURRED0_GR1_AOERR1_SHFT 26
-#define UVXH_EVENT_OCCURRED0_XB_AOERR1_SHFT 27
-#define UVXH_EVENT_OCCURRED0_RT_AOERR1_SHFT 28
-#define UVXH_EVENT_OCCURRED0_NI0_AOERR1_SHFT 29
-#define UVXH_EVENT_OCCURRED0_NI1_AOERR1_SHFT 30
-#define UVXH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 31
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 32
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 33
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 34
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 35
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 36
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 37
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 38
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 39
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 40
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 41
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 42
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 43
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 44
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 45
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 46
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 47
-#define UVXH_EVENT_OCCURRED0_L1_NMI_INT_SHFT 48
-#define UVXH_EVENT_OCCURRED0_STOP_CLOCK_SHFT 49
-#define UVXH_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 50
-#define UVXH_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 51
-#define UVXH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 52
-#define UVXH_EVENT_OCCURRED0_IPI_INT_SHFT 53
-#define UVXH_EVENT_OCCURRED0_EXTIO_INT0_SHFT 54
-#define UVXH_EVENT_OCCURRED0_EXTIO_INT1_SHFT 55
-#define UVXH_EVENT_OCCURRED0_EXTIO_INT2_SHFT 56
-#define UVXH_EVENT_OCCURRED0_EXTIO_INT3_SHFT 57
-#define UVXH_EVENT_OCCURRED0_PROFILE_INT_SHFT 58
-#define UVXH_EVENT_OCCURRED0_QP_HCERR_MASK 0x0000000000000002UL
#define UVXH_EVENT_OCCURRED0_RH_HCERR_MASK 0x0000000000000004UL
#define UVXH_EVENT_OCCURRED0_LH0_HCERR_MASK 0x0000000000000008UL
#define UVXH_EVENT_OCCURRED0_LH1_HCERR_MASK 0x0000000000000010UL
@@ -332,54 +322,294 @@
#define UVXH_EVENT_OCCURRED0_NI0_HCERR_MASK 0x0000000000000080UL
#define UVXH_EVENT_OCCURRED0_NI1_HCERR_MASK 0x0000000000000100UL
#define UVXH_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000000200UL
-#define UVXH_EVENT_OCCURRED0_QP_AOERR0_MASK 0x0000000000000400UL
#define UVXH_EVENT_OCCURRED0_LH0_AOERR0_MASK 0x0000000000001000UL
#define UVXH_EVENT_OCCURRED0_LH1_AOERR0_MASK 0x0000000000002000UL
#define UVXH_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000004000UL
#define UVXH_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000008000UL
#define UVXH_EVENT_OCCURRED0_XB_AOERR0_MASK 0x0000000000010000UL
-#define UVXH_EVENT_OCCURRED0_RT_AOERR0_MASK 0x0000000000020000UL
-#define UVXH_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000000040000UL
-#define UVXH_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000000080000UL
-#define UVXH_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000100000UL
-#define UVXH_EVENT_OCCURRED0_QP_AOERR1_MASK 0x0000000000200000UL
-#define UVXH_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000400000UL
-#define UVXH_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000000800000UL
-#define UVXH_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000001000000UL
-#define UVXH_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000002000000UL
-#define UVXH_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000004000000UL
-#define UVXH_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000008000000UL
-#define UVXH_EVENT_OCCURRED0_RT_AOERR1_MASK 0x0000000010000000UL
-#define UVXH_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000000020000000UL
-#define UVXH_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000000040000000UL
-#define UVXH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000080000000UL
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000100000000UL
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000200000000UL
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000400000000UL
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000800000000UL
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000001000000000UL
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000002000000000UL
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000004000000000UL
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000008000000000UL
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000010000000000UL
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000020000000000UL
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000040000000000UL
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000080000000000UL
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000100000000000UL
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000200000000000UL
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000400000000000UL
-#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000800000000000UL
-#define UVXH_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0001000000000000UL
-#define UVXH_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0002000000000000UL
-#define UVXH_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0004000000000000UL
-#define UVXH_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0008000000000000UL
-#define UVXH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0010000000000000UL
-#define UVXH_EVENT_OCCURRED0_IPI_INT_MASK 0x0020000000000000UL
-#define UVXH_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0040000000000000UL
-#define UVXH_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0080000000000000UL
-#define UVXH_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0100000000000000UL
-#define UVXH_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0200000000000000UL
-#define UVXH_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0400000000000000UL
+
+#define UV2H_EVENT_OCCURRED0_QP_HCERR_SHFT 1
+#define UV2H_EVENT_OCCURRED0_QP_AOERR0_SHFT 10
+#define UV2H_EVENT_OCCURRED0_RT_AOERR0_SHFT 17
+#define UV2H_EVENT_OCCURRED0_NI0_AOERR0_SHFT 18
+#define UV2H_EVENT_OCCURRED0_NI1_AOERR0_SHFT 19
+#define UV2H_EVENT_OCCURRED0_LB_AOERR1_SHFT 20
+#define UV2H_EVENT_OCCURRED0_QP_AOERR1_SHFT 21
+#define UV2H_EVENT_OCCURRED0_RH_AOERR1_SHFT 22
+#define UV2H_EVENT_OCCURRED0_LH0_AOERR1_SHFT 23
+#define UV2H_EVENT_OCCURRED0_LH1_AOERR1_SHFT 24
+#define UV2H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 25
+#define UV2H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 26
+#define UV2H_EVENT_OCCURRED0_XB_AOERR1_SHFT 27
+#define UV2H_EVENT_OCCURRED0_RT_AOERR1_SHFT 28
+#define UV2H_EVENT_OCCURRED0_NI0_AOERR1_SHFT 29
+#define UV2H_EVENT_OCCURRED0_NI1_AOERR1_SHFT 30
+#define UV2H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 31
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 32
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 33
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 34
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 35
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 36
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 37
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 38
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 39
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 40
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 41
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 42
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 43
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 44
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 45
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 46
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 47
+#define UV2H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 48
+#define UV2H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 49
+#define UV2H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 50
+#define UV2H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 51
+#define UV2H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 52
+#define UV2H_EVENT_OCCURRED0_IPI_INT_SHFT 53
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 54
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 55
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 56
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 57
+#define UV2H_EVENT_OCCURRED0_PROFILE_INT_SHFT 58
+#define UV2H_EVENT_OCCURRED0_QP_HCERR_MASK 0x0000000000000002UL
+#define UV2H_EVENT_OCCURRED0_QP_AOERR0_MASK 0x0000000000000400UL
+#define UV2H_EVENT_OCCURRED0_RT_AOERR0_MASK 0x0000000000020000UL
+#define UV2H_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000000040000UL
+#define UV2H_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000000080000UL
+#define UV2H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000100000UL
+#define UV2H_EVENT_OCCURRED0_QP_AOERR1_MASK 0x0000000000200000UL
+#define UV2H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000400000UL
+#define UV2H_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000000800000UL
+#define UV2H_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000001000000UL
+#define UV2H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000002000000UL
+#define UV2H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000004000000UL
+#define UV2H_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000008000000UL
+#define UV2H_EVENT_OCCURRED0_RT_AOERR1_MASK 0x0000000010000000UL
+#define UV2H_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000000020000000UL
+#define UV2H_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000000040000000UL
+#define UV2H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000080000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000100000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000200000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000400000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000800000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000001000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000002000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000004000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000008000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000010000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000020000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000040000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000080000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000100000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000200000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000400000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000800000000000UL
+#define UV2H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0001000000000000UL
+#define UV2H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0002000000000000UL
+#define UV2H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0004000000000000UL
+#define UV2H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0008000000000000UL
+#define UV2H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0010000000000000UL
+#define UV2H_EVENT_OCCURRED0_IPI_INT_MASK 0x0020000000000000UL
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0040000000000000UL
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0080000000000000UL
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0100000000000000UL
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0200000000000000UL
+#define UV2H_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0400000000000000UL
+
+#define UV3H_EVENT_OCCURRED0_QP_HCERR_SHFT 1
+#define UV3H_EVENT_OCCURRED0_QP_AOERR0_SHFT 10
+#define UV3H_EVENT_OCCURRED0_RT_AOERR0_SHFT 17
+#define UV3H_EVENT_OCCURRED0_NI0_AOERR0_SHFT 18
+#define UV3H_EVENT_OCCURRED0_NI1_AOERR0_SHFT 19
+#define UV3H_EVENT_OCCURRED0_LB_AOERR1_SHFT 20
+#define UV3H_EVENT_OCCURRED0_QP_AOERR1_SHFT 21
+#define UV3H_EVENT_OCCURRED0_RH_AOERR1_SHFT 22
+#define UV3H_EVENT_OCCURRED0_LH0_AOERR1_SHFT 23
+#define UV3H_EVENT_OCCURRED0_LH1_AOERR1_SHFT 24
+#define UV3H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 25
+#define UV3H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 26
+#define UV3H_EVENT_OCCURRED0_XB_AOERR1_SHFT 27
+#define UV3H_EVENT_OCCURRED0_RT_AOERR1_SHFT 28
+#define UV3H_EVENT_OCCURRED0_NI0_AOERR1_SHFT 29
+#define UV3H_EVENT_OCCURRED0_NI1_AOERR1_SHFT 30
+#define UV3H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 31
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 32
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 33
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 34
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 35
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 36
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 37
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 38
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 39
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 40
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 41
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 42
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 43
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 44
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 45
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 46
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 47
+#define UV3H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 48
+#define UV3H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 49
+#define UV3H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 50
+#define UV3H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 51
+#define UV3H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 52
+#define UV3H_EVENT_OCCURRED0_IPI_INT_SHFT 53
+#define UV3H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 54
+#define UV3H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 55
+#define UV3H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 56
+#define UV3H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 57
+#define UV3H_EVENT_OCCURRED0_PROFILE_INT_SHFT 58
+#define UV3H_EVENT_OCCURRED0_QP_HCERR_MASK 0x0000000000000002UL
+#define UV3H_EVENT_OCCURRED0_QP_AOERR0_MASK 0x0000000000000400UL
+#define UV3H_EVENT_OCCURRED0_RT_AOERR0_MASK 0x0000000000020000UL
+#define UV3H_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000000040000UL
+#define UV3H_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000000080000UL
+#define UV3H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000100000UL
+#define UV3H_EVENT_OCCURRED0_QP_AOERR1_MASK 0x0000000000200000UL
+#define UV3H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000400000UL
+#define UV3H_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000000800000UL
+#define UV3H_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000001000000UL
+#define UV3H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000002000000UL
+#define UV3H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000004000000UL
+#define UV3H_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000008000000UL
+#define UV3H_EVENT_OCCURRED0_RT_AOERR1_MASK 0x0000000010000000UL
+#define UV3H_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000000020000000UL
+#define UV3H_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000000040000000UL
+#define UV3H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000080000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000100000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000200000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000400000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000800000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000001000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000002000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000004000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000008000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000010000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000020000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000040000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000080000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000100000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000200000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000400000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000800000000000UL
+#define UV3H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0001000000000000UL
+#define UV3H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0002000000000000UL
+#define UV3H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0004000000000000UL
+#define UV3H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0008000000000000UL
+#define UV3H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0010000000000000UL
+#define UV3H_EVENT_OCCURRED0_IPI_INT_MASK 0x0020000000000000UL
+#define UV3H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0040000000000000UL
+#define UV3H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0080000000000000UL
+#define UV3H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0100000000000000UL
+#define UV3H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0200000000000000UL
+#define UV3H_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0400000000000000UL
+
+#define UV4H_EVENT_OCCURRED0_KT_HCERR_SHFT 1
+#define UV4H_EVENT_OCCURRED0_KT_AOERR0_SHFT 10
+#define UV4H_EVENT_OCCURRED0_RTQ0_AOERR0_SHFT 17
+#define UV4H_EVENT_OCCURRED0_RTQ1_AOERR0_SHFT 18
+#define UV4H_EVENT_OCCURRED0_RTQ2_AOERR0_SHFT 19
+#define UV4H_EVENT_OCCURRED0_RTQ3_AOERR0_SHFT 20
+#define UV4H_EVENT_OCCURRED0_NI0_AOERR0_SHFT 21
+#define UV4H_EVENT_OCCURRED0_NI1_AOERR0_SHFT 22
+#define UV4H_EVENT_OCCURRED0_LB_AOERR1_SHFT 23
+#define UV4H_EVENT_OCCURRED0_KT_AOERR1_SHFT 24
+#define UV4H_EVENT_OCCURRED0_RH_AOERR1_SHFT 25
+#define UV4H_EVENT_OCCURRED0_LH0_AOERR1_SHFT 26
+#define UV4H_EVENT_OCCURRED0_LH1_AOERR1_SHFT 27
+#define UV4H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 28
+#define UV4H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 29
+#define UV4H_EVENT_OCCURRED0_XB_AOERR1_SHFT 30
+#define UV4H_EVENT_OCCURRED0_RTQ0_AOERR1_SHFT 31
+#define UV4H_EVENT_OCCURRED0_RTQ1_AOERR1_SHFT 32
+#define UV4H_EVENT_OCCURRED0_RTQ2_AOERR1_SHFT 33
+#define UV4H_EVENT_OCCURRED0_RTQ3_AOERR1_SHFT 34
+#define UV4H_EVENT_OCCURRED0_NI0_AOERR1_SHFT 35
+#define UV4H_EVENT_OCCURRED0_NI1_AOERR1_SHFT 36
+#define UV4H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 37
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 38
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 39
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 40
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 41
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 42
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 43
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 44
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 45
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 46
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 47
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 48
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 49
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 50
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 51
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 52
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 53
+#define UV4H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 54
+#define UV4H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 55
+#define UV4H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 56
+#define UV4H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 57
+#define UV4H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 58
+#define UV4H_EVENT_OCCURRED0_IPI_INT_SHFT 59
+#define UV4H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 60
+#define UV4H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 61
+#define UV4H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 62
+#define UV4H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 63
+#define UV4H_EVENT_OCCURRED0_KT_HCERR_MASK 0x0000000000000002UL
+#define UV4H_EVENT_OCCURRED0_KT_AOERR0_MASK 0x0000000000000400UL
+#define UV4H_EVENT_OCCURRED0_RTQ0_AOERR0_MASK 0x0000000000020000UL
+#define UV4H_EVENT_OCCURRED0_RTQ1_AOERR0_MASK 0x0000000000040000UL
+#define UV4H_EVENT_OCCURRED0_RTQ2_AOERR0_MASK 0x0000000000080000UL
+#define UV4H_EVENT_OCCURRED0_RTQ3_AOERR0_MASK 0x0000000000100000UL
+#define UV4H_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000000200000UL
+#define UV4H_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000000400000UL
+#define UV4H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000800000UL
+#define UV4H_EVENT_OCCURRED0_KT_AOERR1_MASK 0x0000000001000000UL
+#define UV4H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000002000000UL
+#define UV4H_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000004000000UL
+#define UV4H_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000008000000UL
+#define UV4H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000010000000UL
+#define UV4H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000020000000UL
+#define UV4H_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000040000000UL
+#define UV4H_EVENT_OCCURRED0_RTQ0_AOERR1_MASK 0x0000000080000000UL
+#define UV4H_EVENT_OCCURRED0_RTQ1_AOERR1_MASK 0x0000000100000000UL
+#define UV4H_EVENT_OCCURRED0_RTQ2_AOERR1_MASK 0x0000000200000000UL
+#define UV4H_EVENT_OCCURRED0_RTQ3_AOERR1_MASK 0x0000000400000000UL
+#define UV4H_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000000800000000UL
+#define UV4H_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000001000000000UL
+#define UV4H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000002000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000004000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000008000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000010000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000020000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000040000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000080000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000100000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000200000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000400000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000800000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0001000000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0002000000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0004000000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0008000000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0010000000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0020000000000000UL
+#define UV4H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0040000000000000UL
+#define UV4H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0080000000000000UL
+#define UV4H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0100000000000000UL
+#define UV4H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0200000000000000UL
+#define UV4H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0400000000000000UL
+#define UV4H_EVENT_OCCURRED0_IPI_INT_MASK 0x0800000000000000UL
+#define UV4H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x1000000000000000UL
+#define UV4H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x2000000000000000UL
+#define UV4H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x4000000000000000UL
+#define UV4H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x8000000000000000UL
+
+#define UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT ( \
+ is_uv1_hub() ? UV1H_EVENT_OCCURRED0_EXTIO_INT0_SHFT : \
+ is_uv2_hub() ? UV2H_EVENT_OCCURRED0_EXTIO_INT0_SHFT : \
+ is_uv3_hub() ? UV3H_EVENT_OCCURRED0_EXTIO_INT0_SHFT : \
+ /*is_uv4_hub*/ UV4H_EVENT_OCCURRED0_EXTIO_INT0_SHFT)
union uvh_event_occurred0_u {
unsigned long v;
@@ -391,7 +621,7 @@
} s;
struct uvxh_event_occurred0_s {
unsigned long lb_hcerr:1; /* RW */
- unsigned long qp_hcerr:1; /* RW */
+ unsigned long rsvd_1:1;
unsigned long rh_hcerr:1; /* RW */
unsigned long lh0_hcerr:1; /* RW */
unsigned long lh1_hcerr:1; /* RW */
@@ -400,25 +630,51 @@
unsigned long ni0_hcerr:1; /* RW */
unsigned long ni1_hcerr:1; /* RW */
unsigned long lb_aoerr0:1; /* RW */
- unsigned long qp_aoerr0:1; /* RW */
+ unsigned long rsvd_10:1;
unsigned long rh_aoerr0:1; /* RW */
unsigned long lh0_aoerr0:1; /* RW */
unsigned long lh1_aoerr0:1; /* RW */
unsigned long gr0_aoerr0:1; /* RW */
unsigned long gr1_aoerr0:1; /* RW */
unsigned long xb_aoerr0:1; /* RW */
- unsigned long rt_aoerr0:1; /* RW */
+ unsigned long rsvd_17_63:47;
+ } sx;
+ struct uv4h_event_occurred0_s {
+ unsigned long lb_hcerr:1; /* RW */
+ unsigned long kt_hcerr:1; /* RW */
+ unsigned long rh_hcerr:1; /* RW */
+ unsigned long lh0_hcerr:1; /* RW */
+ unsigned long lh1_hcerr:1; /* RW */
+ unsigned long gr0_hcerr:1; /* RW */
+ unsigned long gr1_hcerr:1; /* RW */
+ unsigned long ni0_hcerr:1; /* RW */
+ unsigned long ni1_hcerr:1; /* RW */
+ unsigned long lb_aoerr0:1; /* RW */
+ unsigned long kt_aoerr0:1; /* RW */
+ unsigned long rh_aoerr0:1; /* RW */
+ unsigned long lh0_aoerr0:1; /* RW */
+ unsigned long lh1_aoerr0:1; /* RW */
+ unsigned long gr0_aoerr0:1; /* RW */
+ unsigned long gr1_aoerr0:1; /* RW */
+ unsigned long xb_aoerr0:1; /* RW */
+ unsigned long rtq0_aoerr0:1; /* RW */
+ unsigned long rtq1_aoerr0:1; /* RW */
+ unsigned long rtq2_aoerr0:1; /* RW */
+ unsigned long rtq3_aoerr0:1; /* RW */
unsigned long ni0_aoerr0:1; /* RW */
unsigned long ni1_aoerr0:1; /* RW */
unsigned long lb_aoerr1:1; /* RW */
- unsigned long qp_aoerr1:1; /* RW */
+ unsigned long kt_aoerr1:1; /* RW */
unsigned long rh_aoerr1:1; /* RW */
unsigned long lh0_aoerr1:1; /* RW */
unsigned long lh1_aoerr1:1; /* RW */
unsigned long gr0_aoerr1:1; /* RW */
unsigned long gr1_aoerr1:1; /* RW */
unsigned long xb_aoerr1:1; /* RW */
- unsigned long rt_aoerr1:1; /* RW */
+ unsigned long rtq0_aoerr1:1; /* RW */
+ unsigned long rtq1_aoerr1:1; /* RW */
+ unsigned long rtq2_aoerr1:1; /* RW */
+ unsigned long rtq3_aoerr1:1; /* RW */
unsigned long ni0_aoerr1:1; /* RW */
unsigned long ni1_aoerr1:1; /* RW */
unsigned long system_shutdown_int:1; /* RW */
@@ -448,9 +704,7 @@
unsigned long extio_int1:1; /* RW */
unsigned long extio_int2:1; /* RW */
unsigned long extio_int3:1; /* RW */
- unsigned long profile_int:1; /* RW */
- unsigned long rsvd_59_63:5;
- } sx;
+ } s4;
};
/* ========================================================================= */
@@ -464,11 +718,21 @@
/* UVH_EXTIO_INT0_BROADCAST */
/* ========================================================================= */
#define UVH_EXTIO_INT0_BROADCAST 0x61448UL
-#define UVH_EXTIO_INT0_BROADCAST_32 0x3f0
+
+#define UV1H_EXTIO_INT0_BROADCAST_32 0x3f0
+#define UV2H_EXTIO_INT0_BROADCAST_32 0x3f0
+#define UV3H_EXTIO_INT0_BROADCAST_32 0x3f0
+#define UV4H_EXTIO_INT0_BROADCAST_32 0x310
+#define UVH_EXTIO_INT0_BROADCAST_32 ( \
+ is_uv1_hub() ? UV1H_EXTIO_INT0_BROADCAST_32 : \
+ is_uv2_hub() ? UV2H_EXTIO_INT0_BROADCAST_32 : \
+ is_uv3_hub() ? UV3H_EXTIO_INT0_BROADCAST_32 : \
+ /*is_uv4_hub*/ UV4H_EXTIO_INT0_BROADCAST_32)
#define UVH_EXTIO_INT0_BROADCAST_ENABLE_SHFT 0
#define UVH_EXTIO_INT0_BROADCAST_ENABLE_MASK 0x0000000000000001UL
+
union uvh_extio_int0_broadcast_u {
unsigned long v;
struct uvh_extio_int0_broadcast_s {
@@ -499,6 +763,7 @@
#define UVH_GR0_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL
#define UVH_GR0_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
+
union uvh_gr0_tlb_int0_config_u {
unsigned long v;
struct uvh_gr0_tlb_int0_config_s {
@@ -537,6 +802,7 @@
#define UVH_GR0_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL
#define UVH_GR0_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
+
union uvh_gr0_tlb_int1_config_u {
unsigned long v;
struct uvh_gr0_tlb_int1_config_s {
@@ -559,19 +825,18 @@
#define UV1H_GR0_TLB_MMR_CONTROL 0x401080UL
#define UV2H_GR0_TLB_MMR_CONTROL 0xc01080UL
#define UV3H_GR0_TLB_MMR_CONTROL 0xc01080UL
-#define UVH_GR0_TLB_MMR_CONTROL \
- (is_uv1_hub() ? UV1H_GR0_TLB_MMR_CONTROL : \
- (is_uv2_hub() ? UV2H_GR0_TLB_MMR_CONTROL : \
- UV3H_GR0_TLB_MMR_CONTROL))
+#define UV4H_GR0_TLB_MMR_CONTROL 0x601080UL
+#define UVH_GR0_TLB_MMR_CONTROL ( \
+ is_uv1_hub() ? UV1H_GR0_TLB_MMR_CONTROL : \
+ is_uv2_hub() ? UV2H_GR0_TLB_MMR_CONTROL : \
+ is_uv3_hub() ? UV3H_GR0_TLB_MMR_CONTROL : \
+ /*is_uv4_hub*/ UV4H_GR0_TLB_MMR_CONTROL)
#define UVH_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0
-#define UVH_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 12
#define UVH_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16
#define UVH_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20
#define UVH_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30
#define UVH_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT 31
-#define UVH_GR0_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL
-#define UVH_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL
#define UVH_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL
#define UVH_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL
#define UVH_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL
@@ -601,14 +866,11 @@
#define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBLRUV_MASK 0x1000000000000000UL
#define UVXH_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0
-#define UVXH_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 12
#define UVXH_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16
#define UVXH_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20
#define UVXH_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30
#define UVXH_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT 31
#define UVXH_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32
-#define UVXH_GR0_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL
-#define UVXH_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL
#define UVXH_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL
#define UVXH_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL
#define UVXH_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL
@@ -651,12 +913,45 @@
#define UV3H_GR0_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL
#define UV3H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL
+#define UV4H_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0
+#define UV4H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 13
+#define UV4H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16
+#define UV4H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20
+#define UV4H_GR0_TLB_MMR_CONTROL_ECC_SEL_SHFT 21
+#define UV4H_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30
+#define UV4H_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT 31
+#define UV4H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32
+#define UV4H_GR0_TLB_MMR_CONTROL_PAGE_SIZE_SHFT 59
+#define UV4H_GR0_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000001fffUL
+#define UV4H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000006000UL
+#define UV4H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL
+#define UV4H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL
+#define UV4H_GR0_TLB_MMR_CONTROL_ECC_SEL_MASK 0x0000000000200000UL
+#define UV4H_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL
+#define UV4H_GR0_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL
+#define UV4H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL
+#define UV4H_GR0_TLB_MMR_CONTROL_PAGE_SIZE_MASK 0xf800000000000000UL
+
+#define UVH_GR0_TLB_MMR_CONTROL_INDEX_MASK ( \
+ is_uv1_hub() ? UV1H_GR0_TLB_MMR_CONTROL_INDEX_MASK : \
+ is_uv2_hub() ? UV2H_GR0_TLB_MMR_CONTROL_INDEX_MASK : \
+ is_uv3_hub() ? UV3H_GR0_TLB_MMR_CONTROL_INDEX_MASK : \
+ /*is_uv4_hub*/ UV4H_GR0_TLB_MMR_CONTROL_INDEX_MASK)
+#define UVH_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK ( \
+ is_uv1_hub() ? UV1H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK : \
+ is_uv2_hub() ? UV2H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK : \
+ is_uv3_hub() ? UV3H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK : \
+ /*is_uv4_hub*/ UV4H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK)
+#define UVH_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT ( \
+ is_uv1_hub() ? UV1H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT : \
+ is_uv2_hub() ? UV2H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT : \
+ is_uv3_hub() ? UV3H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT : \
+ /*is_uv4_hub*/ UV4H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT)
+
union uvh_gr0_tlb_mmr_control_u {
unsigned long v;
struct uvh_gr0_tlb_mmr_control_s {
- unsigned long index:12; /* RW */
- unsigned long mem_sel:2; /* RW */
- unsigned long rsvd_14_15:2;
+ unsigned long rsvd_0_15:16;
unsigned long auto_valid_en:1; /* RW */
unsigned long rsvd_17_19:3;
unsigned long mmr_hash_index_en:1; /* RW */
@@ -690,9 +985,7 @@
unsigned long rsvd_61_63:3;
} s1;
struct uvxh_gr0_tlb_mmr_control_s {
- unsigned long index:12; /* RW */
- unsigned long mem_sel:2; /* RW */
- unsigned long rsvd_14_15:2;
+ unsigned long rsvd_0_15:16;
unsigned long auto_valid_en:1; /* RW */
unsigned long rsvd_17_19:3;
unsigned long mmr_hash_index_en:1; /* RW */
@@ -703,8 +996,7 @@
unsigned long rsvd_33_47:15;
unsigned long rsvd_48:1;
unsigned long rsvd_49_51:3;
- unsigned long rsvd_52:1;
- unsigned long rsvd_53_63:11;
+ unsigned long rsvd_52_63:12;
} sx;
struct uv2h_gr0_tlb_mmr_control_s {
unsigned long index:12; /* RW */
@@ -741,6 +1033,24 @@
unsigned long undef_52:1; /* Undefined */
unsigned long rsvd_53_63:11;
} s3;
+ struct uv4h_gr0_tlb_mmr_control_s {
+ unsigned long index:13; /* RW */
+ unsigned long mem_sel:2; /* RW */
+ unsigned long rsvd_15:1;
+ unsigned long auto_valid_en:1; /* RW */
+ unsigned long rsvd_17_19:3;
+ unsigned long mmr_hash_index_en:1; /* RW */
+ unsigned long ecc_sel:1; /* RW */
+ unsigned long rsvd_22_29:8;
+ unsigned long mmr_write:1; /* WP */
+ unsigned long mmr_read:1; /* WP */
+ unsigned long mmr_op_done:1; /* RW */
+ unsigned long rsvd_33_47:15;
+ unsigned long undef_48:1; /* Undefined */
+ unsigned long rsvd_49_51:3;
+ unsigned long rsvd_52_58:7;
+ unsigned long page_size:5; /* RW */
+ } s4;
};
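/*
 * Illustration (not part of the patch): UV4 widens INDEX to 13 bits
 * and moves MEM_SEL to bits 14:13, which is why the hub-neutral
 * INDEX/MEM_SEL macros above became runtime selectors.  A
 * hypothetical caller can stay hub-neutral like this, using the
 * existing uv_read_local_mmr()/uv_write_local_mmr() accessors.
 */
static inline void uv_tlb_ctl_set_mem_sel(unsigned long sel)
{
	unsigned long ctl = uv_read_local_mmr(UVH_GR0_TLB_MMR_CONTROL);

	ctl &= ~UVH_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK;
	ctl |= (sel << UVH_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT) &
	       UVH_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK;
	uv_write_local_mmr(UVH_GR0_TLB_MMR_CONTROL, ctl);
}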
/* ========================================================================= */
@@ -749,19 +1059,14 @@
#define UV1H_GR0_TLB_MMR_READ_DATA_HI 0x4010a0UL
#define UV2H_GR0_TLB_MMR_READ_DATA_HI 0xc010a0UL
#define UV3H_GR0_TLB_MMR_READ_DATA_HI 0xc010a0UL
-#define UVH_GR0_TLB_MMR_READ_DATA_HI \
- (is_uv1_hub() ? UV1H_GR0_TLB_MMR_READ_DATA_HI : \
- (is_uv2_hub() ? UV2H_GR0_TLB_MMR_READ_DATA_HI : \
- UV3H_GR0_TLB_MMR_READ_DATA_HI))
+#define UV4H_GR0_TLB_MMR_READ_DATA_HI 0x6010a0UL
+#define UVH_GR0_TLB_MMR_READ_DATA_HI ( \
+ is_uv1_hub() ? UV1H_GR0_TLB_MMR_READ_DATA_HI : \
+ is_uv2_hub() ? UV2H_GR0_TLB_MMR_READ_DATA_HI : \
+ is_uv3_hub() ? UV3H_GR0_TLB_MMR_READ_DATA_HI : \
+ /*is_uv4_hub*/ UV4H_GR0_TLB_MMR_READ_DATA_HI)
#define UVH_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
-#define UVH_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
-#define UVH_GR0_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43
-#define UVH_GR0_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44
-#define UVH_GR0_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL
-#define UVH_GR0_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL
-#define UVH_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL
-#define UVH_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL
#define UV1H_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
#define UV1H_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
@@ -773,13 +1078,6 @@
#define UV1H_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL
#define UVXH_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
-#define UVXH_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
-#define UVXH_GR0_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43
-#define UVXH_GR0_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44
-#define UVXH_GR0_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL
-#define UVXH_GR0_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL
-#define UVXH_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL
-#define UVXH_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL
#define UV2H_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
#define UV2H_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
@@ -803,15 +1101,24 @@
#define UV3H_GR0_TLB_MMR_READ_DATA_HI_AA_EXT_MASK 0x0000200000000000UL
#define UV3H_GR0_TLB_MMR_READ_DATA_HI_WAY_ECC_MASK 0xff80000000000000UL
+#define UV4H_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
+#define UV4H_GR0_TLB_MMR_READ_DATA_HI_PNID_SHFT 34
+#define UV4H_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 49
+#define UV4H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 51
+#define UV4H_GR0_TLB_MMR_READ_DATA_HI_LARGER_SHFT 52
+#define UV4H_GR0_TLB_MMR_READ_DATA_HI_AA_EXT_SHFT 53
+#define UV4H_GR0_TLB_MMR_READ_DATA_HI_WAY_ECC_SHFT 55
+#define UV4H_GR0_TLB_MMR_READ_DATA_HI_PFN_MASK 0x00000003ffffffffUL
+#define UV4H_GR0_TLB_MMR_READ_DATA_HI_PNID_MASK 0x0001fffc00000000UL
+#define UV4H_GR0_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0006000000000000UL
+#define UV4H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0008000000000000UL
+#define UV4H_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0010000000000000UL
+#define UV4H_GR0_TLB_MMR_READ_DATA_HI_AA_EXT_MASK 0x0020000000000000UL
+#define UV4H_GR0_TLB_MMR_READ_DATA_HI_WAY_ECC_MASK 0xff80000000000000UL
+
+
union uvh_gr0_tlb_mmr_read_data_hi_u {
unsigned long v;
- struct uvh_gr0_tlb_mmr_read_data_hi_s {
- unsigned long pfn:41; /* RO */
- unsigned long gaa:2; /* RO */
- unsigned long dirty:1; /* RO */
- unsigned long larger:1; /* RO */
- unsigned long rsvd_45_63:19;
- } s;
struct uv1h_gr0_tlb_mmr_read_data_hi_s {
unsigned long pfn:41; /* RO */
unsigned long gaa:2; /* RO */
@@ -819,13 +1126,6 @@
unsigned long larger:1; /* RO */
unsigned long rsvd_45_63:19;
} s1;
- struct uvxh_gr0_tlb_mmr_read_data_hi_s {
- unsigned long pfn:41; /* RO */
- unsigned long gaa:2; /* RO */
- unsigned long dirty:1; /* RO */
- unsigned long larger:1; /* RO */
- unsigned long rsvd_45_63:19;
- } sx;
struct uv2h_gr0_tlb_mmr_read_data_hi_s {
unsigned long pfn:41; /* RO */
unsigned long gaa:2; /* RO */
@@ -842,6 +1142,16 @@
unsigned long undef_46_54:9; /* Undefined */
unsigned long way_ecc:9; /* RO */
} s3;
+ struct uv4h_gr0_tlb_mmr_read_data_hi_s {
+ unsigned long pfn:34; /* RO */
+ unsigned long pnid:15; /* RO */
+ unsigned long gaa:2; /* RO */
+ unsigned long dirty:1; /* RO */
+ unsigned long larger:1; /* RO */
+ unsigned long aa_ext:1; /* RO */
+ unsigned long undef_54:1; /* Undefined */
+ unsigned long way_ecc:9; /* RO */
+ } s4;
};
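/*
 * Illustration (not part of the patch): UV4 shrinks PFN to 34 bits
 * and adds a PNID field above it, which is why the hub-neutral s/sx
 * views are deleted here; a hypothetical reader must pick the
 * per-hub view instead.
 */
static inline unsigned long uv_gr0_tlb_read_pfn(void)
{
	union uvh_gr0_tlb_mmr_read_data_hi_u hi;

	hi.v = uv_read_local_mmr(UVH_GR0_TLB_MMR_READ_DATA_HI);
	if (is_uv4_hub())
		return hi.s4.pfn;	/* 34-bit PFN, PNID above it */
	if (is_uv3_hub())
		return hi.s3.pfn;	/* 41-bit PFN */
	if (is_uv2_hub())
		return hi.s2.pfn;
	return hi.s1.pfn;		/* UV1 */
}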
/* ========================================================================= */
@@ -850,10 +1160,12 @@
#define UV1H_GR0_TLB_MMR_READ_DATA_LO 0x4010a8UL
#define UV2H_GR0_TLB_MMR_READ_DATA_LO 0xc010a8UL
#define UV3H_GR0_TLB_MMR_READ_DATA_LO 0xc010a8UL
-#define UVH_GR0_TLB_MMR_READ_DATA_LO \
- (is_uv1_hub() ? UV1H_GR0_TLB_MMR_READ_DATA_LO : \
- (is_uv2_hub() ? UV2H_GR0_TLB_MMR_READ_DATA_LO : \
- UV3H_GR0_TLB_MMR_READ_DATA_LO))
+#define UV4H_GR0_TLB_MMR_READ_DATA_LO 0x6010a8UL
+#define UVH_GR0_TLB_MMR_READ_DATA_LO ( \
+ is_uv1_hub() ? UV1H_GR0_TLB_MMR_READ_DATA_LO : \
+ is_uv2_hub() ? UV2H_GR0_TLB_MMR_READ_DATA_LO : \
+ is_uv3_hub() ? UV3H_GR0_TLB_MMR_READ_DATA_LO : \
+ /*is_uv4_hub*/ UV4H_GR0_TLB_MMR_READ_DATA_LO)
#define UVH_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
#define UVH_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
@@ -890,6 +1202,14 @@
#define UV3H_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
#define UV3H_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
+#define UV4H_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
+#define UV4H_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
+#define UV4H_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
+#define UV4H_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
+#define UV4H_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
+#define UV4H_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
+
+
union uvh_gr0_tlb_mmr_read_data_lo_u {
unsigned long v;
struct uvh_gr0_tlb_mmr_read_data_lo_s {
@@ -917,12 +1237,25 @@
unsigned long asid:24; /* RO */
unsigned long valid:1; /* RO */
} s3;
+ struct uv4h_gr0_tlb_mmr_read_data_lo_s {
+ unsigned long vpn:39; /* RO */
+ unsigned long asid:24; /* RO */
+ unsigned long valid:1; /* RO */
+ } s4;
};
/* ========================================================================= */
/* UVH_GR1_TLB_INT0_CONFIG */
/* ========================================================================= */
-#define UVH_GR1_TLB_INT0_CONFIG 0x61f00UL
+#define UV1H_GR1_TLB_INT0_CONFIG 0x61f00UL
+#define UV2H_GR1_TLB_INT0_CONFIG 0x61f00UL
+#define UV3H_GR1_TLB_INT0_CONFIG 0x61f00UL
+#define UV4H_GR1_TLB_INT0_CONFIG 0x62100UL
+#define UVH_GR1_TLB_INT0_CONFIG ( \
+ is_uv1_hub() ? UV1H_GR1_TLB_INT0_CONFIG : \
+ is_uv2_hub() ? UV2H_GR1_TLB_INT0_CONFIG : \
+ is_uv3_hub() ? UV3H_GR1_TLB_INT0_CONFIG : \
+ /*is_uv4_hub*/ UV4H_GR1_TLB_INT0_CONFIG)
#define UVH_GR1_TLB_INT0_CONFIG_VECTOR_SHFT 0
#define UVH_GR1_TLB_INT0_CONFIG_DM_SHFT 8
@@ -941,6 +1274,7 @@
#define UVH_GR1_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL
#define UVH_GR1_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
+
union uvh_gr1_tlb_int0_config_u {
unsigned long v;
struct uvh_gr1_tlb_int0_config_s {
@@ -960,7 +1294,15 @@
/* ========================================================================= */
/* UVH_GR1_TLB_INT1_CONFIG */
/* ========================================================================= */
-#define UVH_GR1_TLB_INT1_CONFIG 0x61f40UL
+#define UV1H_GR1_TLB_INT1_CONFIG 0x61f40UL
+#define UV2H_GR1_TLB_INT1_CONFIG 0x61f40UL
+#define UV3H_GR1_TLB_INT1_CONFIG 0x61f40UL
+#define UV4H_GR1_TLB_INT1_CONFIG 0x62140UL
+#define UVH_GR1_TLB_INT1_CONFIG ( \
+ is_uv1_hub() ? UV1H_GR1_TLB_INT1_CONFIG : \
+ is_uv2_hub() ? UV2H_GR1_TLB_INT1_CONFIG : \
+ is_uv3_hub() ? UV3H_GR1_TLB_INT1_CONFIG : \
+ /*is_uv4_hub*/ UV4H_GR1_TLB_INT1_CONFIG)
#define UVH_GR1_TLB_INT1_CONFIG_VECTOR_SHFT 0
#define UVH_GR1_TLB_INT1_CONFIG_DM_SHFT 8
@@ -979,6 +1321,7 @@
#define UVH_GR1_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL
#define UVH_GR1_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
+
union uvh_gr1_tlb_int1_config_u {
unsigned long v;
struct uvh_gr1_tlb_int1_config_s {
@@ -1001,19 +1344,18 @@
#define UV1H_GR1_TLB_MMR_CONTROL 0x801080UL
#define UV2H_GR1_TLB_MMR_CONTROL 0x1001080UL
#define UV3H_GR1_TLB_MMR_CONTROL 0x1001080UL
-#define UVH_GR1_TLB_MMR_CONTROL \
- (is_uv1_hub() ? UV1H_GR1_TLB_MMR_CONTROL : \
- (is_uv2_hub() ? UV2H_GR1_TLB_MMR_CONTROL : \
- UV3H_GR1_TLB_MMR_CONTROL))
+#define UV4H_GR1_TLB_MMR_CONTROL 0x701080UL
+#define UVH_GR1_TLB_MMR_CONTROL ( \
+ is_uv1_hub() ? UV1H_GR1_TLB_MMR_CONTROL : \
+ is_uv2_hub() ? UV2H_GR1_TLB_MMR_CONTROL : \
+ is_uv3_hub() ? UV3H_GR1_TLB_MMR_CONTROL : \
+ /*is_uv4_hub*/ UV4H_GR1_TLB_MMR_CONTROL)
#define UVH_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0
-#define UVH_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT 12
#define UVH_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16
#define UVH_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20
#define UVH_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30
#define UVH_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT 31
-#define UVH_GR1_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL
-#define UVH_GR1_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL
#define UVH_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL
#define UVH_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL
#define UVH_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL
@@ -1043,14 +1385,11 @@
#define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBLRUV_MASK 0x1000000000000000UL
#define UVXH_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0
-#define UVXH_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT 12
#define UVXH_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16
#define UVXH_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20
#define UVXH_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30
#define UVXH_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT 31
#define UVXH_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32
-#define UVXH_GR1_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL
-#define UVXH_GR1_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL
#define UVXH_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL
#define UVXH_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL
#define UVXH_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL
@@ -1093,12 +1432,30 @@
#define UV3H_GR1_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL
#define UV3H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL
+#define UV4H_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0
+#define UV4H_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT 13
+#define UV4H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16
+#define UV4H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20
+#define UV4H_GR1_TLB_MMR_CONTROL_ECC_SEL_SHFT 21
+#define UV4H_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30
+#define UV4H_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT 31
+#define UV4H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32
+#define UV4H_GR1_TLB_MMR_CONTROL_PAGE_SIZE_SHFT 59
+#define UV4H_GR1_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000001fffUL
+#define UV4H_GR1_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000006000UL
+#define UV4H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL
+#define UV4H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL
+#define UV4H_GR1_TLB_MMR_CONTROL_ECC_SEL_MASK 0x0000000000200000UL
+#define UV4H_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL
+#define UV4H_GR1_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL
+#define UV4H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL
+#define UV4H_GR1_TLB_MMR_CONTROL_PAGE_SIZE_MASK 0xf800000000000000UL
+
+
union uvh_gr1_tlb_mmr_control_u {
unsigned long v;
struct uvh_gr1_tlb_mmr_control_s {
- unsigned long index:12; /* RW */
- unsigned long mem_sel:2; /* RW */
- unsigned long rsvd_14_15:2;
+ unsigned long rsvd_0_15:16;
unsigned long auto_valid_en:1; /* RW */
unsigned long rsvd_17_19:3;
unsigned long mmr_hash_index_en:1; /* RW */
@@ -1132,9 +1489,7 @@
unsigned long rsvd_61_63:3;
} s1;
struct uvxh_gr1_tlb_mmr_control_s {
- unsigned long index:12; /* RW */
- unsigned long mem_sel:2; /* RW */
- unsigned long rsvd_14_15:2;
+ unsigned long rsvd_0_15:16;
unsigned long auto_valid_en:1; /* RW */
unsigned long rsvd_17_19:3;
unsigned long mmr_hash_index_en:1; /* RW */
@@ -1145,8 +1500,7 @@
unsigned long rsvd_33_47:15;
unsigned long rsvd_48:1;
unsigned long rsvd_49_51:3;
- unsigned long rsvd_52:1;
- unsigned long rsvd_53_63:11;
+ unsigned long rsvd_52_63:12;
} sx;
struct uv2h_gr1_tlb_mmr_control_s {
unsigned long index:12; /* RW */
@@ -1183,6 +1537,24 @@
unsigned long undef_52:1; /* Undefined */
unsigned long rsvd_53_63:11;
} s3;
+ struct uv4h_gr1_tlb_mmr_control_s {
+ unsigned long index:13; /* RW */
+ unsigned long mem_sel:2; /* RW */
+ unsigned long rsvd_15:1;
+ unsigned long auto_valid_en:1; /* RW */
+ unsigned long rsvd_17_19:3;
+ unsigned long mmr_hash_index_en:1; /* RW */
+ unsigned long ecc_sel:1; /* RW */
+ unsigned long rsvd_22_29:8;
+ unsigned long mmr_write:1; /* WP */
+ unsigned long mmr_read:1; /* WP */
+ unsigned long mmr_op_done:1; /* RW */
+ unsigned long rsvd_33_47:15;
+ unsigned long undef_48:1; /* Undefined */
+ unsigned long rsvd_49_51:3;
+ unsigned long rsvd_52_58:7;
+ unsigned long page_size:5; /* RW */
+ } s4;
};
/* ========================================================================= */
@@ -1191,19 +1563,14 @@
#define UV1H_GR1_TLB_MMR_READ_DATA_HI 0x8010a0UL
#define UV2H_GR1_TLB_MMR_READ_DATA_HI 0x10010a0UL
#define UV3H_GR1_TLB_MMR_READ_DATA_HI 0x10010a0UL
-#define UVH_GR1_TLB_MMR_READ_DATA_HI \
- (is_uv1_hub() ? UV1H_GR1_TLB_MMR_READ_DATA_HI : \
- (is_uv2_hub() ? UV2H_GR1_TLB_MMR_READ_DATA_HI : \
- UV3H_GR1_TLB_MMR_READ_DATA_HI))
+#define UV4H_GR1_TLB_MMR_READ_DATA_HI 0x7010a0UL
+#define UVH_GR1_TLB_MMR_READ_DATA_HI ( \
+ is_uv1_hub() ? UV1H_GR1_TLB_MMR_READ_DATA_HI : \
+ is_uv2_hub() ? UV2H_GR1_TLB_MMR_READ_DATA_HI : \
+ is_uv3_hub() ? UV3H_GR1_TLB_MMR_READ_DATA_HI : \
+ /*is_uv4_hub*/ UV4H_GR1_TLB_MMR_READ_DATA_HI)
#define UVH_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
-#define UVH_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
-#define UVH_GR1_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43
-#define UVH_GR1_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44
-#define UVH_GR1_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL
-#define UVH_GR1_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL
-#define UVH_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL
-#define UVH_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL
#define UV1H_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
#define UV1H_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
@@ -1215,13 +1582,6 @@
#define UV1H_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL
#define UVXH_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
-#define UVXH_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
-#define UVXH_GR1_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43
-#define UVXH_GR1_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44
-#define UVXH_GR1_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL
-#define UVXH_GR1_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL
-#define UVXH_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL
-#define UVXH_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL
#define UV2H_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
#define UV2H_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 41
@@ -1245,15 +1605,24 @@
#define UV3H_GR1_TLB_MMR_READ_DATA_HI_AA_EXT_MASK 0x0000200000000000UL
#define UV3H_GR1_TLB_MMR_READ_DATA_HI_WAY_ECC_MASK 0xff80000000000000UL
+#define UV4H_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0
+#define UV4H_GR1_TLB_MMR_READ_DATA_HI_PNID_SHFT 34
+#define UV4H_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 49
+#define UV4H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 51
+#define UV4H_GR1_TLB_MMR_READ_DATA_HI_LARGER_SHFT 52
+#define UV4H_GR1_TLB_MMR_READ_DATA_HI_AA_EXT_SHFT 53
+#define UV4H_GR1_TLB_MMR_READ_DATA_HI_WAY_ECC_SHFT 55
+#define UV4H_GR1_TLB_MMR_READ_DATA_HI_PFN_MASK 0x00000003ffffffffUL
+#define UV4H_GR1_TLB_MMR_READ_DATA_HI_PNID_MASK 0x0001fffc00000000UL
+#define UV4H_GR1_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0006000000000000UL
+#define UV4H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0008000000000000UL
+#define UV4H_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0010000000000000UL
+#define UV4H_GR1_TLB_MMR_READ_DATA_HI_AA_EXT_MASK 0x0020000000000000UL
+#define UV4H_GR1_TLB_MMR_READ_DATA_HI_WAY_ECC_MASK 0xff80000000000000UL
+
+
union uvh_gr1_tlb_mmr_read_data_hi_u {
unsigned long v;
- struct uvh_gr1_tlb_mmr_read_data_hi_s {
- unsigned long pfn:41; /* RO */
- unsigned long gaa:2; /* RO */
- unsigned long dirty:1; /* RO */
- unsigned long larger:1; /* RO */
- unsigned long rsvd_45_63:19;
- } s;
struct uv1h_gr1_tlb_mmr_read_data_hi_s {
unsigned long pfn:41; /* RO */
unsigned long gaa:2; /* RO */
@@ -1261,13 +1630,6 @@
unsigned long larger:1; /* RO */
unsigned long rsvd_45_63:19;
} s1;
- struct uvxh_gr1_tlb_mmr_read_data_hi_s {
- unsigned long pfn:41; /* RO */
- unsigned long gaa:2; /* RO */
- unsigned long dirty:1; /* RO */
- unsigned long larger:1; /* RO */
- unsigned long rsvd_45_63:19;
- } sx;
struct uv2h_gr1_tlb_mmr_read_data_hi_s {
unsigned long pfn:41; /* RO */
unsigned long gaa:2; /* RO */
@@ -1284,6 +1646,16 @@
unsigned long undef_46_54:9; /* Undefined */
unsigned long way_ecc:9; /* RO */
} s3;
+ struct uv4h_gr1_tlb_mmr_read_data_hi_s {
+ unsigned long pfn:34; /* RO */
+ unsigned long pnid:15; /* RO */
+ unsigned long gaa:2; /* RO */
+ unsigned long dirty:1; /* RO */
+ unsigned long larger:1; /* RO */
+ unsigned long aa_ext:1; /* RO */
+ unsigned long undef_54:1; /* Undefined */
+ unsigned long way_ecc:9; /* RO */
+ } s4;
};
/* ========================================================================= */
@@ -1292,10 +1664,12 @@
#define UV1H_GR1_TLB_MMR_READ_DATA_LO 0x8010a8UL
#define UV2H_GR1_TLB_MMR_READ_DATA_LO 0x10010a8UL
#define UV3H_GR1_TLB_MMR_READ_DATA_LO 0x10010a8UL
-#define UVH_GR1_TLB_MMR_READ_DATA_LO \
- (is_uv1_hub() ? UV1H_GR1_TLB_MMR_READ_DATA_LO : \
- (is_uv2_hub() ? UV2H_GR1_TLB_MMR_READ_DATA_LO : \
- UV3H_GR1_TLB_MMR_READ_DATA_LO))
+#define UV4H_GR1_TLB_MMR_READ_DATA_LO 0x7010a8UL
+#define UVH_GR1_TLB_MMR_READ_DATA_LO ( \
+ is_uv1_hub() ? UV1H_GR1_TLB_MMR_READ_DATA_LO : \
+ is_uv2_hub() ? UV2H_GR1_TLB_MMR_READ_DATA_LO : \
+ is_uv3_hub() ? UV3H_GR1_TLB_MMR_READ_DATA_LO : \
+ /*is_uv4_hub*/ UV4H_GR1_TLB_MMR_READ_DATA_LO)
#define UVH_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
#define UVH_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
@@ -1332,6 +1706,14 @@
#define UV3H_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
#define UV3H_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
+#define UV4H_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0
+#define UV4H_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39
+#define UV4H_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT 63
+#define UV4H_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL
+#define UV4H_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL
+#define UV4H_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL
+
+
union uvh_gr1_tlb_mmr_read_data_lo_u {
unsigned long v;
struct uvh_gr1_tlb_mmr_read_data_lo_s {
@@ -1359,6 +1741,11 @@
unsigned long asid:24; /* RO */
unsigned long valid:1; /* RO */
} s3;
+ struct uv4h_gr1_tlb_mmr_read_data_lo_s {
+ unsigned long vpn:39; /* RO */
+ unsigned long asid:24; /* RO */
+ unsigned long valid:1; /* RO */
+ } s4;
};
/* ========================================================================= */
@@ -1369,6 +1756,7 @@
#define UVH_INT_CMPB_REAL_TIME_CMPB_SHFT 0
#define UVH_INT_CMPB_REAL_TIME_CMPB_MASK 0x00ffffffffffffffUL
+
union uvh_int_cmpb_u {
unsigned long v;
struct uvh_int_cmpb_s {
@@ -1382,12 +1770,14 @@
/* ========================================================================= */
#define UVH_INT_CMPC 0x22100UL
+
#define UV1H_INT_CMPC_REAL_TIME_CMPC_SHFT 0
#define UV1H_INT_CMPC_REAL_TIME_CMPC_MASK 0x00ffffffffffffffUL
#define UVXH_INT_CMPC_REAL_TIME_CMP_2_SHFT 0
#define UVXH_INT_CMPC_REAL_TIME_CMP_2_MASK 0x00ffffffffffffffUL
+
union uvh_int_cmpc_u {
unsigned long v;
struct uvh_int_cmpc_s {
@@ -1401,12 +1791,14 @@
/* ========================================================================= */
#define UVH_INT_CMPD 0x22180UL
+
#define UV1H_INT_CMPD_REAL_TIME_CMPD_SHFT 0
#define UV1H_INT_CMPD_REAL_TIME_CMPD_MASK 0x00ffffffffffffffUL
#define UVXH_INT_CMPD_REAL_TIME_CMP_3_SHFT 0
#define UVXH_INT_CMPD_REAL_TIME_CMP_3_MASK 0x00ffffffffffffffUL
+
union uvh_int_cmpd_u {
unsigned long v;
struct uvh_int_cmpd_s {
@@ -1419,7 +1811,16 @@
/* UVH_IPI_INT */
/* ========================================================================= */
#define UVH_IPI_INT 0x60500UL
-#define UVH_IPI_INT_32 0x348
+
+#define UV1H_IPI_INT_32 0x348
+#define UV2H_IPI_INT_32 0x348
+#define UV3H_IPI_INT_32 0x348
+#define UV4H_IPI_INT_32 0x268
+#define UVH_IPI_INT_32 ( \
+ is_uv1_hub() ? UV1H_IPI_INT_32 : \
+ is_uv2_hub() ? UV2H_IPI_INT_32 : \
+ is_uv3_hub() ? UV3H_IPI_INT_32 : \
+ /*is_uv4_hub*/ UV4H_IPI_INT_32)
#define UVH_IPI_INT_VECTOR_SHFT 0
#define UVH_IPI_INT_DELIVERY_MODE_SHFT 8
@@ -1432,6 +1833,7 @@
#define UVH_IPI_INT_APIC_ID_MASK 0x0000ffffffff0000UL
#define UVH_IPI_INT_SEND_MASK 0x8000000000000000UL
+
union uvh_ipi_int_u {
unsigned long v;
struct uvh_ipi_int_s {
@@ -1448,103 +1850,269 @@
/* ========================================================================= */
/* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST */
/* ========================================================================= */
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL
+#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL
+#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL
+#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL
+#define UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST uv_undefined("UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST")
+#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST ( \
+ is_uv1_hub() ? UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST : \
+ is_uv2_hub() ? UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST : \
+ is_uv3_hub() ? UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST : \
+ /*is_uv4_hub*/ UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST)
#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_32 0x9c0
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_SHFT 49
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_MASK 0x000007fffffffff0UL
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_MASK 0x7ffe000000000000UL
+
+#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4
+#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_SHFT 49
+#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_MASK 0x000007fffffffff0UL
+#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_MASK 0x7ffe000000000000UL
+
+
+#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4
+#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_SHFT 49
+#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_MASK 0x000007fffffffff0UL
+#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_MASK 0x7ffe000000000000UL
+
+#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4
+#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_SHFT 49
+#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_MASK 0x000007fffffffff0UL
+#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_MASK 0x7ffe000000000000UL
+
union uvh_lb_bau_intd_payload_queue_first_u {
unsigned long v;
- struct uvh_lb_bau_intd_payload_queue_first_s {
+ struct uv1h_lb_bau_intd_payload_queue_first_s {
unsigned long rsvd_0_3:4;
unsigned long address:39; /* RW */
unsigned long rsvd_43_48:6;
unsigned long node_id:14; /* RW */
unsigned long rsvd_63:1;
- } s;
+ } s1;
+ struct uv2h_lb_bau_intd_payload_queue_first_s {
+ unsigned long rsvd_0_3:4;
+ unsigned long address:39; /* RW */
+ unsigned long rsvd_43_48:6;
+ unsigned long node_id:14; /* RW */
+ unsigned long rsvd_63:1;
+ } s2;
+ struct uv3h_lb_bau_intd_payload_queue_first_s {
+ unsigned long rsvd_0_3:4;
+ unsigned long address:39; /* RW */
+ unsigned long rsvd_43_48:6;
+ unsigned long node_id:14; /* RW */
+ unsigned long rsvd_63:1;
+ } s3;
};
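/*
 * Assumption (the helper is not shown in this hunk): the UV4H_ names
 * above expand to uv_undefined(), expected to live in
 * <asm/uv/uv_hub.h>, so evaluating a BAU INTD payload-queue selector
 * on UV4 fails loudly instead of poking a bogus MMR offset.  Roughly:
 */
static inline unsigned long uv_undefined(char *str)
{
	panic("UV: error: undefined MMR: %s\n", str);
}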
/* ========================================================================= */
/* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST */
/* ========================================================================= */
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL
+#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL
+#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL
+#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL
+#define UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST uv_undefined("UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST")
+#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST ( \
+ is_uv1_hub() ? UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST : \
+ is_uv2_hub() ? UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST : \
+ is_uv3_hub() ? UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST : \
+ /*is_uv4_hub*/ UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST)
#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_32 0x9c8
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL
+
+#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4
+#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL
+
+
+#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4
+#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL
+
+#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4
+#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL
+
union uvh_lb_bau_intd_payload_queue_last_u {
unsigned long v;
- struct uvh_lb_bau_intd_payload_queue_last_s {
+ struct uv1h_lb_bau_intd_payload_queue_last_s {
unsigned long rsvd_0_3:4;
unsigned long address:39; /* RW */
unsigned long rsvd_43_63:21;
- } s;
+ } s1;
+ struct uv2h_lb_bau_intd_payload_queue_last_s {
+ unsigned long rsvd_0_3:4;
+ unsigned long address:39; /* RW */
+ unsigned long rsvd_43_63:21;
+ } s2;
+ struct uv3h_lb_bau_intd_payload_queue_last_s {
+ unsigned long rsvd_0_3:4;
+ unsigned long address:39; /* RW */
+ unsigned long rsvd_43_63:21;
+ } s3;
};
/* ========================================================================= */
/* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL */
/* ========================================================================= */
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL
+#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL
+#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL
+#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL
+#define UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL uv_undefined("UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL")
+#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL ( \
+ is_uv1_hub() ? UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL : \
+ is_uv2_hub() ? UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL : \
+ is_uv3_hub() ? UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL : \
+ /*is_uv4_hub*/ UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL)
#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_32 0x9d0
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL
+
+#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4
+#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL
+
+
+#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4
+#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL
+
+#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4
+#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL
+
union uvh_lb_bau_intd_payload_queue_tail_u {
unsigned long v;
- struct uvh_lb_bau_intd_payload_queue_tail_s {
+ struct uv1h_lb_bau_intd_payload_queue_tail_s {
unsigned long rsvd_0_3:4;
unsigned long address:39; /* RW */
unsigned long rsvd_43_63:21;
- } s;
+ } s1;
+ struct uv2h_lb_bau_intd_payload_queue_tail_s {
+ unsigned long rsvd_0_3:4;
+ unsigned long address:39; /* RW */
+ unsigned long rsvd_43_63:21;
+ } s2;
+ struct uv3h_lb_bau_intd_payload_queue_tail_s {
+ unsigned long rsvd_0_3:4;
+ unsigned long address:39; /* RW */
+ unsigned long rsvd_43_63:21;
+ } s3;
};
/* ========================================================================= */
/* UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE */
/* ========================================================================= */
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL
+#define UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE uv_undefined("UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE")
+#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE ( \
+ is_uv1_hub() ? UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE : \
+ is_uv2_hub() ? UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE : \
+ is_uv3_hub() ? UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE : \
+ /*is_uv4_hub*/ UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE)
#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_32 0xa68
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_SHFT 1
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_SHFT 2
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_SHFT 3
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_SHFT 4
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_SHFT 5
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_SHFT 6
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_SHFT 7
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_SHFT 8
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_SHFT 9
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_SHFT 10
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_SHFT 11
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_SHFT 12
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_SHFT 13
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_SHFT 14
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_SHFT 15
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_MASK 0x0000000000000001UL
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_MASK 0x0000000000000002UL
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_MASK 0x0000000000000004UL
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_MASK 0x0000000000000008UL
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_MASK 0x0000000000000010UL
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_MASK 0x0000000000000020UL
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_MASK 0x0000000000000040UL
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_MASK 0x0000000000000080UL
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_MASK 0x0000000000000100UL
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_MASK 0x0000000000000200UL
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_MASK 0x0000000000000400UL
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_MASK 0x0000000000000800UL
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_MASK 0x0000000000001000UL
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_MASK 0x0000000000002000UL
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_MASK 0x0000000000004000UL
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_MASK 0x0000000000008000UL
+
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_SHFT 1
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_SHFT 2
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_SHFT 3
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_SHFT 4
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_SHFT 5
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_SHFT 6
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_SHFT 7
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_SHFT 8
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_SHFT 9
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_SHFT 10
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_SHFT 11
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_SHFT 12
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_SHFT 13
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_SHFT 14
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_SHFT 15
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_MASK 0x0000000000000001UL
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_MASK 0x0000000000000002UL
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_MASK 0x0000000000000004UL
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_MASK 0x0000000000000008UL
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_MASK 0x0000000000000010UL
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_MASK 0x0000000000000020UL
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_MASK 0x0000000000000040UL
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_MASK 0x0000000000000080UL
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_MASK 0x0000000000000100UL
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_MASK 0x0000000000000200UL
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_MASK 0x0000000000000400UL
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_MASK 0x0000000000000800UL
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_MASK 0x0000000000001000UL
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_MASK 0x0000000000002000UL
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_MASK 0x0000000000004000UL
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_MASK 0x0000000000008000UL
+
+
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_SHFT 1
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_SHFT 2
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_SHFT 3
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_SHFT 4
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_SHFT 5
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_SHFT 6
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_SHFT 7
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_SHFT 8
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_SHFT 9
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_SHFT 10
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_SHFT 11
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_SHFT 12
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_SHFT 13
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_SHFT 14
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_SHFT 15
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_MASK 0x0000000000000001UL
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_MASK 0x0000000000000002UL
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_MASK 0x0000000000000004UL
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_MASK 0x0000000000000008UL
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_MASK 0x0000000000000010UL
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_MASK 0x0000000000000020UL
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_MASK 0x0000000000000040UL
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_MASK 0x0000000000000080UL
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_MASK 0x0000000000000100UL
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_MASK 0x0000000000000200UL
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_MASK 0x0000000000000400UL
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_MASK 0x0000000000000800UL
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_MASK 0x0000000000001000UL
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_MASK 0x0000000000002000UL
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_MASK 0x0000000000004000UL
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_MASK 0x0000000000008000UL
+
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_SHFT 1
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_SHFT 2
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_SHFT 3
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_SHFT 4
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_SHFT 5
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_SHFT 6
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_SHFT 7
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_SHFT 8
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_SHFT 9
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_SHFT 10
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_SHFT 11
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_SHFT 12
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_SHFT 13
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_SHFT 14
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_SHFT 15
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_MASK 0x0000000000000001UL
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_MASK 0x0000000000000002UL
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_MASK 0x0000000000000004UL
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_MASK 0x0000000000000008UL
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_MASK 0x0000000000000010UL
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_MASK 0x0000000000000020UL
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_MASK 0x0000000000000040UL
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_MASK 0x0000000000000080UL
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_MASK 0x0000000000000100UL
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_MASK 0x0000000000000200UL
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_MASK 0x0000000000000400UL
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_MASK 0x0000000000000800UL
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_MASK 0x0000000000001000UL
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_MASK 0x0000000000002000UL
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_MASK 0x0000000000004000UL
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_MASK 0x0000000000008000UL
+
union uvh_lb_bau_intd_software_acknowledge_u {
unsigned long v;
- struct uvh_lb_bau_intd_software_acknowledge_s {
+ struct uv1h_lb_bau_intd_software_acknowledge_s {
unsigned long pending_0:1; /* RW, W1C */
unsigned long pending_1:1; /* RW, W1C */
unsigned long pending_2:1; /* RW, W1C */
@@ -1562,27 +2130,84 @@
unsigned long timeout_6:1; /* RW, W1C */
unsigned long timeout_7:1; /* RW, W1C */
unsigned long rsvd_16_63:48;
- } s;
+ } s1;
+ struct uv2h_lb_bau_intd_software_acknowledge_s {
+ unsigned long pending_0:1; /* RW */
+ unsigned long pending_1:1; /* RW */
+ unsigned long pending_2:1; /* RW */
+ unsigned long pending_3:1; /* RW */
+ unsigned long pending_4:1; /* RW */
+ unsigned long pending_5:1; /* RW */
+ unsigned long pending_6:1; /* RW */
+ unsigned long pending_7:1; /* RW */
+ unsigned long timeout_0:1; /* RW */
+ unsigned long timeout_1:1; /* RW */
+ unsigned long timeout_2:1; /* RW */
+ unsigned long timeout_3:1; /* RW */
+ unsigned long timeout_4:1; /* RW */
+ unsigned long timeout_5:1; /* RW */
+ unsigned long timeout_6:1; /* RW */
+ unsigned long timeout_7:1; /* RW */
+ unsigned long rsvd_16_63:48;
+ } s2;
+ struct uv3h_lb_bau_intd_software_acknowledge_s {
+ unsigned long pending_0:1; /* RW */
+ unsigned long pending_1:1; /* RW */
+ unsigned long pending_2:1; /* RW */
+ unsigned long pending_3:1; /* RW */
+ unsigned long pending_4:1; /* RW */
+ unsigned long pending_5:1; /* RW */
+ unsigned long pending_6:1; /* RW */
+ unsigned long pending_7:1; /* RW */
+ unsigned long timeout_0:1; /* RW */
+ unsigned long timeout_1:1; /* RW */
+ unsigned long timeout_2:1; /* RW */
+ unsigned long timeout_3:1; /* RW */
+ unsigned long timeout_4:1; /* RW */
+ unsigned long timeout_5:1; /* RW */
+ unsigned long timeout_6:1; /* RW */
+ unsigned long timeout_7:1; /* RW */
+ unsigned long rsvd_16_63:48;
+ } s3;
};
/* ========================================================================= */
/* UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS */
/* ========================================================================= */
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x320088UL
+#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x320088UL
+#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x320088UL
+#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x320088UL
+#define UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS uv_undefined("UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS")
+#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS ( \
+ is_uv1_hub() ? UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS : \
+ is_uv2_hub() ? UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS : \
+ is_uv3_hub() ? UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS : \
+ /*is_uv4_hub*/ UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS)
#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS_32 0xa70
/* ========================================================================= */
/* UVH_LB_BAU_MISC_CONTROL */
/* ========================================================================= */
-#define UVH_LB_BAU_MISC_CONTROL 0x320170UL
#define UV1H_LB_BAU_MISC_CONTROL 0x320170UL
#define UV2H_LB_BAU_MISC_CONTROL 0x320170UL
#define UV3H_LB_BAU_MISC_CONTROL 0x320170UL
-#define UVH_LB_BAU_MISC_CONTROL_32 0xa10
-#define UV1H_LB_BAU_MISC_CONTROL_32 0x320170UL
-#define UV2H_LB_BAU_MISC_CONTROL_32 0x320170UL
-#define UV3H_LB_BAU_MISC_CONTROL_32 0x320170UL
+#define UV4H_LB_BAU_MISC_CONTROL 0xc8170UL
+#define UVH_LB_BAU_MISC_CONTROL ( \
+ is_uv1_hub() ? UV1H_LB_BAU_MISC_CONTROL : \
+ is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL : \
+ is_uv3_hub() ? UV3H_LB_BAU_MISC_CONTROL : \
+ /*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL)
+
+#define UV1H_LB_BAU_MISC_CONTROL_32 0xa10
+#define UV2H_LB_BAU_MISC_CONTROL_32 0xa10
+#define UV3H_LB_BAU_MISC_CONTROL_32 0xa10
+#define UV4H_LB_BAU_MISC_CONTROL_32 0xa18
+#define UVH_LB_BAU_MISC_CONTROL_32 ( \
+ is_uv1_hub() ? UV1H_LB_BAU_MISC_CONTROL_32 : \
+ is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL_32 : \
+ is_uv3_hub() ? UV3H_LB_BAU_MISC_CONTROL_32 : \
+ /*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL_32)
#define UVH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0
#define UVH_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8
@@ -1590,8 +2215,6 @@
#define UVH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10
#define UVH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11
#define UVH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14
-#define UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 15
-#define UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 16
#define UVH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20
#define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21
#define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22
@@ -1606,8 +2229,6 @@
#define UVH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL
#define UVH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL
#define UVH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL
-#define UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK 0x0000000000008000UL
-#define UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK 0x00000000000f0000UL
#define UVH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL
#define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL
#define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL
@@ -1656,8 +2277,6 @@
#define UVXH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10
#define UVXH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11
#define UVXH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14
-#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 15
-#define UVXH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 16
#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20
#define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21
#define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22
@@ -1679,8 +2298,6 @@
#define UVXH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL
#define UVXH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL
#define UVXH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL
-#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK 0x0000000000008000UL
-#define UVXH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK 0x00000000000f0000UL
#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL
#define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL
#define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL
@@ -1797,6 +2414,88 @@
#define UV3H_LB_BAU_MISC_CONTROL_THREAD_KILL_TIMEBASE_MASK 0x00003fc000000000UL
#define UV3H_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL
+#define UV4H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0
+#define UV4H_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8
+#define UV4H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9
+#define UV4H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10
+#define UV4H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11
+#define UV4H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14
+#define UV4H_LB_BAU_MISC_CONTROL_RESERVED_15_19_SHFT 15
+#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20
+#define UV4H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21
+#define UV4H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22
+#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23
+#define UV4H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24
+#define UV4H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27
+#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28
+#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_SHFT 29
+#define UV4H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_SHFT 30
+#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_SHFT 31
+#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_SHFT 32
+#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT 33
+#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_SHFT 34
+#define UV4H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT 35
+#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_QUIESCE_MSGS_TO_QPI_SHFT 36
+#define UV4H_LB_BAU_MISC_CONTROL_RESERVED_37_SHFT 37
+#define UV4H_LB_BAU_MISC_CONTROL_THREAD_KILL_TIMEBASE_SHFT 38
+#define UV4H_LB_BAU_MISC_CONTROL_ADDRESS_INTERLEAVE_SELECT_SHFT 46
+#define UV4H_LB_BAU_MISC_CONTROL_FUN_SHFT 48
+#define UV4H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL
+#define UV4H_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL
+#define UV4H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL
+#define UV4H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL
+#define UV4H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL
+#define UV4H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL
+#define UV4H_LB_BAU_MISC_CONTROL_RESERVED_15_19_MASK 0x00000000000f8000UL
+#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL
+#define UV4H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL
+#define UV4H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL
+#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL
+#define UV4H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL
+#define UV4H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL
+#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL
+#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_MASK 0x0000000020000000UL
+#define UV4H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_MASK 0x0000000040000000UL
+#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_MASK 0x0000000080000000UL
+#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_MASK 0x0000000100000000UL
+#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_MASK 0x0000000200000000UL
+#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_MASK 0x0000000400000000UL
+#define UV4H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_MASK 0x0000000800000000UL
+#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_QUIESCE_MSGS_TO_QPI_MASK 0x0000001000000000UL
+#define UV4H_LB_BAU_MISC_CONTROL_RESERVED_37_MASK 0x0000002000000000UL
+#define UV4H_LB_BAU_MISC_CONTROL_THREAD_KILL_TIMEBASE_MASK 0x00003fc000000000UL
+#define UV4H_LB_BAU_MISC_CONTROL_ADDRESS_INTERLEAVE_SELECT_MASK 0x0000400000000000UL
+#define UV4H_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL
+
+#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK \
+ uv_undefined("UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK")
+#define UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK ( \
+ is_uv1_hub() ? UV1H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK : \
+ is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK : \
+ is_uv3_hub() ? UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK : \
+ /*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK)
+#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT \
+ uv_undefined("UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT")
+#define UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT ( \
+ is_uv1_hub() ? UV1H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT : \
+ is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT : \
+ is_uv3_hub() ? UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT : \
+ /*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT)
+#define UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK \
+ uv_undefined("UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK")
+#define UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK ( \
+ is_uv1_hub() ? UV1H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK : \
+ is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK : \
+ is_uv3_hub() ? UV3H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK : \
+ /*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK)
+#define UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT \
+ uv_undefined("UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT")
+#define UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT ( \
+ is_uv1_hub() ? UV1H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT : \
+ is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT : \
+ is_uv3_hub() ? UV3H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT : \
+ /*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT)
+
union uvh_lb_bau_misc_control_u {
unsigned long v;
struct uvh_lb_bau_misc_control_s {
@@ -1806,8 +2505,7 @@
unsigned long force_lock_nop:1; /* RW */
unsigned long qpi_agent_presence_vector:3; /* RW */
unsigned long descriptor_fetch_mode:1; /* RW */
- unsigned long enable_intd_soft_ack_mode:1; /* RW */
- unsigned long intd_soft_ack_timeout_period:4; /* RW */
+ unsigned long rsvd_15_19:5;
unsigned long enable_dual_mapping_mode:1; /* RW */
unsigned long vga_io_port_decode_enable:1; /* RW */
unsigned long vga_io_port_16_bit_decode:1; /* RW */
@@ -1844,8 +2542,7 @@
unsigned long force_lock_nop:1; /* RW */
unsigned long qpi_agent_presence_vector:3; /* RW */
unsigned long descriptor_fetch_mode:1; /* RW */
- unsigned long enable_intd_soft_ack_mode:1; /* RW */
- unsigned long intd_soft_ack_timeout_period:4; /* RW */
+ unsigned long rsvd_15_19:5;
unsigned long enable_dual_mapping_mode:1; /* RW */
unsigned long vga_io_port_decode_enable:1; /* RW */
unsigned long vga_io_port_16_bit_decode:1; /* RW */
@@ -1918,13 +2615,59 @@
unsigned long rsvd_46_47:2;
unsigned long fun:16; /* RW */
} s3;
+ struct uv4h_lb_bau_misc_control_s {
+ unsigned long rejection_delay:8; /* RW */
+ unsigned long apic_mode:1; /* RW */
+ unsigned long force_broadcast:1; /* RW */
+ unsigned long force_lock_nop:1; /* RW */
+ unsigned long qpi_agent_presence_vector:3; /* RW */
+ unsigned long descriptor_fetch_mode:1; /* RW */
+ unsigned long rsvd_15_19:5;
+ unsigned long enable_dual_mapping_mode:1; /* RW */
+ unsigned long vga_io_port_decode_enable:1; /* RW */
+ unsigned long vga_io_port_16_bit_decode:1; /* RW */
+ unsigned long suppress_dest_registration:1; /* RW */
+ unsigned long programmed_initial_priority:3; /* RW */
+ unsigned long use_incoming_priority:1; /* RW */
+ unsigned long enable_programmed_initial_priority:1;/* RW */
+ unsigned long enable_automatic_apic_mode_selection:1;/* RW */
+ unsigned long apic_mode_status:1; /* RO */
+ unsigned long suppress_interrupts_to_self:1; /* RW */
+ unsigned long enable_lock_based_system_flush:1;/* RW */
+ unsigned long enable_extended_sb_status:1; /* RW */
+ unsigned long suppress_int_prio_udt_to_self:1;/* RW */
+ unsigned long use_legacy_descriptor_formats:1;/* RW */
+ unsigned long suppress_quiesce_msgs_to_qpi:1; /* RW */
+ unsigned long rsvd_37:1;
+ unsigned long thread_kill_timebase:8; /* RW */
+ unsigned long address_interleave_select:1; /* RW */
+ unsigned long rsvd_47:1;
+ unsigned long fun:16; /* RW */
+ } s4;
};
/* ========================================================================= */
/* UVH_LB_BAU_SB_ACTIVATION_CONTROL */
/* ========================================================================= */
-#define UVH_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL
-#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9a8
+#define UV1H_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL
+#define UV2H_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL
+#define UV3H_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL
+#define UV4H_LB_BAU_SB_ACTIVATION_CONTROL 0xc8020UL
+#define UVH_LB_BAU_SB_ACTIVATION_CONTROL ( \
+ is_uv1_hub() ? UV1H_LB_BAU_SB_ACTIVATION_CONTROL : \
+ is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_CONTROL : \
+ is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_CONTROL : \
+ /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_CONTROL)
+
+#define UV1H_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9a8
+#define UV2H_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9a8
+#define UV3H_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9a8
+#define UV4H_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9c8
+#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_32 ( \
+ is_uv1_hub() ? UV1H_LB_BAU_SB_ACTIVATION_CONTROL_32 : \
+ is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_CONTROL_32 : \
+ is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_CONTROL_32 : \
+ /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_CONTROL_32)
#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_SHFT 0
#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT 62
@@ -1933,6 +2676,7 @@
#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_MASK 0x4000000000000000UL
#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INIT_MASK 0x8000000000000000UL
+
union uvh_lb_bau_sb_activation_control_u {
unsigned long v;
struct uvh_lb_bau_sb_activation_control_s {
@@ -1946,12 +2690,30 @@
/* ========================================================================= */
/* UVH_LB_BAU_SB_ACTIVATION_STATUS_0 */
/* ========================================================================= */
-#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL
-#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9b0
+#define UV1H_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL
+#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL
+#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL
+#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_0 0xc8030UL
+#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0 ( \
+ is_uv1_hub() ? UV1H_LB_BAU_SB_ACTIVATION_STATUS_0 : \
+ is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_0 : \
+ is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_0 : \
+ /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_0)
+
+#define UV1H_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9b0
+#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9b0
+#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9b0
+#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9d0
+#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_32 ( \
+ is_uv1_hub() ? UV1H_LB_BAU_SB_ACTIVATION_STATUS_0_32 : \
+ is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_0_32 : \
+ is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_0_32 : \
+ /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_0_32)
#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_SHFT 0
#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_MASK 0xffffffffffffffffUL
+
union uvh_lb_bau_sb_activation_status_0_u {
unsigned long v;
struct uvh_lb_bau_sb_activation_status_0_s {
@@ -1962,12 +2724,30 @@
/* ========================================================================= */
/* UVH_LB_BAU_SB_ACTIVATION_STATUS_1 */
/* ========================================================================= */
-#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL
-#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9b8
+#define UV1H_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL
+#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL
+#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL
+#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_1 0xc8040UL
+#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1 ( \
+ is_uv1_hub() ? UV1H_LB_BAU_SB_ACTIVATION_STATUS_1 : \
+ is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_1 : \
+ is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_1 : \
+ /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_1)
+
+#define UV1H_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9b8
+#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9b8
+#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9b8
+#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9d8
+#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_32 ( \
+ is_uv1_hub() ? UV1H_LB_BAU_SB_ACTIVATION_STATUS_1_32 : \
+ is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_1_32 : \
+ is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_1_32 : \
+ /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_1_32)
#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_SHFT 0
#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_MASK 0xffffffffffffffffUL
+
union uvh_lb_bau_sb_activation_status_1_u {
unsigned long v;
struct uvh_lb_bau_sb_activation_status_1_s {
@@ -1978,23 +2758,55 @@
/* ========================================================================= */
/* UVH_LB_BAU_SB_DESCRIPTOR_BASE */
/* ========================================================================= */
-#define UVH_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL
-#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9a0
+#define UV1H_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL
+#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL
+#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL
+#define UV4H_LB_BAU_SB_DESCRIPTOR_BASE 0xc8010UL
+#define UVH_LB_BAU_SB_DESCRIPTOR_BASE ( \
+ is_uv1_hub() ? UV1H_LB_BAU_SB_DESCRIPTOR_BASE : \
+ is_uv2_hub() ? UV2H_LB_BAU_SB_DESCRIPTOR_BASE : \
+ is_uv3_hub() ? UV3H_LB_BAU_SB_DESCRIPTOR_BASE : \
+ /*is_uv4_hub*/ UV4H_LB_BAU_SB_DESCRIPTOR_BASE)
+
+#define UV1H_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9a0
+#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9a0
+#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9a0
+#define UV4H_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9c0
+#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_32 ( \
+ is_uv1_hub() ? UV1H_LB_BAU_SB_DESCRIPTOR_BASE_32 : \
+ is_uv2_hub() ? UV2H_LB_BAU_SB_DESCRIPTOR_BASE_32 : \
+ is_uv3_hub() ? UV3H_LB_BAU_SB_DESCRIPTOR_BASE_32 : \
+ /*is_uv4_hub*/ UV4H_LB_BAU_SB_DESCRIPTOR_BASE_32)
#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_SHFT 12
#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49
-#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL
#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0x7ffe000000000000UL
+#define UV1H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL
+
+
+#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL
+
+#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL
+
+#define UV4H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x00003ffffffff000UL
+
+
union uvh_lb_bau_sb_descriptor_base_u {
unsigned long v;
struct uvh_lb_bau_sb_descriptor_base_s {
unsigned long rsvd_0_11:12;
- unsigned long page_address:31; /* RW */
- unsigned long rsvd_43_48:6;
+ unsigned long rsvd_12_48:37;
unsigned long node_id:14; /* RW */
unsigned long rsvd_63:1;
} s;
+ struct uv4h_lb_bau_sb_descriptor_base_s {
+ unsigned long rsvd_0_11:12;
+ unsigned long page_address:34; /* RW */
+ unsigned long rsvd_46_48:3;
+ unsigned long node_id:14; /* RW */
+ unsigned long rsvd_63:1;
+ } s4;
};
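/*
 * Sketch only: each union overlays the raw 64-bit MMR value (v) with
 * per-hub bitfield views. UV4 widens page_address to 34 bits, so a
 * hub-aware decode of the descriptor base (uv_read_local_mmr() and
 * is_uv4_hub() assumed; UV1-UV3 share the same page-address mask)
 * might look like:
 *
 *	union uvh_lb_bau_sb_descriptor_base_u base;
 *	unsigned long pa;
 *
 *	base.v = uv_read_local_mmr(UVH_LB_BAU_SB_DESCRIPTOR_BASE);
 *	if (is_uv4_hub())
 *		pa = (unsigned long)base.s4.page_address << 12;
 *	else
 *		pa = base.v & UV1H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK;
 */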
/* ========================================================================= */
@@ -2004,6 +2816,7 @@
#define UV1H_NODE_ID 0x0UL
#define UV2H_NODE_ID 0x0UL
#define UV3H_NODE_ID 0x0UL
+#define UV4H_NODE_ID 0x0UL
#define UVH_NODE_ID_FORCE1_SHFT 0
#define UVH_NODE_ID_MANUFACTURER_SHFT 1
@@ -2080,6 +2893,26 @@
#define UV3H_NODE_ID_NODES_PER_BIT_MASK 0x01fc000000000000UL
#define UV3H_NODE_ID_NI_PORT_MASK 0x3e00000000000000UL
+#define UV4H_NODE_ID_FORCE1_SHFT 0
+#define UV4H_NODE_ID_MANUFACTURER_SHFT 1
+#define UV4H_NODE_ID_PART_NUMBER_SHFT 12
+#define UV4H_NODE_ID_REVISION_SHFT 28
+#define UV4H_NODE_ID_NODE_ID_SHFT 32
+#define UV4H_NODE_ID_ROUTER_SELECT_SHFT 48
+#define UV4H_NODE_ID_RESERVED_2_SHFT 49
+#define UV4H_NODE_ID_NODES_PER_BIT_SHFT 50
+#define UV4H_NODE_ID_NI_PORT_SHFT 57
+#define UV4H_NODE_ID_FORCE1_MASK 0x0000000000000001UL
+#define UV4H_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL
+#define UV4H_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL
+#define UV4H_NODE_ID_REVISION_MASK 0x00000000f0000000UL
+#define UV4H_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL
+#define UV4H_NODE_ID_ROUTER_SELECT_MASK 0x0001000000000000UL
+#define UV4H_NODE_ID_RESERVED_2_MASK 0x0002000000000000UL
+#define UV4H_NODE_ID_NODES_PER_BIT_MASK 0x01fc000000000000UL
+#define UV4H_NODE_ID_NI_PORT_MASK 0x3e00000000000000UL
+
+
union uvh_node_id_u {
unsigned long v;
struct uvh_node_id_s {
@@ -2137,17 +2970,40 @@
unsigned long ni_port:5; /* RO */
unsigned long rsvd_62_63:2;
} s3;
+ struct uv4h_node_id_s {
+ unsigned long force1:1; /* RO */
+ unsigned long manufacturer:11; /* RO */
+ unsigned long part_number:16; /* RO */
+ unsigned long revision:4; /* RO */
+ unsigned long node_id:15; /* RW */
+ unsigned long rsvd_47:1;
+ unsigned long router_select:1; /* RO */
+ unsigned long rsvd_49:1;
+ unsigned long nodes_per_bit:7; /* RO */
+ unsigned long ni_port:5; /* RO */
+ unsigned long rsvd_62_63:2;
+ } s4;
};
/* ========================================================================= */
/* UVH_NODE_PRESENT_TABLE */
/* ========================================================================= */
#define UVH_NODE_PRESENT_TABLE 0x1400UL
-#define UVH_NODE_PRESENT_TABLE_DEPTH 16
+
+#define UV1H_NODE_PRESENT_TABLE_DEPTH 16
+#define UV2H_NODE_PRESENT_TABLE_DEPTH 16
+#define UV3H_NODE_PRESENT_TABLE_DEPTH 16
+#define UV4H_NODE_PRESENT_TABLE_DEPTH 4
+#define UVH_NODE_PRESENT_TABLE_DEPTH ( \
+ is_uv1_hub() ? UV1H_NODE_PRESENT_TABLE_DEPTH : \
+ is_uv2_hub() ? UV2H_NODE_PRESENT_TABLE_DEPTH : \
+ is_uv3_hub() ? UV3H_NODE_PRESENT_TABLE_DEPTH : \
+ /*is_uv4_hub*/ UV4H_NODE_PRESENT_TABLE_DEPTH)
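/*
 * Sketch: UV4 shrinks the node-present table from 16 entries to 4, so
 * any walk over it must use the runtime-selected depth. Counting the
 * present nodes (uv_read_local_mmr() and hweight64() assumed) would
 * look roughly like:
 *
 *	int i, nodes = 0;
 *
 *	for (i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++)
 *		nodes += hweight64(uv_read_local_mmr(
 *					UVH_NODE_PRESENT_TABLE + i * 8));
 */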
#define UVH_NODE_PRESENT_TABLE_NODES_SHFT 0
#define UVH_NODE_PRESENT_TABLE_NODES_MASK 0xffffffffffffffffUL
+
union uvh_node_present_table_u {
unsigned long v;
struct uvh_node_present_table_s {
@@ -2158,7 +3014,15 @@
/* ========================================================================= */
/* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR */
/* ========================================================================= */
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x16000c8UL
+#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x16000c8UL
+#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x16000c8UL
+#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x16000c8UL
+#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x4800c8UL
+#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR ( \
+ is_uv1_hub() ? UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR : \
+ is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR : \
+ is_uv3_hub() ? UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR : \
+ /*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR)
#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24
#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48
@@ -2167,6 +3031,7 @@
#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL
#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL
+
union uvh_rh_gam_alias210_overlay_config_0_mmr_u {
unsigned long v;
struct uvh_rh_gam_alias210_overlay_config_0_mmr_s {
@@ -2182,7 +3047,15 @@
/* ========================================================================= */
/* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR */
/* ========================================================================= */
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x16000d8UL
+#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x16000d8UL
+#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x16000d8UL
+#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x16000d8UL
+#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x4800d8UL
+#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR ( \
+ is_uv1_hub() ? UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR : \
+ is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR : \
+ is_uv3_hub() ? UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR : \
+ /*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR)
#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24
#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48
@@ -2191,6 +3064,7 @@
#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL
#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL
+
union uvh_rh_gam_alias210_overlay_config_1_mmr_u {
unsigned long v;
struct uvh_rh_gam_alias210_overlay_config_1_mmr_s {
@@ -2206,7 +3080,15 @@
/* ========================================================================= */
/* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR */
/* ========================================================================= */
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x16000e8UL
+#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x16000e8UL
+#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x16000e8UL
+#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x16000e8UL
+#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x4800e8UL
+#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR ( \
+ is_uv1_hub() ? UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR : \
+ is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR : \
+ is_uv3_hub() ? UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR : \
+ /*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR)
#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24
#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48
@@ -2215,6 +3097,7 @@
#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL
#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL
+
union uvh_rh_gam_alias210_overlay_config_2_mmr_u {
unsigned long v;
struct uvh_rh_gam_alias210_overlay_config_2_mmr_s {
@@ -2230,11 +3113,20 @@
/* ========================================================================= */
/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR */
/* ========================================================================= */
-#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL
+#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL
+#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL
+#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL
+#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x4800d0UL
+#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR ( \
+ is_uv1_hub() ? UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR : \
+ is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR : \
+ is_uv3_hub() ? UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR : \
+ /*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR)
#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24
#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+
union uvh_rh_gam_alias210_redirect_config_0_mmr_u {
unsigned long v;
struct uvh_rh_gam_alias210_redirect_config_0_mmr_s {
@@ -2247,11 +3139,20 @@
/* ========================================================================= */
/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR */
/* ========================================================================= */
-#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL
+#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL
+#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL
+#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL
+#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x4800e0UL
+#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR ( \
+ is_uv1_hub() ? UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR : \
+ is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR : \
+ is_uv3_hub() ? UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR : \
+ /*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR)
#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24
#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+
union uvh_rh_gam_alias210_redirect_config_1_mmr_u {
unsigned long v;
struct uvh_rh_gam_alias210_redirect_config_1_mmr_s {
@@ -2264,11 +3165,20 @@
/* ========================================================================= */
/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR */
/* ========================================================================= */
-#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL
+#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL
+#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL
+#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL
+#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x4800f0UL
+#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR ( \
+ is_uv1_hub() ? UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR : \
+ is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR : \
+ is_uv3_hub() ? UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR : \
+ /*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR)
#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24
#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+
union uvh_rh_gam_alias210_redirect_config_2_mmr_u {
unsigned long v;
struct uvh_rh_gam_alias210_redirect_config_2_mmr_s {
@@ -2281,14 +3191,17 @@
/* ========================================================================= */
/* UVH_RH_GAM_CONFIG_MMR */
/* ========================================================================= */
-#define UVH_RH_GAM_CONFIG_MMR 0x1600000UL
#define UV1H_RH_GAM_CONFIG_MMR 0x1600000UL
#define UV2H_RH_GAM_CONFIG_MMR 0x1600000UL
#define UV3H_RH_GAM_CONFIG_MMR 0x1600000UL
+#define UV4H_RH_GAM_CONFIG_MMR 0x480000UL
+#define UVH_RH_GAM_CONFIG_MMR ( \
+ is_uv1_hub() ? UV1H_RH_GAM_CONFIG_MMR : \
+ is_uv2_hub() ? UV2H_RH_GAM_CONFIG_MMR : \
+ is_uv3_hub() ? UV3H_RH_GAM_CONFIG_MMR : \
+ /*is_uv4_hub*/ UV4H_RH_GAM_CONFIG_MMR)
-#define UVH_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0
#define UVH_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6
-#define UVH_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL
#define UVH_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL
#define UV1H_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0
@@ -2298,9 +3211,7 @@
#define UV1H_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL
#define UV1H_RH_GAM_CONFIG_MMR_MMIOL_CFG_MASK 0x0000000000001000UL
-#define UVXH_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0
#define UVXH_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6
-#define UVXH_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL
#define UVXH_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL
#define UV2H_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0
@@ -2313,10 +3224,14 @@
#define UV3H_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL
#define UV3H_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL
+#define UV4H_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6
+#define UV4H_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL
+
+
union uvh_rh_gam_config_mmr_u {
unsigned long v;
struct uvh_rh_gam_config_mmr_s {
- unsigned long m_skt:6; /* RW */
+ unsigned long rsvd_0_5:6;
unsigned long n_skt:4; /* RW */
unsigned long rsvd_10_63:54;
} s;
@@ -2328,7 +3243,7 @@
unsigned long rsvd_13_63:51;
} s1;
struct uvxh_rh_gam_config_mmr_s {
- unsigned long m_skt:6; /* RW */
+ unsigned long rsvd_0_5:6;
unsigned long n_skt:4; /* RW */
unsigned long rsvd_10_63:54;
} sx;
@@ -2342,20 +3257,28 @@
unsigned long n_skt:4; /* RW */
unsigned long rsvd_10_63:54;
} s3;
+ struct uv4h_rh_gam_config_mmr_s {
+ unsigned long rsvd_0_5:6;
+ unsigned long n_skt:4; /* RW */
+ unsigned long rsvd_10_63:54;
+ } s4;
};
/* ========================================================================= */
/* UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR */
/* ========================================================================= */
-#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL
#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL
#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL
#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL
+#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x480010UL
+#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR ( \
+ is_uv1_hub() ? UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR : \
+ is_uv2_hub() ? UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR : \
+ is_uv3_hub() ? UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR : \
+ /*is_uv4_hub*/ UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR)
-#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28
#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52
#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
-#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL
#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL
#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
@@ -2368,10 +3291,8 @@
#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL
#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
-#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28
#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52
#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
-#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL
#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL
#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
@@ -2391,12 +3312,28 @@
#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_MODE_MASK 0x4000000000000000UL
#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
+#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 26
+#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52
+#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
+#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL
+#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL
+#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
+
+#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK ( \
+ is_uv1_hub() ? UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK : \
+ is_uv2_hub() ? UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK : \
+ is_uv3_hub() ? UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK : \
+ /*is_uv4_hub*/ UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK)
+#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT ( \
+ is_uv1_hub() ? UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT : \
+ is_uv2_hub() ? UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT : \
+ is_uv3_hub() ? UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT : \
+ /*is_uv4_hub*/ UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT)
+
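/*
 * Sketch: UV4 moves the GRU overlay base down to bit 26, which is why
 * the BASE_MASK/BASE_SHFT pair above must itself be selected at run
 * time. Because each mask covers the address bits in place, generic
 * code can decode the base identically on every hub:
 *
 *	unsigned long cfg, gru_base;
 *
 *	cfg = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR);
 *	gru_base = cfg & UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK;
 */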
union uvh_rh_gam_gru_overlay_config_mmr_u {
unsigned long v;
struct uvh_rh_gam_gru_overlay_config_mmr_s {
- unsigned long rsvd_0_27:28;
- unsigned long base:18; /* RW */
- unsigned long rsvd_46_51:6;
+ unsigned long rsvd_0_51:52;
unsigned long n_gru:4; /* RW */
unsigned long rsvd_56_62:7;
unsigned long enable:1; /* RW */
@@ -2412,8 +3349,7 @@
unsigned long enable:1; /* RW */
} s1;
struct uvxh_rh_gam_gru_overlay_config_mmr_s {
- unsigned long rsvd_0_27:28;
- unsigned long base:18; /* RW */
+ unsigned long rsvd_0_45:46;
unsigned long rsvd_46_51:6;
unsigned long n_gru:4; /* RW */
unsigned long rsvd_56_62:7;
@@ -2436,6 +3372,15 @@
unsigned long mode:1; /* RW */
unsigned long enable:1; /* RW */
} s3;
+ struct uv4h_rh_gam_gru_overlay_config_mmr_s {
+ unsigned long rsvd_0_24:25;
+ unsigned long undef_25:1; /* Undefined */
+ unsigned long base:20; /* RW */
+ unsigned long rsvd_46_51:6;
+ unsigned long n_gru:4; /* RW */
+ unsigned long rsvd_56_62:7;
+ unsigned long enable:1; /* RW */
+ } s4;
};
/* ========================================================================= */
@@ -2443,6 +3388,14 @@
/* ========================================================================= */
#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR 0x1600030UL
#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR 0x1600030UL
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR uv_undefined("UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR")
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR uv_undefined("UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR")
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR ( \
+ is_uv1_hub() ? UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR : \
+ is_uv2_hub() ? UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR : \
+ is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR : \
+ /*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR)
+
#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT 30
#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_SHFT 46
@@ -2453,6 +3406,7 @@
#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_MASK 0x00f0000000000000UL
#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
+
#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT 27
#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_SHFT 46
#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_SHFT 52
@@ -2462,6 +3416,7 @@
#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_MASK 0x00f0000000000000UL
#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
+
union uvh_rh_gam_mmioh_overlay_config_mmr_u {
unsigned long v;
struct uv1h_rh_gam_mmioh_overlay_config_mmr_s {
@@ -2485,10 +3440,15 @@
/* ========================================================================= */
/* UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR */
/* ========================================================================= */
-#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL
#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL
#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL
#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL
+#define UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x480028UL
+#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR ( \
+ is_uv1_hub() ? UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR : \
+ is_uv2_hub() ? UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR : \
+ is_uv3_hub() ? UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR : \
+ /*is_uv4_hub*/ UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR)
#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26
#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
@@ -2517,6 +3477,12 @@
#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL
#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
+#define UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26
+#define UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
+#define UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL
+#define UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
+
+
union uvh_rh_gam_mmr_overlay_config_mmr_u {
unsigned long v;
struct uvh_rh_gam_mmr_overlay_config_mmr_s {
@@ -2550,16 +3516,31 @@
unsigned long rsvd_46_62:17;
unsigned long enable:1; /* RW */
} s3;
+ struct uv4h_rh_gam_mmr_overlay_config_mmr_s {
+ unsigned long rsvd_0_25:26;
+ unsigned long base:20; /* RW */
+ unsigned long rsvd_46_62:17;
+ unsigned long enable:1; /* RW */
+ } s4;
};
/* ========================================================================= */
/* UVH_RTC */
/* ========================================================================= */
-#define UVH_RTC 0x340000UL
+#define UV1H_RTC 0x340000UL
+#define UV2H_RTC 0x340000UL
+#define UV3H_RTC 0x340000UL
+#define UV4H_RTC 0xe0000UL
+#define UVH_RTC ( \
+ is_uv1_hub() ? UV1H_RTC : \
+ is_uv2_hub() ? UV2H_RTC : \
+ is_uv3_hub() ? UV3H_RTC : \
+ /*is_uv4_hub*/ UV4H_RTC)
#define UVH_RTC_REAL_TIME_CLOCK_SHFT 0
#define UVH_RTC_REAL_TIME_CLOCK_MASK 0x00ffffffffffffffUL
+
union uvh_rtc_u {
unsigned long v;
struct uvh_rtc_s {
@@ -2590,6 +3571,7 @@
#define UVH_RTC1_INT_CONFIG_M_MASK 0x0000000000010000UL
#define UVH_RTC1_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
+
union uvh_rtc1_int_config_u {
unsigned long v;
struct uvh_rtc1_int_config_s {
@@ -2609,12 +3591,30 @@
/* ========================================================================= */
/* UVH_SCRATCH5 */
/* ========================================================================= */
-#define UVH_SCRATCH5 0x2d0200UL
-#define UVH_SCRATCH5_32 0x778
+#define UV1H_SCRATCH5 0x2d0200UL
+#define UV2H_SCRATCH5 0x2d0200UL
+#define UV3H_SCRATCH5 0x2d0200UL
+#define UV4H_SCRATCH5 0xb0200UL
+#define UVH_SCRATCH5 ( \
+ is_uv1_hub() ? UV1H_SCRATCH5 : \
+ is_uv2_hub() ? UV2H_SCRATCH5 : \
+ is_uv3_hub() ? UV3H_SCRATCH5 : \
+ /*is_uv4_hub*/ UV4H_SCRATCH5)
+
+#define UV1H_SCRATCH5_32 0x778
+#define UV2H_SCRATCH5_32 0x778
+#define UV3H_SCRATCH5_32 0x778
+#define UV4H_SCRATCH5_32 0x798
+#define UVH_SCRATCH5_32 ( \
+ is_uv1_hub() ? UV1H_SCRATCH5_32 : \
+ is_uv2_hub() ? UV2H_SCRATCH5_32 : \
+ is_uv3_hub() ? UV3H_SCRATCH5_32 : \
+ /*is_uv4_hub*/ UV4H_SCRATCH5_32)
#define UVH_SCRATCH5_SCRATCH5_SHFT 0
#define UVH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL
+
union uvh_scratch5_u {
unsigned long v;
struct uvh_scratch5_s {
@@ -2625,14 +3625,39 @@
/* ========================================================================= */
/* UVH_SCRATCH5_ALIAS */
/* ========================================================================= */
-#define UVH_SCRATCH5_ALIAS 0x2d0208UL
-#define UVH_SCRATCH5_ALIAS_32 0x780
+#define UV1H_SCRATCH5_ALIAS 0x2d0208UL
+#define UV2H_SCRATCH5_ALIAS 0x2d0208UL
+#define UV3H_SCRATCH5_ALIAS 0x2d0208UL
+#define UV4H_SCRATCH5_ALIAS 0xb0208UL
+#define UVH_SCRATCH5_ALIAS ( \
+ is_uv1_hub() ? UV1H_SCRATCH5_ALIAS : \
+ is_uv2_hub() ? UV2H_SCRATCH5_ALIAS : \
+ is_uv3_hub() ? UV3H_SCRATCH5_ALIAS : \
+ /*is_uv4_hub*/ UV4H_SCRATCH5_ALIAS)
+
+#define UV1H_SCRATCH5_ALIAS_32 0x780
+#define UV2H_SCRATCH5_ALIAS_32 0x780
+#define UV3H_SCRATCH5_ALIAS_32 0x780
+#define UV4H_SCRATCH5_ALIAS_32 0x7a0
+#define UVH_SCRATCH5_ALIAS_32 ( \
+ is_uv1_hub() ? UV1H_SCRATCH5_ALIAS_32 : \
+ is_uv2_hub() ? UV2H_SCRATCH5_ALIAS_32 : \
+ is_uv3_hub() ? UV3H_SCRATCH5_ALIAS_32 : \
+ /*is_uv4_hub*/ UV4H_SCRATCH5_ALIAS_32)
/* ========================================================================= */
/* UVH_SCRATCH5_ALIAS_2 */
/* ========================================================================= */
-#define UVH_SCRATCH5_ALIAS_2 0x2d0210UL
+#define UV1H_SCRATCH5_ALIAS_2 0x2d0210UL
+#define UV2H_SCRATCH5_ALIAS_2 0x2d0210UL
+#define UV3H_SCRATCH5_ALIAS_2 0x2d0210UL
+#define UV4H_SCRATCH5_ALIAS_2 0xb0210UL
+#define UVH_SCRATCH5_ALIAS_2 ( \
+ is_uv1_hub() ? UV1H_SCRATCH5_ALIAS_2 : \
+ is_uv2_hub() ? UV2H_SCRATCH5_ALIAS_2 : \
+ is_uv3_hub() ? UV3H_SCRATCH5_ALIAS_2 : \
+ /*is_uv4_hub*/ UV4H_SCRATCH5_ALIAS_2)
#define UVH_SCRATCH5_ALIAS_2_32 0x788
@@ -2640,76 +3665,255 @@
/* UVXH_EVENT_OCCURRED2 */
/* ========================================================================= */
#define UVXH_EVENT_OCCURRED2 0x70100UL
-#define UVXH_EVENT_OCCURRED2_32 0xb68
-#define UVXH_EVENT_OCCURRED2_RTC_0_SHFT 0
-#define UVXH_EVENT_OCCURRED2_RTC_1_SHFT 1
-#define UVXH_EVENT_OCCURRED2_RTC_2_SHFT 2
-#define UVXH_EVENT_OCCURRED2_RTC_3_SHFT 3
-#define UVXH_EVENT_OCCURRED2_RTC_4_SHFT 4
-#define UVXH_EVENT_OCCURRED2_RTC_5_SHFT 5
-#define UVXH_EVENT_OCCURRED2_RTC_6_SHFT 6
-#define UVXH_EVENT_OCCURRED2_RTC_7_SHFT 7
-#define UVXH_EVENT_OCCURRED2_RTC_8_SHFT 8
-#define UVXH_EVENT_OCCURRED2_RTC_9_SHFT 9
-#define UVXH_EVENT_OCCURRED2_RTC_10_SHFT 10
-#define UVXH_EVENT_OCCURRED2_RTC_11_SHFT 11
-#define UVXH_EVENT_OCCURRED2_RTC_12_SHFT 12
-#define UVXH_EVENT_OCCURRED2_RTC_13_SHFT 13
-#define UVXH_EVENT_OCCURRED2_RTC_14_SHFT 14
-#define UVXH_EVENT_OCCURRED2_RTC_15_SHFT 15
-#define UVXH_EVENT_OCCURRED2_RTC_16_SHFT 16
-#define UVXH_EVENT_OCCURRED2_RTC_17_SHFT 17
-#define UVXH_EVENT_OCCURRED2_RTC_18_SHFT 18
-#define UVXH_EVENT_OCCURRED2_RTC_19_SHFT 19
-#define UVXH_EVENT_OCCURRED2_RTC_20_SHFT 20
-#define UVXH_EVENT_OCCURRED2_RTC_21_SHFT 21
-#define UVXH_EVENT_OCCURRED2_RTC_22_SHFT 22
-#define UVXH_EVENT_OCCURRED2_RTC_23_SHFT 23
-#define UVXH_EVENT_OCCURRED2_RTC_24_SHFT 24
-#define UVXH_EVENT_OCCURRED2_RTC_25_SHFT 25
-#define UVXH_EVENT_OCCURRED2_RTC_26_SHFT 26
-#define UVXH_EVENT_OCCURRED2_RTC_27_SHFT 27
-#define UVXH_EVENT_OCCURRED2_RTC_28_SHFT 28
-#define UVXH_EVENT_OCCURRED2_RTC_29_SHFT 29
-#define UVXH_EVENT_OCCURRED2_RTC_30_SHFT 30
-#define UVXH_EVENT_OCCURRED2_RTC_31_SHFT 31
-#define UVXH_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000000001UL
-#define UVXH_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000000002UL
-#define UVXH_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000000004UL
-#define UVXH_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000000008UL
-#define UVXH_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000000010UL
-#define UVXH_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000000020UL
-#define UVXH_EVENT_OCCURRED2_RTC_6_MASK 0x0000000000000040UL
-#define UVXH_EVENT_OCCURRED2_RTC_7_MASK 0x0000000000000080UL
-#define UVXH_EVENT_OCCURRED2_RTC_8_MASK 0x0000000000000100UL
-#define UVXH_EVENT_OCCURRED2_RTC_9_MASK 0x0000000000000200UL
-#define UVXH_EVENT_OCCURRED2_RTC_10_MASK 0x0000000000000400UL
-#define UVXH_EVENT_OCCURRED2_RTC_11_MASK 0x0000000000000800UL
-#define UVXH_EVENT_OCCURRED2_RTC_12_MASK 0x0000000000001000UL
-#define UVXH_EVENT_OCCURRED2_RTC_13_MASK 0x0000000000002000UL
-#define UVXH_EVENT_OCCURRED2_RTC_14_MASK 0x0000000000004000UL
-#define UVXH_EVENT_OCCURRED2_RTC_15_MASK 0x0000000000008000UL
-#define UVXH_EVENT_OCCURRED2_RTC_16_MASK 0x0000000000010000UL
-#define UVXH_EVENT_OCCURRED2_RTC_17_MASK 0x0000000000020000UL
-#define UVXH_EVENT_OCCURRED2_RTC_18_MASK 0x0000000000040000UL
-#define UVXH_EVENT_OCCURRED2_RTC_19_MASK 0x0000000000080000UL
-#define UVXH_EVENT_OCCURRED2_RTC_20_MASK 0x0000000000100000UL
-#define UVXH_EVENT_OCCURRED2_RTC_21_MASK 0x0000000000200000UL
-#define UVXH_EVENT_OCCURRED2_RTC_22_MASK 0x0000000000400000UL
-#define UVXH_EVENT_OCCURRED2_RTC_23_MASK 0x0000000000800000UL
-#define UVXH_EVENT_OCCURRED2_RTC_24_MASK 0x0000000001000000UL
-#define UVXH_EVENT_OCCURRED2_RTC_25_MASK 0x0000000002000000UL
-#define UVXH_EVENT_OCCURRED2_RTC_26_MASK 0x0000000004000000UL
-#define UVXH_EVENT_OCCURRED2_RTC_27_MASK 0x0000000008000000UL
-#define UVXH_EVENT_OCCURRED2_RTC_28_MASK 0x0000000010000000UL
-#define UVXH_EVENT_OCCURRED2_RTC_29_MASK 0x0000000020000000UL
-#define UVXH_EVENT_OCCURRED2_RTC_30_MASK 0x0000000040000000UL
-#define UVXH_EVENT_OCCURRED2_RTC_31_MASK 0x0000000080000000UL
+#define UV2H_EVENT_OCCURRED2_32 0xb68
+#define UV3H_EVENT_OCCURRED2_32 0xb68
+#define UV4H_EVENT_OCCURRED2_32 0x608
+#define UVH_EVENT_OCCURRED2_32 ( \
+ is_uv2_hub() ? UV2H_EVENT_OCCURRED2_32 : \
+ is_uv3_hub() ? UV3H_EVENT_OCCURRED2_32 : \
+ /*is_uv4_hub*/ UV4H_EVENT_OCCURRED2_32)
-union uvxh_event_occurred2_u {
+
+#define UV2H_EVENT_OCCURRED2_RTC_0_SHFT 0
+#define UV2H_EVENT_OCCURRED2_RTC_1_SHFT 1
+#define UV2H_EVENT_OCCURRED2_RTC_2_SHFT 2
+#define UV2H_EVENT_OCCURRED2_RTC_3_SHFT 3
+#define UV2H_EVENT_OCCURRED2_RTC_4_SHFT 4
+#define UV2H_EVENT_OCCURRED2_RTC_5_SHFT 5
+#define UV2H_EVENT_OCCURRED2_RTC_6_SHFT 6
+#define UV2H_EVENT_OCCURRED2_RTC_7_SHFT 7
+#define UV2H_EVENT_OCCURRED2_RTC_8_SHFT 8
+#define UV2H_EVENT_OCCURRED2_RTC_9_SHFT 9
+#define UV2H_EVENT_OCCURRED2_RTC_10_SHFT 10
+#define UV2H_EVENT_OCCURRED2_RTC_11_SHFT 11
+#define UV2H_EVENT_OCCURRED2_RTC_12_SHFT 12
+#define UV2H_EVENT_OCCURRED2_RTC_13_SHFT 13
+#define UV2H_EVENT_OCCURRED2_RTC_14_SHFT 14
+#define UV2H_EVENT_OCCURRED2_RTC_15_SHFT 15
+#define UV2H_EVENT_OCCURRED2_RTC_16_SHFT 16
+#define UV2H_EVENT_OCCURRED2_RTC_17_SHFT 17
+#define UV2H_EVENT_OCCURRED2_RTC_18_SHFT 18
+#define UV2H_EVENT_OCCURRED2_RTC_19_SHFT 19
+#define UV2H_EVENT_OCCURRED2_RTC_20_SHFT 20
+#define UV2H_EVENT_OCCURRED2_RTC_21_SHFT 21
+#define UV2H_EVENT_OCCURRED2_RTC_22_SHFT 22
+#define UV2H_EVENT_OCCURRED2_RTC_23_SHFT 23
+#define UV2H_EVENT_OCCURRED2_RTC_24_SHFT 24
+#define UV2H_EVENT_OCCURRED2_RTC_25_SHFT 25
+#define UV2H_EVENT_OCCURRED2_RTC_26_SHFT 26
+#define UV2H_EVENT_OCCURRED2_RTC_27_SHFT 27
+#define UV2H_EVENT_OCCURRED2_RTC_28_SHFT 28
+#define UV2H_EVENT_OCCURRED2_RTC_29_SHFT 29
+#define UV2H_EVENT_OCCURRED2_RTC_30_SHFT 30
+#define UV2H_EVENT_OCCURRED2_RTC_31_SHFT 31
+#define UV2H_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000000001UL
+#define UV2H_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000000002UL
+#define UV2H_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000000004UL
+#define UV2H_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000000008UL
+#define UV2H_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000000010UL
+#define UV2H_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000000020UL
+#define UV2H_EVENT_OCCURRED2_RTC_6_MASK 0x0000000000000040UL
+#define UV2H_EVENT_OCCURRED2_RTC_7_MASK 0x0000000000000080UL
+#define UV2H_EVENT_OCCURRED2_RTC_8_MASK 0x0000000000000100UL
+#define UV2H_EVENT_OCCURRED2_RTC_9_MASK 0x0000000000000200UL
+#define UV2H_EVENT_OCCURRED2_RTC_10_MASK 0x0000000000000400UL
+#define UV2H_EVENT_OCCURRED2_RTC_11_MASK 0x0000000000000800UL
+#define UV2H_EVENT_OCCURRED2_RTC_12_MASK 0x0000000000001000UL
+#define UV2H_EVENT_OCCURRED2_RTC_13_MASK 0x0000000000002000UL
+#define UV2H_EVENT_OCCURRED2_RTC_14_MASK 0x0000000000004000UL
+#define UV2H_EVENT_OCCURRED2_RTC_15_MASK 0x0000000000008000UL
+#define UV2H_EVENT_OCCURRED2_RTC_16_MASK 0x0000000000010000UL
+#define UV2H_EVENT_OCCURRED2_RTC_17_MASK 0x0000000000020000UL
+#define UV2H_EVENT_OCCURRED2_RTC_18_MASK 0x0000000000040000UL
+#define UV2H_EVENT_OCCURRED2_RTC_19_MASK 0x0000000000080000UL
+#define UV2H_EVENT_OCCURRED2_RTC_20_MASK 0x0000000000100000UL
+#define UV2H_EVENT_OCCURRED2_RTC_21_MASK 0x0000000000200000UL
+#define UV2H_EVENT_OCCURRED2_RTC_22_MASK 0x0000000000400000UL
+#define UV2H_EVENT_OCCURRED2_RTC_23_MASK 0x0000000000800000UL
+#define UV2H_EVENT_OCCURRED2_RTC_24_MASK 0x0000000001000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_25_MASK 0x0000000002000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_26_MASK 0x0000000004000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_27_MASK 0x0000000008000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_28_MASK 0x0000000010000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_29_MASK 0x0000000020000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_30_MASK 0x0000000040000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_31_MASK 0x0000000080000000UL
+
+#define UV3H_EVENT_OCCURRED2_RTC_0_SHFT 0
+#define UV3H_EVENT_OCCURRED2_RTC_1_SHFT 1
+#define UV3H_EVENT_OCCURRED2_RTC_2_SHFT 2
+#define UV3H_EVENT_OCCURRED2_RTC_3_SHFT 3
+#define UV3H_EVENT_OCCURRED2_RTC_4_SHFT 4
+#define UV3H_EVENT_OCCURRED2_RTC_5_SHFT 5
+#define UV3H_EVENT_OCCURRED2_RTC_6_SHFT 6
+#define UV3H_EVENT_OCCURRED2_RTC_7_SHFT 7
+#define UV3H_EVENT_OCCURRED2_RTC_8_SHFT 8
+#define UV3H_EVENT_OCCURRED2_RTC_9_SHFT 9
+#define UV3H_EVENT_OCCURRED2_RTC_10_SHFT 10
+#define UV3H_EVENT_OCCURRED2_RTC_11_SHFT 11
+#define UV3H_EVENT_OCCURRED2_RTC_12_SHFT 12
+#define UV3H_EVENT_OCCURRED2_RTC_13_SHFT 13
+#define UV3H_EVENT_OCCURRED2_RTC_14_SHFT 14
+#define UV3H_EVENT_OCCURRED2_RTC_15_SHFT 15
+#define UV3H_EVENT_OCCURRED2_RTC_16_SHFT 16
+#define UV3H_EVENT_OCCURRED2_RTC_17_SHFT 17
+#define UV3H_EVENT_OCCURRED2_RTC_18_SHFT 18
+#define UV3H_EVENT_OCCURRED2_RTC_19_SHFT 19
+#define UV3H_EVENT_OCCURRED2_RTC_20_SHFT 20
+#define UV3H_EVENT_OCCURRED2_RTC_21_SHFT 21
+#define UV3H_EVENT_OCCURRED2_RTC_22_SHFT 22
+#define UV3H_EVENT_OCCURRED2_RTC_23_SHFT 23
+#define UV3H_EVENT_OCCURRED2_RTC_24_SHFT 24
+#define UV3H_EVENT_OCCURRED2_RTC_25_SHFT 25
+#define UV3H_EVENT_OCCURRED2_RTC_26_SHFT 26
+#define UV3H_EVENT_OCCURRED2_RTC_27_SHFT 27
+#define UV3H_EVENT_OCCURRED2_RTC_28_SHFT 28
+#define UV3H_EVENT_OCCURRED2_RTC_29_SHFT 29
+#define UV3H_EVENT_OCCURRED2_RTC_30_SHFT 30
+#define UV3H_EVENT_OCCURRED2_RTC_31_SHFT 31
+#define UV3H_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000000001UL
+#define UV3H_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000000002UL
+#define UV3H_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000000004UL
+#define UV3H_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000000008UL
+#define UV3H_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000000010UL
+#define UV3H_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000000020UL
+#define UV3H_EVENT_OCCURRED2_RTC_6_MASK 0x0000000000000040UL
+#define UV3H_EVENT_OCCURRED2_RTC_7_MASK 0x0000000000000080UL
+#define UV3H_EVENT_OCCURRED2_RTC_8_MASK 0x0000000000000100UL
+#define UV3H_EVENT_OCCURRED2_RTC_9_MASK 0x0000000000000200UL
+#define UV3H_EVENT_OCCURRED2_RTC_10_MASK 0x0000000000000400UL
+#define UV3H_EVENT_OCCURRED2_RTC_11_MASK 0x0000000000000800UL
+#define UV3H_EVENT_OCCURRED2_RTC_12_MASK 0x0000000000001000UL
+#define UV3H_EVENT_OCCURRED2_RTC_13_MASK 0x0000000000002000UL
+#define UV3H_EVENT_OCCURRED2_RTC_14_MASK 0x0000000000004000UL
+#define UV3H_EVENT_OCCURRED2_RTC_15_MASK 0x0000000000008000UL
+#define UV3H_EVENT_OCCURRED2_RTC_16_MASK 0x0000000000010000UL
+#define UV3H_EVENT_OCCURRED2_RTC_17_MASK 0x0000000000020000UL
+#define UV3H_EVENT_OCCURRED2_RTC_18_MASK 0x0000000000040000UL
+#define UV3H_EVENT_OCCURRED2_RTC_19_MASK 0x0000000000080000UL
+#define UV3H_EVENT_OCCURRED2_RTC_20_MASK 0x0000000000100000UL
+#define UV3H_EVENT_OCCURRED2_RTC_21_MASK 0x0000000000200000UL
+#define UV3H_EVENT_OCCURRED2_RTC_22_MASK 0x0000000000400000UL
+#define UV3H_EVENT_OCCURRED2_RTC_23_MASK 0x0000000000800000UL
+#define UV3H_EVENT_OCCURRED2_RTC_24_MASK 0x0000000001000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_25_MASK 0x0000000002000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_26_MASK 0x0000000004000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_27_MASK 0x0000000008000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_28_MASK 0x0000000010000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_29_MASK 0x0000000020000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_30_MASK 0x0000000040000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_31_MASK 0x0000000080000000UL
+
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT0_SHFT 0
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT1_SHFT 1
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT2_SHFT 2
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT3_SHFT 3
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT4_SHFT 4
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT5_SHFT 5
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT6_SHFT 6
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT7_SHFT 7
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT8_SHFT 8
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT9_SHFT 9
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT10_SHFT 10
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT11_SHFT 11
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT12_SHFT 12
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT13_SHFT 13
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT14_SHFT 14
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT15_SHFT 15
+#define UV4H_EVENT_OCCURRED2_RTC_INTERVAL_INT_SHFT 16
+#define UV4H_EVENT_OCCURRED2_BAU_DASHBOARD_INT_SHFT 17
+#define UV4H_EVENT_OCCURRED2_RTC_0_SHFT 18
+#define UV4H_EVENT_OCCURRED2_RTC_1_SHFT 19
+#define UV4H_EVENT_OCCURRED2_RTC_2_SHFT 20
+#define UV4H_EVENT_OCCURRED2_RTC_3_SHFT 21
+#define UV4H_EVENT_OCCURRED2_RTC_4_SHFT 22
+#define UV4H_EVENT_OCCURRED2_RTC_5_SHFT 23
+#define UV4H_EVENT_OCCURRED2_RTC_6_SHFT 24
+#define UV4H_EVENT_OCCURRED2_RTC_7_SHFT 25
+#define UV4H_EVENT_OCCURRED2_RTC_8_SHFT 26
+#define UV4H_EVENT_OCCURRED2_RTC_9_SHFT 27
+#define UV4H_EVENT_OCCURRED2_RTC_10_SHFT 28
+#define UV4H_EVENT_OCCURRED2_RTC_11_SHFT 29
+#define UV4H_EVENT_OCCURRED2_RTC_12_SHFT 30
+#define UV4H_EVENT_OCCURRED2_RTC_13_SHFT 31
+#define UV4H_EVENT_OCCURRED2_RTC_14_SHFT 32
+#define UV4H_EVENT_OCCURRED2_RTC_15_SHFT 33
+#define UV4H_EVENT_OCCURRED2_RTC_16_SHFT 34
+#define UV4H_EVENT_OCCURRED2_RTC_17_SHFT 35
+#define UV4H_EVENT_OCCURRED2_RTC_18_SHFT 36
+#define UV4H_EVENT_OCCURRED2_RTC_19_SHFT 37
+#define UV4H_EVENT_OCCURRED2_RTC_20_SHFT 38
+#define UV4H_EVENT_OCCURRED2_RTC_21_SHFT 39
+#define UV4H_EVENT_OCCURRED2_RTC_22_SHFT 40
+#define UV4H_EVENT_OCCURRED2_RTC_23_SHFT 41
+#define UV4H_EVENT_OCCURRED2_RTC_24_SHFT 42
+#define UV4H_EVENT_OCCURRED2_RTC_25_SHFT 43
+#define UV4H_EVENT_OCCURRED2_RTC_26_SHFT 44
+#define UV4H_EVENT_OCCURRED2_RTC_27_SHFT 45
+#define UV4H_EVENT_OCCURRED2_RTC_28_SHFT 46
+#define UV4H_EVENT_OCCURRED2_RTC_29_SHFT 47
+#define UV4H_EVENT_OCCURRED2_RTC_30_SHFT 48
+#define UV4H_EVENT_OCCURRED2_RTC_31_SHFT 49
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT0_MASK 0x0000000000000001UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT1_MASK 0x0000000000000002UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT2_MASK 0x0000000000000004UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT3_MASK 0x0000000000000008UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT4_MASK 0x0000000000000010UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT5_MASK 0x0000000000000020UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT6_MASK 0x0000000000000040UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT7_MASK 0x0000000000000080UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT8_MASK 0x0000000000000100UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT9_MASK 0x0000000000000200UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT10_MASK 0x0000000000000400UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT11_MASK 0x0000000000000800UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT12_MASK 0x0000000000001000UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT13_MASK 0x0000000000002000UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT14_MASK 0x0000000000004000UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT15_MASK 0x0000000000008000UL
+#define UV4H_EVENT_OCCURRED2_RTC_INTERVAL_INT_MASK 0x0000000000010000UL
+#define UV4H_EVENT_OCCURRED2_BAU_DASHBOARD_INT_MASK 0x0000000000020000UL
+#define UV4H_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000040000UL
+#define UV4H_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000080000UL
+#define UV4H_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000100000UL
+#define UV4H_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000200000UL
+#define UV4H_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000400000UL
+#define UV4H_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000800000UL
+#define UV4H_EVENT_OCCURRED2_RTC_6_MASK 0x0000000001000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_7_MASK 0x0000000002000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_8_MASK 0x0000000004000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_9_MASK 0x0000000008000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_10_MASK 0x0000000010000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_11_MASK 0x0000000020000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_12_MASK 0x0000000040000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_13_MASK 0x0000000080000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_14_MASK 0x0000000100000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_15_MASK 0x0000000200000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_16_MASK 0x0000000400000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_17_MASK 0x0000000800000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_18_MASK 0x0000001000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_19_MASK 0x0000002000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_20_MASK 0x0000004000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_21_MASK 0x0000008000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_22_MASK 0x0000010000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_23_MASK 0x0000020000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_24_MASK 0x0000040000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_25_MASK 0x0000080000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_26_MASK 0x0000100000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_27_MASK 0x0000200000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_28_MASK 0x0000400000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_29_MASK 0x0000800000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_30_MASK 0x0001000000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_31_MASK 0x0002000000000000UL
+
+#define UVXH_EVENT_OCCURRED2_RTC_1_MASK ( \
+ is_uv2_hub() ? UV2H_EVENT_OCCURRED2_RTC_1_MASK : \
+ is_uv3_hub() ? UV3H_EVENT_OCCURRED2_RTC_1_MASK : \
+ /*is_uv4_hub*/ UV4H_EVENT_OCCURRED2_RTC_1_MASK)
+
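+/*
+ * Sketch: UV4 prepends sixteen message-accelerator bits plus two
+ * interrupt bits, shifting RTC_0 from bit 0 up to bit 18, so a
+ * generic "did RTC 1 fire?" test must go through the runtime-selected
+ * mask above (handle_rtc1_event() is a hypothetical handler):
+ *
+ *	if (uv_read_local_mmr(UVXH_EVENT_OCCURRED2) &
+ *	    UVXH_EVENT_OCCURRED2_RTC_1_MASK)
+ *		handle_rtc1_event();
+ */
+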
+union uvh_event_occurred2_u {
unsigned long v;
- struct uvxh_event_occurred2_s {
+ struct uv2h_event_occurred2_s {
unsigned long rtc_0:1; /* RW */
unsigned long rtc_1:1; /* RW */
unsigned long rtc_2:1; /* RW */
@@ -2743,25 +3947,129 @@
unsigned long rtc_30:1; /* RW */
unsigned long rtc_31:1; /* RW */
unsigned long rsvd_32_63:32;
- } sx;
+ } s2;
+ struct uv3h_event_occurred2_s {
+ unsigned long rtc_0:1; /* RW */
+ unsigned long rtc_1:1; /* RW */
+ unsigned long rtc_2:1; /* RW */
+ unsigned long rtc_3:1; /* RW */
+ unsigned long rtc_4:1; /* RW */
+ unsigned long rtc_5:1; /* RW */
+ unsigned long rtc_6:1; /* RW */
+ unsigned long rtc_7:1; /* RW */
+ unsigned long rtc_8:1; /* RW */
+ unsigned long rtc_9:1; /* RW */
+ unsigned long rtc_10:1; /* RW */
+ unsigned long rtc_11:1; /* RW */
+ unsigned long rtc_12:1; /* RW */
+ unsigned long rtc_13:1; /* RW */
+ unsigned long rtc_14:1; /* RW */
+ unsigned long rtc_15:1; /* RW */
+ unsigned long rtc_16:1; /* RW */
+ unsigned long rtc_17:1; /* RW */
+ unsigned long rtc_18:1; /* RW */
+ unsigned long rtc_19:1; /* RW */
+ unsigned long rtc_20:1; /* RW */
+ unsigned long rtc_21:1; /* RW */
+ unsigned long rtc_22:1; /* RW */
+ unsigned long rtc_23:1; /* RW */
+ unsigned long rtc_24:1; /* RW */
+ unsigned long rtc_25:1; /* RW */
+ unsigned long rtc_26:1; /* RW */
+ unsigned long rtc_27:1; /* RW */
+ unsigned long rtc_28:1; /* RW */
+ unsigned long rtc_29:1; /* RW */
+ unsigned long rtc_30:1; /* RW */
+ unsigned long rtc_31:1; /* RW */
+ unsigned long rsvd_32_63:32;
+ } s3;
+ struct uv4h_event_occurred2_s {
+ unsigned long message_accelerator_int0:1; /* RW */
+ unsigned long message_accelerator_int1:1; /* RW */
+ unsigned long message_accelerator_int2:1; /* RW */
+ unsigned long message_accelerator_int3:1; /* RW */
+ unsigned long message_accelerator_int4:1; /* RW */
+ unsigned long message_accelerator_int5:1; /* RW */
+ unsigned long message_accelerator_int6:1; /* RW */
+ unsigned long message_accelerator_int7:1; /* RW */
+ unsigned long message_accelerator_int8:1; /* RW */
+ unsigned long message_accelerator_int9:1; /* RW */
+ unsigned long message_accelerator_int10:1; /* RW */
+ unsigned long message_accelerator_int11:1; /* RW */
+ unsigned long message_accelerator_int12:1; /* RW */
+ unsigned long message_accelerator_int13:1; /* RW */
+ unsigned long message_accelerator_int14:1; /* RW */
+ unsigned long message_accelerator_int15:1; /* RW */
+ unsigned long rtc_interval_int:1; /* RW */
+ unsigned long bau_dashboard_int:1; /* RW */
+ unsigned long rtc_0:1; /* RW */
+ unsigned long rtc_1:1; /* RW */
+ unsigned long rtc_2:1; /* RW */
+ unsigned long rtc_3:1; /* RW */
+ unsigned long rtc_4:1; /* RW */
+ unsigned long rtc_5:1; /* RW */
+ unsigned long rtc_6:1; /* RW */
+ unsigned long rtc_7:1; /* RW */
+ unsigned long rtc_8:1; /* RW */
+ unsigned long rtc_9:1; /* RW */
+ unsigned long rtc_10:1; /* RW */
+ unsigned long rtc_11:1; /* RW */
+ unsigned long rtc_12:1; /* RW */
+ unsigned long rtc_13:1; /* RW */
+ unsigned long rtc_14:1; /* RW */
+ unsigned long rtc_15:1; /* RW */
+ unsigned long rtc_16:1; /* RW */
+ unsigned long rtc_17:1; /* RW */
+ unsigned long rtc_18:1; /* RW */
+ unsigned long rtc_19:1; /* RW */
+ unsigned long rtc_20:1; /* RW */
+ unsigned long rtc_21:1; /* RW */
+ unsigned long rtc_22:1; /* RW */
+ unsigned long rtc_23:1; /* RW */
+ unsigned long rtc_24:1; /* RW */
+ unsigned long rtc_25:1; /* RW */
+ unsigned long rtc_26:1; /* RW */
+ unsigned long rtc_27:1; /* RW */
+ unsigned long rtc_28:1; /* RW */
+ unsigned long rtc_29:1; /* RW */
+ unsigned long rtc_30:1; /* RW */
+ unsigned long rtc_31:1; /* RW */
+ unsigned long rsvd_50_63:14;
+ } s4;
};
/* ========================================================================= */
/* UVXH_EVENT_OCCURRED2_ALIAS */
/* ========================================================================= */
#define UVXH_EVENT_OCCURRED2_ALIAS 0x70108UL
-#define UVXH_EVENT_OCCURRED2_ALIAS_32 0xb70
+
+#define UV2H_EVENT_OCCURRED2_ALIAS_32 0xb70
+#define UV3H_EVENT_OCCURRED2_ALIAS_32 0xb70
+#define UV4H_EVENT_OCCURRED2_ALIAS_32 0x610
+#define UVH_EVENT_OCCURRED2_ALIAS_32 ( \
+ is_uv2_hub() ? UV2H_EVENT_OCCURRED2_ALIAS_32 : \
+ is_uv3_hub() ? UV3H_EVENT_OCCURRED2_ALIAS_32 : \
+ /*is_uv4_hub*/ UV4H_EVENT_OCCURRED2_ALIAS_32)
/* ========================================================================= */
/* UVXH_LB_BAU_SB_ACTIVATION_STATUS_2 */
/* ========================================================================= */
-#define UVXH_LB_BAU_SB_ACTIVATION_STATUS_2 0x320130UL
#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2 0x320130UL
#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2 0x320130UL
-#define UVXH_LB_BAU_SB_ACTIVATION_STATUS_2_32 0x9f0
-#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0x320130UL
-#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0x320130UL
+#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_2 0xc8130UL
+#define UVH_LB_BAU_SB_ACTIVATION_STATUS_2 ( \
+ is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_2 : \
+ is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_2 : \
+ /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_2)
+
+#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0x9f0
+#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0x9f0
+#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0xa10
+#define UVH_LB_BAU_SB_ACTIVATION_STATUS_2_32 ( \
+ is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_32 : \
+ is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_32 : \
+ /*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_2_32)
#define UVXH_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0
#define UVXH_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL
@@ -2772,6 +4080,10 @@
#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0
#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL
+#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0
+#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL
+
+
union uvxh_lb_bau_sb_activation_status_2_u {
unsigned long v;
struct uvxh_lb_bau_sb_activation_status_2_s {
@@ -2783,6 +4095,9 @@
struct uv3h_lb_bau_sb_activation_status_2_s {
unsigned long aux_error:64; /* RW */
} s3;
+ struct uv4h_lb_bau_sb_activation_status_2_s {
+ unsigned long aux_error:64; /* RW */
+ } s4;
};
/* ========================================================================= */
@@ -2823,26 +4138,6 @@
};
/* ========================================================================= */
-/* UV3H_GR1_GAM_GR_CONFIG */
-/* ========================================================================= */
-#define UV3H_GR1_GAM_GR_CONFIG 0x1000028UL
-
-#define UV3H_GR1_GAM_GR_CONFIG_M_SKT_SHFT 0
-#define UV3H_GR1_GAM_GR_CONFIG_SUBSPACE_SHFT 10
-#define UV3H_GR1_GAM_GR_CONFIG_M_SKT_MASK 0x000000000000003fUL
-#define UV3H_GR1_GAM_GR_CONFIG_SUBSPACE_MASK 0x0000000000000400UL
-
-union uv3h_gr1_gam_gr_config_u {
- unsigned long v;
- struct uv3h_gr1_gam_gr_config_s {
- unsigned long m_skt:6; /* RW */
- unsigned long undef_6_9:4; /* Undefined */
- unsigned long subspace:1; /* RW */
- unsigned long reserved:53;
- } s3;
-};
-
-/* ========================================================================= */
/* UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR */
/* ========================================================================= */
#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR 0x1603000UL
@@ -2924,5 +4219,67 @@
} s3;
};
+/* ========================================================================= */
+/* UV4H_LB_PROC_INTD_QUEUE_FIRST */
+/* ========================================================================= */
+#define UV4H_LB_PROC_INTD_QUEUE_FIRST 0xa4100UL
+
+#define UV4H_LB_PROC_INTD_QUEUE_FIRST_FIRST_PAYLOAD_ADDRESS_SHFT 6
+#define UV4H_LB_PROC_INTD_QUEUE_FIRST_FIRST_PAYLOAD_ADDRESS_MASK 0x00003fffffffffc0UL
+
+union uv4h_lb_proc_intd_queue_first_u {
+ unsigned long v;
+ struct uv4h_lb_proc_intd_queue_first_s {
+ unsigned long undef_0_5:6; /* Undefined */
+ unsigned long first_payload_address:40; /* RW */
+ } s4;
+};
+
+/* ========================================================================= */
+/* UV4H_LB_PROC_INTD_QUEUE_LAST */
+/* ========================================================================= */
+#define UV4H_LB_PROC_INTD_QUEUE_LAST 0xa4108UL
+
+#define UV4H_LB_PROC_INTD_QUEUE_LAST_LAST_PAYLOAD_ADDRESS_SHFT 5
+#define UV4H_LB_PROC_INTD_QUEUE_LAST_LAST_PAYLOAD_ADDRESS_MASK 0x00003fffffffffe0UL
+
+union uv4h_lb_proc_intd_queue_last_u {
+ unsigned long v;
+ struct uv4h_lb_proc_intd_queue_last_s {
+ unsigned long undef_0_4:5; /* Undefined */
+ unsigned long last_payload_address:41; /* RW */
+ } s4;
+};
+
+/* ========================================================================= */
+/* UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR */
+/* ========================================================================= */
+#define UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR 0xa4118UL
+
+#define UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR_SOFT_ACK_PENDING_FLAGS_SHFT 0
+#define UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR_SOFT_ACK_PENDING_FLAGS_MASK 0x00000000000000ffUL
+
+union uv4h_lb_proc_intd_soft_ack_clear_u {
+ unsigned long v;
+ struct uv4h_lb_proc_intd_soft_ack_clear_s {
+ unsigned long soft_ack_pending_flags:8; /* WP */
+ } s4;
+};
+
+/* ========================================================================= */
+/* UV4H_LB_PROC_INTD_SOFT_ACK_PENDING */
+/* ========================================================================= */
+#define UV4H_LB_PROC_INTD_SOFT_ACK_PENDING 0xa4110UL
+
+#define UV4H_LB_PROC_INTD_SOFT_ACK_PENDING_SOFT_ACK_FLAGS_SHFT 0
+#define UV4H_LB_PROC_INTD_SOFT_ACK_PENDING_SOFT_ACK_FLAGS_MASK 0x00000000000000ffUL
+
+union uv4h_lb_proc_intd_soft_ack_pending_u {
+ unsigned long v;
+ struct uv4h_lb_proc_intd_soft_ack_pending_s {
+ unsigned long soft_ack_flags:8; /* RW */
+ } s4;
+};
+
#endif /* _ASM_X86_UV_UV_MMRS_H */
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 1ae89a2..4dcdf74 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -142,6 +142,44 @@
struct timespec;
/**
+ * struct x86_legacy_devices - legacy x86 devices
+ *
+ * @pnpbios: this platform can have a PNPBIOS. If this is disabled the platform
+ * is known to never have a PNPBIOS.
+ *
+ * These are devices known to require LPC or ISA bus. The definition of legacy
+ * devices adheres to the ACPI 5.2.9.3 IA-PC Boot Architecture flag
+ * ACPI_FADT_LEGACY_DEVICES. These devices consist of user visible devices on
+ * the LPC or ISA bus. User visible devices are devices that have end-user
+ * accessible connectors (for example, LPT parallel port). Legacy devices on
+ * the LPC bus consist for example of serial and parallel ports, PS/2 keyboard
+ * / mouse, and the floppy disk controller. A system that lacks all known
+ * legacy devices can assume all devices can be detected exclusively via
+ * standard device enumeration mechanisms including the ACPI namespace.
+ *
+ * A system which does not have ACPI_FADT_LEGACY_DEVICES enabled must not
+ * have any of the legacy devices enumerated below present.
+ */
+struct x86_legacy_devices {
+ int pnpbios;
+};
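/*
 * Illustrative sketch, assuming a driver-side probe path; the function
 * name is hypothetical, and only x86_platform.legacy.devices comes from
 * this patch.  A legacy driver would consult the flag before scanning:
 */
static int __init pnpbios_probe_sketch(void)
{
	if (!x86_platform.legacy.devices.pnpbios)
		return -ENODEV;		/* FADT says no PNPBIOS can exist */

	/* ... proceed with the usual PNPBIOS signature scan ... */
	return 0;
}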
+
+/**
+ * struct x86_legacy_features - legacy x86 features
+ *
+ * @rtc: this device has a CMOS real-time clock present
+ * @ebda_search: it's safe to search for the EBDA signature in the hardware's
+ * low RAM
+ * @devices: legacy x86 devices, refer to struct x86_legacy_devices
+ * documentation for further details.
+ */
+struct x86_legacy_features {
+ int rtc;
+ int ebda_search;
+ struct x86_legacy_devices devices;
+};
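/*
 * Likewise for the RTC flag -- a minimal sketch with a hypothetical
 * registration helper; only the @rtc field itself is defined above:
 */
static int __init add_rtc_cmos_sketch(void)
{
	if (!x86_platform.legacy.rtc)
		return -ENODEV;		/* FADT declared no CMOS RTC */

	/* ... register the "rtc_cmos" platform device here ... */
	return 0;
}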
+
+/**
* struct x86_platform_ops - platform specific runtime functions
* @calibrate_tsc: calibrate TSC
* @get_wallclock: get time from HW clock like RTC etc.
@@ -152,6 +190,14 @@
* @save_sched_clock_state: save state for sched_clock() on suspend
* @restore_sched_clock_state: restore state for sched_clock() on resume
* @apic_post_init: adjust apic if needed
+ * @legacy: legacy features
+ * @set_legacy_features: override legacy features. Use of this callback
+ * is highly discouraged. You should only need
+ * this if your hardware platform requires further
+ * custom fine tuning far beyond what may be
+ * possible in x86_early_init_platform_quirks() by
+ * only using the current x86_hardware_subarch
+ * semantics.
*/
struct x86_platform_ops {
unsigned long (*calibrate_tsc)(void);
@@ -165,6 +211,8 @@
void (*save_sched_clock_state)(void);
void (*restore_sched_clock_state)(void);
void (*apic_post_init)(void);
+ struct x86_legacy_features legacy;
+ void (*set_legacy_features)(void);
};
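/*
 * A hedged example of the (discouraged) override flow; the platform name
 * and quirk choices below are assumptions, only the callback slot itself
 * is added by this patch:
 */
static void __init myplat_set_legacy_features(void)
{
	x86_platform.legacy.rtc = 0;		/* board has no CMOS RTC */
	x86_platform.legacy.ebda_search = 0;	/* and no EBDA to scan */
}

/* installed from early platform setup, e.g.: */
/*	x86_platform.set_legacy_features = myplat_set_legacy_features; */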
struct pci_dev;
@@ -186,6 +234,8 @@
extern struct x86_platform_ops x86_platform;
extern struct x86_msi_ops x86_msi;
extern struct x86_io_apic_ops x86_io_apic_ops;
+
+extern void x86_early_init_platform_quirks(void);
extern void x86_init_noop(void);
extern void x86_init_uint_noop(unsigned int unused);
diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h
index c54beb4..635eac5 100644
--- a/arch/x86/include/asm/xor_32.h
+++ b/arch/x86/include/asm/xor_32.h
@@ -550,7 +550,7 @@
#define XOR_TRY_TEMPLATES \
do { \
AVX_XOR_SPEED; \
- if (cpu_has_xmm) { \
+ if (boot_cpu_has(X86_FEATURE_XMM)) { \
xor_speed(&xor_block_pIII_sse); \
xor_speed(&xor_block_sse_pf64); \
} else if (boot_cpu_has(X86_FEATURE_MMX)) { \
diff --git a/arch/x86/include/asm/xor_avx.h b/arch/x86/include/asm/xor_avx.h
index 7c0a517..22a7b18 100644
--- a/arch/x86/include/asm/xor_avx.h
+++ b/arch/x86/include/asm/xor_avx.h
@@ -167,12 +167,12 @@
#define AVX_XOR_SPEED \
do { \
- if (cpu_has_avx && cpu_has_osxsave) \
+ if (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE)) \
xor_speed(&xor_block_avx); \
} while (0)
#define AVX_SELECT(FASTEST) \
- (cpu_has_avx && cpu_has_osxsave ? &xor_block_avx : FASTEST)
+ (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE) ? &xor_block_avx : FASTEST)
#else
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index 3292543..c18ce67 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -157,7 +157,46 @@
__u8 _pad9[276]; /* 0xeec */
} __attribute__((packed));
-enum {
+/**
+ * enum x86_hardware_subarch - x86 hardware subarchitecture
+ *
+ * The x86 hardware_subarch and hardware_subarch_data were added as of the x86
+ * boot protocol 2.07 to help distinguish and support custom x86 boot
+ * sequences. This enum represents accepted values for the x86
+ * hardware_subarch. Custom x86 boot sequences (not X86_SUBARCH_PC) do not
+ * have or simply *cannot* make use of natural stubs like BIOS or EFI; the
+ * hardware_subarch can be used on the Linux entry path to revector to a
+ * subarchitecture stub when needed. This subarchitecture stub can be used to
+ * set up Linux boot parameters or for special care to account for nonstandard
+ * handling of page tables.
+ *
+ * These enums should only ever be used by x86 code, and the code that uses
+ * them should be well contained and compartmentalized.
+ *
+ * KVM and Xen HVM do not have a subarch as these are expected to follow
+ * standard x86 boot entries. If there is a genuine need for "hypervisor" type
+ * that should be considered separately in the future. Future guest types
+ * should seriously consider working with standard x86 boot stubs such as
+ * the BIOS or EFI boot stubs.
+ *
+ * WARNING: this enum is only used for legacy hacks, for platform features that
+ * are not easily enumerated or discoverable. You should not ever use
+ * this for new features.
+ *
+ * @X86_SUBARCH_PC: Should be used if the hardware is enumerable using standard
+ * PC mechanisms (PCI, ACPI) and doesn't need a special boot flow.
+ * @X86_SUBARCH_LGUEST: Used for x86 hypervisor demo, lguest
+ * @X86_SUBARCH_XEN: Used for Xen guest types which follow the PV boot path,
+ * which start at asm startup_xen() entry point and later jump to the C
+ * xen_start_kernel() entry point. Both domU and dom0 type of guests are
+ * currently supported through this PV boot path.
+ * @X86_SUBARCH_INTEL_MID: Used for Intel MID (Mobile Internet Device) platform
+ * systems which do not have the PCI legacy interfaces.
+ * @X86_SUBARCH_CE4100: Used for the Intel CE media processor (CE4100) SoC
+ * for set-top boxes and media devices; the use of a subarch for CE4100
+ * is more of a hack...
+ */
+enum x86_hardware_subarch {
X86_SUBARCH_PC = 0,
X86_SUBARCH_LGUEST,
X86_SUBARCH_XEN,
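/*
 * Sketch of the dispatch this enum enables; the per-subarch quirk values
 * chosen here are assumptions, but the switch pattern mirrors what
 * x86_early_init_platform_quirks() is being introduced to do:
 */
void __init x86_early_init_platform_quirks_sketch(void)
{
	switch (boot_params.hdr.hardware_subarch) {
	case X86_SUBARCH_PC:
		break;				/* fully enumerable, keep defaults */
	case X86_SUBARCH_INTEL_MID:
	case X86_SUBARCH_CE4100:
		x86_platform.legacy.rtc = 0;	/* assumed quirk */
		break;
	}
}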
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 616ebd2..9abf855 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -2,7 +2,11 @@
# Makefile for the linux kernel.
#
-extra-y := head_$(BITS).o head$(BITS).o head.o vmlinux.lds
+extra-y := head_$(BITS).o
+extra-y += head$(BITS).o
+extra-y += ebda.o
+extra-y += platform-quirks.o
+extra-y += vmlinux.lds
CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 8c2f1ef..9414f84 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -136,7 +136,7 @@
{
struct acpi_table_madt *madt = NULL;
- if (!cpu_has_apic)
+ if (!boot_cpu_has(X86_FEATURE_APIC))
return -EINVAL;
madt = (struct acpi_table_madt *)table;
@@ -445,7 +445,6 @@
polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK;
mp_override_legacy_irq(bus_irq, polarity, trigger, gsi);
- acpi_penalize_sci_irq(bus_irq, trigger, polarity);
/*
* stash over-ride to indicate we've been here
@@ -913,6 +912,15 @@
static int __init acpi_parse_fadt(struct acpi_table_header *table)
{
+ if (!(acpi_gbl_FADT.boot_flags & ACPI_FADT_LEGACY_DEVICES)) {
+ pr_debug("ACPI: no legacy devices present\n");
+ x86_platform.legacy.devices.pnpbios = 0;
+ }
+
+ if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_CMOS_RTC) {
+ pr_debug("ACPI: not registering RTC platform device\n");
+ x86_platform.legacy.rtc = 0;
+ }
#ifdef CONFIG_X86_PM_TIMER
/* detect the location of the ACPI PM Timer */
@@ -951,7 +959,7 @@
{
int count;
- if (!cpu_has_apic)
+ if (!boot_cpu_has(X86_FEATURE_APIC))
return -ENODEV;
/*
@@ -979,7 +987,7 @@
int ret;
struct acpi_subtable_proc madt_proc[2];
- if (!cpu_has_apic)
+ if (!boot_cpu_has(X86_FEATURE_APIC))
return -ENODEV;
/*
@@ -1125,7 +1133,7 @@
if (acpi_disabled || acpi_noirq)
return -ENODEV;
- if (!cpu_has_apic)
+ if (!boot_cpu_has(X86_FEATURE_APIC))
return -ENODEV;
/*
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 25f9093..5cb272a 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -11,6 +11,7 @@
#include <linux/stop_machine.h>
#include <linux/slab.h>
#include <linux/kdebug.h>
+#include <asm/text-patching.h>
#include <asm/alternative.h>
#include <asm/sections.h>
#include <asm/pgtable.h>
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index d356987..60078a6 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -607,7 +607,7 @@
long tapic = apic_read(APIC_TMCCT);
unsigned long pm = acpi_pm_read_early();
- if (cpu_has_tsc)
+ if (boot_cpu_has(X86_FEATURE_TSC))
tsc = rdtsc();
switch (lapic_cal_loops++) {
@@ -668,7 +668,7 @@
*delta = (long)res;
/* Correct the tsc counter value */
- if (cpu_has_tsc) {
+ if (boot_cpu_has(X86_FEATURE_TSC)) {
res = (((u64)(*deltatsc)) * pm_100ms);
do_div(res, deltapm);
apic_printk(APIC_VERBOSE, "TSC delta adjusted to "
@@ -760,7 +760,7 @@
apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
lapic_timer_frequency);
- if (cpu_has_tsc) {
+ if (boot_cpu_has(X86_FEATURE_TSC)) {
apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
"%ld.%04ld MHz.\n",
(deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ),
@@ -1085,7 +1085,7 @@
{
unsigned long flags;
- if (!cpu_has_apic && !apic_from_smp_config())
+ if (!boot_cpu_has(X86_FEATURE_APIC) && !apic_from_smp_config())
return;
local_irq_save(flags);
@@ -1134,7 +1134,7 @@
* Don't do the setup now if we have a SMP BIOS as the
* through-I/O-APIC virtual wire mode might be active.
*/
- if (smp_found_config || !cpu_has_apic)
+ if (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC))
return;
/*
@@ -1227,7 +1227,7 @@
unsigned long long tsc = 0, ntsc;
long long max_loops = cpu_khz ? cpu_khz : 1000000;
- if (cpu_has_tsc)
+ if (boot_cpu_has(X86_FEATURE_TSC))
tsc = rdtsc();
if (disable_apic) {
@@ -1311,7 +1311,7 @@
break;
}
if (queued) {
- if (cpu_has_tsc && cpu_khz) {
+ if (boot_cpu_has(X86_FEATURE_TSC) && cpu_khz) {
ntsc = rdtsc();
max_loops = (cpu_khz << 10) - (ntsc - tsc);
} else
@@ -1445,7 +1445,7 @@
{
u64 msr;
- if (!cpu_has_apic)
+ if (!boot_cpu_has(X86_FEATURE_APIC))
return;
rdmsrl(MSR_IA32_APICBASE, msr);
@@ -1561,7 +1561,7 @@
pr_info("x2apic: enabled by BIOS, switching to x2apic ops\n");
x2apic_mode = 1;
x2apic_state = X2APIC_ON;
- } else if (!cpu_has_x2apic) {
+ } else if (!boot_cpu_has(X86_FEATURE_X2APIC)) {
x2apic_state = X2APIC_DISABLED;
}
}
@@ -1632,7 +1632,7 @@
*/
static int __init detect_init_APIC(void)
{
- if (!cpu_has_apic) {
+ if (!boot_cpu_has(X86_FEATURE_APIC)) {
pr_info("No local APIC present\n");
return -1;
}
@@ -1711,14 +1711,14 @@
goto no_apic;
case X86_VENDOR_INTEL:
if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 ||
- (boot_cpu_data.x86 == 5 && cpu_has_apic))
+ (boot_cpu_data.x86 == 5 && boot_cpu_has(X86_FEATURE_APIC)))
break;
goto no_apic;
default:
goto no_apic;
}
- if (!cpu_has_apic) {
+ if (!boot_cpu_has(X86_FEATURE_APIC)) {
/*
* Over-ride BIOS and try to enable the local APIC only if
* "lapic" specified.
@@ -2233,19 +2233,19 @@
return -1;
}
#ifdef CONFIG_X86_64
- if (!cpu_has_apic) {
+ if (!boot_cpu_has(X86_FEATURE_APIC)) {
disable_apic = 1;
pr_info("Apic disabled by BIOS\n");
return -1;
}
#else
- if (!smp_found_config && !cpu_has_apic)
+ if (!smp_found_config && !boot_cpu_has(X86_FEATURE_APIC))
return -1;
/*
* Complain if the BIOS pretends there is one.
*/
- if (!cpu_has_apic &&
+ if (!boot_cpu_has(X86_FEATURE_APIC) &&
APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
pr_err("BIOS bug, local APIC 0x%x not detected!...\n",
boot_cpu_physical_apicid);
@@ -2426,7 +2426,7 @@
static int __init init_lapic_sysfs(void)
{
/* XXX: remove suspend/resume procs if !apic_pm_state.active? */
- if (cpu_has_apic)
+ if (boot_cpu_has(X86_FEATURE_APIC))
register_syscore_ops(&lapic_syscore_ops);
return 0;
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index 331a7a0..13d19ed 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -100,13 +100,13 @@
static u32 noop_apic_read(u32 reg)
{
- WARN_ON_ONCE((cpu_has_apic && !disable_apic));
+ WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic);
return 0;
}
static void noop_apic_write(u32 reg, u32 v)
{
- WARN_ON_ONCE(cpu_has_apic && !disable_apic);
+ WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic);
}
struct apic apic_noop = {
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index fdb0fbf..84e33ff 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1454,7 +1454,7 @@
ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
}
- if (cpu_has_apic || apic_from_smp_config())
+ if (boot_cpu_has(X86_FEATURE_APIC) || apic_from_smp_config())
disconnect_bsp_APIC(ioapic_i8259.pin != -1);
}
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index 28bde88..2a0f225 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -230,7 +230,7 @@
{
int apicid, cpuid;
- if (!cpu_has_apic)
+ if (!boot_cpu_has(X86_FEATURE_APIC))
return 0;
apicid = hard_smp_processor_id();
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index ef49551..a5e400a 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -944,7 +944,7 @@
print_PIC();
/* don't print out if apic is not there */
- if (!cpu_has_apic && !apic_from_smp_config())
+ if (!boot_cpu_has(X86_FEATURE_APIC) && !apic_from_smp_config())
return 0;
print_local_APICs(show_lapic);
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index d7ce96a..2900315 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -48,12 +48,35 @@
static u64 gru_dist_base, gru_first_node_paddr = -1LL, gru_last_node_paddr;
static u64 gru_dist_lmask, gru_dist_umask;
static union uvh_apicid uvh_apicid;
+
+/* info derived from CPUID */
+static struct {
+ unsigned int apicid_shift;
+ unsigned int apicid_mask;
+ unsigned int socketid_shift; /* aka pnode_shift for UV1/2/3 */
+ unsigned int pnode_mask;
+ unsigned int gpa_shift;
+} uv_cpuid;
+
int uv_min_hub_revision_id;
EXPORT_SYMBOL_GPL(uv_min_hub_revision_id);
unsigned int uv_apicid_hibits;
EXPORT_SYMBOL_GPL(uv_apicid_hibits);
static struct apic apic_x2apic_uv_x;
+static struct uv_hub_info_s uv_hub_info_node0;
+
+/* Set this to use hardware error handler instead of kernel panic */
+static int disable_uv_undefined_panic = 1;
+unsigned long uv_undefined(char *str)
+{
+ if (likely(!disable_uv_undefined_panic))
+ panic("UV: error: undefined MMR: %s\n", str);
+ else
+ pr_crit("UV: error: undefined MMR: %s\n", str);
+ return ~0ul; /* cause a machine fault */
+}
+EXPORT_SYMBOL(uv_undefined);
static unsigned long __init uv_early_read_mmr(unsigned long addr)
{
@@ -108,21 +131,71 @@
case UV3_HUB_PART_NUMBER_X:
uv_min_hub_revision_id += UV3_HUB_REVISION_BASE;
break;
+ case UV4_HUB_PART_NUMBER:
+ uv_min_hub_revision_id += UV4_HUB_REVISION_BASE - 1;
+ break;
}
uv_hub_info->hub_revision = uv_min_hub_revision_id;
- pnode = (node_id.s.node_id >> 1) & ((1 << m_n_config.s.n_skt) - 1);
+ uv_cpuid.pnode_mask = (1 << m_n_config.s.n_skt) - 1;
+ pnode = (node_id.s.node_id >> 1) & uv_cpuid.pnode_mask;
+ uv_cpuid.gpa_shift = 46; /* default unless changed */
+
+ pr_info("UV: rev:%d part#:%x nodeid:%04x n_skt:%d pnmsk:%x pn:%x\n",
+ node_id.s.revision, node_id.s.part_number, node_id.s.node_id,
+ m_n_config.s.n_skt, uv_cpuid.pnode_mask, pnode);
return pnode;
}
-static void __init early_get_apic_pnode_shift(void)
+/* [copied from arch/x86/kernel/cpu/topology.c:detect_extended_topology()] */
+#define SMT_LEVEL 0 /* leaf 0xb SMT level */
+#define INVALID_TYPE 0 /* leaf 0xb sub-leaf types */
+#define SMT_TYPE 1
+#define CORE_TYPE 2
+#define LEAFB_SUBTYPE(ecx) (((ecx) >> 8) & 0xff)
+#define BITS_SHIFT_NEXT_LEVEL(eax) ((eax) & 0x1f)
+
+static void set_x2apic_bits(void)
{
- uvh_apicid.v = uv_early_read_mmr(UVH_APICID);
- if (!uvh_apicid.v)
- /*
- * Old bios, use default value
- */
- uvh_apicid.s.pnode_shift = UV_APIC_PNODE_SHIFT;
+ unsigned int eax, ebx, ecx, edx, sub_index;
+ unsigned int sid_shift;
+
+ cpuid(0, &eax, &ebx, &ecx, &edx);
+ if (eax < 0xb) {
+ pr_info("UV: CPU does not have CPUID.11\n");
+ return;
+ }
+ cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
+ if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE)) {
+ pr_info("UV: CPUID.11 not implemented\n");
+ return;
+ }
+ sid_shift = BITS_SHIFT_NEXT_LEVEL(eax);
+ sub_index = 1;
+ do {
+ cpuid_count(0xb, sub_index, &eax, &ebx, &ecx, &edx);
+ if (LEAFB_SUBTYPE(ecx) == CORE_TYPE) {
+ sid_shift = BITS_SHIFT_NEXT_LEVEL(eax);
+ break;
+ }
+ sub_index++;
+ } while (LEAFB_SUBTYPE(ecx) != INVALID_TYPE);
+ uv_cpuid.apicid_shift = 0;
+ uv_cpuid.apicid_mask = (1U << sid_shift) - 1; /* avoid shifting a negative value */
+ uv_cpuid.socketid_shift = sid_shift;
+}
+
+static void __init early_get_apic_socketid_shift(void)
+{
+ if (is_uv2_hub() || is_uv3_hub())
+ uvh_apicid.v = uv_early_read_mmr(UVH_APICID);
+
+ set_x2apic_bits();
+
+ pr_info("UV: apicid_shift:%d apicid_mask:0x%x\n",
+ uv_cpuid.apicid_shift, uv_cpuid.apicid_mask);
+ pr_info("UV: socketid_shift:%d pnode_mask:0x%x\n",
+ uv_cpuid.socketid_shift, uv_cpuid.pnode_mask);
}
/*
@@ -150,13 +223,18 @@
if (strncmp(oem_id, "SGI", 3) != 0)
return 0;
+ /* Setup early hub type field in uv_hub_info for Node 0 */
+ uv_cpu_info->p_uv_hub_info = &uv_hub_info_node0;
+
/*
* Determine UV arch type.
* SGI: UV100/1000
* SGI2: UV2000/3000
* SGI3: UV300 (truncated to 4 chars because of different varieties)
+ * SGI4: UV400 (truncated to 4 chars because of different varieties)
*/
uv_hub_info->hub_revision =
+ !strncmp(oem_id, "SGI4", 4) ? UV4_HUB_REVISION_BASE :
!strncmp(oem_id, "SGI3", 4) ? UV3_HUB_REVISION_BASE :
!strcmp(oem_id, "SGI2") ? UV2_HUB_REVISION_BASE :
!strcmp(oem_id, "SGI") ? UV1_HUB_REVISION_BASE : 0;
@@ -165,7 +243,7 @@
goto badbios;
pnodeid = early_get_pnodeid();
- early_get_apic_pnode_shift();
+ early_get_apic_socketid_shift();
x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range;
x86_platform.nmi_init = uv_nmi_init;
@@ -211,17 +289,11 @@
}
EXPORT_SYMBOL_GPL(is_uv_system);
-DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
-EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info);
+void **__uv_hub_info_list;
+EXPORT_SYMBOL_GPL(__uv_hub_info_list);
-struct uv_blade_info *uv_blade_info;
-EXPORT_SYMBOL_GPL(uv_blade_info);
-
-short *uv_node_to_blade;
-EXPORT_SYMBOL_GPL(uv_node_to_blade);
-
-short *uv_cpu_to_blade;
-EXPORT_SYMBOL_GPL(uv_cpu_to_blade);
+DEFINE_PER_CPU(struct uv_cpu_info_s, __uv_cpu_info);
+EXPORT_PER_CPU_SYMBOL_GPL(__uv_cpu_info);
short uv_possible_blades;
EXPORT_SYMBOL_GPL(uv_possible_blades);
@@ -229,6 +301,115 @@
unsigned long sn_rtc_cycles_per_second;
EXPORT_SYMBOL(sn_rtc_cycles_per_second);
+/* the following values are used for the per node hub info struct */
+static __initdata unsigned short *_node_to_pnode;
+static __initdata unsigned short _min_socket, _max_socket;
+static __initdata unsigned short _min_pnode, _max_pnode, _gr_table_len;
+static __initdata struct uv_gam_range_entry *uv_gre_table;
+static __initdata struct uv_gam_parameters *uv_gp_table;
+static __initdata unsigned short *_socket_to_node;
+static __initdata unsigned short *_socket_to_pnode;
+static __initdata unsigned short *_pnode_to_socket;
+static __initdata struct uv_gam_range_s *_gr_table;
+#define SOCK_EMPTY ((unsigned short)~0)
+
+int uv_hub_info_version(void)
+{
+ return UV_HUB_INFO_VERSION;
+}
+EXPORT_SYMBOL(uv_hub_info_version);
+
+/* Build GAM range lookup table */
+static __init void build_uv_gr_table(void)
+{
+ struct uv_gam_range_entry *gre = uv_gre_table;
+ struct uv_gam_range_s *grt;
+ unsigned long last_limit = 0, ram_limit = 0;
+ int bytes, i, sid, lsid = -1;
+
+ if (!gre)
+ return;
+
+ bytes = _gr_table_len * sizeof(struct uv_gam_range_s);
+ grt = kzalloc(bytes, GFP_KERNEL);
+ BUG_ON(!grt);
+ _gr_table = grt;
+
+ for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
+ if (gre->type == UV_GAM_RANGE_TYPE_HOLE) {
+ if (!ram_limit) { /* mark hole between ram/non-ram */
+ ram_limit = last_limit;
+ last_limit = gre->limit;
+ lsid++;
+ continue;
+ }
+ last_limit = gre->limit;
+ pr_info("UV: extra hole in GAM RE table @%d\n",
+ (int)(gre - uv_gre_table));
+ continue;
+ }
+ if (_max_socket < gre->sockid) {
+ pr_err("UV: GAM table sockid(%d) too large(>%d) @%d\n",
+ gre->sockid, _max_socket,
+ (int)(gre - uv_gre_table));
+ continue;
+ }
+ sid = gre->sockid - _min_socket;
+ if (lsid < sid) { /* new range */
+ grt = &_gr_table[sid];
+ grt->base = lsid;
+ grt->nasid = gre->nasid;
+ grt->limit = last_limit = gre->limit;
+ lsid = sid;
+ continue;
+ }
+ if (lsid == sid && !ram_limit) { /* update range */
+ if (grt->limit == last_limit) { /* .. if contiguous */
+ grt->limit = last_limit = gre->limit;
+ continue;
+ }
+ }
+ if (!ram_limit) { /* non-contiguous ram range */
+ grt++;
+ grt->base = sid - 1;
+ grt->nasid = gre->nasid;
+ grt->limit = last_limit = gre->limit;
+ continue;
+ }
+ grt++; /* non-contiguous/non-ram */
+ grt->base = grt - _gr_table; /* base is this entry */
+ grt->nasid = gre->nasid;
+ grt->limit = last_limit = gre->limit;
+ lsid++;
+ }
+
+ /* shorten table if possible */
+ grt++;
+ i = grt - _gr_table;
+ if (i < _gr_table_len) {
+ void *ret;
+
+ bytes = i * sizeof(struct uv_gam_range_s);
+ ret = krealloc(_gr_table, bytes, GFP_KERNEL);
+ if (ret) {
+ _gr_table = ret;
+ _gr_table_len = i;
+ }
+ }
+
+ /* display resultant gam range table */
+ for (i = 0, grt = _gr_table; i < _gr_table_len; i++, grt++) {
+ int gb = grt->base;
+ unsigned long start = gb < 0 ? 0 :
+ (unsigned long)_gr_table[gb].limit << UV_GAM_RANGE_SHFT;
+ unsigned long end =
+ (unsigned long)grt->limit << UV_GAM_RANGE_SHFT;
+
+ pr_info("UV: GAM Range %2d %04x 0x%013lx-0x%013lx (%d)\n",
+ i, grt->nasid, start, end, gb);
+ }
+}
+
static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip)
{
unsigned long val;
@@ -355,7 +536,6 @@
static unsigned int uv_read_apic_id(void)
{
-
return x2apic_get_apic_id(apic_read(APIC_ID));
}
@@ -430,58 +610,38 @@
__this_cpu_write(x2apic_extra_bits, pnode << uvh_apicid.s.pnode_shift);
}
-/*
- * Called on boot cpu.
- */
-static __init int boot_pnode_to_blade(int pnode)
-{
- int blade;
-
- for (blade = 0; blade < uv_num_possible_blades(); blade++)
- if (pnode == uv_blade_info[blade].pnode)
- return blade;
- BUG();
-}
-
-struct redir_addr {
- unsigned long redirect;
- unsigned long alias;
-};
-
+#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_LENGTH 3
#define DEST_SHIFT UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT
-static __initdata struct redir_addr redir_addrs[] = {
- {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR, UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR},
- {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR, UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR},
- {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR, UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR},
-};
-
-static unsigned char get_n_lshift(int m_val)
-{
- union uv3h_gr0_gam_gr_config_u m_gr_config;
-
- if (is_uv1_hub())
- return m_val;
-
- if (is_uv2_hub())
- return m_val == 40 ? 40 : 39;
-
- m_gr_config.v = uv_read_local_mmr(UV3H_GR0_GAM_GR_CONFIG);
- return m_gr_config.s3.m_skt;
-}
-
static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size)
{
union uvh_rh_gam_alias210_overlay_config_2_mmr_u alias;
union uvh_rh_gam_alias210_redirect_config_2_mmr_u redirect;
+ unsigned long m_redirect;
+ unsigned long m_overlay;
int i;
- for (i = 0; i < ARRAY_SIZE(redir_addrs); i++) {
- alias.v = uv_read_local_mmr(redir_addrs[i].alias);
+ for (i = 0; i < UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_LENGTH; i++) {
+ switch (i) {
+ case 0:
+ m_redirect = UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR;
+ m_overlay = UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR;
+ break;
+ case 1:
+ m_redirect = UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR;
+ m_overlay = UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR;
+ break;
+ case 2:
+ m_redirect = UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR;
+ m_overlay = UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR;
+ break;
+ }
+ alias.v = uv_read_local_mmr(m_overlay);
if (alias.s.enable && alias.s.base == 0) {
*size = (1UL << alias.s.m_alias);
- redirect.v = uv_read_local_mmr(redir_addrs[i].redirect);
- *base = (unsigned long)redirect.s.dest_base << DEST_SHIFT;
+ redirect.v = uv_read_local_mmr(m_redirect);
+ *base = (unsigned long)redirect.s.dest_base
+ << DEST_SHIFT;
return;
}
}
@@ -544,6 +704,8 @@
{
union uvh_rh_gam_gru_overlay_config_mmr_u gru;
int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT;
+ unsigned long mask = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK;
+ unsigned long base;
gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR);
if (!gru.s.enable) {
@@ -555,8 +717,9 @@
map_gru_distributed(gru.v);
return;
}
- map_high("GRU", gru.s.base, shift, shift, max_pnode, map_wb);
- gru_start_paddr = ((u64)gru.s.base << shift);
+ base = (gru.v & mask) >> shift;
+ map_high("GRU", base, shift, shift, max_pnode, map_wb);
+ gru_start_paddr = ((u64)base << shift);
gru_end_paddr = gru_start_paddr + (1UL << shift) * (max_pnode + 1);
}
@@ -595,6 +758,7 @@
},
};
+/* UV3 & UV4 have identical MMIOH overlay configs */
static __init void map_mmioh_high_uv3(int index, int min_pnode, int max_pnode)
{
union uv3h_rh_gam_mmioh_overlay_config0_mmr_u overlay;
@@ -674,7 +838,7 @@
unsigned long mmr, base;
int shift, enable, m_io, n_io;
- if (is_uv3_hub()) {
+ if (is_uv3_hub() || is_uv4_hub()) {
/* Map both MMIOH Regions */
map_mmioh_high_uv3(0, min_pnode, max_pnode);
map_mmioh_high_uv3(1, min_pnode, max_pnode);
@@ -739,8 +903,8 @@
*/
static void uv_heartbeat(unsigned long ignored)
{
- struct timer_list *timer = &uv_hub_info->scir.timer;
- unsigned char bits = uv_hub_info->scir.state;
+ struct timer_list *timer = &uv_scir_info->timer;
+ unsigned char bits = uv_scir_info->state;
/* flip heartbeat bit */
bits ^= SCIR_CPU_HEARTBEAT;
@@ -760,14 +924,14 @@
static void uv_heartbeat_enable(int cpu)
{
- while (!uv_cpu_hub_info(cpu)->scir.enabled) {
- struct timer_list *timer = &uv_cpu_hub_info(cpu)->scir.timer;
+ while (!uv_cpu_scir_info(cpu)->enabled) {
+ struct timer_list *timer = &uv_cpu_scir_info(cpu)->timer;
uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY);
setup_timer(timer, uv_heartbeat, cpu);
timer->expires = jiffies + SCIR_CPU_HB_INTERVAL;
add_timer_on(timer, cpu);
- uv_cpu_hub_info(cpu)->scir.enabled = 1;
+ uv_cpu_scir_info(cpu)->enabled = 1;
/* also ensure that boot cpu is enabled */
cpu = 0;
@@ -777,9 +941,9 @@
#ifdef CONFIG_HOTPLUG_CPU
static void uv_heartbeat_disable(int cpu)
{
- if (uv_cpu_hub_info(cpu)->scir.enabled) {
- uv_cpu_hub_info(cpu)->scir.enabled = 0;
- del_timer(&uv_cpu_hub_info(cpu)->scir.timer);
+ if (uv_cpu_scir_info(cpu)->enabled) {
+ uv_cpu_scir_info(cpu)->enabled = 0;
+ del_timer(&uv_cpu_scir_info(cpu)->timer);
}
uv_set_cpu_scir_bits(cpu, 0xff);
}
@@ -862,28 +1026,368 @@
void uv_cpu_init(void)
{
/* CPU 0 initialization will be done via uv_system_init. */
- if (!uv_blade_info)
+ if (smp_processor_id() == 0)
return;
- uv_blade_info[uv_numa_blade_id()].nr_online_cpus++;
+ uv_hub_info->nr_online_cpus++;
if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
set_x2apic_extra_bits(uv_hub_info->pnode);
}
+struct mn {
+ unsigned char m_val;
+ unsigned char n_val;
+ unsigned char m_shift;
+ unsigned char n_lshift;
+};
+
+static void get_mn(struct mn *mnp)
+{
+ union uvh_rh_gam_config_mmr_u m_n_config;
+ union uv3h_gr0_gam_gr_config_u m_gr_config;
+
+ m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR);
+ mnp->n_val = m_n_config.s.n_skt;
+ if (is_uv4_hub()) {
+ mnp->m_val = 0;
+ mnp->n_lshift = 0;
+ } else if (is_uv3_hub()) {
+ mnp->m_val = m_n_config.s3.m_skt;
+ m_gr_config.v = uv_read_local_mmr(UV3H_GR0_GAM_GR_CONFIG);
+ mnp->n_lshift = m_gr_config.s3.m_skt;
+ } else if (is_uv2_hub()) {
+ mnp->m_val = m_n_config.s2.m_skt;
+ mnp->n_lshift = mnp->m_val == 40 ? 40 : 39;
+ } else if (is_uv1_hub()) {
+ mnp->m_val = m_n_config.s1.m_skt;
+ mnp->n_lshift = mnp->m_val;
+ }
+ mnp->m_shift = mnp->m_val ? 64 - mnp->m_val : 0;
+}
+
+void __init uv_init_hub_info(struct uv_hub_info_s *hub_info)
+{
+ struct mn mn = {0}; /* avoid uninitialized warnings */
+ union uvh_node_id_u node_id;
+
+ get_mn(&mn);
+ hub_info->m_val = mn.m_val;
+ hub_info->n_val = mn.n_val;
+ hub_info->m_shift = mn.m_shift;
+ hub_info->n_lshift = mn.n_lshift ? mn.n_lshift : 0;
+
+ hub_info->hub_revision = uv_hub_info->hub_revision;
+ hub_info->pnode_mask = uv_cpuid.pnode_mask;
+ hub_info->min_pnode = _min_pnode;
+ hub_info->min_socket = _min_socket;
+ hub_info->pnode_to_socket = _pnode_to_socket;
+ hub_info->socket_to_node = _socket_to_node;
+ hub_info->socket_to_pnode = _socket_to_pnode;
+ hub_info->gr_table_len = _gr_table_len;
+ hub_info->gr_table = _gr_table;
+ hub_info->gpa_mask = mn.m_val ?
+ (1UL << (mn.m_val + mn.n_val)) - 1 :
+ (1UL << uv_cpuid.gpa_shift) - 1;
+
+ node_id.v = uv_read_local_mmr(UVH_NODE_ID);
+ hub_info->gnode_extra =
+ (node_id.s.node_id & ~((1 << mn.n_val) - 1)) >> 1;
+
+ hub_info->gnode_upper =
+ ((unsigned long)hub_info->gnode_extra << mn.m_val);
+
+ if (uv_gp_table) {
+ hub_info->global_mmr_base = uv_gp_table->mmr_base;
+ hub_info->global_mmr_shift = uv_gp_table->mmr_shift;
+ hub_info->global_gru_base = uv_gp_table->gru_base;
+ hub_info->global_gru_shift = uv_gp_table->gru_shift;
+ hub_info->gpa_shift = uv_gp_table->gpa_shift;
+ hub_info->gpa_mask = (1UL << hub_info->gpa_shift) - 1;
+ } else {
+ hub_info->global_mmr_base =
+ uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) &
+ ~UV_MMR_ENABLE;
+ hub_info->global_mmr_shift = _UV_GLOBAL_MMR64_PNODE_SHIFT;
+ }
+
+ get_lowmem_redirect(
+ &hub_info->lowmem_remap_base, &hub_info->lowmem_remap_top);
+
+ hub_info->apic_pnode_shift = uv_cpuid.socketid_shift;
+
+ /* show system specific info */
+ pr_info("UV: N:%d M:%d m_shift:%d n_lshift:%d\n",
+ hub_info->n_val, hub_info->m_val,
+ hub_info->m_shift, hub_info->n_lshift);
+
+ pr_info("UV: gpa_mask/shift:0x%lx/%d pnode_mask:0x%x apic_pns:%d\n",
+ hub_info->gpa_mask, hub_info->gpa_shift,
+ hub_info->pnode_mask, hub_info->apic_pnode_shift);
+
+ pr_info("UV: mmr_base/shift:0x%lx/%ld gru_base/shift:0x%lx/%ld\n",
+ hub_info->global_mmr_base, hub_info->global_mmr_shift,
+ hub_info->global_gru_base, hub_info->global_gru_shift);
+
+ pr_info("UV: gnode_upper:0x%lx gnode_extra:0x%x\n",
+ hub_info->gnode_upper, hub_info->gnode_extra);
+}
+
+static void __init decode_gam_params(unsigned long ptr)
+{
+ uv_gp_table = (struct uv_gam_parameters *)ptr;
+
+ pr_info("UV: GAM Params...\n");
+ pr_info("UV: mmr_base/shift:0x%llx/%d gru_base/shift:0x%llx/%d gpa_shift:%d\n",
+ uv_gp_table->mmr_base, uv_gp_table->mmr_shift,
+ uv_gp_table->gru_base, uv_gp_table->gru_shift,
+ uv_gp_table->gpa_shift);
+}
+
+static void __init decode_gam_rng_tbl(unsigned long ptr)
+{
+ struct uv_gam_range_entry *gre = (struct uv_gam_range_entry *)ptr;
+ unsigned long lgre = 0;
+ int index = 0;
+ int sock_min = 999999, pnode_min = 99999;
+ int sock_max = -1, pnode_max = -1;
+
+ uv_gre_table = gre;
+ for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
+ if (!index) {
+ pr_info("UV: GAM Range Table...\n");
+ pr_info("UV: # %20s %14s %5s %4s %5s %3s %2s %3s\n",
+ "Range", "", "Size", "Type", "NASID",
+ "SID", "PN", "PXM");
+ }
+ pr_info(
+ "UV: %2d: 0x%014lx-0x%014lx %5luG %3d %04x %02x %02x %3d\n",
+ index++,
+ (unsigned long)lgre << UV_GAM_RANGE_SHFT,
+ (unsigned long)gre->limit << UV_GAM_RANGE_SHFT,
+ ((unsigned long)(gre->limit - lgre)) >>
+ (30 - UV_GAM_RANGE_SHFT), /* 64M -> 1G */
+ gre->type, gre->nasid, gre->sockid,
+ gre->pnode, gre->pxm);
+
+ lgre = gre->limit;
+ if (sock_min > gre->sockid)
+ sock_min = gre->sockid;
+ if (sock_max < gre->sockid)
+ sock_max = gre->sockid;
+ if (pnode_min > gre->pnode)
+ pnode_min = gre->pnode;
+ if (pnode_max < gre->pnode)
+ pnode_max = gre->pnode;
+ }
+ _min_socket = sock_min;
+ _max_socket = sock_max;
+ _min_pnode = pnode_min;
+ _max_pnode = pnode_max;
+ _gr_table_len = index;
+ pr_info(
+ "UV: GRT: %d entries, sockets(min:%x,max:%x) pnodes(min:%x,max:%x)\n",
+ index, _min_socket, _max_socket, _min_pnode, _max_pnode);
+}
+
+static void __init decode_uv_systab(void)
+{
+ struct uv_systab *st;
+ int i;
+
+ st = uv_systab;
+ if ((!st || st->revision < UV_SYSTAB_VERSION_UV4) && !is_uv4_hub())
+ return;
+ if (st->revision != UV_SYSTAB_VERSION_UV4_LATEST) {
+ pr_crit(
+ "UV: BIOS UVsystab version(%x) mismatch, expecting(%x)\n",
+ st->revision, UV_SYSTAB_VERSION_UV4_LATEST);
+ BUG();
+ }
+
+ for (i = 0; st->entry[i].type != UV_SYSTAB_TYPE_UNUSED; i++) {
+ unsigned long ptr = st->entry[i].offset;
+
+ if (!ptr)
+ continue;
+
+ ptr = ptr + (unsigned long)st;
+
+ switch (st->entry[i].type) {
+ case UV_SYSTAB_TYPE_GAM_PARAMS:
+ decode_gam_params(ptr);
+ break;
+
+ case UV_SYSTAB_TYPE_GAM_RNG_TBL:
+ decode_gam_rng_tbl(ptr);
+ break;
+ }
+ }
+}
+
+/*
+ * Setup physical blade translations from UVH_NODE_PRESENT_TABLE
+ * .. NB: UVH_NODE_PRESENT_TABLE is going away,
+ * .. being replaced by GAM Range Table
+ */
+static __init void boot_init_possible_blades(struct uv_hub_info_s *hub_info)
+{
+ int i, uv_pb = 0;
+
+ pr_info("UV: NODE_PRESENT_DEPTH = %d\n", UVH_NODE_PRESENT_TABLE_DEPTH);
+ for (i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) {
+ unsigned long np;
+
+ np = uv_read_local_mmr(UVH_NODE_PRESENT_TABLE + i * 8);
+ if (np)
+ pr_info("UV: NODE_PRESENT(%d) = 0x%016lx\n", i, np);
+
+ uv_pb += hweight64(np);
+ }
+ if (uv_possible_blades != uv_pb)
+ uv_possible_blades = uv_pb;
+}
+
+static void __init build_socket_tables(void)
+{
+ struct uv_gam_range_entry *gre = uv_gre_table;
+ int num, nump;
+ int cpu, i, lnid;
+ int minsock = _min_socket;
+ int maxsock = _max_socket;
+ int minpnode = _min_pnode;
+ int maxpnode = _max_pnode;
+ size_t bytes;
+
+ if (!gre) {
+ if (is_uv1_hub() || is_uv2_hub() || is_uv3_hub()) {
+ pr_info("UV: No UVsystab socket table, ignoring\n");
+ return; /* not required */
+ }
+ pr_crit(
+ "UV: Error: UVsystab address translations not available!\n");
+ BUG();
+ }
+
+ /* build socket id -> node id, pnode */
+ num = maxsock - minsock + 1;
+ bytes = num * sizeof(_socket_to_node[0]);
+ _socket_to_node = kmalloc(bytes, GFP_KERNEL);
+ _socket_to_pnode = kmalloc(bytes, GFP_KERNEL);
+
+ nump = maxpnode - minpnode + 1;
+ bytes = nump * sizeof(_pnode_to_socket[0]);
+ _pnode_to_socket = kmalloc(bytes, GFP_KERNEL);
+ BUG_ON(!_socket_to_node || !_socket_to_pnode || !_pnode_to_socket);
+
+ for (i = 0; i < num; i++)
+ _socket_to_node[i] = _socket_to_pnode[i] = SOCK_EMPTY;
+
+ for (i = 0; i < nump; i++)
+ _pnode_to_socket[i] = SOCK_EMPTY;
+
+ /* fill in pnode/node/addr conversion list values */
+ pr_info("UV: GAM Building socket/pnode/pxm conversion tables\n");
+ for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
+ if (gre->type == UV_GAM_RANGE_TYPE_HOLE)
+ continue;
+ i = gre->sockid - minsock;
+ if (_socket_to_pnode[i] != SOCK_EMPTY)
+ continue; /* duplicate */
+ _socket_to_pnode[i] = gre->pnode;
+ _socket_to_node[i] = gre->pxm;
+
+ i = gre->pnode - minpnode;
+ _pnode_to_socket[i] = gre->sockid;
+
+ pr_info(
+ "UV: sid:%02x type:%d nasid:%04x pn:%02x pxm:%2d pn2s:%2x\n",
+ gre->sockid, gre->type, gre->nasid,
+ _socket_to_pnode[gre->sockid - minsock],
+ _socket_to_node[gre->sockid - minsock],
+ _pnode_to_socket[gre->pnode - minpnode]);
+ }
+
+ /* check socket -> node values */
+ lnid = -1;
+ for_each_present_cpu(cpu) {
+ int nid = cpu_to_node(cpu);
+ int apicid, sockid;
+
+ if (lnid == nid)
+ continue;
+ lnid = nid;
+ apicid = per_cpu(x86_cpu_to_apicid, cpu);
+ sockid = apicid >> uv_cpuid.socketid_shift;
+ i = sockid - minsock;
+
+ if (nid != _socket_to_node[i]) {
+ pr_warn(
+ "UV: %02x: type:%d socket:%02x PXM:%02x != node:%2d\n",
+ i, gre->type, sockid, _socket_to_node[i], nid);
+ _socket_to_node[i] = nid;
+ }
+ }
+
+ /* Setup physical blade to pnode translation from GAM Range Table */
+ bytes = num_possible_nodes() * sizeof(_node_to_pnode[0]);
+ _node_to_pnode = kmalloc(bytes, GFP_KERNEL);
+ BUG_ON(!_node_to_pnode);
+
+ for (lnid = 0; lnid < num_possible_nodes(); lnid++) {
+ unsigned short sockid;
+
+ for (sockid = minsock; sockid <= maxsock; sockid++) {
+ if (lnid == _socket_to_node[sockid - minsock]) {
+ _node_to_pnode[lnid] =
+ _socket_to_pnode[sockid - minsock];
+ break;
+ }
+ }
+ if (sockid > maxsock) {
+ pr_err("UV: socket for node %d not found!\n", lnid);
+ BUG();
+ }
+ }
+
+ /*
+ * If socket id == pnode or socket id == node for all nodes,
+ * the system runs faster by removing the corresponding conversion table.
+ */
+ pr_info("UV: Checking socket->node/pnode for identity maps\n");
+ if (minsock == 0) {
+ for (i = 0; i < num; i++)
+ if (_socket_to_node[i] == SOCK_EMPTY ||
+ i != _socket_to_node[i])
+ break;
+ if (i >= num) {
+ kfree(_socket_to_node);
+ _socket_to_node = NULL;
+ pr_info("UV: 1:1 socket_to_node table removed\n");
+ }
+ }
+ if (minsock == minpnode) {
+ for (i = 0; i < num; i++)
+ if (_socket_to_pnode[i] != SOCK_EMPTY &&
+ _socket_to_pnode[i] != i + minpnode)
+ break;
+ if (i >= num) {
+ kfree(_socket_to_pnode);
+ _socket_to_pnode = NULL;
+ pr_info("UV: 1:1 socket_to_pnode table removed\n");
+ }
+ }
+}
+
void __init uv_system_init(void)
{
- union uvh_rh_gam_config_mmr_u m_n_config;
- union uvh_node_id_u node_id;
- unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size;
- int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val;
- int gnode_extra, min_pnode = 999999, max_pnode = -1;
- unsigned long mmr_base, present, paddr;
- unsigned short pnode_mask;
- unsigned char n_lshift;
- char *hub = (is_uv1_hub() ? "UV100/1000" :
- (is_uv2_hub() ? "UV2000/3000" :
- (is_uv3_hub() ? "UV300" : NULL)));
+ struct uv_hub_info_s hub_info = {0};
+ int bytes, cpu, nodeid;
+ unsigned short min_pnode = 9999, max_pnode = 0;
+ char *hub = is_uv4_hub() ? "UV400" :
+ is_uv3_hub() ? "UV300" :
+ is_uv2_hub() ? "UV2000/3000" :
+ is_uv1_hub() ? "UV100/1000" : NULL;
if (!hub) {
pr_err("UV: Unknown/unsupported UV hub\n");
@@ -893,124 +1397,104 @@
map_low_mmrs();
- m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR );
- m_val = m_n_config.s.m_skt;
- n_val = m_n_config.s.n_skt;
- pnode_mask = (1 << n_val) - 1;
- n_lshift = get_n_lshift(m_val);
- mmr_base =
- uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) &
- ~UV_MMR_ENABLE;
-
- node_id.v = uv_read_local_mmr(UVH_NODE_ID);
- gnode_extra = (node_id.s.node_id & ~((1 << n_val) - 1)) >> 1;
- gnode_upper = ((unsigned long)gnode_extra << m_val);
- pr_info("UV: N:%d M:%d pnode_mask:0x%x gnode_upper/extra:0x%lx/0x%x n_lshift 0x%x\n",
- n_val, m_val, pnode_mask, gnode_upper, gnode_extra,
- n_lshift);
-
- pr_info("UV: global MMR base 0x%lx\n", mmr_base);
-
- for(i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++)
- uv_possible_blades +=
- hweight64(uv_read_local_mmr( UVH_NODE_PRESENT_TABLE + i * 8));
+ uv_bios_init(); /* get uv_systab for decoding */
+ decode_uv_systab();
+ build_socket_tables();
+ build_uv_gr_table();
+ uv_init_hub_info(&hub_info);
+ uv_possible_blades = num_possible_nodes();
+ if (!_node_to_pnode)
+ boot_init_possible_blades(&hub_info);
/* uv_num_possible_blades() is really the hub count */
- pr_info("UV: Found %d blades, %d hubs\n",
- is_uv1_hub() ? uv_num_possible_blades() :
- (uv_num_possible_blades() + 1) / 2,
- uv_num_possible_blades());
+ pr_info("UV: Found %d hubs, %d nodes, %d cpus\n",
+ uv_num_possible_blades(),
+ num_possible_nodes(),
+ num_possible_cpus());
- bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
- uv_blade_info = kzalloc(bytes, GFP_KERNEL);
- BUG_ON(!uv_blade_info);
-
- for (blade = 0; blade < uv_num_possible_blades(); blade++)
- uv_blade_info[blade].memory_nid = -1;
-
- get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size);
-
- bytes = sizeof(uv_node_to_blade[0]) * num_possible_nodes();
- uv_node_to_blade = kmalloc(bytes, GFP_KERNEL);
- BUG_ON(!uv_node_to_blade);
- memset(uv_node_to_blade, 255, bytes);
-
- bytes = sizeof(uv_cpu_to_blade[0]) * num_possible_cpus();
- uv_cpu_to_blade = kmalloc(bytes, GFP_KERNEL);
- BUG_ON(!uv_cpu_to_blade);
- memset(uv_cpu_to_blade, 255, bytes);
-
- blade = 0;
- for (i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) {
- present = uv_read_local_mmr(UVH_NODE_PRESENT_TABLE + i * 8);
- for (j = 0; j < 64; j++) {
- if (!test_bit(j, &present))
- continue;
- pnode = (i * 64 + j) & pnode_mask;
- uv_blade_info[blade].pnode = pnode;
- uv_blade_info[blade].nr_possible_cpus = 0;
- uv_blade_info[blade].nr_online_cpus = 0;
- spin_lock_init(&uv_blade_info[blade].nmi_lock);
- min_pnode = min(pnode, min_pnode);
- max_pnode = max(pnode, max_pnode);
- blade++;
- }
- }
-
- uv_bios_init();
uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, &sn_coherency_id,
&sn_region_size, &system_serial_number);
+ hub_info.coherency_domain_number = sn_coherency_id;
uv_rtc_init();
- for_each_present_cpu(cpu) {
+ bytes = sizeof(void *) * uv_num_possible_blades();
+ __uv_hub_info_list = kzalloc(bytes, GFP_KERNEL);
+ BUG_ON(!__uv_hub_info_list);
+
+ bytes = sizeof(struct uv_hub_info_s);
+ for_each_node(nodeid) {
+ struct uv_hub_info_s *new_hub;
+
+ if (__uv_hub_info_list[nodeid]) {
+ pr_err("UV: Node %d UV HUB already initialized!?\n",
+ nodeid);
+ BUG();
+ }
+
+ /* Allocate new per hub info list */
+ new_hub = (nodeid == 0) ?
+ &uv_hub_info_node0 :
+ kzalloc_node(bytes, GFP_KERNEL, nodeid);
+ BUG_ON(!new_hub);
+ __uv_hub_info_list[nodeid] = new_hub;
+ new_hub = uv_hub_info_list(nodeid);
+ BUG_ON(!new_hub);
+ *new_hub = hub_info;
+
+ /* Use information from GAM table if available */
+ if (_node_to_pnode)
+ new_hub->pnode = _node_to_pnode[nodeid];
+ else /* Fill in during cpu loop */
+ new_hub->pnode = 0xffff;
+ new_hub->numa_blade_id = uv_node_to_blade_id(nodeid);
+ new_hub->memory_nid = -1;
+ new_hub->nr_possible_cpus = 0;
+ new_hub->nr_online_cpus = 0;
+ }
+
+ /* Initialize per cpu info */
+ for_each_possible_cpu(cpu) {
int apicid = per_cpu(x86_cpu_to_apicid, cpu);
+ int numa_node_id;
+ unsigned short pnode;
- nid = cpu_to_node(cpu);
- /*
- * apic_pnode_shift must be set before calling uv_apicid_to_pnode();
- */
- uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask;
- uv_cpu_hub_info(cpu)->apic_pnode_shift = uvh_apicid.s.pnode_shift;
- uv_cpu_hub_info(cpu)->hub_revision = uv_hub_info->hub_revision;
-
- uv_cpu_hub_info(cpu)->m_shift = 64 - m_val;
- uv_cpu_hub_info(cpu)->n_lshift = n_lshift;
-
+ nodeid = cpu_to_node(cpu);
+ numa_node_id = numa_cpu_node(cpu);
pnode = uv_apicid_to_pnode(apicid);
- blade = boot_pnode_to_blade(pnode);
- lcpu = uv_blade_info[blade].nr_possible_cpus;
- uv_blade_info[blade].nr_possible_cpus++;
- /* Any node on the blade, else will contain -1. */
- uv_blade_info[blade].memory_nid = nid;
-
- uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base;
- uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size;
- uv_cpu_hub_info(cpu)->m_val = m_val;
- uv_cpu_hub_info(cpu)->n_val = n_val;
- uv_cpu_hub_info(cpu)->numa_blade_id = blade;
- uv_cpu_hub_info(cpu)->blade_processor_id = lcpu;
- uv_cpu_hub_info(cpu)->pnode = pnode;
- uv_cpu_hub_info(cpu)->gpa_mask = (1UL << (m_val + n_val)) - 1;
- uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
- uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra;
- uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
- uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id;
- uv_cpu_hub_info(cpu)->scir.offset = uv_scir_offset(apicid);
- uv_node_to_blade[nid] = blade;
- uv_cpu_to_blade[cpu] = blade;
+ uv_cpu_info_per(cpu)->p_uv_hub_info = uv_hub_info_list(nodeid);
+ uv_cpu_info_per(cpu)->blade_cpu_id =
+ uv_cpu_hub_info(cpu)->nr_possible_cpus++;
+ if (uv_cpu_hub_info(cpu)->memory_nid == -1)
+ uv_cpu_hub_info(cpu)->memory_nid = cpu_to_node(cpu);
+ if (nodeid != numa_node_id && /* init memoryless node */
+ uv_hub_info_list(numa_node_id)->pnode == 0xffff)
+ uv_hub_info_list(numa_node_id)->pnode = pnode;
+ else if (uv_cpu_hub_info(cpu)->pnode == 0xffff)
+ uv_cpu_hub_info(cpu)->pnode = pnode;
+ uv_cpu_scir_info(cpu)->offset = uv_scir_offset(apicid);
}
- /* Add blade/pnode info for nodes without cpus */
- for_each_online_node(nid) {
- if (uv_node_to_blade[nid] >= 0)
- continue;
- paddr = node_start_pfn(nid) << PAGE_SHIFT;
- pnode = uv_gpa_to_pnode(uv_soc_phys_ram_to_gpa(paddr));
- blade = boot_pnode_to_blade(pnode);
- uv_node_to_blade[nid] = blade;
+ for_each_node(nodeid) {
+ unsigned short pnode = uv_hub_info_list(nodeid)->pnode;
+
+ /* Add pnode info for pre-GAM list nodes without cpus */
+ if (pnode == 0xffff) {
+ unsigned long paddr;
+
+ paddr = node_start_pfn(nodeid) << PAGE_SHIFT;
+ pnode = uv_gpa_to_pnode(uv_soc_phys_ram_to_gpa(paddr));
+ uv_hub_info_list(nodeid)->pnode = pnode;
+ }
+ min_pnode = min(pnode, min_pnode);
+ max_pnode = max(pnode, max_pnode);
+ pr_info("UV: UVHUB node:%2d pn:%02x nrcpus:%d\n",
+ nodeid,
+ uv_hub_info_list(nodeid)->pnode,
+ uv_hub_info_list(nodeid)->nr_possible_cpus);
}
+ pr_info("UV: min_pnode:%02x max_pnode:%02x\n", min_pnode, max_pnode);
map_gru_high(max_pnode);
map_mmr_high(max_pnode);
map_mmioh_high(min_pnode, max_pnode);
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 9307f18..c7364bd 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -2267,7 +2267,7 @@
dmi_check_system(apm_dmi_table);
- if (apm_info.bios.version == 0 || paravirt_enabled() || machine_is_olpc()) {
+ if (apm_info.bios.version == 0 || machine_is_olpc()) {
printk(KERN_INFO "apm: BIOS not found.\n");
return -ENODEV;
}
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 5c04246..674134e 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -80,6 +80,7 @@
OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
OFFSET(BP_version, boot_params, hdr.version);
OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
+ OFFSET(BP_init_size, boot_params, hdr.init_size);
OFFSET(BP_pref_address, boot_params, hdr.pref_address);
OFFSET(BP_code32_start, boot_params, hdr.code32_start);
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 7b76eb6..c343a54 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -565,14 +565,17 @@
* can safely set X86_FEATURE_EXTD_APICID unconditionally for families
* after 16h.
*/
- if (cpu_has_apic && c->x86 > 0x16) {
- set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
- } else if (cpu_has_apic && c->x86 >= 0xf) {
- /* check CPU config space for extended APIC ID */
- unsigned int val;
- val = read_pci_config(0, 24, 0, 0x68);
- if ((val & ((1 << 17) | (1 << 18))) == ((1 << 17) | (1 << 18)))
+ if (boot_cpu_has(X86_FEATURE_APIC)) {
+ if (c->x86 > 0x16)
set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
+ else if (c->x86 >= 0xf) {
+ /* check CPU config space for extended APIC ID */
+ unsigned int val;
+
+ val = read_pci_config(0, 24, 0, 0x68);
+ if ((val >> 17 & 0x3) == 0x3)
+ set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
+ }
}
#endif
@@ -628,6 +631,7 @@
*/
msr_set_bit(MSR_K7_HWCR, 6);
#endif
+ set_cpu_bug(c, X86_BUG_SWAPGS_FENCE);
}
static void init_amd_gh(struct cpuinfo_x86 *c)
@@ -746,7 +750,7 @@
if (c->x86 >= 0xf)
set_cpu_cap(c, X86_FEATURE_K8);
- if (cpu_has_xmm2) {
+ if (cpu_has(c, X86_FEATURE_XMM2)) {
/* MFENCE stops RDTSC speculation */
set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 8394b3d..6ef6ed9 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -430,7 +430,7 @@
#ifdef CONFIG_X86_32
loadsegment(fs, __KERNEL_PERCPU);
#else
- loadsegment(gs, 0);
+ __loadsegment_simple(gs, 0);
wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
#endif
load_stack_canary_segment();
@@ -717,6 +717,13 @@
}
}
+ if (c->extended_cpuid_level >= 0x80000007) {
+ cpuid(0x80000007, &eax, &ebx, &ecx, &edx);
+
+ c->x86_capability[CPUID_8000_0007_EBX] = ebx;
+ c->x86_power = edx;
+ }
+
if (c->extended_cpuid_level >= 0x80000008) {
cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
@@ -729,9 +736,6 @@
c->x86_phys_bits = 36;
#endif
- if (c->extended_cpuid_level >= 0x80000007)
- c->x86_power = cpuid_edx(0x80000007);
-
if (c->extended_cpuid_level >= 0x8000000a)
c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a);
@@ -862,30 +866,34 @@
#else
set_cpu_cap(c, X86_FEATURE_NOPL);
#endif
+}
+static void detect_null_seg_behavior(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_X86_64
/*
- * ESPFIX is a strange bug. All real CPUs have it. Paravirt
- * systems that run Linux at CPL > 0 may or may not have the
- * issue, but, even if they have the issue, there's absolutely
- * nothing we can do about it because we can't use the real IRET
- * instruction.
+ * Empirically, writing zero to a segment selector on AMD does
+ * not clear the base, whereas writing zero to a segment
+ * selector on Intel does clear the base. Intel's behavior
+ * allows slightly faster context switches in the common case
+ * where GS is unused by the prev and next threads.
*
- * NB: For the time being, only 32-bit kernels support
- * X86_BUG_ESPFIX as such. 64-bit kernels directly choose
- * whether to apply espfix using paravirt hooks. If any
- * non-paravirt system ever shows up that does *not* have the
- * ESPFIX issue, we can change this.
+ * Since neither vendor documents this anywhere that I can see,
+ * detect it directly instead of hardcoding the choice by
+ * vendor.
+ *
+ * I've designated AMD's behavior as the "bug" because it's
+ * counterintuitive and less friendly.
*/
-#ifdef CONFIG_X86_32
-#ifdef CONFIG_PARAVIRT
- do {
- extern void native_iret(void);
- if (pv_cpu_ops.iret == native_iret)
- set_cpu_bug(c, X86_BUG_ESPFIX);
- } while (0);
-#else
- set_cpu_bug(c, X86_BUG_ESPFIX);
-#endif
+
+ unsigned long old_base, tmp;
+ rdmsrl(MSR_FS_BASE, old_base);
+ wrmsrl(MSR_FS_BASE, 1);
+ loadsegment(fs, 0);
+ rdmsrl(MSR_FS_BASE, tmp);
+ if (tmp != 0)
+ set_cpu_bug(c, X86_BUG_NULL_SEG);
+ wrmsrl(MSR_FS_BASE, old_base);
#endif
}
@@ -921,6 +929,33 @@
get_model_name(c); /* Default name */
detect_nopl(c);
+
+ detect_null_seg_behavior(c);
+
+ /*
+ * ESPFIX is a strange bug. All real CPUs have it. Paravirt
+ * systems that run Linux at CPL > 0 may or may not have the
+ * issue, but, even if they have the issue, there's absolutely
+ * nothing we can do about it because we can't use the real IRET
+ * instruction.
+ *
+ * NB: For the time being, only 32-bit kernels support
+ * X86_BUG_ESPFIX as such. 64-bit kernels directly choose
+ * whether to apply espfix using paravirt hooks. If any
+ * non-paravirt system ever shows up that does *not* have the
+ * ESPFIX issue, we can change this.
+ */
+#ifdef CONFIG_X86_32
+# ifdef CONFIG_PARAVIRT
+ do {
+ extern void native_iret(void);
+ if (pv_cpu_ops.iret == native_iret)
+ set_cpu_bug(c, X86_BUG_ESPFIX);
+ } while (0);
+# else
+ set_cpu_bug(c, X86_BUG_ESPFIX);
+# endif
+#endif
}
static void x86_init_cache_qos(struct cpuinfo_x86 *c)
@@ -1076,12 +1111,12 @@
struct tss_struct *tss;
int cpu;
+ if (!boot_cpu_has(X86_FEATURE_SEP))
+ return;
+
cpu = get_cpu();
tss = &per_cpu(cpu_tss, cpu);
- if (!boot_cpu_has(X86_FEATURE_SEP))
- goto out;
-
/*
* We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
* see the big comment in struct x86_hw_tss's definition.
@@ -1096,7 +1131,6 @@
wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
-out:
put_cpu();
}
#endif
@@ -1528,7 +1562,7 @@
pr_info("Initializing CPU#%d\n", cpu);
if (cpu_feature_enabled(X86_FEATURE_VME) ||
- cpu_has_tsc ||
+ boot_cpu_has(X86_FEATURE_TSC) ||
boot_cpu_has(X86_FEATURE_DE))
cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index 6adef9c..bd9dcd6 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -333,7 +333,7 @@
switch (dir0_lsn) {
case 0xd: /* either a 486SLC or DLC w/o DEVID */
dir0_msn = 0;
- p = Cx486_name[(cpu_has_fpu ? 1 : 0)];
+ p = Cx486_name[!!boot_cpu_has(X86_FEATURE_FPU)];
break;
case 0xe: /* a 486S A step */
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index e4393bf..8dae51f 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -152,9 +152,9 @@
* the TLB when any changes are made to any of the page table entries.
* The operating system must reload CR3 to cause the TLB to be flushed"
*
- * As a result cpu_has_pge() in arch/x86/include/asm/tlbflush.h should
- * be false so that __flush_tlb_all() causes CR3 insted of CR4.PGE
- * to be modified
+ * As a result, boot_cpu_has(X86_FEATURE_PGE) in arch/x86/include/asm/tlbflush.h
+ * should be false so that __flush_tlb_all() causes CR3 instead of CR4.PGE
+ * to be modified.
*/
if (c->x86 == 5 && c->x86_model == 9) {
pr_info("Disabling PGE capability bit\n");
@@ -233,7 +233,7 @@
* The Quark is also family 5, but does not have the same bug.
*/
clear_cpu_bug(c, X86_BUG_F00F);
- if (!paravirt_enabled() && c->x86 == 5 && c->x86_model < 9) {
+ if (c->x86 == 5 && c->x86_model < 9) {
static int f00f_workaround_enabled;
set_cpu_bug(c, X86_BUG_F00F);
@@ -281,7 +281,7 @@
* integrated APIC (see 11AP erratum in "Pentium Processor
* Specification Update").
*/
- if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
+ if (boot_cpu_has(X86_FEATURE_APIC) && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
(c->x86_mask < 0x6 || c->x86_mask == 0xb))
set_cpu_bug(c, X86_BUG_11AP);
@@ -456,7 +456,7 @@
set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
}
- if (cpu_has_xmm2)
+ if (cpu_has(c, X86_FEATURE_XMM2))
set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
if (boot_cpu_has(X86_FEATURE_DS)) {
@@ -468,7 +468,7 @@
set_cpu_cap(c, X86_FEATURE_PEBS);
}
- if (c->x86 == 6 && cpu_has_clflush &&
+ if (c->x86 == 6 && boot_cpu_has(X86_FEATURE_CLFLUSH) &&
(c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47))
set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR);
diff --git a/arch/x86/kernel/cpu/mcheck/mce-genpool.c b/arch/x86/kernel/cpu/mcheck/mce-genpool.c
index 2658e2a..93d824e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-genpool.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-genpool.c
@@ -26,6 +26,52 @@
static LLIST_HEAD(mce_event_llist);
static char gen_pool_buf[MCE_POOLSZ];
+/*
+ * Compare the record "t" with each of the records on list "l" to see if
+ * an equivalent one is present in the list.
+ */
+static bool is_duplicate_mce_record(struct mce_evt_llist *t, struct mce_evt_llist *l)
+{
+ struct mce_evt_llist *node;
+ struct mce *m1, *m2;
+
+ m1 = &t->mce;
+
+ llist_for_each_entry(node, &l->llnode, llnode) {
+ m2 = &node->mce;
+
+ if (!mce_cmp(m1, m2))
+ return true;
+ }
+ return false;
+}
+
+/*
+ * The system has panicked - we'd like to peruse the list of MCE records
+ * that have been queued, but not seen by anyone yet. The list is in
+ * reverse time order, so we need to reverse it. While doing that we can
+ * also drop duplicate records (these were logged because some banks are
+ * shared between cores or by all threads on a socket).
+ */
+struct llist_node *mce_gen_pool_prepare_records(void)
+{
+ struct llist_node *head;
+ LLIST_HEAD(new_head);
+ struct mce_evt_llist *node, *t;
+
+ head = llist_del_all(&mce_event_llist);
+ if (!head)
+ return NULL;
+
+ /* squeeze out duplicates while reversing order */
+ llist_for_each_entry_safe(node, t, head, llnode) {
+ if (!is_duplicate_mce_record(node, t))
+ llist_add(&node->llnode, &new_head);
+ }
+
+ return new_head.first;
+}
+
void mce_gen_pool_process(void)
{
struct llist_node *head;
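A stand-alone sketch of the reverse-and-dedup pass above, using a plain singly-linked list in place of the kernel's llist API (names here are illustrative, not kernel symbols):

    #include <stdbool.h>
    #include <stddef.h>

    struct rec {
            int bank;
            unsigned long long status;
            struct rec *next;
    };

    /* Reverse the newest-first input list, dropping a record whenever an
     * equivalent (older) one still follows it -- the same policy as
     * is_duplicate_mce_record() above. Survivors come out oldest-first. */
    struct rec *prepare_records(struct rec *head)
    {
            struct rec *out = NULL, *next;

            for (; head; head = next) {
                    bool dup = false;
                    struct rec *n;

                    next = head->next;
                    for (n = next; n; n = n->next) {
                            if (n->bank == head->bank &&
                                n->status == head->status) {
                                    dup = true;
                                    break;
                            }
                    }
                    if (!dup) {
                            head->next = out;       /* push front == reverse */
                            out = head;
                    }
            }
            return out;
    }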
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 547720e..cd74a3f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -35,6 +35,7 @@
bool mce_gen_pool_empty(void);
int mce_gen_pool_add(struct mce *mce);
int mce_gen_pool_init(void);
+struct llist_node *mce_gen_pool_prepare_records(void);
extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool is_excp);
struct dentry *mce_get_debugfs_dir(void);
@@ -81,3 +82,17 @@
#endif
void mce_inject_log(struct mce *m);
+
+/*
+ * We consider records to be equivalent if bank+status+addr+misc all match.
+ * This is only used when the system is going down because of a fatal error
+ * to avoid cluttering the console log with essentially repeated information.
+ * In normal processing all errors seen are logged.
+ */
+static inline bool mce_cmp(struct mce *m1, struct mce *m2)
+{
+ return m1->bank != m2->bank ||
+ m1->status != m2->status ||
+ m1->addr != m2->addr ||
+ m1->misc != m2->misc;
+}
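Note the memcmp()-style polarity: mce_cmp() returns true when the records differ, which is why is_duplicate_mce_record() tests !mce_cmp(). A quick check against a pared-down struct (a sketch, not the kernel type):

    #include <assert.h>
    #include <stdbool.h>

    struct mce { int bank; unsigned long long status, addr, misc; };

    static bool mce_cmp(struct mce *m1, struct mce *m2)
    {
            return m1->bank != m2->bank || m1->status != m2->status ||
                   m1->addr != m2->addr || m1->misc != m2->misc;
    }

    int main(void)
    {
            struct mce a = { .bank = 4, .status = 0x9400000000000000ULL };
            struct mce b = a;

            assert(!mce_cmp(&a, &b));  /* equivalent records: "no difference" */
            b.misc = 1;
            assert(mce_cmp(&a, &b));   /* any field mismatch: true */
            return 0;
    }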
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 5119766..631356c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -204,6 +204,33 @@
return IN_KERNEL;
}
+static int mce_severity_amd_smca(struct mce *m, int err_ctx)
+{
+ u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
+ u32 low, high;
+
+ /*
+ * We need to look at the following bits:
+ * - "succor" bit (data poisoning support), and
+ * - TCC bit (Task Context Corrupt)
+ * in MCi_STATUS to determine error severity.
+ */
+ if (!mce_flags.succor)
+ return MCE_PANIC_SEVERITY;
+
+ if (rdmsr_safe(addr, &low, &high))
+ return MCE_PANIC_SEVERITY;
+
+ /* TCC (Task context corrupt). If set and if IN_KERNEL, panic. */
+ if ((low & MCI_CONFIG_MCAX) &&
+ (m->status & MCI_STATUS_TCC) &&
+ (err_ctx == IN_KERNEL))
+ return MCE_PANIC_SEVERITY;
+
+ /* ...otherwise invoke hwpoison handler. */
+ return MCE_AR_SEVERITY;
+}
+
/*
* See AMD Error Scope Hierarchy table in a newer BKDG. For example
* 49125_15h_Models_30h-3Fh_BKDG.pdf, section "RAS Features"
@@ -225,6 +252,9 @@
* to at least kill process to prolong system operation.
*/
if (mce_flags.overflow_recov) {
+ if (mce_flags.smca)
+ return mce_severity_amd_smca(m, ctx);
+
/* software can try to contain */
if (!(m->mcgstatus & MCG_STATUS_RIPV) && (ctx == IN_KERNEL))
return MCE_PANIC_SEVERITY;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index f0c921b..92e5e37 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -161,7 +161,6 @@
if (!mce_gen_pool_add(mce))
irq_work_queue(&mce_irq_work);
- mce->finished = 0;
wmb();
for (;;) {
entry = mce_log_get_idx_check(mcelog.next);
@@ -194,7 +193,6 @@
mcelog.entry[entry].finished = 1;
wmb();
- mce->finished = 1;
set_bit(0, &mce_need_notify);
}
@@ -224,6 +222,53 @@
}
EXPORT_SYMBOL_GPL(mce_unregister_decode_chain);
+static inline u32 ctl_reg(int bank)
+{
+ return MSR_IA32_MCx_CTL(bank);
+}
+
+static inline u32 status_reg(int bank)
+{
+ return MSR_IA32_MCx_STATUS(bank);
+}
+
+static inline u32 addr_reg(int bank)
+{
+ return MSR_IA32_MCx_ADDR(bank);
+}
+
+static inline u32 misc_reg(int bank)
+{
+ return MSR_IA32_MCx_MISC(bank);
+}
+
+static inline u32 smca_ctl_reg(int bank)
+{
+ return MSR_AMD64_SMCA_MCx_CTL(bank);
+}
+
+static inline u32 smca_status_reg(int bank)
+{
+ return MSR_AMD64_SMCA_MCx_STATUS(bank);
+}
+
+static inline u32 smca_addr_reg(int bank)
+{
+ return MSR_AMD64_SMCA_MCx_ADDR(bank);
+}
+
+static inline u32 smca_misc_reg(int bank)
+{
+ return MSR_AMD64_SMCA_MCx_MISC(bank);
+}
+
+struct mca_msr_regs msr_ops = {
+ .ctl = ctl_reg,
+ .status = status_reg,
+ .addr = addr_reg,
+ .misc = misc_reg
+};
+
static void print_mce(struct mce *m)
{
int ret = 0;
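The msr_ops table added above decouples every reader and writer from the concrete register layout; the AMD vendor-init path further down swaps in the smca_*_reg accessors once. A stand-alone model of the pattern (the MSR numbers follow the legacy and SMCA per-bank strides but are illustrative here):

    #include <stdio.h>

    struct mca_msr_ops {
            unsigned int (*status)(int bank);
    };

    static unsigned int legacy_status(int bank)
    {
            return 0x401 + 4 * bank;          /* MSR_IA32_MCx_STATUS stride */
    }

    static unsigned int smca_status(int bank)
    {
            return 0xc0002001 + 0x10 * bank;  /* SMCA per-bank MSR stride */
    }

    static struct mca_msr_ops ops = { .status = legacy_status };

    int main(void)
    {
            int have_smca = 1;                /* pretend feature detection */

            if (have_smca)
                    ops.status = smca_status; /* swap accessors once, at init */

            printf("bank 2 status MSR: %#x\n", ops.status(2));
            return 0;
    }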
@@ -290,7 +335,9 @@
static void mce_panic(const char *msg, struct mce *final, char *exp)
{
- int i, apei_err = 0;
+ int apei_err = 0;
+ struct llist_node *pending;
+ struct mce_evt_llist *l;
if (!fake_panic) {
/*
@@ -307,11 +354,10 @@
if (atomic_inc_return(&mce_fake_panicked) > 1)
return;
}
+ pending = mce_gen_pool_prepare_records();
/* First print corrected ones that are still unlogged */
- for (i = 0; i < MCE_LOG_LEN; i++) {
- struct mce *m = &mcelog.entry[i];
- if (!(m->status & MCI_STATUS_VAL))
- continue;
+ llist_for_each_entry(l, pending, llnode) {
+ struct mce *m = &l->mce;
if (!(m->status & MCI_STATUS_UC)) {
print_mce(m);
if (!apei_err)
@@ -319,13 +365,11 @@
}
}
/* Now print uncorrected but with the final one last */
- for (i = 0; i < MCE_LOG_LEN; i++) {
- struct mce *m = &mcelog.entry[i];
- if (!(m->status & MCI_STATUS_VAL))
- continue;
+ llist_for_each_entry(l, pending, llnode) {
+ struct mce *m = &l->mce;
if (!(m->status & MCI_STATUS_UC))
continue;
- if (!final || memcmp(m, final, sizeof(struct mce))) {
+ if (!final || mce_cmp(m, final)) {
print_mce(m);
if (!apei_err)
apei_err = apei_write_mce(m);
@@ -356,11 +400,11 @@
if (msr == mca_cfg.rip_msr)
return offsetof(struct mce, ip);
- if (msr == MSR_IA32_MCx_STATUS(bank))
+ if (msr == msr_ops.status(bank))
return offsetof(struct mce, status);
- if (msr == MSR_IA32_MCx_ADDR(bank))
+ if (msr == msr_ops.addr(bank))
return offsetof(struct mce, addr);
- if (msr == MSR_IA32_MCx_MISC(bank))
+ if (msr == msr_ops.misc(bank))
return offsetof(struct mce, misc);
if (msr == MSR_IA32_MCG_STATUS)
return offsetof(struct mce, mcgstatus);
@@ -523,9 +567,9 @@
static void mce_read_aux(struct mce *m, int i)
{
if (m->status & MCI_STATUS_MISCV)
- m->misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
+ m->misc = mce_rdmsrl(msr_ops.misc(i));
if (m->status & MCI_STATUS_ADDRV) {
- m->addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
+ m->addr = mce_rdmsrl(msr_ops.addr(i));
/*
* Mask the reported address by the reported granularity.
@@ -607,7 +651,7 @@
m.tsc = 0;
barrier();
- m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
+ m.status = mce_rdmsrl(msr_ops.status(i));
if (!(m.status & MCI_STATUS_VAL))
continue;
@@ -654,7 +698,7 @@
/*
* Clear state for this bank.
*/
- mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
+ mce_wrmsrl(msr_ops.status(i), 0);
}
/*
@@ -679,7 +723,7 @@
char *tmp;
for (i = 0; i < mca_cfg.banks; i++) {
- m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
+ m->status = mce_rdmsrl(msr_ops.status(i));
if (m->status & MCI_STATUS_VAL) {
__set_bit(i, validp);
if (quirk_no_way_out)
@@ -830,9 +874,9 @@
atomic_add(*no_way_out, &global_nwo);
/*
- * global_nwo should be updated before mce_callin
+ * Rely on the barrier implied by atomic_inc_return() below,
+ * such that global_nwo is updated before mce_callin.
*/
- smp_wmb();
order = atomic_inc_return(&mce_callin);
/*
@@ -957,7 +1001,7 @@
for (i = 0; i < mca_cfg.banks; i++) {
if (test_bit(i, toclear))
- mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
+ mce_wrmsrl(msr_ops.status(i), 0);
}
}
@@ -994,11 +1038,12 @@
int i;
int worst = 0;
int severity;
+
/*
* Establish sequential order between the CPUs entering the machine
* check handler.
*/
- int order;
+ int order = -1;
/*
* If no_way_out gets set, there is no safe way to recover from this
* MCE. If mca_cfg.tolerant is cranked up, we'll try anyway.
@@ -1012,7 +1057,12 @@
DECLARE_BITMAP(toclear, MAX_NR_BANKS);
DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
char *msg = "Unknown";
- int lmce = 0;
+
+ /*
+ * MCEs are always local on AMD. On Intel, the same is determined by
+ * MCG_STATUS_LMCES.
+ */
+ int lmce = 1;
/* If this CPU is offline, just bail out. */
if (cpu_is_offline(smp_processor_id())) {
@@ -1051,19 +1101,20 @@
kill_it = 1;
/*
- * Check if this MCE is signaled to only this logical processor
+ * Check whether this MCE was signaled to only this logical processor
+ * (Intel only).
*/
- if (m.mcgstatus & MCG_STATUS_LMCES)
- lmce = 1;
- else {
- /*
- * Go through all the banks in exclusion of the other CPUs.
- * This way we don't report duplicated events on shared banks
- * because the first one to see it will clear it.
- * If this is a Local MCE, then no need to perform rendezvous.
- */
+ if (m.cpuvendor == X86_VENDOR_INTEL)
+ lmce = m.mcgstatus & MCG_STATUS_LMCES;
+
+ /*
+ * Go through all banks in exclusion of the other CPUs. This way we
+ * don't report duplicated events on shared banks because the first one
+ * to see it will clear it. If this is a Local MCE, then no need to
+ * perform rendezvous.
+ */
+ if (!lmce)
order = mce_start(&no_way_out);
- }
for (i = 0; i < cfg->banks; i++) {
__clear_bit(i, toclear);
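With the rework above, the local-versus-broadcast question collapses to a vendor check plus one MCG_STATUS bit, and only non-local MCEs enter the mce_start() rendezvous. Condensed into one function (the two macros stand in for the kernel's definitions):

    #include <assert.h>
    #include <stdbool.h>

    #define X86_VENDOR_INTEL  0
    #define MCG_STATUS_LMCES  (1ULL << 3)

    /* AMD MCEs are always local; Intel broadcasts unless LMCES is set. */
    static bool mce_is_local(int vendor, unsigned long long mcgstatus)
    {
            if (vendor == X86_VENDOR_INTEL)
                    return mcgstatus & MCG_STATUS_LMCES;
            return true;
    }

    int main(void)
    {
            assert(mce_is_local(2, 0));                       /* non-Intel */
            assert(!mce_is_local(X86_VENDOR_INTEL, 0));       /* broadcast */
            assert(mce_is_local(X86_VENDOR_INTEL, MCG_STATUS_LMCES));
            return 0;
    }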
@@ -1076,7 +1127,7 @@
m.addr = 0;
m.bank = i;
- m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
+ m.status = mce_rdmsrl(msr_ops.status(i));
if ((m.status & MCI_STATUS_VAL) == 0)
continue;
@@ -1420,7 +1471,6 @@
enum mcp_flags m_fl = 0;
mce_banks_t all_banks;
u64 cap;
- int i;
if (!mca_cfg.bootlog)
m_fl = MCP_DONTLOG;
@@ -1436,14 +1486,19 @@
rdmsrl(MSR_IA32_MCG_CAP, cap);
if (cap & MCG_CTL_P)
wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
+}
+
+static void __mcheck_cpu_init_clear_banks(void)
+{
+ int i;
for (i = 0; i < mca_cfg.banks; i++) {
struct mce_bank *b = &mce_banks[i];
if (!b->init)
continue;
- wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
- wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
+ wrmsrl(msr_ops.ctl(i), b->ctl);
+ wrmsrl(msr_ops.status(i), 0);
}
}
@@ -1495,7 +1550,7 @@
*/
clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
}
- if (c->x86 <= 17 && cfg->bootlog < 0) {
+ if (c->x86 < 17 && cfg->bootlog < 0) {
/*
* Lots of broken BIOS around that don't clear them
* by default and leave crap in there. Don't log:
@@ -1628,11 +1683,19 @@
break;
case X86_VENDOR_AMD: {
- u32 ebx = cpuid_ebx(0x80000007);
+ mce_flags.overflow_recov = !!cpu_has(c, X86_FEATURE_OVERFLOW_RECOV);
+ mce_flags.succor = !!cpu_has(c, X86_FEATURE_SUCCOR);
+ mce_flags.smca = !!cpu_has(c, X86_FEATURE_SMCA);
- mce_flags.overflow_recov = !!(ebx & BIT(0));
- mce_flags.succor = !!(ebx & BIT(1));
- mce_flags.smca = !!(ebx & BIT(3));
+ /*
+ * Install proper ops for Scalable MCA enabled processors
+ */
+ if (mce_flags.smca) {
+ msr_ops.ctl = smca_ctl_reg;
+ msr_ops.status = smca_status_reg;
+ msr_ops.addr = smca_addr_reg;
+ msr_ops.misc = smca_misc_reg;
+ }
mce_amd_feature_init(c);
break;
@@ -1717,6 +1780,7 @@
__mcheck_cpu_init_generic();
__mcheck_cpu_init_vendor(c);
+ __mcheck_cpu_init_clear_banks();
__mcheck_cpu_init_timer();
}
@@ -2082,7 +2146,7 @@
struct mce_bank *b = &mce_banks[i];
if (b->init)
- wrmsrl(MSR_IA32_MCx_CTL(i), 0);
+ wrmsrl(msr_ops.ctl(i), 0);
}
return;
}
@@ -2121,6 +2185,7 @@
{
__mcheck_cpu_init_generic();
__mcheck_cpu_init_vendor(raw_cpu_ptr(&cpu_info));
+ __mcheck_cpu_init_clear_banks();
}
static struct syscore_ops mce_syscore_ops = {
@@ -2138,6 +2203,7 @@
if (!mce_available(raw_cpu_ptr(&cpu_info)))
return;
__mcheck_cpu_init_generic();
+ __mcheck_cpu_init_clear_banks();
__mcheck_cpu_init_timer();
}
@@ -2413,7 +2479,7 @@
struct mce_bank *b = &mce_banks[i];
if (b->init)
- wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
+ wrmsrl(msr_ops.ctl(i), b->ctl);
}
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 9d656fd..10b0661 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -54,14 +54,6 @@
/* Threshold LVT offset is at MSR0xC0000410[15:12] */
#define SMCA_THR_LVT_OFF 0xF000
-/*
- * OS is required to set the MCAX bit to acknowledge that it is now using the
- * new MSR ranges and new registers under each bank. It also means that the OS
- * will configure deferred errors in the new MCx_CONFIG register. If the bit is
- * not set, uncorrectable errors will cause a system panic.
- */
-#define SMCA_MCAX_EN_OFF 0x1
-
static const char * const th_names[] = {
"load_store",
"insn_fetch",
@@ -333,7 +325,7 @@
/* Fall back to method we used for older processors: */
switch (block) {
case 0:
- addr = MSR_IA32_MCx_MISC(bank);
+ addr = msr_ops.misc(bank);
break;
case 1:
offset = ((low & MASK_BLKPTR_LO) >> 21);
@@ -351,6 +343,7 @@
int offset, u32 misc_high)
{
unsigned int cpu = smp_processor_id();
+ u32 smca_low, smca_high, smca_addr;
struct threshold_block b;
int new;
@@ -369,24 +362,49 @@
b.interrupt_enable = 1;
- if (mce_flags.smca) {
- u32 smca_low, smca_high;
- u32 smca_addr = MSR_AMD64_SMCA_MCx_CONFIG(bank);
-
- if (!rdmsr_safe(smca_addr, &smca_low, &smca_high)) {
- smca_high |= SMCA_MCAX_EN_OFF;
- wrmsr(smca_addr, smca_low, smca_high);
- }
-
- /* Gather LVT offset for thresholding: */
- if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high))
- goto out;
-
- new = (smca_low & SMCA_THR_LVT_OFF) >> 12;
- } else {
+ if (!mce_flags.smca) {
new = (misc_high & MASK_LVTOFF_HI) >> 20;
+ goto set_offset;
}
+ smca_addr = MSR_AMD64_SMCA_MCx_CONFIG(bank);
+
+ if (!rdmsr_safe(smca_addr, &smca_low, &smca_high)) {
+ /*
+ * OS is required to set the MCAX bit to acknowledge that it is
+ * now using the new MSR ranges and new registers under each
+ * bank. It also means that the OS will configure deferred
+ * errors in the new MCx_CONFIG register. If the bit is not set,
+ * uncorrectable errors will cause a system panic.
+ *
+ * MCA_CONFIG[MCAX] is bit 32 (bit 0 in the high portion of the MSR).
+ */
+ smca_high |= BIT(0);
+
+ /*
+ * SMCA logs Deferred Error information in MCA_DE{STAT,ADDR}
+ * registers with the option of additionally logging to
+ * MCA_{STATUS,ADDR} if MCA_CONFIG[LogDeferredInMcaStat] is set.
+ *
+ * This bit is usually set by BIOS to retain the old behavior
+ * for OSes that don't use the new registers. Linux supports the
+ * new registers so let's disable that additional logging here.
+ *
+ * MCA_CONFIG[LogDeferredInMcaStat] is bit 34 (bit 2 in the high
+ * portion of the MSR).
+ */
+ smca_high &= ~BIT(2);
+
+ wrmsr(smca_addr, smca_low, smca_high);
+ }
+
+ /* Gather LVT offset for thresholding: */
+ if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high))
+ goto out;
+
+ new = (smca_low & SMCA_THR_LVT_OFF) >> 12;
+
+set_offset:
offset = setup_APIC_mce_threshold(offset, new);
if ((offset == new) && (mce_threshold_vector != amd_threshold_interrupt))
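The MCAX and LogDeferredInMcaStat comments lean on the lo/hi split of wrmsr(): bit N of a 64-bit MSR is bit N-32 of the high word once N >= 32. The arithmetic, checked in isolation:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
            uint32_t lo = 0, hi = 0;

            hi |= 1u << (32 - 32);     /* MCA_CONFIG[32] (MCAX) -> high bit 0 */
            hi &= ~(1u << (34 - 32));  /* MCA_CONFIG[34]
                                          (LogDeferredInMcaStat) -> high bit 2 */

            uint64_t msr = ((uint64_t)hi << 32) | lo;

            assert(msr & (1ULL << 32));     /* MCAX set */
            assert(!(msr & (1ULL << 34)));  /* extra deferred logging clear */
            return 0;
    }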
@@ -430,12 +448,23 @@
deferred_error_interrupt_enable(c);
}
-static void __log_error(unsigned int bank, bool threshold_err, u64 misc)
+static void
+__log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc)
{
+ u32 msr_status = msr_ops.status(bank);
+ u32 msr_addr = msr_ops.addr(bank);
struct mce m;
u64 status;
- rdmsrl(MSR_IA32_MCx_STATUS(bank), status);
+ WARN_ON_ONCE(deferred_err && threshold_err);
+
+ if (deferred_err && mce_flags.smca) {
+ msr_status = MSR_AMD64_SMCA_MCx_DESTAT(bank);
+ msr_addr = MSR_AMD64_SMCA_MCx_DEADDR(bank);
+ }
+
+ rdmsrl(msr_status, status);
+
if (!(status & MCI_STATUS_VAL))
return;
@@ -448,10 +477,11 @@
m.misc = misc;
if (m.status & MCI_STATUS_ADDRV)
- rdmsrl(MSR_IA32_MCx_ADDR(bank), m.addr);
+ rdmsrl(msr_addr, m.addr);
mce_log(&m);
- wrmsrl(MSR_IA32_MCx_STATUS(bank), 0);
+
+ wrmsrl(msr_status, 0);
}
static inline void __smp_deferred_error_interrupt(void)
@@ -479,17 +509,21 @@
/* APIC interrupt handler for deferred errors */
static void amd_deferred_error_interrupt(void)
{
- u64 status;
unsigned int bank;
+ u32 msr_status;
+ u64 status;
for (bank = 0; bank < mca_cfg.banks; ++bank) {
- rdmsrl(MSR_IA32_MCx_STATUS(bank), status);
+ msr_status = (mce_flags.smca) ? MSR_AMD64_SMCA_MCx_DESTAT(bank)
+ : msr_ops.status(bank);
+
+ rdmsrl(msr_status, status);
if (!(status & MCI_STATUS_VAL) ||
!(status & MCI_STATUS_DEFERRED))
continue;
- __log_error(bank, false, 0);
+ __log_error(bank, true, false, 0);
break;
}
}
@@ -544,7 +578,7 @@
return;
log:
- __log_error(bank, true, ((u64)high << 32) | low);
+ __log_error(bank, false, true, ((u64)high << 32) | low);
}
/*
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 1e8bb6c..1defb8e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -84,7 +84,7 @@
*/
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
return 0;
- if (!cpu_has_apic || lapic_get_maxlvt() < 6)
+ if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6)
return 0;
rdmsrl(MSR_IA32_MCG_CAP, cap);
*banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index ac780ca..6b9dc4d 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -450,7 +450,7 @@
/* Thermal monitoring depends on APIC, ACPI and clock modulation */
static int intel_thermal_supported(struct cpuinfo_x86 *c)
{
- if (!cpu_has_apic)
+ if (!boot_cpu_has(X86_FEATURE_APIC))
return 0;
if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
return 0;
diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c
index f8c81ba..b1086f7 100644
--- a/arch/x86/kernel/cpu/mtrr/cyrix.c
+++ b/arch/x86/kernel/cpu/mtrr/cyrix.c
@@ -137,7 +137,7 @@
u32 cr0;
/* Save value of CR4 and clear Page Global Enable (bit 7) */
- if (cpu_has_pge) {
+ if (boot_cpu_has(X86_FEATURE_PGE)) {
cr4 = __read_cr4();
__write_cr4(cr4 & ~X86_CR4_PGE);
}
@@ -170,7 +170,7 @@
write_cr0(read_cr0() & ~X86_CR0_CD);
/* Restore value of CR4 */
- if (cpu_has_pge)
+ if (boot_cpu_has(X86_FEATURE_PGE))
__write_cr4(cr4);
}
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 19f5736..16e37a25 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -444,11 +444,24 @@
pr_debug("TOM2: %016llx aka %lldM\n", mtrr_tom2, mtrr_tom2>>20);
}
+/* PAT setup for BP. We need to go through sync steps here */
+void __init mtrr_bp_pat_init(void)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ prepare_set();
+
+ pat_init();
+
+ post_set();
+ local_irq_restore(flags);
+}
+
/* Grab all of the MTRR state for this CPU into *state */
bool __init get_mtrr_state(void)
{
struct mtrr_var_range *vrs;
- unsigned long flags;
unsigned lo, dummy;
unsigned int i;
@@ -481,15 +494,6 @@
mtrr_state_set = 1;
- /* PAT setup for BP. We need to go through sync steps here */
- local_irq_save(flags);
- prepare_set();
-
- pat_init();
-
- post_set();
- local_irq_restore(flags);
-
return !!(mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED);
}
@@ -741,7 +745,7 @@
wbinvd();
/* Save value of CR4 and clear Page Global Enable (bit 7) */
- if (cpu_has_pge) {
+ if (boot_cpu_has(X86_FEATURE_PGE)) {
cr4 = __read_cr4();
__write_cr4(cr4 & ~X86_CR4_PGE);
}
@@ -771,7 +775,7 @@
write_cr0(read_cr0() & ~X86_CR0_CD);
/* Restore value of CR4 */
- if (cpu_has_pge)
+ if (boot_cpu_has(X86_FEATURE_PGE))
__write_cr4(cr4);
raw_spin_unlock(&set_atomicity_lock);
}
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 10f8d47..7d393ec 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -752,6 +752,9 @@
/* BIOS may override */
__mtrr_enabled = get_mtrr_state();
+ if (mtrr_enabled())
+ mtrr_bp_pat_init();
+
if (mtrr_cleanup(phys_addr)) {
changed_by_mtrr_cleanup = 1;
mtrr_if->set_all();
@@ -759,8 +762,16 @@
}
}
- if (!mtrr_enabled())
+ if (!mtrr_enabled()) {
pr_info("MTRR: Disabled\n");
+
+ /*
+ * PAT initialization relies on MTRR's rendezvous handler.
+ * Skip PAT init until the handler can initialize both
+ * features independently.
+ */
+ pat_disable("MTRRs disabled, skipping PAT initialization too.");
+ }
}
void mtrr_ap_init(void)
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 951884d..6c7ced0 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -52,6 +52,7 @@
void fill_mtrr_var_range(unsigned int index,
u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
bool get_mtrr_state(void);
+void mtrr_bp_pat_init(void);
extern void set_mtrr_ops(const struct mtrr_ops *ops);
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 364e583..8cac429 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -94,7 +94,7 @@
*/
static uint32_t __init vmware_platform(void)
{
- if (cpu_has_hypervisor) {
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
unsigned int eax;
unsigned int hyper_vendor_id[3];
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 1f4acd6..3fe45f8 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -151,7 +151,7 @@
return;
/* Did the boot loader setup the local APIC ? */
- if (!cpu_has_apic) {
+ if (!boot_cpu_has(X86_FEATURE_APIC)) {
if (apic_force_enable(r.start))
return;
}
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 8efa57a..2bb25c3f 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -260,19 +260,12 @@
unsigned long sp;
#endif
printk(KERN_DEFAULT
- "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
-#ifdef CONFIG_PREEMPT
- printk("PREEMPT ");
-#endif
-#ifdef CONFIG_SMP
- printk("SMP ");
-#endif
- if (debug_pagealloc_enabled())
- printk("DEBUG_PAGEALLOC ");
-#ifdef CONFIG_KASAN
- printk("KASAN");
-#endif
- printk("\n");
+ "%s: %04lx [#%d]%s%s%s%s\n", str, err & 0xffff, ++die_counter,
+ IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "",
+ IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
+ debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "",
+ IS_ENABLED(CONFIG_KASAN) ? " KASAN" : "");
+
if (notify_die(DIE_OOPS, str, regs, err,
current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP)
return 1;
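The consolidation works because each option contributes either its tag or an empty string to a single format call, resolved at compile time. The same idiom outside the kernel, with a simplified stand-in for IS_ENABLED() (the real macro also tolerates undefined CONFIG_* symbols):

    #include <stdio.h>

    #define CONFIG_SMP     1   /* normally set (or not) by Kconfig */
    #define IS_ENABLED(x)  (x)

    int main(void)
    {
            printf("example: %04lx [#%d]%s%s\n", 0x000eUL, 1,
                   IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
                   0 /* e.g. KASAN off */  ? " KASAN" : "");
            return 0;
    }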
diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/ebda.c
similarity index 97%
rename from arch/x86/kernel/head.c
rename to arch/x86/kernel/ebda.c
index 992f442..afe65df 100644
--- a/arch/x86/kernel/head.c
+++ b/arch/x86/kernel/ebda.c
@@ -38,7 +38,7 @@
* that the paravirt case can handle memory setup
* correctly, without our help.
*/
- if (paravirt_enabled())
+ if (!x86_platform.legacy.ebda_search)
return;
/* end of low (conventional) memory */
diff --git a/arch/x86/kernel/fpu/bugs.c b/arch/x86/kernel/fpu/bugs.c
index dd9ca9b6..aad34aa 100644
--- a/arch/x86/kernel/fpu/bugs.c
+++ b/arch/x86/kernel/fpu/bugs.c
@@ -21,11 +21,15 @@
* We should really only care about bugs here
* anyway. Not features.
*/
-static void __init check_fpu(void)
+void __init fpu__init_check_bugs(void)
{
u32 cr0_saved;
s32 fdiv_bug;
+ /* kernel_fpu_begin/end() relies on patched alternative instructions. */
+ if (!boot_cpu_has(X86_FEATURE_FPU))
+ return;
+
/* We might have CR0::TS set already, clear it: */
cr0_saved = read_cr0();
write_cr0(cr0_saved & ~X86_CR0_TS);
@@ -59,13 +63,3 @@
pr_warn("Hmm, FPU with FDIV bug\n");
}
}
-
-void __init fpu__init_check_bugs(void)
-{
- /*
- * kernel_fpu_begin/end() in check_fpu() relies on the patched
- * alternative instructions.
- */
- if (cpu_has_fpu)
- check_fpu();
-}
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 8e37cc8..9702754 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -217,14 +217,14 @@
void fpstate_init(union fpregs_state *state)
{
- if (!cpu_has_fpu) {
+ if (!static_cpu_has(X86_FEATURE_FPU)) {
fpstate_init_soft(&state->soft);
return;
}
memset(state, 0, xstate_size);
- if (cpu_has_fxsr)
+ if (static_cpu_has(X86_FEATURE_FXSR))
fpstate_init_fxstate(&state->fxsave);
else
fpstate_init_fstate(&state->fsave);
@@ -237,7 +237,7 @@
dst_fpu->fpregs_active = 0;
dst_fpu->last_cpu = -1;
- if (!src_fpu->fpstate_active || !cpu_has_fpu)
+ if (!src_fpu->fpstate_active || !static_cpu_has(X86_FEATURE_FPU))
return 0;
WARN_ON_FPU(src_fpu != &current->thread.fpu);
@@ -506,33 +506,6 @@
* x87 math exception handling:
*/
-static inline unsigned short get_fpu_cwd(struct fpu *fpu)
-{
- if (cpu_has_fxsr) {
- return fpu->state.fxsave.cwd;
- } else {
- return (unsigned short)fpu->state.fsave.cwd;
- }
-}
-
-static inline unsigned short get_fpu_swd(struct fpu *fpu)
-{
- if (cpu_has_fxsr) {
- return fpu->state.fxsave.swd;
- } else {
- return (unsigned short)fpu->state.fsave.swd;
- }
-}
-
-static inline unsigned short get_fpu_mxcsr(struct fpu *fpu)
-{
- if (cpu_has_xmm) {
- return fpu->state.fxsave.mxcsr;
- } else {
- return MXCSR_DEFAULT;
- }
-}
-
int fpu__exception_code(struct fpu *fpu, int trap_nr)
{
int err;
@@ -547,10 +520,15 @@
* so if this combination doesn't produce any single exception,
* then we have a bad program that isn't synchronizing its FPU usage
* and it will suffer the consequences since we won't be able to
- * fully reproduce the context of the exception
+ * fully reproduce the context of the exception.
*/
- cwd = get_fpu_cwd(fpu);
- swd = get_fpu_swd(fpu);
+ if (boot_cpu_has(X86_FEATURE_FXSR)) {
+ cwd = fpu->state.fxsave.cwd;
+ swd = fpu->state.fxsave.swd;
+ } else {
+ cwd = (unsigned short)fpu->state.fsave.cwd;
+ swd = (unsigned short)fpu->state.fsave.swd;
+ }
err = swd & ~cwd;
} else {
@@ -560,7 +538,11 @@
* unmasked exception was caught we must mask the exception mask bits
* at 0x1f80, and then use these to mask the exception bits at 0x3f.
*/
- unsigned short mxcsr = get_fpu_mxcsr(fpu);
+ unsigned short mxcsr = MXCSR_DEFAULT;
+
+ if (boot_cpu_has(X86_FEATURE_XMM))
+ mxcsr = fpu->state.fxsave.mxcsr;
+
err = ~(mxcsr >> 7) & mxcsr;
}
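Both err computations above are pure bit arithmetic: x87 reports exceptions that are raised in SWD but unmasked in CWD, while MXCSR keeps each mask bit seven positions above its status flag. A worked example with the divide-by-zero bit:

    #include <assert.h>

    int main(void)
    {
            /* x87: default CWD is 0x037f (all masked); unmask #Z (bit 2). */
            unsigned short cwd = 0x037f & ~0x0004;
            unsigned short swd = 0x0004;            /* #Z raised */

            assert((swd & ~cwd) == 0x0004);

            /* SSE: default MXCSR is 0x1f80 (masks in bits 7-12). */
            unsigned short mxcsr = (0x1f80 & ~(0x0004 << 7)) | 0x0004;

            assert((~(mxcsr >> 7) & mxcsr & 0x3f) == 0x0004);
            return 0;
    }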
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 54c86ff..aacfd7a 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -29,22 +29,22 @@
unsigned long cr0;
unsigned long cr4_mask = 0;
- if (cpu_has_fxsr)
+ if (boot_cpu_has(X86_FEATURE_FXSR))
cr4_mask |= X86_CR4_OSFXSR;
- if (cpu_has_xmm)
+ if (boot_cpu_has(X86_FEATURE_XMM))
cr4_mask |= X86_CR4_OSXMMEXCPT;
if (cr4_mask)
cr4_set_bits(cr4_mask);
cr0 = read_cr0();
cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */
- if (!cpu_has_fpu)
+ if (!boot_cpu_has(X86_FEATURE_FPU))
cr0 |= X86_CR0_EM;
write_cr0(cr0);
/* Flush out any pending x87 state: */
#ifdef CONFIG_MATH_EMULATION
- if (!cpu_has_fpu)
+ if (!boot_cpu_has(X86_FEATURE_FPU))
fpstate_init_soft(&current->thread.fpu.state.soft);
else
#endif
@@ -89,7 +89,7 @@
}
#ifndef CONFIG_MATH_EMULATION
- if (!cpu_has_fpu) {
+ if (!boot_cpu_has(X86_FEATURE_FPU)) {
pr_emerg("x86/fpu: Giving up, no FPU found and no math emulation present\n");
for (;;)
asm volatile("hlt");
@@ -106,7 +106,7 @@
{
unsigned int mask = 0;
- if (cpu_has_fxsr) {
+ if (boot_cpu_has(X86_FEATURE_FXSR)) {
/* Static because GCC does not get 16-byte stack alignment right: */
static struct fxregs_state fxregs __initdata;
@@ -212,7 +212,7 @@
* fpu__init_system_xstate().
*/
- if (!cpu_has_fpu) {
+ if (!boot_cpu_has(X86_FEATURE_FPU)) {
/*
* Disable xsave as we do not support it if i387
* emulation is enabled.
@@ -221,7 +221,7 @@
setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
xstate_size = sizeof(struct swregs_state);
} else {
- if (cpu_has_fxsr)
+ if (boot_cpu_has(X86_FEATURE_FXSR))
xstate_size = sizeof(struct fxregs_state);
else
xstate_size = sizeof(struct fregs_state);
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index 8bd1c00..81422df 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -21,7 +21,10 @@
{
struct fpu *target_fpu = &target->thread.fpu;
- return (cpu_has_fxsr && target_fpu->fpstate_active) ? regset->n : 0;
+ if (boot_cpu_has(X86_FEATURE_FXSR) && target_fpu->fpstate_active)
+ return regset->n;
+ else
+ return 0;
}
int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
@@ -30,7 +33,7 @@
{
struct fpu *fpu = &target->thread.fpu;
- if (!cpu_has_fxsr)
+ if (!boot_cpu_has(X86_FEATURE_FXSR))
return -ENODEV;
fpu__activate_fpstate_read(fpu);
@@ -47,7 +50,7 @@
struct fpu *fpu = &target->thread.fpu;
int ret;
- if (!cpu_has_fxsr)
+ if (!boot_cpu_has(X86_FEATURE_FXSR))
return -ENODEV;
fpu__activate_fpstate_write(fpu);
@@ -65,7 +68,7 @@
* update the header bits in the xsave header, indicating the
* presence of FP and SSE state.
*/
- if (cpu_has_xsave)
+ if (boot_cpu_has(X86_FEATURE_XSAVE))
fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FPSSE;
return ret;
@@ -79,7 +82,7 @@
struct xregs_state *xsave;
int ret;
- if (!cpu_has_xsave)
+ if (!boot_cpu_has(X86_FEATURE_XSAVE))
return -ENODEV;
fpu__activate_fpstate_read(fpu);
@@ -108,7 +111,7 @@
struct xregs_state *xsave;
int ret;
- if (!cpu_has_xsave)
+ if (!boot_cpu_has(X86_FEATURE_XSAVE))
return -ENODEV;
fpu__activate_fpstate_write(fpu);
@@ -275,10 +278,10 @@
fpu__activate_fpstate_read(fpu);
- if (!static_cpu_has(X86_FEATURE_FPU))
+ if (!boot_cpu_has(X86_FEATURE_FPU))
return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);
- if (!cpu_has_fxsr)
+ if (!boot_cpu_has(X86_FEATURE_FXSR))
return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
&fpu->state.fsave, 0,
-1);
@@ -306,10 +309,10 @@
fpu__activate_fpstate_write(fpu);
fpstate_sanitize_xstate(fpu);
- if (!static_cpu_has(X86_FEATURE_FPU))
+ if (!boot_cpu_has(X86_FEATURE_FPU))
return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);
- if (!cpu_has_fxsr)
+ if (!boot_cpu_has(X86_FEATURE_FXSR))
return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
&fpu->state.fsave, 0,
-1);
@@ -325,7 +328,7 @@
* update the header bit in the xsave header, indicating the
* presence of FP.
*/
- if (cpu_has_xsave)
+ if (boot_cpu_has(X86_FEATURE_XSAVE))
fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FP;
return ret;
}
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index b48ef35..4ea2a59 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -190,7 +190,7 @@
*/
void fpu__init_cpu_xstate(void)
{
- if (!cpu_has_xsave || !xfeatures_mask)
+ if (!boot_cpu_has(X86_FEATURE_XSAVE) || !xfeatures_mask)
return;
cr4_set_bits(X86_CR4_OSXSAVE);
@@ -280,7 +280,7 @@
xstate_comp_offsets[0] = 0;
xstate_comp_offsets[1] = offsetof(struct fxregs_state, xmm_space);
- if (!cpu_has_xsaves) {
+ if (!boot_cpu_has(X86_FEATURE_XSAVES)) {
for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
if (xfeature_enabled(i)) {
xstate_comp_offsets[i] = xstate_offsets[i];
@@ -316,13 +316,13 @@
WARN_ON_FPU(!on_boot_cpu);
on_boot_cpu = 0;
- if (!cpu_has_xsave)
+ if (!boot_cpu_has(X86_FEATURE_XSAVE))
return;
setup_xstate_features();
print_xstate_features();
- if (cpu_has_xsaves) {
+ if (boot_cpu_has(X86_FEATURE_XSAVES)) {
init_fpstate.xsave.header.xcomp_bv = (u64)1 << 63 | xfeatures_mask;
init_fpstate.xsave.header.xfeatures = xfeatures_mask;
}
@@ -417,7 +417,7 @@
*/
static int using_compacted_format(void)
{
- return cpu_has_xsaves;
+ return boot_cpu_has(X86_FEATURE_XSAVES);
}
static void __xstate_dump_leaves(void)
@@ -549,7 +549,7 @@
unsigned int eax, ebx, ecx, edx;
unsigned int calculated_xstate_size;
- if (!cpu_has_xsaves) {
+ if (!boot_cpu_has(X86_FEATURE_XSAVES)) {
/*
* - CPUID function 0DH, sub-function 0:
* EBX enumerates the size (in bytes) required by
@@ -630,7 +630,7 @@
WARN_ON_FPU(!on_boot_cpu);
on_boot_cpu = 0;
- if (!cpu_has_xsave) {
+ if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
pr_info("x86/fpu: Legacy x87 FPU detected.\n");
return;
}
@@ -667,7 +667,7 @@
pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
xfeatures_mask,
xstate_size,
- cpu_has_xsaves ? "compacted" : "standard");
+ boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard");
}
/*
@@ -678,7 +678,7 @@
/*
* Restore XCR0 on xsave capable CPUs:
*/
- if (cpu_has_xsave)
+ if (boot_cpu_has(X86_FEATURE_XSAVE))
xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask);
}
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 2911ef3..d784bb5 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -34,6 +34,8 @@
cr4_init_shadow();
sanitize_boot_params(&boot_params);
+ x86_early_init_platform_quirks();
+
/* Call the subarch specific early setup function */
switch (boot_params.hdr.hardware_subarch) {
case X86_SUBARCH_INTEL_MID:
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 1f4422d..b72fb0b 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -182,6 +182,7 @@
if (!boot_params.hdr.version)
copy_bootdata(__va(real_mode_data));
+ x86_early_init_platform_quirks();
reserve_ebda_region();
switch (boot_params.hdr.hardware_subarch) {
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index af11129..6f8902b 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -555,62 +555,53 @@
*/
cld
- cmpl $2,(%esp) # X86_TRAP_NMI
- je .Lis_nmi # Ignore NMI
-
- cmpl $2,%ss:early_recursion_flag
- je hlt_loop
incl %ss:early_recursion_flag
- push %eax # 16(%esp)
- push %ecx # 12(%esp)
- push %edx # 8(%esp)
- push %ds # 4(%esp)
- push %es # 0(%esp)
- movl $(__KERNEL_DS),%eax
- movl %eax,%ds
- movl %eax,%es
+ /* The vector number is in pt_regs->gs */
- cmpl $(__KERNEL_CS),32(%esp)
- jne 10f
+ cld
+ pushl %fs /* pt_regs->fs */
+ movw $0, 2(%esp) /* clear high bits (some CPUs leave garbage) */
+ pushl %es /* pt_regs->es */
+ movw $0, 2(%esp) /* clear high bits (some CPUs leave garbage) */
+ pushl %ds /* pt_regs->ds */
+ movw $0, 2(%esp) /* clear high bits (some CPUs leave garbage) */
+ pushl %eax /* pt_regs->ax */
+ pushl %ebp /* pt_regs->bp */
+ pushl %edi /* pt_regs->di */
+ pushl %esi /* pt_regs->si */
+ pushl %edx /* pt_regs->dx */
+ pushl %ecx /* pt_regs->cx */
+ pushl %ebx /* pt_regs->bx */
- leal 28(%esp),%eax # Pointer to %eip
- call early_fixup_exception
- andl %eax,%eax
- jnz ex_entry /* found an exception entry */
+ /* Fix up DS and ES */
+ movl $(__KERNEL_DS), %ecx
+ movl %ecx, %ds
+ movl %ecx, %es
-10:
-#ifdef CONFIG_PRINTK
- xorl %eax,%eax
- movw %ax,2(%esp) /* clean up the segment values on some cpus */
- movw %ax,6(%esp)
- movw %ax,34(%esp)
- leal 40(%esp),%eax
- pushl %eax /* %esp before the exception */
- pushl %ebx
- pushl %ebp
- pushl %esi
- pushl %edi
- movl %cr2,%eax
- pushl %eax
- pushl (20+6*4)(%esp) /* trapno */
- pushl $fault_msg
- call printk
-#endif
- call dump_stack
-hlt_loop:
- hlt
- jmp hlt_loop
+ /* Load the vector number into EDX */
+ movl PT_GS(%esp), %edx
-ex_entry:
- pop %es
- pop %ds
- pop %edx
- pop %ecx
- pop %eax
- decl %ss:early_recursion_flag
-.Lis_nmi:
- addl $8,%esp /* drop vector number and error code */
+ /* Load GS into pt_regs->gs and clear high bits */
+ movw %gs, PT_GS(%esp)
+ movw $0, PT_GS+2(%esp)
+
+ movl %esp, %eax /* args are pt_regs (EAX), trapnr (EDX) */
+ call early_fixup_exception
+
+ popl %ebx /* pt_regs->bx */
+ popl %ecx /* pt_regs->cx */
+ popl %edx /* pt_regs->dx */
+ popl %esi /* pt_regs->si */
+ popl %edi /* pt_regs->di */
+ popl %ebp /* pt_regs->bp */
+ popl %eax /* pt_regs->ax */
+ popl %ds /* pt_regs->ds */
+ popl %es /* pt_regs->es */
+ popl %fs /* pt_regs->fs */
+ popl %gs /* pt_regs->gs */
+ decl %ss:early_recursion_flag
+ addl $4, %esp /* pop pt_regs->orig_ax */
iret
ENDPROC(early_idt_handler_common)
@@ -647,10 +638,14 @@
popl %eax
#endif
iret
+
+hlt_loop:
+ hlt
+ jmp hlt_loop
ENDPROC(ignore_int)
__INITDATA
.align 4
-early_recursion_flag:
+GLOBAL(early_recursion_flag)
.long 0
__REFDATA
@@ -715,19 +710,6 @@
int_msg:
.asciz "Unknown interrupt or fault at: %p %p %p\n"
-fault_msg:
-/* fault info: */
- .ascii "BUG: Int %d: CR2 %p\n"
-/* regs pushed in early_idt_handler: */
- .ascii " EDI %p ESI %p EBP %p EBX %p\n"
- .ascii " ESP %p ES %p DS %p\n"
- .ascii " EDX %p ECX %p EAX %p\n"
-/* fault frame: */
- .ascii " vec %p err %p EIP %p CS %p flg %p\n"
- .ascii "Stack: %p %p %p %p %p %p %p %p\n"
- .ascii " %p %p %p %p %p %p %p %p\n"
- .asciz " %p %p %p %p %p %p %p %p\n"
-
#include "../../x86/xen/xen-head.S"
/*
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 22fbf9d..5df831e 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -20,6 +20,7 @@
#include <asm/processor-flags.h>
#include <asm/percpu.h>
#include <asm/nops.h>
+#include "../entry/calling.h"
#ifdef CONFIG_PARAVIRT
#include <asm/asm-offsets.h>
@@ -64,6 +65,14 @@
* tables and then reload them.
*/
+ /*
+ * Setup stack for verify_cpu(). "-8" because stack_start is defined
+ * this way, see below. Our best guess is a NULL ptr for stack
+ * termination heuristics and we don't want to break anything which
+ * might depend on it (kgdb, ...).
+ */
+ leaq (__end_init_task - 8)(%rip), %rsp
+
/* Sanitize CPU configuration */
call verify_cpu
@@ -350,90 +359,48 @@
*/
cld
- cmpl $2,(%rsp) # X86_TRAP_NMI
- je .Lis_nmi # Ignore NMI
-
- cmpl $2,early_recursion_flag(%rip)
- jz 1f
incl early_recursion_flag(%rip)
- pushq %rax # 64(%rsp)
- pushq %rcx # 56(%rsp)
- pushq %rdx # 48(%rsp)
- pushq %rsi # 40(%rsp)
- pushq %rdi # 32(%rsp)
- pushq %r8 # 24(%rsp)
- pushq %r9 # 16(%rsp)
- pushq %r10 # 8(%rsp)
- pushq %r11 # 0(%rsp)
+ /* The vector number is currently in the pt_regs->di slot. */
+ pushq %rsi /* pt_regs->si */
+ movq 8(%rsp), %rsi /* RSI = vector number */
+ movq %rdi, 8(%rsp) /* pt_regs->di = RDI */
+ pushq %rdx /* pt_regs->dx */
+ pushq %rcx /* pt_regs->cx */
+ pushq %rax /* pt_regs->ax */
+ pushq %r8 /* pt_regs->r8 */
+ pushq %r9 /* pt_regs->r9 */
+ pushq %r10 /* pt_regs->r10 */
+ pushq %r11 /* pt_regs->r11 */
+ pushq %rbx /* pt_regs->bx */
+ pushq %rbp /* pt_regs->bp */
+ pushq %r12 /* pt_regs->r12 */
+ pushq %r13 /* pt_regs->r13 */
+ pushq %r14 /* pt_regs->r14 */
+ pushq %r15 /* pt_regs->r15 */
- cmpl $__KERNEL_CS,96(%rsp)
- jne 11f
-
- cmpl $14,72(%rsp) # Page fault?
+ cmpq $14,%rsi /* Page fault? */
jnz 10f
- GET_CR2_INTO(%rdi) # can clobber any volatile register if pv
+ GET_CR2_INTO(%rdi) /* Can clobber any volatile register if pv */
call early_make_pgtable
andl %eax,%eax
- jz 20f # All good
+ jz 20f /* All good */
10:
- leaq 88(%rsp),%rdi # Pointer to %rip
+ movq %rsp,%rdi /* RDI = pt_regs; RSI is already trapnr */
call early_fixup_exception
- andl %eax,%eax
- jnz 20f # Found an exception entry
-11:
-#ifdef CONFIG_EARLY_PRINTK
- GET_CR2_INTO(%r9) # can clobber any volatile register if pv
- movl 80(%rsp),%r8d # error code
- movl 72(%rsp),%esi # vector number
- movl 96(%rsp),%edx # %cs
- movq 88(%rsp),%rcx # %rip
- xorl %eax,%eax
- leaq early_idt_msg(%rip),%rdi
- call early_printk
- cmpl $2,early_recursion_flag(%rip)
- jz 1f
- call dump_stack
-#ifdef CONFIG_KALLSYMS
- leaq early_idt_ripmsg(%rip),%rdi
- movq 40(%rsp),%rsi # %rip again
- call __print_symbol
-#endif
-#endif /* EARLY_PRINTK */
-1: hlt
- jmp 1b
-
-20: # Exception table entry found or page table generated
- popq %r11
- popq %r10
- popq %r9
- popq %r8
- popq %rdi
- popq %rsi
- popq %rdx
- popq %rcx
- popq %rax
+20:
decl early_recursion_flag(%rip)
-.Lis_nmi:
- addq $16,%rsp # drop vector number and error code
- INTERRUPT_RETURN
+ jmp restore_regs_and_iret
ENDPROC(early_idt_handler_common)
__INITDATA
.balign 4
-early_recursion_flag:
+GLOBAL(early_recursion_flag)
.long 0
-#ifdef CONFIG_EARLY_PRINTK
-early_idt_msg:
- .asciz "PANIC: early exception %02lx rip %lx:%lx error %lx cr2 %lx\n"
-early_idt_ripmsg:
- .asciz "RIP %s\n"
-#endif /* CONFIG_EARLY_PRINTK */
-
#define NEXT_PAGE(name) \
.balign PAGE_SIZE; \
GLOBAL(name)
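Both stubs now build a complete pt_regs frame and enter C with the frame pointer in the first argument register and the vector number in the second, so the recursion guard, page-fault handling and exception-table search can live in one C function. The implied C-side shape (a sketch; the real prototype belongs to the accompanying extable change):

    struct pt_regs;   /* register frame, laid out by the pushes above */

    /* Assumed counterpart of the "call early_fixup_exception" in both stubs. */
    void early_fixup_exception(struct pt_regs *regs, int trapnr);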
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index a1f0e4a..f112af7 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -54,7 +54,7 @@
char name[10];
};
-inline struct hpet_dev *EVT_TO_HPET_DEV(struct clock_event_device *evtdev)
+static inline struct hpet_dev *EVT_TO_HPET_DEV(struct clock_event_device *evtdev)
{
return container_of(evtdev, struct hpet_dev, evt);
}
@@ -773,7 +773,6 @@
.mask = HPET_MASK,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
.resume = hpet_resume_counter,
- .archdata = { .vclock_mode = VCLOCK_HPET },
};
static int hpet_clocksource_register(void)
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index e565e0e..fc25f69 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -13,6 +13,7 @@
#include <linux/cpu.h>
#include <asm/kprobes.h>
#include <asm/alternative.h>
+#include <asm/text-patching.h>
#ifdef HAVE_JUMP_LABEL
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 2da6ee9..04cde52 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -45,6 +45,7 @@
#include <linux/uaccess.h>
#include <linux/memory.h>
+#include <asm/text-patching.h>
#include <asm/debugreg.h>
#include <asm/apicdef.h>
#include <asm/apic.h>
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index ae703ac..38cf7a7 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -51,6 +51,7 @@
#include <linux/ftrace.h>
#include <linux/frame.h>
+#include <asm/text-patching.h>
#include <asm/cacheflush.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 7b3b9d1..4425f59 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -29,6 +29,7 @@
#include <linux/kallsyms.h>
#include <linux/ftrace.h>
+#include <asm/text-patching.h>
#include <asm/cacheflush.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 8079508..eea2a6f 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -285,14 +285,6 @@
{
pv_info.name = "KVM";
- /*
- * KVM isn't paravirt in the sense of paravirt_enabled. A KVM
- * guest kernel works like a bare metal kernel with additional
- * features, and paravirt_enabled is about features that are
- * missing.
- */
- pv_info.paravirt_enabled = 0;
-
if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
pv_cpu_ops.io_delay = kvm_io_delay;
@@ -522,7 +514,7 @@
if (boot_cpu_data.cpuid_level < 0)
return 0; /* So we don't blow up on old processors */
- if (cpu_has_hypervisor)
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
return hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0);
return 0;
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 005c03e..477ae80 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -31,6 +31,7 @@
#include <linux/jump_label.h>
#include <linux/random.h>
+#include <asm/text-patching.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/setup.h>
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index f08ac28..7b3b3f2 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -294,7 +294,6 @@
struct pv_info pv_info = {
.name = "bare hardware",
- .paravirt_enabled = 0,
.kernel_rpl = 0,
.shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */
@@ -339,8 +338,10 @@
.write_cr8 = native_write_cr8,
#endif
.wbinvd = native_wbinvd,
- .read_msr = native_read_msr_safe,
- .write_msr = native_write_msr_safe,
+ .read_msr = native_read_msr,
+ .write_msr = native_write_msr,
+ .read_msr_safe = native_read_msr_safe,
+ .write_msr_safe = native_write_msr_safe,
.read_pmc = native_read_pmc,
.load_tr_desc = native_load_tr_desc,
.set_ldt = native_set_ldt,
diff --git a/arch/x86/kernel/pci-iommu_table.c b/arch/x86/kernel/pci-iommu_table.c
index 35ccf75..f712dfd 100644
--- a/arch/x86/kernel/pci-iommu_table.c
+++ b/arch/x86/kernel/pci-iommu_table.c
@@ -72,7 +72,7 @@
}
}
#else
-inline void check_iommu_entries(struct iommu_table_entry *start,
+void __init check_iommu_entries(struct iommu_table_entry *start,
struct iommu_table_entry *finish)
{
}
diff --git a/arch/x86/kernel/platform-quirks.c b/arch/x86/kernel/platform-quirks.c
new file mode 100644
index 0000000..b2f8a33
--- /dev/null
+++ b/arch/x86/kernel/platform-quirks.c
@@ -0,0 +1,35 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+
+#include <asm/setup.h>
+#include <asm/bios_ebda.h>
+
+void __init x86_early_init_platform_quirks(void)
+{
+ x86_platform.legacy.rtc = 1;
+ x86_platform.legacy.ebda_search = 0;
+ x86_platform.legacy.devices.pnpbios = 1;
+
+ switch (boot_params.hdr.hardware_subarch) {
+ case X86_SUBARCH_PC:
+ x86_platform.legacy.ebda_search = 1;
+ break;
+ case X86_SUBARCH_XEN:
+ case X86_SUBARCH_LGUEST:
+ case X86_SUBARCH_INTEL_MID:
+ case X86_SUBARCH_CE4100:
+ x86_platform.legacy.devices.pnpbios = 0;
+ x86_platform.legacy.rtc = 0;
+ break;
+ }
+
+ if (x86_platform.set_legacy_features)
+ x86_platform.set_legacy_features();
+}
+
+#if defined(CONFIG_PNPBIOS)
+bool __init arch_pnpbios_disabled(void)
+{
+ return x86_platform.legacy.devices.pnpbios == 0;
+}
+#endif
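This file replaces scattered paravirt_enabled() tests with explicit per-subarch capability bits that consumers simply read (see the rtc.c and ebda.c hunks elsewhere in this series). A stand-alone model of the opt-out pattern, with illustrative subarch numbers:

    #include <stdbool.h>
    #include <stdio.h>

    struct legacy_features { bool rtc, ebda_search, pnpbios; };

    static struct legacy_features legacy;

    static void init_platform_quirks(int subarch)
    {
            /* Default to classic-PC behaviour, then let the subarch opt out. */
            legacy = (struct legacy_features){ .rtc = true, .pnpbios = true };

            switch (subarch) {
            case 0:                   /* e.g. X86_SUBARCH_PC */
                    legacy.ebda_search = true;
                    break;
            case 2:                   /* e.g. X86_SUBARCH_XEN */
                    legacy.rtc = false;
                    legacy.pnpbios = false;
                    break;
            }
    }

    int main(void)
    {
            init_platform_quirks(2);
            if (!legacy.rtc)
                    printf("skipping RTC platform device\n");
            return 0;
    }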
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 6cbab31..6b16c36 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -136,25 +136,6 @@
}
}
-static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
-{
- struct user_desc ud = {
- .base_addr = addr,
- .limit = 0xfffff,
- .seg_32bit = 1,
- .limit_in_pages = 1,
- .useable = 1,
- };
- struct desc_struct *desc = t->thread.tls_array;
- desc += tls;
- fill_ldt(desc, &ud);
-}
-
-static inline u32 read_32bit_tls(struct task_struct *t, int tls)
-{
- return get_desc_base(&t->thread.tls_array[tls]);
-}
-
int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
unsigned long arg, struct task_struct *p, unsigned long tls)
{
@@ -169,9 +150,9 @@
p->thread.io_bitmap_ptr = NULL;
savesegment(gs, p->thread.gsindex);
- p->thread.gs = p->thread.gsindex ? 0 : me->thread.gs;
+ p->thread.gsbase = p->thread.gsindex ? 0 : me->thread.gsbase;
savesegment(fs, p->thread.fsindex);
- p->thread.fs = p->thread.fsindex ? 0 : me->thread.fs;
+ p->thread.fsbase = p->thread.fsindex ? 0 : me->thread.fsbase;
savesegment(es, p->thread.es);
savesegment(ds, p->thread.ds);
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
@@ -210,7 +191,7 @@
*/
if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
- if (is_ia32_task())
+ if (in_ia32_syscall())
err = do_set_thread_area(p, -1,
(struct user_desc __user *)tls, 0);
else
@@ -282,7 +263,7 @@
struct fpu *next_fpu = &next->fpu;
int cpu = smp_processor_id();
struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
- unsigned fsindex, gsindex;
+ unsigned prev_fsindex, prev_gsindex;
fpu_switch_t fpu_switch;
fpu_switch = switch_fpu_prepare(prev_fpu, next_fpu, cpu);
@@ -292,8 +273,8 @@
*
* (e.g. xen_load_tls())
*/
- savesegment(fs, fsindex);
- savesegment(gs, gsindex);
+ savesegment(fs, prev_fsindex);
+ savesegment(gs, prev_gsindex);
/*
* Load TLS before restoring any segments so that segment loads
@@ -336,66 +317,104 @@
* Switch FS and GS.
*
* These are even more complicated than DS and ES: they have
- * 64-bit bases are that controlled by arch_prctl. Those bases
- * only differ from the values in the GDT or LDT if the selector
- * is 0.
+ * 64-bit bases that are controlled by arch_prctl. The bases
+ * don't necessarily match the selectors, as user code can do
+ * any number of things to cause them to be inconsistent.
*
- * Loading the segment register resets the hidden base part of
- * the register to 0 or the value from the GDT / LDT. If the
- * next base address zero, writing 0 to the segment register is
- * much faster than using wrmsr to explicitly zero the base.
+ * We don't promise to preserve the bases if the selectors are
+ * nonzero. We also don't promise to preserve the base if the
+ * selector is zero and the base doesn't match whatever was
+ * most recently passed to ARCH_SET_FS/GS. (If/when the
+ * FSGSBASE instructions are enabled, we'll need to offer
+ * stronger guarantees.)
*
- * The thread_struct.fs and thread_struct.gs values are 0
- * if the fs and gs bases respectively are not overridden
- * from the values implied by fsindex and gsindex. They
- * are nonzero, and store the nonzero base addresses, if
- * the bases are overridden.
- *
- * (fs != 0 && fsindex != 0) || (gs != 0 && gsindex != 0) should
- * be impossible.
- *
- * Therefore we need to reload the segment registers if either
- * the old or new selector is nonzero, and we need to override
- * the base address if next thread expects it to be overridden.
- *
- * This code is unnecessarily slow in the case where the old and
- * new indexes are zero and the new base is nonzero -- it will
- * unnecessarily write 0 to the selector before writing the new
- * base address.
- *
- * Note: This all depends on arch_prctl being the only way that
- * user code can override the segment base. Once wrfsbase and
- * wrgsbase are enabled, most of this code will need to change.
+ * As an invariant,
+ * (fsbase != 0 && fsindex != 0) || (gsbase != 0 && gsindex != 0) is
+ * impossible.
*/
- if (unlikely(fsindex | next->fsindex | prev->fs)) {
+ if (next->fsindex) {
+ /* Loading a nonzero value into FS sets the index and base. */
loadsegment(fs, next->fsindex);
-
- /*
- * If user code wrote a nonzero value to FS, then it also
- * cleared the overridden base address.
- *
- * XXX: if user code wrote 0 to FS and cleared the base
- * address itself, we won't notice and we'll incorrectly
- * restore the prior base address next time we reschdule
- * the process.
- */
- if (fsindex)
- prev->fs = 0;
+ } else {
+ if (next->fsbase) {
+ /* Next index is zero but next base is nonzero. */
+ if (prev_fsindex)
+ loadsegment(fs, 0);
+ wrmsrl(MSR_FS_BASE, next->fsbase);
+ } else {
+ /* Next base and index are both zero. */
+ if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
+ /*
+ * We don't know the previous base and can't
+ * find out without RDMSR. Forcibly clear it.
+ */
+ loadsegment(fs, __USER_DS);
+ loadsegment(fs, 0);
+ } else {
+ /*
+ * If the previous index is zero and ARCH_SET_FS
+ * didn't change the base, then the base is
+ * also zero and we don't need to do anything.
+ */
+ if (prev->fsbase || prev_fsindex)
+ loadsegment(fs, 0);
+ }
+ }
}
- if (next->fs)
- wrmsrl(MSR_FS_BASE, next->fs);
- prev->fsindex = fsindex;
+ /*
+ * Save the old state and preserve the invariant.
+ * NB: if prev_fsindex == 0, then we can't reliably learn the base
+ * without RDMSR because Intel user code can zero it without telling
+ * us and AMD user code can program any 32-bit value without telling
+ * us.
+ */
+ if (prev_fsindex)
+ prev->fsbase = 0;
+ prev->fsindex = prev_fsindex;
- if (unlikely(gsindex | next->gsindex | prev->gs)) {
+ if (next->gsindex) {
+ /* Loading a nonzero value into GS sets the index and base. */
load_gs_index(next->gsindex);
-
- /* This works (and fails) the same way as fsindex above. */
- if (gsindex)
- prev->gs = 0;
+ } else {
+ if (next->gsbase) {
+ /* Next index is zero but next base is nonzero. */
+ if (prev_gsindex)
+ load_gs_index(0);
+ wrmsrl(MSR_KERNEL_GS_BASE, next->gsbase);
+ } else {
+ /* Next base and index are both zero. */
+ if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
+ /*
+ * We don't know the previous base and can't
+ * find out without RDMSR. Forcibly clear it.
+ *
+ * This contains a pointless SWAPGS pair.
+ * Fixing it would involve an explicit check
+ * for Xen or a new pvop.
+ */
+ load_gs_index(__USER_DS);
+ load_gs_index(0);
+ } else {
+ /*
+ * If the previous index is zero and ARCH_SET_GS
+ * didn't change the base, then the base is
+ * also zero and we don't need to do anything.
+ */
+ if (prev->gsbase || prev_gsindex)
+ load_gs_index(0);
+ }
+ }
}
- if (next->gs)
- wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
- prev->gsindex = gsindex;
+ /*
+ * Save the old state and preserve the invariant.
+ * NB: if prev_gsindex == 0, then we can't reliably learn the base
+ * without RDMSR because Intel user code can zero it without telling
+ * us and AMD user code can program any 32-bit value without telling
+ * us.
+ */
+ if (prev_gsindex)
+ prev->gsbase = 0;
+ prev->gsindex = prev_gsindex;
switch_fpu_finish(next_fpu, fpu_switch);
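Per register, the switch-out logic above is a three-way decision on (next index, next base), with the both-zero case split by the NULL-selector erratum. Flattened into one function (illustrative names; the real code also clears the selector before the MSR write when the previous index was nonzero):

    enum seg_action {
            LOAD_SELECTOR,   /* loading a nonzero selector also sets the base */
            WRITE_BASE_MSR,  /* wrmsrl(MSR_FS_BASE / MSR_KERNEL_GS_BASE, ...) */
            FORCE_CLEAR,     /* __USER_DS then 0: base unknowable w/o RDMSR */
            NOTHING
    };

    enum seg_action switch_action(unsigned short next_index,
                                  unsigned long next_base,
                                  unsigned short prev_index,
                                  unsigned long prev_base,
                                  int has_null_seg_bug)
    {
            if (next_index)
                    return LOAD_SELECTOR;
            if (next_base)
                    return WRITE_BASE_MSR;
            if (has_null_seg_bug)
                    return FORCE_CLEAR;
            /* Base and index both zero: a plain "load 0" suffices, and only
             * if the previous state could have left something behind. */
            return (prev_base || prev_index) ? LOAD_SELECTOR : NOTHING;
    }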
@@ -516,23 +535,11 @@
if (addr >= TASK_SIZE_OF(task))
return -EPERM;
cpu = get_cpu();
- /* handle small bases via the GDT because that's faster to
- switch. */
- if (addr <= 0xffffffff) {
- set_32bit_tls(task, GS_TLS, addr);
- if (doit) {
- load_TLS(&task->thread, cpu);
- load_gs_index(GS_TLS_SEL);
- }
- task->thread.gsindex = GS_TLS_SEL;
- task->thread.gs = 0;
- } else {
- task->thread.gsindex = 0;
- task->thread.gs = addr;
- if (doit) {
- load_gs_index(0);
- ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
- }
+ task->thread.gsindex = 0;
+ task->thread.gsbase = addr;
+ if (doit) {
+ load_gs_index(0);
+ ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
}
put_cpu();
break;
@@ -542,52 +549,30 @@
if (addr >= TASK_SIZE_OF(task))
return -EPERM;
cpu = get_cpu();
- /* handle small bases via the GDT because that's faster to
- switch. */
- if (addr <= 0xffffffff) {
- set_32bit_tls(task, FS_TLS, addr);
- if (doit) {
- load_TLS(&task->thread, cpu);
- loadsegment(fs, FS_TLS_SEL);
- }
- task->thread.fsindex = FS_TLS_SEL;
- task->thread.fs = 0;
- } else {
- task->thread.fsindex = 0;
- task->thread.fs = addr;
- if (doit) {
- /* set the selector to 0 to not confuse
- __switch_to */
- loadsegment(fs, 0);
- ret = wrmsrl_safe(MSR_FS_BASE, addr);
- }
+ task->thread.fsindex = 0;
+ task->thread.fsbase = addr;
+ if (doit) {
+ /* set the selector to 0 to not confuse __switch_to */
+ loadsegment(fs, 0);
+ ret = wrmsrl_safe(MSR_FS_BASE, addr);
}
put_cpu();
break;
case ARCH_GET_FS: {
unsigned long base;
- if (task->thread.fsindex == FS_TLS_SEL)
- base = read_32bit_tls(task, FS_TLS);
- else if (doit)
+ if (doit)
rdmsrl(MSR_FS_BASE, base);
else
- base = task->thread.fs;
+ base = task->thread.fsbase;
ret = put_user(base, (unsigned long __user *)addr);
break;
}
case ARCH_GET_GS: {
unsigned long base;
- unsigned gsindex;
- if (task->thread.gsindex == GS_TLS_SEL)
- base = read_32bit_tls(task, GS_TLS);
- else if (doit) {
- savesegment(gs, gsindex);
- if (gsindex)
- rdmsrl(MSR_KERNEL_GS_BASE, base);
- else
- base = task->thread.gs;
- } else
- base = task->thread.gs;
+ if (doit)
+ rdmsrl(MSR_KERNEL_GS_BASE, base);
+ else
+ base = task->thread.gsbase;
ret = put_user(base, (unsigned long __user *)addr);
break;
}
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 32e9d9c..e60ef91 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -303,29 +303,11 @@
switch (offset) {
case offsetof(struct user_regs_struct,fs):
- /*
- * If this is setting fs as for normal 64-bit use but
- * setting fs_base has implicitly changed it, leave it.
- */
- if ((value == FS_TLS_SEL && task->thread.fsindex == 0 &&
- task->thread.fs != 0) ||
- (value == 0 && task->thread.fsindex == FS_TLS_SEL &&
- task->thread.fs == 0))
- break;
task->thread.fsindex = value;
if (task == current)
loadsegment(fs, task->thread.fsindex);
break;
case offsetof(struct user_regs_struct,gs):
- /*
- * If this is setting gs as for normal 64-bit use but
- * setting gs_base has implicitly changed it, leave it.
- */
- if ((value == GS_TLS_SEL && task->thread.gsindex == 0 &&
- task->thread.gs != 0) ||
- (value == 0 && task->thread.gsindex == GS_TLS_SEL &&
- task->thread.gs == 0))
- break;
task->thread.gsindex = value;
if (task == current)
load_gs_index(task->thread.gsindex);
@@ -417,7 +399,7 @@
* to set either thread.fs or thread.fsindex and the
* corresponding GDT slot.
*/
- if (child->thread.fs != value)
+ if (child->thread.fsbase != value)
return do_arch_prctl(child, ARCH_SET_FS, value);
return 0;
case offsetof(struct user_regs_struct,gs_base):
@@ -426,7 +408,7 @@
*/
if (value >= TASK_SIZE_OF(child))
return -EIO;
- if (child->thread.gs != value)
+ if (child->thread.gsbase != value)
return do_arch_prctl(child, ARCH_SET_GS, value);
return 0;
#endif
@@ -453,31 +435,17 @@
#ifdef CONFIG_X86_64
case offsetof(struct user_regs_struct, fs_base): {
/*
- * do_arch_prctl may have used a GDT slot instead of
- * the MSR. To userland, it appears the same either
- * way, except the %fs segment selector might not be 0.
+ * XXX: This will not behave as expected if called on
+ * current or if fsindex != 0.
*/
- unsigned int seg = task->thread.fsindex;
- if (task->thread.fs != 0)
- return task->thread.fs;
- if (task == current)
- asm("movl %%fs,%0" : "=r" (seg));
- if (seg != FS_TLS_SEL)
- return 0;
- return get_desc_base(&task->thread.tls_array[FS_TLS]);
+ return task->thread.fsbase;
}
case offsetof(struct user_regs_struct, gs_base): {
/*
- * Exactly the same here as the %fs handling above.
+ * XXX: This will not behave as expected if called on
+ * current or if fsindex != 0.
*/
- unsigned int seg = task->thread.gsindex;
- if (task->thread.gs != 0)
- return task->thread.gs;
- if (task == current)
- asm("movl %%gs,%0" : "=r" (seg));
- if (seg != GS_TLS_SEL)
- return 0;
- return get_desc_base(&task->thread.tls_array[GS_TLS]);
+ return task->thread.gsbase;
}
#endif
}
@@ -1266,7 +1234,7 @@
compat_ulong_t caddr, compat_ulong_t cdata)
{
#ifdef CONFIG_X86_X32_ABI
- if (!is_ia32_task())
+ if (!in_ia32_syscall())
return x32_arch_ptrace(child, request, caddr, cdata);
#endif
#ifdef CONFIG_IA32_EMULATION
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index ab0adc0..a9b31eb 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -535,6 +535,15 @@
mode = reboot_mode == REBOOT_WARM ? 0x1234 : 0;
*((unsigned short *)__va(0x472)) = mode;
+ /*
+ * If an EFI capsule has been registered with the firmware then
+ * override the reboot= parameter.
+ */
+ if (efi_capsule_pending(NULL)) {
+ pr_info("EFI capsule is pending, forcing EFI reboot.\n");
+ reboot_type = BOOT_EFI;
+ }
+
for (;;) {
/* Could also try the reset bit in the Hammer NB */
switch (reboot_type) {
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 4af8d06..eceaa08 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -14,6 +14,7 @@
#include <asm/time.h>
#include <asm/intel-mid.h>
#include <asm/rtc.h>
+#include <asm/setup.h>
#ifdef CONFIG_X86_32
/*
@@ -185,22 +186,7 @@
}
}
#endif
- if (of_have_populated_dt())
- return 0;
-
- /* Intel MID platforms don't have ioport rtc */
- if (intel_mid_identify_cpu())
- return -ENODEV;
-
-#ifdef CONFIG_ACPI
- if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_CMOS_RTC) {
- /* This warning can likely go away again in a year or two. */
- pr_info("ACPI: not registering RTC platform device\n");
- return -ENODEV;
- }
-#endif
-
- if (paravirt_enabled() && !paravirt_has(RTC))
+ if (!x86_platform.legacy.rtc)
return -ENODEV;
platform_device_register(&rtc_device);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 2367ae0..c4e7b39 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -398,6 +398,11 @@
memblock_free(ramdisk_image, ramdisk_end - ramdisk_image);
}
+
+static void __init early_initrd_acpi_init(void)
+{
+ early_acpi_table_init((void *)initrd_start, initrd_end - initrd_start);
+}
#else
static void __init early_reserve_initrd(void)
{
@@ -405,6 +410,9 @@
static void __init reserve_initrd(void)
{
}
+static void __init early_initrd_acpi_init(void)
+{
+}
#endif /* CONFIG_BLK_DEV_INITRD */
static void __init parse_setup_data(void)
@@ -1138,9 +1146,7 @@
reserve_initrd();
-#if defined(CONFIG_ACPI) && defined(CONFIG_BLK_DEV_INITRD)
- acpi_initrd_override((void *)initrd_start, initrd_end - initrd_start);
-#endif
+ early_initrd_acpi_init();
vsmp_init();
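
The early_initrd_acpi_init() pair above follows the usual kernel idiom for removing #ifdef blocks from call sites: define the real function under the config option and an empty stub otherwise, so the caller stays unconditional. A generic sketch of the pattern (CONFIG_FEATURE_X and do_feature_x() are illustrative names):

	#ifdef CONFIG_FEATURE_X
	static void __init do_feature_x(void)
	{
		/* real work, compiled only when the option is set */
	}
	#else
	static void __init do_feature_x(void)
	{
	}
	#endif

	/* the call site then needs no #ifdef: */
	do_feature_x();
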
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 548ddf7..22cc2f9 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -248,18 +248,17 @@
if (config_enabled(CONFIG_X86_64))
sp -= 128;
- if (!onsigstack) {
- /* This is the X/Open sanctioned signal stack switching. */
- if (ka->sa.sa_flags & SA_ONSTACK) {
- if (current->sas_ss_size)
- sp = current->sas_ss_sp + current->sas_ss_size;
- } else if (config_enabled(CONFIG_X86_32) &&
- (regs->ss & 0xffff) != __USER_DS &&
- !(ka->sa.sa_flags & SA_RESTORER) &&
- ka->sa.sa_restorer) {
- /* This is the legacy signal stack switching. */
- sp = (unsigned long) ka->sa.sa_restorer;
- }
+ /* This is the X/Open sanctioned signal stack switching. */
+ if (ka->sa.sa_flags & SA_ONSTACK) {
+ if (sas_ss_flags(sp) == 0)
+ sp = current->sas_ss_sp + current->sas_ss_size;
+ } else if (config_enabled(CONFIG_X86_32) &&
+ !onsigstack &&
+ (regs->ss & 0xffff) != __USER_DS &&
+ !(ka->sa.sa_flags & SA_RESTORER) &&
+ ka->sa.sa_restorer) {
+ /* This is the legacy signal stack switching. */
+ sp = (unsigned long) ka->sa.sa_restorer;
}
if (fpu->fpstate_active) {
@@ -391,7 +390,7 @@
put_user_ex(&frame->uc, &frame->puc);
/* Create the ucontext. */
- if (cpu_has_xsave)
+ if (boot_cpu_has(X86_FEATURE_XSAVE))
put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
else
put_user_ex(0, &frame->uc.uc_flags);
@@ -442,7 +441,7 @@
{
unsigned long flags;
- if (cpu_has_xsave)
+ if (boot_cpu_has(X86_FEATURE_XSAVE))
flags = UC_FP_XSTATE | UC_SIGCONTEXT_SS;
else
flags = UC_SIGCONTEXT_SS;
@@ -762,7 +761,7 @@
static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
{
#ifdef CONFIG_X86_64
- if (is_ia32_task())
+ if (in_ia32_syscall())
return __NR_ia32_restart_syscall;
#endif
#ifdef CONFIG_X86_X32_ABI
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 0e4329e..fafe8b9 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1236,7 +1236,7 @@
* If we couldn't find a local APIC, then get out of here now!
*/
if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) &&
- !cpu_has_apic) {
+ !boot_cpu_has(X86_FEATURE_APIC)) {
if (!disable_apic) {
pr_err("BIOS bug, local APIC #%d not detected!...\n",
boot_cpu_physical_apicid);
diff --git a/arch/x86/kernel/sysfb_efi.c b/arch/x86/kernel/sysfb_efi.c
index 5da924b..623965e 100644
--- a/arch/x86/kernel/sysfb_efi.c
+++ b/arch/x86/kernel/sysfb_efi.c
@@ -68,6 +68,21 @@
[M_UNKNOWN] = { NULL, 0, 0, 0, 0, OVERRIDE_NONE }
};
+void efifb_setup_from_dmi(struct screen_info *si, const char *opt)
+{
+ int i;
+
+ for (i = 0; i < M_UNKNOWN; i++) {
+ if (efifb_dmi_list[i].base != 0 &&
+ !strcmp(opt, efifb_dmi_list[i].optname)) {
+ si->lfb_base = efifb_dmi_list[i].base;
+ si->lfb_linelength = efifb_dmi_list[i].stride;
+ si->lfb_width = efifb_dmi_list[i].width;
+ si->lfb_height = efifb_dmi_list[i].height;
+ }
+ }
+}
+
#define choose_value(dmivalue, fwvalue, field, flags) ({ \
typeof(fwvalue) _ret_ = fwvalue; \
if ((flags) & (field)) \
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index e72a07f..9b0185f 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -74,12 +74,6 @@
return;
}
- /* only a natively booted kernel should be using TXT */
- if (paravirt_enabled()) {
- pr_warning("non-0 tboot_addr but pv_ops is enabled\n");
- return;
- }
-
/* Map and check for tboot UUID. */
set_fixmap(FIX_TBOOT_BASE, boot_params.tboot_addr);
tboot = (struct tboot *)fix_to_virt(FIX_TBOOT_BASE);
diff --git a/arch/x86/kernel/tce_64.c b/arch/x86/kernel/tce_64.c
index ab40954..f386bad 100644
--- a/arch/x86/kernel/tce_64.c
+++ b/arch/x86/kernel/tce_64.c
@@ -40,7 +40,7 @@
static inline void flush_tce(void* tceaddr)
{
/* a single tce can't cross a cache line */
- if (cpu_has_clflush)
+ if (boot_cpu_has(X86_FEATURE_CLFLUSH))
clflush(tceaddr);
else
wbinvd();
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index 7fc5e84..9692a5e 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -114,6 +114,7 @@
int can_allocate)
{
struct user_desc info;
+ unsigned short __maybe_unused sel, modified_sel;
if (copy_from_user(&info, u_info, sizeof(info)))
return -EFAULT;
@@ -141,6 +142,47 @@
set_tls_desc(p, idx, &info, 1);
+ /*
+ * If DS, ES, FS, or GS points to the modified segment, forcibly
+ * refresh it. Only needed on x86_64 because x86_32 reloads them
+ * on return to user mode.
+ */
+ modified_sel = (idx << 3) | 3;
+
+ if (p == current) {
+#ifdef CONFIG_X86_64
+ savesegment(ds, sel);
+ if (sel == modified_sel)
+ loadsegment(ds, sel);
+
+ savesegment(es, sel);
+ if (sel == modified_sel)
+ loadsegment(es, sel);
+
+ savesegment(fs, sel);
+ if (sel == modified_sel)
+ loadsegment(fs, sel);
+
+ savesegment(gs, sel);
+ if (sel == modified_sel)
+ load_gs_index(sel);
+#endif
+
+#ifdef CONFIG_X86_32_LAZY_GS
+ savesegment(gs, sel);
+ if (sel == modified_sel)
+ loadsegment(gs, sel);
+#endif
+ } else {
+#ifdef CONFIG_X86_64
+ if (p->thread.fsindex == modified_sel)
+ p->thread.fsbase = info.base_addr;
+
+ if (p->thread.gsindex == modified_sel)
+ p->thread.gsbase = info.base_addr;
+#endif
+ }
+
return 0;
}
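
The selector arithmetic above relies on the x86 segment-selector layout: bits 3 and up index the descriptor table, bit 2 selects GDT vs. LDT, and bits 0-1 hold the requested privilege level. A standalone sketch of the (idx << 3) | 3 computation (plain C, runnable in user space):

	#include <stdio.h>

	/* Build a GDT selector with RPL 3, as the code above does. */
	static unsigned short gdt_sel(unsigned int idx)
	{
		return (unsigned short)((idx << 3) | 3);
	}

	int main(void)
	{
		/* GDT_ENTRY_TLS_MIN is 12 on 64-bit x86 at this time,
		 * so the first TLS slot yields selector 0x63. */
		printf("0x%x\n", gdt_sel(12));	/* prints 0x63 */
		return 0;
	}
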
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 06cbe25..d159048 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -51,6 +51,7 @@
#include <asm/processor.h>
#include <asm/debugreg.h>
#include <linux/atomic.h>
+#include <asm/text-patching.h>
#include <asm/ftrace.h>
#include <asm/traps.h>
#include <asm/desc.h>
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index c9c4c7c..38ba6de 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -36,7 +36,7 @@
/* native_sched_clock() is called before tsc_init(), so
we must start with the TSC soft disabled to prevent
- erroneous rdtsc usage on !cpu_has_tsc processors */
+ erroneous rdtsc usage on !boot_cpu_has(X86_FEATURE_TSC) processors */
static int __read_mostly tsc_disabled = -1;
static DEFINE_STATIC_KEY_FALSE(__use_tsc);
@@ -834,15 +834,15 @@
#ifndef CONFIG_SMP
unsigned long cpu_khz_old = cpu_khz;
- if (cpu_has_tsc) {
- tsc_khz = x86_platform.calibrate_tsc();
- cpu_khz = tsc_khz;
- cpu_data(0).loops_per_jiffy =
- cpufreq_scale(cpu_data(0).loops_per_jiffy,
- cpu_khz_old, cpu_khz);
- return 0;
- } else
+ if (!boot_cpu_has(X86_FEATURE_TSC))
return -ENODEV;
+
+ tsc_khz = x86_platform.calibrate_tsc();
+ cpu_khz = tsc_khz;
+ cpu_data(0).loops_per_jiffy = cpufreq_scale(cpu_data(0).loops_per_jiffy,
+ cpu_khz_old, cpu_khz);
+
+ return 0;
#else
return -ENODEV;
#endif
@@ -922,9 +922,6 @@
struct cpufreq_freqs *freq = data;
unsigned long *lpj;
- if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC))
- return 0;
-
lpj = &boot_cpu_data.loops_per_jiffy;
#ifdef CONFIG_SMP
if (!(freq->flags & CPUFREQ_CONST_LOOPS))
@@ -954,9 +951,9 @@
.notifier_call = time_cpufreq_notifier
};
-static int __init cpufreq_tsc(void)
+static int __init cpufreq_register_tsc_scaling(void)
{
- if (!cpu_has_tsc)
+ if (!boot_cpu_has(X86_FEATURE_TSC))
return 0;
if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
return 0;
@@ -965,7 +962,7 @@
return 0;
}
-core_initcall(cpufreq_tsc);
+core_initcall(cpufreq_register_tsc_scaling);
#endif /* CONFIG_CPU_FREQ */
@@ -1081,7 +1078,7 @@
*/
int unsynchronized_tsc(void)
{
- if (!cpu_has_tsc || tsc_unstable)
+ if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_unstable)
return 1;
#ifdef CONFIG_SMP
@@ -1205,7 +1202,7 @@
static int __init init_tsc_clocksource(void)
{
- if (!cpu_has_tsc || tsc_disabled > 0 || !tsc_khz)
+ if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_disabled > 0 || !tsc_khz)
return 0;
if (tsc_clocksource_reliable)
@@ -1242,7 +1239,7 @@
u64 lpj;
int cpu;
- if (!cpu_has_tsc) {
+ if (!boot_cpu_has(X86_FEATURE_TSC)) {
setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
return;
}
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index bf4db6e..6c1ff31 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -516,7 +516,7 @@
static inline int sizeof_long(void)
{
- return is_ia32_task() ? 4 : 8;
+ return in_ia32_syscall() ? 4 : 8;
}
static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
@@ -578,7 +578,7 @@
riprel_post_xol(auprobe, regs);
}
-static struct uprobe_xol_ops default_xol_ops = {
+static const struct uprobe_xol_ops default_xol_ops = {
.pre_xol = default_pre_xol_op,
.post_xol = default_post_xol_op,
.abort = default_abort_op,
@@ -695,7 +695,7 @@
0, insn->immediate.nbytes);
}
-static struct uprobe_xol_ops branch_xol_ops = {
+static const struct uprobe_xol_ops branch_xol_ops = {
.emulate = branch_emulate_op,
.post_xol = branch_post_xol_op,
};
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 4c941f8..9297a00 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -334,7 +334,7 @@
__brk_limit = .;
}
- . = ALIGN(PAGE_SIZE);
+ . = ALIGN(PAGE_SIZE); /* keep VO_INIT_SIZE page aligned */
_end = .;
STABS_DEBUG
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index bbbaa802..769af90 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -75,7 +75,7 @@
return 0;
/* Update OSXSAVE bit */
- if (cpu_has_xsave && best->function == 0x1) {
+ if (boot_cpu_has(X86_FEATURE_XSAVE) && best->function == 0x1) {
best->ecx &= ~F(OSXSAVE);
if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE))
best->ecx |= F(OSXSAVE);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index b6f50e8..38c0c32 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3844,7 +3844,8 @@
__reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check,
boot_cpu_data.x86_phys_bits,
context->shadow_root_level, false,
- cpu_has_gbpages, true, true);
+ boot_cpu_has(X86_FEATURE_GBPAGES),
+ true, true);
else
__reset_rsvds_bits_mask_ept(&context->shadow_zero_check,
boot_cpu_data.x86_phys_bits,
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 31346a3..fafd720 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1254,7 +1254,7 @@
kvm_load_ldt(svm->host.ldt);
#ifdef CONFIG_X86_64
loadsegment(fs, svm->host.fs);
- wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs);
+ wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gsbase);
load_gs_index(svm->host.gs);
#else
#ifdef CONFIG_X86_32_LAZY_GS
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 2f1ea2f..b72743c 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -809,8 +809,7 @@
#define host_clocks \
{VCLOCK_NONE, "none"}, \
- {VCLOCK_TSC, "tsc"}, \
- {VCLOCK_HPET, "hpet"} \
+ {VCLOCK_TSC, "tsc"} \
TRACE_EVENT(kvm_update_master_clock,
TP_PROTO(bool use_master_clock, unsigned int host_clock, bool offset_matched),
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 133679d..cb47fe3 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3390,7 +3390,7 @@
}
}
- if (cpu_has_xsaves)
+ if (boot_cpu_has(X86_FEATURE_XSAVES))
rdmsrl(MSR_IA32_XSS, host_xss);
return 0;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9b7798c..12f33e6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2611,7 +2611,7 @@
r = KVM_MAX_MCE_BANKS;
break;
case KVM_CAP_XCRS:
- r = cpu_has_xsave;
+ r = boot_cpu_has(X86_FEATURE_XSAVE);
break;
case KVM_CAP_TSC_CONTROL:
r = kvm_has_tsc_control;
@@ -3094,7 +3094,7 @@
/* Set XSTATE_BV and possibly XCOMP_BV. */
xsave->header.xfeatures = xstate_bv;
- if (cpu_has_xsaves)
+ if (boot_cpu_has(X86_FEATURE_XSAVES))
xsave->header.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED;
/*
@@ -3121,7 +3121,7 @@
static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
struct kvm_xsave *guest_xsave)
{
- if (cpu_has_xsave) {
+ if (boot_cpu_has(X86_FEATURE_XSAVE)) {
memset(guest_xsave, 0, sizeof(struct kvm_xsave));
fill_xsave((u8 *) guest_xsave->region, vcpu);
} else {
@@ -3139,7 +3139,7 @@
u64 xstate_bv =
*(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
- if (cpu_has_xsave) {
+ if (boot_cpu_has(X86_FEATURE_XSAVE)) {
/*
* Here we allow setting states that are not present in
* CPUID leaf 0xD, index 0, EDX:EAX. This is for compatibility
@@ -3160,7 +3160,7 @@
static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu,
struct kvm_xcrs *guest_xcrs)
{
- if (!cpu_has_xsave) {
+ if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
guest_xcrs->nr_xcrs = 0;
return;
}
@@ -3176,7 +3176,7 @@
{
int i, r = 0;
- if (!cpu_has_xsave)
+ if (!boot_cpu_has(X86_FEATURE_XSAVE))
return -EINVAL;
if (guest_xcrs->nr_xcrs > KVM_MAX_XCRS || guest_xcrs->flags)
@@ -5865,7 +5865,7 @@
perf_register_guest_info_callbacks(&kvm_guest_cbs);
- if (cpu_has_xsave)
+ if (boot_cpu_has(X86_FEATURE_XSAVE))
host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
kvm_lapic_init();
@@ -7293,7 +7293,7 @@
static void fx_init(struct kvm_vcpu *vcpu)
{
fpstate_init(&vcpu->arch.guest_fpu.state);
- if (cpu_has_xsaves)
+ if (boot_cpu_has(X86_FEATURE_XSAVES))
vcpu->arch.guest_fpu.state.xsave.header.xcomp_bv =
host_xcr0 | XSTATE_COMPACTION_ENABLED;
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index fd57d3a..3847e73 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -1408,13 +1408,10 @@
{
/* We're under lguest. */
pv_info.name = "lguest";
- /* Paravirt is enabled. */
- pv_info.paravirt_enabled = 1;
/* We're running at privilege level 1, not 0 as normal. */
pv_info.kernel_rpl = 1;
/* Everyone except Xen runs with this set. */
pv_info.shared_kernel_pmd = 1;
- pv_info.features = 0;
/*
* We set up all the lguest overrides for sensitive operations. These
diff --git a/arch/x86/lib/rwsem.S b/arch/x86/lib/rwsem.S
index be110ef..bf2c607 100644
--- a/arch/x86/lib/rwsem.S
+++ b/arch/x86/lib/rwsem.S
@@ -29,8 +29,10 @@
* there is contention on the semaphore.
*
* %eax contains the semaphore pointer on entry. Save the C-clobbered
- * registers (%eax, %edx and %ecx) except %eax whish is either a return
- * value or just clobbered..
+ * registers (%eax, %edx and %ecx) except %eax which is either a return
+ * value or just gets clobbered. Same is true for %edx so make sure GCC
+ * reloads it after the slow path, by making it hold a temporary, for
+ * example see ____down_write().
*/
#define save_common_regs \
@@ -106,6 +108,16 @@
ret
ENDPROC(call_rwsem_down_write_failed)
+ENTRY(call_rwsem_down_write_failed_killable)
+ FRAME_BEGIN
+ save_common_regs
+ movq %rax,%rdi
+ call rwsem_down_write_failed_killable
+ restore_common_regs
+ FRAME_END
+ ret
+ENDPROC(call_rwsem_down_write_failed_killable)
+
ENTRY(call_rwsem_wake)
FRAME_BEGIN
/* do nothing if still outstanding active readers */
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 91d93b9..b559d92 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -612,7 +612,7 @@
{
stac();
#ifdef CONFIG_X86_INTEL_USERCOPY
- if (n > 64 && cpu_has_xmm2)
+ if (n > 64 && static_cpu_has(X86_FEATURE_XMM2))
n = __copy_user_zeroing_intel_nocache(to, from, n);
else
__copy_user_zeroing(to, from, n);
@@ -629,7 +629,7 @@
{
stac();
#ifdef CONFIG_X86_INTEL_USERCOPY
- if (n > 64 && cpu_has_xmm2)
+ if (n > 64 && static_cpu_has(X86_FEATURE_XMM2))
n = __copy_user_intel_nocache(to, from, n);
else
__copy_user(to, from, n);
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index f989132..62c0043 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -2,7 +2,7 @@
KCOV_INSTRUMENT_tlb.o := n
obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
- pat.o pgtable.o physaddr.o gup.o setup_nx.o
+ pat.o pgtable.o physaddr.o gup.o setup_nx.o tlb.o
# Make sure __phys_addr has no stackprotector
nostackp := $(call cc-option, -fno-stack-protector)
@@ -12,7 +12,6 @@
CFLAGS_fault.o := -I$(src)/../include/asm/trace
obj-$(CONFIG_X86_PAT) += pat_rbtree.o
-obj-$(CONFIG_SMP) += tlb.o
obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 82447b3..4bb53b8 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -1,5 +1,6 @@
#include <linux/module.h>
#include <asm/uaccess.h>
+#include <asm/traps.h>
typedef bool (*ex_handler_t)(const struct exception_table_entry *,
struct pt_regs *, int);
@@ -42,6 +43,43 @@
}
EXPORT_SYMBOL(ex_handler_ext);
+bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr)
+{
+ WARN_ONCE(1, "unchecked MSR access error: RDMSR from 0x%x\n",
+ (unsigned int)regs->cx);
+
+ /* Pretend that the read succeeded and returned 0. */
+ regs->ip = ex_fixup_addr(fixup);
+ regs->ax = 0;
+ regs->dx = 0;
+ return true;
+}
+EXPORT_SYMBOL(ex_handler_rdmsr_unsafe);
+
+bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr)
+{
+ WARN_ONCE(1, "unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x)\n",
+ (unsigned int)regs->cx,
+ (unsigned int)regs->dx, (unsigned int)regs->ax);
+
+ /* Pretend that the write succeeded. */
+ regs->ip = ex_fixup_addr(fixup);
+ return true;
+}
+EXPORT_SYMBOL(ex_handler_wrmsr_unsafe);
+
+bool ex_handler_clear_fs(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr)
+{
+ if (static_cpu_has(X86_BUG_NULL_SEG))
+ asm volatile ("mov %0, %%fs" : : "rm" (__USER_DS));
+ asm volatile ("mov %0, %%fs" : : "rm" (0));
+ return ex_handler_default(fixup, regs, trapnr);
+}
+EXPORT_SYMBOL(ex_handler_clear_fs);
+
bool ex_has_fault_handler(unsigned long ip)
{
const struct exception_table_entry *e;
@@ -82,24 +120,46 @@
return handler(e, regs, trapnr);
}
+extern unsigned int early_recursion_flag;
+
/* Restricted version used during very early boot */
-int __init early_fixup_exception(unsigned long *ip)
+void __init early_fixup_exception(struct pt_regs *regs, int trapnr)
{
- const struct exception_table_entry *e;
- unsigned long new_ip;
- ex_handler_t handler;
+ /* Ignore early NMIs. */
+ if (trapnr == X86_TRAP_NMI)
+ return;
- e = search_exception_tables(*ip);
- if (!e)
- return 0;
+ if (early_recursion_flag > 2)
+ goto halt_loop;
- new_ip = ex_fixup_addr(e);
- handler = ex_fixup_handler(e);
+ if (regs->cs != __KERNEL_CS)
+ goto fail;
- /* special handling not supported during early boot */
- if (handler != ex_handler_default)
- return 0;
+ /*
+ * The full exception fixup machinery is available as soon as
+ * the early IDT is loaded. This means that it is the
+ * responsibility of extable users to either function correctly
+ * when handlers are invoked early or to simply avoid causing
+ * exceptions before they're ready to handle them.
+ *
+ * This is better than filtering which handlers can be used,
+ * because refusing to call a handler here is guaranteed to
+ * result in a hard-to-debug panic.
+ *
+ * Keep in mind that not all vectors actually get here. Early
+ * page faults, for example, are special.
+ */
+ if (fixup_exception(regs, trapnr))
+ return;
- *ip = new_ip;
- return 1;
+fail:
+ early_printk("PANIC: early exception 0x%02x IP %lx:%lx error %lx cr2 0x%lx\n",
+ (unsigned)trapnr, (unsigned long)regs->cs, regs->ip,
+ regs->orig_ax, read_cr2());
+
+ show_regs(regs);
+
+halt_loop:
+ while (true)
+ halt();
}
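
These custom handlers hook into the exception-table mechanism: the faulting instruction's address and a handler pointer are recorded in __ex_table, and fixup_exception() dispatches to the handler. A hedged sketch of how an MSR write might attach ex_handler_wrmsr_unsafe via the existing _ASM_EXTABLE_HANDLE macro (illustrative; the real wrappers live in asm/msr.h):

	/* Sketch, modeled on the kernel's asm/msr.h wrappers. */
	static inline void wrmsr_unsafe(unsigned int msr, u32 low, u32 high)
	{
		asm volatile("1: wrmsr\n"
			     "2:\n"
			     _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_unsafe)
			     : : "c" (msr), "a" (low), "d" (high) : "memory");
	}

If the wrmsr at label 1 faults, the table entry routes the #GP to ex_handler_wrmsr_unsafe above, which warns once and resumes at label 2 as if the write had succeeded.
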
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 740d7ac..14a9505 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -162,7 +162,7 @@
unsigned long ps = memparse(opt, &opt);
if (ps == PMD_SIZE) {
hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
- } else if (ps == PUD_SIZE && cpu_has_gbpages) {
+ } else if (ps == PUD_SIZE && boot_cpu_has(X86_FEATURE_GBPAGES)) {
hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
} else {
printk(KERN_ERR "hugepagesz: Unsupported page size %lu M\n",
@@ -177,7 +177,7 @@
static __init int gigantic_pages_init(void)
{
/* With compaction or CMA we can allocate gigantic pages at runtime */
- if (cpu_has_gbpages && !size_to_hstate(1UL << PUD_SHIFT))
+ if (boot_cpu_has(X86_FEATURE_GBPAGES) && !size_to_hstate(1UL << PUD_SHIFT))
hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
return 0;
}
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
new file mode 100644
index 0000000..ec21796
--- /dev/null
+++ b/arch/x86/mm/ident_map.c
@@ -0,0 +1,79 @@
+/*
+ * Helper routines for building identity mapping page tables. This is
+ * included by both the compressed kernel and the regular kernel.
+ */
+
+static void ident_pmd_init(unsigned long pmd_flag, pmd_t *pmd_page,
+ unsigned long addr, unsigned long end)
+{
+ addr &= PMD_MASK;
+ for (; addr < end; addr += PMD_SIZE) {
+ pmd_t *pmd = pmd_page + pmd_index(addr);
+
+ if (!pmd_present(*pmd))
+ set_pmd(pmd, __pmd(addr | pmd_flag));
+ }
+}
+
+static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
+ unsigned long addr, unsigned long end)
+{
+ unsigned long next;
+
+ for (; addr < end; addr = next) {
+ pud_t *pud = pud_page + pud_index(addr);
+ pmd_t *pmd;
+
+ next = (addr & PUD_MASK) + PUD_SIZE;
+ if (next > end)
+ next = end;
+
+ if (pud_present(*pud)) {
+ pmd = pmd_offset(pud, 0);
+ ident_pmd_init(info->pmd_flag, pmd, addr, next);
+ continue;
+ }
+ pmd = (pmd_t *)info->alloc_pgt_page(info->context);
+ if (!pmd)
+ return -ENOMEM;
+ ident_pmd_init(info->pmd_flag, pmd, addr, next);
+ set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
+ }
+
+ return 0;
+}
+
+int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
+ unsigned long addr, unsigned long end)
+{
+ unsigned long next;
+ int result;
+ int off = info->kernel_mapping ? pgd_index(__PAGE_OFFSET) : 0;
+
+ for (; addr < end; addr = next) {
+ pgd_t *pgd = pgd_page + pgd_index(addr) + off;
+ pud_t *pud;
+
+ next = (addr & PGDIR_MASK) + PGDIR_SIZE;
+ if (next > end)
+ next = end;
+
+ if (pgd_present(*pgd)) {
+ pud = pud_offset(pgd, 0);
+ result = ident_pud_init(info, pud, addr, next);
+ if (result)
+ return result;
+ continue;
+ }
+
+ pud = (pud_t *)info->alloc_pgt_page(info->context);
+ if (!pud)
+ return -ENOMEM;
+ result = ident_pud_init(info, pud, addr, next);
+ if (result)
+ return result;
+ set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
+ }
+
+ return 0;
+}
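
kernel_ident_mapping_init() is driven entirely through struct x86_mapping_info, which supplies the page-table allocator and the PMD flags; that indirection is what lets both the compressed kernel and the regular kernel include this file. A hedged usage sketch (the allocator callback is illustrative; real callers hand in their own pool and context):

	static void *alloc_pgt_page_stub(void *context)
	{
		/* The contract: return one zeroed page, or NULL on failure.
		 * Real callers allocate from their own pool. */
		return (void *)get_zeroed_page(GFP_KERNEL);
	}

	static int map_one_gig_identity(pgd_t *pgd)
	{
		struct x86_mapping_info info = {
			.alloc_pgt_page	= alloc_pgt_page_stub,
			.context	= NULL,
			.pmd_flag	= __PAGE_KERNEL_LARGE_EXEC,
		};

		/* Identity-map the first 1 GiB: virt == phys. */
		return kernel_ident_mapping_init(&info, pgd, 0, 1UL << 30);
	}
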
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 9d56f27..372aad2 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -157,23 +157,23 @@
* This will simplify cpa(), which otherwise needs to support splitting
* large pages into small in interrupt context, etc.
*/
- if (cpu_has_pse && !debug_pagealloc_enabled())
+ if (boot_cpu_has(X86_FEATURE_PSE) && !debug_pagealloc_enabled())
page_size_mask |= 1 << PG_LEVEL_2M;
#endif
/* Enable PSE if available */
- if (cpu_has_pse)
+ if (boot_cpu_has(X86_FEATURE_PSE))
cr4_set_bits_and_update_boot(X86_CR4_PSE);
/* Enable PGE if available */
- if (cpu_has_pge) {
+ if (boot_cpu_has(X86_FEATURE_PGE)) {
cr4_set_bits_and_update_boot(X86_CR4_PGE);
__supported_pte_mask |= _PAGE_GLOBAL;
} else
__supported_pte_mask &= ~_PAGE_GLOBAL;
/* Enable 1 GB linear kernel mappings if available: */
- if (direct_gbpages && cpu_has_gbpages) {
+ if (direct_gbpages && boot_cpu_has(X86_FEATURE_GBPAGES)) {
printk(KERN_INFO "Using GB pages for direct mapping\n");
page_size_mask |= 1 << PG_LEVEL_1G;
} else {
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index bd7a9b9..84df150 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -284,7 +284,7 @@
*/
mapping_iter = 1;
- if (!cpu_has_pse)
+ if (!boot_cpu_has(X86_FEATURE_PSE))
use_pse = 0;
repeat:
@@ -804,9 +804,6 @@
BUILD_BUG_ON(VMALLOC_START >= VMALLOC_END);
#undef high_memory
#undef __FIXADDR_TOP
-#ifdef CONFIG_RANDOMIZE_BASE
- BUILD_BUG_ON(CONFIG_RANDOMIZE_BASE_MAX_OFFSET > KERNEL_IMAGE_SIZE);
-#endif
#ifdef CONFIG_HIGHMEM
BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 214afda..bce2e5d 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -58,79 +58,7 @@
#include "mm_internal.h"
-static void ident_pmd_init(unsigned long pmd_flag, pmd_t *pmd_page,
- unsigned long addr, unsigned long end)
-{
- addr &= PMD_MASK;
- for (; addr < end; addr += PMD_SIZE) {
- pmd_t *pmd = pmd_page + pmd_index(addr);
-
- if (!pmd_present(*pmd))
- set_pmd(pmd, __pmd(addr | pmd_flag));
- }
-}
-static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
- unsigned long addr, unsigned long end)
-{
- unsigned long next;
-
- for (; addr < end; addr = next) {
- pud_t *pud = pud_page + pud_index(addr);
- pmd_t *pmd;
-
- next = (addr & PUD_MASK) + PUD_SIZE;
- if (next > end)
- next = end;
-
- if (pud_present(*pud)) {
- pmd = pmd_offset(pud, 0);
- ident_pmd_init(info->pmd_flag, pmd, addr, next);
- continue;
- }
- pmd = (pmd_t *)info->alloc_pgt_page(info->context);
- if (!pmd)
- return -ENOMEM;
- ident_pmd_init(info->pmd_flag, pmd, addr, next);
- set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
- }
-
- return 0;
-}
-
-int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
- unsigned long addr, unsigned long end)
-{
- unsigned long next;
- int result;
- int off = info->kernel_mapping ? pgd_index(__PAGE_OFFSET) : 0;
-
- for (; addr < end; addr = next) {
- pgd_t *pgd = pgd_page + pgd_index(addr) + off;
- pud_t *pud;
-
- next = (addr & PGDIR_MASK) + PGDIR_SIZE;
- if (next > end)
- next = end;
-
- if (pgd_present(*pgd)) {
- pud = pud_offset(pgd, 0);
- result = ident_pud_init(info, pud, addr, next);
- if (result)
- return result;
- continue;
- }
-
- pud = (pud_t *)info->alloc_pgt_page(info->context);
- if (!pud)
- return -ENOMEM;
- result = ident_pud_init(info, pud, addr, next);
- if (result)
- return result;
- set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
- }
-
- return 0;
-}
+#include "ident_map.c"
/*
* NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the
@@ -1295,7 +1223,7 @@
struct vmem_altmap *altmap = to_vmem_altmap(start);
int err;
- if (cpu_has_pse)
+ if (boot_cpu_has(X86_FEATURE_PSE))
err = vmemmap_populate_hugepages(start, end, node, altmap);
else if (altmap) {
pr_err_once("%s: no cpu support for altmap allocations\n",
@@ -1338,7 +1266,7 @@
}
get_page_bootmem(section_nr, pud_page(*pud), MIX_SECTION_INFO);
- if (!cpu_has_pse) {
+ if (!boot_cpu_has(X86_FEATURE_PSE)) {
next = (addr + PAGE_SIZE) & PAGE_MASK;
pmd = pmd_offset(pud, addr);
if (pmd_none(*pmd))
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 0d8d53d..f089491 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -378,7 +378,7 @@
int __init arch_ioremap_pud_supported(void)
{
#ifdef CONFIG_X86_64
- return cpu_has_gbpages;
+ return boot_cpu_has(X86_FEATURE_GBPAGES);
#else
return 0;
#endif
@@ -386,7 +386,7 @@
int __init arch_ioremap_pmd_supported(void)
{
- return cpu_has_pse;
+ return boot_cpu_has(X86_FEATURE_PSE);
}
/*
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 01be9ec..7a1f7bb 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -1055,7 +1055,7 @@
/*
* Map everything starting from the Gb boundary, possibly with 1G pages
*/
- while (cpu_has_gbpages && end - start >= PUD_SIZE) {
+ while (boot_cpu_has(X86_FEATURE_GBPAGES) && end - start >= PUD_SIZE) {
set_pud(pud, __pud(cpa->pfn << PAGE_SHIFT | _PAGE_PSE |
massage_pgprot(pud_pgprot)));
@@ -1125,8 +1125,14 @@
static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr,
int primary)
{
- if (cpa->pgd)
+ if (cpa->pgd) {
+ /*
+ * Right now, we only execute this code path when mapping
+ * the EFI virtual memory map regions, no other users
+ * provide a ->pgd value. This may change in the future.
+ */
return populate_pgd(cpa, vaddr);
+ }
/*
* Ignore all non primary paths.
@@ -1460,7 +1466,7 @@
* error case we fall back to cpa_flush_all (which uses
* WBINVD):
*/
- if (!ret && cpu_has_clflush) {
+ if (!ret && boot_cpu_has(X86_FEATURE_CLFLUSH)) {
if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) {
cpa_flush_array(addr, numpages, cache,
cpa.flags, pages);
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index faec01e..fb0604f 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -40,11 +40,22 @@
static bool boot_cpu_done;
static int __read_mostly __pat_enabled = IS_ENABLED(CONFIG_X86_PAT);
+static void init_cache_modes(void);
-static inline void pat_disable(const char *reason)
+void pat_disable(const char *reason)
{
+ if (!__pat_enabled)
+ return;
+
+ if (boot_cpu_done) {
+ WARN_ONCE(1, "x86/PAT: PAT cannot be disabled after initialization\n");
+ return;
+ }
+
__pat_enabled = 0;
pr_info("x86/PAT: %s\n", reason);
+
+ init_cache_modes();
}
static int __init nopat(char *str)
@@ -181,7 +192,7 @@
* configuration.
* Using lower indices is preferred, so we start with highest index.
*/
-void pat_init_cache_modes(u64 pat)
+static void __init_cache_modes(u64 pat)
{
enum page_cache_mode cache;
char pat_msg[33];
@@ -202,14 +213,11 @@
{
u64 tmp_pat;
- if (!cpu_has_pat) {
+ if (!boot_cpu_has(X86_FEATURE_PAT)) {
pat_disable("PAT not supported by CPU.");
return;
}
- if (!pat_enabled())
- goto done;
-
rdmsrl(MSR_IA32_CR_PAT, tmp_pat);
if (!tmp_pat) {
pat_disable("PAT MSR is 0, disabled.");
@@ -218,16 +226,12 @@
wrmsrl(MSR_IA32_CR_PAT, pat);
-done:
- pat_init_cache_modes(pat);
+ __init_cache_modes(pat);
}
static void pat_ap_init(u64 pat)
{
- if (!pat_enabled())
- return;
-
- if (!cpu_has_pat) {
+ if (!boot_cpu_has(X86_FEATURE_PAT)) {
/*
* If this happens we are on a secondary CPU, but switched to
* PAT on the boot CPU. We have no way to undo PAT.
@@ -238,18 +242,32 @@
wrmsrl(MSR_IA32_CR_PAT, pat);
}
-void pat_init(void)
+static void init_cache_modes(void)
{
- u64 pat;
- struct cpuinfo_x86 *c = &boot_cpu_data;
+ u64 pat = 0;
+ static int init_cm_done;
- if (!pat_enabled()) {
+ if (init_cm_done)
+ return;
+
+ if (boot_cpu_has(X86_FEATURE_PAT)) {
+ /*
+ * CPU supports PAT. Set PAT table to be consistent with
+ * PAT MSR. This case supports "nopat" boot option, and
+ * virtual machine environments which support PAT without
+ * MTRRs. Specifically, Xen has a unique setup for the PAT MSR.
+ *
+ * If PAT MSR returns 0, it is considered invalid and emulates
+ * as No PAT.
+ */
+ rdmsrl(MSR_IA32_CR_PAT, pat);
+ }
+
+ if (!pat) {
/*
* No PAT. Emulate the PAT table that corresponds to the two
- * cache bits, PWT (Write Through) and PCD (Cache Disable). This
- * setup is the same as the BIOS default setup when the system
- * has PAT but the "nopat" boot option has been specified. This
- * emulated PAT table is used when MSR_IA32_CR_PAT returns 0.
+ * cache bits, PWT (Write Through) and PCD (Cache Disable).
+ * This setup is also the same as the BIOS default setup.
*
* PTE encoding:
*
@@ -266,10 +284,36 @@
*/
pat = PAT(0, WB) | PAT(1, WT) | PAT(2, UC_MINUS) | PAT(3, UC) |
PAT(4, WB) | PAT(5, WT) | PAT(6, UC_MINUS) | PAT(7, UC);
+ }
- } else if ((c->x86_vendor == X86_VENDOR_INTEL) &&
- (((c->x86 == 0x6) && (c->x86_model <= 0xd)) ||
- ((c->x86 == 0xf) && (c->x86_model <= 0x6)))) {
+ __init_cache_modes(pat);
+
+ init_cm_done = 1;
+}
+
+/**
+ * pat_init - Initialize PAT MSR and PAT table
+ *
+ * This function initializes PAT MSR and PAT table with an OS-defined value
+ * to enable additional cache attributes, WC and WT.
+ *
+ * This function must be called on all CPUs using the specific sequence of
+ * operations defined in Intel SDM. mtrr_rendezvous_handler() provides this
+ * procedure for PAT.
+ */
+void pat_init(void)
+{
+ u64 pat;
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+
+ if (!pat_enabled()) {
+ init_cache_modes();
+ return;
+ }
+
+ if ((c->x86_vendor == X86_VENDOR_INTEL) &&
+ (((c->x86 == 0x6) && (c->x86_model <= 0xd)) ||
+ ((c->x86 == 0xf) && (c->x86_model <= 0x6)))) {
/*
* PAT support with the lower four entries. Intel Pentium 2,
* 3, M, and 4 are affected by PAT errata, which makes the
@@ -734,25 +778,6 @@
if (file->f_flags & O_DSYNC)
pcm = _PAGE_CACHE_MODE_UC_MINUS;
-#ifdef CONFIG_X86_32
- /*
- * On the PPro and successors, the MTRRs are used to set
- * memory types for physical addresses outside main memory,
- * so blindly setting UC or PWT on those pages is wrong.
- * For Pentiums and earlier, the surround logic should disable
- * caching for the high addresses through the KEN pin, but
- * we maintain the tradition of paranoia in this code.
- */
- if (!pat_enabled() &&
- !(boot_cpu_has(X86_FEATURE_MTRR) ||
- boot_cpu_has(X86_FEATURE_K6_MTRR) ||
- boot_cpu_has(X86_FEATURE_CYRIX_ARR) ||
- boot_cpu_has(X86_FEATURE_CENTAUR_MCR)) &&
- (pfn << PAGE_SHIFT) >= __pa(high_memory)) {
- pcm = _PAGE_CACHE_MODE_UC;
- }
-#endif
-
*vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) |
cachemode2protval(pcm));
return 1;
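
The PAT(x, y) expressions above pack eight 8-bit memory-type entries into the 64-bit IA32_PAT MSR, with entry i occupying bits i*8..i*8+7. A standalone sketch of the encoding (plain C; the PAT_* values are the SDM memory-type encodings used by the kernel):

	#include <stdio.h>
	#include <stdint.h>

	enum { PAT_UC = 0, PAT_WC = 1, PAT_WT = 4,
	       PAT_WP = 5, PAT_WB = 6, PAT_UC_MINUS = 7 };

	#define PAT(x, y) ((uint64_t)(PAT_ ## y) << ((x) * 8))

	int main(void)
	{
		/* The no-PAT fallback table built in init_cache_modes(). */
		uint64_t pat = PAT(0, WB) | PAT(1, WT) | PAT(2, UC_MINUS) | PAT(3, UC) |
			       PAT(4, WB) | PAT(5, WT) | PAT(6, UC_MINUS) | PAT(7, UC);

		printf("0x%016llx\n", (unsigned long long)pat); /* 0x0007040600070406 */
		return 0;
	}
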
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index fe9b9f7..5643fd0 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -28,6 +28,8 @@
* Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
*/
+#ifdef CONFIG_SMP
+
struct flush_tlb_info {
struct mm_struct *flush_mm;
unsigned long flush_start;
@@ -57,6 +59,118 @@
}
EXPORT_SYMBOL_GPL(leave_mm);
+#endif /* CONFIG_SMP */
+
+void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+ struct task_struct *tsk)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ switch_mm_irqs_off(prev, next, tsk);
+ local_irq_restore(flags);
+}
+
+void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+ struct task_struct *tsk)
+{
+ unsigned cpu = smp_processor_id();
+
+ if (likely(prev != next)) {
+#ifdef CONFIG_SMP
+ this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+ this_cpu_write(cpu_tlbstate.active_mm, next);
+#endif
+ cpumask_set_cpu(cpu, mm_cpumask(next));
+
+ /*
+ * Re-load page tables.
+ *
+ * This logic has an ordering constraint:
+ *
+ * CPU 0: Write to a PTE for 'next'
+ * CPU 0: load bit 1 in mm_cpumask. if nonzero, send IPI.
+ * CPU 1: set bit 1 in next's mm_cpumask
+ * CPU 1: load from the PTE that CPU 0 writes (implicit)
+ *
+ * We need to prevent an outcome in which CPU 1 observes
+ * the new PTE value and CPU 0 observes bit 1 clear in
+ * mm_cpumask. (If that occurs, then the IPI will never
+ * be sent, and CPU 0's TLB will contain a stale entry.)
+ *
+ * The bad outcome can occur if either CPU's load is
+ * reordered before that CPU's store, so both CPUs must
+ * execute full barriers to prevent this from happening.
+ *
+ * Thus, switch_mm needs a full barrier between the
+ * store to mm_cpumask and any operation that could load
+ * from next->pgd. TLB fills are special and can happen
+ * due to instruction fetches or for no reason at all,
+ * and neither LOCK nor MFENCE orders them.
+ * Fortunately, load_cr3() is serializing and gives the
+ * ordering guarantee we need.
+ *
+ */
+ load_cr3(next->pgd);
+
+ trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+
+ /* Stop flush ipis for the previous mm */
+ cpumask_clear_cpu(cpu, mm_cpumask(prev));
+
+ /* Load per-mm CR4 state */
+ load_mm_cr4(next);
+
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
+ /*
+ * Load the LDT, if the LDT is different.
+ *
+ * It's possible that prev->context.ldt doesn't match
+ * the LDT register. This can happen if leave_mm(prev)
+ * was called and then modify_ldt changed
+ * prev->context.ldt but suppressed an IPI to this CPU.
+ * In this case, prev->context.ldt != NULL, because we
+ * never set context.ldt to NULL while the mm still
+ * exists. That means that next->context.ldt !=
+ * prev->context.ldt, because mms never share an LDT.
+ */
+ if (unlikely(prev->context.ldt != next->context.ldt))
+ load_mm_ldt(next);
+#endif
+ }
+#ifdef CONFIG_SMP
+ else {
+ this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+ BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next);
+
+ if (!cpumask_test_cpu(cpu, mm_cpumask(next))) {
+ /*
+ * On established mms, the mm_cpumask is only changed
+ * from irq context, from ptep_clear_flush() while in
+ * lazy tlb mode, and here. Irqs are blocked during
+ * schedule, protecting us from simultaneous changes.
+ */
+ cpumask_set_cpu(cpu, mm_cpumask(next));
+
+ /*
+ * We were in lazy tlb mode and leave_mm disabled
+ * tlb flush IPI delivery. We must reload CR3
+ * to make sure to use no freed page tables.
+ *
+ * As above, load_cr3() is serializing and orders TLB
+ * fills with respect to the mm_cpumask write.
+ */
+ load_cr3(next->pgd);
+ trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+ load_mm_cr4(next);
+ load_mm_ldt(next);
+ }
+ }
+#endif
+}
+
+#ifdef CONFIG_SMP
+
/*
* The flush IPI assumes that a thread switch happens in this order:
* [cpu0: the cpu that switches]
@@ -353,3 +467,5 @@
return 0;
}
late_initcall(create_tlb_single_page_flush_ceiling);
+
+#endif /* CONFIG_SMP */
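
The ordering argument in the switch_mm_irqs_off() comment pairs a store-then-load on each side; a compact illustration of the interleaving it rules out (pseudocode in comment form; load_cr3() serves as the full barrier on the switching side):

	/*
	 *   CPU 1 (switch_mm_irqs_off)       CPU 0 (flush initiator)
	 *   ---------------------------      -----------------------
	 *   cpumask_set_cpu(1, next)         set_pte(...)
	 *   load_cr3(next->pgd)  [barrier]   smp_mb() or atomic op
	 *   ...TLB fills see new PTE...      if cpumask has CPU 1: send IPI
	 *
	 * If either CPU's load were allowed to pass its earlier store,
	 * CPU 0 could miss CPU 1 in the mask while CPU 1 still caches
	 * the stale translation - the exact failure described above.
	 */
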
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 0e07e09..28c0412 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -636,7 +636,7 @@
__u8 cpu_model = boot_cpu_data.x86_model;
struct op_x86_model_spec *spec = &op_ppro_spec; /* default */
- if (force_cpu_type == arch_perfmon && cpu_has_arch_perfmon)
+ if (force_cpu_type == arch_perfmon && boot_cpu_has(X86_FEATURE_ARCH_PERFMON))
return 0;
/*
@@ -700,7 +700,7 @@
char *cpu_type = NULL;
int ret = 0;
- if (!cpu_has_apic)
+ if (!boot_cpu_has(X86_FEATURE_APIC))
return -ENODEV;
if (force_cpu_type == timer)
@@ -761,7 +761,7 @@
if (cpu_type)
break;
- if (!cpu_has_arch_perfmon)
+ if (!boot_cpu_has(X86_FEATURE_ARCH_PERFMON))
return -ENODEV;
/* use arch perfmon as fallback */
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index d90528e..350f709 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -75,7 +75,7 @@
u64 val;
int i;
- if (cpu_has_arch_perfmon) {
+ if (boot_cpu_has(X86_FEATURE_ARCH_PERFMON)) {
union cpuid10_eax eax;
eax.full = cpuid_eax(0xa);
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 3cd6983..b2a4e2a 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -396,7 +396,6 @@
return -ENODEV;
printk(KERN_INFO "PCI: Using ACPI for IRQ routing\n");
- acpi_irq_penalty_init();
pcibios_enable_irq = acpi_pci_irq_enable;
pcibios_disable_irq = acpi_pci_irq_disable;
x86_init.pci.init_irq = x86_init_noop;
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index beac4df..4bd08b0 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -445,7 +445,7 @@
uint32_t eax = cpuid_eax(xen_cpuid_base() + 4);
if (((eax & XEN_HVM_CPUID_X2APIC_VIRT) && x2apic_mode) ||
- ((eax & XEN_HVM_CPUID_APIC_ACCESS_VIRT) && cpu_has_apic))
+ ((eax & XEN_HVM_CPUID_APIC_ACCESS_VIRT) && boot_cpu_has(X86_FEATURE_APIC)))
return;
}
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 994a7df8..f93545e 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -54,10 +54,6 @@
#include <asm/rtc.h>
#include <asm/uv/uv.h>
-#define EFI_DEBUG
-
-struct efi_memory_map memmap;
-
static struct efi efi_phys __initdata;
static efi_system_table_t efi_systab __initdata;
@@ -119,11 +115,10 @@
void __init efi_find_mirror(void)
{
- void *p;
+ efi_memory_desc_t *md;
u64 mirror_size = 0, total_size = 0;
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
- efi_memory_desc_t *md = p;
+ for_each_efi_memory_desc(md) {
unsigned long long start = md->phys_addr;
unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
@@ -146,10 +141,9 @@
static void __init do_add_efi_memmap(void)
{
- void *p;
+ efi_memory_desc_t *md;
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
- efi_memory_desc_t *md = p;
+ for_each_efi_memory_desc(md) {
unsigned long long start = md->phys_addr;
unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
int e820_type;
@@ -209,47 +203,47 @@
#else
pmap = (e->efi_memmap | ((__u64)e->efi_memmap_hi << 32));
#endif
- memmap.phys_map = pmap;
- memmap.nr_map = e->efi_memmap_size /
+ efi.memmap.phys_map = pmap;
+ efi.memmap.nr_map = e->efi_memmap_size /
e->efi_memdesc_size;
- memmap.desc_size = e->efi_memdesc_size;
- memmap.desc_version = e->efi_memdesc_version;
+ efi.memmap.desc_size = e->efi_memdesc_size;
+ efi.memmap.desc_version = e->efi_memdesc_version;
- memblock_reserve(pmap, memmap.nr_map * memmap.desc_size);
+ WARN(efi.memmap.desc_version != 1,
+ "Unexpected EFI_MEMORY_DESCRIPTOR version %ld",
+ efi.memmap.desc_version);
- efi.memmap = &memmap;
+ memblock_reserve(pmap, efi.memmap.nr_map * efi.memmap.desc_size);
return 0;
}
void __init efi_print_memmap(void)
{
-#ifdef EFI_DEBUG
efi_memory_desc_t *md;
- void *p;
- int i;
+ int i = 0;
- for (p = memmap.map, i = 0;
- p < memmap.map_end;
- p += memmap.desc_size, i++) {
+ for_each_efi_memory_desc(md) {
char buf[64];
- md = p;
pr_info("mem%02u: %s range=[0x%016llx-0x%016llx] (%lluMB)\n",
- i, efi_md_typeattr_format(buf, sizeof(buf), md),
+ i++, efi_md_typeattr_format(buf, sizeof(buf), md),
md->phys_addr,
md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1,
(md->num_pages >> (20 - EFI_PAGE_SHIFT)));
}
-#endif /* EFI_DEBUG */
}
void __init efi_unmap_memmap(void)
{
+ unsigned long size;
+
clear_bit(EFI_MEMMAP, &efi.flags);
- if (memmap.map) {
- early_memunmap(memmap.map, memmap.nr_map * memmap.desc_size);
- memmap.map = NULL;
+
+ size = efi.memmap.nr_map * efi.memmap.desc_size;
+ if (efi.memmap.map) {
+ early_memunmap(efi.memmap.map, size);
+ efi.memmap.map = NULL;
}
}
@@ -352,8 +346,6 @@
efi.systab->hdr.revision >> 16,
efi.systab->hdr.revision & 0xffff);
- set_bit(EFI_SYSTEM_TABLES, &efi.flags);
-
return 0;
}
@@ -440,17 +432,22 @@
static int __init efi_memmap_init(void)
{
+ unsigned long addr, size;
+
if (efi_enabled(EFI_PARAVIRT))
return 0;
/* Map the EFI memory map */
- memmap.map = early_memremap((unsigned long)memmap.phys_map,
- memmap.nr_map * memmap.desc_size);
- if (memmap.map == NULL) {
+ size = efi.memmap.nr_map * efi.memmap.desc_size;
+ addr = (unsigned long)efi.memmap.phys_map;
+
+ efi.memmap.map = early_memremap(addr, size);
+ if (efi.memmap.map == NULL) {
pr_err("Could not map the memory map!\n");
return -ENOMEM;
}
- memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size);
+
+ efi.memmap.map_end = efi.memmap.map + size;
if (add_efi_memmap)
do_add_efi_memmap();
@@ -552,12 +549,9 @@
void __init runtime_code_page_mkexec(void)
{
efi_memory_desc_t *md;
- void *p;
/* Make EFI runtime service code area executable */
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
- md = p;
-
+ for_each_efi_memory_desc(md) {
if (md->type != EFI_RUNTIME_SERVICES_CODE)
continue;
@@ -604,12 +598,10 @@
/* Merge contiguous regions of the same type and attribute */
static void __init efi_merge_regions(void)
{
- void *p;
efi_memory_desc_t *md, *prev_md = NULL;
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+ for_each_efi_memory_desc(md) {
u64 prev_size;
- md = p;
if (!prev_md) {
prev_md = md;
@@ -651,30 +643,31 @@
static void __init save_runtime_map(void)
{
#ifdef CONFIG_KEXEC_CORE
+ unsigned long desc_size;
efi_memory_desc_t *md;
- void *tmp, *p, *q = NULL;
+ void *tmp, *q = NULL;
int count = 0;
if (efi_enabled(EFI_OLD_MEMMAP))
return;
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
- md = p;
+ desc_size = efi.memmap.desc_size;
+ for_each_efi_memory_desc(md) {
if (!(md->attribute & EFI_MEMORY_RUNTIME) ||
(md->type == EFI_BOOT_SERVICES_CODE) ||
(md->type == EFI_BOOT_SERVICES_DATA))
continue;
- tmp = krealloc(q, (count + 1) * memmap.desc_size, GFP_KERNEL);
+ tmp = krealloc(q, (count + 1) * desc_size, GFP_KERNEL);
if (!tmp)
goto out;
q = tmp;
- memcpy(q + count * memmap.desc_size, md, memmap.desc_size);
+ memcpy(q + count * desc_size, md, desc_size);
count++;
}
- efi_runtime_map_setup(q, count, memmap.desc_size);
+ efi_runtime_map_setup(q, count, desc_size);
return;
out:
@@ -714,10 +707,10 @@
{
/* Initial call */
if (!entry)
- return memmap.map_end - memmap.desc_size;
+ return efi.memmap.map_end - efi.memmap.desc_size;
- entry -= memmap.desc_size;
- if (entry < memmap.map)
+ entry -= efi.memmap.desc_size;
+ if (entry < efi.memmap.map)
return NULL;
return entry;
@@ -759,10 +752,10 @@
/* Initial call */
if (!entry)
- return memmap.map;
+ return efi.memmap.map;
- entry += memmap.desc_size;
- if (entry >= memmap.map_end)
+ entry += efi.memmap.desc_size;
+ if (entry >= efi.memmap.map_end)
return NULL;
return entry;
@@ -776,8 +769,11 @@
{
void *p, *new_memmap = NULL;
unsigned long left = 0;
+ unsigned long desc_size;
efi_memory_desc_t *md;
+ desc_size = efi.memmap.desc_size;
+
p = NULL;
while ((p = efi_map_next_entry(p))) {
md = p;
@@ -792,7 +788,7 @@
efi_map_region(md);
get_systab_virt_addr(md);
- if (left < memmap.desc_size) {
+ if (left < desc_size) {
new_memmap = realloc_pages(new_memmap, *pg_shift);
if (!new_memmap)
return NULL;
@@ -801,10 +797,9 @@
(*pg_shift)++;
}
- memcpy(new_memmap + (*count * memmap.desc_size), md,
- memmap.desc_size);
+ memcpy(new_memmap + (*count * desc_size), md, desc_size);
- left -= memmap.desc_size;
+ left -= desc_size;
(*count)++;
}
@@ -816,7 +811,6 @@
#ifdef CONFIG_KEXEC_CORE
efi_memory_desc_t *md;
unsigned int num_pages;
- void *p;
efi.systab = NULL;
@@ -840,8 +834,7 @@
* Map efi regions which were passed via setup_data. The virt_addr is a
* fixed addr which was used in first kernel of a kexec boot.
*/
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
- md = p;
+ for_each_efi_memory_desc(md) {
efi_map_region_fixed(md); /* FIXME: add error handling */
get_systab_virt_addr(md);
}
@@ -850,10 +843,10 @@
BUG_ON(!efi.systab);
- num_pages = ALIGN(memmap.nr_map * memmap.desc_size, PAGE_SIZE);
+ num_pages = ALIGN(efi.memmap.nr_map * efi.memmap.desc_size, PAGE_SIZE);
num_pages >>= PAGE_SHIFT;
- if (efi_setup_page_tables(memmap.phys_map, num_pages)) {
+ if (efi_setup_page_tables(efi.memmap.phys_map, num_pages)) {
clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
return;
}
@@ -937,16 +930,16 @@
if (efi_is_native()) {
status = phys_efi_set_virtual_address_map(
- memmap.desc_size * count,
- memmap.desc_size,
- memmap.desc_version,
+ efi.memmap.desc_size * count,
+ efi.memmap.desc_size,
+ efi.memmap.desc_version,
(efi_memory_desc_t *)__pa(new_memmap));
} else {
status = efi_thunk_set_virtual_address_map(
efi_phys.set_virtual_address_map,
- memmap.desc_size * count,
- memmap.desc_size,
- memmap.desc_version,
+ efi.memmap.desc_size * count,
+ efi.memmap.desc_size,
+ efi.memmap.desc_version,
(efi_memory_desc_t *)__pa(new_memmap));
}
@@ -1011,13 +1004,11 @@
u32 efi_mem_type(unsigned long phys_addr)
{
efi_memory_desc_t *md;
- void *p;
if (!efi_enabled(EFI_MEMMAP))
return 0;
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
- md = p;
+ for_each_efi_memory_desc(md) {
if ((md->phys_addr <= phys_addr) &&
(phys_addr < (md->phys_addr +
(md->num_pages << EFI_PAGE_SHIFT))))
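
The conversions above all use the same iterator, which hides the desc_size stride that made the open-coded loops error-prone (firmware descriptors may be larger than sizeof(efi_memory_desc_t)). A hedged sketch of a typical consumer, counting conventional-memory pages:

	static u64 count_conventional_pages(void)
	{
		efi_memory_desc_t *md;
		u64 pages = 0;

		/* Walks efi.memmap with the correct desc_size stride. */
		for_each_efi_memory_desc(md) {
			if (md->type == EFI_CONVENTIONAL_MEMORY)
				pages += md->num_pages;
		}
		return pages;
	}
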
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 49e4dd4..6e7242b 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -55,14 +55,12 @@
static void __init early_code_mapping_set_exec(int executable)
{
efi_memory_desc_t *md;
- void *p;
if (!(__supported_pte_mask & _PAGE_NX))
return;
/* Make EFI service code area executable */
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
- md = p;
+ for_each_efi_memory_desc(md) {
if (md->type == EFI_RUNTIME_SERVICES_CODE ||
md->type == EFI_BOOT_SERVICES_CODE)
efi_set_executable(md, executable);
@@ -253,7 +251,7 @@
* Map all of RAM so that we can access arguments in the 1:1
* mapping when making EFI runtime calls.
*/
- for_each_efi_memory_desc(&memmap, md) {
+ for_each_efi_memory_desc(md) {
if (md->type != EFI_CONVENTIONAL_MEMORY &&
md->type != EFI_LOADER_DATA &&
md->type != EFI_LOADER_CODE)
@@ -398,7 +396,6 @@
unsigned long pfn;
pgd_t *pgd = efi_pgd;
efi_memory_desc_t *md;
- void *p;
if (efi_enabled(EFI_OLD_MEMMAP)) {
if (__supported_pte_mask & _PAGE_NX)
@@ -409,9 +406,8 @@
if (!efi_enabled(EFI_NX_PE_DATA))
return;
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+ for_each_efi_memory_desc(md) {
unsigned long pf = 0;
- md = p;
if (!(md->attribute & EFI_MEMORY_RUNTIME))
continue;
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index ab50ada..4480c06 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -195,10 +195,9 @@
*/
void __init efi_reserve_boot_services(void)
{
- void *p;
+ efi_memory_desc_t *md;
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
- efi_memory_desc_t *md = p;
+ for_each_efi_memory_desc(md) {
u64 start = md->phys_addr;
u64 size = md->num_pages << EFI_PAGE_SHIFT;
bool already_reserved;
@@ -250,10 +249,9 @@
void __init efi_free_boot_services(void)
{
- void *p;
+ efi_memory_desc_t *md;
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
- efi_memory_desc_t *md = p;
+ for_each_efi_memory_desc(md) {
unsigned long long start = md->phys_addr;
unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
@@ -373,5 +371,5 @@
bool efi_poweroff_required(void)
{
- return !!acpi_gbl_reduced_hardware;
+ return acpi_gbl_reduced_hardware || acpi_no_s5;
}
diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c
index 1584cbe..815fec6 100644
--- a/arch/x86/platform/uv/bios_uv.c
+++ b/arch/x86/platform/uv/bios_uv.c
@@ -21,19 +21,20 @@
#include <linux/efi.h>
#include <linux/export.h>
+#include <linux/slab.h>
#include <asm/efi.h>
#include <linux/io.h>
#include <asm/uv/bios.h>
#include <asm/uv/uv_hub.h>
-static struct uv_systab uv_systab;
+struct uv_systab *uv_systab;
s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
{
- struct uv_systab *tab = &uv_systab;
+ struct uv_systab *tab = uv_systab;
s64 ret;
- if (!tab->function)
+ if (!tab || !tab->function)
/*
* BIOS does not support UV systab
*/
@@ -183,34 +184,31 @@
}
EXPORT_SYMBOL_GPL(uv_bios_set_legacy_vga_target);
-
#ifdef CONFIG_EFI
void uv_bios_init(void)
{
- struct uv_systab *tab;
-
- if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) ||
- (efi.uv_systab == (unsigned long)NULL)) {
- printk(KERN_CRIT "No EFI UV System Table.\n");
- uv_systab.function = (unsigned long)NULL;
+ uv_systab = NULL;
+ if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || !efi.uv_systab) {
+ pr_crit("UV: UVsystab: missing\n");
return;
}
- tab = (struct uv_systab *)ioremap(efi.uv_systab,
- sizeof(struct uv_systab));
- if (strncmp(tab->signature, "UVST", 4) != 0)
- printk(KERN_ERR "bad signature in UV system table!");
+ uv_systab = ioremap(efi.uv_systab, sizeof(struct uv_systab));
+ if (!uv_systab || strncmp(uv_systab->signature, UV_SYSTAB_SIG, 4)) {
+ pr_err("UV: UVsystab: bad signature!\n");
+ iounmap(uv_systab);
+ return;
+ }
- /*
- * Copy table to permanent spot for later use.
- */
- memcpy(&uv_systab, tab, sizeof(struct uv_systab));
- iounmap(tab);
-
- printk(KERN_INFO "EFI UV System Table Revision %d\n",
- uv_systab.revision);
+ if (uv_systab->revision >= UV_SYSTAB_VERSION_UV4) {
+ /* Read the full size before the header mapping goes away. */
+ int size = uv_systab->size;
+
+ iounmap(uv_systab);
+ uv_systab = ioremap(efi.uv_systab, size);
+ if (!uv_systab) {
+ pr_err("UV: UVsystab: ioremap(%d) failed!\n", size);
+ return;
+ }
+ }
+ pr_info("UV: UVsystab: Revision:%x\n", uv_systab->revision);
}
-#else /* !CONFIG_EFI */
-
-void uv_bios_init(void) { }
#endif
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 3b6ec42..fdb4d42 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -37,7 +37,7 @@
};
static int timeout_us;
-static int nobau;
+static bool nobau = true;
static int nobau_perm;
static cycles_t congested_cycles;
@@ -106,13 +106,28 @@
"enable: number times use of the BAU was re-enabled"
};
-static int __init
-setup_nobau(char *arg)
+static int __init setup_bau(char *arg)
{
- nobau = 1;
+ int result;
+
+ if (!arg)
+ return -EINVAL;
+
+ result = strtobool(arg, &nobau);
+ if (result)
+ return result;
+
+ /* we need to flip the logic here, so that bau=y sets nobau to false */
+ nobau = !nobau;
+
+ if (!nobau)
+ pr_info("UV BAU Enabled\n");
+ else
+ pr_info("UV BAU Disabled\n");
+
return 0;
}
-early_param("nobau", setup_nobau);
+early_param("bau", setup_bau);
/* base pnode in this partition */
static int uv_base_pnode __read_mostly;
@@ -131,10 +146,10 @@
pr_info("BAU not initialized; cannot be turned on\n");
return;
}
- nobau = 0;
+ nobau = false;
for_each_present_cpu(cpu) {
bcp = &per_cpu(bau_control, cpu);
- bcp->nobau = 0;
+ bcp->nobau = false;
}
pr_info("BAU turned on\n");
return;
@@ -146,10 +161,10 @@
int cpu;
struct bau_control *bcp;
- nobau = 1;
+ nobau = true;
for_each_present_cpu(cpu) {
bcp = &per_cpu(bau_control, cpu);
- bcp->nobau = 1;
+ bcp->nobau = true;
}
pr_info("BAU turned off\n");
return;
@@ -1886,7 +1901,7 @@
bcp = &per_cpu(bau_control, cpu);
bcp->baudisabled = 0;
if (nobau)
- bcp->nobau = 1;
+ bcp->nobau = true;
bcp->statp = &per_cpu(ptcstats, cpu);
/* time interval to catch a hardware stay-busy bug */
bcp->timeout_interval = usec_2_cycles(2*timeout_us);
@@ -2025,7 +2040,8 @@
return 1;
}
bcp->uvhub_master = *hmasterp;
- bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->blade_processor_id;
+ bcp->uvhub_cpu = uv_cpu_blade_processor_id(cpu);
+
if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) {
printk(KERN_EMERG "%d cpus per uvhub invalid\n",
bcp->uvhub_cpu);
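
setup_bau() above is the standard strtobool-based boot-parameter pattern; the inversion is needed because the internal flag is negative-sense ("nobau") while the parameter is positive ("bau="). A generic sketch of the same pattern for a hypothetical "widget=" parameter:

	static bool widget_disabled;	/* hypothetical negative-sense flag */

	static int __init setup_widget(char *arg)
	{
		bool enable;
		int ret;

		if (!arg)
			return -EINVAL;

		ret = strtobool(arg, &enable);
		if (ret)
			return ret;

		widget_disabled = !enable;	/* flip: widget=y clears it */
		return 0;
	}
	early_param("widget", setup_widget);
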
diff --git a/arch/x86/platform/uv/uv_sysfs.c b/arch/x86/platform/uv/uv_sysfs.c
index 5d4ba30..e9da9eb 100644
--- a/arch/x86/platform/uv/uv_sysfs.c
+++ b/arch/x86/platform/uv/uv_sysfs.c
@@ -34,7 +34,7 @@
static ssize_t coherence_id_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
- return snprintf(buf, PAGE_SIZE, "%ld\n", partition_coherence_id());
+ return snprintf(buf, PAGE_SIZE, "%ld\n", uv_partition_coherence_id());
}
static struct kobj_attribute partition_id_attr =
diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c
index 2b158a9..b333fc4 100644
--- a/arch/x86/platform/uv/uv_time.c
+++ b/arch/x86/platform/uv/uv_time.c
@@ -165,7 +165,7 @@
for_each_present_cpu(cpu) {
int nid = cpu_to_node(cpu);
int bid = uv_cpu_to_blade_id(cpu);
- int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id;
+ int bcpu = uv_cpu_blade_processor_id(cpu);
struct uv_rtc_timer_head *head = blade_info[bid];
if (!head) {
@@ -226,7 +226,7 @@
int pnode = uv_cpu_to_pnode(cpu);
int bid = uv_cpu_to_blade_id(cpu);
struct uv_rtc_timer_head *head = blade_info[bid];
- int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id;
+ int bcpu = uv_cpu_blade_processor_id(cpu);
u64 *t = &head->cpu[bcpu].expires;
unsigned long flags;
int next_cpu;
@@ -262,7 +262,7 @@
int pnode = uv_cpu_to_pnode(cpu);
int bid = uv_cpu_to_blade_id(cpu);
struct uv_rtc_timer_head *head = blade_info[bid];
- int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id;
+ int bcpu = uv_cpu_blade_processor_id(cpu);
u64 *t = &head->cpu[bcpu].expires;
unsigned long flags;
int rc = 0;
diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c
index 291226b..9f14bd3 100644
--- a/arch/x86/power/hibernate_32.c
+++ b/arch/x86/power/hibernate_32.c
@@ -106,7 +106,7 @@
* normal page tables.
* NOTE: We can mark everything as executable here
*/
- if (cpu_has_pse) {
+ if (boot_cpu_has(X86_FEATURE_PSE)) {
set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC));
pfn += PTRS_PER_PTE;
} else {
diff --git a/arch/x86/ras/Kconfig b/arch/x86/ras/Kconfig
index df280da..d957d5f 100644
--- a/arch/x86/ras/Kconfig
+++ b/arch/x86/ras/Kconfig
@@ -1,4 +1,4 @@
-config AMD_MCE_INJ
+config MCE_AMD_INJ
tristate "Simple MCE injection interface for AMD processors"
depends on RAS && EDAC_DECODE_MCE && DEBUG_FS && AMD_NB
default n
diff --git a/arch/x86/ras/Makefile b/arch/x86/ras/Makefile
index dd2c98b..5f94546 100644
--- a/arch/x86/ras/Makefile
+++ b/arch/x86/ras/Makefile
@@ -1,2 +1,2 @@
-obj-$(CONFIG_AMD_MCE_INJ) += mce_amd_inj.o
+obj-$(CONFIG_MCE_AMD_INJ) += mce_amd_inj.o
diff --git a/arch/x86/ras/mce_amd_inj.c b/arch/x86/ras/mce_amd_inj.c
index 9e02dca..e69f470 100644
--- a/arch/x86/ras/mce_amd_inj.c
+++ b/arch/x86/ras/mce_amd_inj.c
@@ -290,14 +290,33 @@
wrmsr_on_cpu(cpu, MSR_IA32_MCG_STATUS,
(u32)mcg_status, (u32)(mcg_status >> 32));
- wrmsr_on_cpu(cpu, MSR_IA32_MCx_STATUS(b),
- (u32)i_mce.status, (u32)(i_mce.status >> 32));
+ if (boot_cpu_has(X86_FEATURE_SMCA)) {
+ if (inj_type == DFR_INT_INJ) {
+ wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_DESTAT(b),
+ (u32)i_mce.status, (u32)(i_mce.status >> 32));
- wrmsr_on_cpu(cpu, MSR_IA32_MCx_ADDR(b),
- (u32)i_mce.addr, (u32)(i_mce.addr >> 32));
+ wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_DEADDR(b),
+ (u32)i_mce.addr, (u32)(i_mce.addr >> 32));
+ } else {
+ wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_STATUS(b),
+ (u32)i_mce.status, (u32)(i_mce.status >> 32));
- wrmsr_on_cpu(cpu, MSR_IA32_MCx_MISC(b),
- (u32)i_mce.misc, (u32)(i_mce.misc >> 32));
+ wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_ADDR(b),
+ (u32)i_mce.addr, (u32)(i_mce.addr >> 32));
+ }
+
+ wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(b),
+ (u32)i_mce.misc, (u32)(i_mce.misc >> 32));
+ } else {
+ wrmsr_on_cpu(cpu, MSR_IA32_MCx_STATUS(b),
+ (u32)i_mce.status, (u32)(i_mce.status >> 32));
+
+ wrmsr_on_cpu(cpu, MSR_IA32_MCx_ADDR(b),
+ (u32)i_mce.addr, (u32)(i_mce.addr >> 32));
+
+ wrmsr_on_cpu(cpu, MSR_IA32_MCx_MISC(b),
+ (u32)i_mce.misc, (u32)(i_mce.misc >> 32));
+ }
toggle_hw_mce_inject(cpu, false);
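The hunk above routes injection writes to the new SMCA MSR space when X86_FEATURE_SMCA is set, falling back to the legacy MSR_IA32_MCx_* registers otherwise. Every write uses the same idiom of splitting a 64-bit value into two 32-bit halves for wrmsr_on_cpu(); a minimal sketch of that recurring pattern (the helper name is hypothetical, not part of the patch):

	/* Hypothetical helper: wrmsr_on_cpu() takes the 64-bit value
	 * as two 32-bit halves, low half first. */
	static int inj_wrmsr64_on_cpu(unsigned int cpu, u32 msr, u64 val)
	{
		return wrmsr_on_cpu(cpu, msr, (u32)val, (u32)(val >> 32));
	}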
diff --git a/arch/x86/tools/calc_run_size.sh b/arch/x86/tools/calc_run_size.sh
deleted file mode 100644
index 1a4c17b..0000000
--- a/arch/x86/tools/calc_run_size.sh
+++ /dev/null
@@ -1,42 +0,0 @@
-#!/bin/sh
-#
-# Calculate the amount of space needed to run the kernel, including room for
-# the .bss and .brk sections.
-#
-# Usage:
-# objdump -h a.out | sh calc_run_size.sh
-
-NUM='\([0-9a-fA-F]*[ \t]*\)'
-OUT=$(sed -n 's/^[ \t0-9]*.b[sr][sk][ \t]*'"$NUM$NUM$NUM$NUM"'.*/\1\4/p')
-if [ -z "$OUT" ] ; then
- echo "Never found .bss or .brk file offset" >&2
- exit 1
-fi
-
-OUT=$(echo ${OUT# })
-sizeA=$(printf "%d" 0x${OUT%% *})
-OUT=${OUT#* }
-offsetA=$(printf "%d" 0x${OUT%% *})
-OUT=${OUT#* }
-sizeB=$(printf "%d" 0x${OUT%% *})
-OUT=${OUT#* }
-offsetB=$(printf "%d" 0x${OUT%% *})
-
-run_size=$(( $offsetA + $sizeA + $sizeB ))
-
-# BFD linker shows the same file offset in ELF.
-if [ "$offsetA" -ne "$offsetB" ] ; then
- # Gold linker shows them as consecutive.
- endB=$(( $offsetB + $sizeB ))
- if [ "$endB" != "$run_size" ] ; then
- printf "sizeA: 0x%x\n" $sizeA >&2
- printf "offsetA: 0x%x\n" $offsetA >&2
- printf "sizeB: 0x%x\n" $sizeB >&2
- printf "offsetB: 0x%x\n" $offsetB >&2
- echo ".bss and .brk are non-contiguous" >&2
- exit 1
- fi
-fi
-
-printf "%d\n" $run_size
-exit 0
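For reference, the deleted script computed run_size = offsetA + sizeA + sizeB from the .bss and .brk rows of objdump -h output. A worked example: with .bss at file offset 0x1000 and size 0x100, and .brk (BFD layout) at the same offset with size 0x200, it printed 0x1000 + 0x100 + 0x200 = 0x1300 (4864); under the gold linker .brk would instead sit at offset 0x1100, and the consecutive-section check endB == run_size (0x1100 + 0x200 == 0x1300) still passes.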
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 880862c..760789a 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -75,7 +75,6 @@
#include <asm/mach_traps.h>
#include <asm/mwait.h>
#include <asm/pci_x86.h>
-#include <asm/pat.h>
#include <asm/cpu.h>
#ifdef CONFIG_ACPI
@@ -1093,6 +1092,26 @@
return ret;
}
+static u64 xen_read_msr(unsigned int msr)
+{
+ /*
+ * This will silently swallow a #GP from RDMSR. It may be worth
+ * changing that.
+ */
+ int err;
+
+ return xen_read_msr_safe(msr, &err);
+}
+
+static void xen_write_msr(unsigned int msr, unsigned low, unsigned high)
+{
+ /*
+ * This will silently swallow a #GP from WRMSR. It may be worth
+ * changing that.
+ */
+ xen_write_msr_safe(msr, low, high);
+}
+
void xen_setup_shared_info(void)
{
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
@@ -1187,13 +1206,11 @@
}
static const struct pv_info xen_info __initconst = {
- .paravirt_enabled = 1,
.shared_kernel_pmd = 0,
#ifdef CONFIG_X86_64
.extra_user_64bit_cs = FLAT_USER_CS64,
#endif
- .features = 0,
.name = "Xen",
};
@@ -1223,8 +1240,11 @@
.wbinvd = native_wbinvd,
- .read_msr = xen_read_msr_safe,
- .write_msr = xen_write_msr_safe,
+ .read_msr = xen_read_msr,
+ .write_msr = xen_write_msr,
+
+ .read_msr_safe = xen_read_msr_safe,
+ .write_msr_safe = xen_write_msr_safe,
.read_pmc = xen_read_pmc,
@@ -1469,10 +1489,10 @@
* For the BSP, PSE and PGE are set in probe_page_size_mask(); for APs,
* set them here. For all CPUs, OSFXSR and OSXMMEXCPT are set in fpu__init_cpu().
*/
- if (cpu_has_pse)
+ if (boot_cpu_has(X86_FEATURE_PSE))
cr4_set_bits_and_update_boot(X86_CR4_PSE);
- if (cpu_has_pge)
+ if (boot_cpu_has(X86_FEATURE_PGE))
cr4_set_bits_and_update_boot(X86_CR4_PGE);
}
@@ -1506,12 +1526,16 @@
}
#endif /* CONFIG_XEN_PVH */
+static void __init xen_dom0_set_legacy_features(void)
+{
+ x86_platform.legacy.rtc = 1;
+}
+
/* First C function to be called on Xen boot */
asmlinkage __visible void __init xen_start_kernel(void)
{
struct physdev_set_iopl set_iopl;
unsigned long initrd_start = 0;
- u64 pat;
int rc;
if (!xen_start_info)
@@ -1527,8 +1551,6 @@
/* Install Xen paravirt ops */
pv_info = xen_info;
- if (xen_initial_domain())
- pv_info.features |= PV_SUPPORTED_RTC;
pv_init_ops = xen_init_ops;
if (!xen_pvh_domain()) {
pv_cpu_ops = xen_cpu_ops;
@@ -1618,13 +1640,6 @@
xen_start_info->nr_pages);
xen_reserve_special_pages();
- /*
- * Modify the cache mode translation tables to match Xen's PAT
- * configuration.
- */
- rdmsrl(MSR_IA32_CR_PAT, pat);
- pat_init_cache_modes(pat);
-
/* keep using Xen gdt for now; no urgent need to change it */
#ifdef CONFIG_X86_32
@@ -1670,6 +1685,7 @@
boot_params.hdr.ramdisk_image = initrd_start;
boot_params.hdr.ramdisk_size = xen_start_info->mod_len;
boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line);
+ boot_params.hdr.hardware_subarch = X86_SUBARCH_XEN;
if (!xen_initial_domain()) {
add_preferred_console("xenboot", 0, NULL);
@@ -1687,6 +1703,8 @@
.u.firmware_info.type = XEN_FW_KBD_SHIFT_FLAGS,
};
+ x86_platform.set_legacy_features =
+ xen_dom0_set_legacy_features;
xen_init_vga(info, xen_start_info->console.dom0.info_size);
xen_start_info->console.domU.mfn = 0;
xen_start_info->console.domU.evtchn = 0;
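The read_msr/read_msr_safe split introduced above mirrors the native accessors: the plain hooks are for MSRs that must exist, while the _safe variants report a #GP to the caller rather than, as the new Xen wrappers deliberately do for now, swallowing it. A caller-side sketch using the generic kernel API (illustrative, not taken from this patch):

	u64 pat;

	/* rdmsrl_safe() returns non-zero if the RDMSR faulted (#GP). */
	if (rdmsrl_safe(MSR_IA32_CR_PAT, &pat))
		pr_warn("PAT MSR is not readable\n");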
diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index b56855a..28cf4c5 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -22,6 +22,7 @@
generic-y += percpu.h
generic-y += preempt.h
generic-y += resource.h
+generic-y += rwsem.h
generic-y += sections.h
generic-y += siginfo.h
generic-y += statfs.h
diff --git a/arch/xtensa/include/asm/rwsem.h b/arch/xtensa/include/asm/rwsem.h
deleted file mode 100644
index 249619e..0000000
--- a/arch/xtensa/include/asm/rwsem.h
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * include/asm-xtensa/rwsem.h
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Largely copied from include/asm-ppc/rwsem.h
- *
- * Copyright (C) 2001 - 2005 Tensilica Inc.
- */
-
-#ifndef _XTENSA_RWSEM_H
-#define _XTENSA_RWSEM_H
-
-#ifndef _LINUX_RWSEM_H
-#error "Please don't include <asm/rwsem.h> directly, use <linux/rwsem.h> instead."
-#endif
-
-#define RWSEM_UNLOCKED_VALUE 0x00000000
-#define RWSEM_ACTIVE_BIAS 0x00000001
-#define RWSEM_ACTIVE_MASK 0x0000ffff
-#define RWSEM_WAITING_BIAS (-0x00010000)
-#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
-#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
-
-/*
- * lock for reading
- */
-static inline void __down_read(struct rw_semaphore *sem)
-{
- if (atomic_add_return(1,(atomic_t *)(&sem->count)) > 0)
- smp_wmb();
- else
- rwsem_down_read_failed(sem);
-}
-
-static inline int __down_read_trylock(struct rw_semaphore *sem)
-{
- int tmp;
-
- while ((tmp = sem->count) >= 0) {
- if (tmp == cmpxchg(&sem->count, tmp,
- tmp + RWSEM_ACTIVE_READ_BIAS)) {
- smp_wmb();
- return 1;
- }
- }
- return 0;
-}
-
-/*
- * lock for writing
- */
-static inline void __down_write(struct rw_semaphore *sem)
-{
- int tmp;
-
- tmp = atomic_add_return(RWSEM_ACTIVE_WRITE_BIAS,
- (atomic_t *)(&sem->count));
- if (tmp == RWSEM_ACTIVE_WRITE_BIAS)
- smp_wmb();
- else
- rwsem_down_write_failed(sem);
-}
-
-static inline int __down_write_trylock(struct rw_semaphore *sem)
-{
- int tmp;
-
- tmp = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
- RWSEM_ACTIVE_WRITE_BIAS);
- smp_wmb();
- return tmp == RWSEM_UNLOCKED_VALUE;
-}
-
-/*
- * unlock after reading
- */
-static inline void __up_read(struct rw_semaphore *sem)
-{
- int tmp;
-
- smp_wmb();
- tmp = atomic_sub_return(1,(atomic_t *)(&sem->count));
- if (tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0)
- rwsem_wake(sem);
-}
-
-/*
- * unlock after writing
- */
-static inline void __up_write(struct rw_semaphore *sem)
-{
- smp_wmb();
- if (atomic_sub_return(RWSEM_ACTIVE_WRITE_BIAS,
- (atomic_t *)(&sem->count)) < 0)
- rwsem_wake(sem);
-}
-
-/*
- * implement atomic add functionality
- */
-static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem)
-{
- atomic_add(delta, (atomic_t *)(&sem->count));
-}
-
-/*
- * downgrade write lock to read lock
- */
-static inline void __downgrade_write(struct rw_semaphore *sem)
-{
- int tmp;
-
- smp_wmb();
- tmp = atomic_add_return(-RWSEM_WAITING_BIAS, (atomic_t *)(&sem->count));
- if (tmp < 0)
- rwsem_downgrade_wake(sem);
-}
-
-/*
- * implement exchange and add functionality
- */
-static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
-{
- smp_mb();
- return atomic_add_return(delta, (atomic_t *)(&sem->count));
-}
-
-#endif /* _XTENSA_RWSEM_H */
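With this header deleted, xtensa picks up the generic rwsem implementation through the generic-y += rwsem.h line added above; the fast paths removed here exist there in equivalent form. As an illustration of what the bias constants encode (a sketch of the counting scheme, not the generic header verbatim): a reader adds RWSEM_ACTIVE_BIAS (1) to the count and only falls back to the slow path when the result is non-positive, i.e. when a writer holds or is waiting for the lock.

	/* Illustrative reader fast path over a biased count;
	 * the slow-path function name is hypothetical. */
	static inline void down_read_fastpath(atomic_long_t *count)
	{
		if (atomic_long_inc_return(count) <= 0)
			rwsem_down_read_failed_slowpath();
	}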
diff --git a/arch/xtensa/kernel/perf_event.c b/arch/xtensa/kernel/perf_event.c
index 54f0118..a6b00b3 100644
--- a/arch/xtensa/kernel/perf_event.c
+++ b/arch/xtensa/kernel/perf_event.c
@@ -332,14 +332,14 @@
void perf_callchain_kernel(struct perf_callchain_entry *entry,
struct pt_regs *regs)
{
- xtensa_backtrace_kernel(regs, PERF_MAX_STACK_DEPTH,
+ xtensa_backtrace_kernel(regs, sysctl_perf_event_max_stack,
callchain_trace, NULL, entry);
}
void perf_callchain_user(struct perf_callchain_entry *entry,
struct pt_regs *regs)
{
- xtensa_backtrace_user(regs, PERF_MAX_STACK_DEPTH,
+ xtensa_backtrace_user(regs, sysctl_perf_event_max_stack,
callchain_trace, entry);
}
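PERF_MAX_STACK_DEPTH is now only the default; the effective limit for these callchains is the kernel.perf_event_max_stack sysctl, so e.g. sysctl kernel.perf_event_max_stack=512 raises the capture depth at run time.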
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 82b96ee..b7e2e77 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -5,10 +5,10 @@
menuconfig ACPI
bool "ACPI (Advanced Configuration and Power Interface) Support"
depends on !IA64_HP_SIM
- depends on IA64 || X86 || (ARM64 && EXPERT)
+ depends on IA64 || X86 || ARM64
depends on PCI
select PNP
- default y
+ default y if (IA64 || X86)
help
Advanced Configuration and Power Interface (ACPI) support for
Linux requires an ACPI-compliant platform (hardware/firmware),
@@ -311,12 +311,12 @@
bool
default ACPI_CUSTOM_DSDT_FILE != ""
-config ACPI_INITRD_TABLE_OVERRIDE
- bool "ACPI tables override via initrd"
+config ACPI_TABLE_UPGRADE
+ bool "Allow upgrading ACPI tables via initrd"
depends on BLK_DEV_INITRD && X86
- default n
+ default y
help
- This option provides functionality to override arbitrary ACPI tables
+ This option provides functionality to upgrade arbitrary ACPI tables
via initrd. There is no functional change if no ACPI tables are passed
via initrd, so it is safe to say Y.
See Documentation/acpi/initrd_table_override.txt for details
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index edeb2d1..251ce85 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -18,7 +18,7 @@
acpica/
# All the builtin files are in the "acpi." module_param namespace.
-acpi-y += osl.o utils.o reboot.o
+acpi-y += osi.o osl.o utils.o reboot.o
acpi-y += nvs.o
# Power management related files
@@ -47,6 +47,7 @@
acpi-y += int340x_thermal.o
acpi-y += power.o
acpi-y += event.o
+acpi-$(CONFIG_ACPI_REDUCED_HARDWARE_ONLY) += evged.o
acpi-y += sysfs.o
acpi-y += property.o
acpi-$(CONFIG_X86) += acpi_cmos_rtc.o
diff --git a/drivers/acpi/acpi_amba.c b/drivers/acpi/acpi_amba.c
index 2a61b54..7f77c07 100644
--- a/drivers/acpi/acpi_amba.c
+++ b/drivers/acpi/acpi_amba.c
@@ -35,8 +35,7 @@
if (amba_dummy_clk)
return;
- amba_dummy_clk = clk_register_fixed_rate(NULL, "apb_pclk", NULL,
- CLK_IS_ROOT, 0);
+ amba_dummy_clk = clk_register_fixed_rate(NULL, "apb_pclk", NULL, 0, 0);
clk_register_clkdev(amba_dummy_clk, "apb_pclk", NULL);
}
diff --git a/drivers/acpi/acpi_apd.c b/drivers/acpi/acpi_apd.c
index f245bf3..1daf9c4 100644
--- a/drivers/acpi/acpi_apd.c
+++ b/drivers/acpi/acpi_apd.c
@@ -62,8 +62,7 @@
if (dev_desc->fixed_clk_rate) {
clk = clk_register_fixed_rate(&pdata->adev->dev,
dev_name(&pdata->adev->dev),
- NULL, CLK_IS_ROOT,
- dev_desc->fixed_clk_rate);
+ NULL, 0, dev_desc->fixed_clk_rate);
clk_register_clkdev(clk, NULL, dev_name(&pdata->adev->dev));
pdata->clk = clk;
}
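Both this clock hunk and the one in acpi_amba.c drop CLK_IS_ROOT. The flag merely declared that a clock has no parent, which the NULL parent_name argument already conveys, so passing 0 for the flags is functionally identical; the flag was on its way out of the clk API at this point.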
diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c
index 4361bc9..3d5b8a0 100644
--- a/drivers/acpi/acpi_video.c
+++ b/drivers/acpi/acpi_video.c
@@ -191,19 +191,6 @@
u8 _DDC:1; /* Return the EDID for this device */
};
-struct acpi_video_brightness_flags {
- u8 _BCL_no_ac_battery_levels:1; /* no AC/Battery levels in _BCL */
- u8 _BCL_reversed:1; /* _BCL package is in a reversed order */
- u8 _BQC_use_index:1; /* _BQC returns an index value */
-};
-
-struct acpi_video_device_brightness {
- int curr;
- int count;
- int *levels;
- struct acpi_video_brightness_flags flags;
-};
-
struct acpi_video_device {
unsigned long device_id;
struct acpi_video_device_flags flags;
@@ -325,7 +312,7 @@
*/
static int
-acpi_video_device_lcd_query_levels(struct acpi_video_device *device,
+acpi_video_device_lcd_query_levels(acpi_handle handle,
union acpi_object **levels)
{
int status;
@@ -335,7 +322,7 @@
*levels = NULL;
- status = acpi_evaluate_object(device->dev->handle, "_BCL", NULL, &buffer);
+ status = acpi_evaluate_object(handle, "_BCL", NULL, &buffer);
if (!ACPI_SUCCESS(status))
return status;
obj = (union acpi_object *)buffer.pointer;
@@ -766,36 +753,28 @@
return 0;
}
-
-/*
- * Arg:
- * device : video output device (LCD, CRT, ..)
- *
- * Return Value:
- * Maximum brightness level
- *
- * Allocate and initialize device->brightness.
- */
-
-static int
-acpi_video_init_brightness(struct acpi_video_device *device)
+int acpi_video_get_levels(struct acpi_device *device,
+ struct acpi_video_device_brightness **dev_br)
{
union acpi_object *obj = NULL;
int i, max_level = 0, count = 0, level_ac_battery = 0;
- unsigned long long level, level_old;
union acpi_object *o;
struct acpi_video_device_brightness *br = NULL;
- int result = -EINVAL;
+ int result = 0;
u32 value;
- if (!ACPI_SUCCESS(acpi_video_device_lcd_query_levels(device, &obj))) {
+ if (!ACPI_SUCCESS(acpi_video_device_lcd_query_levels(device->handle,
+ &obj))) {
ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Could not query available "
"LCD brightness level\n"));
+ result = -ENODEV;
goto out;
}
- if (obj->package.count < 2)
+ if (obj->package.count < 2) {
+ result = -EINVAL;
goto out;
+ }
br = kzalloc(sizeof(*br), GFP_KERNEL);
if (!br) {
@@ -861,6 +840,38 @@
"Found unordered _BCL package"));
br->count = count;
+ *dev_br = br;
+
+out:
+ kfree(obj);
+ return result;
+out_free:
+ kfree(br);
+ goto out;
+}
+EXPORT_SYMBOL(acpi_video_get_levels);
+
+/*
+ * Arg:
+ * device : video output device (LCD, CRT, ..)
+ *
+ * Return Value:
+ * Maximum brightness level
+ *
+ * Allocate and initialize device->brightness.
+ */
+
+static int
+acpi_video_init_brightness(struct acpi_video_device *device)
+{
+ int i, max_level = 0;
+ unsigned long long level, level_old;
+ struct acpi_video_device_brightness *br = NULL;
+ int result = -EINVAL;
+
+ result = acpi_video_get_levels(device->dev, &br);
+ if (result)
+ return result;
device->brightness = br;
/* _BQC uses INDEX while _BCL uses VALUE in some laptops */
@@ -903,17 +914,13 @@
goto out_free_levels;
ACPI_DEBUG_PRINT((ACPI_DB_INFO,
- "found %d brightness levels\n", count - 2));
- kfree(obj);
- return result;
+ "found %d brightness levels\n", br->count - 2));
+ return 0;
out_free_levels:
kfree(br->levels);
-out_free:
kfree(br);
-out:
device->brightness = NULL;
- kfree(obj);
return result;
}
diff --git a/drivers/acpi/acpica/Makefile b/drivers/acpi/acpica/Makefile
index f682374..227bb7b 100644
--- a/drivers/acpi/acpica/Makefile
+++ b/drivers/acpi/acpica/Makefile
@@ -43,6 +43,7 @@
evxfregn.o
acpi-y += \
+ exconcat.o \
exconfig.o \
exconvrt.o \
excreate.o \
@@ -149,6 +150,7 @@
acpi-y += \
utaddress.o \
utalloc.o \
+ utascii.o \
utbuffer.o \
utcopy.o \
utexcep.o \
diff --git a/drivers/acpi/acpica/acdebug.h b/drivers/acpi/acpica/acdebug.h
index 993af9e..f6404ea 100644
--- a/drivers/acpi/acpica/acdebug.h
+++ b/drivers/acpi/acpica/acdebug.h
@@ -53,7 +53,7 @@
#define ACPI_DEBUG_BUFFER_SIZE 0x4000 /* 16K buffer for return objects */
struct acpi_db_command_info {
- char *name; /* Command Name */
+ const char *name; /* Command Name */
u8 min_args; /* Minimum arguments required */
};
@@ -64,7 +64,7 @@
};
struct acpi_db_argument_info {
- char *name; /* Argument Name */
+ const char *name; /* Argument Name */
};
struct acpi_db_execute_walk {
@@ -196,7 +196,7 @@
acpi_walk_state
*walk_state))
- acpi_status acpi_db_display_all_methods(char *display_count_arg);
+acpi_status acpi_db_display_all_methods(char *display_count_arg);
void acpi_db_display_arguments(void);
@@ -220,7 +220,7 @@
* dbexec - debugger control method execution
*/
void
-acpi_db_execute(char *name, char **args, acpi_object_type * types, u32 flags);
+acpi_db_execute(char *name, char **args, acpi_object_type *types, u32 flags);
void
acpi_db_create_execution_threads(char *num_threads_arg,
@@ -271,7 +271,7 @@
acpi_status acpi_db_user_commands(void);
char *acpi_db_get_next_token(char *string,
- char **next, acpi_object_type * return_type);
+ char **next, acpi_object_type *return_type);
/*
* dbobject
diff --git a/drivers/acpi/acpica/acevents.h b/drivers/acpi/acpica/acevents.h
index 010cf81..77af91c 100644
--- a/drivers/acpi/acpica/acevents.h
+++ b/drivers/acpi/acpica/acevents.h
@@ -72,6 +72,7 @@
ACPI_HW_DEPENDENT_RETURN_OK(acpi_status
acpi_ev_acquire_global_lock(u16 timeout))
ACPI_HW_DEPENDENT_RETURN_OK(acpi_status acpi_ev_release_global_lock(void))
+
acpi_status acpi_ev_remove_global_lock_handler(void);
/*
@@ -198,8 +199,6 @@
acpi_ev_detach_region(union acpi_operand_object *region_obj,
u8 acpi_ns_is_locked);
-void acpi_ev_associate_reg_method(union acpi_operand_object *region_obj);
-
void
acpi_ev_execute_reg_methods(struct acpi_namespace_node *node,
acpi_adr_space_type space_id, u32 function);
diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h
index 51b073b..fded776 100644
--- a/drivers/acpi/acpica/acglobal.h
+++ b/drivers/acpi/acpica/acglobal.h
@@ -187,6 +187,8 @@
extern const char *acpi_gbl_lowest_dstate_names[ACPI_NUM_sx_w_METHODS];
extern const char *acpi_gbl_highest_dstate_names[ACPI_NUM_sx_d_METHODS];
extern const char *acpi_gbl_region_types[ACPI_NUM_PREDEFINED_REGIONS];
+extern const char acpi_gbl_lower_hex_digits[];
+extern const char acpi_gbl_upper_hex_digits[];
extern const struct acpi_opcode_info acpi_gbl_aml_op_info[AML_NUM_OPCODES];
#ifdef ACPI_DBG_TRACK_ALLOCATIONS
@@ -361,6 +363,15 @@
#endif /* ACPI_DEBUGGER */
+#if defined (ACPI_DISASSEMBLER) || defined (ACPI_ASL_COMPILER)
+
+ACPI_GLOBAL(const char, *acpi_gbl_pld_panel_list[]);
+ACPI_GLOBAL(const char, *acpi_gbl_pld_vertical_position_list[]);
+ACPI_GLOBAL(const char, *acpi_gbl_pld_horizontal_position_list[]);
+ACPI_GLOBAL(const char, *acpi_gbl_pld_shape_list[]);
+
+#endif
+
/*****************************************************************************
*
* Application globals
diff --git a/drivers/acpi/acpica/acinterp.h b/drivers/acpi/acpica/acinterp.h
index bae1a35..7ead235 100644
--- a/drivers/acpi/acpica/acinterp.h
+++ b/drivers/acpi/acpica/acinterp.h
@@ -67,7 +67,7 @@
typedef const struct acpi_exdump_info {
u8 opcode;
u8 offset;
- char *name;
+ const char *name;
} acpi_exdump_info;
@@ -370,7 +370,7 @@
acpi_status
acpi_ex_resolve_multiple(struct acpi_walk_state *walk_state,
union acpi_operand_object *operand,
- acpi_object_type * return_type,
+ acpi_object_type *return_type,
union acpi_operand_object **return_desc);
/*
diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h
index 9562a10..13331d7 100644
--- a/drivers/acpi/acpica/aclocal.h
+++ b/drivers/acpi/acpica/aclocal.h
@@ -278,7 +278,7 @@
};
typedef
-acpi_status(*acpi_internal_method) (struct acpi_walk_state * walk_state);
+acpi_status (*acpi_internal_method) (struct acpi_walk_state * walk_state);
/*
* Bitmapped ACPI types. Used internally only
@@ -395,11 +395,12 @@
/* Return object auto-repair info */
-typedef acpi_status(*acpi_object_converter) (struct acpi_namespace_node * scope,
- union acpi_operand_object
- *original_object,
- union acpi_operand_object
- **converted_object);
+typedef acpi_status (*acpi_object_converter) (struct acpi_namespace_node *
+ scope,
+ union acpi_operand_object *
+ original_object,
+ union acpi_operand_object **
+ converted_object);
struct acpi_simple_repair_info {
char name[ACPI_NAME_SIZE];
@@ -539,10 +540,10 @@
struct acpi_namespace_node *gpe_device;
};
-typedef acpi_status(*acpi_gpe_callback) (struct acpi_gpe_xrupt_info *
- gpe_xrupt_info,
- struct acpi_gpe_block_info *gpe_block,
- void *context);
+typedef acpi_status (*acpi_gpe_callback) (struct acpi_gpe_xrupt_info *
+ gpe_xrupt_info,
+ struct acpi_gpe_block_info *
+ gpe_block, void *context);
/* Information about each particular fixed event */
@@ -657,10 +658,11 @@
};
typedef
-acpi_status(*acpi_parse_downwards) (struct acpi_walk_state * walk_state,
- union acpi_parse_object ** out_op);
+acpi_status (*acpi_parse_downwards) (struct acpi_walk_state * walk_state,
+ union acpi_parse_object ** out_op);
-typedef acpi_status(*acpi_parse_upwards) (struct acpi_walk_state * walk_state);
+typedef
+acpi_status (*acpi_parse_upwards) (struct acpi_walk_state * walk_state);
/* Global handlers for AML Notifies */
@@ -700,7 +702,8 @@
*
****************************************************************************/
-typedef acpi_status(*acpi_execute_op) (struct acpi_walk_state * walk_state);
+typedef
+acpi_status (*acpi_execute_op) (struct acpi_walk_state * walk_state);
/* Address Range info block */
@@ -853,24 +856,24 @@
/* Parse object flags */
-#define ACPI_PARSEOP_GENERIC 0x01
-#define ACPI_PARSEOP_NAMED 0x02
-#define ACPI_PARSEOP_DEFERRED 0x04
-#define ACPI_PARSEOP_BYTELIST 0x08
-#define ACPI_PARSEOP_IN_STACK 0x10
-#define ACPI_PARSEOP_TARGET 0x20
-#define ACPI_PARSEOP_IN_CACHE 0x80
+#define ACPI_PARSEOP_GENERIC 0x01
+#define ACPI_PARSEOP_NAMED_OBJECT 0x02
+#define ACPI_PARSEOP_DEFERRED 0x04
+#define ACPI_PARSEOP_BYTELIST 0x08
+#define ACPI_PARSEOP_IN_STACK 0x10
+#define ACPI_PARSEOP_TARGET 0x20
+#define ACPI_PARSEOP_IN_CACHE 0x80
/* Parse object disasm_flags */
-#define ACPI_PARSEOP_IGNORE 0x01
-#define ACPI_PARSEOP_PARAMLIST 0x02
-#define ACPI_PARSEOP_EMPTY_TERMLIST 0x04
-#define ACPI_PARSEOP_PREDEF_CHECKED 0x08
-#define ACPI_PARSEOP_CLOSING_PAREN 0x10
-#define ACPI_PARSEOP_COMPOUND 0x20
-#define ACPI_PARSEOP_ASSIGNMENT 0x40
-#define ACPI_PARSEOP_ELSEIF 0x80
+#define ACPI_PARSEOP_IGNORE 0x01
+#define ACPI_PARSEOP_PARAMETER_LIST 0x02
+#define ACPI_PARSEOP_EMPTY_TERMLIST 0x04
+#define ACPI_PARSEOP_PREDEFINED_CHECKED 0x08
+#define ACPI_PARSEOP_CLOSING_PAREN 0x10
+#define ACPI_PARSEOP_COMPOUND_ASSIGNMENT 0x20
+#define ACPI_PARSEOP_ASSIGNMENT 0x40
+#define ACPI_PARSEOP_ELSEIF 0x80
/*****************************************************************************
*
@@ -1096,6 +1099,7 @@
#define ACPI_EXT_ORIGIN_FROM_FILE 0x02 /* External came from a file */
#define ACPI_EXT_INTERNAL_PATH_ALLOCATED 0x04 /* Deallocate internal path on completion */
#define ACPI_EXT_EXTERNAL_EMITTED 0x08 /* External() statement has been emitted */
+#define ACPI_EXT_ORIGIN_FROM_OPCODE 0x10 /* External came from an External() opcode */
struct acpi_external_file {
char *path;
diff --git a/drivers/acpi/acpica/acmacros.h b/drivers/acpi/acpica/acmacros.h
index 411c18b..a3b9543 100644
--- a/drivers/acpi/acpica/acmacros.h
+++ b/drivers/acpi/acpica/acmacros.h
@@ -260,14 +260,31 @@
#define ACPI_IS_MISALIGNED(value) (((acpi_size) value) & (sizeof(acpi_size)-1))
+/* Generic (power-of-two) rounding */
+
+#define ACPI_IS_ALIGNED(a, s) (((a) & ((s) - 1)) == 0)
+#define ACPI_IS_POWER_OF_TWO(a) ACPI_IS_ALIGNED(a, a)
+
/*
* Bitmask creation
* Bit positions start at zero.
* MASK_BITS_ABOVE creates a mask starting AT the position and above
* MASK_BITS_BELOW creates a mask starting one bit BELOW the position
+ * MASK_BITS_ABOVE/BELOW accepts a bit offset to create a mask
+ * MASK_BITS_ABOVE/BELOW_32/64 accepts a bit width to create a mask
+ * Note: The ACPI_INTEGER_BIT_SIZE check is used to bypass compiler
+ * differences with the shift operator (shifting by the full 64-bit
+ * width is undefined behavior)
*/
#define ACPI_MASK_BITS_ABOVE(position) (~((ACPI_UINT64_MAX) << ((u32) (position))))
#define ACPI_MASK_BITS_BELOW(position) ((ACPI_UINT64_MAX) << ((u32) (position)))
+#define ACPI_MASK_BITS_ABOVE_32(width) ((u32) ACPI_MASK_BITS_ABOVE(width))
+#define ACPI_MASK_BITS_BELOW_32(width) ((u32) ACPI_MASK_BITS_BELOW(width))
+#define ACPI_MASK_BITS_ABOVE_64(width) ((width) == ACPI_INTEGER_BIT_SIZE ? \
+ ACPI_UINT64_MAX : \
+ ACPI_MASK_BITS_ABOVE(width))
+#define ACPI_MASK_BITS_BELOW_64(width) ((width) == ACPI_INTEGER_BIT_SIZE ? \
+ (u64) 0 : \
+ ACPI_MASK_BITS_BELOW(width))
/* Bitfields within ACPI registers */
@@ -283,10 +300,10 @@
/* Generic bitfield macros and masks */
#define ACPI_GET_BITS(source_ptr, position, mask) \
- ((*source_ptr >> position) & mask)
+ ((*(source_ptr) >> (position)) & (mask))
#define ACPI_SET_BITS(target_ptr, position, mask, value) \
- (*target_ptr |= ((value & mask) << position))
+ (*(target_ptr) |= (((value) & (mask)) << (position)))
#define ACPI_1BIT_MASK 0x00000001
#define ACPI_2BIT_MASK 0x00000003
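A worked example of the new width-based masks and the now fully parenthesized bitfield helpers: ACPI_MASK_BITS_ABOVE_64(8) evaluates to 0x00000000000000FF (it clears everything at and above bit 8, keeping an 8-bit field), while ACPI_MASK_BITS_ABOVE_64(64) short-circuits to ACPI_UINT64_MAX instead of performing the undefined shift-by-64. The GET/SET pair composes with the predefined masks (values follow from the definitions above):

	u32 reg = 0;
	u32 field;

	ACPI_SET_BITS(&reg, 4, ACPI_2BIT_MASK, 3);	/* reg == 0x30 */
	field = ACPI_GET_BITS(&reg, 4, ACPI_2BIT_MASK);	/* field == 3 */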
diff --git a/drivers/acpi/acpica/acnamesp.h b/drivers/acpi/acpica/acnamesp.h
index 022d69c..f33a4ba 100644
--- a/drivers/acpi/acpica/acnamesp.h
+++ b/drivers/acpi/acpica/acnamesp.h
@@ -206,9 +206,10 @@
void acpi_ns_dump_entry(acpi_handle handle, u32 debug_level);
void
-acpi_ns_dump_pathname(acpi_handle handle, char *msg, u32 level, u32 component);
+acpi_ns_dump_pathname(acpi_handle handle,
+ const char *msg, u32 level, u32 component);
-void acpi_ns_print_pathname(u32 num_segments, char *pathname);
+void acpi_ns_print_pathname(u32 num_segments, const char *pathname);
acpi_status
acpi_ns_dump_one_object(acpi_handle obj_handle,
diff --git a/drivers/acpi/acpica/acparser.h b/drivers/acpi/acpica/acparser.h
index 7da639d..fc30577 100644
--- a/drivers/acpi/acpica/acparser.h
+++ b/drivers/acpi/acpica/acparser.h
@@ -139,7 +139,7 @@
*/
const struct acpi_opcode_info *acpi_ps_get_opcode_info(u16 opcode);
-char *acpi_ps_get_opcode_name(u16 opcode);
+const char *acpi_ps_get_opcode_name(u16 opcode);
u8 acpi_ps_get_argument_count(u32 op_type);
diff --git a/drivers/acpi/acpica/acpredef.h b/drivers/acpi/acpica/acpredef.h
index 5faeab4..888440b 100644
--- a/drivers/acpi/acpica/acpredef.h
+++ b/drivers/acpi/acpica/acpredef.h
@@ -129,7 +129,8 @@
ACPI_PTYPE2_REV_FIXED = 9,
ACPI_PTYPE2_FIX_VAR = 10,
ACPI_PTYPE2_VAR_VAR = 11,
- ACPI_PTYPE2_UUID_PAIR = 12
+ ACPI_PTYPE2_UUID_PAIR = 12,
+ ACPI_PTYPE_CUSTOM = 13
};
/* Support macros for users of the predefined info table */
@@ -340,7 +341,7 @@
{{"_BIX", METHOD_0ARGS,
METHOD_RETURNS(ACPI_RTYPE_PACKAGE)}}, /* Fixed-length (16 Int),(4 Str) */
- PACKAGE_INFO(ACPI_PTYPE1_FIXED, ACPI_RTYPE_INTEGER, 16,
+ PACKAGE_INFO(ACPI_PTYPE_CUSTOM, ACPI_RTYPE_INTEGER, 16,
ACPI_RTYPE_STRING, 4, 0),
{{"_BLT",
@@ -523,6 +524,9 @@
METHOD_RETURNS(ACPI_RTYPE_PACKAGE)}}, /* Fixed-length (4 Int) */
PACKAGE_INFO(ACPI_PTYPE1_FIXED, ACPI_RTYPE_INTEGER, 4, 0, 0, 0),
+ {{"_FIT", METHOD_0ARGS,
+ METHOD_RETURNS(ACPI_RTYPE_BUFFER)}}, /* ACPI 6.0 */
+
{{"_FIX", METHOD_0ARGS,
METHOD_RETURNS(ACPI_RTYPE_PACKAGE)}}, /* Variable-length (Ints) */
PACKAGE_INFO(ACPI_PTYPE1_VAR, ACPI_RTYPE_INTEGER, 0, 0, 0, 0),
@@ -1053,6 +1057,12 @@
METHOD_RETURNS(ACPI_RTYPE_INTEGER | ACPI_RTYPE_STRING |
ACPI_RTYPE_BUFFER)}},
+ {{"_WPC", METHOD_0ARGS,
+ METHOD_RETURNS(ACPI_RTYPE_INTEGER)}}, /* ACPI 6.1 */
+
+ {{"_WPP", METHOD_0ARGS,
+ METHOD_RETURNS(ACPI_RTYPE_INTEGER)}}, /* ACPI 6.1 */
+
PACKAGE_INFO(0, 0, 0, 0, 0, 0) /* Table terminator */
};
#else
diff --git a/drivers/acpi/acpica/acresrc.h b/drivers/acpi/acpica/acresrc.h
index 5dd58be..63da1e3 100644
--- a/drivers/acpi/acpica/acresrc.h
+++ b/drivers/acpi/acpica/acresrc.h
@@ -124,7 +124,7 @@
typedef const struct acpi_rsdump_info {
u8 opcode;
u8 offset;
- char *name;
+ const char *name;
const char **pointer;
} acpi_rsdump_info;
@@ -209,7 +209,7 @@
acpi_status
acpi_rs_get_method_data(acpi_handle handle,
- char *path, struct acpi_buffer *ret_buffer);
+ const char *path, struct acpi_buffer *ret_buffer);
acpi_status
acpi_rs_set_srs_method_data(struct acpi_namespace_node *node,
@@ -223,16 +223,16 @@
* rscalc
*/
acpi_status
-acpi_rs_get_list_length(u8 * aml_buffer,
- u32 aml_buffer_length, acpi_size * size_needed);
+acpi_rs_get_list_length(u8 *aml_buffer,
+ u32 aml_buffer_length, acpi_size *size_needed);
acpi_status
acpi_rs_get_aml_length(struct acpi_resource *resource_list,
- acpi_size resource_list_size, acpi_size * size_needed);
+ acpi_size resource_list_size, acpi_size *size_needed);
acpi_status
acpi_rs_get_pci_routing_table_length(union acpi_operand_object *package_object,
- acpi_size * buffer_size_needed);
+ acpi_size *buffer_size_needed);
acpi_status
acpi_rs_convert_aml_to_resources(u8 * aml,
diff --git a/drivers/acpi/acpica/acstruct.h b/drivers/acpi/acpica/acstruct.h
index b3b386e..6235642 100644
--- a/drivers/acpi/acpica/acstruct.h
+++ b/drivers/acpi/acpica/acstruct.h
@@ -184,7 +184,7 @@
/* The first 3 elements are passed by the caller to acpi_ns_evaluate */
struct acpi_namespace_node *prefix_node; /* Input: starting node */
- char *relative_pathname; /* Input: path relative to prefix_node */
+ const char *relative_pathname; /* Input: path relative to prefix_node */
union acpi_operand_object **parameters; /* Input: argument list */
struct acpi_namespace_node *node; /* Resolved node (prefix_node:relative_pathname) */
diff --git a/drivers/acpi/acpica/actables.h b/drivers/acpi/acpica/actables.h
index 848ad3a..cd5a135 100644
--- a/drivers/acpi/acpica/actables.h
+++ b/drivers/acpi/acpica/actables.h
@@ -161,8 +161,6 @@
acpi_status acpi_tb_parse_root_table(acpi_physical_address rsdp_address);
-u8 acpi_is_valid_signature(char *signature);
-
/*
* tbxfload
*/
diff --git a/drivers/acpi/acpica/acutils.h b/drivers/acpi/acpica/acutils.h
index e43ab6f..a7dbb2b 100644
--- a/drivers/acpi/acpica/acutils.h
+++ b/drivers/acpi/acpica/acutils.h
@@ -136,16 +136,16 @@
#define ACPI_SMALL_VARIABLE_LENGTH 3
typedef
-acpi_status(*acpi_walk_aml_callback) (u8 *aml,
- u32 length,
- u32 offset,
- u8 resource_index, void **context);
+acpi_status (*acpi_walk_aml_callback) (u8 *aml,
+ u32 length,
+ u32 offset,
+ u8 resource_index, void **context);
typedef
-acpi_status(*acpi_pkg_callback) (u8 object_type,
- union acpi_operand_object *source_object,
- union acpi_generic_state * state,
- void *context);
+acpi_status (*acpi_pkg_callback) (u8 object_type,
+ union acpi_operand_object * source_object,
+ union acpi_generic_state * state,
+ void *context);
struct acpi_pkg_info {
u8 *free_space;
@@ -167,6 +167,15 @@
#define DB_QWORD_DISPLAY 8
/*
+ * utascii - ASCII utilities
+ */
+u8 acpi_ut_valid_nameseg(char *signature);
+
+u8 acpi_ut_valid_name_char(char character, u32 position);
+
+void acpi_ut_check_and_repair_ascii(u8 *name, char *repaired_name, u32 count);
+
+/*
* utnonansi - Non-ANSI C library functions
*/
void acpi_ut_strupr(char *src_string);
@@ -175,7 +184,14 @@
int acpi_ut_stricmp(char *string1, char *string2);
-acpi_status acpi_ut_strtoul64(char *string, u32 base, u64 *ret_integer);
+acpi_status
+acpi_ut_strtoul64(char *string,
+ u32 base, u32 max_integer_byte_width, u64 *ret_integer);
+
+/* Values for max_integer_byte_width above */
+
+#define ACPI_MAX32_BYTE_WIDTH 4
+#define ACPI_MAX64_BYTE_WIDTH 8
/*
* utglobal - Global data structures and procedures
@@ -266,7 +282,8 @@
void
acpi_ut_trace_ptr(u32 line_number,
const char *function_name,
- const char *module_name, u32 component_id, void *pointer);
+ const char *module_name,
+ u32 component_id, const void *pointer);
void
acpi_ut_trace_u32(u32 line_number,
@@ -276,7 +293,8 @@
void
acpi_ut_trace_str(u32 line_number,
const char *function_name,
- const char *module_name, u32 component_id, char *string);
+ const char *module_name,
+ u32 component_id, const char *string);
void
acpi_ut_exit(u32 line_number,
@@ -335,12 +353,12 @@
*/
acpi_status
acpi_ut_evaluate_object(struct acpi_namespace_node *prefix_node,
- char *path,
+ const char *path,
u32 expected_return_btypes,
union acpi_operand_object **return_desc);
acpi_status
-acpi_ut_evaluate_numeric_object(char *object_name,
+acpi_ut_evaluate_numeric_object(const char *object_name,
struct acpi_namespace_node *device_node,
u64 *value);
@@ -415,7 +433,7 @@
union acpi_operand_object *acpi_ut_create_string_object(acpi_size string_size);
acpi_status
-acpi_ut_get_object_size(union acpi_operand_object *obj, acpi_size * obj_length);
+acpi_ut_get_object_size(union acpi_operand_object *obj, acpi_size *obj_length);
/*
* utosi - Support for the _OSI predefined control method
@@ -526,15 +544,15 @@
void
acpi_ut_display_init_pathname(u8 type,
struct acpi_namespace_node *obj_handle,
- char *path);
+ const char *path);
#endif
/*
* utownerid - Support for Table/Method Owner IDs
*/
-acpi_status acpi_ut_allocate_owner_id(acpi_owner_id * owner_id);
+acpi_status acpi_ut_allocate_owner_id(acpi_owner_id *owner_id);
-void acpi_ut_release_owner_id(acpi_owner_id * owner_id);
+void acpi_ut_release_owner_id(acpi_owner_id *owner_id);
/*
* utresrc
@@ -570,10 +588,6 @@
void ut_convert_backslashes(char *pathname);
#endif
-u8 acpi_ut_valid_acpi_name(char *name);
-
-u8 acpi_ut_valid_acpi_char(char character, u32 position);
-
void acpi_ut_repair_name(char *name);
#if defined (ACPI_DEBUGGER) || defined (ACPI_APPLICATION)
@@ -628,7 +642,7 @@
void acpi_ut_dump_allocations(u32 component, const char *module);
acpi_status
-acpi_ut_create_list(char *list_name,
+acpi_ut_create_list(const char *list_name,
u16 object_size, struct acpi_memory_list **return_cache);
#endif /* ACPI_DBG_TRACK_ALLOCATIONS */
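The widened acpi_ut_strtoul64() above takes the maximum integer byte width as an explicit argument; callers pass acpi_gbl_integer_byte_width (as dbconvert.c does below) or one of the new ACPI_MAX32/64_BYTE_WIDTH constants. A minimal call sketch under that signature:

	char hex[] = "1234ABCD";
	u64 value;
	acpi_status status;

	/* Base 16, allow the full 64-bit integer width. */
	status = acpi_ut_strtoul64(hex, 16, ACPI_MAX64_BYTE_WIDTH, &value);
	if (ACPI_FAILURE(status))
		return (status);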
diff --git a/drivers/acpi/acpica/dbcmds.c b/drivers/acpi/acpica/dbcmds.c
index 772178c..62bd446 100644
--- a/drivers/acpi/acpica/dbcmds.c
+++ b/drivers/acpi/acpica/dbcmds.c
@@ -738,9 +738,9 @@
original_aml = return_buffer.pointer;
acpi_dm_compare_aml_resources(original_aml->buffer.pointer,
- (acpi_rsdesc_size) original_aml->buffer.
+ (acpi_rsdesc_size)original_aml->buffer.
length, new_aml.pointer,
- (acpi_rsdesc_size) new_aml.length);
+ (acpi_rsdesc_size)new_aml.length);
/* Cleanup and exit */
diff --git a/drivers/acpi/acpica/dbconvert.c b/drivers/acpi/acpica/dbconvert.c
index 68f4e0f4..7cd07b2 100644
--- a/drivers/acpi/acpica/dbconvert.c
+++ b/drivers/acpi/acpica/dbconvert.c
@@ -194,7 +194,7 @@
*
******************************************************************************/
-acpi_status acpi_db_convert_to_package(char *string, union acpi_object * object)
+acpi_status acpi_db_convert_to_package(char *string, union acpi_object *object)
{
char *this;
char *next;
@@ -252,7 +252,7 @@
acpi_status
acpi_db_convert_to_object(acpi_object_type type,
- char *string, union acpi_object * object)
+ char *string, union acpi_object *object)
{
acpi_status status = AE_OK;
@@ -277,7 +277,9 @@
default:
object->type = ACPI_TYPE_INTEGER;
- status = acpi_ut_strtoul64(string, 16, &object->integer.value);
+ status =
+ acpi_ut_strtoul64(string, 16, acpi_gbl_integer_byte_width,
+ &object->integer.value);
break;
}
diff --git a/drivers/acpi/acpica/dbexec.c b/drivers/acpi/acpica/dbexec.c
index c814855..12df291 100644
--- a/drivers/acpi/acpica/dbexec.c
+++ b/drivers/acpi/acpica/dbexec.c
@@ -361,7 +361,7 @@
******************************************************************************/
void
-acpi_db_execute(char *name, char **args, acpi_object_type * types, u32 flags)
+acpi_db_execute(char *name, char **args, acpi_object_type *types, u32 flags)
{
acpi_status status;
struct acpi_buffer return_obj;
diff --git a/drivers/acpi/acpica/dbinput.c b/drivers/acpi/acpica/dbinput.c
index 417c02a..7cd5d2e 100644
--- a/drivers/acpi/acpica/dbinput.c
+++ b/drivers/acpi/acpica/dbinput.c
@@ -57,12 +57,12 @@
static u32 acpi_db_match_command(char *user_command);
-static void acpi_db_display_command_info(char *command, u8 display_all);
+static void acpi_db_display_command_info(const char *command, u8 display_all);
static void acpi_db_display_help(char *command);
static u8
-acpi_db_match_command_help(char *command,
+acpi_db_match_command_help(const char *command,
const struct acpi_db_command_help *help);
/*
@@ -348,7 +348,7 @@
******************************************************************************/
static u8
-acpi_db_match_command_help(char *command,
+acpi_db_match_command_help(const char *command,
const struct acpi_db_command_help *help)
{
char *invocation = help->invocation;
@@ -402,7 +402,7 @@
*
******************************************************************************/
-static void acpi_db_display_command_info(char *command, u8 display_all)
+static void acpi_db_display_command_info(const char *command, u8 display_all)
{
const struct acpi_db_command_help *next;
u8 matched;
@@ -466,7 +466,7 @@
******************************************************************************/
char *acpi_db_get_next_token(char *string,
- char **next, acpi_object_type * return_type)
+ char **next, acpi_object_type *return_type)
{
char *start;
u32 depth;
@@ -656,8 +656,9 @@
}
for (i = CMD_FIRST_VALID; acpi_gbl_db_commands[i].name; i++) {
- if (strstr(acpi_gbl_db_commands[i].name, user_command) ==
- acpi_gbl_db_commands[i].name) {
+ if (strstr
+ (ACPI_CAST_PTR(char, acpi_gbl_db_commands[i].name),
+ user_command) == acpi_gbl_db_commands[i].name) {
return (i);
}
}
@@ -683,8 +684,8 @@
acpi_status
acpi_db_command_dispatch(char *input_buffer,
- struct acpi_walk_state * walk_state,
- union acpi_parse_object * op)
+ struct acpi_walk_state *walk_state,
+ union acpi_parse_object *op)
{
u32 temp;
u32 command_index;
diff --git a/drivers/acpi/acpica/dbnames.c b/drivers/acpi/acpica/dbnames.c
index 3c23b5a..8667f14 100644
--- a/drivers/acpi/acpica/dbnames.c
+++ b/drivers/acpi/acpica/dbnames.c
@@ -285,7 +285,7 @@
u32 max_depth = ACPI_UINT32_MAX;
acpi_owner_id owner_id;
- owner_id = (acpi_owner_id) strtoul(owner_arg, NULL, 0);
+ owner_id = (acpi_owner_id)strtoul(owner_arg, NULL, 0);
/* Now we can check for the depth argument */
@@ -709,7 +709,7 @@
return (AE_OK);
}
- if (!acpi_ut_valid_acpi_name(node->name.ascii)) {
+ if (!acpi_ut_valid_nameseg(node->name.ascii)) {
acpi_os_printf("Invalid AcpiName for Node %p\n", node);
return (AE_OK);
}
diff --git a/drivers/acpi/acpica/dbutils.c b/drivers/acpi/acpica/dbutils.c
index b37a2c7..ae80106 100644
--- a/drivers/acpi/acpica/dbutils.c
+++ b/drivers/acpi/acpica/dbutils.c
@@ -56,8 +56,6 @@
void acpi_db_dump_buffer(u32 address);
#endif
-static char *gbl_hex_to_ascii = "0123456789ABCDEF";
-
/*******************************************************************************
*
* FUNCTION: acpi_db_match_argument
@@ -82,8 +80,9 @@
}
for (i = 0; arguments[i].name; i++) {
- if (strstr(arguments[i].name, user_argument) ==
- arguments[i].name) {
+ if (strstr(ACPI_CAST_PTR(char, arguments[i].name),
+ ACPI_CAST_PTR(char,
+ user_argument)) == arguments[i].name) {
return (i);
}
}
@@ -339,7 +338,7 @@
buffer[8] = '\0';
for (i = 7; i >= 0; i--) {
- buffer[i] = gbl_hex_to_ascii[value & 0x0F];
+ buffer[i] = acpi_gbl_upper_hex_digits[value & 0x0F];
value = value >> 4;
}
}
diff --git a/drivers/acpi/acpica/dbxface.c b/drivers/acpi/acpica/dbxface.c
index e94e0d8..124db23 100644
--- a/drivers/acpi/acpica/dbxface.c
+++ b/drivers/acpi/acpica/dbxface.c
@@ -162,8 +162,8 @@
******************************************************************************/
acpi_status
-acpi_db_single_step(struct acpi_walk_state * walk_state,
- union acpi_parse_object * op, u32 opcode_class)
+acpi_db_single_step(struct acpi_walk_state *walk_state,
+ union acpi_parse_object *op, u32 opcode_class)
{
union acpi_parse_object *next;
acpi_status status = AE_OK;
diff --git a/drivers/acpi/acpica/dscontrol.c b/drivers/acpi/acpica/dscontrol.c
index c9a663f..4ddcbf1 100644
--- a/drivers/acpi/acpica/dscontrol.c
+++ b/drivers/acpi/acpica/dscontrol.c
@@ -163,8 +163,8 @@
******************************************************************************/
acpi_status
-acpi_ds_exec_end_control_op(struct acpi_walk_state * walk_state,
- union acpi_parse_object * op)
+acpi_ds_exec_end_control_op(struct acpi_walk_state *walk_state,
+ union acpi_parse_object *op)
{
acpi_status status = AE_OK;
union acpi_generic_state *control_state;
diff --git a/drivers/acpi/acpica/dsinit.c b/drivers/acpi/acpica/dsinit.c
index 5aa1c5f..f1e6dcc 100644
--- a/drivers/acpi/acpica/dsinit.c
+++ b/drivers/acpi/acpica/dsinit.c
@@ -188,7 +188,7 @@
acpi_status
acpi_ds_initialize_objects(u32 table_index,
- struct acpi_namespace_node * start_node)
+ struct acpi_namespace_node *start_node)
{
acpi_status status;
struct acpi_init_walk_info info;
diff --git a/drivers/acpi/acpica/dsmethod.c b/drivers/acpi/acpica/dsmethod.c
index da198b8..47c7b52 100644
--- a/drivers/acpi/acpica/dsmethod.c
+++ b/drivers/acpi/acpica/dsmethod.c
@@ -209,7 +209,7 @@
******************************************************************************/
acpi_status
-acpi_ds_method_error(acpi_status status, struct acpi_walk_state * walk_state)
+acpi_ds_method_error(acpi_status status, struct acpi_walk_state *walk_state)
{
u32 aml_offset;
diff --git a/drivers/acpi/acpica/dsutils.c b/drivers/acpi/acpica/dsutils.c
index 8ca9416..f393de9 100644
--- a/drivers/acpi/acpica/dsutils.c
+++ b/drivers/acpi/acpica/dsutils.c
@@ -569,7 +569,7 @@
/* TBD: May only be temporary */
obj_desc =
- acpi_ut_create_string_object((acpi_size) name_length);
+ acpi_ut_create_string_object((acpi_size)name_length);
strncpy(obj_desc->string.pointer,
name_string, name_length);
diff --git a/drivers/acpi/acpica/dswload.c b/drivers/acpi/acpica/dswload.c
index d1cedcf..fd34040 100644
--- a/drivers/acpi/acpica/dswload.c
+++ b/drivers/acpi/acpica/dswload.c
@@ -137,8 +137,8 @@
******************************************************************************/
acpi_status
-acpi_ds_load1_begin_op(struct acpi_walk_state * walk_state,
- union acpi_parse_object ** out_op)
+acpi_ds_load1_begin_op(struct acpi_walk_state *walk_state,
+ union acpi_parse_object **out_op)
{
union acpi_parse_object *op;
struct acpi_namespace_node *node;
diff --git a/drivers/acpi/acpica/dswload2.c b/drivers/acpi/acpica/dswload2.c
index 0bac6e1..762db3f 100644
--- a/drivers/acpi/acpica/dswload2.c
+++ b/drivers/acpi/acpica/dswload2.c
@@ -490,8 +490,8 @@
status =
acpi_ds_create_index_field(op,
- (acpi_handle) arg->
- common.node, walk_state);
+ (acpi_handle)arg->common.
+ node, walk_state);
break;
case AML_BANK_FIELD_OP:
diff --git a/drivers/acpi/acpica/dswstate.c b/drivers/acpi/acpica/dswstate.c
index 3a26ddb..e333869 100644
--- a/drivers/acpi/acpica/dswstate.c
+++ b/drivers/acpi/acpica/dswstate.c
@@ -143,8 +143,8 @@
******************************************************************************/
acpi_status
-acpi_ds_result_push(union acpi_operand_object * object,
- struct acpi_walk_state * walk_state)
+acpi_ds_result_push(union acpi_operand_object *object,
+ struct acpi_walk_state *walk_state)
{
union acpi_generic_state *state;
acpi_status status;
@@ -307,7 +307,7 @@
******************************************************************************/
acpi_status
-acpi_ds_obj_stack_push(void *object, struct acpi_walk_state * walk_state)
+acpi_ds_obj_stack_push(void *object, struct acpi_walk_state *walk_state)
{
ACPI_FUNCTION_NAME(ds_obj_stack_push);
@@ -354,7 +354,7 @@
******************************************************************************/
acpi_status
-acpi_ds_obj_stack_pop(u32 pop_count, struct acpi_walk_state * walk_state)
+acpi_ds_obj_stack_pop(u32 pop_count, struct acpi_walk_state *walk_state)
{
u32 i;
@@ -411,7 +411,7 @@
return;
}
- for (i = (s32) pop_count - 1; i >= 0; i--) {
+ for (i = (s32)pop_count - 1; i >= 0; i--) {
if (walk_state->num_operands == 0) {
return;
}
diff --git a/drivers/acpi/acpica/evgpe.c b/drivers/acpi/acpica/evgpe.c
index b47e62aaf..4b4949c 100644
--- a/drivers/acpi/acpica/evgpe.c
+++ b/drivers/acpi/acpica/evgpe.c
@@ -440,7 +440,7 @@
gpe_event_info =
&gpe_block->
- event_info[((acpi_size) i *
+ event_info[((acpi_size)i *
ACPI_GPE_REGISTER_WIDTH) + j];
gpe_number =
j + gpe_register_info->base_gpe_number;
@@ -652,7 +652,7 @@
*
******************************************************************************/
-acpi_status acpi_ev_finish_gpe(struct acpi_gpe_event_info * gpe_event_info)
+acpi_status acpi_ev_finish_gpe(struct acpi_gpe_event_info *gpe_event_info)
{
acpi_status status;
diff --git a/drivers/acpi/acpica/evgpeblk.c b/drivers/acpi/acpica/evgpeblk.c
index 447fa1c..d54014c 100644
--- a/drivers/acpi/acpica/evgpeblk.c
+++ b/drivers/acpi/acpica/evgpeblk.c
@@ -211,7 +211,7 @@
/* Allocate the GPE register information block */
- gpe_register_info = ACPI_ALLOCATE_ZEROED((acpi_size) gpe_block->
+ gpe_register_info = ACPI_ALLOCATE_ZEROED((acpi_size)gpe_block->
register_count *
sizeof(struct
acpi_gpe_register_info));
@@ -225,7 +225,7 @@
* Allocate the GPE event_info block. There are eight distinct GPEs
* per register. Initialization to zeros is sufficient.
*/
- gpe_event_info = ACPI_ALLOCATE_ZEROED((acpi_size) gpe_block->gpe_count *
+ gpe_event_info = ACPI_ALLOCATE_ZEROED((acpi_size)gpe_block->gpe_count *
sizeof(struct
acpi_gpe_event_info));
if (!gpe_event_info) {
diff --git a/drivers/acpi/acpica/evgpeutil.c b/drivers/acpi/acpica/evgpeutil.c
index 66c4b5b..3f150d5 100644
--- a/drivers/acpi/acpica/evgpeutil.c
+++ b/drivers/acpi/acpica/evgpeutil.c
@@ -163,7 +163,7 @@
acpi_status
acpi_ev_get_gpe_xrupt_block(u32 interrupt_number,
- struct acpi_gpe_xrupt_info ** gpe_xrupt_block)
+ struct acpi_gpe_xrupt_info **gpe_xrupt_block)
{
struct acpi_gpe_xrupt_info *next_gpe_xrupt;
struct acpi_gpe_xrupt_info *gpe_xrupt;
@@ -320,7 +320,7 @@
/* Now look at the individual GPEs in this byte register */
for (j = 0; j < ACPI_GPE_REGISTER_WIDTH; j++) {
- gpe_event_info = &gpe_block->event_info[((acpi_size) i *
+ gpe_event_info = &gpe_block->event_info[((acpi_size)i *
ACPI_GPE_REGISTER_WIDTH)
+ j];
diff --git a/drivers/acpi/acpica/evhandler.c b/drivers/acpi/acpica/evhandler.c
index 0f6be89..24768ca 100644
--- a/drivers/acpi/acpica/evhandler.c
+++ b/drivers/acpi/acpica/evhandler.c
@@ -359,7 +359,7 @@
******************************************************************************/
acpi_status
-acpi_ev_install_space_handler(struct acpi_namespace_node * node,
+acpi_ev_install_space_handler(struct acpi_namespace_node *node,
acpi_adr_space_type space_id,
acpi_adr_space_handler handler,
acpi_adr_space_setup setup, void *context)
diff --git a/drivers/acpi/acpica/evmisc.c b/drivers/acpi/acpica/evmisc.c
index c67d78c..f51d43a 100644
--- a/drivers/acpi/acpica/evmisc.c
+++ b/drivers/acpi/acpica/evmisc.c
@@ -99,8 +99,7 @@
******************************************************************************/
acpi_status
-acpi_ev_queue_notify_request(struct acpi_namespace_node * node,
- u32 notify_value)
+acpi_ev_queue_notify_request(struct acpi_namespace_node *node, u32 notify_value)
{
union acpi_operand_object *obj_desc;
union acpi_operand_object *handler_list_head = NULL;
diff --git a/drivers/acpi/acpica/evregion.c b/drivers/acpi/acpica/evregion.c
index 63924d1..4c6f795 100644
--- a/drivers/acpi/acpica/evregion.c
+++ b/drivers/acpi/acpica/evregion.c
@@ -526,52 +526,6 @@
/*******************************************************************************
*
- * FUNCTION: acpi_ev_associate_reg_method
- *
- * PARAMETERS: region_obj - Region object
- *
- * RETURN: Status
- *
- * DESCRIPTION: Find and associate _REG method to a region
- *
- ******************************************************************************/
-
-void acpi_ev_associate_reg_method(union acpi_operand_object *region_obj)
-{
- acpi_name *reg_name_ptr = (acpi_name *) METHOD_NAME__REG;
- struct acpi_namespace_node *method_node;
- struct acpi_namespace_node *node;
- union acpi_operand_object *region_obj2;
- acpi_status status;
-
- ACPI_FUNCTION_TRACE(ev_associate_reg_method);
-
- region_obj2 = acpi_ns_get_secondary_object(region_obj);
- if (!region_obj2) {
- return_VOID;
- }
-
- node = region_obj->region.node->parent;
-
- /* Find any "_REG" method associated with this region definition */
-
- status =
- acpi_ns_search_one_scope(*reg_name_ptr, node, ACPI_TYPE_METHOD,
- &method_node);
- if (ACPI_SUCCESS(status)) {
- /*
- * The _REG method is optional and there can be only one per region
- * definition. This will be executed when the handler is attached
- * or removed
- */
- region_obj2->extra.method_REG = method_node;
- }
-
- return_VOID;
-}
-
-/*******************************************************************************
- *
* FUNCTION: acpi_ev_execute_reg_method
*
* PARAMETERS: region_obj - Region object
@@ -589,18 +543,42 @@
struct acpi_evaluate_info *info;
union acpi_operand_object *args[3];
union acpi_operand_object *region_obj2;
+ const acpi_name *reg_name_ptr =
+ ACPI_CAST_PTR(acpi_name, METHOD_NAME__REG);
+ struct acpi_namespace_node *method_node;
+ struct acpi_namespace_node *node;
acpi_status status;
ACPI_FUNCTION_TRACE(ev_execute_reg_method);
+ if (!acpi_gbl_namespace_initialized ||
+ region_obj->region.handler == NULL) {
+ return_ACPI_STATUS(AE_OK);
+ }
+
region_obj2 = acpi_ns_get_secondary_object(region_obj);
if (!region_obj2) {
return_ACPI_STATUS(AE_NOT_EXIST);
}
- if (region_obj2->extra.method_REG == NULL ||
- region_obj->region.handler == NULL ||
- !acpi_gbl_namespace_initialized) {
+ /*
+ * Find any "_REG" method associated with this region definition.
+ * The method should always be updated as this function may be
+ * invoked after a namespace change.
+ */
+ node = region_obj->region.node->parent;
+ status =
+ acpi_ns_search_one_scope(*reg_name_ptr, node, ACPI_TYPE_METHOD,
+ &method_node);
+ if (ACPI_SUCCESS(status)) {
+ /*
+ * The _REG method is optional and there can be only one per
+ * region definition. This will be executed when the handler is
+ * attached or removed.
+ */
+ region_obj2->extra.method_REG = method_node;
+ }
+ if (region_obj2->extra.method_REG == NULL) {
return_ACPI_STATUS(AE_OK);
}
diff --git a/drivers/acpi/acpica/evrgnini.c b/drivers/acpi/acpica/evrgnini.c
index fda869c..b6ea9c0 100644
--- a/drivers/acpi/acpica/evrgnini.c
+++ b/drivers/acpi/acpica/evrgnini.c
@@ -227,7 +227,7 @@
/* Install a handler for this PCI root bridge */
- status = acpi_install_address_space_handler((acpi_handle) pci_root_node, ACPI_ADR_SPACE_PCI_CONFIG, ACPI_DEFAULT_HANDLER, NULL, NULL);
+ status = acpi_install_address_space_handler((acpi_handle)pci_root_node, ACPI_ADR_SPACE_PCI_CONFIG, ACPI_DEFAULT_HANDLER, NULL, NULL);
if (ACPI_FAILURE(status)) {
if (status == AE_SAME_HANDLER) {
/*
@@ -518,7 +518,6 @@
return_ACPI_STATUS(AE_OK);
}
- acpi_ev_associate_reg_method(region_obj);
region_obj->common.flags |= AOPOBJ_OBJECT_INITIALIZED;
node = region_obj->region.node->parent;
diff --git a/drivers/acpi/acpica/evxfgpe.c b/drivers/acpi/acpica/evxfgpe.c
index 9045671..17cfef7 100644
--- a/drivers/acpi/acpica/evxfgpe.c
+++ b/drivers/acpi/acpica/evxfgpe.c
@@ -917,7 +917,7 @@
* the FADT-defined gpe blocks. Otherwise, the GPE block device.
*
******************************************************************************/
-acpi_status acpi_get_gpe_device(u32 index, acpi_handle * gpe_device)
+acpi_status acpi_get_gpe_device(u32 index, acpi_handle *gpe_device)
{
struct acpi_gpe_device_info info;
acpi_status status;
diff --git a/drivers/acpi/acpica/exconcat.c b/drivers/acpi/acpica/exconcat.c
new file mode 100644
index 0000000..2423fe0
--- /dev/null
+++ b/drivers/acpi/acpica/exconcat.c
@@ -0,0 +1,439 @@
+/******************************************************************************
+ *
+ * Module Name: exconcat - Concatenate-type AML operators
+ *
+ *****************************************************************************/
+
+/*
+ * Copyright (C) 2000 - 2016, Intel Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ * substantially similar to the "NO WARRANTY" disclaimer below
+ * ("Disclaimer") and any redistribution must be conditioned upon
+ * including a substantially similar Disclaimer requirement for further
+ * binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ * of any contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ */
+
+#include <acpi/acpi.h>
+#include "accommon.h"
+#include "acinterp.h"
+#include "amlresrc.h"
+
+#define _COMPONENT ACPI_EXECUTER
+ACPI_MODULE_NAME("exconcat")
+
+/* Local Prototypes */
+static acpi_status
+acpi_ex_convert_to_object_type_string(union acpi_operand_object *obj_desc,
+ union acpi_operand_object **result_desc);
+
+/*******************************************************************************
+ *
+ * FUNCTION: acpi_ex_do_concatenate
+ *
+ * PARAMETERS: operand0 - First source object
+ * operand1 - Second source object
+ * actual_return_desc - Where to place the return object
+ * walk_state - Current walk state
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Concatenate two objects with the ACPI-defined conversion
+ * rules as necessary.
+ * NOTE:
+ * Per the ACPI spec (up to 6.1), Concatenate only supports Integer,
+ * String, and Buffer objects. However, we support all objects here
+ * as an extension. This improves the usefulness of both Concatenate
+ * and the Printf/Fprintf macros. The extension returns a string
+ * describing the object type for the other objects.
+ * 02/2016.
+ *
+ ******************************************************************************/
+
+acpi_status
+acpi_ex_do_concatenate(union acpi_operand_object *operand0,
+ union acpi_operand_object *operand1,
+ union acpi_operand_object **actual_return_desc,
+ struct acpi_walk_state *walk_state)
+{
+ union acpi_operand_object *local_operand0 = operand0;
+ union acpi_operand_object *local_operand1 = operand1;
+ union acpi_operand_object *temp_operand1 = NULL;
+ union acpi_operand_object *return_desc;
+ char *buffer;
+ acpi_object_type operand0_type;
+ acpi_object_type operand1_type;
+ acpi_status status;
+
+ ACPI_FUNCTION_TRACE(ex_do_concatenate);
+
+ /* Operand 0 preprocessing */
+
+ switch (operand0->common.type) {
+ case ACPI_TYPE_INTEGER:
+ case ACPI_TYPE_STRING:
+ case ACPI_TYPE_BUFFER:
+
+ operand0_type = operand0->common.type;
+ break;
+
+ default:
+
+ /* For all other types, get the "object type" string */
+
+ status =
+ acpi_ex_convert_to_object_type_string(operand0,
+ &local_operand0);
+ if (ACPI_FAILURE(status)) {
+ goto cleanup;
+ }
+
+ operand0_type = ACPI_TYPE_STRING;
+ break;
+ }
+
+ /* Operand 1 preprocessing */
+
+ switch (operand1->common.type) {
+ case ACPI_TYPE_INTEGER:
+ case ACPI_TYPE_STRING:
+ case ACPI_TYPE_BUFFER:
+
+ operand1_type = operand1->common.type;
+ break;
+
+ default:
+
+ /* For all other types, get the "object type" string */
+
+ status =
+ acpi_ex_convert_to_object_type_string(operand1,
+ &local_operand1);
+ if (ACPI_FAILURE(status)) {
+ goto cleanup;
+ }
+
+ operand1_type = ACPI_TYPE_STRING;
+ break;
+ }
+
+ /*
+ * Convert the second operand if necessary. The first operand (0)
+ * determines the type of the second operand (1) (See the Data Types
+ * section of the ACPI specification). Both object types are
+ * guaranteed to be either Integer/String/Buffer by the operand
+ * resolution mechanism.
+ */
+ switch (operand0_type) {
+ case ACPI_TYPE_INTEGER:
+
+ status =
+ acpi_ex_convert_to_integer(local_operand1, &temp_operand1,
+ 16);
+ break;
+
+ case ACPI_TYPE_BUFFER:
+
+ status =
+ acpi_ex_convert_to_buffer(local_operand1, &temp_operand1);
+ break;
+
+ case ACPI_TYPE_STRING:
+
+ switch (operand1_type) {
+ case ACPI_TYPE_INTEGER:
+ case ACPI_TYPE_STRING:
+ case ACPI_TYPE_BUFFER:
+
+ /* Other types have already been converted to string */
+
+ status =
+ acpi_ex_convert_to_string(local_operand1,
+ &temp_operand1,
+ ACPI_IMPLICIT_CONVERT_HEX);
+ break;
+
+ default:
+
+ status = AE_OK;
+ break;
+ }
+ break;
+
+ default:
+
+ ACPI_ERROR((AE_INFO, "Invalid object type: 0x%X",
+ operand0->common.type));
+ status = AE_AML_INTERNAL;
+ }
+
+ if (ACPI_FAILURE(status)) {
+ goto cleanup;
+ }
+
+ /* Take care with any newly created operand objects */
+
+ if ((local_operand1 != operand1) && (local_operand1 != temp_operand1)) {
+ acpi_ut_remove_reference(local_operand1);
+ }
+
+ local_operand1 = temp_operand1;
+
+ /*
+ * Both operands are now known to be the same object type
+ * (Both are Integer, String, or Buffer), and we can now perform
+ * the concatenation.
+ *
+ * There are three cases to handle, as per the ACPI spec:
+ *
+ * 1) Two Integers concatenated to produce a new Buffer
+ * 2) Two Strings concatenated to produce a new String
+ * 3) Two Buffers concatenated to produce a new Buffer
+ */
+ switch (operand0_type) {
+ case ACPI_TYPE_INTEGER:
+
+ /* Result of two Integers is a Buffer */
+ /* Need enough buffer space for two integers */
+
+ return_desc = acpi_ut_create_buffer_object((acpi_size)
+ ACPI_MUL_2
+ (acpi_gbl_integer_byte_width));
+ if (!return_desc) {
+ status = AE_NO_MEMORY;
+ goto cleanup;
+ }
+
+ buffer = (char *)return_desc->buffer.pointer;
+
+ /* Copy the first integer, LSB first */
+
+ memcpy(buffer, &operand0->integer.value,
+ acpi_gbl_integer_byte_width);
+
+ /* Copy the second integer (LSB first) after the first */
+
+ memcpy(buffer + acpi_gbl_integer_byte_width,
+ &local_operand1->integer.value,
+ acpi_gbl_integer_byte_width);
+ break;
+
+ case ACPI_TYPE_STRING:
+
+ /* Result of two Strings is a String */
+
+ return_desc = acpi_ut_create_string_object(((acpi_size)
+ local_operand0->
+ string.length +
+ local_operand1->
+ string.length));
+ if (!return_desc) {
+ status = AE_NO_MEMORY;
+ goto cleanup;
+ }
+
+ buffer = return_desc->string.pointer;
+
+ /* Concatenate the strings */
+
+ strcpy(buffer, local_operand0->string.pointer);
+ strcat(buffer, local_operand1->string.pointer);
+ break;
+
+ case ACPI_TYPE_BUFFER:
+
+ /* Result of two Buffers is a Buffer */
+
+ return_desc = acpi_ut_create_buffer_object(((acpi_size)
+ operand0->buffer.
+ length +
+ local_operand1->
+ buffer.length));
+ if (!return_desc) {
+ status = AE_NO_MEMORY;
+ goto cleanup;
+ }
+
+ buffer = (char *)return_desc->buffer.pointer;
+
+ /* Concatenate the buffers */
+
+ memcpy(buffer, operand0->buffer.pointer,
+ operand0->buffer.length);
+ memcpy(buffer + operand0->buffer.length,
+ local_operand1->buffer.pointer,
+ local_operand1->buffer.length);
+ break;
+
+ default:
+
+ /* Invalid object type, should not happen here */
+
+ ACPI_ERROR((AE_INFO, "Invalid object type: 0x%X",
+ operand0->common.type));
+ status = AE_AML_INTERNAL;
+ goto cleanup;
+ }
+
+ *actual_return_desc = return_desc;
+
+cleanup:
+ if (local_operand0 != operand0) {
+ acpi_ut_remove_reference(local_operand0);
+ }
+
+ if (local_operand1 != operand1) {
+ acpi_ut_remove_reference(local_operand1);
+ }
+
+ return_ACPI_STATUS(status);
+}
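
For reference, the Integer + Integer case above is easy to reproduce outside the interpreter. A minimal host-side sketch, with plain C types standing in for ACPICA operand objects and assuming a little-endian host (which matches the ACPI integer byte order):

#include <stdint.h>
#include <string.h>
#include <stdio.h>

int main(void)
{
	uint64_t op0 = 0x1234, op1 = 0xABCD;
	uint8_t buf[2 * sizeof(uint64_t)];	/* 2 * integer byte width */

	/* First integer, LSB first, then the second right after it */
	memcpy(buf, &op0, sizeof(op0));
	memcpy(buf + sizeof(op0), &op1, sizeof(op1));

	for (size_t i = 0; i < sizeof(buf); i++)
		printf("%02X ", buf[i]);
	printf("\n");	/* 34 12 00 00 00 00 00 00 CD AB 00 00 00 00 00 00 */
	return 0;
}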
+
+/*******************************************************************************
+ *
+ * FUNCTION: acpi_ex_convert_to_object_type_string
+ *
+ * PARAMETERS: obj_desc - Object to be converted
+ * return_desc - Where to place the return object
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Convert an object of arbitrary type to a string object that
+ * contains the namestring for the object. Used for the
+ * concatenate operator.
+ *
+ ******************************************************************************/
+
+static acpi_status
+acpi_ex_convert_to_object_type_string(union acpi_operand_object *obj_desc,
+ union acpi_operand_object **result_desc)
+{
+ union acpi_operand_object *return_desc;
+ const char *type_string;
+
+ type_string = acpi_ut_get_type_name(obj_desc->common.type);
+
+ return_desc = acpi_ut_create_string_object(((acpi_size)strlen(type_string) + 9)); /* 9 For "[ Object]" */
+ if (!return_desc) {
+ return (AE_NO_MEMORY);
+ }
+
+ strcpy(return_desc->string.pointer, "[");
+ strcat(return_desc->string.pointer, type_string);
+ strcat(return_desc->string.pointer, " Object]");
+
+ *result_desc = return_desc;
+ return (AE_OK);
+}
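
A quick sketch of the string this helper builds, with snprintf standing in for the ACPICA string-object plumbing ("Mutex" is just an example type name):

#include <stdio.h>

int main(void)
{
	char dest[64];
	const char *type_string = "Mutex";	/* e.g. a type-name lookup result */

	/* strlen(type_string) + 9 covers '[' plus " Object]" (9 chars) */
	snprintf(dest, sizeof(dest), "[%s Object]", type_string);
	printf("%s\n", dest);			/* "[Mutex Object]" */
	return 0;
}

So concatenating a string with, say, a Mutex operand appends "[Mutex Object]" to the result.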
+
+/*******************************************************************************
+ *
+ * FUNCTION: acpi_ex_concat_template
+ *
+ * PARAMETERS: operand0 - First source object
+ * operand1 - Second source object
+ * actual_return_desc - Where to place the return object
+ * walk_state - Current walk state
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Concatenate two resource templates
+ *
+ ******************************************************************************/
+
+acpi_status
+acpi_ex_concat_template(union acpi_operand_object *operand0,
+ union acpi_operand_object *operand1,
+ union acpi_operand_object **actual_return_desc,
+ struct acpi_walk_state *walk_state)
+{
+ acpi_status status;
+ union acpi_operand_object *return_desc;
+ u8 *new_buf;
+ u8 *end_tag;
+ acpi_size length0;
+ acpi_size length1;
+ acpi_size new_length;
+
+ ACPI_FUNCTION_TRACE(ex_concat_template);
+
+ /*
+ * Find the end_tag descriptor in each resource template.
+ * Note1: returned pointers point TO the end_tag, not past it.
+ * Note2: zero-length buffers are allowed; treated like one end_tag
+ */
+
+ /* Get the length of the first resource template */
+
+ status = acpi_ut_get_resource_end_tag(operand0, &end_tag);
+ if (ACPI_FAILURE(status)) {
+ return_ACPI_STATUS(status);
+ }
+
+ length0 = ACPI_PTR_DIFF(end_tag, operand0->buffer.pointer);
+
+ /* Get the length of the second resource template */
+
+ status = acpi_ut_get_resource_end_tag(operand1, &end_tag);
+ if (ACPI_FAILURE(status)) {
+ return_ACPI_STATUS(status);
+ }
+
+ length1 = ACPI_PTR_DIFF(end_tag, operand1->buffer.pointer);
+
+ /* Combine both lengths, minimum size will be 2 for end_tag */
+
+ new_length = length0 + length1 + sizeof(struct aml_resource_end_tag);
+
+ /* Create a new buffer object for the result (with one end_tag) */
+
+ return_desc = acpi_ut_create_buffer_object(new_length);
+ if (!return_desc) {
+ return_ACPI_STATUS(AE_NO_MEMORY);
+ }
+
+ /*
+ * Copy the templates to the new buffer, 0 first, then 1 follows. One
+ * end_tag descriptor is copied from Operand1.
+ */
+ new_buf = return_desc->buffer.pointer;
+ memcpy(new_buf, operand0->buffer.pointer, length0);
+ memcpy(new_buf + length0, operand1->buffer.pointer, length1);
+
+ /* Insert the end_tag and set the checksum to zero, which means "ignore checksum" */
+
+ new_buf[new_length - 1] = 0;
+ new_buf[new_length - 2] = ACPI_RESOURCE_NAME_END_TAG | 1;
+
+ /* Return the completed resource template */
+
+ *actual_return_desc = return_desc;
+ return_ACPI_STATUS(AE_OK);
+}
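
The resulting layout is easiest to see with concrete numbers. A condensed sketch, assuming two templates whose payloads (measured up to, but excluding, their end_tags) are already known; 0x78 mirrors the small-descriptor end_tag encoding:

#include <stdint.h>
#include <string.h>

#define RESOURCE_NAME_END_TAG	0x78	/* small descriptor, type 0x0F */

static void concat_templates(uint8_t *dst,
			     const uint8_t *t0, size_t length0,
			     const uint8_t *t1, size_t length1)
{
	size_t new_length = length0 + length1 + 2;	/* one end_tag */

	memcpy(dst, t0, length0);
	memcpy(dst + length0, t1, length1);
	dst[new_length - 2] = RESOURCE_NAME_END_TAG | 1;	/* 0x79 */
	dst[new_length - 1] = 0;	/* zero checksum: "ignore checksum" */
}

Called with length0 = 10 and length1 = 6, this produces an 18-byte template whose last two bytes are 0x79 0x00.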
diff --git a/drivers/acpi/acpica/exconfig.c b/drivers/acpi/acpica/exconfig.c
index f741613..a1d177d 100644
--- a/drivers/acpi/acpica/exconfig.c
+++ b/drivers/acpi/acpica/exconfig.c
@@ -118,7 +118,9 @@
/* Execute any module-level code that was found in the table */
acpi_ex_exit_interpreter();
- acpi_ns_exec_module_code_list();
+ if (acpi_gbl_group_module_level_code) {
+ acpi_ns_exec_module_code_list();
+ }
acpi_ex_enter_interpreter();
/*
diff --git a/drivers/acpi/acpica/exconvrt.c b/drivers/acpi/acpica/exconvrt.c
index 0b9f2c1..b7e9b3d 100644
--- a/drivers/acpi/acpica/exconvrt.c
+++ b/drivers/acpi/acpica/exconvrt.c
@@ -124,7 +124,9 @@
* of ACPI 3.0) is that the to_integer() operator allows both decimal
* and hexadecimal strings (hex prefixed with "0x").
*/
- status = acpi_ut_strtoul64((char *)pointer, flags, &result);
+ status = acpi_ut_strtoul64((char *)pointer, flags,
+ acpi_gbl_integer_byte_width,
+ &result);
if (ACPI_FAILURE(status)) {
return_ACPI_STATUS(status);
}
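
The new byte-width argument exists because AML integers are 32-bit when the DSDT declares an old compliance revision and 64-bit otherwise. A hypothetical stand-alone illustration of width-limited conversion (the truncation policy shown here is an assumption for illustration, not the exact acpi_ut_strtoul64 behavior):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	unsigned integer_byte_width = 4;	/* 32-bit AML integers */
	uint64_t v = strtoull("0x123456789A", NULL, 16);

	if (integer_byte_width == 4)
		v &= 0xFFFFFFFFu;	/* keep only the low 32 bits */
	printf("0x%llX\n", (unsigned long long)v);	/* 0x3456789A */
	return 0;
}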
@@ -439,7 +441,7 @@
* Need enough space for one ASCII integer (plus null terminator)
*/
return_desc =
- acpi_ut_create_string_object((acpi_size) string_length);
+ acpi_ut_create_string_object((acpi_size)string_length);
if (!return_desc) {
return_ACPI_STATUS(AE_NO_MEMORY);
}
@@ -518,7 +520,7 @@
}
return_desc =
- acpi_ut_create_string_object((acpi_size) string_length);
+ acpi_ut_create_string_object((acpi_size)string_length);
if (!return_desc) {
return_ACPI_STATUS(AE_NO_MEMORY);
}
diff --git a/drivers/acpi/acpica/excreate.c b/drivers/acpi/acpica/excreate.c
index bea9612..613ba6e 100644
--- a/drivers/acpi/acpica/excreate.c
+++ b/drivers/acpi/acpica/excreate.c
@@ -394,7 +394,7 @@
obj_desc->processor.proc_id = (u8) operand[1]->integer.value;
obj_desc->processor.length = (u8) operand[3]->integer.value;
obj_desc->processor.address =
- (acpi_io_address) operand[2]->integer.value;
+ (acpi_io_address)operand[2]->integer.value;
/* Install the processor object in the parent Node */
diff --git a/drivers/acpi/acpica/exdump.c b/drivers/acpi/acpica/exdump.c
index ee30974..fce6b2e 100644
--- a/drivers/acpi/acpica/exdump.c
+++ b/drivers/acpi/acpica/exdump.c
@@ -55,9 +55,9 @@
*/
#if defined(ACPI_DEBUG_OUTPUT) || defined(ACPI_DEBUGGER)
/* Local prototypes */
-static void acpi_ex_out_string(char *title, char *value);
+static void acpi_ex_out_string(const char *title, const char *value);
-static void acpi_ex_out_pointer(char *title, void *value);
+static void acpi_ex_out_pointer(const char *title, const void *value);
static void
acpi_ex_dump_object(union acpi_operand_object *obj_desc,
@@ -365,8 +365,7 @@
struct acpi_exdump_info *info)
{
u8 *target;
- char *name;
- const char *reference_name;
+ const char *name;
u8 count;
union acpi_operand_object *start;
union acpi_operand_object *data = NULL;
@@ -459,9 +458,9 @@
case ACPI_EXD_REFERENCE:
- reference_name = acpi_ut_get_reference_name(obj_desc);
acpi_ex_out_string("Class Name",
- ACPI_CAST_PTR(char, reference_name));
+ acpi_ut_get_reference_name
+ (obj_desc));
acpi_ex_dump_reference_obj(obj_desc);
break;
@@ -934,12 +933,12 @@
*
******************************************************************************/
-static void acpi_ex_out_string(char *title, char *value)
+static void acpi_ex_out_string(const char *title, const char *value)
{
acpi_os_printf("%20s : %s\n", title, value);
}
-static void acpi_ex_out_pointer(char *title, void *value)
+static void acpi_ex_out_pointer(const char *title, const void *value)
{
acpi_os_printf("%20s : %p\n", title, value);
}
diff --git a/drivers/acpi/acpica/exfield.c b/drivers/acpi/acpica/exfield.c
index d5d8020..d7d3ee3 100644
--- a/drivers/acpi/acpica/exfield.c
+++ b/drivers/acpi/acpica/exfield.c
@@ -126,7 +126,7 @@
******************************************************************************/
acpi_status
-acpi_ex_read_data_from_field(struct acpi_walk_state * walk_state,
+acpi_ex_read_data_from_field(struct acpi_walk_state *walk_state,
union acpi_operand_object *obj_desc,
union acpi_operand_object **ret_buffer_desc)
{
@@ -233,7 +233,7 @@
* Note: Field.length is in bits.
*/
length =
- (acpi_size) ACPI_ROUND_BITS_UP_TO_BYTES(obj_desc->field.bit_length);
+ (acpi_size)ACPI_ROUND_BITS_UP_TO_BYTES(obj_desc->field.bit_length);
if (length > acpi_gbl_integer_byte_width) {
diff --git a/drivers/acpi/acpica/exfldio.c b/drivers/acpi/acpica/exfldio.c
index f0c5ed0..ee76d29 100644
--- a/drivers/acpi/acpica/exfldio.c
+++ b/drivers/acpi/acpica/exfldio.c
@@ -164,7 +164,7 @@
if (ACPI_ROUND_UP(rgn_desc->region.length,
obj_desc->common_field.
access_byte_width) >=
- ((acpi_size) obj_desc->common_field.
+ ((acpi_size)obj_desc->common_field.
base_byte_offset +
obj_desc->common_field.access_byte_width +
field_datum_byte_offset)) {
@@ -897,17 +897,9 @@
access_bit_width = ACPI_MUL_8(obj_desc->common_field.access_byte_width);
- /*
- * Create the bitmasks used for bit insertion.
- * Note: This if/else is used to bypass compiler differences with the
- * shift operator
- */
- if (access_bit_width == ACPI_INTEGER_BIT_SIZE) {
- width_mask = ACPI_UINT64_MAX;
- } else {
- width_mask = ACPI_MASK_BITS_ABOVE(access_bit_width);
- }
+ /* Create the bitmasks used for bit insertion */
+ width_mask = ACPI_MASK_BITS_ABOVE_64(access_bit_width);
mask = width_mask &
ACPI_MASK_BITS_BELOW(obj_desc->common_field.start_field_bit_offset);
diff --git a/drivers/acpi/acpica/exmisc.c b/drivers/acpi/acpica/exmisc.c
index db30ae4..4f7e667 100644
--- a/drivers/acpi/acpica/exmisc.c
+++ b/drivers/acpi/acpica/exmisc.c
@@ -45,7 +45,6 @@
#include "accommon.h"
#include "acinterp.h"
#include "amlcode.h"
-#include "amlresrc.h"
#define _COMPONENT ACPI_EXECUTER
ACPI_MODULE_NAME("exmisc")
@@ -140,295 +139,6 @@
/*******************************************************************************
*
- * FUNCTION: acpi_ex_concat_template
- *
- * PARAMETERS: operand0 - First source object
- * operand1 - Second source object
- * actual_return_desc - Where to place the return object
- * walk_state - Current walk state
- *
- * RETURN: Status
- *
- * DESCRIPTION: Concatenate two resource templates
- *
- ******************************************************************************/
-
-acpi_status
-acpi_ex_concat_template(union acpi_operand_object *operand0,
- union acpi_operand_object *operand1,
- union acpi_operand_object **actual_return_desc,
- struct acpi_walk_state *walk_state)
-{
- acpi_status status;
- union acpi_operand_object *return_desc;
- u8 *new_buf;
- u8 *end_tag;
- acpi_size length0;
- acpi_size length1;
- acpi_size new_length;
-
- ACPI_FUNCTION_TRACE(ex_concat_template);
-
- /*
- * Find the end_tag descriptor in each resource template.
- * Note1: returned pointers point TO the end_tag, not past it.
- * Note2: zero-length buffers are allowed; treated like one end_tag
- */
-
- /* Get the length of the first resource template */
-
- status = acpi_ut_get_resource_end_tag(operand0, &end_tag);
- if (ACPI_FAILURE(status)) {
- return_ACPI_STATUS(status);
- }
-
- length0 = ACPI_PTR_DIFF(end_tag, operand0->buffer.pointer);
-
- /* Get the length of the second resource template */
-
- status = acpi_ut_get_resource_end_tag(operand1, &end_tag);
- if (ACPI_FAILURE(status)) {
- return_ACPI_STATUS(status);
- }
-
- length1 = ACPI_PTR_DIFF(end_tag, operand1->buffer.pointer);
-
- /* Combine both lengths, minimum size will be 2 for end_tag */
-
- new_length = length0 + length1 + sizeof(struct aml_resource_end_tag);
-
- /* Create a new buffer object for the result (with one end_tag) */
-
- return_desc = acpi_ut_create_buffer_object(new_length);
- if (!return_desc) {
- return_ACPI_STATUS(AE_NO_MEMORY);
- }
-
- /*
- * Copy the templates to the new buffer, 0 first, then 1 follows. One
- * end_tag descriptor is copied from Operand1.
- */
- new_buf = return_desc->buffer.pointer;
- memcpy(new_buf, operand0->buffer.pointer, length0);
- memcpy(new_buf + length0, operand1->buffer.pointer, length1);
-
- /* Insert end_tag and set the checksum to zero, means "ignore checksum" */
-
- new_buf[new_length - 1] = 0;
- new_buf[new_length - 2] = ACPI_RESOURCE_NAME_END_TAG | 1;
-
- /* Return the completed resource template */
-
- *actual_return_desc = return_desc;
- return_ACPI_STATUS(AE_OK);
-}
-
-/*******************************************************************************
- *
- * FUNCTION: acpi_ex_do_concatenate
- *
- * PARAMETERS: operand0 - First source object
- * operand1 - Second source object
- * actual_return_desc - Where to place the return object
- * walk_state - Current walk state
- *
- * RETURN: Status
- *
- * DESCRIPTION: Concatenate two objects OF THE SAME TYPE.
- *
- ******************************************************************************/
-
-acpi_status
-acpi_ex_do_concatenate(union acpi_operand_object *operand0,
- union acpi_operand_object *operand1,
- union acpi_operand_object **actual_return_desc,
- struct acpi_walk_state *walk_state)
-{
- union acpi_operand_object *local_operand1 = operand1;
- union acpi_operand_object *return_desc;
- char *new_buf;
- const char *type_string;
- acpi_status status;
-
- ACPI_FUNCTION_TRACE(ex_do_concatenate);
-
- /*
- * Convert the second operand if necessary. The first operand
- * determines the type of the second operand, (See the Data Types
- * section of the ACPI specification.) Both object types are
- * guaranteed to be either Integer/String/Buffer by the operand
- * resolution mechanism.
- */
- switch (operand0->common.type) {
- case ACPI_TYPE_INTEGER:
-
- status =
- acpi_ex_convert_to_integer(operand1, &local_operand1, 16);
- break;
-
- case ACPI_TYPE_STRING:
- /*
- * Per the ACPI spec, Concatenate only supports int/str/buf.
- * However, we support all objects here as an extension.
- * This improves the usefulness of the Printf() macro.
- * 12/2015.
- */
- switch (operand1->common.type) {
- case ACPI_TYPE_INTEGER:
- case ACPI_TYPE_STRING:
- case ACPI_TYPE_BUFFER:
-
- status =
- acpi_ex_convert_to_string(operand1, &local_operand1,
- ACPI_IMPLICIT_CONVERT_HEX);
- break;
-
- default:
- /*
- * Just emit a string containing the object type.
- */
- type_string =
- acpi_ut_get_type_name(operand1->common.type);
-
- local_operand1 = acpi_ut_create_string_object(((acpi_size) strlen(type_string) + 9)); /* 9 For "[Object]" */
- if (!local_operand1) {
- status = AE_NO_MEMORY;
- goto cleanup;
- }
-
- strcpy(local_operand1->string.pointer, "[");
- strcat(local_operand1->string.pointer, type_string);
- strcat(local_operand1->string.pointer, " Object]");
- status = AE_OK;
- break;
- }
- break;
-
- case ACPI_TYPE_BUFFER:
-
- status = acpi_ex_convert_to_buffer(operand1, &local_operand1);
- break;
-
- default:
-
- ACPI_ERROR((AE_INFO, "Invalid object type: 0x%X",
- operand0->common.type));
- status = AE_AML_INTERNAL;
- }
-
- if (ACPI_FAILURE(status)) {
- goto cleanup;
- }
-
- /*
- * Both operands are now known to be the same object type
- * (Both are Integer, String, or Buffer), and we can now perform the
- * concatenation.
- */
-
- /*
- * There are three cases to handle:
- *
- * 1) Two Integers concatenated to produce a new Buffer
- * 2) Two Strings concatenated to produce a new String
- * 3) Two Buffers concatenated to produce a new Buffer
- */
- switch (operand0->common.type) {
- case ACPI_TYPE_INTEGER:
-
- /* Result of two Integers is a Buffer */
- /* Need enough buffer space for two integers */
-
- return_desc = acpi_ut_create_buffer_object((acpi_size)
- ACPI_MUL_2
- (acpi_gbl_integer_byte_width));
- if (!return_desc) {
- status = AE_NO_MEMORY;
- goto cleanup;
- }
-
- new_buf = (char *)return_desc->buffer.pointer;
-
- /* Copy the first integer, LSB first */
-
- memcpy(new_buf, &operand0->integer.value,
- acpi_gbl_integer_byte_width);
-
- /* Copy the second integer (LSB first) after the first */
-
- memcpy(new_buf + acpi_gbl_integer_byte_width,
- &local_operand1->integer.value,
- acpi_gbl_integer_byte_width);
- break;
-
- case ACPI_TYPE_STRING:
-
- /* Result of two Strings is a String */
-
- return_desc = acpi_ut_create_string_object(((acpi_size)
- operand0->string.
- length +
- local_operand1->
- string.length));
- if (!return_desc) {
- status = AE_NO_MEMORY;
- goto cleanup;
- }
-
- new_buf = return_desc->string.pointer;
-
- /* Concatenate the strings */
-
- strcpy(new_buf, operand0->string.pointer);
- strcat(new_buf, local_operand1->string.pointer);
- break;
-
- case ACPI_TYPE_BUFFER:
-
- /* Result of two Buffers is a Buffer */
-
- return_desc = acpi_ut_create_buffer_object(((acpi_size)
- operand0->buffer.
- length +
- local_operand1->
- buffer.length));
- if (!return_desc) {
- status = AE_NO_MEMORY;
- goto cleanup;
- }
-
- new_buf = (char *)return_desc->buffer.pointer;
-
- /* Concatenate the buffers */
-
- memcpy(new_buf, operand0->buffer.pointer,
- operand0->buffer.length);
- memcpy(new_buf + operand0->buffer.length,
- local_operand1->buffer.pointer,
- local_operand1->buffer.length);
- break;
-
- default:
-
- /* Invalid object type, should not happen here */
-
- ACPI_ERROR((AE_INFO, "Invalid object type: 0x%X",
- operand0->common.type));
- status = AE_AML_INTERNAL;
- goto cleanup;
- }
-
- *actual_return_desc = return_desc;
-
-cleanup:
- if (local_operand1 != operand1) {
- acpi_ut_remove_reference(local_operand1);
- }
- return_ACPI_STATUS(status);
-}
-
-/*******************************************************************************
- *
* FUNCTION: acpi_ex_do_math_op
*
* PARAMETERS: opcode - AML opcode
diff --git a/drivers/acpi/acpica/exnames.c b/drivers/acpi/acpica/exnames.c
index 27c11ab..3d6af93 100644
--- a/drivers/acpi/acpica/exnames.c
+++ b/drivers/acpi/acpica/exnames.c
@@ -178,7 +178,7 @@
for (index = 0;
(index < ACPI_NAME_SIZE)
- && (acpi_ut_valid_acpi_char(*aml_address, 0)); index++) {
+ && (acpi_ut_valid_name_char(*aml_address, 0)); index++) {
char_buf[index] = *aml_address++;
ACPI_DEBUG_PRINT((ACPI_DB_LOAD, "%c\n", char_buf[index]));
}
diff --git a/drivers/acpi/acpica/exoparg3.c b/drivers/acpi/acpica/exoparg3.c
index 5aa21c4..69e4e26 100644
--- a/drivers/acpi/acpica/exoparg3.c
+++ b/drivers/acpi/acpica/exoparg3.c
@@ -184,7 +184,7 @@
/* Get the Integer values from the objects */
index = operand[1]->integer.value;
- length = (acpi_size) operand[2]->integer.value;
+ length = (acpi_size)operand[2]->integer.value;
/*
* If the index is beyond the length of the String/Buffer, or if the
@@ -198,8 +198,8 @@
else if ((index + length) > operand[0]->string.length) {
length =
- (acpi_size) operand[0]->string.length -
- (acpi_size) index;
+ (acpi_size)operand[0]->string.length -
+ (acpi_size)index;
}
/* Strings always have a sub-pointer, not so for buffers */
@@ -209,7 +209,7 @@
/* Always allocate a new buffer for the String */
- buffer = ACPI_ALLOCATE_ZEROED((acpi_size) length + 1);
+ buffer = ACPI_ALLOCATE_ZEROED((acpi_size)length + 1);
if (!buffer) {
status = AE_NO_MEMORY;
goto cleanup;
diff --git a/drivers/acpi/acpica/exoparg6.c b/drivers/acpi/acpica/exoparg6.c
index e2b6348..786d53b 100644
--- a/drivers/acpi/acpica/exoparg6.c
+++ b/drivers/acpi/acpica/exoparg6.c
@@ -207,7 +207,7 @@
*
******************************************************************************/
-acpi_status acpi_ex_opcode_6A_0T_1R(struct acpi_walk_state * walk_state)
+acpi_status acpi_ex_opcode_6A_0T_1R(struct acpi_walk_state *walk_state)
{
union acpi_operand_object **operand = &walk_state->operands[0];
union acpi_operand_object *return_desc = NULL;
diff --git a/drivers/acpi/acpica/exregion.c b/drivers/acpi/acpica/exregion.c
index 076074d..31b381c 100644
--- a/drivers/acpi/acpica/exregion.c
+++ b/drivers/acpi/acpica/exregion.c
@@ -325,15 +325,15 @@
switch (function) {
case ACPI_READ:
- status = acpi_hw_read_port((acpi_io_address) address,
+ status = acpi_hw_read_port((acpi_io_address)address,
&value32, bit_width);
*value = value32;
break;
case ACPI_WRITE:
- status = acpi_hw_write_port((acpi_io_address) address,
- (u32) * value, bit_width);
+ status = acpi_hw_write_port((acpi_io_address)address,
+ (u32)*value, bit_width);
break;
default:
diff --git a/drivers/acpi/acpica/exresnte.c b/drivers/acpi/acpica/exresnte.c
index c1e8bfb..a183cb7 100644
--- a/drivers/acpi/acpica/exresnte.c
+++ b/drivers/acpi/acpica/exresnte.c
@@ -93,7 +93,7 @@
*/
node = *object_ptr;
source_desc = acpi_ns_get_attached_object(node);
- entry_type = acpi_ns_get_type((acpi_handle) node);
+ entry_type = acpi_ns_get_type((acpi_handle)node);
ACPI_DEBUG_PRINT((ACPI_DB_EXEC, "Entry=%p SourceDesc=%p [%s]\n",
node, source_desc,
@@ -106,7 +106,7 @@
node = ACPI_CAST_PTR(struct acpi_namespace_node, node->object);
source_desc = acpi_ns_get_attached_object(node);
- entry_type = acpi_ns_get_type((acpi_handle) node);
+ entry_type = acpi_ns_get_type((acpi_handle)node);
*object_ptr = node;
}
diff --git a/drivers/acpi/acpica/exresolv.c b/drivers/acpi/acpica/exresolv.c
index fedacf1..e1d3878 100644
--- a/drivers/acpi/acpica/exresolv.c
+++ b/drivers/acpi/acpica/exresolv.c
@@ -334,7 +334,7 @@
acpi_status
acpi_ex_resolve_multiple(struct acpi_walk_state *walk_state,
union acpi_operand_object *operand,
- acpi_object_type * return_type,
+ acpi_object_type *return_type,
union acpi_operand_object **return_desc)
{
union acpi_operand_object *obj_desc = ACPI_CAST_PTR(void, operand);
diff --git a/drivers/acpi/acpica/exresop.c b/drivers/acpi/acpica/exresop.c
index cc2c26c..27b41fd 100644
--- a/drivers/acpi/acpica/exresop.c
+++ b/drivers/acpi/acpica/exresop.c
@@ -131,8 +131,8 @@
acpi_status
acpi_ex_resolve_operands(u16 opcode,
- union acpi_operand_object ** stack_ptr,
- struct acpi_walk_state * walk_state)
+ union acpi_operand_object **stack_ptr,
+ struct acpi_walk_state *walk_state)
{
union acpi_operand_object *obj_desc;
acpi_status status = AE_OK;
diff --git a/drivers/acpi/acpica/exstorob.c b/drivers/acpi/acpica/exstorob.c
index 28b7248..1dab827 100644
--- a/drivers/acpi/acpica/exstorob.c
+++ b/drivers/acpi/acpica/exstorob.c
@@ -188,7 +188,7 @@
* Clear old string and copy in the new one
*/
memset(target_desc->string.pointer, 0,
- (acpi_size) target_desc->string.length + 1);
+ (acpi_size)target_desc->string.length + 1);
memcpy(target_desc->string.pointer, buffer, length);
} else {
/*
@@ -204,7 +204,7 @@
}
target_desc->string.pointer =
- ACPI_ALLOCATE_ZEROED((acpi_size) length + 1);
+ ACPI_ALLOCATE_ZEROED((acpi_size)length + 1);
if (!target_desc->string.pointer) {
return_ACPI_STATUS(AE_NO_MEMORY);
diff --git a/drivers/acpi/acpica/exutils.c b/drivers/acpi/acpica/exutils.c
index 4d44bc1..425f133 100644
--- a/drivers/acpi/acpica/exutils.c
+++ b/drivers/acpi/acpica/exutils.c
@@ -301,8 +301,8 @@
*
* FUNCTION: acpi_ex_eisa_id_to_string
*
- * PARAMETERS: compressed_id - EISAID to be converted
- * out_string - Where to put the converted string (8 bytes)
+ * PARAMETERS: out_string - Where to put the converted string (8 bytes)
+ * compressed_id - EISAID to be converted
*
* RETURN: None
*
@@ -354,7 +354,7 @@
* possible 64-bit integer.
* value - Value to be converted
*
- * RETURN: None, string
+ * RETURN: Converted string in out_string
*
* DESCRIPTION: Convert a 64-bit integer to decimal string representation.
* Assumes string buffer is large enough to hold the string. The
@@ -384,9 +384,9 @@
* FUNCTION: acpi_ex_pci_cls_to_string
*
* PARAMETERS: out_string - Where to put the converted string (7 bytes)
- * PARAMETERS: class_code - PCI class code to be converted (3 bytes)
+ * class_code - PCI class code to be converted (3 bytes)
*
- * RETURN: None
+ * RETURN: Converted string in out_string
*
* DESCRIPTION: Convert a 3-byte PCI class code to its string representation.
* Return buffer must be large enough to hold the string. The
@@ -417,7 +417,7 @@
*
* PARAMETERS: space_id - ID to be validated
*
- * RETURN: TRUE if valid/supported ID.
+ * RETURN: TRUE if space_id is a valid/supported ID.
*
* DESCRIPTION: Validate an operation region space_ID.
*
diff --git a/drivers/acpi/acpica/hwgpe.c b/drivers/acpi/acpica/hwgpe.c
index 1c4f451..bdecd5e 100644
--- a/drivers/acpi/acpica/hwgpe.c
+++ b/drivers/acpi/acpica/hwgpe.c
@@ -166,7 +166,7 @@
*
******************************************************************************/
-acpi_status acpi_hw_clear_gpe(struct acpi_gpe_event_info * gpe_event_info)
+acpi_status acpi_hw_clear_gpe(struct acpi_gpe_event_info *gpe_event_info)
{
struct acpi_gpe_register_info *gpe_register_info;
acpi_status status;
@@ -206,7 +206,7 @@
******************************************************************************/
acpi_status
-acpi_hw_get_gpe_status(struct acpi_gpe_event_info * gpe_event_info,
+acpi_hw_get_gpe_status(struct acpi_gpe_event_info *gpe_event_info,
acpi_event_status *event_status)
{
u32 in_byte;
@@ -391,7 +391,7 @@
acpi_status
acpi_hw_enable_runtime_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info,
- struct acpi_gpe_block_info * gpe_block,
+ struct acpi_gpe_block_info *gpe_block,
void *context)
{
u32 i;
diff --git a/drivers/acpi/acpica/hwregs.c b/drivers/acpi/acpica/hwregs.c
index 5ba0498..0f18dbc 100644
--- a/drivers/acpi/acpica/hwregs.c
+++ b/drivers/acpi/acpica/hwregs.c
@@ -51,6 +51,10 @@
#if (!ACPI_REDUCED_HARDWARE)
/* Local Prototypes */
+static u8
+acpi_hw_get_access_bit_width(struct acpi_generic_address *reg,
+ u8 max_bit_width);
+
static acpi_status
acpi_hw_read_multiple(u32 *value,
struct acpi_generic_address *register_a,
@@ -65,6 +69,48 @@
/******************************************************************************
*
+ * FUNCTION: acpi_hw_get_access_bit_width
+ *
+ * PARAMETERS: reg - GAS register structure
+ * max_bit_width - Max bit_width supported (32 or 64)
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Obtain optimal access bit width
+ *
+ ******************************************************************************/
+
+static u8
+acpi_hw_get_access_bit_width(struct acpi_generic_address *reg, u8 max_bit_width)
+{
+ u64 address;
+
+ if (!reg->access_width) {
+ /*
+ * Detect old register descriptors where only the bit_width field
+ * makes sense. The target address is copied to handle possible
+ * alignment issues.
+ */
+ ACPI_MOVE_64_TO_64(&address, ®->address);
+ if (!reg->bit_offset && reg->bit_width &&
+ ACPI_IS_POWER_OF_TWO(reg->bit_width) &&
+ ACPI_IS_ALIGNED(reg->bit_width, 8) &&
+ ACPI_IS_ALIGNED(address, reg->bit_width)) {
+ return (reg->bit_width);
+ } else {
+ if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO) {
+ return (32);
+ } else {
+ return (max_bit_width);
+ }
+ }
+ } else {
+ return (1 << (reg->access_width + 2));
+ }
+}
+
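
In the non-legacy path, the GAS access_size field encodes the width directly and the fallbacks above never trigger. A one-liner demonstrating the (1 << (access_width + 2)) encoding:

#include <stdio.h>

int main(void)
{
	/* GAS access_size 1..4 -> 8/16/32/64-bit accesses */
	for (int access_width = 1; access_width <= 4; access_width++)
		printf("access_width %d -> %2d bits\n",
		       access_width, 1 << (access_width + 2));
	return 0;
}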
+/******************************************************************************
+ *
* FUNCTION: acpi_hw_validate_register
*
* PARAMETERS: reg - GAS register structure
@@ -83,6 +129,8 @@
acpi_hw_validate_register(struct acpi_generic_address *reg,
u8 max_bit_width, u64 *address)
{
+ u8 bit_width;
+ u8 access_width;
/* Must have a valid pointer to a GAS structure */
@@ -109,23 +157,25 @@
return (AE_SUPPORT);
}
- /* Validate the bit_width */
+ /* Validate the access_width */
- if ((reg->bit_width != 8) &&
- (reg->bit_width != 16) &&
- (reg->bit_width != 32) && (reg->bit_width != max_bit_width)) {
+ if (reg->access_width > 4) {
ACPI_ERROR((AE_INFO,
- "Unsupported register bit width: 0x%X",
- reg->bit_width));
+ "Unsupported register access width: 0x%X",
+ reg->access_width));
return (AE_SUPPORT);
}
- /* Validate the bit_offset. Just a warning for now. */
+ /* Validate the bit_width, convert access_width into number of bits */
- if (reg->bit_offset != 0) {
+ access_width = acpi_hw_get_access_bit_width(reg, max_bit_width);
+ bit_width =
+ ACPI_ROUND_UP(reg->bit_offset + reg->bit_width, access_width);
+ if (max_bit_width < bit_width) {
ACPI_WARNING((AE_INFO,
- "Unsupported register bit offset: 0x%X",
- reg->bit_offset));
+ "Requested bit width 0x%X is smaller than register bit width 0x%X",
+ max_bit_width, bit_width));
+ return (AE_SUPPORT);
}
return (AE_OK);
@@ -145,17 +195,19 @@
* 64-bit values is not needed.
*
* LIMITATIONS: <These limitations also apply to acpi_hw_write>
- * bit_width must be exactly 8, 16, or 32.
* space_ID must be system_memory or system_IO.
- * bit_offset and access_width are currently ignored, as there has
- * not been a need to implement these.
*
******************************************************************************/
acpi_status acpi_hw_read(u32 *value, struct acpi_generic_address *reg)
{
u64 address;
+ u8 access_width;
+ u32 bit_width;
+ u8 bit_offset;
u64 value64;
+ u32 value32;
+ u8 index;
acpi_status status;
ACPI_FUNCTION_NAME(hw_read);
@@ -167,28 +219,75 @@
return (status);
}
- /* Initialize entire 32-bit return value to zero */
-
+ /*
+ * Initialize the entire 32-bit return value to zero, and convert
+ * access_width into a number of bits
+ */
*value = 0;
+ access_width = acpi_hw_get_access_bit_width(reg, 32);
+ bit_width = reg->bit_offset + reg->bit_width;
+ bit_offset = reg->bit_offset;
/*
* Two address spaces supported: Memory or IO. PCI_Config is
* not supported here because the GAS structure is insufficient
*/
- if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) {
- status = acpi_os_read_memory((acpi_physical_address)
- address, &value64, reg->bit_width);
+ index = 0;
+ while (bit_width) {
+ if (bit_offset >= access_width) {
+ value32 = 0;
+ bit_offset -= access_width;
+ } else {
+ if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) {
+ status =
+ acpi_os_read_memory((acpi_physical_address)
+ address +
+ index *
+ ACPI_DIV_8
+ (access_width),
+ &value64, access_width);
+ value32 = (u32)value64;
+ } else { /* ACPI_ADR_SPACE_SYSTEM_IO, validated earlier */
- *value = (u32)value64;
- } else { /* ACPI_ADR_SPACE_SYSTEM_IO, validated earlier */
+ status = acpi_hw_read_port((acpi_io_address)
+ address +
+ index *
+ ACPI_DIV_8
+ (access_width),
+ &value32,
+ access_width);
+ }
- status = acpi_hw_read_port((acpi_io_address)
- address, value, reg->bit_width);
+ /*
+ * Use offset style bit masks because:
+ * bit_offset < access_width/bit_width < access_width, and
+ * access_width is ensured to be less than 32-bits by
+ * acpi_hw_validate_register().
+ */
+ if (bit_offset) {
+ value32 &= ACPI_MASK_BITS_BELOW(bit_offset);
+ bit_offset = 0;
+ }
+ if (bit_width < access_width) {
+ value32 &= ACPI_MASK_BITS_ABOVE(bit_width);
+ }
+ }
+
+ /*
+ * Use offset style bit writes because "Index * AccessWidth" is
+ * ensured to be less than 32-bits by acpi_hw_validate_register().
+ */
+ ACPI_SET_BITS(value, index * access_width,
+ ACPI_MASK_BITS_ABOVE_32(access_width), value32);
+
+ bit_width -=
+ bit_width > access_width ? access_width : bit_width;
+ index++;
}
ACPI_DEBUG_PRINT((ACPI_DB_IO,
"Read: %8.8X width %2d from %8.8X%8.8X (%s)\n",
- *value, reg->bit_width, ACPI_FORMAT_UINT64(address),
+ *value, access_width, ACPI_FORMAT_UINT64(address),
acpi_ut_get_region_name(reg->space_id)));
return (status);
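
The loop above can be simulated on the host to see the masking at work. A sketch for a register with bit_offset = 4 and bit_width = 12, read in 8-bit chunks (an array stands in for the mapped hardware; the real code additionally skips whole chunks while bit_offset >= access_width):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint8_t hw[2] = { 0xA5, 0xC3 };		/* fake register bytes */
	unsigned access_width = 8;
	unsigned bit_offset = 4;
	unsigned bit_width = 4 + 12;		/* bit_offset + reg bit_width */
	uint32_t value = 0;

	for (unsigned index = 0; bit_width; index++) {
		uint32_t v32 = hw[index];

		if (bit_offset) {		/* clear bits below the field */
			v32 &= ~((1u << bit_offset) - 1);
			bit_offset = 0;
		}
		if (bit_width < access_width)	/* clear bits above the field */
			v32 &= (1u << bit_width) - 1;

		value |= v32 << (index * access_width);
		bit_width -= bit_width > access_width ? access_width : bit_width;
	}

	printf("0x%04X\n", value);	/* 0xC3A0: low nibble masked away */
	return 0;
}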
@@ -212,6 +311,12 @@
acpi_status acpi_hw_write(u32 value, struct acpi_generic_address *reg)
{
u64 address;
+ u8 access_width;
+ u32 bit_width;
+ u8 bit_offset;
+ u64 value64;
+ u32 new_value32, old_value32;
+ u8 index;
acpi_status status;
ACPI_FUNCTION_NAME(hw_write);
@@ -223,23 +328,145 @@
return (status);
}
+ /* Convert access_width into a number of bits */
+
+ access_width = acpi_hw_get_access_bit_width(reg, 32);
+ bit_width = reg->bit_offset + reg->bit_width;
+ bit_offset = reg->bit_offset;
+
/*
* Two address spaces supported: Memory or IO. PCI_Config is
* not supported here because the GAS structure is insufficient
*/
- if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) {
- status = acpi_os_write_memory((acpi_physical_address)
- address, (u64)value,
- reg->bit_width);
- } else { /* ACPI_ADR_SPACE_SYSTEM_IO, validated earlier */
+ index = 0;
+ while (bit_width) {
+ /*
+ * Use offset style bit reads because "Index * AccessWidth" is
+ * ensured to be less than 32-bits by acpi_hw_validate_register().
+ */
+ new_value32 = ACPI_GET_BITS(&value, index * access_width,
+ ACPI_MASK_BITS_ABOVE_32
+ (access_width));
- status = acpi_hw_write_port((acpi_io_address)
- address, value, reg->bit_width);
+ if (bit_offset >= access_width) {
+ bit_offset -= access_width;
+ } else {
+ /*
+ * Use offset style bit masks because access_width is ensured
+ * to be less than 32-bits by acpi_hw_validate_register() and
+ * bit_offset/bit_width is less than access_width here.
+ */
+ if (bit_offset) {
+ new_value32 &= ACPI_MASK_BITS_BELOW(bit_offset);
+ }
+ if (bit_width < access_width) {
+ new_value32 &= ACPI_MASK_BITS_ABOVE(bit_width);
+ }
+
+ if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) {
+ if (bit_offset || bit_width < access_width) {
+ /*
+ * Read old values in order not to modify the bits that
+ * are beyond the register bit_width/bit_offset setting.
+ */
+ status =
+ acpi_os_read_memory((acpi_physical_address)
+ address +
+ index *
+ ACPI_DIV_8
+ (access_width),
+ &value64,
+ access_width);
+ old_value32 = (u32)value64;
+
+ /*
+ * Use offset style bit masks because access_width is
+ * ensured to be less than 32-bits by
+ * acpi_hw_validate_register() and bit_offset/bit_width is
+ * less than access_width here.
+ */
+ if (bit_offset) {
+ old_value32 &=
+ ACPI_MASK_BITS_ABOVE
+ (bit_offset);
+ bit_offset = 0;
+ }
+ if (bit_width < access_width) {
+ old_value32 &=
+ ACPI_MASK_BITS_BELOW
+ (bit_width);
+ }
+
+ new_value32 |= old_value32;
+ }
+
+ value64 = (u64)new_value32;
+ status =
+ acpi_os_write_memory((acpi_physical_address)
+ address +
+ index *
+ ACPI_DIV_8
+ (access_width),
+ value64, access_width);
+ } else { /* ACPI_ADR_SPACE_SYSTEM_IO, validated earlier */
+
+ if (bit_offset || bit_width < access_width) {
+ /*
+ * Read old values in order not to modify the bits that
+ * are beyond the register bit_width/bit_offset setting.
+ */
+ status =
+ acpi_hw_read_port((acpi_io_address)
+ address +
+ index *
+ ACPI_DIV_8
+ (access_width),
+ &old_value32,
+ access_width);
+
+ /*
+ * Use offset style bit masks because access_width is
+ * ensured to be less than 32-bits by
+ * acpi_hw_validate_register() and bit_offset/bit_width is
+ * less than access_width here.
+ */
+ if (bit_offset) {
+ old_value32 &=
+ ACPI_MASK_BITS_ABOVE
+ (bit_offset);
+ bit_offset = 0;
+ }
+ if (bit_width < access_width) {
+ old_value32 &=
+ ACPI_MASK_BITS_BELOW
+ (bit_width);
+ }
+
+ new_value32 |= old_value32;
+ }
+
+ status = acpi_hw_write_port((acpi_io_address)
+ address +
+ index *
+ ACPI_DIV_8
+ (access_width),
+ new_value32,
+ access_width);
+ }
+ }
+
+ /*
+ * Index * access_width is ensured to be less than 32-bits by
+ * acpi_hw_validate_register().
+ */
+ bit_width -=
+ bit_width > access_width ? access_width : bit_width;
+ index++;
}
ACPI_DEBUG_PRINT((ACPI_DB_IO,
"Wrote: %8.8X width %2d to %8.8X%8.8X (%s)\n",
- value, reg->bit_width, ACPI_FORMAT_UINT64(address),
+ value, access_width, ACPI_FORMAT_UINT64(address),
acpi_ut_get_region_name(reg->space_id)));
return (status);
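
The read-modify-write branch is what keeps neighboring register bits intact when the field does not fill a whole access. A host-side sketch for a 4-bit field at bit_offset 2 within one 8-bit access:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint8_t old_value = 0xFF;	/* current register contents */
	unsigned bit_offset = 2;
	unsigned bit_width = 2 + 4;	/* bit_offset + field width */
	uint8_t new_value = 0x5 << bit_offset;	/* field value, in place */

	/* Trim the new value to the field... */
	new_value &= ~((1u << bit_offset) - 1);
	new_value &= (1u << bit_width) - 1;

	/* ...and merge in the old bits outside of it */
	uint8_t keep = old_value & (((1u << bit_offset) - 1) |
				    ~((1u << bit_width) - 1));

	printf("0x%02X\n", (uint8_t)(new_value | keep));	/* 0xD7 */
	return 0;
}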
diff --git a/drivers/acpi/acpica/hwxface.c b/drivers/acpi/acpica/hwxface.c
index a01ddb3..98c26ff 100644
--- a/drivers/acpi/acpica/hwxface.c
+++ b/drivers/acpi/acpica/hwxface.c
@@ -91,10 +91,9 @@
* compatibility with other ACPI implementations that have allowed
* BIOS code with bad register width values to go unnoticed.
*/
- status =
- acpi_os_write_port((acpi_io_address) reset_reg->address,
- acpi_gbl_FADT.reset_value,
- ACPI_RESET_REGISTER_WIDTH);
+ status = acpi_os_write_port((acpi_io_address)reset_reg->address,
+ acpi_gbl_FADT.reset_value,
+ ACPI_RESET_REGISTER_WIDTH);
} else {
/* Write the reset value to the reset register */
@@ -504,9 +503,7 @@
* Evaluate the \_Sx namespace object containing the register values
* for this state
*/
- info->relative_pathname = ACPI_CAST_PTR(char,
- acpi_gbl_sleep_state_names
- [sleep_state]);
+ info->relative_pathname = acpi_gbl_sleep_state_names[sleep_state];
status = acpi_ns_evaluate(info);
if (ACPI_FAILURE(status)) {
diff --git a/drivers/acpi/acpica/nsaccess.c b/drivers/acpi/acpica/nsaccess.c
index 697af81..426a630 100644
--- a/drivers/acpi/acpica/nsaccess.c
+++ b/drivers/acpi/acpica/nsaccess.c
@@ -107,9 +107,10 @@
continue;
}
- status = acpi_ns_lookup(NULL, init_val->name, init_val->type,
- ACPI_IMODE_LOAD_PASS2,
- ACPI_NS_NO_UPSEARCH, NULL, &new_node);
+ status =
+ acpi_ns_lookup(NULL, (char *)init_val->name, init_val->type,
+ ACPI_IMODE_LOAD_PASS2, ACPI_NS_NO_UPSEARCH,
+ NULL, &new_node);
if (ACPI_FAILURE(status)) {
ACPI_EXCEPTION((AE_INFO, status,
"Could not create predefined name %s",
diff --git a/drivers/acpi/acpica/nsconvert.c b/drivers/acpi/acpica/nsconvert.c
index 878e8fb..c803bda 100644
--- a/drivers/acpi/acpica/nsconvert.c
+++ b/drivers/acpi/acpica/nsconvert.c
@@ -79,7 +79,8 @@
/* String-to-Integer conversion */
status = acpi_ut_strtoul64(original_object->string.pointer,
- ACPI_ANY_BASE, &value);
+ ACPI_ANY_BASE,
+ acpi_gbl_integer_byte_width, &value);
if (ACPI_FAILURE(status)) {
return (status);
}
@@ -317,7 +318,7 @@
******************************************************************************/
acpi_status
-acpi_ns_convert_to_unicode(struct acpi_namespace_node * scope,
+acpi_ns_convert_to_unicode(struct acpi_namespace_node *scope,
union acpi_operand_object *original_object,
union acpi_operand_object **return_object)
{
@@ -384,7 +385,7 @@
******************************************************************************/
acpi_status
-acpi_ns_convert_to_resource(struct acpi_namespace_node * scope,
+acpi_ns_convert_to_resource(struct acpi_namespace_node *scope,
union acpi_operand_object *original_object,
union acpi_operand_object **return_object)
{
@@ -463,7 +464,7 @@
******************************************************************************/
acpi_status
-acpi_ns_convert_to_reference(struct acpi_namespace_node * scope,
+acpi_ns_convert_to_reference(struct acpi_namespace_node *scope,
union acpi_operand_object *original_object,
union acpi_operand_object **return_object)
{
diff --git a/drivers/acpi/acpica/nsdump.c b/drivers/acpi/acpica/nsdump.c
index af236e3..ce1f860 100644
--- a/drivers/acpi/acpica/nsdump.c
+++ b/drivers/acpi/acpica/nsdump.c
@@ -81,7 +81,7 @@
*
******************************************************************************/
-void acpi_ns_print_pathname(u32 num_segments, char *pathname)
+void acpi_ns_print_pathname(u32 num_segments, const char *pathname)
{
u32 i;
@@ -114,6 +114,9 @@
acpi_os_printf("]\n");
}
+#ifdef ACPI_OBSOLETE_FUNCTIONS
+/* Not used at this time, perhaps later */
+
/*******************************************************************************
*
* FUNCTION: acpi_ns_dump_pathname
@@ -131,7 +134,8 @@
******************************************************************************/
void
-acpi_ns_dump_pathname(acpi_handle handle, char *msg, u32 level, u32 component)
+acpi_ns_dump_pathname(acpi_handle handle,
+ const char *msg, u32 level, u32 component)
{
ACPI_FUNCTION_TRACE(ns_dump_pathname);
@@ -148,6 +152,7 @@
acpi_os_printf("\n");
return_VOID;
}
+#endif
/*******************************************************************************
*
diff --git a/drivers/acpi/acpica/nsinit.c b/drivers/acpi/acpica/nsinit.c
index d4aa8b6..36643a8 100644
--- a/drivers/acpi/acpica/nsinit.c
+++ b/drivers/acpi/acpica/nsinit.c
@@ -140,6 +140,7 @@
{
acpi_status status = AE_OK;
struct acpi_device_walk_info info;
+ acpi_handle handle;
ACPI_FUNCTION_TRACE(ns_initialize_devices);
@@ -190,6 +191,27 @@
if (ACPI_SUCCESS(status)) {
info.num_INI++;
}
+
+ /*
+ * Execute \_SB._INI.
+ * There appears to be a strict order requirement for \_SB._INI,
+ * which should be evaluated before any _REG evaluations.
+ */
+ status = acpi_get_handle(NULL, "\\_SB", &handle);
+ if (ACPI_SUCCESS(status)) {
+ memset(info.evaluate_info, 0,
+ sizeof(struct acpi_evaluate_info));
+ info.evaluate_info->prefix_node = handle;
+ info.evaluate_info->relative_pathname =
+ METHOD_NAME__INI;
+ info.evaluate_info->parameters = NULL;
+ info.evaluate_info->flags = ACPI_IGNORE_RETURN_VALUE;
+
+ status = acpi_ns_evaluate(info.evaluate_info);
+ if (ACPI_SUCCESS(status)) {
+ info.num_INI++;
+ }
+ }
}
/*
@@ -198,6 +220,12 @@
* Note: Any objects accessed by the _REG methods will be automatically
* initialized, even if they contain executable AML (see the call to
* acpi_ns_initialize_objects below).
+ *
+ * Note: According to the ACPI specification, we need not execute _REG
+ * for system_memory/system_io operation regions. However, _REG must
+ * still be evaluated for PCI_Config operation regions on a PCI root
+ * bus that does not contain a _BBN object, so this code is kept here
+ * in order not to break things.
*/
if (!(flags & ACPI_NO_ADDRESS_SPACE_INIT)) {
ACPI_DEBUG_PRINT((ACPI_DB_EXEC,
@@ -592,33 +620,37 @@
* Note: We know there is an _INI within this subtree, but it may not be
* under this particular device, it may be lower in the branch.
*/
- ACPI_DEBUG_EXEC(acpi_ut_display_init_pathname
- (ACPI_TYPE_METHOD, device_node, METHOD_NAME__INI));
+ if (!ACPI_COMPARE_NAME(device_node->name.ascii, "_SB_") ||
+ device_node->parent != acpi_gbl_root_node) {
+ ACPI_DEBUG_EXEC(acpi_ut_display_init_pathname
+ (ACPI_TYPE_METHOD, device_node,
+ METHOD_NAME__INI));
- memset(info, 0, sizeof(struct acpi_evaluate_info));
- info->prefix_node = device_node;
- info->relative_pathname = METHOD_NAME__INI;
- info->parameters = NULL;
- info->flags = ACPI_IGNORE_RETURN_VALUE;
+ memset(info, 0, sizeof(struct acpi_evaluate_info));
+ info->prefix_node = device_node;
+ info->relative_pathname = METHOD_NAME__INI;
+ info->parameters = NULL;
+ info->flags = ACPI_IGNORE_RETURN_VALUE;
- status = acpi_ns_evaluate(info);
-
- if (ACPI_SUCCESS(status)) {
- walk_info->num_INI++;
- }
+ status = acpi_ns_evaluate(info);
+ if (ACPI_SUCCESS(status)) {
+ walk_info->num_INI++;
+ }
#ifdef ACPI_DEBUG_OUTPUT
- else if (status != AE_NOT_FOUND) {
+ else if (status != AE_NOT_FOUND) {
- /* Ignore error and move on to next device */
+ /* Ignore error and move on to next device */
- char *scope_name =
- acpi_ns_get_normalized_pathname(device_node, TRUE);
+ char *scope_name =
+ acpi_ns_get_normalized_pathname(device_node, TRUE);
- ACPI_EXCEPTION((AE_INFO, status, "during %s._INI execution",
- scope_name));
- ACPI_FREE(scope_name);
- }
+ ACPI_EXCEPTION((AE_INFO, status,
+ "during %s._INI execution",
+ scope_name));
+ ACPI_FREE(scope_name);
+ }
#endif
+ }
/* Ignore errors from above */
diff --git a/drivers/acpi/acpica/nsload.c b/drivers/acpi/acpica/nsload.c
index 75cdb87..b5e2b0a 100644
--- a/drivers/acpi/acpica/nsload.c
+++ b/drivers/acpi/acpica/nsload.c
@@ -123,8 +123,8 @@
(void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE);
acpi_ns_delete_namespace_by_owner(acpi_gbl_root_table_list.
tables[table_index].owner_id);
- acpi_tb_release_owner_id(table_index);
+ acpi_tb_release_owner_id(table_index);
return_ACPI_STATUS(status);
}
diff --git a/drivers/acpi/acpica/nsnames.c b/drivers/acpi/acpica/nsnames.c
index eb6e1b8..f03dd41 100644
--- a/drivers/acpi/acpica/nsnames.c
+++ b/drivers/acpi/acpica/nsnames.c
@@ -113,7 +113,7 @@
acpi_status
acpi_ns_handle_to_pathname(acpi_handle target_handle,
- struct acpi_buffer * buffer, u8 no_trailing)
+ struct acpi_buffer *buffer, u8 no_trailing)
{
acpi_status status;
struct acpi_namespace_node *node;
diff --git a/drivers/acpi/acpica/nsobject.c b/drivers/acpi/acpica/nsobject.c
index 051306f..cfa2bb7 100644
--- a/drivers/acpi/acpica/nsobject.c
+++ b/drivers/acpi/acpica/nsobject.c
@@ -399,7 +399,7 @@
******************************************************************************/
acpi_status
-acpi_ns_detach_data(struct acpi_namespace_node * node,
+acpi_ns_detach_data(struct acpi_namespace_node *node,
acpi_object_handler handler)
{
union acpi_operand_object *obj_desc;
@@ -444,7 +444,7 @@
******************************************************************************/
acpi_status
-acpi_ns_get_attached_data(struct acpi_namespace_node * node,
+acpi_ns_get_attached_data(struct acpi_namespace_node *node,
acpi_object_handler handler, void **data)
{
union acpi_operand_object *obj_desc;
diff --git a/drivers/acpi/acpica/nsprepkg.c b/drivers/acpi/acpica/nsprepkg.c
index 9047f28..fbedc6e 100644
--- a/drivers/acpi/acpica/nsprepkg.c
+++ b/drivers/acpi/acpica/nsprepkg.c
@@ -62,6 +62,10 @@
u32 count1,
u8 type2, u32 count2, u32 start_index);
+static acpi_status
+acpi_ns_custom_package(struct acpi_evaluate_info *info,
+ union acpi_operand_object **elements, u32 count);
+
/*******************************************************************************
*
* FUNCTION: acpi_ns_check_package
@@ -135,6 +139,11 @@
* PTYPE2 packages contain subpackages
*/
switch (package->ret_info.type) {
+ case ACPI_PTYPE_CUSTOM:
+
+ status = acpi_ns_custom_package(info, elements, count);
+ break;
+
case ACPI_PTYPE1_FIXED:
/*
* The package count is fixed and there are no subpackages
@@ -179,6 +188,7 @@
if (ACPI_FAILURE(status)) {
return (status);
}
+
elements++;
}
break;
@@ -225,6 +235,7 @@
return (status);
}
}
+
elements++;
}
break;
@@ -569,11 +580,13 @@
if (sub_package->package.count < expected_count) {
goto package_too_small;
}
+
if (sub_package->package.count <
package->ret_info.count1) {
expected_count = package->ret_info.count1;
goto package_too_small;
}
+
if (expected_count == 0) {
/*
* Either the num_entries element was originally zero or it was
@@ -622,6 +635,83 @@
/*******************************************************************************
*
+ * FUNCTION: acpi_ns_custom_package
+ *
+ * PARAMETERS: info - Method execution information block
+ * elements - Pointer to the package elements array
+ * count - Element count for the package
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Check a returned package object for the correct count and
+ * correct type of all sub-objects.
+ *
+ * NOTE: Currently used for the _BIX method only. When needed for two or more
+ * methods, a detect/dispatch mechanism will probably be required.
+ *
+ ******************************************************************************/
+
+static acpi_status
+acpi_ns_custom_package(struct acpi_evaluate_info *info,
+ union acpi_operand_object **elements, u32 count)
+{
+ u32 expected_count;
+ u32 version;
+ acpi_status status = AE_OK;
+
+ ACPI_FUNCTION_NAME(ns_custom_package);
+
+ /* Get version number, must be Integer */
+
+ if ((*elements)->common.type != ACPI_TYPE_INTEGER) {
+ ACPI_WARN_PREDEFINED((AE_INFO, info->full_pathname,
+ info->node_flags,
+ "Return Package has invalid object type for version number"));
+ return_ACPI_STATUS(AE_AML_OPERAND_TYPE);
+ }
+
+ version = (u32)(*elements)->integer.value;
+ expected_count = 21; /* Version 1 */
+
+ if (version == 0) {
+ expected_count = 20; /* Version 0 */
+ }
+
+ if (count < expected_count) {
+ ACPI_WARN_PREDEFINED((AE_INFO, info->full_pathname,
+ info->node_flags,
+ "Return Package is too small - found %u elements, expected %u",
+ count, expected_count));
+ return_ACPI_STATUS(AE_AML_OPERAND_VALUE);
+ } else if (count > expected_count) {
+ ACPI_DEBUG_PRINT((ACPI_DB_REPAIR,
+ "%s: Return Package is larger than needed - "
+ "found %u, expected %u\n",
+ info->full_pathname, count, expected_count));
+ }
+
+ /* Validate all elements of the returned package */
+
+ status = acpi_ns_check_package_elements(info, elements,
+ ACPI_RTYPE_INTEGER, 16,
+ ACPI_RTYPE_STRING, 4, 0);
+ if (ACPI_FAILURE(status)) {
+ return_ACPI_STATUS(status);
+ }
+
+ /* Version 1 has a single trailing integer */
+
+ if (version > 0) {
+ status = acpi_ns_check_package_elements(info, elements + 20,
+ ACPI_RTYPE_INTEGER, 1,
+ 0, 0, 20);
+ }
+
+ return_ACPI_STATUS(status);
+}
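
The shape this enforces for _BIX, sketched with plain tags instead of ACPICA operand objects: 16 leading Integers (element 0 being the revision), then 4 Strings, plus one trailing Integer when the revision is 1 or greater:

#include <stdio.h>

enum elem { INT, STR };

static int check_bix(const enum elem *e, unsigned count, unsigned revision)
{
	unsigned expected = (revision >= 1) ? 21 : 20;
	unsigned i;

	if (count < expected)
		return -1;			/* package too small */
	for (i = 0; i < 16; i++)		/* 16 Integers */
		if (e[i] != INT)
			return -1;
	for (i = 16; i < 20; i++)		/* 4 Strings */
		if (e[i] != STR)
			return -1;
	if (revision >= 1 && e[20] != INT)	/* version 1 trailer */
		return -1;
	return 0;
}

int main(void)
{
	enum elem bix[21] = {
		INT, INT, INT, INT, INT, INT, INT, INT,
		INT, INT, INT, INT, INT, INT, INT, INT,
		STR, STR, STR, STR, INT
	};
	printf("%s\n", check_bix(bix, 21, 1) ? "bad" : "ok");
	return 0;
}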
+
+/*******************************************************************************
+ *
* FUNCTION: acpi_ns_check_package_elements
*
* PARAMETERS: info - Method execution information block
@@ -661,6 +751,7 @@
if (ACPI_FAILURE(status)) {
return (status);
}
+
this_element++;
}
@@ -671,6 +762,7 @@
if (ACPI_FAILURE(status)) {
return (status);
}
+
this_element++;
}
diff --git a/drivers/acpi/acpica/nsrepair.c b/drivers/acpi/acpica/nsrepair.c
index 805e36d..9523d41 100644
--- a/drivers/acpi/acpica/nsrepair.c
+++ b/drivers/acpi/acpica/nsrepair.c
@@ -399,7 +399,7 @@
******************************************************************************/
acpi_status
-acpi_ns_repair_null_element(struct acpi_evaluate_info * info,
+acpi_ns_repair_null_element(struct acpi_evaluate_info *info,
u32 expected_btypes,
u32 package_index,
union acpi_operand_object **return_object_ptr)
diff --git a/drivers/acpi/acpica/nsrepair2.c b/drivers/acpi/acpica/nsrepair2.c
index 63edbbb..d533612 100644
--- a/drivers/acpi/acpica/nsrepair2.c
+++ b/drivers/acpi/acpica/nsrepair2.c
@@ -54,9 +54,9 @@
* be repaired on a per-name basis.
*/
typedef
-acpi_status(*acpi_repair_function) (struct acpi_evaluate_info * info,
- union acpi_operand_object
- **return_object_ptr);
+acpi_status (*acpi_repair_function) (struct acpi_evaluate_info * info,
+ union acpi_operand_object **
+ return_object_ptr);
typedef struct acpi_repair_info {
char name[ACPI_NAME_SIZE];
diff --git a/drivers/acpi/acpica/nsutils.c b/drivers/acpi/acpica/nsutils.c
index c72cc62..784a30b 100644
--- a/drivers/acpi/acpica/nsutils.c
+++ b/drivers/acpi/acpica/nsutils.c
@@ -272,11 +272,11 @@
result = &internal_name[i];
} else if (num_segments == 2) {
internal_name[i] = AML_DUAL_NAME_PREFIX;
- result = &internal_name[(acpi_size) i + 1];
+ result = &internal_name[(acpi_size)i + 1];
} else {
internal_name[i] = AML_MULTI_NAME_PREFIX_OP;
- internal_name[(acpi_size) i + 1] = (char)num_segments;
- result = &internal_name[(acpi_size) i + 2];
+ internal_name[(acpi_size)i + 1] = (char)num_segments;
+ result = &internal_name[(acpi_size)i + 2];
}
}
@@ -456,7 +456,7 @@
names_index = prefix_length + 2;
num_segments = (u8)
- internal_name[(acpi_size) prefix_length + 1];
+ internal_name[(acpi_size)prefix_length + 1];
break;
case AML_DUAL_NAME_PREFIX:
diff --git a/drivers/acpi/acpica/nsxfeval.c b/drivers/acpi/acpica/nsxfeval.c
index a7deeaa..d2a9b4f 100644
--- a/drivers/acpi/acpica/nsxfeval.c
+++ b/drivers/acpi/acpica/nsxfeval.c
@@ -256,7 +256,7 @@
* Allocate a new parameter block for the internal objects
* Add 1 to count to allow for null terminated internal list
*/
- info->parameters = ACPI_ALLOCATE_ZEROED(((acpi_size) info->
+ info->parameters = ACPI_ALLOCATE_ZEROED(((acpi_size)info->
param_count +
1) * sizeof(void *));
if (!info->parameters) {
@@ -280,13 +280,12 @@
info->parameters[info->param_count] = NULL;
}
-#if 0
+#ifdef _FUTURE_FEATURE
/*
* Begin incoming argument count analysis. Check for too few args
* and too many args.
*/
-
switch (acpi_ns_get_type(info->node)) {
case ACPI_TYPE_METHOD:
@@ -370,68 +369,68 @@
* If we are expecting a return value, and all went well above,
* copy the return value to an external object.
*/
- if (return_buffer) {
- if (!info->return_object) {
- return_buffer->length = 0;
+ if (!return_buffer) {
+ goto cleanup_return_object;
+ }
+
+ if (!info->return_object) {
+ return_buffer->length = 0;
+ goto cleanup;
+ }
+
+ if (ACPI_GET_DESCRIPTOR_TYPE(info->return_object) ==
+ ACPI_DESC_TYPE_NAMED) {
+ /*
+ * If we received a NS Node as a return object, this means that
+ * the object we are evaluating has nothing interesting to
+ * return (such as a mutex, etc.) We return an error because
+ * these types are essentially unsupported by this interface.
+ * We don't check up front because this makes it easier to add
+ * support for various types at a later date if necessary.
+ */
+ status = AE_TYPE;
+ info->return_object = NULL; /* No need to delete a NS Node */
+ return_buffer->length = 0;
+ }
+
+ if (ACPI_FAILURE(status)) {
+ goto cleanup_return_object;
+ }
+
+ /* Dereference Index and ref_of references */
+
+ acpi_ns_resolve_references(info);
+
+ /* Get the size of the returned object */
+
+ status = acpi_ut_get_object_size(info->return_object,
+ &buffer_space_needed);
+ if (ACPI_SUCCESS(status)) {
+
+ /* Validate/Allocate/Clear caller buffer */
+
+ status = acpi_ut_initialize_buffer(return_buffer,
+ buffer_space_needed);
+ if (ACPI_FAILURE(status)) {
+ /*
+ * Caller's buffer is too small or a new one can't
+ * be allocated
+ */
+ ACPI_DEBUG_PRINT((ACPI_DB_INFO,
+ "Needed buffer size %X, %s\n",
+ (u32)buffer_space_needed,
+ acpi_format_exception(status)));
} else {
- if (ACPI_GET_DESCRIPTOR_TYPE(info->return_object) ==
- ACPI_DESC_TYPE_NAMED) {
- /*
- * If we received a NS Node as a return object, this means that
- * the object we are evaluating has nothing interesting to
- * return (such as a mutex, etc.) We return an error because
- * these types are essentially unsupported by this interface.
- * We don't check up front because this makes it easier to add
- * support for various types at a later date if necessary.
- */
- status = AE_TYPE;
- info->return_object = NULL; /* No need to delete a NS Node */
- return_buffer->length = 0;
- }
+ /* We have enough space for the object, build it */
- if (ACPI_SUCCESS(status)) {
-
- /* Dereference Index and ref_of references */
-
- acpi_ns_resolve_references(info);
-
- /* Get the size of the returned object */
-
- status =
- acpi_ut_get_object_size(info->return_object,
- &buffer_space_needed);
- if (ACPI_SUCCESS(status)) {
-
- /* Validate/Allocate/Clear caller buffer */
-
- status =
- acpi_ut_initialize_buffer
- (return_buffer,
- buffer_space_needed);
- if (ACPI_FAILURE(status)) {
- /*
- * Caller's buffer is too small or a new one can't
- * be allocated
- */
- ACPI_DEBUG_PRINT((ACPI_DB_INFO,
- "Needed buffer size %X, %s\n",
- (u32)
- buffer_space_needed,
- acpi_format_exception
- (status)));
- } else {
- /* We have enough space for the object, build it */
-
- status =
- acpi_ut_copy_iobject_to_eobject
- (info->return_object,
- return_buffer);
- }
- }
- }
+ status =
+ acpi_ut_copy_iobject_to_eobject(info->return_object,
+ return_buffer);
}
}
+cleanup_return_object:
+
if (info->return_object) {
/*
* Delete the internal return object. NOTE: Interpreter must be
diff --git a/drivers/acpi/acpica/nsxfname.c b/drivers/acpi/acpica/nsxfname.c
index 285b820..76a1bd4 100644
--- a/drivers/acpi/acpica/nsxfname.c
+++ b/drivers/acpi/acpica/nsxfname.c
@@ -78,7 +78,7 @@
acpi_status
acpi_get_handle(acpi_handle parent,
- acpi_string pathname, acpi_handle * ret_handle)
+ acpi_string pathname, acpi_handle *ret_handle)
{
acpi_status status;
struct acpi_namespace_node *node = NULL;
@@ -155,7 +155,7 @@
*
******************************************************************************/
acpi_status
-acpi_get_name(acpi_handle handle, u32 name_type, struct acpi_buffer * buffer)
+acpi_get_name(acpi_handle handle, u32 name_type, struct acpi_buffer *buffer)
{
acpi_status status;
struct acpi_namespace_node *node;
@@ -448,7 +448,7 @@
/* Point past the CID PNP_DEVICE_ID array */
next_id_string +=
- ((acpi_size) cid_list->count *
+ ((acpi_size)cid_list->count *
sizeof(struct acpi_pnp_device_id));
}
diff --git a/drivers/acpi/acpica/nsxfobj.c b/drivers/acpi/acpica/nsxfobj.c
index c312cd4..32d372b 100644
--- a/drivers/acpi/acpica/nsxfobj.c
+++ b/drivers/acpi/acpica/nsxfobj.c
@@ -63,7 +63,7 @@
* DESCRIPTION: This routine returns the type associated with a particular handle
*
******************************************************************************/
-acpi_status acpi_get_type(acpi_handle handle, acpi_object_type * ret_type)
+acpi_status acpi_get_type(acpi_handle handle, acpi_object_type *ret_type)
{
struct acpi_namespace_node *node;
acpi_status status;
@@ -115,7 +115,7 @@
* Handle.
*
******************************************************************************/
-acpi_status acpi_get_parent(acpi_handle handle, acpi_handle * ret_handle)
+acpi_status acpi_get_parent(acpi_handle handle, acpi_handle *ret_handle)
{
struct acpi_namespace_node *node;
struct acpi_namespace_node *parent_node;
@@ -183,7 +183,7 @@
acpi_status
acpi_get_next_object(acpi_object_type type,
acpi_handle parent,
- acpi_handle child, acpi_handle * ret_handle)
+ acpi_handle child, acpi_handle *ret_handle)
{
acpi_status status;
struct acpi_namespace_node *node;
diff --git a/drivers/acpi/acpica/psargs.c b/drivers/acpi/acpica/psargs.c
index d48cbed..c29c930 100644
--- a/drivers/acpi/acpica/psargs.c
+++ b/drivers/acpi/acpica/psargs.c
@@ -87,7 +87,7 @@
* used to encode the package length, either 0,1,2, or 3
*/
byte_count = (aml[0] >> 6);
- parser_state->aml += ((acpi_size) byte_count + 1);
+ parser_state->aml += ((acpi_size)byte_count + 1);
/* Get bytes 3, 2, 1 as needed */
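The comment above refers to the AML PkgLength encoding: the top two bits of the first byte hold the count of additional length bytes (0-3); a zero count means a one-byte, 6-bit length, otherwise the low nibble of byte 0 supplies the least-significant bits and each following byte adds eight more. A standalone decoder sketch of that encoding (the function name is hypothetical; the real parser adds validation):

#include <stdio.h>
#include <stdint.h>

static uint32_t decode_pkg_length(const uint8_t *aml)
{
	uint8_t byte_count = aml[0] >> 6;  /* same shift as in the hunk above */
	uint32_t length;
	uint8_t i;

	if (byte_count == 0) {
		return (aml[0] & 0x3F);    /* single-byte form: 6-bit length */
	}

	length = aml[0] & 0x0F;            /* low nibble of byte 0 */
	for (i = 1; i <= byte_count; i++) {
		length |= (uint32_t)aml[i] << (i * 8 - 4);
	}
	return (length);
}

int main(void)
{
	uint8_t one_byte[] = { 0x2A };       /* 0x2A -> length 42 */
	uint8_t two_byte[] = { 0x48, 0x12 }; /* one extra byte -> 0x128 */

	printf("%u\n", decode_pkg_length(one_byte));  /* prints 42 */
	printf("%u\n", decode_pkg_length(two_byte));  /* prints 296 */
	return (0);
}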
diff --git a/drivers/acpi/acpica/psopinfo.c b/drivers/acpi/acpica/psopinfo.c
index cfd17a4..177b05b 100644
--- a/drivers/acpi/acpica/psopinfo.c
+++ b/drivers/acpi/acpica/psopinfo.c
@@ -158,7 +158,7 @@
*
******************************************************************************/
-char *acpi_ps_get_opcode_name(u16 opcode)
+const char *acpi_ps_get_opcode_name(u16 opcode)
{
#if defined(ACPI_DISASSEMBLER) || defined (ACPI_DEBUG_OUTPUT)
diff --git a/drivers/acpi/acpica/psparse.c b/drivers/acpi/acpica/psparse.c
index 8038ed2ac..0a23897 100644
--- a/drivers/acpi/acpica/psparse.c
+++ b/drivers/acpi/acpica/psparse.c
@@ -130,8 +130,8 @@
******************************************************************************/
acpi_status
-acpi_ps_complete_this_op(struct acpi_walk_state * walk_state,
- union acpi_parse_object * op)
+acpi_ps_complete_this_op(struct acpi_walk_state *walk_state,
+ union acpi_parse_object *op)
{
union acpi_parse_object *prev;
union acpi_parse_object *next;
diff --git a/drivers/acpi/acpica/psutils.c b/drivers/acpi/acpica/psutils.c
index b28b0da..89cb4bf 100644
--- a/drivers/acpi/acpica/psutils.c
+++ b/drivers/acpi/acpica/psutils.c
@@ -128,7 +128,7 @@
if (op_info->flags & AML_DEFER) {
flags = ACPI_PARSEOP_DEFERRED;
} else if (op_info->flags & AML_NAMED) {
- flags = ACPI_PARSEOP_NAMED;
+ flags = ACPI_PARSEOP_NAMED_OBJECT;
} else if (opcode == AML_INT_BYTELIST_OP) {
flags = ACPI_PARSEOP_BYTELIST;
}
diff --git a/drivers/acpi/acpica/psxface.c b/drivers/acpi/acpica/psxface.c
index 04b37fc..cf30cd82 100644
--- a/drivers/acpi/acpica/psxface.c
+++ b/drivers/acpi/acpica/psxface.c
@@ -115,7 +115,7 @@
*
******************************************************************************/
-acpi_status acpi_ps_execute_method(struct acpi_evaluate_info * info)
+acpi_status acpi_ps_execute_method(struct acpi_evaluate_info *info)
{
acpi_status status;
union acpi_parse_object *op;
diff --git a/drivers/acpi/acpica/rscalc.c b/drivers/acpi/acpica/rscalc.c
index 2b1209d..f1e83ad 100644
--- a/drivers/acpi/acpica/rscalc.c
+++ b/drivers/acpi/acpica/rscalc.c
@@ -112,7 +112,7 @@
* resource_source_index (1).
*/
if (resource_source->string_ptr) {
- return ((acpi_rs_length) (resource_source->string_length + 1));
+ return ((acpi_rs_length)(resource_source->string_length + 1));
}
return (0);
@@ -188,7 +188,7 @@
acpi_status
acpi_rs_get_aml_length(struct acpi_resource *resource,
- acpi_size resource_list_size, acpi_size * size_needed)
+ acpi_size resource_list_size, acpi_size *size_needed)
{
acpi_size aml_size_needed = 0;
struct acpi_resource *resource_end;
@@ -278,11 +278,11 @@
* 16-Bit Address Resource:
* Add the size of the optional resource_source info
*/
- total_size = (acpi_rs_length) (total_size +
- acpi_rs_struct_option_length
- (&resource->data.
- address16.
- resource_source));
+ total_size = (acpi_rs_length)(total_size +
+ acpi_rs_struct_option_length
+ (&resource->data.
+ address16.
+ resource_source));
break;
case ACPI_RESOURCE_TYPE_ADDRESS32:
@@ -290,11 +290,11 @@
* 32-Bit Address Resource:
* Add the size of the optional resource_source info
*/
- total_size = (acpi_rs_length) (total_size +
- acpi_rs_struct_option_length
- (&resource->data.
- address32.
- resource_source));
+ total_size = (acpi_rs_length)(total_size +
+ acpi_rs_struct_option_length
+ (&resource->data.
+ address32.
+ resource_source));
break;
case ACPI_RESOURCE_TYPE_ADDRESS64:
@@ -302,11 +302,11 @@
* 64-Bit Address Resource:
* Add the size of the optional resource_source info
*/
- total_size = (acpi_rs_length) (total_size +
- acpi_rs_struct_option_length
- (&resource->data.
- address64.
- resource_source));
+ total_size = (acpi_rs_length)(total_size +
+ acpi_rs_struct_option_length
+ (&resource->data.
+ address64.
+ resource_source));
break;
case ACPI_RESOURCE_TYPE_EXTENDED_IRQ:
@@ -315,28 +315,28 @@
* Add the size of each additional optional interrupt beyond the
* required 1 (4 bytes for each u32 interrupt number)
*/
- total_size = (acpi_rs_length) (total_size +
- ((resource->data.
- extended_irq.
- interrupt_count -
- 1) * 4) +
- /* Add the size of the optional resource_source info */
- acpi_rs_struct_option_length
- (&resource->data.
+ total_size = (acpi_rs_length)(total_size +
+ ((resource->data.
extended_irq.
- resource_source));
+ interrupt_count -
+ 1) * 4) +
+ /* Add the size of the optional resource_source info */
+ acpi_rs_struct_option_length
+ (&resource->data.
+ extended_irq.
+ resource_source));
break;
case ACPI_RESOURCE_TYPE_GPIO:
- total_size = (acpi_rs_length) (total_size +
- (resource->data.gpio.
- pin_table_length * 2) +
- resource->data.gpio.
- resource_source.
- string_length +
- resource->data.gpio.
- vendor_length);
+ total_size = (acpi_rs_length)(total_size +
+ (resource->data.gpio.
+ pin_table_length * 2) +
+ resource->data.gpio.
+ resource_source.
+ string_length +
+ resource->data.gpio.
+ vendor_length);
break;
@@ -348,14 +348,14 @@
common_serial_bus.
type];
- total_size = (acpi_rs_length) (total_size +
- resource->data.
- i2c_serial_bus.
- resource_source.
- string_length +
- resource->data.
- i2c_serial_bus.
- vendor_length);
+ total_size = (acpi_rs_length)(total_size +
+ resource->data.
+ i2c_serial_bus.
+ resource_source.
+ string_length +
+ resource->data.
+ i2c_serial_bus.
+ vendor_length);
break;
@@ -397,8 +397,8 @@
******************************************************************************/
acpi_status
-acpi_rs_get_list_length(u8 * aml_buffer,
- u32 aml_buffer_length, acpi_size * size_needed)
+acpi_rs_get_list_length(u8 *aml_buffer,
+ u32 aml_buffer_length, acpi_size *size_needed)
{
acpi_status status;
u8 *end_aml;
@@ -610,7 +610,7 @@
acpi_status
acpi_rs_get_pci_routing_table_length(union acpi_operand_object *package_object,
- acpi_size * buffer_size_needed)
+ acpi_size *buffer_size_needed)
{
u32 number_of_elements;
acpi_size temp_size_needed = 0;
diff --git a/drivers/acpi/acpica/rscreate.c b/drivers/acpi/acpica/rscreate.c
index 1297889..809b61c 100644
--- a/drivers/acpi/acpica/rscreate.c
+++ b/drivers/acpi/acpica/rscreate.c
@@ -347,7 +347,7 @@
(u8 *) output_buffer->pointer);
path_buffer.pointer = user_prt->source;
- status = acpi_ns_handle_to_pathname((acpi_handle) node, &path_buffer, FALSE);
+ status = acpi_ns_handle_to_pathname((acpi_handle)node, &path_buffer, FALSE);
/* +1 to include null terminator */
diff --git a/drivers/acpi/acpica/rsdump.c b/drivers/acpi/acpica/rsdump.c
index 23a17c8..5ffdb56 100644
--- a/drivers/acpi/acpica/rsdump.c
+++ b/drivers/acpi/acpica/rsdump.c
@@ -52,17 +52,17 @@
* All functions in this module are used by the AML Debugger only
*/
/* Local prototypes */
-static void acpi_rs_out_string(char *title, char *value);
+static void acpi_rs_out_string(const char *title, const char *value);
-static void acpi_rs_out_integer8(char *title, u8 value);
+static void acpi_rs_out_integer8(const char *title, u8 value);
-static void acpi_rs_out_integer16(char *title, u16 value);
+static void acpi_rs_out_integer16(const char *title, u16 value);
-static void acpi_rs_out_integer32(char *title, u32 value);
+static void acpi_rs_out_integer32(const char *title, u32 value);
-static void acpi_rs_out_integer64(char *title, u64 value);
+static void acpi_rs_out_integer64(const char *title, u64 value);
-static void acpi_rs_out_title(char *title);
+static void acpi_rs_out_title(const char *title);
static void acpi_rs_dump_byte_list(u16 length, u8 *data);
@@ -208,7 +208,7 @@
{
u8 *target = NULL;
u8 *previous_target;
- char *name;
+ const char *name;
u8 count;
/* First table entry must contain the table length (# of table entries) */
@@ -248,10 +248,8 @@
case ACPI_RSD_UINT8:
if (table->pointer) {
- acpi_rs_out_string(name, ACPI_CAST_PTR(char,
- table->
- pointer
- [*target]));
+ acpi_rs_out_string(name,
+ table->pointer[*target]);
} else {
acpi_rs_out_integer8(name, ACPI_GET8(target));
}
@@ -276,26 +274,20 @@
case ACPI_RSD_1BITFLAG:
- acpi_rs_out_string(name, ACPI_CAST_PTR(char,
- table->
- pointer[*target &
- 0x01]));
+ acpi_rs_out_string(name,
+ table->pointer[*target & 0x01]);
break;
case ACPI_RSD_2BITFLAG:
- acpi_rs_out_string(name, ACPI_CAST_PTR(char,
- table->
- pointer[*target &
- 0x03]));
+ acpi_rs_out_string(name,
+ table->pointer[*target & 0x03]);
break;
case ACPI_RSD_3BITFLAG:
- acpi_rs_out_string(name, ACPI_CAST_PTR(char,
- table->
- pointer[*target &
- 0x07]));
+ acpi_rs_out_string(name,
+ table->pointer[*target & 0x07]);
break;
case ACPI_RSD_SHORTLIST:
@@ -481,7 +473,7 @@
*
******************************************************************************/
-static void acpi_rs_out_string(char *title, char *value)
+static void acpi_rs_out_string(const char *title, const char *value)
{
acpi_os_printf("%27s : %s", title, value);
@@ -491,30 +483,30 @@
acpi_os_printf("\n");
}
-static void acpi_rs_out_integer8(char *title, u8 value)
+static void acpi_rs_out_integer8(const char *title, u8 value)
{
acpi_os_printf("%27s : %2.2X\n", title, value);
}
-static void acpi_rs_out_integer16(char *title, u16 value)
+static void acpi_rs_out_integer16(const char *title, u16 value)
{
acpi_os_printf("%27s : %4.4X\n", title, value);
}
-static void acpi_rs_out_integer32(char *title, u32 value)
+static void acpi_rs_out_integer32(const char *title, u32 value)
{
acpi_os_printf("%27s : %8.8X\n", title, value);
}
-static void acpi_rs_out_integer64(char *title, u64 value)
+static void acpi_rs_out_integer64(const char *title, u64 value)
{
acpi_os_printf("%27s : %8.8X%8.8X\n", title, ACPI_FORMAT_UINT64(value));
}
-static void acpi_rs_out_title(char *title)
+static void acpi_rs_out_title(const char *title)
{
acpi_os_printf("%27s : ", title);
diff --git a/drivers/acpi/acpica/rsdumpinfo.c b/drivers/acpi/acpica/rsdumpinfo.c
index 5c34913..61e8f16 100644
--- a/drivers/acpi/acpica/rsdumpinfo.c
+++ b/drivers/acpi/acpica/rsdumpinfo.c
@@ -330,19 +330,20 @@
{ACPI_RSD_UINT8, ACPI_RSD_OFFSET (common_serial_bus.type), "Type", acpi_gbl_sbt_decode}, \
{ACPI_RSD_1BITFLAG, ACPI_RSD_OFFSET (common_serial_bus.producer_consumer), "ProducerConsumer", acpi_gbl_consume_decode}, \
{ACPI_RSD_1BITFLAG, ACPI_RSD_OFFSET (common_serial_bus.slave_mode), "SlaveMode", acpi_gbl_sm_decode}, \
+ {ACPI_RSD_1BITFLAG, ACPI_RSD_OFFSET (common_serial_bus.connection_sharing),"ConnectionSharing", acpi_gbl_shr_decode}, \
{ACPI_RSD_UINT8, ACPI_RSD_OFFSET (common_serial_bus.type_revision_id), "TypeRevisionId", NULL}, \
{ACPI_RSD_UINT16, ACPI_RSD_OFFSET (common_serial_bus.type_data_length), "TypeDataLength", NULL}, \
{ACPI_RSD_SOURCE, ACPI_RSD_OFFSET (common_serial_bus.resource_source), "ResourceSource", NULL}, \
{ACPI_RSD_UINT16, ACPI_RSD_OFFSET (common_serial_bus.vendor_length), "VendorLength", NULL}, \
{ACPI_RSD_SHORTLISTX,ACPI_RSD_OFFSET (common_serial_bus.vendor_data), "VendorData", NULL},
-struct acpi_rsdump_info acpi_rs_dump_common_serial_bus[10] = {
+struct acpi_rsdump_info acpi_rs_dump_common_serial_bus[11] = {
{ACPI_RSD_TITLE, ACPI_RSD_TABLE_SIZE(acpi_rs_dump_common_serial_bus),
"Common Serial Bus", NULL},
ACPI_RS_DUMP_COMMON_SERIAL_BUS
};
-struct acpi_rsdump_info acpi_rs_dump_i2c_serial_bus[13] = {
+struct acpi_rsdump_info acpi_rs_dump_i2c_serial_bus[14] = {
{ACPI_RSD_TITLE, ACPI_RSD_TABLE_SIZE(acpi_rs_dump_i2c_serial_bus),
"I2C Serial Bus", NULL},
ACPI_RS_DUMP_COMMON_SERIAL_BUS {ACPI_RSD_1BITFLAG,
@@ -355,7 +356,7 @@
"SlaveAddress", NULL},
};
-struct acpi_rsdump_info acpi_rs_dump_spi_serial_bus[17] = {
+struct acpi_rsdump_info acpi_rs_dump_spi_serial_bus[18] = {
{ACPI_RSD_TITLE, ACPI_RSD_TABLE_SIZE(acpi_rs_dump_spi_serial_bus),
"Spi Serial Bus", NULL},
ACPI_RS_DUMP_COMMON_SERIAL_BUS {ACPI_RSD_1BITFLAG,
@@ -376,7 +377,7 @@
"ConnectionSpeed", NULL},
};
-struct acpi_rsdump_info acpi_rs_dump_uart_serial_bus[19] = {
+struct acpi_rsdump_info acpi_rs_dump_uart_serial_bus[20] = {
{ACPI_RSD_TITLE, ACPI_RSD_TABLE_SIZE(acpi_rs_dump_uart_serial_bus),
"Uart Serial Bus", NULL},
ACPI_RS_DUMP_COMMON_SERIAL_BUS {ACPI_RSD_2BITFLAG,
diff --git a/drivers/acpi/acpica/rsmisc.c b/drivers/acpi/acpica/rsmisc.c
index ce3d0b7..25165ca 100644
--- a/drivers/acpi/acpica/rsmisc.c
+++ b/drivers/acpi/acpica/rsmisc.c
@@ -87,7 +87,7 @@
return_ACPI_STATUS(AE_BAD_PARAMETER);
}
- if (((acpi_size) resource) & 0x3) {
+ if (((acpi_size)resource) & 0x3) {
/* Each internal resource struct is expected to be 32-bit aligned */
diff --git a/drivers/acpi/acpica/rsserial.c b/drivers/acpi/acpica/rsserial.c
index 8a01296..b82c061 100644
--- a/drivers/acpi/acpica/rsserial.c
+++ b/drivers/acpi/acpica/rsserial.c
@@ -151,7 +151,7 @@
*
******************************************************************************/
-struct acpi_rsconvert_info acpi_rs_convert_i2c_serial_bus[16] = {
+struct acpi_rsconvert_info acpi_rs_convert_i2c_serial_bus[17] = {
{ACPI_RSC_INITGET, ACPI_RESOURCE_TYPE_SERIAL_BUS,
ACPI_RS_SIZE(struct acpi_resource_i2c_serialbus),
ACPI_RSC_TABLE_SIZE(acpi_rs_convert_i2c_serial_bus)},
@@ -177,6 +177,11 @@
AML_OFFSET(common_serial_bus.flags),
1},
+ {ACPI_RSC_1BITFLAG,
+ ACPI_RS_OFFSET(data.common_serial_bus.connection_sharing),
+ AML_OFFSET(common_serial_bus.flags),
+ 2},
+
{ACPI_RSC_MOVE8,
ACPI_RS_OFFSET(data.common_serial_bus.type_revision_id),
AML_OFFSET(common_serial_bus.type_revision_id),
@@ -237,7 +242,7 @@
*
******************************************************************************/
-struct acpi_rsconvert_info acpi_rs_convert_spi_serial_bus[20] = {
+struct acpi_rsconvert_info acpi_rs_convert_spi_serial_bus[21] = {
{ACPI_RSC_INITGET, ACPI_RESOURCE_TYPE_SERIAL_BUS,
ACPI_RS_SIZE(struct acpi_resource_spi_serialbus),
ACPI_RSC_TABLE_SIZE(acpi_rs_convert_spi_serial_bus)},
@@ -263,6 +268,11 @@
AML_OFFSET(common_serial_bus.flags),
1},
+ {ACPI_RSC_1BITFLAG,
+ ACPI_RS_OFFSET(data.common_serial_bus.connection_sharing),
+ AML_OFFSET(common_serial_bus.flags),
+ 2},
+
{ACPI_RSC_MOVE8,
ACPI_RS_OFFSET(data.common_serial_bus.type_revision_id),
AML_OFFSET(common_serial_bus.type_revision_id),
@@ -339,7 +349,7 @@
*
******************************************************************************/
-struct acpi_rsconvert_info acpi_rs_convert_uart_serial_bus[22] = {
+struct acpi_rsconvert_info acpi_rs_convert_uart_serial_bus[23] = {
{ACPI_RSC_INITGET, ACPI_RESOURCE_TYPE_SERIAL_BUS,
ACPI_RS_SIZE(struct acpi_resource_uart_serialbus),
ACPI_RSC_TABLE_SIZE(acpi_rs_convert_uart_serial_bus)},
@@ -365,6 +375,11 @@
AML_OFFSET(common_serial_bus.flags),
1},
+ {ACPI_RSC_1BITFLAG,
+ ACPI_RS_OFFSET(data.common_serial_bus.connection_sharing),
+ AML_OFFSET(common_serial_bus.flags),
+ 2},
+
{ACPI_RSC_MOVE8,
ACPI_RS_OFFSET(data.common_serial_bus.type_revision_id),
AML_OFFSET(common_serial_bus.type_revision_id),
diff --git a/drivers/acpi/acpica/rsutils.c b/drivers/acpi/acpica/rsutils.c
index cf06e49..fa491c6 100644
--- a/drivers/acpi/acpica/rsutils.c
+++ b/drivers/acpi/acpica/rsutils.c
@@ -338,7 +338,7 @@
* Note: Some resource descriptors will have an additional null, so
* we add 1 to the minimum length.
*/
- if (total_length > (acpi_rsdesc_size) (minimum_length + 1)) {
+ if (total_length > (acpi_rsdesc_size)(minimum_length + 1)) {
/* Get the resource_source_index */
@@ -377,7 +377,7 @@
ACPI_CAST_PTR(char,
&aml_resource_source[1]));
- return ((acpi_rs_length) total_length);
+ return ((acpi_rs_length)total_length);
}
/* resource_source is not present */
@@ -406,9 +406,9 @@
******************************************************************************/
acpi_rsdesc_size
-acpi_rs_set_resource_source(union aml_resource * aml,
+acpi_rs_set_resource_source(union aml_resource *aml,
acpi_rs_length minimum_length,
- struct acpi_resource_source * resource_source)
+ struct acpi_resource_source *resource_source)
{
u8 *aml_resource_source;
acpi_rsdesc_size descriptor_length;
@@ -466,8 +466,8 @@
******************************************************************************/
acpi_status
-acpi_rs_get_prt_method_data(struct acpi_namespace_node * node,
- struct acpi_buffer * ret_buffer)
+acpi_rs_get_prt_method_data(struct acpi_namespace_node *node,
+ struct acpi_buffer *ret_buffer)
{
union acpi_operand_object *obj_desc;
acpi_status status;
@@ -671,7 +671,7 @@
acpi_status
acpi_rs_get_method_data(acpi_handle handle,
- char *path, struct acpi_buffer *ret_buffer)
+ const char *path, struct acpi_buffer *ret_buffer)
{
union acpi_operand_object *obj_desc;
acpi_status status;
diff --git a/drivers/acpi/acpica/rsxface.c b/drivers/acpi/acpica/rsxface.c
index 900933b..465ed81 100644
--- a/drivers/acpi/acpica/rsxface.c
+++ b/drivers/acpi/acpica/rsxface.c
@@ -433,8 +433,8 @@
acpi_status
acpi_get_vendor_resource(acpi_handle device_handle,
char *name,
- struct acpi_vendor_uuid * uuid,
- struct acpi_buffer * ret_buffer)
+ struct acpi_vendor_uuid *uuid,
+ struct acpi_buffer *ret_buffer)
{
struct acpi_vendor_walk_info info;
acpi_status status;
@@ -539,7 +539,7 @@
******************************************************************************/
acpi_status
-acpi_walk_resource_buffer(struct acpi_buffer * buffer,
+acpi_walk_resource_buffer(struct acpi_buffer *buffer,
acpi_walk_resource_callback user_function,
void *context)
{
diff --git a/drivers/acpi/acpica/tbdata.c b/drivers/acpi/acpica/tbdata.c
index 7da79ce..1388a19 100644
--- a/drivers/acpi/acpica/tbdata.c
+++ b/drivers/acpi/acpica/tbdata.c
@@ -368,7 +368,7 @@
*****************************************************************************/
acpi_status
-acpi_tb_verify_temp_table(struct acpi_table_desc * table_desc, char *signature)
+acpi_tb_verify_temp_table(struct acpi_table_desc *table_desc, char *signature)
{
acpi_status status = AE_OK;
@@ -401,9 +401,9 @@
ACPI_EXCEPTION((AE_INFO, AE_NO_MEMORY,
"%4.4s 0x%8.8X%8.8X"
" Attempted table install failed",
- acpi_ut_valid_acpi_name(table_desc->
- signature.
- ascii) ?
+ acpi_ut_valid_nameseg(table_desc->
+ signature.
+ ascii) ?
table_desc->signature.ascii : "????",
ACPI_FORMAT_UINT64(table_desc->
address)));
@@ -454,7 +454,7 @@
table_count = acpi_gbl_root_table_list.current_table_count;
}
- tables = ACPI_ALLOCATE_ZEROED(((acpi_size) table_count +
+ tables = ACPI_ALLOCATE_ZEROED(((acpi_size)table_count +
ACPI_ROOT_TABLE_SIZE_INCREMENT) *
sizeof(struct acpi_table_desc));
if (!tables) {
@@ -467,8 +467,7 @@
if (acpi_gbl_root_table_list.tables) {
memcpy(tables, acpi_gbl_root_table_list.tables,
- (acpi_size) table_count *
- sizeof(struct acpi_table_desc));
+ (acpi_size)table_count * sizeof(struct acpi_table_desc));
if (acpi_gbl_root_table_list.flags & ACPI_ROOT_ORIGIN_ALLOCATED) {
ACPI_FREE(acpi_gbl_root_table_list.tables);
@@ -701,7 +700,7 @@
*
******************************************************************************/
-acpi_status acpi_tb_get_owner_id(u32 table_index, acpi_owner_id * owner_id)
+acpi_status acpi_tb_get_owner_id(u32 table_index, acpi_owner_id *owner_id)
{
acpi_status status = AE_BAD_PARAMETER;
diff --git a/drivers/acpi/acpica/tbfadt.c b/drivers/acpi/acpica/tbfadt.c
index a79e4f3..6208069 100644
--- a/drivers/acpi/acpica/tbfadt.c
+++ b/drivers/acpi/acpica/tbfadt.c
@@ -53,7 +53,7 @@
acpi_tb_init_generic_address(struct acpi_generic_address *generic_address,
u8 space_id,
u8 byte_width,
- u64 address, char *register_name, u8 flags);
+ u64 address, const char *register_name, u8 flags);
static void acpi_tb_convert_fadt(void);
@@ -65,7 +65,7 @@
/* Table for conversion of FADT to common internal format and FADT validation */
typedef struct acpi_fadt_info {
- char *name;
+ const char *name;
u16 address64;
u16 address32;
u16 length;
@@ -192,7 +192,7 @@
acpi_tb_init_generic_address(struct acpi_generic_address *generic_address,
u8 space_id,
u8 byte_width,
- u64 address, char *register_name, u8 flags)
+ u64 address, const char *register_name, u8 flags)
{
u8 bit_width;
@@ -344,7 +344,7 @@
/* Obtain the DSDT and FACS tables via their addresses within the FADT */
- acpi_tb_install_fixed_table((acpi_physical_address) acpi_gbl_FADT.Xdsdt,
+ acpi_tb_install_fixed_table((acpi_physical_address)acpi_gbl_FADT.Xdsdt,
ACPI_SIG_DSDT, &acpi_gbl_dsdt_index);
/* If Hardware Reduced flag is set, there is no FACS */
@@ -385,14 +385,15 @@
{
/*
* Check if the FADT is larger than the largest table that we expect
- * (the ACPI 5.0 version). If so, truncate the table, and issue
- * a warning.
+ * (typically the current ACPI specification version). If so, truncate
+ * the table, and issue a warning.
*/
if (length > sizeof(struct acpi_table_fadt)) {
ACPI_BIOS_WARNING((AE_INFO,
- "FADT (revision %u) is longer than ACPI 5.0 version, "
+ "FADT (revision %u) is longer than %s length, "
"truncating length %u to %u",
- table->revision, length,
+ table->revision, ACPI_FADT_CONFORMANCE,
+ length,
(u32)sizeof(struct acpi_table_fadt)));
}
@@ -467,7 +468,7 @@
static void acpi_tb_convert_fadt(void)
{
- char *name;
+ const char *name;
struct acpi_generic_address *address64;
u32 address32;
u8 length;
@@ -646,9 +647,12 @@
if ((address64->address && !length) ||
(!address64->address && length)) {
ACPI_BIOS_WARNING((AE_INFO,
- "Optional FADT field %s has zero address or length: "
- "0x%8.8X%8.8X/0x%X",
- name,
+ "Optional FADT field %s has valid %s but zero %s: "
+ "0x%8.8X%8.8X/0x%X", name,
+ (length ? "Length" :
+ "Address"),
+ (length ? "Address" :
+ "Length"),
ACPI_FORMAT_UINT64
(address64->address),
length));
diff --git a/drivers/acpi/acpica/tbfind.c b/drivers/acpi/acpica/tbfind.c
index f2d0803..e348d61 100644
--- a/drivers/acpi/acpica/tbfind.c
+++ b/drivers/acpi/acpica/tbfind.c
@@ -76,7 +76,7 @@
/* Validate the input table signature */
- if (!acpi_is_valid_signature(signature)) {
+ if (!acpi_ut_valid_nameseg(signature)) {
return_ACPI_STATUS(AE_BAD_SIGNATURE);
}
diff --git a/drivers/acpi/acpica/tbinstal.c b/drivers/acpi/acpica/tbinstal.c
index 4dc6108..8b13052 100644
--- a/drivers/acpi/acpica/tbinstal.c
+++ b/drivers/acpi/acpica/tbinstal.c
@@ -299,9 +299,9 @@
ACPI_BIOS_ERROR((AE_INFO,
"Table has invalid signature [%4.4s] (0x%8.8X), "
"must be SSDT or OEMx",
- acpi_ut_valid_acpi_name(new_table_desc.
- signature.
- ascii) ?
+ acpi_ut_valid_nameseg(new_table_desc.
+ signature.
+ ascii) ?
new_table_desc.signature.
ascii : "????",
new_table_desc.signature.integer));
diff --git a/drivers/acpi/acpica/tbutils.c b/drivers/acpi/acpica/tbutils.c
index 9240c76..e285539 100644
--- a/drivers/acpi/acpica/tbutils.c
+++ b/drivers/acpi/acpica/tbutils.c
@@ -231,7 +231,7 @@
ACPI_FORMAT_UINT64(address64)));
}
#endif
- return ((acpi_physical_address) (address64));
+ return ((acpi_physical_address)(address64));
}
}
@@ -287,12 +287,12 @@
* the XSDT if the revision is > 1 and the XSDT pointer is present,
* as per the ACPI specification.
*/
- address = (acpi_physical_address) rsdp->xsdt_physical_address;
+ address = (acpi_physical_address)rsdp->xsdt_physical_address;
table_entry_size = ACPI_XSDT_ENTRY_SIZE;
} else {
/* Root table is an RSDT (32-bit physical addresses) */
- address = (acpi_physical_address) rsdp->rsdt_physical_address;
+ address = (acpi_physical_address)rsdp->rsdt_physical_address;
table_entry_size = ACPI_RSDT_ENTRY_SIZE;
}
@@ -380,30 +380,3 @@
acpi_os_unmap_memory(table, length);
return_ACPI_STATUS(AE_OK);
}
-
-/*******************************************************************************
- *
- * FUNCTION: acpi_is_valid_signature
- *
- * PARAMETERS: signature - Sig string to be validated
- *
- * RETURN: TRUE if signature is has 4 valid ACPI characters
- *
- * DESCRIPTION: Validate an ACPI table signature.
- *
- ******************************************************************************/
-
-u8 acpi_is_valid_signature(char *signature)
-{
- u32 i;
-
- /* Validate each character in the signature */
-
- for (i = 0; i < ACPI_NAME_SIZE; i++) {
- if (!acpi_ut_valid_acpi_char(signature[i], i)) {
- return (FALSE);
- }
- }
-
- return (TRUE);
-}
diff --git a/drivers/acpi/acpica/tbxface.c b/drivers/acpi/acpica/tbxface.c
index 326df65..3ecec93 100644
--- a/drivers/acpi/acpica/tbxface.c
+++ b/drivers/acpi/acpica/tbxface.c
@@ -99,7 +99,7 @@
******************************************************************************/
acpi_status __init
-acpi_initialize_tables(struct acpi_table_desc * initial_table_array,
+acpi_initialize_tables(struct acpi_table_desc *initial_table_array,
u32 initial_table_count, u8 allow_resize)
{
acpi_physical_address rsdp_address;
@@ -120,7 +120,7 @@
/* Root Table Array has been statically allocated by the host */
memset(initial_table_array, 0,
- (acpi_size) initial_table_count *
+ (acpi_size)initial_table_count *
sizeof(struct acpi_table_desc));
acpi_gbl_root_table_list.tables = initial_table_array;
@@ -352,7 +352,7 @@
*
******************************************************************************/
acpi_status
-acpi_get_table_by_index(u32 table_index, struct acpi_table_header ** table)
+acpi_get_table_by_index(u32 table_index, struct acpi_table_header **table)
{
acpi_status status;
diff --git a/drivers/acpi/acpica/tbxfload.c b/drivers/acpi/acpica/tbxfload.c
index 3151968..ac71abc 100644
--- a/drivers/acpi/acpica/tbxfload.c
+++ b/drivers/acpi/acpica/tbxfload.c
@@ -82,7 +82,7 @@
* their customized default region handlers.
*/
status = acpi_ev_install_region_handlers();
- if (ACPI_FAILURE(status) && status != AE_ALREADY_EXISTS) {
+ if (ACPI_FAILURE(status)) {
ACPI_EXCEPTION((AE_INFO, status,
"During Region initialization"));
return_ACPI_STATUS(status);
diff --git a/drivers/acpi/acpica/tbxfroot.c b/drivers/acpi/acpica/tbxfroot.c
index b9a78e4..adb6cfc 100644
--- a/drivers/acpi/acpica/tbxfroot.c
+++ b/drivers/acpi/acpica/tbxfroot.c
@@ -90,7 +90,7 @@
*
******************************************************************************/
-acpi_status acpi_tb_validate_rsdp(struct acpi_table_rsdp * rsdp)
+acpi_status acpi_tb_validate_rsdp(struct acpi_table_rsdp *rsdp)
{
/*
@@ -142,7 +142,7 @@
*
******************************************************************************/
-acpi_status __init acpi_find_root_pointer(acpi_physical_address * table_address)
+acpi_status __init acpi_find_root_pointer(acpi_physical_address *table_address)
{
u8 *table_ptr;
u8 *mem_rover;
@@ -201,7 +201,7 @@
(u32) ACPI_PTR_DIFF(mem_rover, table_ptr);
*table_address =
- (acpi_physical_address) physical_address;
+ (acpi_physical_address)physical_address;
return_ACPI_STATUS(AE_OK);
}
}
@@ -234,7 +234,7 @@
(ACPI_HI_RSDP_WINDOW_BASE +
ACPI_PTR_DIFF(mem_rover, table_ptr));
- *table_address = (acpi_physical_address) physical_address;
+ *table_address = (acpi_physical_address)physical_address;
return_ACPI_STATUS(AE_OK);
}
diff --git a/drivers/acpi/acpica/utalloc.c b/drivers/acpi/acpica/utalloc.c
index 3dbdc3a..13324a2 100644
--- a/drivers/acpi/acpica/utalloc.c
+++ b/drivers/acpi/acpica/utalloc.c
@@ -231,7 +231,7 @@
*
******************************************************************************/
-acpi_status acpi_ut_validate_buffer(struct acpi_buffer * buffer)
+acpi_status acpi_ut_validate_buffer(struct acpi_buffer *buffer)
{
/* Obviously, the structure pointer must be valid */
@@ -272,8 +272,7 @@
******************************************************************************/
acpi_status
-acpi_ut_initialize_buffer(struct acpi_buffer * buffer,
- acpi_size required_length)
+acpi_ut_initialize_buffer(struct acpi_buffer *buffer, acpi_size required_length)
{
acpi_size input_buffer_length;
diff --git a/drivers/acpi/acpica/utascii.c b/drivers/acpi/acpica/utascii.c
new file mode 100644
index 0000000..706c1f3
--- /dev/null
+++ b/drivers/acpi/acpica/utascii.c
@@ -0,0 +1,140 @@
+/******************************************************************************
+ *
+ * Module Name: utascii - Utility ascii functions
+ *
+ *****************************************************************************/
+
+/*
+ * Copyright (C) 2000 - 2016, Intel Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ * substantially similar to the "NO WARRANTY" disclaimer below
+ * ("Disclaimer") and any redistribution must be conditioned upon
+ * including a substantially similar Disclaimer requirement for further
+ * binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ * of any contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ */
+
+#include <acpi/acpi.h>
+#include "accommon.h"
+
+/*******************************************************************************
+ *
+ * FUNCTION: acpi_ut_valid_nameseg
+ *
+ * PARAMETERS: name - The name or table signature to be examined.
+ * Four characters, does not have to be a
+ * NULL terminated string.
+ *
+ * RETURN: TRUE if the signature has 4 valid ACPI characters
+ *
+ * DESCRIPTION: Validate an ACPI table signature.
+ *
+ ******************************************************************************/
+
+u8 acpi_ut_valid_nameseg(char *name)
+{
+ u32 i;
+
+ /* Validate each character in the signature */
+
+ for (i = 0; i < ACPI_NAME_SIZE; i++) {
+ if (!acpi_ut_valid_name_char(name[i], i)) {
+ return (FALSE);
+ }
+ }
+
+ return (TRUE);
+}
+
+/*******************************************************************************
+ *
+ * FUNCTION: acpi_ut_valid_name_char
+ *
+ * PARAMETERS: char - The character to be examined
+ * position - Byte position (0-3)
+ *
+ * RETURN: TRUE if the character is valid, FALSE otherwise
+ *
+ * DESCRIPTION: Check for a valid ACPI character. Must be one of:
+ * 1) Upper case alpha
+ * 2) numeric
+ * 3) underscore
+ *
+ * We allow a '!' as the last character because of the ASF! table
+ *
+ ******************************************************************************/
+
+u8 acpi_ut_valid_name_char(char character, u32 position)
+{
+
+ if (!((character >= 'A' && character <= 'Z') ||
+ (character >= '0' && character <= '9') || (character == '_'))) {
+
+ /* Allow a '!' in the last position */
+
+ if (character == '!' && position == 3) {
+ return (TRUE);
+ }
+
+ return (FALSE);
+ }
+
+ return (TRUE);
+}
+
+/*******************************************************************************
+ *
+ * FUNCTION: acpi_ut_check_and_repair_ascii
+ *
+ * PARAMETERS: name - Ascii string
+ * count - Number of characters to check
+ *
+ * RETURN: None
+ *
+ * DESCRIPTION: Ensure that the requested number of characters are printable
+ * Ascii characters. Sets non-printable and null chars to <space>.
+ *
+ ******************************************************************************/
+
+void acpi_ut_check_and_repair_ascii(u8 *name, char *repaired_name, u32 count)
+{
+ u32 i;
+
+ for (i = 0; i < count; i++) {
+ repaired_name[i] = (char)name[i];
+
+ if (!name[i]) {
+ return;
+ }
+ if (!isprint(name[i])) {
+ repaired_name[i] = ' ';
+ }
+ }
+}
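acpi_ut_valid_nameseg and acpi_ut_valid_name_char above encode the rule for a valid four-character name segment: upper-case letters, digits, and underscore only, with '!' tolerated in the last position for the ASF! table. A self-contained restatement of that rule over a few arbitrary test strings:

#include <stdio.h>

static int valid_name_char(char c, unsigned int position)
{
	if ((c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_') {
		return (1);
	}
	return (c == '!' && position == 3);     /* ASF! exception */
}

static int valid_nameseg(const char *name)
{
	unsigned int i;

	for (i = 0; i < 4; i++) {
		if (!valid_name_char(name[i], i)) {
			return (0);
		}
	}
	return (1);
}

int main(void)
{
	printf("SSDT -> %d\n", valid_nameseg("SSDT"));  /* 1 */
	printf("ASF! -> %d\n", valid_nameseg("ASF!"));  /* 1: '!' is last */
	printf("xsdt -> %d\n", valid_nameseg("xsdt"));  /* 0: lower case */
	return (0);
}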
diff --git a/drivers/acpi/acpica/utbuffer.c b/drivers/acpi/acpica/utbuffer.c
index 0cfb2b8..bd31faf 100644
--- a/drivers/acpi/acpica/utbuffer.c
+++ b/drivers/acpi/acpica/utbuffer.c
@@ -106,31 +106,31 @@
default: /* Default is BYTE display */
acpi_os_printf("%02X ",
- buffer[(acpi_size) i + j]);
+ buffer[(acpi_size)i + j]);
break;
case DB_WORD_DISPLAY:
ACPI_MOVE_16_TO_32(&temp32,
- &buffer[(acpi_size) i + j]);
+ &buffer[(acpi_size)i + j]);
acpi_os_printf("%04X ", temp32);
break;
case DB_DWORD_DISPLAY:
ACPI_MOVE_32_TO_32(&temp32,
- &buffer[(acpi_size) i + j]);
+ &buffer[(acpi_size)i + j]);
acpi_os_printf("%08X ", temp32);
break;
case DB_QWORD_DISPLAY:
ACPI_MOVE_32_TO_32(&temp32,
- &buffer[(acpi_size) i + j]);
+ &buffer[(acpi_size)i + j]);
acpi_os_printf("%08X", temp32);
ACPI_MOVE_32_TO_32(&temp32,
- &buffer[(acpi_size) i + j +
+ &buffer[(acpi_size)i + j +
4]);
acpi_os_printf("%08X ", temp32);
break;
@@ -158,7 +158,7 @@
acpi_os_printf("// ");
}
- buf_char = buffer[(acpi_size) i + j];
+ buf_char = buffer[(acpi_size)i + j];
if (isprint(buf_char)) {
acpi_os_printf("%c", buf_char);
} else {
@@ -274,31 +274,31 @@
default: /* Default is BYTE display */
acpi_ut_file_printf(file, "%02X ",
- buffer[(acpi_size) i + j]);
+ buffer[(acpi_size)i + j]);
break;
case DB_WORD_DISPLAY:
ACPI_MOVE_16_TO_32(&temp32,
- &buffer[(acpi_size) i + j]);
+ &buffer[(acpi_size)i + j]);
acpi_ut_file_printf(file, "%04X ", temp32);
break;
case DB_DWORD_DISPLAY:
ACPI_MOVE_32_TO_32(&temp32,
- &buffer[(acpi_size) i + j]);
+ &buffer[(acpi_size)i + j]);
acpi_ut_file_printf(file, "%08X ", temp32);
break;
case DB_QWORD_DISPLAY:
ACPI_MOVE_32_TO_32(&temp32,
- &buffer[(acpi_size) i + j]);
+ &buffer[(acpi_size)i + j]);
acpi_ut_file_printf(file, "%08X", temp32);
ACPI_MOVE_32_TO_32(&temp32,
- &buffer[(acpi_size) i + j +
+ &buffer[(acpi_size)i + j +
4]);
acpi_ut_file_printf(file, "%08X ", temp32);
break;
@@ -318,7 +318,7 @@
return;
}
- buf_char = buffer[(acpi_size) i + j];
+ buf_char = buffer[(acpi_size)i + j];
if (isprint(buf_char)) {
acpi_ut_file_printf(file, "%c", buf_char);
} else {
diff --git a/drivers/acpi/acpica/utcache.c b/drivers/acpi/acpica/utcache.c
index f8e9978..3b8d23e 100644
--- a/drivers/acpi/acpica/utcache.c
+++ b/drivers/acpi/acpica/utcache.c
@@ -105,7 +105,7 @@
*
******************************************************************************/
-acpi_status acpi_os_purge_cache(struct acpi_memory_list * cache)
+acpi_status acpi_os_purge_cache(struct acpi_memory_list *cache)
{
void *next;
acpi_status status;
@@ -151,7 +151,7 @@
*
******************************************************************************/
-acpi_status acpi_os_delete_cache(struct acpi_memory_list * cache)
+acpi_status acpi_os_delete_cache(struct acpi_memory_list *cache)
{
acpi_status status;
@@ -184,8 +184,7 @@
*
******************************************************************************/
-acpi_status
-acpi_os_release_object(struct acpi_memory_list * cache, void *object)
+acpi_status acpi_os_release_object(struct acpi_memory_list *cache, void *object)
{
acpi_status status;
diff --git a/drivers/acpi/acpica/utcopy.c b/drivers/acpi/acpica/utcopy.c
index 98d53e5..82f9714 100644
--- a/drivers/acpi/acpica/utcopy.c
+++ b/drivers/acpi/acpica/utcopy.c
@@ -53,7 +53,7 @@
static acpi_status
acpi_ut_copy_isimple_to_esimple(union acpi_operand_object *internal_object,
union acpi_object *external_object,
- u8 * data_space, acpi_size * buffer_space_used);
+ u8 *data_space, acpi_size *buffer_space_used);
static acpi_status
acpi_ut_copy_ielement_to_ielement(u8 object_type,
@@ -63,7 +63,7 @@
static acpi_status
acpi_ut_copy_ipackage_to_epackage(union acpi_operand_object *internal_object,
- u8 * buffer, acpi_size * space_used);
+ u8 *buffer, acpi_size *space_used);
static acpi_status
acpi_ut_copy_esimple_to_isimple(union acpi_object *user_obj,
@@ -111,7 +111,7 @@
static acpi_status
acpi_ut_copy_isimple_to_esimple(union acpi_operand_object *internal_object,
union acpi_object *external_object,
- u8 * data_space, acpi_size * buffer_space_used)
+ u8 *data_space, acpi_size *buffer_space_used)
{
acpi_status status = AE_OK;
@@ -151,7 +151,7 @@
memcpy((void *)data_space,
(void *)internal_object->string.pointer,
- (acpi_size) internal_object->string.length + 1);
+ (acpi_size)internal_object->string.length + 1);
break;
case ACPI_TYPE_BUFFER:
@@ -331,7 +331,7 @@
static acpi_status
acpi_ut_copy_ipackage_to_epackage(union acpi_operand_object *internal_object,
- u8 * buffer, acpi_size * space_used)
+ u8 *buffer, acpi_size *space_used)
{
union acpi_object *external_object;
acpi_status status;
@@ -362,7 +362,7 @@
* Leave room for an array of ACPI_OBJECTS in the buffer
* and move the free space past it
*/
- info.length += (acpi_size) external_object->package.count *
+ info.length += (acpi_size)external_object->package.count *
ACPI_ROUND_UP_TO_NATIVE_WORD(sizeof(union acpi_object));
info.free_space += external_object->package.count *
ACPI_ROUND_UP_TO_NATIVE_WORD(sizeof(union acpi_object));
@@ -738,7 +738,7 @@
*/
if (source_desc->string.pointer) {
dest_desc->string.pointer =
- ACPI_ALLOCATE((acpi_size) source_desc->string.
+ ACPI_ALLOCATE((acpi_size)source_desc->string.
length + 1);
if (!dest_desc->string.pointer) {
return (AE_NO_MEMORY);
@@ -748,7 +748,7 @@
memcpy(dest_desc->string.pointer,
source_desc->string.pointer,
- (acpi_size) source_desc->string.length + 1);
+ (acpi_size)source_desc->string.length + 1);
}
break;
diff --git a/drivers/acpi/acpica/utdebug.c b/drivers/acpi/acpica/utdebug.c
index 1cfc5f6..5744222 100644
--- a/drivers/acpi/acpica/utdebug.c
+++ b/drivers/acpi/acpica/utdebug.c
@@ -51,13 +51,9 @@
ACPI_MODULE_NAME("utdebug")
#ifdef ACPI_DEBUG_OUTPUT
-static acpi_thread_id acpi_gbl_prev_thread_id = (acpi_thread_id) 0xFFFFFFFF;
-static char *acpi_gbl_fn_entry_str = "----Entry";
-static char *acpi_gbl_fn_exit_str = "----Exit-";
-
-/* Local prototypes */
-
-static const char *acpi_ut_trim_function_name(const char *function_name);
+static acpi_thread_id acpi_gbl_previous_thread_id = (acpi_thread_id) 0xFFFFFFFF;
+static const char *acpi_gbl_function_entry_prefix = "----Entry";
+static const char *acpi_gbl_function_exit_prefix = "----Exit-";
/*******************************************************************************
*
@@ -178,14 +174,14 @@
* Thread tracking and context switch notification
*/
thread_id = acpi_os_get_thread_id();
- if (thread_id != acpi_gbl_prev_thread_id) {
+ if (thread_id != acpi_gbl_previous_thread_id) {
if (ACPI_LV_THREADS & acpi_dbg_level) {
acpi_os_printf
("\n**** Context Switch from TID %u to TID %u ****\n\n",
- (u32)acpi_gbl_prev_thread_id, (u32)thread_id);
+ (u32)acpi_gbl_previous_thread_id, (u32)thread_id);
}
- acpi_gbl_prev_thread_id = thread_id;
+ acpi_gbl_previous_thread_id = thread_id;
acpi_gbl_nesting_level = 0;
}
@@ -287,7 +283,8 @@
if (ACPI_IS_DEBUG_ENABLED(ACPI_LV_FUNCTIONS, component_id)) {
acpi_debug_print(ACPI_LV_FUNCTIONS,
line_number, function_name, module_name,
- component_id, "%s\n", acpi_gbl_fn_entry_str);
+ component_id, "%s\n",
+ acpi_gbl_function_entry_prefix);
}
}
@@ -312,7 +309,8 @@
void
acpi_ut_trace_ptr(u32 line_number,
const char *function_name,
- const char *module_name, u32 component_id, void *pointer)
+ const char *module_name,
+ u32 component_id, const void *pointer)
{
acpi_gbl_nesting_level++;
@@ -323,8 +321,8 @@
if (ACPI_IS_DEBUG_ENABLED(ACPI_LV_FUNCTIONS, component_id)) {
acpi_debug_print(ACPI_LV_FUNCTIONS,
line_number, function_name, module_name,
- component_id, "%s %p\n", acpi_gbl_fn_entry_str,
- pointer);
+ component_id, "%s %p\n",
+ acpi_gbl_function_entry_prefix, pointer);
}
}
@@ -348,7 +346,7 @@
void
acpi_ut_trace_str(u32 line_number,
const char *function_name,
- const char *module_name, u32 component_id, char *string)
+ const char *module_name, u32 component_id, const char *string)
{
acpi_gbl_nesting_level++;
@@ -359,8 +357,8 @@
if (ACPI_IS_DEBUG_ENABLED(ACPI_LV_FUNCTIONS, component_id)) {
acpi_debug_print(ACPI_LV_FUNCTIONS,
line_number, function_name, module_name,
- component_id, "%s %s\n", acpi_gbl_fn_entry_str,
- string);
+ component_id, "%s %s\n",
+ acpi_gbl_function_entry_prefix, string);
}
}
@@ -396,7 +394,7 @@
acpi_debug_print(ACPI_LV_FUNCTIONS,
line_number, function_name, module_name,
component_id, "%s %08X\n",
- acpi_gbl_fn_entry_str, integer);
+ acpi_gbl_function_entry_prefix, integer);
}
}
@@ -427,7 +425,8 @@
if (ACPI_IS_DEBUG_ENABLED(ACPI_LV_FUNCTIONS, component_id)) {
acpi_debug_print(ACPI_LV_FUNCTIONS,
line_number, function_name, module_name,
- component_id, "%s\n", acpi_gbl_fn_exit_str);
+ component_id, "%s\n",
+ acpi_gbl_function_exit_prefix);
}
if (acpi_gbl_nesting_level) {
@@ -467,14 +466,14 @@
acpi_debug_print(ACPI_LV_FUNCTIONS,
line_number, function_name,
module_name, component_id, "%s %s\n",
- acpi_gbl_fn_exit_str,
+ acpi_gbl_function_exit_prefix,
acpi_format_exception(status));
} else {
acpi_debug_print(ACPI_LV_FUNCTIONS,
line_number, function_name,
module_name, component_id,
"%s ****Exception****: %s\n",
- acpi_gbl_fn_exit_str,
+ acpi_gbl_function_exit_prefix,
acpi_format_exception(status));
}
}
@@ -514,7 +513,7 @@
acpi_debug_print(ACPI_LV_FUNCTIONS,
line_number, function_name, module_name,
component_id, "%s %8.8X%8.8X\n",
- acpi_gbl_fn_exit_str,
+ acpi_gbl_function_exit_prefix,
ACPI_FORMAT_UINT64(value));
}
@@ -552,8 +551,8 @@
if (ACPI_IS_DEBUG_ENABLED(ACPI_LV_FUNCTIONS, component_id)) {
acpi_debug_print(ACPI_LV_FUNCTIONS,
line_number, function_name, module_name,
- component_id, "%s %p\n", acpi_gbl_fn_exit_str,
- ptr);
+ component_id, "%s %p\n",
+ acpi_gbl_function_exit_prefix, ptr);
}
if (acpi_gbl_nesting_level) {
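The renames above touch only the globals behind function tracing: a traced entry prints the "----Entry" prefix and bumps acpi_gbl_nesting_level, a traced exit prints "----Exit-" and drops it. A toy version of that bracketed tracing; the helpers and indentation scheme here are illustrative, not the ACPICA implementation:

#include <stdio.h>

static unsigned int nesting_level;      /* mirrors acpi_gbl_nesting_level */

static void trace_entry(const char *function_name)
{
	printf("%*s----Entry %s\n", (int)(nesting_level * 2), "",
	       function_name);
	nesting_level++;
}

static void trace_exit(const char *function_name)
{
	if (nesting_level) {
		nesting_level--;
	}
	printf("%*s----Exit- %s\n", (int)(nesting_level * 2), "",
	       function_name);
}

int main(void)
{
	trace_entry("outer");
	trace_entry("inner");
	trace_exit("inner");
	trace_exit("outer");
	return (0);
}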
diff --git a/drivers/acpi/acpica/utdecode.c b/drivers/acpi/acpica/utdecode.c
index 6ba65b0..efd7988 100644
--- a/drivers/acpi/acpica/utdecode.c
+++ b/drivers/acpi/acpica/utdecode.c
@@ -446,7 +446,7 @@
/* Names for Notify() values, used for debug output */
-static const char *acpi_gbl_generic_notify[ACPI_NOTIFY_MAX + 1] = {
+static const char *acpi_gbl_generic_notify[ACPI_GENERIC_NOTIFY_MAX + 1] = {
/* 00 */ "Bus Check",
/* 01 */ "Device Check",
/* 02 */ "Device Wake",
@@ -459,49 +459,53 @@
/* 09 */ "Device PLD Check",
/* 0A */ "Reserved",
/* 0B */ "System Locality Update",
- /* 0C */ "Shutdown Request",
+ /* 0C */ "Shutdown Request",
+ /* Reserved in ACPI 6.0 */
/* 0D */ "System Resource Affinity Update"
};
-static const char *acpi_gbl_device_notify[4] = {
+static const char *acpi_gbl_device_notify[5] = {
/* 80 */ "Status Change",
/* 81 */ "Information Change",
/* 82 */ "Device-Specific Change",
- /* 83 */ "Device-Specific Change"
+ /* 83 */ "Device-Specific Change",
+ /* 84 */ "Reserved"
};
-static const char *acpi_gbl_processor_notify[4] = {
+static const char *acpi_gbl_processor_notify[5] = {
/* 80 */ "Performance Capability Change",
/* 81 */ "C-State Change",
/* 82 */ "Throttling Capability Change",
- /* 83 */ "Device-Specific Change"
+ /* 83 */ "Guaranteed Change",
+ /* 84 */ "Minimum Excursion"
};
-static const char *acpi_gbl_thermal_notify[4] = {
+static const char *acpi_gbl_thermal_notify[5] = {
/* 80 */ "Thermal Status Change",
/* 81 */ "Thermal Trip Point Change",
/* 82 */ "Thermal Device List Change",
- /* 83 */ "Thermal Relationship Change"
+ /* 83 */ "Thermal Relationship Change",
+ /* 84 */ "Reserved"
};
const char *acpi_ut_get_notify_name(u32 notify_value, acpi_object_type type)
{
- /* 00 - 0D are common to all object types */
+ /* 00 - 0D are "common to all object types" (from ACPI Spec) */
- if (notify_value <= ACPI_NOTIFY_MAX) {
+ if (notify_value <= ACPI_GENERIC_NOTIFY_MAX) {
return (acpi_gbl_generic_notify[notify_value]);
}
- /* 0D - 7F are reserved */
+ /* 0E - 7F are reserved */
if (notify_value <= ACPI_MAX_SYS_NOTIFY) {
return ("Reserved");
}
- /* 80 - 83 are per-object-type */
+ /* 80 - 84 are per-object-type */
- if (notify_value <= 0x83) {
+ if (notify_value <= ACPI_SPECIFIC_NOTIFY_MAX) {
switch (type) {
case ACPI_TYPE_ANY:
case ACPI_TYPE_DEVICE:
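The updated range checks in acpi_ut_get_notify_name partition the notify space: 0x00-0x0D generic (common to all object types), 0x0E-0x7F reserved, 0x80-0x84 per-object-type, and higher values left to device- or hardware-specific handling. A toy classifier over the same ranges; the limit macros merely stand in for ACPI_GENERIC_NOTIFY_MAX, ACPI_MAX_SYS_NOTIFY, and ACPI_SPECIFIC_NOTIFY_MAX:

#include <stdio.h>

#define GENERIC_NOTIFY_MAX      0x0D
#define MAX_SYS_NOTIFY          0x7F
#define SPECIFIC_NOTIFY_MAX     0x84

static const char *classify_notify(unsigned int value)
{
	if (value <= GENERIC_NOTIFY_MAX) {
		return ("generic, common to all object types");
	}
	if (value <= MAX_SYS_NOTIFY) {
		return ("reserved");
	}
	if (value <= SPECIFIC_NOTIFY_MAX) {
		return ("per-object-type");
	}
	return ("device/hardware specific");
}

int main(void)
{
	printf("0x02 -> %s\n", classify_notify(0x02));
	printf("0x40 -> %s\n", classify_notify(0x40));
	printf("0x83 -> %s\n", classify_notify(0x83));
	printf("0xC0 -> %s\n", classify_notify(0xC0));
	return (0);
}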
diff --git a/drivers/acpi/acpica/uteval.c b/drivers/acpi/acpica/uteval.c
index 17b9f3e..7bad13f 100644
--- a/drivers/acpi/acpica/uteval.c
+++ b/drivers/acpi/acpica/uteval.c
@@ -69,7 +69,7 @@
acpi_status
acpi_ut_evaluate_object(struct acpi_namespace_node *prefix_node,
- char *path,
+ const char *path,
u32 expected_return_btypes,
union acpi_operand_object **return_desc)
{
@@ -204,7 +204,7 @@
******************************************************************************/
acpi_status
-acpi_ut_evaluate_numeric_object(char *object_name,
+acpi_ut_evaluate_numeric_object(const char *object_name,
struct acpi_namespace_node *device_node,
u64 *value)
{
diff --git a/drivers/acpi/acpica/utglobal.c b/drivers/acpi/acpica/utglobal.c
index 48fffcf..dd3fd7f 100644
--- a/drivers/acpi/acpica/utglobal.c
+++ b/drivers/acpi/acpica/utglobal.c
@@ -80,6 +80,11 @@
"_S4D"
};
+/* Hex-to-ascii */
+
+const char acpi_gbl_lower_hex_digits[] = "0123456789abcdef";
+const char acpi_gbl_upper_hex_digits[] = "0123456789ABCDEF";
+
/*******************************************************************************
*
* Namespace globals
@@ -221,6 +226,49 @@
};
#endif /* !ACPI_REDUCED_HARDWARE */
+#if defined (ACPI_DISASSEMBLER) || defined (ACPI_ASL_COMPILER)
+
+/* to_pld macro: compile/disassemble strings */
+
+const char *acpi_gbl_pld_panel_list[] = {
+ "TOP",
+ "BOTTOM",
+ "LEFT",
+ "RIGHT",
+ "FRONT",
+ "BACK",
+ "UNKNOWN",
+ NULL
+};
+
+const char *acpi_gbl_pld_vertical_position_list[] = {
+ "UPPER",
+ "CENTER",
+ "LOWER",
+ NULL
+};
+
+const char *acpi_gbl_pld_horizontal_position_list[] = {
+ "LEFT",
+ "CENTER",
+ "RIGHT",
+ NULL
+};
+
+const char *acpi_gbl_pld_shape_list[] = {
+ "ROUND",
+ "OVAL",
+ "SQUARE",
+ "VERTICALRECTANGLE",
+ "HORIZONTALRECTANGLE",
+ "VERTICALTRAPEZOID",
+ "HORIZONTALTRAPEZOID",
+ "UNKNOWN",
+ "CHAMFERED",
+ NULL
+};
+#endif
+
/* Public globals */
ACPI_EXPORT_SYMBOL(acpi_gbl_FADT)
diff --git a/drivers/acpi/acpica/utids.c b/drivers/acpi/acpica/utids.c
index 6fb4ec3..f7cd2d5 100644
--- a/drivers/acpi/acpica/utids.c
+++ b/drivers/acpi/acpica/utids.c
@@ -95,7 +95,7 @@
hid =
ACPI_ALLOCATE_ZEROED(sizeof(struct acpi_pnp_device_id) +
- (acpi_size) length);
+ (acpi_size)length);
if (!hid) {
status = AE_NO_MEMORY;
goto cleanup;
@@ -173,7 +173,7 @@
uid =
ACPI_ALLOCATE_ZEROED(sizeof(struct acpi_pnp_device_id) +
- (acpi_size) length);
+ (acpi_size)length);
if (!uid) {
status = AE_NO_MEMORY;
goto cleanup;
@@ -309,7 +309,7 @@
/* Area for CID strings starts after the CID PNP_DEVICE_ID array */
next_id_string = ACPI_CAST_PTR(char, cid_list->ids) +
- ((acpi_size) count * sizeof(struct acpi_pnp_device_id));
+ ((acpi_size)count * sizeof(struct acpi_pnp_device_id));
/* Copy/convert the CIDs to the return buffer */
@@ -413,7 +413,7 @@
cls =
ACPI_ALLOCATE_ZEROED(sizeof(struct acpi_pnp_device_id) +
- (acpi_size) length);
+ (acpi_size)length);
if (!cls) {
status = AE_NO_MEMORY;
goto cleanup;
diff --git a/drivers/acpi/acpica/utmath.c b/drivers/acpi/acpica/utmath.c
index 6673720..2d6530e 100644
--- a/drivers/acpi/acpica/utmath.c
+++ b/drivers/acpi/acpica/utmath.c
@@ -236,8 +236,8 @@
}
remainder.full = remainder.full - dividend.full;
- remainder.part.hi = (u32) - ((s32) remainder.part.hi);
- remainder.part.lo = (u32) - ((s32) remainder.part.lo);
+ remainder.part.hi = (u32)-((s32)remainder.part.hi);
+ remainder.part.lo = (u32)-((s32)remainder.part.lo);
if (remainder.part.lo) {
remainder.part.hi--;
diff --git a/drivers/acpi/acpica/utmisc.c b/drivers/acpi/acpica/utmisc.c
index d938c27..389de3b 100644
--- a/drivers/acpi/acpica/utmisc.c
+++ b/drivers/acpi/acpica/utmisc.c
@@ -361,7 +361,7 @@
void
acpi_ut_display_init_pathname(u8 type,
struct acpi_namespace_node *obj_handle,
- char *path)
+ const char *path)
{
acpi_status status;
struct acpi_buffer buffer;
diff --git a/drivers/acpi/acpica/utnonansi.c b/drivers/acpi/acpica/utnonansi.c
index d5c3adf..3465fe2 100644
--- a/drivers/acpi/acpica/utnonansi.c
+++ b/drivers/acpi/acpica/utnonansi.c
@@ -205,37 +205,41 @@
*
* FUNCTION: acpi_ut_strtoul64
*
- * PARAMETERS: string - Null terminated string
- * base - Radix of the string: 16 or ACPI_ANY_BASE;
- * ACPI_ANY_BASE means 'in behalf of to_integer'
- * ret_integer - Where the converted integer is returned
+ * PARAMETERS: string - Null terminated string
+ * base - Radix of the string: 16 or 10 or
+ * ACPI_ANY_BASE
+ * max_integer_byte_width - Maximum allowable integer, in bytes:
+ * 4 or 8 (32 or 64 bits)
+ * ret_integer - Where the converted integer is
+ * returned
*
* RETURN: Status and Converted value
*
* DESCRIPTION: Convert a string into an unsigned value. Performs either a
- * 32-bit or 64-bit conversion, depending on the current mode
- * of the interpreter.
+ * 32-bit or 64-bit conversion, depending on the input integer
+ * size (often the current mode of the interpreter).
*
- * NOTES: acpi_gbl_integer_byte_width should be set to the proper width.
+ * NOTES: Negative numbers are not supported, as ACPI itself does
+ * not support them.
+ *
+ * acpi_gbl_integer_byte_width should be set to the proper width.
* For the core ACPICA code, this width depends on the DSDT
- * version. For iASL, the default byte width is always 8.
+ * version. For iASL, the default byte width is always 8 for the
+ * parser, but error checking is performed later to flag cases
+ * where a 64-bit constant is defined in a 32-bit DSDT/SSDT.
*
* Does not support Octal strings, not needed at this time.
*
- * There is an earlier version of the function after this one,
- * below. It is slightly different than this one, and the two
- * may eventually may need to be merged. (01/2016).
- *
******************************************************************************/
-acpi_status acpi_ut_strtoul64(char *string, u32 base, u64 *ret_integer)
+acpi_status
+acpi_ut_strtoul64(char *string,
+ u32 base, u32 max_integer_byte_width, u64 *ret_integer)
{
u32 this_digit = 0;
u64 return_value = 0;
u64 quotient;
u64 dividend;
- u32 to_integer_op = (base == ACPI_ANY_BASE);
- u32 mode32 = (acpi_gbl_integer_byte_width == 4);
u8 valid_digits = 0;
u8 sign_of0x = 0;
u8 term = 0;
@@ -244,6 +248,7 @@
switch (base) {
case ACPI_ANY_BASE:
+ case 10:
case 16:
break;
@@ -265,9 +270,9 @@
string++;
}
- if (to_integer_op) {
+ if (base == ACPI_ANY_BASE) {
/*
- * Base equal to ACPI_ANY_BASE means 'ToInteger operation case'.
+ * Base equal to ACPI_ANY_BASE means 'Either decimal or hex'.
* We need to determine if it is decimal or hexadecimal.
*/
if ((*string == '0') && (tolower((int)*(string + 1)) == 'x')) {
@@ -284,7 +289,7 @@
/* Any string left? Check that '0x' is not followed by white space. */
if (!(*string) || isspace((int)*string) || *string == '\t') {
- if (to_integer_op) {
+ if (base == ACPI_ANY_BASE) {
goto error_exit;
} else {
goto all_done;
@@ -292,10 +297,11 @@
}
/*
- * Perform a 32-bit or 64-bit conversion, depending upon the current
- * execution mode of the interpreter
+ * Perform a 32-bit or 64-bit conversion, depending upon the input
+ * byte width
*/
- dividend = (mode32) ? ACPI_UINT32_MAX : ACPI_UINT64_MAX;
+ dividend = (max_integer_byte_width <= ACPI_MAX32_BYTE_WIDTH) ?
+ ACPI_UINT32_MAX : ACPI_UINT64_MAX;
/* Main loop: convert the string to a 32- or 64-bit integer */
@@ -323,7 +329,7 @@
}
if (term) {
- if (to_integer_op) {
+ if (base == ACPI_ANY_BASE) {
goto error_exit;
} else {
break;
@@ -338,12 +344,13 @@
valid_digits++;
- if (sign_of0x
- && ((valid_digits > 16)
- || ((valid_digits > 8) && mode32))) {
+ if (sign_of0x && ((valid_digits > 16) ||
+ ((valid_digits > 8)
+ && (max_integer_byte_width <=
+ ACPI_MAX32_BYTE_WIDTH)))) {
/*
* This is the to_integer operation case.
- * No any restrictions for string-to-integer conversion,
+ * No restrictions for string-to-integer conversion,
* see ACPI spec.
*/
goto error_exit;
@@ -355,7 +362,7 @@
"ient, NULL);
if (return_value > quotient) {
- if (to_integer_op) {
+ if (base == ACPI_ANY_BASE) {
goto error_exit;
} else {
break;
@@ -378,7 +385,8 @@
return_ACPI_STATUS(AE_OK);
error_exit:
- /* Base was set/validated above */
+
+ /* Base was set/validated above (10 or 16) */
if (base == 10) {
return_ACPI_STATUS(AE_BAD_DECIMAL_CONSTANT);
@@ -388,8 +396,7 @@
}
#ifdef _OBSOLETE_FUNCTIONS
-/* TBD: use version in ACPICA main code base? */
-/* DONE: 01/2016 */
+/* Removed: 01/2016 */
/*******************************************************************************
*
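The new max_integer_byte_width parameter picks the overflow limit for the conversion: widths at or below ACPI_MAX32_BYTE_WIDTH (4 bytes) clamp against ACPI_UINT32_MAX, anything wider against ACPI_UINT64_MAX. A small demonstration of that width-dependent limit selection, with simplified constant names standing in for the ACPICA ones:

#include <stdio.h>
#include <stdint.h>

#define MAX32_BYTE_WIDTH 4      /* stands in for ACPI_MAX32_BYTE_WIDTH */

static uint64_t conversion_limit(unsigned int max_integer_byte_width)
{
	return ((max_integer_byte_width <= MAX32_BYTE_WIDTH) ?
		UINT32_MAX : UINT64_MAX);
}

int main(void)
{
	/* 32-bit integer width: digits past the 32-bit limit are an error */
	printf("width 4 -> limit %llx\n",
	       (unsigned long long)conversion_limit(4));

	/* 64-bit integer width: the full 64-bit range is allowed */
	printf("width 8 -> limit %llx\n",
	       (unsigned long long)conversion_limit(8));
	return (0);
}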
diff --git a/drivers/acpi/acpica/utobject.c b/drivers/acpi/acpica/utobject.c
index edad3f0..72b9a06 100644
--- a/drivers/acpi/acpica/utobject.c
+++ b/drivers/acpi/acpica/utobject.c
@@ -51,11 +51,11 @@
/* Local prototypes */
static acpi_status
acpi_ut_get_simple_object_size(union acpi_operand_object *obj,
- acpi_size * obj_length);
+ acpi_size *obj_length);
static acpi_status
acpi_ut_get_package_object_size(union acpi_operand_object *obj,
- acpi_size * obj_length);
+ acpi_size *obj_length);
static acpi_status
acpi_ut_get_element_length(u8 object_type,
@@ -177,7 +177,7 @@
* Create the element array. Count+1 allows the array to be null
* terminated.
*/
- package_elements = ACPI_ALLOCATE_ZEROED(((acpi_size) count +
+ package_elements = ACPI_ALLOCATE_ZEROED(((acpi_size)count +
1) * sizeof(void *));
if (!package_elements) {
ACPI_FREE(package_desc);
@@ -454,7 +454,7 @@
static acpi_status
acpi_ut_get_simple_object_size(union acpi_operand_object *internal_object,
- acpi_size * obj_length)
+ acpi_size *obj_length)
{
acpi_size length;
acpi_size size;
@@ -495,12 +495,12 @@
switch (internal_object->common.type) {
case ACPI_TYPE_STRING:
- length += (acpi_size) internal_object->string.length + 1;
+ length += (acpi_size)internal_object->string.length + 1;
break;
case ACPI_TYPE_BUFFER:
- length += (acpi_size) internal_object->buffer.length;
+ length += (acpi_size)internal_object->buffer.length;
break;
case ACPI_TYPE_INTEGER:
@@ -640,7 +640,7 @@
static acpi_status
acpi_ut_get_package_object_size(union acpi_operand_object *internal_object,
- acpi_size * obj_length)
+ acpi_size *obj_length)
{
acpi_status status;
struct acpi_pkg_info info;
@@ -665,7 +665,7 @@
*/
info.length +=
ACPI_ROUND_UP_TO_NATIVE_WORD(sizeof(union acpi_object)) *
- (acpi_size) info.num_packages;
+ (acpi_size)info.num_packages;
/* Return the total package length */
@@ -689,7 +689,7 @@
acpi_status
acpi_ut_get_object_size(union acpi_operand_object *internal_object,
- acpi_size * obj_length)
+ acpi_size *obj_length)
{
acpi_status status;
diff --git a/drivers/acpi/acpica/utosi.c b/drivers/acpi/acpica/utosi.c
index b5cfe57..3f5fed6 100644
--- a/drivers/acpi/acpica/utosi.c
+++ b/drivers/acpi/acpica/utosi.c
@@ -150,7 +150,7 @@
i < (ACPI_ARRAY_LENGTH(acpi_default_supported_interfaces) - 1);
i++) {
acpi_default_supported_interfaces[i].next =
- &acpi_default_supported_interfaces[(acpi_size) i + 1];
+ &acpi_default_supported_interfaces[(acpi_size)i + 1];
}
acpi_os_release_mutex(acpi_gbl_osi_mutex);
@@ -397,7 +397,7 @@
*
******************************************************************************/
-acpi_status acpi_ut_osi_implementation(struct acpi_walk_state * walk_state)
+acpi_status acpi_ut_osi_implementation(struct acpi_walk_state *walk_state)
{
union acpi_operand_object *string_desc;
union acpi_operand_object *return_desc;
diff --git a/drivers/acpi/acpica/utownerid.c b/drivers/acpi/acpica/utownerid.c
index 813520a..3cd573c 100644
--- a/drivers/acpi/acpica/utownerid.c
+++ b/drivers/acpi/acpica/utownerid.c
@@ -61,7 +61,7 @@
* when the method exits or the table is unloaded.
*
******************************************************************************/
-acpi_status acpi_ut_allocate_owner_id(acpi_owner_id * owner_id)
+acpi_status acpi_ut_allocate_owner_id(acpi_owner_id *owner_id)
{
u32 i;
u32 j;
@@ -122,7 +122,7 @@
* permanently allocated (prevents +1 overflow)
*/
*owner_id =
- (acpi_owner_id) ((k + 1) + ACPI_MUL_32(j));
+ (acpi_owner_id)((k + 1) + ACPI_MUL_32(j));
ACPI_DEBUG_PRINT((ACPI_DB_VALUES,
"Allocated OwnerId: %2.2X\n",
@@ -167,7 +167,7 @@
*
******************************************************************************/
-void acpi_ut_release_owner_id(acpi_owner_id * owner_id_ptr)
+void acpi_ut_release_owner_id(acpi_owner_id *owner_id_ptr)
{
acpi_owner_id owner_id = *owner_id_ptr;
acpi_status status;
diff --git a/drivers/acpi/acpica/utprint.c b/drivers/acpi/acpica/utprint.c
index 8c218ad..dd084cf 100644
--- a/drivers/acpi/acpica/utprint.c
+++ b/drivers/acpi/acpica/utprint.c
@@ -67,11 +67,6 @@
static char *acpi_ut_put_number(char *string, u64 number, u8 base, u8 upper);
-/* Module globals */
-
-static const char acpi_gbl_lower_hex_digits[] = "0123456789abcdef";
-static const char acpi_gbl_upper_hex_digits[] = "0123456789ABCDEF";
-
/*******************************************************************************
*
* FUNCTION: acpi_ut_bound_string_length
@@ -269,9 +264,9 @@
sign = '\0';
if (type & ACPI_FORMAT_SIGN) {
- if ((s64) number < 0) {
+ if ((s64)number < 0) {
sign = '-';
- number = -(s64) number;
+ number = -(s64)number;
width--;
} else if (type & ACPI_FORMAT_SIGN_PLUS) {
sign = '+';
@@ -409,7 +404,7 @@
width = -1;
if (isdigit((int)*format)) {
format = acpi_ut_scan_number(format, &number);
- width = (s32) number;
+ width = (s32)number;
} else if (*format == '*') {
++format;
width = va_arg(args, int);
@@ -426,7 +421,7 @@
++format;
if (isdigit((int)*format)) {
format = acpi_ut_scan_number(format, &number);
- precision = (s32) number;
+ precision = (s32)number;
} else if (*format == '*') {
++format;
precision = va_arg(args, int);
@@ -555,17 +550,17 @@
if (qualifier == 'L') {
number = va_arg(args, u64);
if (type & ACPI_FORMAT_SIGN) {
- number = (s64) number;
+ number = (s64)number;
}
} else if (qualifier == 'l') {
number = va_arg(args, unsigned long);
if (type & ACPI_FORMAT_SIGN) {
- number = (s32) number;
+ number = (s32)number;
}
} else if (qualifier == 'h') {
number = (u16)va_arg(args, int);
if (type & ACPI_FORMAT_SIGN) {
- number = (s16) number;
+ number = (s16)number;
}
} else {
number = va_arg(args, unsigned int);
diff --git a/drivers/acpi/acpica/utstring.c b/drivers/acpi/acpica/utstring.c
index 0b00572..288913a 100644
--- a/drivers/acpi/acpica/utstring.c
+++ b/drivers/acpi/acpica/utstring.c
@@ -130,7 +130,7 @@
} else {
/* All others will be Hex escapes */
- acpi_os_printf("\\x%2.2X", (s32) string[i]);
+ acpi_os_printf("\\x%2.2X", (s32)string[i]);
}
break;
}
@@ -145,73 +145,6 @@
/*******************************************************************************
*
- * FUNCTION: acpi_ut_valid_acpi_char
- *
- * PARAMETERS: char - The character to be examined
- * position - Byte position (0-3)
- *
- * RETURN: TRUE if the character is valid, FALSE otherwise
- *
- * DESCRIPTION: Check for a valid ACPI character. Must be one of:
- * 1) Upper case alpha
- * 2) numeric
- * 3) underscore
- *
- * We allow a '!' as the last character because of the ASF! table
- *
- ******************************************************************************/
-
-u8 acpi_ut_valid_acpi_char(char character, u32 position)
-{
-
- if (!((character >= 'A' && character <= 'Z') ||
- (character >= '0' && character <= '9') || (character == '_'))) {
-
- /* Allow a '!' in the last position */
-
- if (character == '!' && position == 3) {
- return (TRUE);
- }
-
- return (FALSE);
- }
-
- return (TRUE);
-}
-
-/*******************************************************************************
- *
- * FUNCTION: acpi_ut_valid_acpi_name
- *
- * PARAMETERS: name - The name to be examined. Does not have to
- * be NULL terminated string.
- *
- * RETURN: TRUE if the name is valid, FALSE otherwise
- *
- * DESCRIPTION: Check for a valid ACPI name. Each character must be one of:
- * 1) Upper case alpha
- * 2) numeric
- * 3) underscore
- *
- ******************************************************************************/
-
-u8 acpi_ut_valid_acpi_name(char *name)
-{
- u32 i;
-
- ACPI_FUNCTION_ENTRY();
-
- for (i = 0; i < ACPI_NAME_SIZE; i++) {
- if (!acpi_ut_valid_acpi_char(name[i], i)) {
- return (FALSE);
- }
- }
-
- return (TRUE);
-}
-
-/*******************************************************************************
- *
* FUNCTION: acpi_ut_repair_name
*
* PARAMETERS: name - The ACPI name to be repaired
@@ -253,7 +186,7 @@
/* Check each character in the name */
for (i = 0; i < ACPI_NAME_SIZE; i++) {
- if (acpi_ut_valid_acpi_char(name[i], i)) {
+ if (acpi_ut_valid_name_char(name[i], i)) {
continue;
}
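
The helpers removed above encoded the rule that the renamed
acpi_ut_valid_name_char() still enforces: each of the four bytes of an
ACPI name must be an upper-case letter, a digit, or an underscore, with
'!' tolerated only in the last position (for the ASF! table). A minimal,
self-contained sketch of that rule, using illustrative function names
rather than ACPICA's:

#include <stdio.h>

/* Sketch of the name-character rule; the names here are illustrative
 * stand-ins, not ACPICA's. */
static int valid_name_char(char c, unsigned int pos)
{
	if ((c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_')
		return 1;
	return c == '!' && pos == 3;	/* allow ASF! style names */
}

static int valid_name(const char *name)
{
	unsigned int i;

	for (i = 0; i < 4; i++)		/* ACPI names are 4 bytes */
		if (!valid_name_char(name[i], i))
			return 0;
	return 1;
}

int main(void)
{
	/* prints "1 1 0": lower-case 'a' fails validation */
	printf("%d %d %d\n", valid_name("_SB_"), valid_name("ASF!"),
	       valid_name("a123"));
	return 0;
}
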
diff --git a/drivers/acpi/acpica/uttrack.c b/drivers/acpi/acpica/uttrack.c
index 60c406a..0df07df 100644
--- a/drivers/acpi/acpica/uttrack.c
+++ b/drivers/acpi/acpica/uttrack.c
@@ -90,7 +90,7 @@
******************************************************************************/
acpi_status
-acpi_ut_create_list(char *list_name,
+acpi_ut_create_list(const char *list_name,
u16 object_size, struct acpi_memory_list **return_cache)
{
struct acpi_memory_list *cache;
diff --git a/drivers/acpi/acpica/utxface.c b/drivers/acpi/acpica/utxface.c
index 68d4673..d9e6aac 100644
--- a/drivers/acpi/acpica/utxface.c
+++ b/drivers/acpi/acpica/utxface.c
@@ -127,7 +127,7 @@
* and the value of out_buffer is undefined.
*
******************************************************************************/
-acpi_status acpi_get_system_info(struct acpi_buffer * out_buffer)
+acpi_status acpi_get_system_info(struct acpi_buffer *out_buffer)
{
struct acpi_system_info *info_ptr;
acpi_status status;
@@ -483,7 +483,7 @@
******************************************************************************/
acpi_status
acpi_decode_pld_buffer(u8 *in_buffer,
- acpi_size length, struct acpi_pld_info ** return_buffer)
+ acpi_size length, struct acpi_pld_info **return_buffer)
{
struct acpi_pld_info *pld_info;
u32 *buffer = ACPI_CAST_PTR(u32, in_buffer);
diff --git a/drivers/acpi/blacklist.c b/drivers/acpi/blacklist.c
index 96809cd..bdc67ba 100644
--- a/drivers/acpi/blacklist.c
+++ b/drivers/acpi/blacklist.c
@@ -3,7 +3,7 @@
*
* Check to see if the given machine has a known bad ACPI BIOS
* or if the BIOS is too old.
- * Check given machine against acpi_osi_dmi_table[].
+ * Check given machine against acpi_rev_dmi_table[].
*
* Copyright (C) 2004 Len Brown <len.brown@intel.com>
* Copyright (C) 2002 Andy Grover <andrew.grover@intel.com>
@@ -47,7 +47,7 @@
u32 is_critical_error;
};
-static struct dmi_system_id acpi_osi_dmi_table[] __initdata;
+static struct dmi_system_id acpi_rev_dmi_table[] __initdata;
/*
* POLICY: If *anything* doesn't work, put it on the blacklist.
@@ -128,36 +128,12 @@
}
}
- dmi_check_system(acpi_osi_dmi_table);
+ (void)early_acpi_osi_init();
+ dmi_check_system(acpi_rev_dmi_table);
return blacklisted;
}
#ifdef CONFIG_DMI
-static int __init dmi_enable_osi_linux(const struct dmi_system_id *d)
-{
- acpi_dmi_osi_linux(1, d); /* enable */
- return 0;
-}
-static int __init dmi_disable_osi_vista(const struct dmi_system_id *d)
-{
- printk(KERN_NOTICE PREFIX "DMI detected: %s\n", d->ident);
- acpi_osi_setup("!Windows 2006");
- acpi_osi_setup("!Windows 2006 SP1");
- acpi_osi_setup("!Windows 2006 SP2");
- return 0;
-}
-static int __init dmi_disable_osi_win7(const struct dmi_system_id *d)
-{
- printk(KERN_NOTICE PREFIX "DMI detected: %s\n", d->ident);
- acpi_osi_setup("!Windows 2009");
- return 0;
-}
-static int __init dmi_disable_osi_win8(const struct dmi_system_id *d)
-{
- printk(KERN_NOTICE PREFIX "DMI detected: %s\n", d->ident);
- acpi_osi_setup("!Windows 2012");
- return 0;
-}
#ifdef CONFIG_ACPI_REV_OVERRIDE_POSSIBLE
static int __init dmi_enable_rev_override(const struct dmi_system_id *d)
{
@@ -168,169 +144,7 @@
}
#endif
-static struct dmi_system_id acpi_osi_dmi_table[] __initdata = {
- {
- .callback = dmi_disable_osi_vista,
- .ident = "Fujitsu Siemens",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
- DMI_MATCH(DMI_PRODUCT_NAME, "ESPRIMO Mobile V5505"),
- },
- },
- {
- /*
- * There have a NVIF method in MSI GX723 DSDT need call by Nvidia
- * driver (e.g. nouveau) when user press brightness hotkey.
- * Currently, nouveau driver didn't do the job and it causes there
- * have a infinite while loop in DSDT when user press hotkey.
- * We add MSI GX723's dmi information to this table for workaround
- * this issue.
- * Will remove MSI GX723 from the table after nouveau grows support.
- */
- .callback = dmi_disable_osi_vista,
- .ident = "MSI GX723",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Micro-Star International"),
- DMI_MATCH(DMI_PRODUCT_NAME, "GX723"),
- },
- },
- {
- .callback = dmi_disable_osi_vista,
- .ident = "Sony VGN-NS10J_S",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"),
- DMI_MATCH(DMI_PRODUCT_NAME, "VGN-NS10J_S"),
- },
- },
- {
- .callback = dmi_disable_osi_vista,
- .ident = "Sony VGN-SR290J",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"),
- DMI_MATCH(DMI_PRODUCT_NAME, "VGN-SR290J"),
- },
- },
- {
- .callback = dmi_disable_osi_vista,
- .ident = "VGN-NS50B_L",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"),
- DMI_MATCH(DMI_PRODUCT_NAME, "VGN-NS50B_L"),
- },
- },
- {
- .callback = dmi_disable_osi_vista,
- .ident = "VGN-SR19XN",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"),
- DMI_MATCH(DMI_PRODUCT_NAME, "VGN-SR19XN"),
- },
- },
- {
- .callback = dmi_disable_osi_vista,
- .ident = "Toshiba Satellite L355",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"),
- DMI_MATCH(DMI_PRODUCT_VERSION, "Satellite L355"),
- },
- },
- {
- .callback = dmi_disable_osi_win7,
- .ident = "ASUS K50IJ",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "K50IJ"),
- },
- },
- {
- .callback = dmi_disable_osi_vista,
- .ident = "Toshiba P305D",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"),
- DMI_MATCH(DMI_PRODUCT_NAME, "Satellite P305D"),
- },
- },
- {
- .callback = dmi_disable_osi_vista,
- .ident = "Toshiba NB100",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"),
- DMI_MATCH(DMI_PRODUCT_NAME, "NB100"),
- },
- },
-
- /*
- * The wireless hotkey does not work on those machines when
- * returning true for _OSI("Windows 2012")
- */
- {
- .callback = dmi_disable_osi_win8,
- .ident = "Dell Inspiron 7737",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 7737"),
- },
- },
- {
- .callback = dmi_disable_osi_win8,
- .ident = "Dell Inspiron 7537",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 7537"),
- },
- },
- {
- .callback = dmi_disable_osi_win8,
- .ident = "Dell Inspiron 5437",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 5437"),
- },
- },
- {
- .callback = dmi_disable_osi_win8,
- .ident = "Dell Inspiron 3437",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 3437"),
- },
- },
- {
- .callback = dmi_disable_osi_win8,
- .ident = "Dell Vostro 3446",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "Vostro 3446"),
- },
- },
- {
- .callback = dmi_disable_osi_win8,
- .ident = "Dell Vostro 3546",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "Vostro 3546"),
- },
- },
-
- /*
- * BIOS invocation of _OSI(Linux) is almost always a BIOS bug.
- * Linux ignores it, except for the machines enumerated below.
- */
-
- /*
- * Without this this EEEpc exports a non working WMI interface, with
- * this it exports a working "good old" eeepc_laptop interface, fixing
- * both brightness control, and rfkill not working.
- */
- {
- .callback = dmi_enable_osi_linux,
- .ident = "Asus EEE PC 1015PX",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer INC."),
- DMI_MATCH(DMI_PRODUCT_NAME, "1015PX"),
- },
- },
-
+static struct dmi_system_id acpi_rev_dmi_table[] __initdata = {
#ifdef CONFIG_ACPI_REV_OVERRIDE_POSSIBLE
/*
* DELL XPS 13 (2015) switches sound between HDA and I2S
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index c068c82..31e8da6 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -925,11 +925,13 @@
goto error0;
}
- status = acpi_load_tables();
- if (ACPI_FAILURE(status)) {
- printk(KERN_ERR PREFIX
- "Unable to load the System Description Tables\n");
- goto error0;
+ if (acpi_gbl_group_module_level_code) {
+ status = acpi_load_tables();
+ if (ACPI_FAILURE(status)) {
+ printk(KERN_ERR PREFIX
+ "Unable to load the System Description Tables\n");
+ goto error0;
+ }
}
#ifdef CONFIG_X86
@@ -995,17 +997,10 @@
acpi_os_initialize1();
- status = acpi_enable_subsystem(ACPI_NO_ACPI_ENABLE);
- if (ACPI_FAILURE(status)) {
- printk(KERN_ERR PREFIX
- "Unable to start the ACPI Interpreter\n");
- goto error1;
- }
-
/*
* ACPI 2.0 requires the EC driver to be loaded and work before
- * the EC device is found in the namespace (i.e. before acpi_initialize_objects()
- * is called).
+ * the EC device is found in the namespace (i.e. before
+ * acpi_load_tables() is called).
*
* This is accomplished by looking for the ECDT table, and getting
* the EC parameters out of that.
@@ -1013,6 +1008,22 @@
status = acpi_ec_ecdt_probe();
/* Ignore result. Not having an ECDT is not fatal. */
+ if (!acpi_gbl_group_module_level_code) {
+ status = acpi_load_tables();
+ if (ACPI_FAILURE(status)) {
+ printk(KERN_ERR PREFIX
+ "Unable to load the System Description Tables\n");
+ goto error1;
+ }
+ }
+
+ status = acpi_enable_subsystem(ACPI_NO_ACPI_ENABLE);
+ if (ACPI_FAILURE(status)) {
+ printk(KERN_ERR PREFIX
+ "Unable to start the ACPI Interpreter\n");
+ goto error1;
+ }
+
status = acpi_initialize_objects(ACPI_FULL_INITIALIZATION);
if (ACPI_FAILURE(status)) {
printk(KERN_ERR PREFIX "Unable to initialize ACPI objects\n");
diff --git a/drivers/acpi/device_sysfs.c b/drivers/acpi/device_sysfs.c
index b9afb47..7b2c48f 100644
--- a/drivers/acpi/device_sysfs.c
+++ b/drivers/acpi/device_sysfs.c
@@ -35,7 +35,7 @@
if (result)
return result;
- result = sprintf(buf, "%s\n", (char*)path.pointer);
+ result = sprintf(buf, "%s\n", (char *)path.pointer);
kfree(path.pointer);
return result;
}
@@ -333,7 +333,8 @@
EXPORT_SYMBOL_GPL(acpi_device_modalias);
static ssize_t
-acpi_device_modalias_show(struct device *dev, struct device_attribute *attr, char *buf) {
+acpi_device_modalias_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
return __acpi_device_modalias(to_acpi_device(dev), buf, 1024);
}
static DEVICE_ATTR(modalias, 0444, acpi_device_modalias_show, NULL);
@@ -397,7 +398,8 @@
static DEVICE_ATTR(eject, 0200, NULL, acpi_eject_store);
static ssize_t
-acpi_device_hid_show(struct device *dev, struct device_attribute *attr, char *buf) {
+acpi_device_hid_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
struct acpi_device *acpi_dev = to_acpi_device(dev);
return sprintf(buf, "%s\n", acpi_device_hid(acpi_dev));
@@ -467,12 +469,27 @@
status = acpi_evaluate_integer(acpi_dev->handle, "_SUN", NULL, &sun);
if (ACPI_FAILURE(status))
- return -ENODEV;
+ return -EIO;
return sprintf(buf, "%llu\n", sun);
}
static DEVICE_ATTR(sun, 0444, acpi_device_sun_show, NULL);
+static ssize_t
+acpi_device_hrv_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct acpi_device *acpi_dev = to_acpi_device(dev);
+ acpi_status status;
+ unsigned long long hrv;
+
+ status = acpi_evaluate_integer(acpi_dev->handle, "_HRV", NULL, &hrv);
+ if (ACPI_FAILURE(status))
+ return -EIO;
+
+ return sprintf(buf, "%llu\n", hrv);
+}
+static DEVICE_ATTR(hrv, 0444, acpi_device_hrv_show, NULL);
+
static ssize_t status_show(struct device *dev, struct device_attribute *attr,
char *buf) {
struct acpi_device *acpi_dev = to_acpi_device(dev);
@@ -481,7 +498,7 @@
status = acpi_evaluate_integer(acpi_dev->handle, "_STA", NULL, &sta);
if (ACPI_FAILURE(status))
- return -ENODEV;
+ return -EIO;
return sprintf(buf, "%llu\n", sta);
}
@@ -541,16 +558,22 @@
goto end;
}
+ if (acpi_has_method(dev->handle, "_HRV")) {
+ result = device_create_file(&dev->dev, &dev_attr_hrv);
+ if (result)
+ goto end;
+ }
+
if (acpi_has_method(dev->handle, "_STA")) {
result = device_create_file(&dev->dev, &dev_attr_status);
if (result)
goto end;
}
- /*
- * If device has _EJ0, 'eject' file is created that is used to trigger
- * hot-removal function from userland.
- */
+ /*
+ * If device has _EJ0, 'eject' file is created that is used to trigger
+ * hot-removal function from userland.
+ */
if (acpi_has_method(dev->handle, "_EJ0")) {
result = device_create_file(&dev->dev, &dev_attr_eject);
if (result)
@@ -604,6 +627,9 @@
if (acpi_has_method(dev->handle, "_SUN"))
device_remove_file(&dev->dev, &dev_attr_sun);
+ if (acpi_has_method(dev->handle, "_HRV"))
+ device_remove_file(&dev->dev, &dev_attr_hrv);
+
if (dev->pnp.unique_id)
device_remove_file(&dev->dev, &dev_attr_uid);
if (dev->pnp.type.bus_address)
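
From userspace the new attribute reads like any other ACPI sysfs file;
a minimal example (the device path here is hypothetical and depends on
the platform's namespace):

#include <stdio.h>

int main(void)
{
	char buf[32];
	/* hypothetical path; substitute a device that defines _HRV */
	FILE *f = fopen("/sys/bus/acpi/devices/PNP0C0A:00/hrv", "r");

	if (!f)
		return 1;	/* no such attribute on this device */
	if (fgets(buf, sizeof(buf), f))
		printf("_HRV = %s", buf);	/* decimal, e.g. "1\n" */
	fclose(f);
	return 0;
}
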
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index b420fb4..0e70181 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -105,8 +105,8 @@
enum {
EC_FLAGS_QUERY_PENDING, /* Query is pending */
EC_FLAGS_QUERY_GUARDING, /* Guard for SCI_EVT check */
- EC_FLAGS_HANDLERS_INSTALLED, /* Handlers for GPE and
- * OpReg are installed */
+ EC_FLAGS_GPE_HANDLER_INSTALLED, /* GPE handler installed */
+ EC_FLAGS_EC_HANDLER_INSTALLED, /* OpReg handler installed */
EC_FLAGS_STARTED, /* Driver is started */
EC_FLAGS_STOPPED, /* Driver is stopped */
EC_FLAGS_COMMAND_STORM, /* GPE storms occurred to the
@@ -175,10 +175,9 @@
struct acpi_ec *boot_ec, *first_ec;
EXPORT_SYMBOL(first_ec);
-static int EC_FLAGS_VALIDATE_ECDT; /* ASUStec ECDTs need to be validated */
-static int EC_FLAGS_SKIP_DSDT_SCAN; /* Not all BIOS survive early DSDT scan */
static int EC_FLAGS_CLEAR_ON_RESUME; /* Needs acpi_ec_clear() on boot/resume */
static int EC_FLAGS_QUERY_HANDSHAKE; /* Needs QR_EC issued when SCI_EVT set */
+static int EC_FLAGS_CORRECT_ECDT; /* Needs ECDT port address correction */
/* --------------------------------------------------------------------------
* Logging/Debugging
@@ -367,7 +366,8 @@
static void acpi_ec_submit_request(struct acpi_ec *ec)
{
ec->reference_count++;
- if (ec->reference_count == 1)
+ if (test_bit(EC_FLAGS_GPE_HANDLER_INSTALLED, &ec->flags) &&
+ ec->reference_count == 1)
acpi_ec_enable_gpe(ec, true);
}
@@ -376,7 +376,8 @@
bool flushed = false;
ec->reference_count--;
- if (ec->reference_count == 0)
+ if (test_bit(EC_FLAGS_GPE_HANDLER_INSTALLED, &ec->flags) &&
+ ec->reference_count == 0)
acpi_ec_disable_gpe(ec, true);
flushed = acpi_ec_flushed(ec);
if (flushed)
@@ -1287,52 +1288,64 @@
{
acpi_status status;
- if (test_bit(EC_FLAGS_HANDLERS_INSTALLED, &ec->flags))
- return 0;
- status = acpi_install_gpe_raw_handler(NULL, ec->gpe,
- ACPI_GPE_EDGE_TRIGGERED,
- &acpi_ec_gpe_handler, ec);
- if (ACPI_FAILURE(status))
- return -ENODEV;
-
acpi_ec_start(ec, false);
- status = acpi_install_address_space_handler(ec->handle,
- ACPI_ADR_SPACE_EC,
- &acpi_ec_space_handler,
- NULL, ec);
- if (ACPI_FAILURE(status)) {
- if (status == AE_NOT_FOUND) {
- /*
- * Maybe OS fails in evaluating the _REG object.
- * The AE_NOT_FOUND error will be ignored and OS
- * continue to initialize EC.
- */
- pr_err("Fail in evaluating the _REG object"
- " of EC device. Broken bios is suspected.\n");
- } else {
- acpi_ec_stop(ec, false);
- acpi_remove_gpe_handler(NULL, ec->gpe,
- &acpi_ec_gpe_handler);
- return -ENODEV;
+
+ if (!test_bit(EC_FLAGS_EC_HANDLER_INSTALLED, &ec->flags)) {
+ status = acpi_install_address_space_handler(ec->handle,
+ ACPI_ADR_SPACE_EC,
+ &acpi_ec_space_handler,
+ NULL, ec);
+ if (ACPI_FAILURE(status)) {
+ if (status == AE_NOT_FOUND) {
+ /*
+ * Maybe the OS fails in evaluating the _REG
+ * object. The AE_NOT_FOUND error will be
+ * ignored and the OS will continue to
+ * initialize the EC.
+ */
+ pr_err("Fail in evaluating the _REG object"
+ " of EC device. Broken bios is suspected.\n");
+ } else {
+ acpi_ec_stop(ec, false);
+ return -ENODEV;
+ }
+ }
+ set_bit(EC_FLAGS_EC_HANDLER_INSTALLED, &ec->flags);
+ }
+
+ if (!test_bit(EC_FLAGS_GPE_HANDLER_INSTALLED, &ec->flags)) {
+ status = acpi_install_gpe_raw_handler(NULL, ec->gpe,
+ ACPI_GPE_EDGE_TRIGGERED,
+ &acpi_ec_gpe_handler, ec);
+ /* This is not fatal as we can poll EC events */
+ if (ACPI_SUCCESS(status)) {
+ set_bit(EC_FLAGS_GPE_HANDLER_INSTALLED, &ec->flags);
+ if (test_bit(EC_FLAGS_STARTED, &ec->flags) &&
+ ec->reference_count >= 1)
+ acpi_ec_enable_gpe(ec, true);
}
}
- set_bit(EC_FLAGS_HANDLERS_INSTALLED, &ec->flags);
return 0;
}
static void ec_remove_handlers(struct acpi_ec *ec)
{
- if (!test_bit(EC_FLAGS_HANDLERS_INSTALLED, &ec->flags))
- return;
acpi_ec_stop(ec, false);
- if (ACPI_FAILURE(acpi_remove_address_space_handler(ec->handle,
- ACPI_ADR_SPACE_EC, &acpi_ec_space_handler)))
- pr_err("failed to remove space handler\n");
- if (ACPI_FAILURE(acpi_remove_gpe_handler(NULL, ec->gpe,
- &acpi_ec_gpe_handler)))
- pr_err("failed to remove gpe handler\n");
- clear_bit(EC_FLAGS_HANDLERS_INSTALLED, &ec->flags);
+
+ if (test_bit(EC_FLAGS_EC_HANDLER_INSTALLED, &ec->flags)) {
+ if (ACPI_FAILURE(acpi_remove_address_space_handler(ec->handle,
+ ACPI_ADR_SPACE_EC, &acpi_ec_space_handler)))
+ pr_err("failed to remove space handler\n");
+ clear_bit(EC_FLAGS_EC_HANDLER_INSTALLED, &ec->flags);
+ }
+
+ if (test_bit(EC_FLAGS_GPE_HANDLER_INSTALLED, &ec->flags)) {
+ if (ACPI_FAILURE(acpi_remove_gpe_handler(NULL, ec->gpe,
+ &acpi_ec_gpe_handler)))
+ pr_err("failed to remove gpe handler\n");
+ clear_bit(EC_FLAGS_GPE_HANDLER_INSTALLED, &ec->flags);
+ }
}
static int acpi_ec_add(struct acpi_device *device)
@@ -1344,11 +1357,12 @@
strcpy(acpi_device_class(device), ACPI_EC_CLASS);
/* Check for boot EC */
- if (boot_ec &&
- (boot_ec->handle == device->handle ||
- boot_ec->handle == ACPI_ROOT_OBJECT)) {
+ if (boot_ec) {
ec = boot_ec;
boot_ec = NULL;
+ ec_remove_handlers(ec);
+ if (first_ec == ec)
+ first_ec = NULL;
} else {
ec = make_acpi_ec();
if (!ec)
@@ -1434,7 +1448,7 @@
int __init acpi_boot_ec_enable(void)
{
- if (!boot_ec || test_bit(EC_FLAGS_HANDLERS_INSTALLED, &boot_ec->flags))
+ if (!boot_ec)
return 0;
if (!ec_install_handlers(boot_ec)) {
first_ec = boot_ec;
@@ -1448,20 +1462,6 @@
{"", 0},
};
-/* Some BIOS do not survive early DSDT scan, skip it */
-static int ec_skip_dsdt_scan(const struct dmi_system_id *id)
-{
- EC_FLAGS_SKIP_DSDT_SCAN = 1;
- return 0;
-}
-
-/* ASUStek often supplies us with broken ECDT, validate it */
-static int ec_validate_ecdt(const struct dmi_system_id *id)
-{
- EC_FLAGS_VALIDATE_ECDT = 1;
- return 0;
-}
-
#if 0
/*
* Some EC firmware variations refuses to respond QR_EC when SCI_EVT is not
@@ -1503,30 +1503,29 @@
return 0;
}
+static int ec_correct_ecdt(const struct dmi_system_id *id)
+{
+ pr_debug("Detected system needing ECDT address correction.\n");
+ EC_FLAGS_CORRECT_ECDT = 1;
+ return 0;
+}
+
static struct dmi_system_id ec_dmi_table[] __initdata = {
{
- ec_skip_dsdt_scan, "Compal JFL92", {
- DMI_MATCH(DMI_BIOS_VENDOR, "COMPAL"),
- DMI_MATCH(DMI_BOARD_NAME, "JFL92") }, NULL},
+ ec_correct_ecdt, "Asus L4R", {
+ DMI_MATCH(DMI_BIOS_VERSION, "1008.006"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "L4R"),
+ DMI_MATCH(DMI_BOARD_NAME, "L4R") }, NULL},
{
- ec_validate_ecdt, "MSI MS-171F", {
+ ec_correct_ecdt, "Asus M6R", {
+ DMI_MATCH(DMI_BIOS_VERSION, "0207"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "M6R"),
+ DMI_MATCH(DMI_BOARD_NAME, "M6R") }, NULL},
+ {
+ ec_correct_ecdt, "MSI MS-171F", {
DMI_MATCH(DMI_SYS_VENDOR, "Micro-Star"),
DMI_MATCH(DMI_PRODUCT_NAME, "MS-171F"),}, NULL},
{
- ec_validate_ecdt, "ASUS hardware", {
- DMI_MATCH(DMI_BIOS_VENDOR, "ASUS") }, NULL},
- {
- ec_validate_ecdt, "ASUS hardware", {
- DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer Inc.") }, NULL},
- {
- ec_skip_dsdt_scan, "HP Folio 13", {
- DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
- DMI_MATCH(DMI_PRODUCT_NAME, "HP Folio 13"),}, NULL},
- {
- ec_validate_ecdt, "ASUS hardware", {
- DMI_MATCH(DMI_SYS_VENDOR, "ASUSTek Computer Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "L4R"),}, NULL},
- {
ec_clear_on_resume, "Samsung hardware", {
DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD.")}, NULL},
{},
@@ -1534,8 +1533,8 @@
int __init acpi_ec_ecdt_probe(void)
{
+ int ret = 0;
acpi_status status;
- struct acpi_ec *saved_ec = NULL;
struct acpi_table_ecdt *ecdt_ptr;
boot_ec = make_acpi_ec();
@@ -1547,67 +1546,45 @@
dmi_check_system(ec_dmi_table);
status = acpi_get_table(ACPI_SIG_ECDT, 1,
(struct acpi_table_header **)&ecdt_ptr);
- if (ACPI_SUCCESS(status)) {
- pr_info("EC description table is found, configuring boot EC\n");
+ if (ACPI_FAILURE(status)) {
+ ret = -ENODEV;
+ goto error;
+ }
+
+ if (!ecdt_ptr->control.address || !ecdt_ptr->data.address) {
+ /*
+ * Asus X50GL:
+ * https://bugzilla.kernel.org/show_bug.cgi?id=11880
+ */
+ ret = -ENODEV;
+ goto error;
+ }
+
+ pr_info("EC description table is found, configuring boot EC\n");
+ if (EC_FLAGS_CORRECT_ECDT) {
+ /*
+ * Asus L4R, Asus M6R
+ * https://bugzilla.kernel.org/show_bug.cgi?id=9399
+ * MSI MS-171F
+ * https://bugzilla.kernel.org/show_bug.cgi?id=12461
+ */
+ boot_ec->command_addr = ecdt_ptr->data.address;
+ boot_ec->data_addr = ecdt_ptr->control.address;
+ } else {
boot_ec->command_addr = ecdt_ptr->control.address;
boot_ec->data_addr = ecdt_ptr->data.address;
- boot_ec->gpe = ecdt_ptr->gpe;
- boot_ec->handle = ACPI_ROOT_OBJECT;
- acpi_get_handle(ACPI_ROOT_OBJECT, ecdt_ptr->id,
- &boot_ec->handle);
- /* Don't trust ECDT, which comes from ASUSTek */
- if (!EC_FLAGS_VALIDATE_ECDT)
- goto install;
- saved_ec = kmemdup(boot_ec, sizeof(struct acpi_ec), GFP_KERNEL);
- if (!saved_ec)
- return -ENOMEM;
- /* fall through */
}
-
- if (EC_FLAGS_SKIP_DSDT_SCAN) {
- kfree(saved_ec);
- return -ENODEV;
- }
-
- /* This workaround is needed only on some broken machines,
- * which require early EC, but fail to provide ECDT */
- pr_debug("Look up EC in DSDT\n");
- status = acpi_get_devices(ec_device_ids[0].id, ec_parse_device,
- boot_ec, NULL);
- /* Check that acpi_get_devices actually find something */
- if (ACPI_FAILURE(status) || !boot_ec->handle)
- goto error;
- if (saved_ec) {
- /* try to find good ECDT from ASUSTek */
- if (saved_ec->command_addr != boot_ec->command_addr ||
- saved_ec->data_addr != boot_ec->data_addr ||
- saved_ec->gpe != boot_ec->gpe ||
- saved_ec->handle != boot_ec->handle)
- pr_info("ASUSTek keeps feeding us with broken "
- "ECDT tables, which are very hard to workaround. "
- "Trying to use DSDT EC info instead. Please send "
- "output of acpidump to linux-acpi@vger.kernel.org\n");
- kfree(saved_ec);
- saved_ec = NULL;
- } else {
- /* We really need to limit this workaround, the only ASUS,
- * which needs it, has fake EC._INI method, so use it as flag.
- * Keep boot_ec struct as it will be needed soon.
- */
- if (!dmi_name_in_vendors("ASUS") ||
- !acpi_has_method(boot_ec->handle, "_INI"))
- return -ENODEV;
- }
-install:
- if (!ec_install_handlers(boot_ec)) {
+ boot_ec->gpe = ecdt_ptr->gpe;
+ boot_ec->handle = ACPI_ROOT_OBJECT;
+ ret = ec_install_handlers(boot_ec);
+ if (!ret)
first_ec = boot_ec;
- return 0;
- }
error:
- kfree(boot_ec);
- kfree(saved_ec);
- boot_ec = NULL;
- return -ENODEV;
+ if (ret) {
+ kfree(boot_ec);
+ boot_ec = NULL;
+ }
+ return ret;
}
static int param_set_event_clearing(const char *val, struct kernel_param *kp)
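
Splitting EC_FLAGS_HANDLERS_INSTALLED into separate GPE and OpReg bits
means the reference count only drives the GPE once its handler really
exists. A self-contained mock of that gating (simplified stand-ins for
the kernel's bitops and EC structure):

#include <stdio.h>
#include <stdbool.h>

struct ec {
	bool gpe_handler_installed;	/* EC_FLAGS_GPE_HANDLER_INSTALLED */
	int reference_count;
};

static void enable_gpe(struct ec *ec, bool on)
{
	(void)ec;
	printf("GPE %s\n", on ? "enabled" : "disabled");
}

static void submit_request(struct ec *ec)
{
	ec->reference_count++;
	/* only touch the GPE once its handler exists */
	if (ec->gpe_handler_installed && ec->reference_count == 1)
		enable_gpe(ec, true);
}

static void complete_request(struct ec *ec)
{
	ec->reference_count--;
	if (ec->gpe_handler_installed && ec->reference_count == 0)
		enable_gpe(ec, false);
}

int main(void)
{
	struct ec ec = { .gpe_handler_installed = false, .reference_count = 0 };

	submit_request(&ec);		/* no handler yet: GPE untouched */
	ec.gpe_handler_installed = true;
	submit_request(&ec);		/* count 1 -> 2: no transition */
	complete_request(&ec);		/* 2 -> 1: nothing */
	complete_request(&ec);		/* 1 -> 0: prints "GPE disabled" */
	return 0;
}
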
diff --git a/drivers/acpi/evged.c b/drivers/acpi/evged.c
new file mode 100644
index 0000000..46f0603
--- /dev/null
+++ b/drivers/acpi/evged.c
@@ -0,0 +1,154 @@
+/*
+ * Generic Event Device for ACPI.
+ *
+ * Copyright (c) 2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * The Generic Event Device allows platforms to handle interrupts in
+ * ACPI ASL statements. It follows an approach very similar to the
+ * _EVT method used for GPIO events. All interrupts are listed in
+ * _CRS and the handler is written in the _EVT method. Here is an
+ * example.
+ *
+ * Device (GED0)
+ * {
+ *
+ * Name (_HID, "ACPI0013")
+ * Name (_UID, 0)
+ * Method (_CRS, 0x0, Serialized)
+ * {
+ * Name (RBUF, ResourceTemplate ()
+ * {
+ * Interrupt(ResourceConsumer, Edge, ActiveHigh, Shared, , , )
+ * {123}
+ * }
+ * })
+ *
+ * Method (_EVT, 1) {
+ * if (Lequal(123, Arg0))
+ * {
+ * }
+ * }
+ * }
+ *
+ */
+
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/platform_device.h>
+#include <linux/acpi.h>
+
+#define MODULE_NAME "acpi-ged"
+
+struct acpi_ged_event {
+ struct list_head node;
+ struct device *dev;
+ unsigned int gsi;
+ unsigned int irq;
+ acpi_handle handle;
+};
+
+static irqreturn_t acpi_ged_irq_handler(int irq, void *data)
+{
+ struct acpi_ged_event *event = data;
+ acpi_status acpi_ret;
+
+ acpi_ret = acpi_execute_simple_method(event->handle, NULL, event->gsi);
+ if (ACPI_FAILURE(acpi_ret))
+ dev_err_once(event->dev, "IRQ method execution failed\n");
+
+ return IRQ_HANDLED;
+}
+
+static acpi_status acpi_ged_request_interrupt(struct acpi_resource *ares,
+ void *context)
+{
+ struct acpi_ged_event *event;
+ unsigned int irq;
+ unsigned int gsi;
+ unsigned int irqflags = IRQF_ONESHOT;
+ struct device *dev = context;
+ acpi_handle handle = ACPI_HANDLE(dev);
+ acpi_handle evt_handle;
+ struct resource r;
+ struct acpi_resource_irq *p = &ares->data.irq;
+ struct acpi_resource_extended_irq *pext = &ares->data.extended_irq;
+
+ if (ares->type == ACPI_RESOURCE_TYPE_END_TAG)
+ return AE_OK;
+
+ if (!acpi_dev_resource_interrupt(ares, 0, &r)) {
+ dev_err(dev, "unable to parse IRQ resource\n");
+ return AE_ERROR;
+ }
+ if (ares->type == ACPI_RESOURCE_TYPE_IRQ)
+ gsi = p->interrupts[0];
+ else
+ gsi = pext->interrupts[0];
+
+ irq = r.start;
+
+ if (ACPI_FAILURE(acpi_get_handle(handle, "_EVT", &evt_handle))) {
+ dev_err(dev, "cannot locate _EVT method\n");
+ return AE_ERROR;
+ }
+
+ dev_info(dev, "GED listening GSI %u @ IRQ %u\n", gsi, irq);
+
+ event = devm_kzalloc(dev, sizeof(*event), GFP_KERNEL);
+ if (!event)
+ return AE_ERROR;
+
+ event->gsi = gsi;
+ event->dev = dev;
+ event->irq = irq;
+ event->handle = evt_handle;
+
+ if (r.flags & IORESOURCE_IRQ_SHAREABLE)
+ irqflags |= IRQF_SHARED;
+
+ if (devm_request_threaded_irq(dev, irq, NULL, acpi_ged_irq_handler,
+ irqflags, "ACPI:Ged", event)) {
+ dev_err(dev, "failed to setup event handler for irq %u\n", irq);
+ return AE_ERROR;
+ }
+
+ return AE_OK;
+}
+
+static int ged_probe(struct platform_device *pdev)
+{
+ acpi_status acpi_ret;
+
+ acpi_ret = acpi_walk_resources(ACPI_HANDLE(&pdev->dev), "_CRS",
+ acpi_ged_request_interrupt, &pdev->dev);
+ if (ACPI_FAILURE(acpi_ret)) {
+ dev_err(&pdev->dev, "unable to parse the _CRS record\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static const struct acpi_device_id ged_acpi_ids[] = {
+ {"ACPI0013"},
+ {},
+};
+
+static struct platform_driver ged_driver = {
+ .probe = ged_probe,
+ .driver = {
+ .name = MODULE_NAME,
+ .acpi_match_table = ACPI_PTR(ged_acpi_ids),
+ },
+};
+builtin_platform_driver(ged_driver);
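
One design note on the driver above: the hard IRQ handler is NULL and
acpi_ged_irq_handler() runs as a threaded handler because evaluating
_EVT executes AML, which may sleep in the interpreter. A tiny
self-contained mock of the dispatch, with a plain C function standing
in for the ASL _EVT method and GSI 123 matching the header example:

#include <stdio.h>

static void aml_evt(unsigned long long arg0)	/* _EVT(Arg0) stand-in */
{
	if (arg0 == 123)
		puts("ASL handler for GSI 123");
}

/* what acpi_ged_irq_handler() boils down to: pass the GSI to _EVT */
static void ged_threaded_handler(unsigned int gsi)
{
	aml_evt(gsi);
}

int main(void)
{
	ged_threaded_handler(123);	/* interrupt 123 fires */
	return 0;
}
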
diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index 7c18847..9bb0773 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -20,7 +20,8 @@
#define PREFIX "ACPI: "
-void acpi_initrd_initialize_tables(void);
+int early_acpi_osi_init(void);
+int acpi_osi_init(void);
acpi_status acpi_os_initialize1(void);
void init_acpi_device_notify(void);
int acpi_scan_init(void);
diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c
index 72b6e9e..d176e0e 100644
--- a/drivers/acpi/numa.c
+++ b/drivers/acpi/numa.c
@@ -327,10 +327,18 @@
/* SRAT: Static Resource Affinity Table */
if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) {
- acpi_table_parse_srat(ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY,
- acpi_parse_x2apic_affinity, 0);
- acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY,
- acpi_parse_processor_affinity, 0);
+ struct acpi_subtable_proc srat_proc[2];
+
+ memset(srat_proc, 0, sizeof(srat_proc));
+ srat_proc[0].id = ACPI_SRAT_TYPE_CPU_AFFINITY;
+ srat_proc[0].handler = acpi_parse_processor_affinity;
+ srat_proc[1].id = ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY;
+ srat_proc[1].handler = acpi_parse_x2apic_affinity;
+
+ acpi_table_parse_entries_array(ACPI_SIG_SRAT,
+ sizeof(struct acpi_table_srat),
+ srat_proc, ARRAY_SIZE(srat_proc), 0);
+
cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
acpi_parse_memory_affinity,
NR_NODE_MEMBLKS);
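
The switch to acpi_table_parse_entries_array() handles both SRAT
subtable types in a single pass over the table, preserving firmware
order across types instead of walking SRAT once per type. A
self-contained mock of that dispatch (simplified stand-in structs, not
the kernel's layouts):

#include <stdio.h>

struct subtable { int type; };

struct subtable_proc {
	int id;
	void (*handler)(struct subtable *entry);
};

static void on_cpu(struct subtable *entry)    { (void)entry; puts("CPU affinity"); }
static void on_x2apic(struct subtable *entry) { (void)entry; puts("x2APIC affinity"); }

int main(void)
{
	/* subtable types interleaved in firmware order: 0=CPU, 2=x2APIC */
	struct subtable entries[] = { {0}, {2}, {0} };
	struct subtable_proc proc[] = { {0, on_cpu}, {2, on_x2apic} };
	unsigned int i, j;

	/* one walk over the table, dispatching each entry by type */
	for (i = 0; i < sizeof(entries) / sizeof(entries[0]); i++)
		for (j = 0; j < sizeof(proc) / sizeof(proc[0]); j++)
			if (entries[i].type == proc[j].id)
				proc[j].handler(&entries[i]);
	return 0;
}
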
diff --git a/drivers/acpi/osi.c b/drivers/acpi/osi.c
new file mode 100644
index 0000000..849f9d2
--- /dev/null
+++ b/drivers/acpi/osi.c
@@ -0,0 +1,522 @@
+/*
+ * osi.c - _OSI implementation
+ *
+ * Copyright (C) 2016 Intel Corporation
+ * Author: Lv Zheng <lv.zheng@intel.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+/* Uncomment next line to get verbose printout */
+/* #define DEBUG */
+#define pr_fmt(fmt) "ACPI: " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/acpi.h>
+#include <linux/dmi.h>
+
+#include "internal.h"
+
+
+#define OSI_STRING_LENGTH_MAX 64
+#define OSI_STRING_ENTRIES_MAX 16
+
+struct acpi_osi_entry {
+ char string[OSI_STRING_LENGTH_MAX];
+ bool enable;
+};
+
+static struct acpi_osi_config {
+ u8 default_disabling;
+ unsigned int linux_enable:1;
+ unsigned int linux_dmi:1;
+ unsigned int linux_cmdline:1;
+ unsigned int darwin_enable:1;
+ unsigned int darwin_dmi:1;
+ unsigned int darwin_cmdline:1;
+} osi_config;
+
+static struct acpi_osi_entry
+osi_setup_entries[OSI_STRING_ENTRIES_MAX] __initdata = {
+ {"Module Device", true},
+ {"Processor Device", true},
+ {"3.0 _SCP Extensions", true},
+ {"Processor Aggregator Device", true},
+};
+
+static u32 acpi_osi_handler(acpi_string interface, u32 supported)
+{
+ if (!strcmp("Linux", interface)) {
+ pr_notice_once(FW_BUG
+ "BIOS _OSI(Linux) query %s%s\n",
+ osi_config.linux_enable ? "honored" : "ignored",
+ osi_config.linux_cmdline ? " via cmdline" :
+ osi_config.linux_dmi ? " via DMI" : "");
+ }
+ if (!strcmp("Darwin", interface)) {
+ pr_notice_once(
+ "BIOS _OSI(Darwin) query %s%s\n",
+ osi_config.darwin_enable ? "honored" : "ignored",
+ osi_config.darwin_cmdline ? " via cmdline" :
+ osi_config.darwin_dmi ? " via DMI" : "");
+ }
+
+ return supported;
+}
+
+void __init acpi_osi_setup(char *str)
+{
+ struct acpi_osi_entry *osi;
+ bool enable = true;
+ int i;
+
+ if (!acpi_gbl_create_osi_method)
+ return;
+
+ if (str == NULL || *str == '\0') {
+ pr_info("_OSI method disabled\n");
+ acpi_gbl_create_osi_method = FALSE;
+ return;
+ }
+
+ if (*str == '!') {
+ str++;
+ if (*str == '\0') {
+ /* Do not override acpi_osi=!* */
+ if (!osi_config.default_disabling)
+ osi_config.default_disabling =
+ ACPI_DISABLE_ALL_VENDOR_STRINGS;
+ return;
+ } else if (*str == '*') {
+ osi_config.default_disabling = ACPI_DISABLE_ALL_STRINGS;
+ for (i = 0; i < OSI_STRING_ENTRIES_MAX; i++) {
+ osi = &osi_setup_entries[i];
+ osi->enable = false;
+ }
+ return;
+ } else if (*str == '!') {
+ osi_config.default_disabling = 0;
+ return;
+ }
+ enable = false;
+ }
+
+ for (i = 0; i < OSI_STRING_ENTRIES_MAX; i++) {
+ osi = &osi_setup_entries[i];
+ if (!strcmp(osi->string, str)) {
+ osi->enable = enable;
+ break;
+ } else if (osi->string[0] == '\0') {
+ osi->enable = enable;
+ strncpy(osi->string, str, OSI_STRING_LENGTH_MAX);
+ break;
+ }
+ }
+}
+
+static void __init __acpi_osi_setup_darwin(bool enable)
+{
+ osi_config.darwin_enable = !!enable;
+ if (enable) {
+ acpi_osi_setup("!");
+ acpi_osi_setup("Darwin");
+ } else {
+ acpi_osi_setup("!!");
+ acpi_osi_setup("!Darwin");
+ }
+}
+
+static void __init acpi_osi_setup_darwin(bool enable)
+{
+ /* Override acpi_osi_dmi_blacklisted() */
+ osi_config.darwin_dmi = 0;
+ osi_config.darwin_cmdline = 1;
+ __acpi_osi_setup_darwin(enable);
+}
+
+/*
+ * The story of _OSI(Linux)
+ *
+ * From pre-history through Linux-2.6.22, Linux responded TRUE upon a BIOS
+ * OSI(Linux) query.
+ *
+ * Unfortunately, reference BIOS writers got wind of this and put
+ * OSI(Linux) in their example code, quickly exposing this string as
+ * ill-conceived and opening the door to an un-bounded number of BIOS
+ * incompatibilities.
+ *
+ * For example, OSI(Linux) was used on resume to re-POST a video card on
+ * one system, because Linux at that time could not do a speedy restore in
+ * its native driver. But then upon gaining quick native restore
+ * capability, Linux has no way to tell the BIOS to skip the time-consuming
+ * POST -- putting Linux at a permanent performance disadvantage. On
+ * another system, the BIOS writer used OSI(Linux) to infer native OS
+ * support for IPMI! On other systems, OSI(Linux) simply got in the way of
+ * Linux claiming to be compatible with other operating systems, exposing
+ * BIOS issues such as skipped device initialization.
+ *
+ * So "Linux" turned out to be a really poor choice of OSI string, and from
+ * Linux-2.6.23 onward we respond FALSE.
+ *
+ * BIOS writers should NOT query _OSI(Linux) on future systems. Linux will
+ * complain on the console when it sees it, and return FALSE. To get Linux
+ * to return TRUE for your system will require a kernel source update to
+ * add a DMI entry, or boot with "acpi_osi=Linux"
+ */
+static void __init __acpi_osi_setup_linux(bool enable)
+{
+ osi_config.linux_enable = !!enable;
+ if (enable)
+ acpi_osi_setup("Linux");
+ else
+ acpi_osi_setup("!Linux");
+}
+
+static void __init acpi_osi_setup_linux(bool enable)
+{
+ /* Override acpi_osi_dmi_blacklisted() */
+ osi_config.linux_dmi = 0;
+ osi_config.linux_cmdline = 1;
+ __acpi_osi_setup_linux(enable);
+}
+
+/*
+ * Modify the list of "OS Interfaces" reported to BIOS via _OSI
+ *
+ * empty string disables _OSI
+ * string starting with '!' disables that string
+ * otherwise string is added to list, augmenting built-in strings
+ */
+static void __init acpi_osi_setup_late(void)
+{
+ struct acpi_osi_entry *osi;
+ char *str;
+ int i;
+ acpi_status status;
+
+ if (osi_config.default_disabling) {
+ status = acpi_update_interfaces(osi_config.default_disabling);
+ if (ACPI_SUCCESS(status))
+ pr_info("Disabled all _OSI OS vendors%s\n",
+ osi_config.default_disabling ==
+ ACPI_DISABLE_ALL_STRINGS ?
+ " and feature groups" : "");
+ }
+
+ for (i = 0; i < OSI_STRING_ENTRIES_MAX; i++) {
+ osi = &osi_setup_entries[i];
+ str = osi->string;
+ if (*str == '\0')
+ break;
+ if (osi->enable) {
+ status = acpi_install_interface(str);
+ if (ACPI_SUCCESS(status))
+ pr_info("Added _OSI(%s)\n", str);
+ } else {
+ status = acpi_remove_interface(str);
+ if (ACPI_SUCCESS(status))
+ pr_info("Deleted _OSI(%s)\n", str);
+ }
+ }
+}
+
+static int __init osi_setup(char *str)
+{
+ if (str && !strcmp("Linux", str))
+ acpi_osi_setup_linux(true);
+ else if (str && !strcmp("!Linux", str))
+ acpi_osi_setup_linux(false);
+ else if (str && !strcmp("Darwin", str))
+ acpi_osi_setup_darwin(true);
+ else if (str && !strcmp("!Darwin", str))
+ acpi_osi_setup_darwin(false);
+ else
+ acpi_osi_setup(str);
+
+ return 1;
+}
+__setup("acpi_osi=", osi_setup);
+
+bool acpi_osi_is_win8(void)
+{
+ return acpi_gbl_osi_data >= ACPI_OSI_WIN_8;
+}
+EXPORT_SYMBOL(acpi_osi_is_win8);
+
+static void __init acpi_osi_dmi_darwin(bool enable,
+ const struct dmi_system_id *d)
+{
+ pr_notice("DMI detected to setup _OSI(\"Darwin\"): %s\n", d->ident);
+ osi_config.darwin_dmi = 1;
+ __acpi_osi_setup_darwin(enable);
+}
+
+void __init acpi_osi_dmi_linux(bool enable, const struct dmi_system_id *d)
+{
+ pr_notice("DMI detected to setup _OSI(\"Linux\"): %s\n", d->ident);
+ osi_config.linux_dmi = 1;
+ __acpi_osi_setup_linux(enable);
+}
+
+static int __init dmi_enable_osi_darwin(const struct dmi_system_id *d)
+{
+ acpi_osi_dmi_darwin(true, d);
+
+ return 0;
+}
+
+static int __init dmi_enable_osi_linux(const struct dmi_system_id *d)
+{
+ acpi_osi_dmi_linux(true, d);
+
+ return 0;
+}
+
+static int __init dmi_disable_osi_vista(const struct dmi_system_id *d)
+{
+ pr_notice("DMI detected: %s\n", d->ident);
+ acpi_osi_setup("!Windows 2006");
+ acpi_osi_setup("!Windows 2006 SP1");
+ acpi_osi_setup("!Windows 2006 SP2");
+
+ return 0;
+}
+
+static int __init dmi_disable_osi_win7(const struct dmi_system_id *d)
+{
+ pr_notice("DMI detected: %s\n", d->ident);
+ acpi_osi_setup("!Windows 2009");
+
+ return 0;
+}
+
+static int __init dmi_disable_osi_win8(const struct dmi_system_id *d)
+{
+ pr_notice("DMI detected: %s\n", d->ident);
+ acpi_osi_setup("!Windows 2012");
+
+ return 0;
+}
+
+/*
+ * Linux default _OSI response behavior is determined by this DMI table.
+ *
+ * Note that _OSI("Linux")/_OSI("Darwin") determined here can be overridden
+ * by acpi_osi=!Linux/acpi_osi=!Darwin command line options.
+ */
+static struct dmi_system_id acpi_osi_dmi_table[] __initdata = {
+ {
+ .callback = dmi_disable_osi_vista,
+ .ident = "Fujitsu Siemens",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "ESPRIMO Mobile V5505"),
+ },
+ },
+ {
+ /*
+ * The MSI GX723 DSDT has an NVIF method that needs to be called by
+ * the Nvidia driver (e.g. nouveau) when the user presses the
+ * brightness hotkey. Currently nouveau does not do this, which
+ * leaves the DSDT spinning in an infinite while loop when the
+ * hotkey is pressed. We add the MSI GX723's DMI information to
+ * this table to work around this issue.
+ * Will remove MSI GX723 from the table after nouveau grows support.
+ */
+ .callback = dmi_disable_osi_vista,
+ .ident = "MSI GX723",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Micro-Star International"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "GX723"),
+ },
+ },
+ {
+ .callback = dmi_disable_osi_vista,
+ .ident = "Sony VGN-NS10J_S",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "VGN-NS10J_S"),
+ },
+ },
+ {
+ .callback = dmi_disable_osi_vista,
+ .ident = "Sony VGN-SR290J",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "VGN-SR290J"),
+ },
+ },
+ {
+ .callback = dmi_disable_osi_vista,
+ .ident = "VGN-NS50B_L",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "VGN-NS50B_L"),
+ },
+ },
+ {
+ .callback = dmi_disable_osi_vista,
+ .ident = "VGN-SR19XN",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "VGN-SR19XN"),
+ },
+ },
+ {
+ .callback = dmi_disable_osi_vista,
+ .ident = "Toshiba Satellite L355",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"),
+ DMI_MATCH(DMI_PRODUCT_VERSION, "Satellite L355"),
+ },
+ },
+ {
+ .callback = dmi_disable_osi_win7,
+ .ident = "ASUS K50IJ",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "K50IJ"),
+ },
+ },
+ {
+ .callback = dmi_disable_osi_vista,
+ .ident = "Toshiba P305D",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Satellite P305D"),
+ },
+ },
+ {
+ .callback = dmi_disable_osi_vista,
+ .ident = "Toshiba NB100",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "NB100"),
+ },
+ },
+
+ /*
+ * The wireless hotkey does not work on those machines when
+ * returning true for _OSI("Windows 2012")
+ */
+ {
+ .callback = dmi_disable_osi_win8,
+ .ident = "Dell Inspiron 7737",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 7737"),
+ },
+ },
+ {
+ .callback = dmi_disable_osi_win8,
+ .ident = "Dell Inspiron 7537",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 7537"),
+ },
+ },
+ {
+ .callback = dmi_disable_osi_win8,
+ .ident = "Dell Inspiron 5437",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 5437"),
+ },
+ },
+ {
+ .callback = dmi_disable_osi_win8,
+ .ident = "Dell Inspiron 3437",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 3437"),
+ },
+ },
+ {
+ .callback = dmi_disable_osi_win8,
+ .ident = "Dell Vostro 3446",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Vostro 3446"),
+ },
+ },
+ {
+ .callback = dmi_disable_osi_win8,
+ .ident = "Dell Vostro 3546",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Vostro 3546"),
+ },
+ },
+
+ /*
+ * BIOS invocation of _OSI(Linux) is almost always a BIOS bug.
+ * Linux ignores it, except for the machines enumerated below.
+ */
+
+ /*
+ * Without this, this EEEpc exports a non-working WMI interface; with
+ * it, it exports a working "good old" eeepc_laptop interface, fixing
+ * both brightness control and rfkill.
+ */
+ {
+ .callback = dmi_enable_osi_linux,
+ .ident = "Asus EEE PC 1015PX",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer INC."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "1015PX"),
+ },
+ },
+
+ /*
+ * Enable _OSI("Darwin") for all Apple platforms.
+ */
+ {
+ .callback = dmi_enable_osi_darwin,
+ .ident = "Apple hardware",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
+ },
+ },
+ {
+ .callback = dmi_enable_osi_darwin,
+ .ident = "Apple hardware",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Apple Computer, Inc."),
+ },
+ },
+ {}
+};
+
+static __init void acpi_osi_dmi_blacklisted(void)
+{
+ dmi_check_system(acpi_osi_dmi_table);
+}
+
+int __init early_acpi_osi_init(void)
+{
+ acpi_osi_dmi_blacklisted();
+
+ return 0;
+}
+
+int __init acpi_osi_init(void)
+{
+ acpi_install_interface_handler(acpi_osi_handler);
+ acpi_osi_setup_late();
+
+ return 0;
+}
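
Putting osi_setup() and acpi_osi_setup() above together, the acpi_osi=
boot parameter ends up with the following semantics ("SomeString" is a
placeholder):

    acpi_osi=SomeString    add/enable _OSI("SomeString")
    acpi_osi=!SomeString   disable _OSI("SomeString")
    acpi_osi=              disable the _OSI method entirely
    acpi_osi=!             disable all built-in vendor strings
    acpi_osi=!*            disable all strings, feature groups included
    acpi_osi=!!            revert a prior default disabling
    acpi_osi=Linux         honor BIOS _OSI(Linux) queries
    acpi_osi=!Darwin       ignore BIOS _OSI(Darwin) queries
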
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 814d5f8..b108f13 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -56,10 +56,6 @@
struct work_struct work;
};
-#ifdef CONFIG_ACPI_CUSTOM_DSDT
-#include CONFIG_ACPI_CUSTOM_DSDT_FILE
-#endif
-
#ifdef ENABLE_DEBUGGER
#include <linux/kdb.h>
@@ -96,72 +92,6 @@
static LIST_HEAD(acpi_ioremaps);
static DEFINE_MUTEX(acpi_ioremap_lock);
-static void __init acpi_osi_setup_late(void);
-
-/*
- * The story of _OSI(Linux)
- *
- * From pre-history through Linux-2.6.22,
- * Linux responded TRUE upon a BIOS OSI(Linux) query.
- *
- * Unfortunately, reference BIOS writers got wind of this
- * and put OSI(Linux) in their example code, quickly exposing
- * this string as ill-conceived and opening the door to
- * an un-bounded number of BIOS incompatibilities.
- *
- * For example, OSI(Linux) was used on resume to re-POST a
- * video card on one system, because Linux at that time
- * could not do a speedy restore in its native driver.
- * But then upon gaining quick native restore capability,
- * Linux has no way to tell the BIOS to skip the time-consuming
- * POST -- putting Linux at a permanent performance disadvantage.
- * On another system, the BIOS writer used OSI(Linux)
- * to infer native OS support for IPMI! On other systems,
- * OSI(Linux) simply got in the way of Linux claiming to
- * be compatible with other operating systems, exposing
- * BIOS issues such as skipped device initialization.
- *
- * So "Linux" turned out to be a really poor chose of
- * OSI string, and from Linux-2.6.23 onward we respond FALSE.
- *
- * BIOS writers should NOT query _OSI(Linux) on future systems.
- * Linux will complain on the console when it sees it, and return FALSE.
- * To get Linux to return TRUE for your system will require
- * a kernel source update to add a DMI entry,
- * or boot with "acpi_osi=Linux"
- */
-
-static struct osi_linux {
- unsigned int enable:1;
- unsigned int dmi:1;
- unsigned int cmdline:1;
- unsigned int default_disabling:1;
-} osi_linux = {0, 0, 0, 0};
-
-static u32 acpi_osi_handler(acpi_string interface, u32 supported)
-{
- if (!strcmp("Linux", interface)) {
-
- printk_once(KERN_NOTICE FW_BUG PREFIX
- "BIOS _OSI(Linux) query %s%s\n",
- osi_linux.enable ? "honored" : "ignored",
- osi_linux.cmdline ? " via cmdline" :
- osi_linux.dmi ? " via DMI" : "");
- }
-
- if (!strcmp("Darwin", interface)) {
- /*
- * Apple firmware will behave poorly if it receives positive
- * answers to "Darwin" and any other OS. Respond positively
- * to Darwin and then disable all other vendor strings.
- */
- acpi_update_interfaces(ACPI_DISABLE_ALL_VENDOR_STRINGS);
- supported = ACPI_UINT32_MAX;
- }
-
- return supported;
-}
-
static void __init acpi_request_region (struct acpi_generic_address *gas,
unsigned int length, char *desc)
{
@@ -582,7 +512,7 @@
acpi_status
acpi_os_predefined_override(const struct acpi_predefined_names *init_val,
- char **new_val)
+ acpi_string *new_val)
{
if (!init_val || !new_val)
return AE_BAD_PARAMETER;
@@ -602,280 +532,6 @@
return AE_OK;
}
-static void acpi_table_taint(struct acpi_table_header *table)
-{
- pr_warn(PREFIX
- "Override [%4.4s-%8.8s], this is unsafe: tainting kernel\n",
- table->signature, table->oem_table_id);
- add_taint(TAINT_OVERRIDDEN_ACPI_TABLE, LOCKDEP_NOW_UNRELIABLE);
-}
-
-#ifdef CONFIG_ACPI_INITRD_TABLE_OVERRIDE
-#include <linux/earlycpio.h>
-#include <linux/memblock.h>
-
-static u64 acpi_tables_addr;
-static int all_tables_size;
-
-/* Copied from acpica/tbutils.c:acpi_tb_checksum() */
-static u8 __init acpi_table_checksum(u8 *buffer, u32 length)
-{
- u8 sum = 0;
- u8 *end = buffer + length;
-
- while (buffer < end)
- sum = (u8) (sum + *(buffer++));
- return sum;
-}
-
-/* All but ACPI_SIG_RSDP and ACPI_SIG_FACS: */
-static const char * const table_sigs[] = {
- ACPI_SIG_BERT, ACPI_SIG_CPEP, ACPI_SIG_ECDT, ACPI_SIG_EINJ,
- ACPI_SIG_ERST, ACPI_SIG_HEST, ACPI_SIG_MADT, ACPI_SIG_MSCT,
- ACPI_SIG_SBST, ACPI_SIG_SLIT, ACPI_SIG_SRAT, ACPI_SIG_ASF,
- ACPI_SIG_BOOT, ACPI_SIG_DBGP, ACPI_SIG_DMAR, ACPI_SIG_HPET,
- ACPI_SIG_IBFT, ACPI_SIG_IVRS, ACPI_SIG_MCFG, ACPI_SIG_MCHI,
- ACPI_SIG_SLIC, ACPI_SIG_SPCR, ACPI_SIG_SPMI, ACPI_SIG_TCPA,
- ACPI_SIG_UEFI, ACPI_SIG_WAET, ACPI_SIG_WDAT, ACPI_SIG_WDDT,
- ACPI_SIG_WDRT, ACPI_SIG_DSDT, ACPI_SIG_FADT, ACPI_SIG_PSDT,
- ACPI_SIG_RSDT, ACPI_SIG_XSDT, ACPI_SIG_SSDT, NULL };
-
-#define ACPI_HEADER_SIZE sizeof(struct acpi_table_header)
-
-#define ACPI_OVERRIDE_TABLES 64
-static struct cpio_data __initdata acpi_initrd_files[ACPI_OVERRIDE_TABLES];
-static DECLARE_BITMAP(acpi_initrd_installed, ACPI_OVERRIDE_TABLES);
-
-#define MAP_CHUNK_SIZE (NR_FIX_BTMAPS << PAGE_SHIFT)
-
-void __init acpi_initrd_override(void *data, size_t size)
-{
- int sig, no, table_nr = 0, total_offset = 0;
- long offset = 0;
- struct acpi_table_header *table;
- char cpio_path[32] = "kernel/firmware/acpi/";
- struct cpio_data file;
-
- if (data == NULL || size == 0)
- return;
-
- for (no = 0; no < ACPI_OVERRIDE_TABLES; no++) {
- file = find_cpio_data(cpio_path, data, size, &offset);
- if (!file.data)
- break;
-
- data += offset;
- size -= offset;
-
- if (file.size < sizeof(struct acpi_table_header)) {
- pr_err("ACPI OVERRIDE: Table smaller than ACPI header [%s%s]\n",
- cpio_path, file.name);
- continue;
- }
-
- table = file.data;
-
- for (sig = 0; table_sigs[sig]; sig++)
- if (!memcmp(table->signature, table_sigs[sig], 4))
- break;
-
- if (!table_sigs[sig]) {
- pr_err("ACPI OVERRIDE: Unknown signature [%s%s]\n",
- cpio_path, file.name);
- continue;
- }
- if (file.size != table->length) {
- pr_err("ACPI OVERRIDE: File length does not match table length [%s%s]\n",
- cpio_path, file.name);
- continue;
- }
- if (acpi_table_checksum(file.data, table->length)) {
- pr_err("ACPI OVERRIDE: Bad table checksum [%s%s]\n",
- cpio_path, file.name);
- continue;
- }
-
- pr_info("%4.4s ACPI table found in initrd [%s%s][0x%x]\n",
- table->signature, cpio_path, file.name, table->length);
-
- all_tables_size += table->length;
- acpi_initrd_files[table_nr].data = file.data;
- acpi_initrd_files[table_nr].size = file.size;
- table_nr++;
- }
- if (table_nr == 0)
- return;
-
- acpi_tables_addr =
- memblock_find_in_range(0, max_low_pfn_mapped << PAGE_SHIFT,
- all_tables_size, PAGE_SIZE);
- if (!acpi_tables_addr) {
- WARN_ON(1);
- return;
- }
- /*
- * Only calling e820_add_reserve does not work and the
- * tables are invalid (memory got used) later.
- * memblock_reserve works as expected and the tables won't get modified.
- * But it's not enough on X86 because ioremap will
- * complain later (used by acpi_os_map_memory) that the pages
- * that should get mapped are not marked "reserved".
- * Both memblock_reserve and e820_add_region (via arch_reserve_mem_area)
- * works fine.
- */
- memblock_reserve(acpi_tables_addr, all_tables_size);
- arch_reserve_mem_area(acpi_tables_addr, all_tables_size);
-
- /*
- * early_ioremap only can remap 256k one time. If we map all
- * tables one time, we will hit the limit. Need to map chunks
- * one by one during copying the same as that in relocate_initrd().
- */
- for (no = 0; no < table_nr; no++) {
- unsigned char *src_p = acpi_initrd_files[no].data;
- phys_addr_t size = acpi_initrd_files[no].size;
- phys_addr_t dest_addr = acpi_tables_addr + total_offset;
- phys_addr_t slop, clen;
- char *dest_p;
-
- total_offset += size;
-
- while (size) {
- slop = dest_addr & ~PAGE_MASK;
- clen = size;
- if (clen > MAP_CHUNK_SIZE - slop)
- clen = MAP_CHUNK_SIZE - slop;
- dest_p = early_ioremap(dest_addr & PAGE_MASK,
- clen + slop);
- memcpy(dest_p + slop, src_p, clen);
- early_iounmap(dest_p, clen + slop);
- src_p += clen;
- dest_addr += clen;
- size -= clen;
- }
- }
-}
-
-acpi_status
-acpi_os_physical_table_override(struct acpi_table_header *existing_table,
- acpi_physical_address *address, u32 *length)
-{
- int table_offset = 0;
- int table_index = 0;
- struct acpi_table_header *table;
- u32 table_length;
-
- *length = 0;
- *address = 0;
- if (!acpi_tables_addr)
- return AE_OK;
-
- while (table_offset + ACPI_HEADER_SIZE <= all_tables_size) {
- table = acpi_os_map_memory(acpi_tables_addr + table_offset,
- ACPI_HEADER_SIZE);
- if (table_offset + table->length > all_tables_size) {
- acpi_os_unmap_memory(table, ACPI_HEADER_SIZE);
- WARN_ON(1);
- return AE_OK;
- }
-
- table_length = table->length;
-
- /* Only override tables matched */
- if (test_bit(table_index, acpi_initrd_installed) ||
- memcmp(existing_table->signature, table->signature, 4) ||
- memcmp(table->oem_table_id, existing_table->oem_table_id,
- ACPI_OEM_TABLE_ID_SIZE)) {
- acpi_os_unmap_memory(table, ACPI_HEADER_SIZE);
- goto next_table;
- }
-
- *length = table_length;
- *address = acpi_tables_addr + table_offset;
- acpi_table_taint(existing_table);
- acpi_os_unmap_memory(table, ACPI_HEADER_SIZE);
- set_bit(table_index, acpi_initrd_installed);
- break;
-
-next_table:
- table_offset += table_length;
- table_index++;
- }
- return AE_OK;
-}
-
-void __init acpi_initrd_initialize_tables(void)
-{
- int table_offset = 0;
- int table_index = 0;
- u32 table_length;
- struct acpi_table_header *table;
-
- if (!acpi_tables_addr)
- return;
-
- while (table_offset + ACPI_HEADER_SIZE <= all_tables_size) {
- table = acpi_os_map_memory(acpi_tables_addr + table_offset,
- ACPI_HEADER_SIZE);
- if (table_offset + table->length > all_tables_size) {
- acpi_os_unmap_memory(table, ACPI_HEADER_SIZE);
- WARN_ON(1);
- return;
- }
-
- table_length = table->length;
-
- /* Skip RSDT/XSDT which should only be used for override */
- if (test_bit(table_index, acpi_initrd_installed) ||
- ACPI_COMPARE_NAME(table->signature, ACPI_SIG_RSDT) ||
- ACPI_COMPARE_NAME(table->signature, ACPI_SIG_XSDT)) {
- acpi_os_unmap_memory(table, ACPI_HEADER_SIZE);
- goto next_table;
- }
-
- acpi_table_taint(table);
- acpi_os_unmap_memory(table, ACPI_HEADER_SIZE);
- acpi_install_table(acpi_tables_addr + table_offset, TRUE);
- set_bit(table_index, acpi_initrd_installed);
-next_table:
- table_offset += table_length;
- table_index++;
- }
-}
-#else
-acpi_status
-acpi_os_physical_table_override(struct acpi_table_header *existing_table,
- acpi_physical_address *address,
- u32 *table_length)
-{
- *table_length = 0;
- *address = 0;
- return AE_OK;
-}
-
-void __init acpi_initrd_initialize_tables(void)
-{
-}
-#endif /* CONFIG_ACPI_INITRD_TABLE_OVERRIDE */
-
-acpi_status
-acpi_os_table_override(struct acpi_table_header *existing_table,
- struct acpi_table_header **new_table)
-{
- if (!existing_table || !new_table)
- return AE_BAD_PARAMETER;
-
- *new_table = NULL;
-
-#ifdef CONFIG_ACPI_CUSTOM_DSDT
- if (strncmp(existing_table->signature, "DSDT", 4) == 0)
- *new_table = (struct acpi_table_header *)AmlCode;
-#endif
- if (*new_table != NULL)
- acpi_table_taint(existing_table);
- return AE_OK;
-}
-
static irqreturn_t acpi_irq(int irq, void *dev_id)
{
u32 handled;
@@ -1717,156 +1373,6 @@
__setup("acpi_os_name=", acpi_os_name_setup);
-#define OSI_STRING_LENGTH_MAX 64 /* arbitrary */
-#define OSI_STRING_ENTRIES_MAX 16 /* arbitrary */
-
-struct osi_setup_entry {
- char string[OSI_STRING_LENGTH_MAX];
- bool enable;
-};
-
-static struct osi_setup_entry
- osi_setup_entries[OSI_STRING_ENTRIES_MAX] __initdata = {
- {"Module Device", true},
- {"Processor Device", true},
- {"3.0 _SCP Extensions", true},
- {"Processor Aggregator Device", true},
-};
-
-void __init acpi_osi_setup(char *str)
-{
- struct osi_setup_entry *osi;
- bool enable = true;
- int i;
-
- if (!acpi_gbl_create_osi_method)
- return;
-
- if (str == NULL || *str == '\0') {
- printk(KERN_INFO PREFIX "_OSI method disabled\n");
- acpi_gbl_create_osi_method = FALSE;
- return;
- }
-
- if (*str == '!') {
- str++;
- if (*str == '\0') {
- osi_linux.default_disabling = 1;
- return;
- } else if (*str == '*') {
- acpi_update_interfaces(ACPI_DISABLE_ALL_STRINGS);
- for (i = 0; i < OSI_STRING_ENTRIES_MAX; i++) {
- osi = &osi_setup_entries[i];
- osi->enable = false;
- }
- return;
- }
- enable = false;
- }
-
- for (i = 0; i < OSI_STRING_ENTRIES_MAX; i++) {
- osi = &osi_setup_entries[i];
- if (!strcmp(osi->string, str)) {
- osi->enable = enable;
- break;
- } else if (osi->string[0] == '\0') {
- osi->enable = enable;
- strncpy(osi->string, str, OSI_STRING_LENGTH_MAX);
- break;
- }
- }
-}
-
-static void __init set_osi_linux(unsigned int enable)
-{
- if (osi_linux.enable != enable)
- osi_linux.enable = enable;
-
- if (osi_linux.enable)
- acpi_osi_setup("Linux");
- else
- acpi_osi_setup("!Linux");
-
- return;
-}
-
-static void __init acpi_cmdline_osi_linux(unsigned int enable)
-{
-	osi_linux.cmdline = 1;	/* cmdline sets the default and overrides DMI */
- osi_linux.dmi = 0;
- set_osi_linux(enable);
-
- return;
-}
-
-void __init acpi_dmi_osi_linux(int enable, const struct dmi_system_id *d)
-{
- printk(KERN_NOTICE PREFIX "DMI detected: %s\n", d->ident);
-
- if (enable == -1)
- return;
-
- osi_linux.dmi = 1; /* DMI knows that this box asks OSI(Linux) */
- set_osi_linux(enable);
-
- return;
-}
-
-/*
- * Modify the list of "OS Interfaces" reported to BIOS via _OSI
- *
- * empty string disables _OSI
- * string starting with '!' disables that string
- * otherwise string is added to list, augmenting built-in strings
- */
-static void __init acpi_osi_setup_late(void)
-{
- struct osi_setup_entry *osi;
- char *str;
- int i;
- acpi_status status;
-
- if (osi_linux.default_disabling) {
- status = acpi_update_interfaces(ACPI_DISABLE_ALL_VENDOR_STRINGS);
-
- if (ACPI_SUCCESS(status))
- printk(KERN_INFO PREFIX "Disabled all _OSI OS vendors\n");
- }
-
- for (i = 0; i < OSI_STRING_ENTRIES_MAX; i++) {
- osi = &osi_setup_entries[i];
- str = osi->string;
-
- if (*str == '\0')
- break;
- if (osi->enable) {
- status = acpi_install_interface(str);
-
- if (ACPI_SUCCESS(status))
- printk(KERN_INFO PREFIX "Added _OSI(%s)\n", str);
- } else {
- status = acpi_remove_interface(str);
-
- if (ACPI_SUCCESS(status))
- printk(KERN_INFO PREFIX "Deleted _OSI(%s)\n", str);
- }
- }
-}
-
-static int __init osi_setup(char *str)
-{
- if (str && !strcmp("Linux", str))
- acpi_cmdline_osi_linux(1);
- else if (str && !strcmp("!Linux", str))
- acpi_cmdline_osi_linux(0);
- else
- acpi_osi_setup(str);
-
- return 1;
-}
-
-__setup("acpi_osi=", osi_setup);
-
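The block removed above is the old in-osl.c _OSI bookkeeping; per the earlier osl.c hunk it now lives behind acpi_osi_init(). For readers following the removal, the acpi_osi= parsing rules can be rehearsed in isolation. The sketch below is a userspace stand-in, not kernel code; it omits the bare "!" default-disable case and the acpi_update_interfaces() call for brevity:

	/* Userspace rehearsal of the acpi_osi= rules above: an empty string
	 * disables _OSI entirely, "!*" disables every string, "!<s>" disables
	 * one string, and anything else is queued for installation.
	 */
	#include <stdbool.h>
	#include <stdio.h>
	#include <string.h>

	#define MAX_ENTRIES 16
	#define MAX_LEN     64

	struct entry { char s[MAX_LEN]; bool enable; };
	static struct entry entries[MAX_ENTRIES];
	static bool create_osi = true;

	static void osi_setup(const char *str)
	{
		bool enable = true;
		int i;

		if (!str || !*str) {		/* acpi_osi=       -> no _OSI at all */
			create_osi = false;
			return;
		}
		if (*str == '!') {
			str++;
			if (*str == '*') {	/* acpi_osi=!*     -> disable all */
				for (i = 0; i < MAX_ENTRIES; i++)
					entries[i].enable = false;
				return;
			}
			enable = false;		/* acpi_osi=!Linux -> disable one */
		}
		for (i = 0; i < MAX_ENTRIES; i++) {
			if (!strcmp(entries[i].s, str) || !entries[i].s[0]) {
				if (!entries[i].s[0])
					strncpy(entries[i].s, str, MAX_LEN - 1);
				entries[i].enable = enable;
				return;
			}
		}
	}

	int main(void)
	{
		osi_setup("Windows 2012");
		osi_setup("!Linux");
		for (int i = 0; i < MAX_ENTRIES && entries[i].s[0]; i++)
			printf("%-16s %s\n", entries[i].s,
			       entries[i].enable ? "install" : "remove");
		return 0;
	}

Booting with acpi_osi="Windows 2012" acpi_osi=!Linux would thus leave one interface marked for install and one for removal, as the demo prints.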
/*
* Disable the auto-serialization of named objects creation methods.
*
@@ -1986,12 +1492,6 @@
}
EXPORT_SYMBOL(acpi_resources_are_enforced);
-bool acpi_osi_is_win8(void)
-{
- return acpi_gbl_osi_data >= ACPI_OSI_WIN_8;
-}
-EXPORT_SYMBOL(acpi_osi_is_win8);
-
/*
* Deallocate the memory for a spinlock.
*/
@@ -2157,8 +1657,7 @@
BUG_ON(!kacpid_wq);
BUG_ON(!kacpi_notify_wq);
BUG_ON(!kacpi_hotplug_wq);
- acpi_install_interface_handler(acpi_osi_handler);
- acpi_osi_setup_late();
+ acpi_osi_init();
return AE_OK;
}
diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c
index ededa90..8fc7323 100644
--- a/drivers/acpi/pci_link.c
+++ b/drivers/acpi/pci_link.c
@@ -36,6 +36,7 @@
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/acpi.h>
+#include <linux/irq.h>
#include "internal.h"
@@ -437,17 +438,15 @@
* enabled system.
*/
-#define ACPI_MAX_IRQS 256
-#define ACPI_MAX_ISA_IRQ 16
+#define ACPI_MAX_ISA_IRQS 16
-#define PIRQ_PENALTY_PCI_AVAILABLE (0)
#define PIRQ_PENALTY_PCI_POSSIBLE (16*16)
#define PIRQ_PENALTY_PCI_USING (16*16*16)
#define PIRQ_PENALTY_ISA_TYPICAL (16*16*16*16)
#define PIRQ_PENALTY_ISA_USED (16*16*16*16*16)
#define PIRQ_PENALTY_ISA_ALWAYS (16*16*16*16*16*16)
-static int acpi_irq_penalty[ACPI_MAX_IRQS] = {
+static int acpi_isa_irq_penalty[ACPI_MAX_ISA_IRQS] = {
PIRQ_PENALTY_ISA_ALWAYS, /* IRQ0 timer */
PIRQ_PENALTY_ISA_ALWAYS, /* IRQ1 keyboard */
PIRQ_PENALTY_ISA_ALWAYS, /* IRQ2 cascade */
@@ -457,9 +456,9 @@
PIRQ_PENALTY_ISA_TYPICAL, /* IRQ6 */
PIRQ_PENALTY_ISA_TYPICAL, /* IRQ7 parallel, spurious */
PIRQ_PENALTY_ISA_TYPICAL, /* IRQ8 rtc, sometimes */
- PIRQ_PENALTY_PCI_AVAILABLE, /* IRQ9 PCI, often acpi */
- PIRQ_PENALTY_PCI_AVAILABLE, /* IRQ10 PCI */
- PIRQ_PENALTY_PCI_AVAILABLE, /* IRQ11 PCI */
+ 0, /* IRQ9 PCI, often acpi */
+ 0, /* IRQ10 PCI */
+ 0, /* IRQ11 PCI */
PIRQ_PENALTY_ISA_USED, /* IRQ12 mouse */
PIRQ_PENALTY_ISA_USED, /* IRQ13 fpe, sometimes */
PIRQ_PENALTY_ISA_USED, /* IRQ14 ide0 */
@@ -467,39 +466,58 @@
/* >IRQ15 */
};
-int __init acpi_irq_penalty_init(void)
+static int acpi_irq_pci_sharing_penalty(int irq)
{
struct acpi_pci_link *link;
- int i;
+ int penalty = 0;
- /*
- * Update penalties to facilitate IRQ balancing.
- */
list_for_each_entry(link, &acpi_link_list, list) {
-
/*
- * reflect the possible and active irqs in the penalty table --
- * useful for breaking ties.
+ * If a link is active, penalize its IRQ heavily
+ * so we try to choose a different IRQ.
*/
- if (link->irq.possible_count) {
- int penalty =
- PIRQ_PENALTY_PCI_POSSIBLE /
- link->irq.possible_count;
+ if (link->irq.active && link->irq.active == irq)
+ penalty += PIRQ_PENALTY_PCI_USING;
+ else {
+ int i;
- for (i = 0; i < link->irq.possible_count; i++) {
- if (link->irq.possible[i] < ACPI_MAX_ISA_IRQ)
- acpi_irq_penalty[link->irq.
- possible[i]] +=
- penalty;
- }
-
- } else if (link->irq.active) {
- acpi_irq_penalty[link->irq.active] +=
- PIRQ_PENALTY_PCI_POSSIBLE;
+ /*
+ * If a link is inactive, penalize the IRQs it
+ * might use, but not as severely.
+ */
+ for (i = 0; i < link->irq.possible_count; i++)
+ if (link->irq.possible[i] == irq)
+ penalty += PIRQ_PENALTY_PCI_POSSIBLE /
+ link->irq.possible_count;
}
}
- return 0;
+ return penalty;
+}
+
+static int acpi_irq_get_penalty(int irq)
+{
+ int penalty = 0;
+
+ if (irq < ACPI_MAX_ISA_IRQS)
+ penalty += acpi_isa_irq_penalty[irq];
+
+ /*
+	 * Penalize the IRQ used by the ACPI SCI. If the ACPI SCI pin
+	 * attributes conflict with PCI IRQ attributes, mark the ACPI SCI as
+	 * ISA_ALWAYS so it won't be used for PCI IRQs.
+ */
+ if (irq == acpi_gbl_FADT.sci_interrupt) {
+ u32 type = irq_get_trigger_type(irq) & IRQ_TYPE_SENSE_MASK;
+
+ if (type != IRQ_TYPE_LEVEL_LOW)
+ penalty += PIRQ_PENALTY_ISA_ALWAYS;
+ else
+ penalty += PIRQ_PENALTY_PCI_USING;
+ }
+
+ penalty += acpi_irq_pci_sharing_penalty(irq);
+ return penalty;
}
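With the static table gone, penalties are now computed on demand by walking acpi_link_list. A userspace rehearsal of the sharing penalty, using two invented links, shows the asymmetry between an actively used IRQ and a merely possible one:

	/* Userspace sketch of the on-demand penalty calculation above. The two
	 * "links" are hypothetical stand-ins for entries on acpi_link_list.
	 */
	#include <stdio.h>

	#define PIRQ_PENALTY_PCI_POSSIBLE	(16*16)
	#define PIRQ_PENALTY_PCI_USING		(16*16*16)

	struct link { int active; int possible[4]; int possible_count; };

	static struct link links[] = {
		{ .active = 11, .possible = { 10, 11 }, .possible_count = 2 },
		{ .active = 0,  .possible = { 10, 11 }, .possible_count = 2 },
	};

	static int pci_sharing_penalty(int irq)
	{
		int penalty = 0;

		for (unsigned i = 0; i < sizeof(links) / sizeof(links[0]); i++) {
			if (links[i].active && links[i].active == irq) {
				penalty += PIRQ_PENALTY_PCI_USING;
			} else {
				for (int j = 0; j < links[i].possible_count; j++)
					if (links[i].possible[j] == irq)
						penalty += PIRQ_PENALTY_PCI_POSSIBLE /
							   links[i].possible_count;
			}
		}
		return penalty;
	}

	int main(void)
	{
		/* IRQ 11 is active for one link, possible for the other:
		 * 4096 + 128 = 4224. IRQ 10 is merely possible for both: 256.
		 */
		printf("penalty(11) = %d\n", pci_sharing_penalty(11));
		printf("penalty(10) = %d\n", pci_sharing_penalty(10));
		return 0;
	}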
static int acpi_irq_balance = -1; /* 0: static, 1: balance */
@@ -547,12 +565,12 @@
* the use of IRQs 9, 10, 11, and >15.
*/
for (i = (link->irq.possible_count - 1); i >= 0; i--) {
- if (acpi_irq_penalty[irq] >
- acpi_irq_penalty[link->irq.possible[i]])
+ if (acpi_irq_get_penalty(irq) >
+ acpi_irq_get_penalty(link->irq.possible[i]))
irq = link->irq.possible[i];
}
}
- if (acpi_irq_penalty[irq] >= PIRQ_PENALTY_ISA_ALWAYS) {
+ if (acpi_irq_get_penalty(irq) >= PIRQ_PENALTY_ISA_ALWAYS) {
printk(KERN_ERR PREFIX "No IRQ available for %s [%s]. "
"Try pci=noacpi or acpi=off\n",
acpi_device_name(link->device),
@@ -568,7 +586,6 @@
acpi_device_bid(link->device));
return -ENODEV;
} else {
- acpi_irq_penalty[link->irq.active] += PIRQ_PENALTY_PCI_USING;
printk(KERN_WARNING PREFIX "%s [%s] enabled at IRQ %d\n",
acpi_device_name(link->device),
acpi_device_bid(link->device), link->irq.active);
@@ -778,7 +795,7 @@
}
/*
- * modify acpi_irq_penalty[] from cmdline
+ * modify acpi_isa_irq_penalty[] from cmdline
*/
static int __init acpi_irq_penalty_update(char *str, int used)
{
@@ -787,23 +804,24 @@
for (i = 0; i < 16; i++) {
int retval;
int irq;
+ int new_penalty;
retval = get_option(&str, &irq);
if (!retval)
break; /* no number found */
- if (irq < 0)
- continue;
-
- if (irq >= ARRAY_SIZE(acpi_irq_penalty))
+		/* see if this is an ISA IRQ */
+ if ((irq < 0) || (irq >= ACPI_MAX_ISA_IRQS))
continue;
if (used)
- acpi_irq_penalty[irq] += PIRQ_PENALTY_ISA_USED;
+ new_penalty = acpi_irq_get_penalty(irq) +
+ PIRQ_PENALTY_ISA_USED;
else
- acpi_irq_penalty[irq] = PIRQ_PENALTY_PCI_AVAILABLE;
+ new_penalty = 0;
+ acpi_isa_irq_penalty[irq] = new_penalty;
if (retval != 2) /* no next number */
break;
}
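Judging by the rest of this file, this helper backs the acpi_irq_isa= and acpi_irq_pci= boot options (the __setup() wiring is outside this hunk, so treat the option names as an assumption). A userspace rehearsal of the list walk, with strtol() standing in for get_option():

	/* Sketch of the comma-separated IRQ list parsing above, simplified:
	 * the "used" path in the kernel adds PIRQ_PENALTY_ISA_USED to the
	 * computed penalty rather than assigning it outright.
	 */
	#include <stdio.h>
	#include <stdlib.h>

	#define ACPI_MAX_ISA_IRQS	16
	#define PIRQ_PENALTY_ISA_USED	(16*16*16*16*16)

	static int isa_penalty[ACPI_MAX_ISA_IRQS];

	static void penalty_update(const char *str, int used)
	{
		while (*str) {
			char *end;
			long irq = strtol(str, &end, 10);

			if (end == str)
				break;			/* no number found */
			if (irq >= 0 && irq < ACPI_MAX_ISA_IRQS)
				isa_penalty[irq] = used ? PIRQ_PENALTY_ISA_USED : 0;
			if (*end != ',')
				break;			/* no next number */
			str = end + 1;
		}
	}

	int main(void)
	{
		penalty_update("10,11", 1);
		for (int i = 0; i < ACPI_MAX_ISA_IRQS; i++)
			if (isa_penalty[i])
				printf("IRQ%d penalized\n", i);
		return 0;
	}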
@@ -819,34 +837,15 @@
*/
void acpi_penalize_isa_irq(int irq, int active)
{
- if (irq >= 0 && irq < ARRAY_SIZE(acpi_irq_penalty)) {
- if (active)
- acpi_irq_penalty[irq] += PIRQ_PENALTY_ISA_USED;
- else
- acpi_irq_penalty[irq] += PIRQ_PENALTY_PCI_USING;
- }
+ if ((irq >= 0) && (irq < ARRAY_SIZE(acpi_isa_irq_penalty)))
+		acpi_isa_irq_penalty[irq] = acpi_irq_get_penalty(irq) +
+			(active ? PIRQ_PENALTY_ISA_USED : PIRQ_PENALTY_PCI_USING);
}
bool acpi_isa_irq_available(int irq)
{
- return irq >= 0 && (irq >= ARRAY_SIZE(acpi_irq_penalty) ||
- acpi_irq_penalty[irq] < PIRQ_PENALTY_ISA_ALWAYS);
-}
-
-/*
- * Penalize IRQ used by ACPI SCI. If ACPI SCI pin attributes conflict with
- * PCI IRQ attributes, mark ACPI SCI as ISA_ALWAYS so it won't be used for
- * PCI IRQs.
- */
-void acpi_penalize_sci_irq(int irq, int trigger, int polarity)
-{
- if (irq >= 0 && irq < ARRAY_SIZE(acpi_irq_penalty)) {
- if (trigger != ACPI_MADT_TRIGGER_LEVEL ||
- polarity != ACPI_MADT_POLARITY_ACTIVE_LOW)
- acpi_irq_penalty[irq] += PIRQ_PENALTY_ISA_ALWAYS;
- else
- acpi_irq_penalty[irq] += PIRQ_PENALTY_PCI_USING;
- }
+ return irq >= 0 && (irq >= ARRAY_SIZE(acpi_isa_irq_penalty) ||
+ acpi_irq_get_penalty(irq) < PIRQ_PENALTY_ISA_ALWAYS);
}
/*
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index 2a8b596..7a2e4d4 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -26,6 +26,11 @@
#include "internal.h"
#include "sleep.h"
+/*
+ * Some HW-full platforms do not provide _S5, so they may need to
+ * fall back to EFI power off for shutdown.
+ */
+bool acpi_no_s5;
static u8 sleep_states[ACPI_S_STATE_COUNT];
static void acpi_sleep_tts_switch(u32 acpi_state)
@@ -882,6 +887,8 @@
sleep_states[ACPI_STATE_S5] = 1;
pm_power_off_prepare = acpi_power_off_prepare;
pm_power_off = acpi_power_off;
+ } else {
+ acpi_no_s5 = true;
}
supported[0] = 0;
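acpi_no_s5 is a new global with no consumer in this hunk. A hedged sketch of the intended use, assuming the companion x86 change routes power off through an efi_poweroff_required() hook (the hook name and the declaration's location are assumptions, not shown in this patch):

	/* Hedged consumer sketch: when the platform lacks _S5, prefer the EFI
	 * runtime service for power off.
	 */
	#include <linux/acpi.h>		/* assumed to declare acpi_no_s5 */

	bool efi_poweroff_required(void)
	{
		return acpi_no_s5;
	}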
diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c
index 0243d37..4b3a9e2 100644
--- a/drivers/acpi/sysfs.c
+++ b/drivers/acpi/sysfs.c
@@ -555,23 +555,22 @@
static int get_status(u32 index, acpi_event_status *status,
acpi_handle *handle)
{
- int result = 0;
+ int result;
if (index >= num_gpes + ACPI_NUM_FIXED_EVENTS)
- goto end;
+ return -EINVAL;
if (index < num_gpes) {
result = acpi_get_gpe_device(index, handle);
if (result) {
ACPI_EXCEPTION((AE_INFO, AE_NOT_FOUND,
"Invalid GPE 0x%x", index));
- goto end;
+ return result;
}
result = acpi_get_gpe_status(*handle, index, status);
} else if (index < (num_gpes + ACPI_NUM_FIXED_EVENTS))
result = acpi_get_event_status(index - num_gpes, status);
-end:
return result;
}
diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c
index f49c024..a372f9e 100644
--- a/drivers/acpi/tables.c
+++ b/drivers/acpi/tables.c
@@ -32,8 +32,14 @@
#include <linux/errno.h>
#include <linux/acpi.h>
#include <linux/bootmem.h>
+#include <linux/earlycpio.h>
+#include <linux/memblock.h>
#include "internal.h"
+#ifdef CONFIG_ACPI_CUSTOM_DSDT
+#include CONFIG_ACPI_CUSTOM_DSDT_FILE
+#endif
+
#define ACPI_MAX_TABLES 128
static char *mps_inti_flags_polarity[] = { "dfl", "high", "res", "low" };
@@ -433,6 +439,314 @@
return;
}
+static void acpi_table_taint(struct acpi_table_header *table)
+{
+ pr_warn("Override [%4.4s-%8.8s], this is unsafe: tainting kernel\n",
+ table->signature, table->oem_table_id);
+ add_taint(TAINT_OVERRIDDEN_ACPI_TABLE, LOCKDEP_NOW_UNRELIABLE);
+}
+
+#ifdef CONFIG_ACPI_TABLE_UPGRADE
+static u64 acpi_tables_addr;
+static int all_tables_size;
+
+/* Copied from acpica/tbutils.c:acpi_tb_checksum() */
+static u8 __init acpi_table_checksum(u8 *buffer, u32 length)
+{
+ u8 sum = 0;
+ u8 *end = buffer + length;
+
+ while (buffer < end)
+ sum = (u8) (sum + *(buffer++));
+ return sum;
+}
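Per the ACPI spec, every byte of a table, checksum field included, must sum to zero modulo 256. A standalone rehearsal of the helper, including how a checksum byte is derived:

	/* Standalone illustration: a table is valid iff the byte sum of the
	 * whole image, checksum byte included, wraps to 0 (mod 256).
	 */
	#include <stdio.h>

	typedef unsigned char u8;
	typedef unsigned int u32;

	static u8 acpi_table_checksum(u8 *buffer, u32 length)
	{
		u8 sum = 0;
		u8 *end = buffer + length;

		while (buffer < end)
			sum = (u8)(sum + *(buffer++));
		return sum;
	}

	int main(void)
	{
		u8 table[8] = { 'S', 'S', 'D', 'T', 1, 2, 3, 0 };

		/* Patch the last byte so the total wraps to zero. */
		table[7] = (u8)(0x100 - acpi_table_checksum(table, 7));
		printf("checksum ok: %d\n", acpi_table_checksum(table, 8) == 0);
		return 0;
	}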
+
+/* All but ACPI_SIG_RSDP and ACPI_SIG_FACS: */
+static const char * const table_sigs[] = {
+ ACPI_SIG_BERT, ACPI_SIG_CPEP, ACPI_SIG_ECDT, ACPI_SIG_EINJ,
+ ACPI_SIG_ERST, ACPI_SIG_HEST, ACPI_SIG_MADT, ACPI_SIG_MSCT,
+ ACPI_SIG_SBST, ACPI_SIG_SLIT, ACPI_SIG_SRAT, ACPI_SIG_ASF,
+ ACPI_SIG_BOOT, ACPI_SIG_DBGP, ACPI_SIG_DMAR, ACPI_SIG_HPET,
+ ACPI_SIG_IBFT, ACPI_SIG_IVRS, ACPI_SIG_MCFG, ACPI_SIG_MCHI,
+ ACPI_SIG_SLIC, ACPI_SIG_SPCR, ACPI_SIG_SPMI, ACPI_SIG_TCPA,
+ ACPI_SIG_UEFI, ACPI_SIG_WAET, ACPI_SIG_WDAT, ACPI_SIG_WDDT,
+ ACPI_SIG_WDRT, ACPI_SIG_DSDT, ACPI_SIG_FADT, ACPI_SIG_PSDT,
+ ACPI_SIG_RSDT, ACPI_SIG_XSDT, ACPI_SIG_SSDT, NULL };
+
+#define ACPI_HEADER_SIZE sizeof(struct acpi_table_header)
+
+#define NR_ACPI_INITRD_TABLES 64
+static struct cpio_data __initdata acpi_initrd_files[NR_ACPI_INITRD_TABLES];
+static DECLARE_BITMAP(acpi_initrd_installed, NR_ACPI_INITRD_TABLES);
+
+#define MAP_CHUNK_SIZE (NR_FIX_BTMAPS << PAGE_SHIFT)
+
+static void __init acpi_table_initrd_init(void *data, size_t size)
+{
+ int sig, no, table_nr = 0, total_offset = 0;
+ long offset = 0;
+ struct acpi_table_header *table;
+ char cpio_path[32] = "kernel/firmware/acpi/";
+ struct cpio_data file;
+
+ if (data == NULL || size == 0)
+ return;
+
+ for (no = 0; no < NR_ACPI_INITRD_TABLES; no++) {
+ file = find_cpio_data(cpio_path, data, size, &offset);
+ if (!file.data)
+ break;
+
+ data += offset;
+ size -= offset;
+
+ if (file.size < sizeof(struct acpi_table_header)) {
+ pr_err("ACPI OVERRIDE: Table smaller than ACPI header [%s%s]\n",
+ cpio_path, file.name);
+ continue;
+ }
+
+ table = file.data;
+
+ for (sig = 0; table_sigs[sig]; sig++)
+ if (!memcmp(table->signature, table_sigs[sig], 4))
+ break;
+
+ if (!table_sigs[sig]) {
+ pr_err("ACPI OVERRIDE: Unknown signature [%s%s]\n",
+ cpio_path, file.name);
+ continue;
+ }
+ if (file.size != table->length) {
+ pr_err("ACPI OVERRIDE: File length does not match table length [%s%s]\n",
+ cpio_path, file.name);
+ continue;
+ }
+ if (acpi_table_checksum(file.data, table->length)) {
+ pr_err("ACPI OVERRIDE: Bad table checksum [%s%s]\n",
+ cpio_path, file.name);
+ continue;
+ }
+
+ pr_info("%4.4s ACPI table found in initrd [%s%s][0x%x]\n",
+ table->signature, cpio_path, file.name, table->length);
+
+ all_tables_size += table->length;
+ acpi_initrd_files[table_nr].data = file.data;
+ acpi_initrd_files[table_nr].size = file.size;
+ table_nr++;
+ }
+ if (table_nr == 0)
+ return;
+
+ acpi_tables_addr =
+ memblock_find_in_range(0, max_low_pfn_mapped << PAGE_SHIFT,
+ all_tables_size, PAGE_SIZE);
+ if (!acpi_tables_addr) {
+ WARN_ON(1);
+ return;
+ }
+ /*
+	 * Calling e820_add_region alone does not work: the tables become
+	 * invalid later (the memory gets reused).
+	 * memblock_reserve works as expected and the tables won't get modified.
+	 * But it's not enough on X86, because ioremap (used by
+	 * acpi_os_map_memory) will complain later that the pages to be
+	 * mapped are not marked "reserved".
+	 * Using both memblock_reserve and e820_add_region (via
+	 * arch_reserve_mem_area) works fine.
+ */
+ memblock_reserve(acpi_tables_addr, all_tables_size);
+ arch_reserve_mem_area(acpi_tables_addr, all_tables_size);
+
+ /*
+	 * early_ioremap can only remap 256 KB at a time. If we map all the
+	 * tables at once, we will hit that limit, so map and copy one chunk
+	 * at a time, as relocate_initrd() does.
+ */
+ for (no = 0; no < table_nr; no++) {
+ unsigned char *src_p = acpi_initrd_files[no].data;
+ phys_addr_t size = acpi_initrd_files[no].size;
+ phys_addr_t dest_addr = acpi_tables_addr + total_offset;
+ phys_addr_t slop, clen;
+ char *dest_p;
+
+ total_offset += size;
+
+ while (size) {
+ slop = dest_addr & ~PAGE_MASK;
+ clen = size;
+ if (clen > MAP_CHUNK_SIZE - slop)
+ clen = MAP_CHUNK_SIZE - slop;
+ dest_p = early_ioremap(dest_addr & PAGE_MASK,
+ clen + slop);
+ memcpy(dest_p + slop, src_p, clen);
+ early_iounmap(dest_p, clen + slop);
+ src_p += clen;
+ dest_addr += clen;
+ size -= clen;
+ }
+ }
+}
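The slop/clen arithmetic is the subtle part of the loop above. The userspace rehearsal below shrinks PAGE_SIZE and MAP_CHUNK_SIZE so the splits are visible, with printf() standing in for the early_ioremap()/memcpy()/early_iounmap() window:

	/* Userspace rehearsal of the chunked copy: constants deliberately
	 * tiny so the chunk boundaries show up in a few lines of output.
	 */
	#include <stdio.h>

	#define PAGE_SIZE	16UL
	#define PAGE_MASK	(~(PAGE_SIZE - 1))
	#define MAP_CHUNK_SIZE	64UL

	int main(void)
	{
		unsigned long dest_addr = 100;	/* deliberately unaligned */
		unsigned long size = 150;

		while (size) {
			unsigned long slop = dest_addr & ~PAGE_MASK;
			unsigned long clen = size;

			if (clen > MAP_CHUNK_SIZE - slop)
				clen = MAP_CHUNK_SIZE - slop;
			printf("map [%lu, %lu) copy %lu bytes at offset %lu\n",
			       dest_addr & PAGE_MASK,
			       (dest_addr & PAGE_MASK) + clen + slop, clen, slop);
			dest_addr += clen;
			size -= clen;
		}
		return 0;
	}

For the values above this prints three mappings: 60 bytes at page offset 4, then 64, then 26, exactly the split the kernel loop performs.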
+
+static acpi_status
+acpi_table_initrd_override(struct acpi_table_header *existing_table,
+ acpi_physical_address *address, u32 *length)
+{
+ int table_offset = 0;
+ int table_index = 0;
+ struct acpi_table_header *table;
+ u32 table_length;
+
+ *length = 0;
+ *address = 0;
+ if (!acpi_tables_addr)
+ return AE_OK;
+
+ while (table_offset + ACPI_HEADER_SIZE <= all_tables_size) {
+ table = acpi_os_map_memory(acpi_tables_addr + table_offset,
+ ACPI_HEADER_SIZE);
+ if (table_offset + table->length > all_tables_size) {
+ acpi_os_unmap_memory(table, ACPI_HEADER_SIZE);
+ WARN_ON(1);
+ return AE_OK;
+ }
+
+ table_length = table->length;
+
+		/* Only override matching tables */
+ if (memcmp(existing_table->signature, table->signature, 4) ||
+ memcmp(table->oem_id, existing_table->oem_id,
+ ACPI_OEM_ID_SIZE) ||
+ memcmp(table->oem_table_id, existing_table->oem_table_id,
+ ACPI_OEM_TABLE_ID_SIZE)) {
+ acpi_os_unmap_memory(table, ACPI_HEADER_SIZE);
+ goto next_table;
+ }
+ /*
+		 * Mark the table so it is not used again by
+		 * acpi_table_initrd_scan(), and check the revision.
+ */
+ if (test_and_set_bit(table_index, acpi_initrd_installed) ||
+ existing_table->oem_revision >= table->oem_revision) {
+ acpi_os_unmap_memory(table, ACPI_HEADER_SIZE);
+ goto next_table;
+ }
+
+ *length = table_length;
+ *address = acpi_tables_addr + table_offset;
+ pr_info("Table Upgrade: override [%4.4s-%6.6s-%8.8s]\n",
+ table->signature, table->oem_id,
+ table->oem_table_id);
+ acpi_os_unmap_memory(table, ACPI_HEADER_SIZE);
+ break;
+
+next_table:
+ table_offset += table_length;
+ table_index++;
+ }
+ return AE_OK;
+}
+
+static void __init acpi_table_initrd_scan(void)
+{
+ int table_offset = 0;
+ int table_index = 0;
+ u32 table_length;
+ struct acpi_table_header *table;
+
+ if (!acpi_tables_addr)
+ return;
+
+ while (table_offset + ACPI_HEADER_SIZE <= all_tables_size) {
+ table = acpi_os_map_memory(acpi_tables_addr + table_offset,
+ ACPI_HEADER_SIZE);
+ if (table_offset + table->length > all_tables_size) {
+ acpi_os_unmap_memory(table, ACPI_HEADER_SIZE);
+ WARN_ON(1);
+ return;
+ }
+
+ table_length = table->length;
+
+ /* Skip RSDT/XSDT which should only be used for override */
+ if (ACPI_COMPARE_NAME(table->signature, ACPI_SIG_RSDT) ||
+ ACPI_COMPARE_NAME(table->signature, ACPI_SIG_XSDT)) {
+ acpi_os_unmap_memory(table, ACPI_HEADER_SIZE);
+ goto next_table;
+ }
+ /*
+		 * Mark the table so it is not used again by
+		 * acpi_table_initrd_override(), though that cannot actually
+		 * happen, because override is disabled in acpi_install_table().
+ */
+ if (test_and_set_bit(table_index, acpi_initrd_installed)) {
+ acpi_os_unmap_memory(table, ACPI_HEADER_SIZE);
+ goto next_table;
+ }
+
+ pr_info("Table Upgrade: install [%4.4s-%6.6s-%8.8s]\n",
+ table->signature, table->oem_id,
+ table->oem_table_id);
+ acpi_os_unmap_memory(table, ACPI_HEADER_SIZE);
+ acpi_install_table(acpi_tables_addr + table_offset, TRUE);
+next_table:
+ table_offset += table_length;
+ table_index++;
+ }
+}
+#else
+static void __init acpi_table_initrd_init(void *data, size_t size)
+{
+}
+
+static acpi_status
+acpi_table_initrd_override(struct acpi_table_header *existing_table,
+ acpi_physical_address *address,
+ u32 *table_length)
+{
+ *table_length = 0;
+ *address = 0;
+ return AE_OK;
+}
+
+static void __init acpi_table_initrd_scan(void)
+{
+}
+#endif /* CONFIG_ACPI_TABLE_UPGRADE */
+
+acpi_status
+acpi_os_physical_table_override(struct acpi_table_header *existing_table,
+ acpi_physical_address *address,
+ u32 *table_length)
+{
+ return acpi_table_initrd_override(existing_table, address,
+ table_length);
+}
+
+acpi_status
+acpi_os_table_override(struct acpi_table_header *existing_table,
+ struct acpi_table_header **new_table)
+{
+ if (!existing_table || !new_table)
+ return AE_BAD_PARAMETER;
+
+ *new_table = NULL;
+
+#ifdef CONFIG_ACPI_CUSTOM_DSDT
+ if (strncmp(existing_table->signature, "DSDT", 4) == 0)
+ *new_table = (struct acpi_table_header *)AmlCode;
+#endif
+ if (*new_table != NULL)
+ acpi_table_taint(existing_table);
+ return AE_OK;
+}
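CONFIG_ACPI_CUSTOM_DSDT_FILE names a hex-encoded AML header, conventionally produced by iasl -tc. The included file is expected to define AmlCode roughly as sketched below; the bytes are placeholders, not valid AML, and the exact array name emitted varies with the iasl version, so treat the shape as an assumption:

	/*
	 * Sketch of what the included CONFIG_ACPI_CUSTOM_DSDT_FILE provides:
	 * a C array holding the compiled AML image, DSDT header first.
	 */
	static const unsigned char AmlCode[] = {
		0x44, 0x53, 0x44, 0x54,		/* signature: "DSDT" */
		0x24, 0x00, 0x00, 0x00,		/* length */
		0x02,				/* revision */
		/* ... checksum, OEM IDs, and the AML byte code follow ... */
	};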
+
+void __init early_acpi_table_init(void *data, size_t size)
+{
+ acpi_table_initrd_init(data, size);
+}
+
/*
* acpi_table_init()
*
@@ -457,7 +771,7 @@
status = acpi_initialize_tables(initial_tables, ACPI_MAX_TABLES, 0);
if (ACPI_FAILURE(status))
return -EINVAL;
- acpi_initrd_initialize_tables();
+ acpi_table_initrd_scan();
check_multiple_madt();
return 0;
diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c
index 050673f..ac832bf 100644
--- a/drivers/acpi/utils.c
+++ b/drivers/acpi/utils.c
@@ -707,7 +707,7 @@
EXPORT_SYMBOL(acpi_check_dsm);
/**
- * acpi_dev_present - Detect presence of a given ACPI device in the system.
+ * acpi_dev_found - Detect presence of a given ACPI device in the namespace.
* @hid: Hardware ID of the device.
*
* Return %true if the device was present at the moment of invocation.
@@ -719,7 +719,7 @@
* instead). Calling from module_init() is fine (which is synonymous
* with device_initcall()).
*/
-bool acpi_dev_present(const char *hid)
+bool acpi_dev_found(const char *hid)
{
struct acpi_device_bus_id *acpi_device_bus_id;
bool found = false;
@@ -734,7 +734,7 @@
return found;
}
-EXPORT_SYMBOL(acpi_dev_present);
+EXPORT_SYMBOL(acpi_dev_found);
/*
* acpi_backlight= handling, this is done here rather then in video_detect.c
diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c
index 1316ddd..3d13276 100644
--- a/drivers/acpi/video_detect.c
+++ b/drivers/acpi/video_detect.c
@@ -358,7 +358,7 @@
if (!(video_caps & ACPI_VIDEO_BACKLIGHT))
return acpi_backlight_vendor;
- if (acpi_osi_is_win8() && backlight_device_registered(BACKLIGHT_RAW))
+ if (acpi_osi_is_win8() && backlight_device_get_by_type(BACKLIGHT_RAW))
return acpi_backlight_native;
return acpi_backlight_video;
diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c
index 0e64a1b..3657ac1 100644
--- a/drivers/base/power/clock_ops.c
+++ b/drivers/base/power/clock_ops.c
@@ -159,7 +159,7 @@
count = of_count_phandle_with_args(dev->of_node, "clocks",
"#clock-cells");
- if (count == 0)
+ if (count <= 0)
return -ENODEV;
clks = kcalloc(count, sizeof(*clks), GFP_KERNEL);
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 56705b5..de23b64 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -229,17 +229,6 @@
return ret;
}
-static int genpd_save_dev(struct generic_pm_domain *genpd, struct device *dev)
-{
- return GENPD_DEV_CALLBACK(genpd, int, save_state, dev);
-}
-
-static int genpd_restore_dev(struct generic_pm_domain *genpd,
- struct device *dev)
-{
- return GENPD_DEV_CALLBACK(genpd, int, restore_state, dev);
-}
-
static int genpd_dev_pm_qos_notifier(struct notifier_block *nb,
unsigned long val, void *ptr)
{
@@ -372,17 +361,63 @@
}
/**
- * pm_genpd_runtime_suspend - Suspend a device belonging to I/O PM domain.
+ * __genpd_runtime_suspend - walk the hierarchy of ->runtime_suspend() callbacks
+ * @dev: Device to handle.
+ */
+static int __genpd_runtime_suspend(struct device *dev)
+{
+ int (*cb)(struct device *__dev);
+
+ if (dev->type && dev->type->pm)
+ cb = dev->type->pm->runtime_suspend;
+ else if (dev->class && dev->class->pm)
+ cb = dev->class->pm->runtime_suspend;
+ else if (dev->bus && dev->bus->pm)
+ cb = dev->bus->pm->runtime_suspend;
+ else
+ cb = NULL;
+
+ if (!cb && dev->driver && dev->driver->pm)
+ cb = dev->driver->pm->runtime_suspend;
+
+ return cb ? cb(dev) : 0;
+}
+
+/**
+ * __genpd_runtime_resume - walk the hierarchy of ->runtime_resume() callbacks
+ * @dev: Device to handle.
+ */
+static int __genpd_runtime_resume(struct device *dev)
+{
+ int (*cb)(struct device *__dev);
+
+ if (dev->type && dev->type->pm)
+ cb = dev->type->pm->runtime_resume;
+ else if (dev->class && dev->class->pm)
+ cb = dev->class->pm->runtime_resume;
+ else if (dev->bus && dev->bus->pm)
+ cb = dev->bus->pm->runtime_resume;
+ else
+ cb = NULL;
+
+ if (!cb && dev->driver && dev->driver->pm)
+ cb = dev->driver->pm->runtime_resume;
+
+ return cb ? cb(dev) : 0;
+}
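Both helpers open-code the usual dev_pm_ops precedence: a type's ops win over a class's, which win over a bus's, and the driver's own ops are consulted only when none of those supplied a callback. A userspace restatement with mocked structs:

	/* Userspace restatement of the lookup order above: type -> class ->
	 * bus, falling back to the driver's ops. All structs are mocked; only
	 * the precedence logic matches the kernel helpers.
	 */
	#include <stdio.h>

	struct dev_pm_ops { int (*runtime_suspend)(void); };
	struct device {
		const struct dev_pm_ops *type_pm, *class_pm, *bus_pm, *driver_pm;
	};

	static int (*pick_runtime_suspend(struct device *dev))(void)
	{
		int (*cb)(void) = NULL;

		if (dev->type_pm)
			cb = dev->type_pm->runtime_suspend;
		else if (dev->class_pm)
			cb = dev->class_pm->runtime_suspend;
		else if (dev->bus_pm)
			cb = dev->bus_pm->runtime_suspend;

		if (!cb && dev->driver_pm)
			cb = dev->driver_pm->runtime_suspend;
		return cb;
	}

	static int bus_cb(void)    { puts("bus runtime_suspend");    return 0; }
	static int driver_cb(void) { puts("driver runtime_suspend"); return 0; }

	int main(void)
	{
		static const struct dev_pm_ops bus_ops = { bus_cb };
		static const struct dev_pm_ops drv_ops = { driver_cb };
		struct device dev = { .bus_pm = &bus_ops, .driver_pm = &drv_ops };

		pick_runtime_suspend(&dev)();	/* bus wins over driver */
		return 0;
	}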
+
+/**
+ * genpd_runtime_suspend - Suspend a device belonging to I/O PM domain.
* @dev: Device to suspend.
*
* Carry out a runtime suspend of a device under the assumption that its
* pm_domain field points to the domain member of an object of type
* struct generic_pm_domain representing a PM domain consisting of I/O devices.
*/
-static int pm_genpd_runtime_suspend(struct device *dev)
+static int genpd_runtime_suspend(struct device *dev)
{
struct generic_pm_domain *genpd;
- bool (*stop_ok)(struct device *__dev);
+ bool (*suspend_ok)(struct device *__dev);
struct gpd_timing_data *td = &dev_gpd_data(dev)->td;
bool runtime_pm = pm_runtime_enabled(dev);
ktime_t time_start;
@@ -401,21 +436,21 @@
* runtime PM is disabled. Under these circumstances, we shall skip
* validating/measuring the PM QoS latency.
*/
- stop_ok = genpd->gov ? genpd->gov->stop_ok : NULL;
- if (runtime_pm && stop_ok && !stop_ok(dev))
+ suspend_ok = genpd->gov ? genpd->gov->suspend_ok : NULL;
+ if (runtime_pm && suspend_ok && !suspend_ok(dev))
return -EBUSY;
/* Measure suspend latency. */
if (runtime_pm)
time_start = ktime_get();
- ret = genpd_save_dev(genpd, dev);
+ ret = __genpd_runtime_suspend(dev);
if (ret)
return ret;
ret = genpd_stop_dev(genpd, dev);
if (ret) {
- genpd_restore_dev(genpd, dev);
+ __genpd_runtime_resume(dev);
return ret;
}
@@ -446,14 +481,14 @@
}
/**
- * pm_genpd_runtime_resume - Resume a device belonging to I/O PM domain.
+ * genpd_runtime_resume - Resume a device belonging to I/O PM domain.
* @dev: Device to resume.
*
* Carry out a runtime resume of a device under the assumption that its
* pm_domain field points to the domain member of an object of type
* struct generic_pm_domain representing a PM domain consisting of I/O devices.
*/
-static int pm_genpd_runtime_resume(struct device *dev)
+static int genpd_runtime_resume(struct device *dev)
{
struct generic_pm_domain *genpd;
struct gpd_timing_data *td = &dev_gpd_data(dev)->td;
@@ -491,7 +526,7 @@
if (ret)
goto err_poweroff;
- ret = genpd_restore_dev(genpd, dev);
+ ret = __genpd_runtime_resume(dev);
if (ret)
goto err_stop;
@@ -695,15 +730,6 @@
* at this point and a system wakeup event should be reported if it's
* set up to wake up the system from sleep states.
*/
- pm_runtime_get_noresume(dev);
- if (pm_runtime_barrier(dev) && device_may_wakeup(dev))
- pm_wakeup_event(dev, 0);
-
- if (pm_wakeup_pending()) {
- pm_runtime_put(dev);
- return -EBUSY;
- }
-
if (resume_needed(dev, genpd))
pm_runtime_resume(dev);
@@ -716,10 +742,8 @@
mutex_unlock(&genpd->lock);
- if (genpd->suspend_power_off) {
- pm_runtime_put_noidle(dev);
+ if (genpd->suspend_power_off)
return 0;
- }
/*
* The PM domain must be in the GPD_STATE_ACTIVE state at this point,
@@ -741,7 +765,6 @@
pm_runtime_enable(dev);
}
- pm_runtime_put(dev);
return ret;
}
@@ -1427,54 +1450,6 @@
}
EXPORT_SYMBOL_GPL(pm_genpd_remove_subdomain);
-/* Default device callbacks for generic PM domains. */
-
-/**
- * pm_genpd_default_save_state - Default "save device state" for PM domains.
- * @dev: Device to handle.
- */
-static int pm_genpd_default_save_state(struct device *dev)
-{
- int (*cb)(struct device *__dev);
-
- if (dev->type && dev->type->pm)
- cb = dev->type->pm->runtime_suspend;
- else if (dev->class && dev->class->pm)
- cb = dev->class->pm->runtime_suspend;
- else if (dev->bus && dev->bus->pm)
- cb = dev->bus->pm->runtime_suspend;
- else
- cb = NULL;
-
- if (!cb && dev->driver && dev->driver->pm)
- cb = dev->driver->pm->runtime_suspend;
-
- return cb ? cb(dev) : 0;
-}
-
-/**
- * pm_genpd_default_restore_state - Default PM domains "restore device state".
- * @dev: Device to handle.
- */
-static int pm_genpd_default_restore_state(struct device *dev)
-{
- int (*cb)(struct device *__dev);
-
- if (dev->type && dev->type->pm)
- cb = dev->type->pm->runtime_resume;
- else if (dev->class && dev->class->pm)
- cb = dev->class->pm->runtime_resume;
- else if (dev->bus && dev->bus->pm)
- cb = dev->bus->pm->runtime_resume;
- else
- cb = NULL;
-
- if (!cb && dev->driver && dev->driver->pm)
- cb = dev->driver->pm->runtime_resume;
-
- return cb ? cb(dev) : 0;
-}
-
/**
* pm_genpd_init - Initialize a generic I/O PM domain object.
* @genpd: PM domain object to initialize.
@@ -1498,8 +1473,8 @@
genpd->device_count = 0;
genpd->max_off_time_ns = -1;
genpd->max_off_time_changed = true;
- genpd->domain.ops.runtime_suspend = pm_genpd_runtime_suspend;
- genpd->domain.ops.runtime_resume = pm_genpd_runtime_resume;
+ genpd->domain.ops.runtime_suspend = genpd_runtime_suspend;
+ genpd->domain.ops.runtime_resume = genpd_runtime_resume;
genpd->domain.ops.prepare = pm_genpd_prepare;
genpd->domain.ops.suspend = pm_genpd_suspend;
genpd->domain.ops.suspend_late = pm_genpd_suspend_late;
@@ -1520,8 +1495,6 @@
genpd->domain.ops.restore_early = pm_genpd_resume_early;
genpd->domain.ops.restore = pm_genpd_resume;
genpd->domain.ops.complete = pm_genpd_complete;
- genpd->dev_ops.save_state = pm_genpd_default_save_state;
- genpd->dev_ops.restore_state = pm_genpd_default_restore_state;
if (genpd->flags & GENPD_FLAG_PM_CLK) {
genpd->dev_ops.stop = pm_clk_suspend;
diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c
index 00a5436..2e0fce7 100644
--- a/drivers/base/power/domain_governor.c
+++ b/drivers/base/power/domain_governor.c
@@ -37,10 +37,10 @@
}
/**
- * default_stop_ok - Default PM domain governor routine for stopping devices.
+ * default_suspend_ok - Default PM domain governor routine to suspend devices.
* @dev: Device to check.
*/
-static bool default_stop_ok(struct device *dev)
+static bool default_suspend_ok(struct device *dev)
{
struct gpd_timing_data *td = &dev_gpd_data(dev)->td;
unsigned long flags;
@@ -51,13 +51,13 @@
spin_lock_irqsave(&dev->power.lock, flags);
if (!td->constraint_changed) {
- bool ret = td->cached_stop_ok;
+ bool ret = td->cached_suspend_ok;
spin_unlock_irqrestore(&dev->power.lock, flags);
return ret;
}
td->constraint_changed = false;
- td->cached_stop_ok = false;
+ td->cached_suspend_ok = false;
td->effective_constraint_ns = -1;
constraint_ns = __dev_pm_qos_read_value(dev);
@@ -83,13 +83,13 @@
return false;
}
td->effective_constraint_ns = constraint_ns;
- td->cached_stop_ok = constraint_ns >= 0;
+ td->cached_suspend_ok = constraint_ns >= 0;
/*
* The children have been suspended already, so we don't need to take
- * their stop latencies into account here.
+ * their suspend latencies into account here.
*/
- return td->cached_stop_ok;
+ return td->cached_suspend_ok;
}
/**
@@ -150,7 +150,7 @@
*/
td = &to_gpd_data(pdd)->td;
constraint_ns = td->effective_constraint_ns;
- /* default_stop_ok() need not be called before us. */
+ /* default_suspend_ok() need not be called before us. */
if (constraint_ns < 0) {
constraint_ns = dev_pm_qos_read_value(pdd->dev);
constraint_ns *= NSEC_PER_USEC;
@@ -227,7 +227,7 @@
}
struct dev_power_governor simple_qos_governor = {
- .stop_ok = default_stop_ok,
+ .suspend_ok = default_suspend_ok,
.power_down_ok = default_power_down_ok,
};
@@ -236,5 +236,5 @@
*/
struct dev_power_governor pm_domain_always_on_gov = {
.power_down_ok = always_on_power_down_ok,
- .stop_ok = default_stop_ok,
+ .suspend_ok = default_suspend_ok,
};
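The rename makes the hook's meaning explicit: the governor is consulted before a full runtime suspend, not merely before a clock stop. Under that reading, a hypothetical third governor in this same file could veto suspend on a board-specific condition; board_debug_hold is an invented flag and none of this is part of the patch:

	/* Hypothetical governor sketch: veto runtime suspend while an invented
	 * debug flag is set, reusing this file's default power_down policy
	 * (default_power_down_ok is static here, so the sketch must live in
	 * this file).
	 */
	static bool board_debug_hold;

	static bool board_suspend_ok(struct device *dev)
	{
		return !board_debug_hold;
	}

	struct dev_power_governor board_gov = {
		.suspend_ok	= board_suspend_ok,
		.power_down_ok	= default_power_down_ok,
	};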
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 6e7c3cc..c81667d 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -1556,7 +1556,6 @@
static int device_prepare(struct device *dev, pm_message_t state)
{
int (*callback)(struct device *) = NULL;
- char *info = NULL;
int ret = 0;
if (dev->power.syscore)
@@ -1579,24 +1578,17 @@
goto unlock;
}
- if (dev->pm_domain) {
- info = "preparing power domain ";
+ if (dev->pm_domain)
callback = dev->pm_domain->ops.prepare;
- } else if (dev->type && dev->type->pm) {
- info = "preparing type ";
+ else if (dev->type && dev->type->pm)
callback = dev->type->pm->prepare;
- } else if (dev->class && dev->class->pm) {
- info = "preparing class ";
+ else if (dev->class && dev->class->pm)
callback = dev->class->pm->prepare;
- } else if (dev->bus && dev->bus->pm) {
- info = "preparing bus ";
+ else if (dev->bus && dev->bus->pm)
callback = dev->bus->pm->prepare;
- }
- if (!callback && dev->driver && dev->driver->pm) {
- info = "preparing driver ";
+ if (!callback && dev->driver && dev->driver->pm)
callback = dev->driver->pm->prepare;
- }
if (callback)
ret = callback(dev);
diff --git a/drivers/base/power/opp/Makefile b/drivers/base/power/opp/Makefile
index 19837ef..e70ceb4 100644
--- a/drivers/base/power/opp/Makefile
+++ b/drivers/base/power/opp/Makefile
@@ -1,3 +1,4 @@
ccflags-$(CONFIG_DEBUG_DRIVER) := -DDEBUG
obj-y += core.o cpu.o
+obj-$(CONFIG_OF) += of.o
obj-$(CONFIG_DEBUG_FS) += debugfs.o
diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c
index d8f4cc2..7c04c87 100644
--- a/drivers/base/power/opp/core.c
+++ b/drivers/base/power/opp/core.c
@@ -18,7 +18,6 @@
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/device.h>
-#include <linux/of.h>
#include <linux/export.h>
#include <linux/regulator/consumer.h>
@@ -29,7 +28,7 @@
* from here, with each opp_table containing the list of opps it supports in
* various states of availability.
*/
-static LIST_HEAD(opp_tables);
+LIST_HEAD(opp_tables);
/* Lock to allow exclusive modification to the device and opp lists */
DEFINE_MUTEX(opp_table_lock);
@@ -53,26 +52,6 @@
return NULL;
}
-static struct opp_table *_managed_opp(const struct device_node *np)
-{
- struct opp_table *opp_table;
-
- list_for_each_entry_rcu(opp_table, &opp_tables, node) {
- if (opp_table->np == np) {
- /*
-		 * Multiple devices can point to the same OPP table and
-		 * so will have the same node-pointer, np.
-		 *
-		 * But the OPPs will be considered shared only if the
-		 * OPP table contains an "opp-shared" property.
- */
- return opp_table->shared_opp ? opp_table : NULL;
- }
- }
-
- return NULL;
-}
-
/**
* _find_opp_table() - find opp_table struct using device pointer
* @dev: device pointer used to lookup OPP table
@@ -757,7 +736,6 @@
{
struct opp_table *opp_table;
struct opp_device *opp_dev;
- struct device_node *np;
int ret;
/* Check for existing table for 'dev' first */
@@ -781,20 +759,7 @@
return NULL;
}
- /*
-	 * Only required for backward compatibility with v1 bindings, but it
-	 * isn't harmful for other cases either, so we do it unconditionally.
- */
- np = of_node_get(dev->of_node);
- if (np) {
- u32 val;
-
- if (!of_property_read_u32(np, "clock-latency", &val))
- opp_table->clock_latency_ns_max = val;
- of_property_read_u32(np, "voltage-tolerance",
- &opp_table->voltage_tolerance_v1);
- of_node_put(np);
- }
+ _of_init_opp_table(opp_table, dev);
/* Set regulator to a non-NULL error value */
opp_table->regulator = ERR_PTR(-ENXIO);
@@ -890,8 +855,8 @@
* It is assumed that the caller holds required mutex for an RCU updater
* strategy.
*/
-static void _opp_remove(struct opp_table *opp_table,
- struct dev_pm_opp *opp, bool notify)
+void _opp_remove(struct opp_table *opp_table, struct dev_pm_opp *opp,
+ bool notify)
{
/*
* Notify the changes in the availability of the operable
@@ -952,8 +917,8 @@
}
EXPORT_SYMBOL_GPL(dev_pm_opp_remove);
-static struct dev_pm_opp *_allocate_opp(struct device *dev,
- struct opp_table **opp_table)
+struct dev_pm_opp *_allocate_opp(struct device *dev,
+ struct opp_table **opp_table)
{
struct dev_pm_opp *opp;
@@ -989,8 +954,8 @@
return true;
}
-static int _opp_add(struct device *dev, struct dev_pm_opp *new_opp,
- struct opp_table *opp_table)
+int _opp_add(struct device *dev, struct dev_pm_opp *new_opp,
+ struct opp_table *opp_table)
{
struct dev_pm_opp *opp;
struct list_head *head = &opp_table->opp_list;
@@ -1066,8 +1031,8 @@
* Duplicate OPPs (both freq and volt are same) and !opp->available
* -ENOMEM Memory allocation failure
*/
-static int _opp_add_v1(struct device *dev, unsigned long freq, long u_volt,
- bool dynamic)
+int _opp_add_v1(struct device *dev, unsigned long freq, long u_volt,
+ bool dynamic)
{
struct opp_table *opp_table;
struct dev_pm_opp *new_opp;
@@ -1112,83 +1077,6 @@
return ret;
}
-/* TODO: Support multiple regulators */
-static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev,
- struct opp_table *opp_table)
-{
- u32 microvolt[3] = {0};
- u32 val;
- int count, ret;
- struct property *prop = NULL;
- char name[NAME_MAX];
-
- /* Search for "opp-microvolt-<name>" */
- if (opp_table->prop_name) {
- snprintf(name, sizeof(name), "opp-microvolt-%s",
- opp_table->prop_name);
- prop = of_find_property(opp->np, name, NULL);
- }
-
- if (!prop) {
- /* Search for "opp-microvolt" */
- sprintf(name, "opp-microvolt");
- prop = of_find_property(opp->np, name, NULL);
-
- /* Missing property isn't a problem, but an invalid entry is */
- if (!prop)
- return 0;
- }
-
- count = of_property_count_u32_elems(opp->np, name);
- if (count < 0) {
- dev_err(dev, "%s: Invalid %s property (%d)\n",
- __func__, name, count);
- return count;
- }
-
- /* There can be one or three elements here */
- if (count != 1 && count != 3) {
- dev_err(dev, "%s: Invalid number of elements in %s property (%d)\n",
- __func__, name, count);
- return -EINVAL;
- }
-
- ret = of_property_read_u32_array(opp->np, name, microvolt, count);
- if (ret) {
- dev_err(dev, "%s: error parsing %s: %d\n", __func__, name, ret);
- return -EINVAL;
- }
-
- opp->u_volt = microvolt[0];
-
- if (count == 1) {
- opp->u_volt_min = opp->u_volt;
- opp->u_volt_max = opp->u_volt;
- } else {
- opp->u_volt_min = microvolt[1];
- opp->u_volt_max = microvolt[2];
- }
-
- /* Search for "opp-microamp-<name>" */
- prop = NULL;
- if (opp_table->prop_name) {
- snprintf(name, sizeof(name), "opp-microamp-%s",
- opp_table->prop_name);
- prop = of_find_property(opp->np, name, NULL);
- }
-
- if (!prop) {
- /* Search for "opp-microamp" */
- sprintf(name, "opp-microamp");
- prop = of_find_property(opp->np, name, NULL);
- }
-
- if (prop && !of_property_read_u32(opp->np, name, &val))
- opp->u_amp = val;
-
- return 0;
-}
-
/**
* dev_pm_opp_set_supported_hw() - Set supported platforms
* @dev: Device for which supported-hw has to be set.
@@ -1517,144 +1405,6 @@
}
EXPORT_SYMBOL_GPL(dev_pm_opp_put_regulator);
-static bool _opp_is_supported(struct device *dev, struct opp_table *opp_table,
- struct device_node *np)
-{
- unsigned int count = opp_table->supported_hw_count;
- u32 version;
- int ret;
-
- if (!opp_table->supported_hw)
- return true;
-
- while (count--) {
- ret = of_property_read_u32_index(np, "opp-supported-hw", count,
- &version);
- if (ret) {
- dev_warn(dev, "%s: failed to read opp-supported-hw property at index %d: %d\n",
- __func__, count, ret);
- return false;
- }
-
- /* Both of these are bitwise masks of the versions */
- if (!(version & opp_table->supported_hw[count]))
- return false;
- }
-
- return true;
-}
-
-/**
- * _opp_add_static_v2() - Allocate static OPPs (As per 'v2' DT bindings)
- * @dev: device for which we do this operation
- * @np: device node
- *
- * This function adds an opp definition to the opp table and returns status. The
- * opp can be controlled using dev_pm_opp_enable/disable functions and may be
- * removed by dev_pm_opp_remove.
- *
- * Locking: The internal opp_table and opp structures are RCU protected.
- * Hence this function internally uses RCU updater strategy with mutex locks
- * to keep the integrity of the internal data structures. Callers should ensure
- * that this function is *NOT* called under RCU protection or in contexts where
- * mutex cannot be locked.
- *
- * Return:
- * 0 On success OR
- * Duplicate OPPs (both freq and volt are same) and opp->available
- * -EEXIST Freq are same and volt are different OR
- * Duplicate OPPs (both freq and volt are same) and !opp->available
- * -ENOMEM Memory allocation failure
- * -EINVAL Failed parsing the OPP node
- */
-static int _opp_add_static_v2(struct device *dev, struct device_node *np)
-{
- struct opp_table *opp_table;
- struct dev_pm_opp *new_opp;
- u64 rate;
- u32 val;
- int ret;
-
- /* Hold our table modification lock here */
- mutex_lock(&opp_table_lock);
-
- new_opp = _allocate_opp(dev, &opp_table);
- if (!new_opp) {
- ret = -ENOMEM;
- goto unlock;
- }
-
- ret = of_property_read_u64(np, "opp-hz", &rate);
- if (ret < 0) {
- dev_err(dev, "%s: opp-hz not found\n", __func__);
- goto free_opp;
- }
-
- /* Check if the OPP supports hardware's hierarchy of versions or not */
- if (!_opp_is_supported(dev, opp_table, np)) {
- dev_dbg(dev, "OPP not supported by hardware: %llu\n", rate);
- goto free_opp;
- }
-
- /*
-	 * Rate is defined as an unsigned long in the clk API, so cast
-	 * explicitly to its type. Must be fixed once the clk API
-	 * guarantees a 64-bit rate.
- */
- new_opp->rate = (unsigned long)rate;
- new_opp->turbo = of_property_read_bool(np, "turbo-mode");
-
- new_opp->np = np;
- new_opp->dynamic = false;
- new_opp->available = true;
-
- if (!of_property_read_u32(np, "clock-latency-ns", &val))
- new_opp->clock_latency_ns = val;
-
- ret = opp_parse_supplies(new_opp, dev, opp_table);
- if (ret)
- goto free_opp;
-
- ret = _opp_add(dev, new_opp, opp_table);
- if (ret)
- goto free_opp;
-
- /* OPP to select on device suspend */
- if (of_property_read_bool(np, "opp-suspend")) {
- if (opp_table->suspend_opp) {
- dev_warn(dev, "%s: Multiple suspend OPPs found (%lu %lu)\n",
- __func__, opp_table->suspend_opp->rate,
- new_opp->rate);
- } else {
- new_opp->suspend = true;
- opp_table->suspend_opp = new_opp;
- }
- }
-
- if (new_opp->clock_latency_ns > opp_table->clock_latency_ns_max)
- opp_table->clock_latency_ns_max = new_opp->clock_latency_ns;
-
- mutex_unlock(&opp_table_lock);
-
- pr_debug("%s: turbo:%d rate:%lu uv:%lu uvmin:%lu uvmax:%lu latency:%lu\n",
- __func__, new_opp->turbo, new_opp->rate, new_opp->u_volt,
- new_opp->u_volt_min, new_opp->u_volt_max,
- new_opp->clock_latency_ns);
-
- /*
- * Notify the changes in the availability of the operable
- * frequency/voltage list.
- */
- srcu_notifier_call_chain(&opp_table->srcu_head, OPP_EVENT_ADD, new_opp);
- return 0;
-
-free_opp:
- _opp_remove(opp_table, new_opp, false);
-unlock:
- mutex_unlock(&opp_table_lock);
- return ret;
-}
-
/**
* dev_pm_opp_add() - Add an OPP table from a table definitions
* @dev: device for which we do this operation
@@ -1842,21 +1592,11 @@
}
EXPORT_SYMBOL_GPL(dev_pm_opp_get_notifier);
-#ifdef CONFIG_OF
-/**
- * dev_pm_opp_of_remove_table() - Free OPP table entries created from static DT
- * entries
- * @dev: device pointer used to lookup OPP table.
- *
- * Free OPPs created using static entries present in DT.
- *
- * Locking: The internal opp_table and opp structures are RCU protected.
- * Hence this function indirectly uses RCU updater strategy with mutex locks
- * to keep the integrity of the internal data structures. Callers should ensure
- * that this function is *NOT* called under RCU protection or in contexts where
- * mutex cannot be locked.
+/*
+ * Free OPPs created from static entries present in DT and, if the
+ * remove_all param is set, the dynamically added entries as well.
*/
-void dev_pm_opp_of_remove_table(struct device *dev)
+void _dev_pm_opp_remove_table(struct device *dev, bool remove_all)
{
struct opp_table *opp_table;
struct dev_pm_opp *opp, *tmp;
@@ -1881,7 +1621,7 @@
if (list_is_singular(&opp_table->dev_list)) {
/* Free static OPPs */
list_for_each_entry_safe(opp, tmp, &opp_table->opp_list, node) {
- if (!opp->dynamic)
+ if (remove_all || !opp->dynamic)
_opp_remove(opp_table, opp, true);
}
} else {
@@ -1891,160 +1631,22 @@
unlock:
mutex_unlock(&opp_table_lock);
}
-EXPORT_SYMBOL_GPL(dev_pm_opp_of_remove_table);
-
-/* Returns opp descriptor node for a device, caller must do of_node_put() */
-struct device_node *_of_get_opp_desc_node(struct device *dev)
-{
- /*
- * TODO: Support for multiple OPP tables.
- *
- * There should be only ONE phandle present in "operating-points-v2"
- * property.
- */
-
- return of_parse_phandle(dev->of_node, "operating-points-v2", 0);
-}
-
-/* Initializes OPP tables based on new bindings */
-static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np)
-{
- struct device_node *np;
- struct opp_table *opp_table;
- int ret = 0, count = 0;
-
- mutex_lock(&opp_table_lock);
-
- opp_table = _managed_opp(opp_np);
- if (opp_table) {
- /* OPPs are already managed */
- if (!_add_opp_dev(dev, opp_table))
- ret = -ENOMEM;
- mutex_unlock(&opp_table_lock);
- return ret;
- }
- mutex_unlock(&opp_table_lock);
-
- /* We have opp-table node now, iterate over it and add OPPs */
- for_each_available_child_of_node(opp_np, np) {
- count++;
-
- ret = _opp_add_static_v2(dev, np);
- if (ret) {
- dev_err(dev, "%s: Failed to add OPP, %d\n", __func__,
- ret);
- goto free_table;
- }
- }
-
-	/* There should be one or more OPPs defined */
- if (WARN_ON(!count))
- return -ENOENT;
-
- mutex_lock(&opp_table_lock);
-
- opp_table = _find_opp_table(dev);
- if (WARN_ON(IS_ERR(opp_table))) {
- ret = PTR_ERR(opp_table);
- mutex_unlock(&opp_table_lock);
- goto free_table;
- }
-
- opp_table->np = opp_np;
- opp_table->shared_opp = of_property_read_bool(opp_np, "opp-shared");
-
- mutex_unlock(&opp_table_lock);
-
- return 0;
-
-free_table:
- dev_pm_opp_of_remove_table(dev);
-
- return ret;
-}
-
-/* Initializes OPP tables based on old-deprecated bindings */
-static int _of_add_opp_table_v1(struct device *dev)
-{
- const struct property *prop;
- const __be32 *val;
- int nr;
-
- prop = of_find_property(dev->of_node, "operating-points", NULL);
- if (!prop)
- return -ENODEV;
- if (!prop->value)
- return -ENODATA;
-
- /*
- * Each OPP is a set of tuples consisting of frequency and
- * voltage like <freq-kHz vol-uV>.
- */
- nr = prop->length / sizeof(u32);
- if (nr % 2) {
- dev_err(dev, "%s: Invalid OPP table\n", __func__);
- return -EINVAL;
- }
-
- val = prop->value;
- while (nr) {
- unsigned long freq = be32_to_cpup(val++) * 1000;
- unsigned long volt = be32_to_cpup(val++);
-
- if (_opp_add_v1(dev, freq, volt, false))
- dev_warn(dev, "%s: Failed to add OPP %ld\n",
- __func__, freq);
- nr -= 2;
- }
-
- return 0;
-}
/**
- * dev_pm_opp_of_add_table() - Initialize opp table from device tree
+ * dev_pm_opp_remove_table() - Free all OPPs associated with the device
* @dev: device pointer used to lookup OPP table.
*
- * Register the initial OPP table with the OPP library for given device.
+ * Free both the OPPs created from static entries present in DT and
+ * the dynamically added entries.
*
* Locking: The internal opp_table and opp structures are RCU protected.
* Hence this function indirectly uses RCU updater strategy with mutex locks
* to keep the integrity of the internal data structures. Callers should ensure
* that this function is *NOT* called under RCU protection or in contexts where
* mutex cannot be locked.
- *
- * Return:
- * 0 On success OR
- * Duplicate OPPs (both freq and volt are same) and opp->available
- * -EEXIST Freq are same and volt are different OR
- * Duplicate OPPs (both freq and volt are same) and !opp->available
- * -ENOMEM Memory allocation failure
- * -ENODEV when 'operating-points' property is not found or is invalid data
- * in device node.
- * -ENODATA when empty 'operating-points' property is found
- * -EINVAL when invalid entries are found in opp-v2 table
*/
-int dev_pm_opp_of_add_table(struct device *dev)
+void dev_pm_opp_remove_table(struct device *dev)
{
- struct device_node *opp_np;
- int ret;
-
- /*
-	 * OPPs have two versions of bindings now. The older one is deprecated,
- * try for the new binding first.
- */
- opp_np = _of_get_opp_desc_node(dev);
- if (!opp_np) {
- /*
- * Try old-deprecated bindings for backward compatibility with
- * older dtbs.
- */
- return _of_add_opp_table_v1(dev);
- }
-
- ret = _of_add_opp_table_v2(dev, opp_np);
- of_node_put(opp_np);
-
- return ret;
+ _dev_pm_opp_remove_table(dev, true);
}
-EXPORT_SYMBOL_GPL(dev_pm_opp_of_add_table);
-#endif
+EXPORT_SYMBOL_GPL(dev_pm_opp_remove_table);
diff --git a/drivers/base/power/opp/cpu.c b/drivers/base/power/opp/cpu.c
index ba2bdbd..83d6e7b 100644
--- a/drivers/base/power/opp/cpu.c
+++ b/drivers/base/power/opp/cpu.c
@@ -18,7 +18,6 @@
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/export.h>
-#include <linux/of.h>
#include <linux/slab.h>
#include "opp.h"
@@ -119,8 +118,66 @@
EXPORT_SYMBOL_GPL(dev_pm_opp_free_cpufreq_table);
#endif /* CONFIG_CPU_FREQ */
-/* Required only for V1 bindings, as v2 can manage it from DT itself */
-int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask)
+void _dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask, bool of)
+{
+ struct device *cpu_dev;
+ int cpu;
+
+ WARN_ON(cpumask_empty(cpumask));
+
+ for_each_cpu(cpu, cpumask) {
+ cpu_dev = get_cpu_device(cpu);
+ if (!cpu_dev) {
+ pr_err("%s: failed to get cpu%d device\n", __func__,
+ cpu);
+ continue;
+ }
+
+ if (of)
+ dev_pm_opp_of_remove_table(cpu_dev);
+ else
+ dev_pm_opp_remove_table(cpu_dev);
+ }
+}
+
+/**
+ * dev_pm_opp_cpumask_remove_table() - Removes OPP table for @cpumask
+ * @cpumask: cpumask for which OPP table needs to be removed
+ *
+ * This removes the OPP tables for CPUs present in the @cpumask.
+ * This should be used to remove all the OPP entries associated with
+ * the cpus in @cpumask.
+ *
+ * Locking: The internal opp_table and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+void dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask)
+{
+ _dev_pm_opp_cpumask_remove_table(cpumask, false);
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_cpumask_remove_table);
+
+/**
+ * dev_pm_opp_set_sharing_cpus() - Mark OPP table as shared by few CPUs
+ * @cpu_dev: CPU device for which we do this operation
+ * @cpumask: cpumask of the CPUs which share the OPP table with @cpu_dev
+ *
+ * This marks the OPP table of @cpu_dev as shared by the CPUs present
+ * in @cpumask.
+ *
+ * Returns -ENODEV if OPP table isn't already present.
+ *
+ * Locking: The internal opp_table and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev,
+ const struct cpumask *cpumask)
{
struct opp_device *opp_dev;
struct opp_table *opp_table;
@@ -131,7 +188,7 @@
opp_table = _find_opp_table(cpu_dev);
if (IS_ERR(opp_table)) {
- ret = -EINVAL;
+ ret = PTR_ERR(opp_table);
goto unlock;
}
@@ -152,6 +209,9 @@
__func__, cpu);
continue;
}
+
+		/* Mark the opp-table as shared, since multiple CPUs now use it */
+ opp_table->shared_opp = true;
}
unlock:
mutex_unlock(&opp_table_lock);
@@ -160,112 +220,47 @@
}
EXPORT_SYMBOL_GPL(dev_pm_opp_set_sharing_cpus);
-#ifdef CONFIG_OF
-void dev_pm_opp_of_cpumask_remove_table(cpumask_var_t cpumask)
-{
- struct device *cpu_dev;
- int cpu;
-
- WARN_ON(cpumask_empty(cpumask));
-
- for_each_cpu(cpu, cpumask) {
- cpu_dev = get_cpu_device(cpu);
- if (!cpu_dev) {
- pr_err("%s: failed to get cpu%d device\n", __func__,
- cpu);
- continue;
- }
-
- dev_pm_opp_of_remove_table(cpu_dev);
- }
-}
-EXPORT_SYMBOL_GPL(dev_pm_opp_of_cpumask_remove_table);
-
-int dev_pm_opp_of_cpumask_add_table(cpumask_var_t cpumask)
-{
- struct device *cpu_dev;
- int cpu, ret = 0;
-
- WARN_ON(cpumask_empty(cpumask));
-
- for_each_cpu(cpu, cpumask) {
- cpu_dev = get_cpu_device(cpu);
- if (!cpu_dev) {
- pr_err("%s: failed to get cpu%d device\n", __func__,
- cpu);
- continue;
- }
-
- ret = dev_pm_opp_of_add_table(cpu_dev);
- if (ret) {
- pr_err("%s: couldn't find opp table for cpu:%d, %d\n",
- __func__, cpu, ret);
-
- /* Free all other OPPs */
- dev_pm_opp_of_cpumask_remove_table(cpumask);
- break;
- }
- }
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(dev_pm_opp_of_cpumask_add_table);
-
-/*
- * Works only for OPP v2 bindings.
+/**
+ * dev_pm_opp_get_sharing_cpus() - Get cpumask of CPUs sharing OPPs with @cpu_dev
+ * @cpu_dev: CPU device for which we do this operation
+ * @cpumask: cpumask to update with information of sharing CPUs
*
- * Returns -ENOENT if operating-points-v2 bindings aren't supported.
+ * This updates the @cpumask with CPUs that are sharing OPPs with @cpu_dev.
+ *
+ * Returns -ENODEV if OPP table isn't already present.
+ *
+ * Locking: The internal opp_table and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
*/
-int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask)
+int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask)
{
- struct device_node *np, *tmp_np;
- struct device *tcpu_dev;
- int cpu, ret = 0;
+ struct opp_device *opp_dev;
+ struct opp_table *opp_table;
+ int ret = 0;
- /* Get OPP descriptor node */
- np = _of_get_opp_desc_node(cpu_dev);
- if (!np) {
- dev_dbg(cpu_dev, "%s: Couldn't find cpu_dev node.\n", __func__);
- return -ENOENT;
+ mutex_lock(&opp_table_lock);
+
+ opp_table = _find_opp_table(cpu_dev);
+ if (IS_ERR(opp_table)) {
+ ret = PTR_ERR(opp_table);
+ goto unlock;
}
- cpumask_set_cpu(cpu_dev->id, cpumask);
+ cpumask_clear(cpumask);
- /* OPPs are shared ? */
- if (!of_property_read_bool(np, "opp-shared"))
- goto put_cpu_node;
-
- for_each_possible_cpu(cpu) {
- if (cpu == cpu_dev->id)
- continue;
-
- tcpu_dev = get_cpu_device(cpu);
- if (!tcpu_dev) {
- dev_err(cpu_dev, "%s: failed to get cpu%d device\n",
- __func__, cpu);
- ret = -ENODEV;
- goto put_cpu_node;
- }
-
- /* Get OPP descriptor node */
- tmp_np = _of_get_opp_desc_node(tcpu_dev);
- if (!tmp_np) {
- dev_err(tcpu_dev, "%s: Couldn't find tcpu_dev node.\n",
- __func__);
- ret = -ENOENT;
- goto put_cpu_node;
- }
-
- /* CPUs are sharing opp node */
- if (np == tmp_np)
- cpumask_set_cpu(cpu, cpumask);
-
- of_node_put(tmp_np);
+ if (opp_table->shared_opp) {
+ list_for_each_entry(opp_dev, &opp_table->dev_list, node)
+ cpumask_set_cpu(opp_dev->dev->id, cpumask);
+ } else {
+ cpumask_set_cpu(cpu_dev->id, cpumask);
}
-put_cpu_node:
- of_node_put(np);
+unlock:
+ mutex_unlock(&opp_table_lock);
+
return ret;
}
-EXPORT_SYMBOL_GPL(dev_pm_opp_of_get_sharing_cpus);
-#endif
+EXPORT_SYMBOL_GPL(dev_pm_opp_get_sharing_cpus);
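A plausible consumer of the new non-DT sharing API is a cpufreq driver that builds its OPP table dynamically. The hedged sketch below combines dev_pm_opp_add() from core.c with the two functions exported above; error handling is trimmed and my_driver_init_opps() is an invented name:

	/* Hedged usage sketch, not part of the patch: create two OPPs for
	 * cpu_dev, mark the policy CPUs as sharing its table, then read the
	 * sharing mask back.
	 */
	#include <linux/cpumask.h>
	#include <linux/pm_opp.h>

	static int my_driver_init_opps(struct device *cpu_dev,
				       const struct cpumask *policy_cpus)
	{
		struct cpumask shared;
		int ret;

		dev_pm_opp_add(cpu_dev, 500000000, 900000);	/* 500 MHz @ 0.90 V */
		dev_pm_opp_add(cpu_dev, 1000000000, 1100000);	/* 1 GHz @ 1.10 V */

		ret = dev_pm_opp_set_sharing_cpus(cpu_dev, policy_cpus);
		if (ret)
			return ret;

		/* "shared" now holds every CPU attached to cpu_dev's table. */
		return dev_pm_opp_get_sharing_cpus(cpu_dev, &shared);
	}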
diff --git a/drivers/base/power/opp/of.c b/drivers/base/power/opp/of.c
new file mode 100644
index 0000000..94d2010
--- /dev/null
+++ b/drivers/base/power/opp/of.c
@@ -0,0 +1,591 @@
+/*
+ * Generic OPP OF helpers
+ *
+ * Copyright (C) 2009-2010 Texas Instruments Incorporated.
+ * Nishanth Menon
+ * Romit Dasgupta
+ * Kevin Hilman
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/cpu.h>
+#include <linux/errno.h>
+#include <linux/device.h>
+#include <linux/of.h>
+#include <linux/export.h>
+
+#include "opp.h"
+
+static struct opp_table *_managed_opp(const struct device_node *np)
+{
+ struct opp_table *opp_table;
+
+ list_for_each_entry_rcu(opp_table, &opp_tables, node) {
+ if (opp_table->np == np) {
+ /*
+ * Multiple devices can point to the same OPP table and
+ * so will have same node-pointer, np.
+ *
+ * But the OPPs will be considered shared only if the
+ * OPP table contains an "opp-shared" property.
+ */
+ return opp_table->shared_opp ? opp_table : NULL;
+ }
+ }
+
+ return NULL;
+}
+
+void _of_init_opp_table(struct opp_table *opp_table, struct device *dev)
+{
+ struct device_node *np;
+
+ /*
+ * Only required for backward compatibility with v1 bindings, but
+ * harmless in other cases, so we do it unconditionally.
+ */
+ np = of_node_get(dev->of_node);
+ if (np) {
+ u32 val;
+
+ if (!of_property_read_u32(np, "clock-latency", &val))
+ opp_table->clock_latency_ns_max = val;
+ of_property_read_u32(np, "voltage-tolerance",
+ &opp_table->voltage_tolerance_v1);
+ of_node_put(np);
+ }
+}
+
+static bool _opp_is_supported(struct device *dev, struct opp_table *opp_table,
+ struct device_node *np)
+{
+ unsigned int count = opp_table->supported_hw_count;
+ u32 version;
+ int ret;
+
+ if (!opp_table->supported_hw)
+ return true;
+
+ while (count--) {
+ ret = of_property_read_u32_index(np, "opp-supported-hw", count,
+ &version);
+ if (ret) {
+ dev_warn(dev, "%s: failed to read opp-supported-hw property at index %d: %d\n",
+ __func__, count, ret);
+ return false;
+ }
+
+ /* Both of these are bitwise masks of the versions */
+ if (!(version & opp_table->supported_hw[count]))
+ return false;
+ }
+
+ return true;
+}
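To make the matching rule concrete: an OPP is accepted only when every hierarchy level of its "opp-supported-hw" value shares at least one set bit with the platform's mask at the same level. A standalone sketch of just that check (illustrative only; example_hw_match is hypothetical):

```c
/* e.g. a platform mask {0xF, 0x4} accepts an OPP carrying <0x8 0x4>. */
static bool example_hw_match(const u32 *platform_hw, const u32 *opp_hw,
			     unsigned int count)
{
	while (count--) {
		/* One level with no common bit rejects the whole OPP. */
		if (!(opp_hw[count] & platform_hw[count]))
			return false;
	}

	return true;
}
```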
+
+/* TODO: Support multiple regulators */
+static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev,
+ struct opp_table *opp_table)
+{
+ u32 microvolt[3] = {0};
+ u32 val;
+ int count, ret;
+ struct property *prop = NULL;
+ char name[NAME_MAX];
+
+ /* Search for "opp-microvolt-<name>" */
+ if (opp_table->prop_name) {
+ snprintf(name, sizeof(name), "opp-microvolt-%s",
+ opp_table->prop_name);
+ prop = of_find_property(opp->np, name, NULL);
+ }
+
+ if (!prop) {
+ /* Search for "opp-microvolt" */
+ sprintf(name, "opp-microvolt");
+ prop = of_find_property(opp->np, name, NULL);
+
+ /* Missing property isn't a problem, but an invalid entry is */
+ if (!prop)
+ return 0;
+ }
+
+ count = of_property_count_u32_elems(opp->np, name);
+ if (count < 0) {
+ dev_err(dev, "%s: Invalid %s property (%d)\n",
+ __func__, name, count);
+ return count;
+ }
+
+ /* There can be one or three elements here */
+ if (count != 1 && count != 3) {
+ dev_err(dev, "%s: Invalid number of elements in %s property (%d)\n",
+ __func__, name, count);
+ return -EINVAL;
+ }
+
+ ret = of_property_read_u32_array(opp->np, name, microvolt, count);
+ if (ret) {
+ dev_err(dev, "%s: error parsing %s: %d\n", __func__, name, ret);
+ return -EINVAL;
+ }
+
+ opp->u_volt = microvolt[0];
+
+ if (count == 1) {
+ opp->u_volt_min = opp->u_volt;
+ opp->u_volt_max = opp->u_volt;
+ } else {
+ opp->u_volt_min = microvolt[1];
+ opp->u_volt_max = microvolt[2];
+ }
+
+ /* Search for "opp-microamp-<name>" */
+ prop = NULL;
+ if (opp_table->prop_name) {
+ snprintf(name, sizeof(name), "opp-microamp-%s",
+ opp_table->prop_name);
+ prop = of_find_property(opp->np, name, NULL);
+ }
+
+ if (!prop) {
+ /* Search for "opp-microamp" */
+ sprintf(name, "opp-microamp");
+ prop = of_find_property(opp->np, name, NULL);
+ }
+
+ if (prop && !of_property_read_u32(opp->np, name, &val))
+ opp->u_amp = val;
+
+ return 0;
+}
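The voltage handling above implements the <target min max> convention of the bindings; a sketch of just the collapse step for the single-element case (illustrative only; names are hypothetical):

```c
struct example_supply {
	unsigned long u_volt, u_volt_min, u_volt_max;
};

static void example_fill_supply(struct example_supply *s,
				const u32 *microvolt, int count)
{
	s->u_volt = microvolt[0];
	/* A single element pins min and max to the target voltage. */
	s->u_volt_min = (count == 3) ? microvolt[1] : microvolt[0];
	s->u_volt_max = (count == 3) ? microvolt[2] : microvolt[0];
}
```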
+
+/**
+ * dev_pm_opp_of_remove_table() - Free OPP table entries created from static DT
+ * entries
+ * @dev: device pointer used to lookup OPP table.
+ *
+ * Free OPPs created using static entries present in DT.
+ *
+ * Locking: The internal opp_table and opp structures are RCU protected.
+ * Hence this function indirectly uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+void dev_pm_opp_of_remove_table(struct device *dev)
+{
+ _dev_pm_opp_remove_table(dev, false);
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_of_remove_table);
+
+/* Returns opp descriptor node for a device, caller must do of_node_put() */
+struct device_node *_of_get_opp_desc_node(struct device *dev)
+{
+ /*
+ * TODO: Support for multiple OPP tables.
+ *
+ * There should be only ONE phandle present in "operating-points-v2"
+ * property.
+ */
+
+ return of_parse_phandle(dev->of_node, "operating-points-v2", 0);
+}
+
+/**
+ * _opp_add_static_v2() - Allocate static OPPs (As per 'v2' DT bindings)
+ * @dev: device for which we do this operation
+ * @np: device node
+ *
+ * This function adds an opp definition to the opp table and returns status. The
+ * opp can be controlled using dev_pm_opp_enable/disable functions and may be
+ * removed by dev_pm_opp_remove.
+ *
+ * Locking: The internal opp_table and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ *
+ * Return:
+ * 0 On success OR
+ * Duplicate OPPs (both freq and volt are same) and opp->available
+ * -EEXIST Freq are same and volt are different OR
+ * Duplicate OPPs (both freq and volt are same) and !opp->available
+ * -ENOMEM Memory allocation failure
+ * -EINVAL Failed parsing the OPP node
+ */
+static int _opp_add_static_v2(struct device *dev, struct device_node *np)
+{
+ struct opp_table *opp_table;
+ struct dev_pm_opp *new_opp;
+ u64 rate;
+ u32 val;
+ int ret;
+
+ /* Hold our table modification lock here */
+ mutex_lock(&opp_table_lock);
+
+ new_opp = _allocate_opp(dev, &opp_table);
+ if (!new_opp) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+
+ ret = of_property_read_u64(np, "opp-hz", &rate);
+ if (ret < 0) {
+ dev_err(dev, "%s: opp-hz not found\n", __func__);
+ goto free_opp;
+ }
+
+ /* Check if the OPP supports hardware's hierarchy of versions or not */
+ if (!_opp_is_supported(dev, opp_table, np)) {
+ dev_dbg(dev, "OPP not supported by hardware: %llu\n", rate);
+ goto free_opp;
+ }
+
+ /*
+ * Rate is defined as an unsigned long in the clk API, so cast it
+ * explicitly to that type. Must be fixed once rate is guaranteed
+ * to be 64 bit in the clk API.
+ */
+ new_opp->rate = (unsigned long)rate;
+ new_opp->turbo = of_property_read_bool(np, "turbo-mode");
+
+ new_opp->np = np;
+ new_opp->dynamic = false;
+ new_opp->available = true;
+
+ if (!of_property_read_u32(np, "clock-latency-ns", &val))
+ new_opp->clock_latency_ns = val;
+
+ ret = opp_parse_supplies(new_opp, dev, opp_table);
+ if (ret)
+ goto free_opp;
+
+ ret = _opp_add(dev, new_opp, opp_table);
+ if (ret)
+ goto free_opp;
+
+ /* OPP to select on device suspend */
+ if (of_property_read_bool(np, "opp-suspend")) {
+ if (opp_table->suspend_opp) {
+ dev_warn(dev, "%s: Multiple suspend OPPs found (%lu %lu)\n",
+ __func__, opp_table->suspend_opp->rate,
+ new_opp->rate);
+ } else {
+ new_opp->suspend = true;
+ opp_table->suspend_opp = new_opp;
+ }
+ }
+
+ if (new_opp->clock_latency_ns > opp_table->clock_latency_ns_max)
+ opp_table->clock_latency_ns_max = new_opp->clock_latency_ns;
+
+ mutex_unlock(&opp_table_lock);
+
+ pr_debug("%s: turbo:%d rate:%lu uv:%lu uvmin:%lu uvmax:%lu latency:%lu\n",
+ __func__, new_opp->turbo, new_opp->rate, new_opp->u_volt,
+ new_opp->u_volt_min, new_opp->u_volt_max,
+ new_opp->clock_latency_ns);
+
+ /*
+ * Notify the changes in the availability of the operable
+ * frequency/voltage list.
+ */
+ srcu_notifier_call_chain(&opp_table->srcu_head, OPP_EVENT_ADD, new_opp);
+ return 0;
+
+free_opp:
+ _opp_remove(opp_table, new_opp, false);
+unlock:
+ mutex_unlock(&opp_table_lock);
+ return ret;
+}
+
+/* Initializes OPP tables based on new bindings */
+static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np)
+{
+ struct device_node *np;
+ struct opp_table *opp_table;
+ int ret = 0, count = 0;
+
+ mutex_lock(&opp_table_lock);
+
+ opp_table = _managed_opp(opp_np);
+ if (opp_table) {
+ /* OPPs are already managed */
+ if (!_add_opp_dev(dev, opp_table))
+ ret = -ENOMEM;
+ mutex_unlock(&opp_table_lock);
+ return ret;
+ }
+ mutex_unlock(&opp_table_lock);
+
+ /* We have the opp-table node now; iterate over it and add OPPs */
+ for_each_available_child_of_node(opp_np, np) {
+ count++;
+
+ ret = _opp_add_static_v2(dev, np);
+ if (ret) {
+ dev_err(dev, "%s: Failed to add OPP, %d\n", __func__,
+ ret);
+ goto free_table;
+ }
+ }
+
+ /* There should be one or more OPPs defined */
+ if (WARN_ON(!count))
+ return -ENOENT;
+
+ mutex_lock(&opp_table_lock);
+
+ opp_table = _find_opp_table(dev);
+ if (WARN_ON(IS_ERR(opp_table))) {
+ ret = PTR_ERR(opp_table);
+ mutex_unlock(&opp_table_lock);
+ goto free_table;
+ }
+
+ opp_table->np = opp_np;
+ opp_table->shared_opp = of_property_read_bool(opp_np, "opp-shared");
+
+ mutex_unlock(&opp_table_lock);
+
+ return 0;
+
+free_table:
+ dev_pm_opp_of_remove_table(dev);
+
+ return ret;
+}
+
+/* Initializes OPP tables based on old-deprecated bindings */
+static int _of_add_opp_table_v1(struct device *dev)
+{
+ const struct property *prop;
+ const __be32 *val;
+ int nr;
+
+ prop = of_find_property(dev->of_node, "operating-points", NULL);
+ if (!prop)
+ return -ENODEV;
+ if (!prop->value)
+ return -ENODATA;
+
+ /*
+ * Each OPP is a set of tuples consisting of frequency and
+ * voltage like <freq-kHz vol-uV>.
+ */
+ nr = prop->length / sizeof(u32);
+ if (nr % 2) {
+ dev_err(dev, "%s: Invalid OPP table\n", __func__);
+ return -EINVAL;
+ }
+
+ val = prop->value;
+ while (nr) {
+ unsigned long freq = be32_to_cpup(val++) * 1000;
+ unsigned long volt = be32_to_cpup(val++);
+
+ if (_opp_add_v1(dev, freq, volt, false))
+ dev_warn(dev, "%s: Failed to add OPP %ld\n",
+ __func__, freq);
+ nr -= 2;
+ }
+
+ return 0;
+}
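For reference, the v1 walk consumes a flat cell array such as operating-points = <998400 1075000>, <1478400 1175000>;, two cells per OPP. The same decoding as a trimmed-down sketch (illustrative only):

```c
#include <linux/of.h>
#include <linux/printk.h>

static void example_walk_v1(const __be32 *val, int nr)
{
	while (nr > 0) {
		unsigned long freq = be32_to_cpup(val++) * 1000; /* kHz -> Hz */
		unsigned long volt = be32_to_cpup(val++);	 /* uV */

		pr_debug("OPP: %lu Hz at %lu uV\n", freq, volt);
		nr -= 2;
	}
}
```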
+
+/**
+ * dev_pm_opp_of_add_table() - Initialize opp table from device tree
+ * @dev: device pointer used to lookup OPP table.
+ *
+ * Register the initial OPP table with the OPP library for given device.
+ *
+ * Locking: The internal opp_table and opp structures are RCU protected.
+ * Hence this function indirectly uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ *
+ * Return:
+ * 0 On success OR
+ * Duplicate OPPs (both freq and volt are same) and opp->available
+ * -EEXIST Freq are same and volt are different OR
+ * Duplicate OPPs (both freq and volt are same) and !opp->available
+ * -ENOMEM Memory allocation failure
+ * -ENODEV when the 'operating-points' property is not found in the
+ * device node, or contains invalid data
+ * -ENODATA when empty 'operating-points' property is found
+ * -EINVAL when invalid entries are found in opp-v2 table
+ */
+int dev_pm_opp_of_add_table(struct device *dev)
+{
+ struct device_node *opp_np;
+ int ret;
+
+ /*
+ * OPPs have two versions of bindings now. The older one is deprecated,
+ * so try the new binding first.
+ */
+ opp_np = _of_get_opp_desc_node(dev);
+ if (!opp_np) {
+ /*
+ * Try old-deprecated bindings for backward compatibility with
+ * older dtbs.
+ */
+ return _of_add_opp_table_v1(dev);
+ }
+
+ ret = _of_add_opp_table_v2(dev, opp_np);
+ of_node_put(opp_np);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_of_add_table);
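A hypothetical consumer sketch (example_probe and example_setup are made-up names) showing the add/remove pairing a driver probe would use:

```c
#include <linux/device.h>
#include <linux/pm_opp.h>

static int example_setup(struct device *dev)
{
	return 0;	/* stand-in for the rest of the probe */
}

static int example_probe(struct device *dev)
{
	int ret;

	ret = dev_pm_opp_of_add_table(dev);
	if (ret)
		return ret;	/* neither v2 nor v1 bindings were usable */

	ret = example_setup(dev);
	if (ret)
		dev_pm_opp_of_remove_table(dev);	/* undo on failure */

	return ret;
}
```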
+
+/* CPU device specific helpers */
+
+/**
+ * dev_pm_opp_of_cpumask_remove_table() - Removes OPP table for @cpumask
+ * @cpumask: cpumask for which OPP table needs to be removed
+ *
+ * This removes the OPP tables for CPUs present in the @cpumask.
+ * This should be used only to remove static entries created from DT.
+ *
+ * Locking: The internal opp_table and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+void dev_pm_opp_of_cpumask_remove_table(const struct cpumask *cpumask)
+{
+ _dev_pm_opp_cpumask_remove_table(cpumask, true);
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_of_cpumask_remove_table);
+
+/**
+ * dev_pm_opp_of_cpumask_add_table() - Adds OPP table for @cpumask
+ * @cpumask: cpumask for which OPP table needs to be added.
+ *
+ * This adds the OPP tables for CPUs present in the @cpumask.
+ *
+ * Locking: The internal opp_table and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+int dev_pm_opp_of_cpumask_add_table(const struct cpumask *cpumask)
+{
+ struct device *cpu_dev;
+ int cpu, ret = 0;
+
+ WARN_ON(cpumask_empty(cpumask));
+
+ for_each_cpu(cpu, cpumask) {
+ cpu_dev = get_cpu_device(cpu);
+ if (!cpu_dev) {
+ pr_err("%s: failed to get cpu%d device\n", __func__,
+ cpu);
+ continue;
+ }
+
+ ret = dev_pm_opp_of_add_table(cpu_dev);
+ if (ret) {
+ pr_err("%s: couldn't find opp table for cpu:%d, %d\n",
+ __func__, cpu, ret);
+
+ /* Free all other OPPs */
+ dev_pm_opp_of_cpumask_remove_table(cpumask);
+ break;
+ }
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_of_cpumask_add_table);
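A sketch of the intended cpufreq-side usage (hypothetical name, not part of the patch). On failure the helper has already removed any tables it created, so the caller only propagates the error:

```c
#include <linux/cpufreq.h>
#include <linux/pm_opp.h>

static int example_policy_init(struct cpufreq_policy *policy)
{
	int ret = dev_pm_opp_of_cpumask_add_table(policy->cpus);

	if (ret)
		return ret;	/* helper already cleaned up after itself */

	/* ... build the cpufreq frequency table from the OPPs ... */
	return 0;
}
```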
+
+/*
+ * Works only for OPP v2 bindings.
+ *
+ * Returns -ENOENT if operating-points-v2 bindings aren't supported.
+ */
+/**
+ * dev_pm_opp_of_get_sharing_cpus() - Get cpumask of CPUs sharing OPPs with
+ * @cpu_dev using operating-points-v2
+ * bindings.
+ *
+ * @cpu_dev: CPU device for which we do this operation
+ * @cpumask: cpumask to update with information of sharing CPUs
+ *
+ * This updates the @cpumask with CPUs that are sharing OPPs with @cpu_dev.
+ *
+ * Returns -ENOENT if operating-points-v2 isn't present for @cpu_dev.
+ *
+ * Locking: The internal opp_table and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev,
+ struct cpumask *cpumask)
+{
+ struct device_node *np, *tmp_np;
+ struct device *tcpu_dev;
+ int cpu, ret = 0;
+
+ /* Get OPP descriptor node */
+ np = _of_get_opp_desc_node(cpu_dev);
+ if (!np) {
+ dev_dbg(cpu_dev, "%s: Couldn't find cpu_dev node.\n", __func__);
+ return -ENOENT;
+ }
+
+ cpumask_set_cpu(cpu_dev->id, cpumask);
+
+ /* OPPs are shared? */
+ if (!of_property_read_bool(np, "opp-shared"))
+ goto put_cpu_node;
+
+ for_each_possible_cpu(cpu) {
+ if (cpu == cpu_dev->id)
+ continue;
+
+ tcpu_dev = get_cpu_device(cpu);
+ if (!tcpu_dev) {
+ dev_err(cpu_dev, "%s: failed to get cpu%d device\n",
+ __func__, cpu);
+ ret = -ENODEV;
+ goto put_cpu_node;
+ }
+
+ /* Get OPP descriptor node */
+ tmp_np = _of_get_opp_desc_node(tcpu_dev);
+ if (!tmp_np) {
+ dev_err(tcpu_dev, "%s: Couldn't find tcpu_dev node.\n",
+ __func__);
+ ret = -ENOENT;
+ goto put_cpu_node;
+ }
+
+ /* CPUs are sharing opp node */
+ if (np == tmp_np)
+ cpumask_set_cpu(cpu, cpumask);
+
+ of_node_put(tmp_np);
+ }
+
+put_cpu_node:
+ of_node_put(np);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_of_get_sharing_cpus);
diff --git a/drivers/base/power/opp/opp.h b/drivers/base/power/opp/opp.h
index f67f806..20f3be2 100644
--- a/drivers/base/power/opp/opp.h
+++ b/drivers/base/power/opp/opp.h
@@ -28,6 +28,8 @@
/* Lock to allow exclusive modification to the device and opp lists */
extern struct mutex opp_table_lock;
+extern struct list_head opp_tables;
+
/*
* Internal data structure organization with the OPP layer library is as
* follows:
@@ -183,6 +185,18 @@
struct opp_table *_find_opp_table(struct device *dev);
struct opp_device *_add_opp_dev(const struct device *dev, struct opp_table *opp_table);
struct device_node *_of_get_opp_desc_node(struct device *dev);
+void _dev_pm_opp_remove_table(struct device *dev, bool remove_all);
+struct dev_pm_opp *_allocate_opp(struct device *dev, struct opp_table **opp_table);
+int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, struct opp_table *opp_table);
+void _opp_remove(struct opp_table *opp_table, struct dev_pm_opp *opp, bool notify);
+int _opp_add_v1(struct device *dev, unsigned long freq, long u_volt, bool dynamic);
+void _dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask, bool of);
+
+#ifdef CONFIG_OF
+void _of_init_opp_table(struct opp_table *opp_table, struct device *dev);
+#else
+static inline void _of_init_opp_table(struct opp_table *opp_table, struct device *dev) {}
+#endif
#ifdef CONFIG_DEBUG_FS
void opp_debug_remove_one(struct dev_pm_opp *opp);
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 4c70550..b746904 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -1506,11 +1506,16 @@
goto out;
}
- ret = callback(dev);
+ ret = pm_runtime_set_active(dev);
if (ret)
goto out;
- pm_runtime_set_active(dev);
+ ret = callback(dev);
+ if (ret) {
+ pm_runtime_set_suspended(dev);
+ goto out;
+ }
+
pm_runtime_mark_last_busy(dev);
out:
pm_runtime_enable(dev);
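The reordering matters because the callback may consult the runtime PM status. A condensed sketch of the corrected sequence (illustrative only; example_force_resume is hypothetical):

```c
#include <linux/pm_runtime.h>

static int example_force_resume(struct device *dev,
				int (*callback)(struct device *))
{
	int ret = pm_runtime_set_active(dev);

	if (ret)
		return ret;

	ret = callback(dev);
	if (ret)
		pm_runtime_set_suspended(dev);	/* roll back on failure */
	else
		pm_runtime_mark_last_busy(dev);

	return ret;
}
```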
diff --git a/drivers/base/regmap/regcache-flat.c b/drivers/base/regmap/regcache-flat.c
index 3ee7255..4d2e50b 100644
--- a/drivers/base/regmap/regcache-flat.c
+++ b/drivers/base/regmap/regcache-flat.c
@@ -27,7 +27,7 @@
int i;
unsigned int *cache;
- if (!map || map->reg_stride_order < 0)
+ if (!map || map->reg_stride_order < 0 || !map->max_register)
return -EINVAL;
map->cache = kcalloc(regcache_flat_get_index(map, map->max_register)
diff --git a/drivers/base/regmap/regcache.c b/drivers/base/regmap/regcache.c
index 4170b7d..df7ff729 100644
--- a/drivers/base/regmap/regcache.c
+++ b/drivers/base/regmap/regcache.c
@@ -529,7 +529,7 @@
* regcache_cache_bypass: Put a register map into cache bypass mode
*
* @map: map to configure
- * @cache_bypass: flag if changes should not be written to the hardware
+ * @cache_bypass: flag if changes should not be written to the cache
*
* When a register map is marked with the cache bypass option, writes
* to the register map API will only update the hardware and not the
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 1e25b52..7b1c412 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -104,7 +104,7 @@
#define IPMI_BT_INTMASK_ENABLE_IRQ_BIT 1
enum si_type {
- SI_KCS, SI_SMIC, SI_BT
+ SI_KCS, SI_SMIC, SI_BT
};
static const char * const si_to_str[] = { "kcs", "smic", "bt" };
@@ -410,7 +410,7 @@
rv = SI_SM_CALL_WITHOUT_DELAY;
}
- out:
+out:
return rv;
}
@@ -539,7 +539,7 @@
static void handle_flags(struct smi_info *smi_info)
{
- retry:
+retry:
if (smi_info->msg_flags & WDT_PRE_TIMEOUT_INT) {
/* Watchdog pre-timeout */
smi_inc_stat(smi_info, watchdog_pretimeouts);
@@ -831,7 +831,7 @@
{
enum si_sm_result si_sm_result;
- restart:
+restart:
/*
* There used to be a loop here that waited a little while
* (around 25us) before giving up. That turned out to be
@@ -944,7 +944,7 @@
smi_info->timer_running = false;
}
- out:
+out:
return si_sm_result;
}
@@ -1190,7 +1190,7 @@
timeout = jiffies + SI_TIMEOUT_JIFFIES;
}
- do_mod_timer:
+do_mod_timer:
if (smi_result != SI_SM_IDLE)
smi_mod_timer(smi_info, timeout);
else
@@ -1576,10 +1576,9 @@
if (request_region(addr + idx * info->io.regspacing,
info->io.regsize, DEVICE_NAME) == NULL) {
/* Undo allocations */
- while (idx--) {
+ while (idx--)
release_region(addr + idx * info->io.regspacing,
info->io.regsize);
- }
return -EIO;
}
}
@@ -1638,25 +1637,28 @@
}
#endif
-static void mem_cleanup(struct smi_info *info)
+static void mem_region_cleanup(struct smi_info *info, int num)
{
unsigned long addr = info->io.addr_data;
- int mapsize;
+ int idx;
+ for (idx = 0; idx < num; idx++)
+ release_mem_region(addr + idx * info->io.regspacing,
+ info->io.regsize);
+}
+
+static void mem_cleanup(struct smi_info *info)
+{
if (info->io.addr) {
iounmap(info->io.addr);
-
- mapsize = ((info->io_size * info->io.regspacing)
- - (info->io.regspacing - info->io.regsize));
-
- release_mem_region(addr, mapsize);
+ mem_region_cleanup(info, info->io_size);
}
}
static int mem_setup(struct smi_info *info)
{
unsigned long addr = info->io.addr_data;
- int mapsize;
+ int mapsize, idx;
if (!addr)
return -ENODEV;
@@ -1693,6 +1695,21 @@
}
/*
+ * Some BIOSes reserve disjoint memory regions in their ACPI
+ * tables. This causes problems when trying to request the
+ * entire region. Therefore we must request each register
+ * separately.
+ */
+ for (idx = 0; idx < info->io_size; idx++) {
+ if (request_mem_region(addr + idx * info->io.regspacing,
+ info->io.regsize, DEVICE_NAME) == NULL) {
+ /* Undo allocations */
+ mem_region_cleanup(info, idx);
+ return -EIO;
+ }
+ }
+
+ /*
* Calculate the total amount of memory to claim. This is an
* unusual looking calculation, but it avoids claiming any
* more memory than it has to. It will claim everything
@@ -1701,13 +1718,9 @@
*/
mapsize = ((info->io_size * info->io.regspacing)
- (info->io.regspacing - info->io.regsize));
-
- if (request_mem_region(addr, mapsize, DEVICE_NAME) == NULL)
- return -EIO;
-
info->io.addr = ioremap(addr, mapsize);
if (info->io.addr == NULL) {
- release_mem_region(addr, mapsize);
+ mem_region_cleanup(info, info->io_size);
return -EIO;
}
return 0;
@@ -1975,7 +1988,7 @@
}
}
rv = len;
- out:
+out:
kfree(str);
return rv;
}
@@ -2945,7 +2958,7 @@
/* Check and record info from the get device id, in case we need it. */
rv = ipmi_demangle_device_id(resp, resp_len, &smi_info->device_id);
- out:
+out:
kfree(resp);
return rv;
}
@@ -3192,7 +3205,7 @@
else
smi_info->supports_event_msg_buff = true;
- out:
+out:
kfree(resp);
return rv;
}
@@ -3718,10 +3731,10 @@
return 0;
- out_err_stop_timer:
+out_err_stop_timer:
wait_for_timer_and_thread(new_smi);
- out_err:
+out_err:
new_smi->interrupt_disabled = true;
if (new_smi->intf) {
diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c
index 8b3be8b..097c868 100644
--- a/drivers/char/ipmi/ipmi_ssif.c
+++ b/drivers/char/ipmi/ipmi_ssif.c
@@ -1870,7 +1870,7 @@
return -EIO;
}
- myaddr = spmi->addr.address >> 1;
+ myaddr = spmi->addr.address & 0x7f;
return new_ssif_client(myaddr, NULL, 0, 0, SI_SPMI);
}
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index a7f4585..b7445b6 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -18,7 +18,11 @@
if CPU_FREQ
+config CPU_FREQ_GOV_ATTR_SET
+ bool
+
config CPU_FREQ_GOV_COMMON
+ select CPU_FREQ_GOV_ATTR_SET
select IRQ_WORK
bool
@@ -103,6 +107,17 @@
Be aware that not all cpufreq drivers support the conservative
governor. If unsure have a look at the help section of the
driver. Fallback governor will be the performance governor.
+
+config CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
+ bool "schedutil"
+ depends on SMP
+ select CPU_FREQ_GOV_SCHEDUTIL
+ select CPU_FREQ_GOV_PERFORMANCE
+ help
+ Use the 'schedutil' CPUFreq governor by default. If unsure,
+ have a look at the help section of that governor. The fallback
+ governor will be 'performance'.
+
endchoice
config CPU_FREQ_GOV_PERFORMANCE
@@ -184,6 +199,26 @@
If in doubt, say N.
+config CPU_FREQ_GOV_SCHEDUTIL
+ tristate "'schedutil' cpufreq policy governor"
+ depends on CPU_FREQ && SMP
+ select CPU_FREQ_GOV_ATTR_SET
+ select IRQ_WORK
+ help
+ This governor makes decisions based on the utilization data provided
+ by the scheduler. It sets the CPU frequency to be proportional to
+ the utilization/capacity ratio coming from the scheduler. If the
+ utilization is frequency-invariant, the new frequency is also
+ proportional to the maximum available frequency. If that is not the
+ case, it is proportional to the current frequency of the CPU. The
+ frequency tipping point is at utilization/capacity equal to 80% in
+ both cases.
+
+ To compile this driver as a module, choose M here: the module will
+ be called cpufreq_schedutil.
+
+ If in doubt, say N.
+
comment "CPU frequency scaling drivers"
config CPUFREQ_DT
@@ -191,6 +226,7 @@
depends on HAVE_CLK && OF
# if CPU_THERMAL is on and THERMAL=m, CPUFREQ_DT cannot be =y:
depends on !CPU_THERMAL || THERMAL
+ select CPUFREQ_DT_PLATDEV
select PM_OPP
help
This adds a generic DT based cpufreq driver for frequency management.
@@ -199,6 +235,15 @@
If in doubt, say N.
+config CPUFREQ_DT_PLATDEV
+ bool
+ help
+ This adds a generic DT based cpufreq platdev driver for frequency
+ management. This creates a 'cpufreq-dt' platform device on the
+ supported platforms.
+
+ If in doubt, say N.
+
if X86
source "drivers/cpufreq/Kconfig.x86"
endif
diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index 14b1f93..d89b8af 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -50,15 +50,6 @@
If in doubt, say N.
-config ARM_HISI_ACPU_CPUFREQ
- tristate "Hisilicon ACPU CPUfreq driver"
- depends on ARCH_HISI && CPUFREQ_DT
- select PM_OPP
- help
- This enables the hisilicon ACPU CPUfreq driver.
-
- If in doubt, say N.
-
config ARM_IMX6Q_CPUFREQ
tristate "Freescale i.MX6 cpufreq support"
depends on ARCH_MXC
diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86
index c59bdcb..adbd1de 100644
--- a/drivers/cpufreq/Kconfig.x86
+++ b/drivers/cpufreq/Kconfig.x86
@@ -5,6 +5,7 @@
config X86_INTEL_PSTATE
bool "Intel P state control"
depends on X86
+ select ACPI_PROCESSOR if ACPI
help
This driver provides a P state for Intel core processors.
The driver implements an internal governor and will become
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index 9e63fb1..e1eb11e 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -11,8 +11,10 @@
obj-$(CONFIG_CPU_FREQ_GOV_ONDEMAND) += cpufreq_ondemand.o
obj-$(CONFIG_CPU_FREQ_GOV_CONSERVATIVE) += cpufreq_conservative.o
obj-$(CONFIG_CPU_FREQ_GOV_COMMON) += cpufreq_governor.o
+obj-$(CONFIG_CPU_FREQ_GOV_ATTR_SET) += cpufreq_governor_attr_set.o
obj-$(CONFIG_CPUFREQ_DT) += cpufreq-dt.o
+obj-$(CONFIG_CPUFREQ_DT_PLATDEV) += cpufreq-dt-platdev.o
##################################################################################
# x86 drivers.
@@ -53,7 +55,6 @@
obj-$(CONFIG_UX500_SOC_DB8500) += dbx500-cpufreq.o
obj-$(CONFIG_ARM_EXYNOS5440_CPUFREQ) += exynos5440-cpufreq.o
obj-$(CONFIG_ARM_HIGHBANK_CPUFREQ) += highbank-cpufreq.o
-obj-$(CONFIG_ARM_HISI_ACPU_CPUFREQ) += hisi-acpu-cpufreq.o
obj-$(CONFIG_ARM_IMX6Q_CPUFREQ) += imx6q-cpufreq.o
obj-$(CONFIG_ARM_INTEGRATOR) += integrator-cpufreq.o
obj-$(CONFIG_ARM_KIRKWOOD_CPUFREQ) += kirkwood-cpufreq.o
@@ -78,6 +79,7 @@
obj-$(CONFIG_ARM_TEGRA124_CPUFREQ) += tegra124-cpufreq.o
obj-$(CONFIG_ARM_VEXPRESS_SPC_CPUFREQ) += vexpress-spc-cpufreq.o
obj-$(CONFIG_ACPI_CPPC_CPUFREQ) += cppc_cpufreq.o
+obj-$(CONFIG_MACH_MVEBU_V7) += mvebu-cpufreq.o
##################################################################################
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index fb57121..32a1505 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -25,6 +25,8 @@
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
@@ -50,8 +52,6 @@
MODULE_DESCRIPTION("ACPI Processor P-States Driver");
MODULE_LICENSE("GPL");
-#define PFX "acpi-cpufreq: "
-
enum {
UNDEFINED_CAPABLE = 0,
SYSTEM_INTEL_MSR_CAPABLE,
@@ -65,7 +65,6 @@
#define MSR_K7_HWCR_CPB_DIS (1ULL << 25)
struct acpi_cpufreq_data {
- struct cpufreq_frequency_table *freq_table;
unsigned int resume;
unsigned int cpu_feature;
unsigned int acpi_perf_cpu;
@@ -200,8 +199,9 @@
return cpu_has(cpu, X86_FEATURE_HW_PSTATE);
}
-static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data)
+static unsigned extract_io(struct cpufreq_policy *policy, u32 value)
{
+ struct acpi_cpufreq_data *data = policy->driver_data;
struct acpi_processor_performance *perf;
int i;
@@ -209,13 +209,14 @@
for (i = 0; i < perf->state_count; i++) {
if (value == perf->states[i].status)
- return data->freq_table[i].frequency;
+ return policy->freq_table[i].frequency;
}
return 0;
}
-static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data)
+static unsigned extract_msr(struct cpufreq_policy *policy, u32 msr)
{
+ struct acpi_cpufreq_data *data = policy->driver_data;
struct cpufreq_frequency_table *pos;
struct acpi_processor_performance *perf;
@@ -226,20 +227,22 @@
perf = to_perf_data(data);
- cpufreq_for_each_entry(pos, data->freq_table)
+ cpufreq_for_each_entry(pos, policy->freq_table)
if (msr == perf->states[pos->driver_data].status)
return pos->frequency;
- return data->freq_table[0].frequency;
+ return policy->freq_table[0].frequency;
}
-static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data)
+static unsigned extract_freq(struct cpufreq_policy *policy, u32 val)
{
+ struct acpi_cpufreq_data *data = policy->driver_data;
+
switch (data->cpu_feature) {
case SYSTEM_INTEL_MSR_CAPABLE:
case SYSTEM_AMD_MSR_CAPABLE:
- return extract_msr(val, data);
+ return extract_msr(policy, val);
case SYSTEM_IO_CAPABLE:
- return extract_io(val, data);
+ return extract_io(policy, val);
default:
return 0;
}
@@ -374,11 +377,11 @@
return 0;
data = policy->driver_data;
- if (unlikely(!data || !data->freq_table))
+ if (unlikely(!data || !policy->freq_table))
return 0;
- cached_freq = data->freq_table[to_perf_data(data)->state].frequency;
- freq = extract_freq(get_cur_val(cpumask_of(cpu), data), data);
+ cached_freq = policy->freq_table[to_perf_data(data)->state].frequency;
+ freq = extract_freq(policy, get_cur_val(cpumask_of(cpu), data));
if (freq != cached_freq) {
/*
* The dreaded BIOS frequency change behind our back.
@@ -392,14 +395,15 @@
return freq;
}
-static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq,
- struct acpi_cpufreq_data *data)
+static unsigned int check_freqs(struct cpufreq_policy *policy,
+ const struct cpumask *mask, unsigned int freq)
{
+ struct acpi_cpufreq_data *data = policy->driver_data;
unsigned int cur_freq;
unsigned int i;
for (i = 0; i < 100; i++) {
- cur_freq = extract_freq(get_cur_val(mask, data), data);
+ cur_freq = extract_freq(policy, get_cur_val(mask, data));
if (cur_freq == freq)
return 1;
udelay(10);
@@ -416,12 +420,12 @@
unsigned int next_perf_state = 0; /* Index into perf table */
int result = 0;
- if (unlikely(data == NULL || data->freq_table == NULL)) {
+ if (unlikely(!data)) {
return -ENODEV;
}
perf = to_perf_data(data);
- next_perf_state = data->freq_table[index].driver_data;
+ next_perf_state = policy->freq_table[index].driver_data;
if (perf->state == next_perf_state) {
if (unlikely(data->resume)) {
pr_debug("Called after resume, resetting to P%d\n",
@@ -444,8 +448,8 @@
drv_write(data, mask, perf->states[next_perf_state].control);
if (acpi_pstate_strict) {
- if (!check_freqs(mask, data->freq_table[index].frequency,
- data)) {
+ if (!check_freqs(policy, mask,
+ policy->freq_table[index].frequency)) {
pr_debug("acpi_cpufreq_target failed (%d)\n",
policy->cpu);
result = -EAGAIN;
@@ -458,6 +462,43 @@
return result;
}
+static unsigned int acpi_cpufreq_fast_switch(struct cpufreq_policy *policy,
+ unsigned int target_freq)
+{
+ struct acpi_cpufreq_data *data = policy->driver_data;
+ struct acpi_processor_performance *perf;
+ struct cpufreq_frequency_table *entry;
+ unsigned int next_perf_state, next_freq, freq;
+
+ /*
+ * Find the closest frequency above target_freq.
+ *
+ * The table is sorted in the reverse order with respect to the
+ * frequency and all of the entries are valid (see the initialization).
+ */
+ entry = policy->freq_table;
+ do {
+ entry++;
+ freq = entry->frequency;
+ } while (freq >= target_freq && freq != CPUFREQ_TABLE_END);
+ entry--;
+ next_freq = entry->frequency;
+ next_perf_state = entry->driver_data;
+
+ perf = to_perf_data(data);
+ if (perf->state == next_perf_state) {
+ if (unlikely(data->resume))
+ data->resume = 0;
+ else
+ return next_freq;
+ }
+
+ data->cpu_freq_write(&perf->control_register,
+ perf->states[next_perf_state].control);
+ perf->state = next_perf_state;
+ return next_freq;
+}
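The table walk depends on two invariants from the init path: the table is sorted in descending frequency order and contains no invalid entries. The same CPUFREQ_RELATION_L lookup as a standalone sketch (illustrative only):

```c
#include <linux/cpufreq.h>

static unsigned int example_pick_freq(const struct cpufreq_frequency_table *t,
				      unsigned int target)
{
	const struct cpufreq_frequency_table *e = t;

	/* Walk past every entry still >= target, then step back one. */
	do {
		e++;
	} while (e->frequency >= target && e->frequency != CPUFREQ_TABLE_END);

	return (--e)->frequency;
}
```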
+
static unsigned long
acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
{
@@ -611,10 +652,7 @@
if ((c->x86 == 15) &&
(c->x86_model == 6) &&
(c->x86_mask == 8)) {
- printk(KERN_INFO "acpi-cpufreq: Intel(R) "
- "Xeon(R) 7100 Errata AL30, processors may "
- "lock up on frequency changes: disabling "
- "acpi-cpufreq.\n");
+ pr_info("Intel(R) Xeon(R) 7100 Errata AL30, processors may lock up on frequency changes: disabling acpi-cpufreq\n");
return -ENODEV;
}
}
@@ -631,6 +669,7 @@
unsigned int result = 0;
struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
struct acpi_processor_performance *perf;
+ struct cpufreq_frequency_table *freq_table;
#ifdef CONFIG_SMP
static int blacklisted;
#endif
@@ -690,7 +729,7 @@
cpumask_copy(data->freqdomain_cpus,
topology_sibling_cpumask(cpu));
policy->shared_type = CPUFREQ_SHARED_TYPE_HW;
- pr_info_once(PFX "overriding BIOS provided _PSD data\n");
+ pr_info_once("overriding BIOS provided _PSD data\n");
}
#endif
@@ -742,9 +781,9 @@
goto err_unreg;
}
- data->freq_table = kzalloc(sizeof(*data->freq_table) *
+ freq_table = kzalloc(sizeof(*freq_table) *
(perf->state_count+1), GFP_KERNEL);
- if (!data->freq_table) {
+ if (!freq_table) {
result = -ENOMEM;
goto err_unreg;
}
@@ -762,30 +801,29 @@
if (perf->control_register.space_id == ACPI_ADR_SPACE_FIXED_HARDWARE &&
policy->cpuinfo.transition_latency > 20 * 1000) {
policy->cpuinfo.transition_latency = 20 * 1000;
- printk_once(KERN_INFO
- "P-state transition latency capped at 20 uS\n");
+ pr_info_once("P-state transition latency capped at 20 uS\n");
}
/* table init */
for (i = 0; i < perf->state_count; i++) {
if (i > 0 && perf->states[i].core_frequency >=
- data->freq_table[valid_states-1].frequency / 1000)
+ freq_table[valid_states-1].frequency / 1000)
continue;
- data->freq_table[valid_states].driver_data = i;
- data->freq_table[valid_states].frequency =
+ freq_table[valid_states].driver_data = i;
+ freq_table[valid_states].frequency =
perf->states[i].core_frequency * 1000;
valid_states++;
}
- data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
+ freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
perf->state = 0;
- result = cpufreq_table_validate_and_show(policy, data->freq_table);
+ result = cpufreq_table_validate_and_show(policy, freq_table);
if (result)
goto err_freqfree;
if (perf->states[0].core_frequency * 1000 != policy->cpuinfo.max_freq)
- printk(KERN_WARNING FW_WARN "P-state 0 is not max freq\n");
+ pr_warn(FW_WARN "P-state 0 is not max freq\n");
switch (perf->control_register.space_id) {
case ACPI_ADR_SPACE_SYSTEM_IO:
@@ -821,10 +859,13 @@
*/
data->resume = 1;
+ policy->fast_switch_possible = !acpi_pstate_strict &&
+ !(policy_is_shared(policy) && policy->shared_type != CPUFREQ_SHARED_TYPE_ANY);
+
return result;
err_freqfree:
- kfree(data->freq_table);
+ kfree(freq_table);
err_unreg:
acpi_processor_unregister_performance(cpu);
err_free_mask:
@@ -842,13 +883,12 @@
pr_debug("acpi_cpufreq_cpu_exit\n");
- if (data) {
- policy->driver_data = NULL;
- acpi_processor_unregister_performance(data->acpi_perf_cpu);
- free_cpumask_var(data->freqdomain_cpus);
- kfree(data->freq_table);
- kfree(data);
- }
+ policy->fast_switch_possible = false;
+ policy->driver_data = NULL;
+ acpi_processor_unregister_performance(data->acpi_perf_cpu);
+ free_cpumask_var(data->freqdomain_cpus);
+ kfree(policy->freq_table);
+ kfree(data);
return 0;
}
@@ -876,6 +916,7 @@
static struct cpufreq_driver acpi_cpufreq_driver = {
.verify = cpufreq_generic_frequency_table_verify,
.target_index = acpi_cpufreq_target,
+ .fast_switch = acpi_cpufreq_fast_switch,
.bios_limit = acpi_processor_get_bios_limit,
.init = acpi_cpufreq_cpu_init,
.exit = acpi_cpufreq_cpu_exit,
diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c
index c251247..4180422 100644
--- a/drivers/cpufreq/arm_big_little.c
+++ b/drivers/cpufreq/arm_big_little.c
@@ -298,7 +298,8 @@
return 0;
}
-static void _put_cluster_clk_and_freq_table(struct device *cpu_dev)
+static void _put_cluster_clk_and_freq_table(struct device *cpu_dev,
+ const struct cpumask *cpumask)
{
u32 cluster = raw_cpu_to_cluster(cpu_dev->id);
@@ -308,11 +309,12 @@
clk_put(clk[cluster]);
dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table[cluster]);
if (arm_bL_ops->free_opp_table)
- arm_bL_ops->free_opp_table(cpu_dev);
+ arm_bL_ops->free_opp_table(cpumask);
dev_dbg(cpu_dev, "%s: cluster: %d\n", __func__, cluster);
}
-static void put_cluster_clk_and_freq_table(struct device *cpu_dev)
+static void put_cluster_clk_and_freq_table(struct device *cpu_dev,
+ const struct cpumask *cpumask)
{
u32 cluster = cpu_to_cluster(cpu_dev->id);
int i;
@@ -321,7 +323,7 @@
return;
if (cluster < MAX_CLUSTERS)
- return _put_cluster_clk_and_freq_table(cpu_dev);
+ return _put_cluster_clk_and_freq_table(cpu_dev, cpumask);
for_each_present_cpu(i) {
struct device *cdev = get_cpu_device(i);
@@ -330,14 +332,15 @@
return;
}
- _put_cluster_clk_and_freq_table(cdev);
+ _put_cluster_clk_and_freq_table(cdev, cpumask);
}
/* free virtual table */
kfree(freq_table[cluster]);
}
-static int _get_cluster_clk_and_freq_table(struct device *cpu_dev)
+static int _get_cluster_clk_and_freq_table(struct device *cpu_dev,
+ const struct cpumask *cpumask)
{
u32 cluster = raw_cpu_to_cluster(cpu_dev->id);
int ret;
@@ -345,7 +348,7 @@
if (freq_table[cluster])
return 0;
- ret = arm_bL_ops->init_opp_table(cpu_dev);
+ ret = arm_bL_ops->init_opp_table(cpumask);
if (ret) {
dev_err(cpu_dev, "%s: init_opp_table failed, cpu: %d, err: %d\n",
__func__, cpu_dev->id, ret);
@@ -374,14 +377,15 @@
free_opp_table:
if (arm_bL_ops->free_opp_table)
- arm_bL_ops->free_opp_table(cpu_dev);
+ arm_bL_ops->free_opp_table(cpumask);
out:
dev_err(cpu_dev, "%s: Failed to get data for cluster: %d\n", __func__,
cluster);
return ret;
}
-static int get_cluster_clk_and_freq_table(struct device *cpu_dev)
+static int get_cluster_clk_and_freq_table(struct device *cpu_dev,
+ const struct cpumask *cpumask)
{
u32 cluster = cpu_to_cluster(cpu_dev->id);
int i, ret;
@@ -390,7 +394,7 @@
return 0;
if (cluster < MAX_CLUSTERS) {
- ret = _get_cluster_clk_and_freq_table(cpu_dev);
+ ret = _get_cluster_clk_and_freq_table(cpu_dev, cpumask);
if (ret)
atomic_dec(&cluster_usage[cluster]);
return ret;
@@ -407,7 +411,7 @@
return -ENODEV;
}
- ret = _get_cluster_clk_and_freq_table(cdev);
+ ret = _get_cluster_clk_and_freq_table(cdev, cpumask);
if (ret)
goto put_clusters;
}
@@ -433,7 +437,7 @@
return -ENODEV;
}
- _put_cluster_clk_and_freq_table(cdev);
+ _put_cluster_clk_and_freq_table(cdev, cpumask);
}
atomic_dec(&cluster_usage[cluster]);
@@ -455,18 +459,6 @@
return -ENODEV;
}
- ret = get_cluster_clk_and_freq_table(cpu_dev);
- if (ret)
- return ret;
-
- ret = cpufreq_table_validate_and_show(policy, freq_table[cur_cluster]);
- if (ret) {
- dev_err(cpu_dev, "CPU %d, cluster: %d invalid freq table\n",
- policy->cpu, cur_cluster);
- put_cluster_clk_and_freq_table(cpu_dev);
- return ret;
- }
-
if (cur_cluster < MAX_CLUSTERS) {
int cpu;
@@ -479,6 +471,18 @@
per_cpu(physical_cluster, policy->cpu) = A15_CLUSTER;
}
+ ret = get_cluster_clk_and_freq_table(cpu_dev, policy->cpus);
+ if (ret)
+ return ret;
+
+ ret = cpufreq_table_validate_and_show(policy, freq_table[cur_cluster]);
+ if (ret) {
+ dev_err(cpu_dev, "CPU %d, cluster: %d invalid freq table\n",
+ policy->cpu, cur_cluster);
+ put_cluster_clk_and_freq_table(cpu_dev, policy->cpus);
+ return ret;
+ }
+
if (arm_bL_ops->get_transition_latency)
policy->cpuinfo.transition_latency =
arm_bL_ops->get_transition_latency(cpu_dev);
@@ -509,7 +513,7 @@
return -ENODEV;
}
- put_cluster_clk_and_freq_table(cpu_dev);
+ put_cluster_clk_and_freq_table(cpu_dev, policy->related_cpus);
dev_dbg(cpu_dev, "%s: Exited, cpu: %d\n", __func__, policy->cpu);
return 0;
diff --git a/drivers/cpufreq/arm_big_little.h b/drivers/cpufreq/arm_big_little.h
index b88889d..184d7c3 100644
--- a/drivers/cpufreq/arm_big_little.h
+++ b/drivers/cpufreq/arm_big_little.h
@@ -30,11 +30,11 @@
* This must set opp table for cpu_dev in a similar way as done by
* dev_pm_opp_of_add_table().
*/
- int (*init_opp_table)(struct device *cpu_dev);
+ int (*init_opp_table)(const struct cpumask *cpumask);
/* Optional */
int (*get_transition_latency)(struct device *cpu_dev);
- void (*free_opp_table)(struct device *cpu_dev);
+ void (*free_opp_table)(const struct cpumask *cpumask);
};
int bL_cpufreq_register(struct cpufreq_arm_bL_ops *ops);
diff --git a/drivers/cpufreq/arm_big_little_dt.c b/drivers/cpufreq/arm_big_little_dt.c
index 16ddeef..39b3f51 100644
--- a/drivers/cpufreq/arm_big_little_dt.c
+++ b/drivers/cpufreq/arm_big_little_dt.c
@@ -43,23 +43,6 @@
return np;
}
-static int dt_init_opp_table(struct device *cpu_dev)
-{
- struct device_node *np;
- int ret;
-
- np = of_node_get(cpu_dev->of_node);
- if (!np) {
- pr_err("failed to find cpu%d node\n", cpu_dev->id);
- return -ENOENT;
- }
-
- ret = dev_pm_opp_of_add_table(cpu_dev);
- of_node_put(np);
-
- return ret;
-}
-
static int dt_get_transition_latency(struct device *cpu_dev)
{
struct device_node *np;
@@ -81,8 +64,8 @@
static struct cpufreq_arm_bL_ops dt_bL_ops = {
.name = "dt-bl",
.get_transition_latency = dt_get_transition_latency,
- .init_opp_table = dt_init_opp_table,
- .free_opp_table = dev_pm_opp_of_remove_table,
+ .init_opp_table = dev_pm_opp_of_cpumask_add_table,
+ .free_opp_table = dev_pm_opp_of_cpumask_remove_table,
};
static int generic_bL_probe(struct platform_device *pdev)
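With init_opp_table()/free_opp_table() now taking a cpumask, a platform supplying its own ops would plug in along these lines (hypothetical sketch; the example names are not from the patch):

```c
#include <linux/pm_opp.h>

#include "arm_big_little.h"

static int example_init_opp_table(const struct cpumask *cpumask)
{
	/* Platforms without DT OPPs would build the table here instead. */
	return dev_pm_opp_of_cpumask_add_table(cpumask);
}

static struct cpufreq_arm_bL_ops example_bL_ops = {
	.name		= "example-bl",
	.init_opp_table	= example_init_opp_table,
	.free_opp_table	= dev_pm_opp_of_cpumask_remove_table,
};
```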
diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index 7c0bdfb..8882b8e 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -173,4 +173,25 @@
return -ENODEV;
}
+static void __exit cppc_cpufreq_exit(void)
+{
+ struct cpudata *cpu;
+ int i;
+
+ cpufreq_unregister_driver(&cppc_cpufreq_driver);
+
+ for_each_possible_cpu(i) {
+ cpu = all_cpu_data[i];
+ free_cpumask_var(cpu->shared_cpu_map);
+ kfree(cpu);
+ }
+
+ kfree(all_cpu_data);
+}
+
+module_exit(cppc_cpufreq_exit);
+MODULE_AUTHOR("Ashwin Chaugule");
+MODULE_DESCRIPTION("CPUFreq driver based on the ACPI CPPC v5.0+ spec");
+MODULE_LICENSE("GPL");
+
late_initcall(cppc_cpufreq_init);
diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
new file mode 100644
index 0000000..3646b14
--- /dev/null
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 2016 Linaro.
+ * Viresh Kumar <viresh.kumar@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/err.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+
+static const struct of_device_id machines[] __initconst = {
+ { .compatible = "allwinner,sun4i-a10", },
+ { .compatible = "allwinner,sun5i-a10s", },
+ { .compatible = "allwinner,sun5i-a13", },
+ { .compatible = "allwinner,sun5i-r8", },
+ { .compatible = "allwinner,sun6i-a31", },
+ { .compatible = "allwinner,sun6i-a31s", },
+ { .compatible = "allwinner,sun7i-a20", },
+ { .compatible = "allwinner,sun8i-a23", },
+ { .compatible = "allwinner,sun8i-a33", },
+ { .compatible = "allwinner,sun8i-a83t", },
+ { .compatible = "allwinner,sun8i-h3", },
+
+ { .compatible = "hisilicon,hi6220", },
+
+ { .compatible = "fsl,imx27", },
+ { .compatible = "fsl,imx51", },
+ { .compatible = "fsl,imx53", },
+ { .compatible = "fsl,imx7d", },
+
+ { .compatible = "marvell,berlin", },
+
+ { .compatible = "samsung,exynos3250", },
+ { .compatible = "samsung,exynos4210", },
+ { .compatible = "samsung,exynos4212", },
+ { .compatible = "samsung,exynos4412", },
+ { .compatible = "samsung,exynos5250", },
+#ifndef CONFIG_BL_SWITCHER
+ { .compatible = "samsung,exynos5420", },
+ { .compatible = "samsung,exynos5800", },
+#endif
+
+ { .compatible = "renesas,emev2", },
+ { .compatible = "renesas,r7s72100", },
+ { .compatible = "renesas,r8a73a4", },
+ { .compatible = "renesas,r8a7740", },
+ { .compatible = "renesas,r8a7778", },
+ { .compatible = "renesas,r8a7779", },
+ { .compatible = "renesas,r8a7790", },
+ { .compatible = "renesas,r8a7791", },
+ { .compatible = "renesas,r8a7793", },
+ { .compatible = "renesas,r8a7794", },
+ { .compatible = "renesas,sh73a0", },
+
+ { .compatible = "rockchip,rk2928", },
+ { .compatible = "rockchip,rk3036", },
+ { .compatible = "rockchip,rk3066a", },
+ { .compatible = "rockchip,rk3066b", },
+ { .compatible = "rockchip,rk3188", },
+ { .compatible = "rockchip,rk3228", },
+ { .compatible = "rockchip,rk3288", },
+ { .compatible = "rockchip,rk3366", },
+ { .compatible = "rockchip,rk3368", },
+ { .compatible = "rockchip,rk3399", },
+
+ { .compatible = "sigma,tango4" },
+
+ { .compatible = "ti,omap2", },
+ { .compatible = "ti,omap3", },
+ { .compatible = "ti,omap4", },
+ { .compatible = "ti,omap5", },
+
+ { .compatible = "xlnx,zynq-7000", },
+};
+
+static int __init cpufreq_dt_platdev_init(void)
+{
+ struct device_node *np = of_find_node_by_path("/");
+
+ if (!np)
+ return -ENODEV;
+
+ if (!of_match_node(machines, np)) {
+ of_node_put(np);
+ return -ENODEV;
+ }
+
+ of_node_put(np);
+
+ return PTR_ERR_OR_ZERO(platform_device_register_simple("cpufreq-dt", -1,
+ NULL, 0));
+}
+device_initcall(cpufreq_dt_platdev_init);
diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index 5f8dbe6..3957de8 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -15,7 +15,6 @@
#include <linux/cpu.h>
#include <linux/cpu_cooling.h>
#include <linux/cpufreq.h>
-#include <linux/cpufreq-dt.h>
#include <linux/cpumask.h>
#include <linux/err.h>
#include <linux/module.h>
@@ -147,7 +146,7 @@
struct clk *cpu_clk;
struct dev_pm_opp *suspend_opp;
unsigned int transition_latency;
- bool opp_v1 = false;
+ bool fallback = false;
const char *name;
int ret;
@@ -167,14 +166,16 @@
/* Get OPP-sharing information from "operating-points-v2" bindings */
ret = dev_pm_opp_of_get_sharing_cpus(cpu_dev, policy->cpus);
if (ret) {
+ if (ret != -ENOENT)
+ goto out_put_clk;
+
/*
* operating-points-v2 not supported, fallback to old method of
- * finding shared-OPPs for backward compatibility.
+ * finding shared-OPPs for backward compatibility if the
+ * platform hasn't set sharing CPUs.
*/
- if (ret == -ENOENT)
- opp_v1 = true;
- else
- goto out_put_clk;
+ if (dev_pm_opp_get_sharing_cpus(cpu_dev, policy->cpus))
+ fallback = true;
}
/*
@@ -214,11 +215,8 @@
goto out_free_opp;
}
- if (opp_v1) {
- struct cpufreq_dt_platform_data *pd = cpufreq_get_driver_data();
-
- if (!pd || !pd->independent_clocks)
- cpumask_setall(policy->cpus);
+ if (fallback) {
+ cpumask_setall(policy->cpus);
/*
* OPP tables are initialized only for policy->cpu, do it for
diff --git a/drivers/cpufreq/cpufreq-nforce2.c b/drivers/cpufreq/cpufreq-nforce2.c
index db69eeb..5503d49 100644
--- a/drivers/cpufreq/cpufreq-nforce2.c
+++ b/drivers/cpufreq/cpufreq-nforce2.c
@@ -7,6 +7,8 @@
* BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
@@ -56,8 +58,6 @@
MODULE_PARM_DESC(min_fsb,
"Minimum FSB to use, if not defined: current FSB - 50");
-#define PFX "cpufreq-nforce2: "
-
/**
* nforce2_calc_fsb - calculate FSB
* @pll: PLL value
@@ -174,13 +174,13 @@
int pll = 0;
if ((fsb > max_fsb) || (fsb < NFORCE2_MIN_FSB)) {
- printk(KERN_ERR PFX "FSB %d is out of range!\n", fsb);
+ pr_err("FSB %d is out of range!\n", fsb);
return -EINVAL;
}
tfsb = nforce2_fsb_read(0);
if (!tfsb) {
- printk(KERN_ERR PFX "Error while reading the FSB\n");
+ pr_err("Error while reading the FSB\n");
return -EINVAL;
}
@@ -276,8 +276,7 @@
/* local_irq_save(flags); */
if (nforce2_set_fsb(target_fsb) < 0)
- printk(KERN_ERR PFX "Changing FSB to %d failed\n",
- target_fsb);
+ pr_err("Changing FSB to %d failed\n", target_fsb);
else
pr_debug("Changed FSB successfully to %d\n",
target_fsb);
@@ -325,8 +324,7 @@
/* FIX: Get FID from CPU */
if (!fid) {
if (!cpu_khz) {
- printk(KERN_WARNING PFX
- "cpu_khz not set, can't calculate multiplier!\n");
+ pr_warn("cpu_khz not set, can't calculate multiplier!\n");
return -ENODEV;
}
@@ -341,8 +339,8 @@
}
}
- printk(KERN_INFO PFX "FSB currently at %i MHz, FID %d.%d\n", fsb,
- fid / 10, fid % 10);
+ pr_info("FSB currently at %i MHz, FID %d.%d\n",
+ fsb, fid / 10, fid % 10);
/* Set maximum FSB to FSB at boot time */
max_fsb = nforce2_fsb_read(1);
@@ -401,11 +399,9 @@
if (nforce2_dev == NULL)
return -ENODEV;
- printk(KERN_INFO PFX "Detected nForce2 chipset revision %X\n",
- nforce2_dev->revision);
- printk(KERN_INFO PFX
- "FSB changing is maybe unstable and can lead to "
- "crashes and data loss.\n");
+ pr_info("Detected nForce2 chipset revision %X\n",
+ nforce2_dev->revision);
+ pr_info("FSB changing is maybe unstable and can lead to crashes and data loss\n");
return 0;
}
@@ -423,7 +419,7 @@
/* detect chipset */
if (nforce2_detect_chipset()) {
- printk(KERN_INFO PFX "No nForce2 chipset.\n");
+ pr_info("No nForce2 chipset\n");
return -ENODEV;
}
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index c4acfc5..035513b 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -78,6 +78,11 @@
static unsigned int __cpufreq_get(struct cpufreq_policy *policy);
static int cpufreq_start_governor(struct cpufreq_policy *policy);
+static inline int cpufreq_exit_governor(struct cpufreq_policy *policy)
+{
+ return cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT);
+}
+
/**
* Two notifier lists: the "policy" list is involved in the
* validation process for a new CPU frequency policy; the
@@ -429,6 +434,73 @@
}
EXPORT_SYMBOL_GPL(cpufreq_freq_transition_end);
+/*
+ * Fast frequency switching status count. Positive means "enabled", negative
+ * means "disabled" and 0 means "not decided yet".
+ */
+static int cpufreq_fast_switch_count;
+static DEFINE_MUTEX(cpufreq_fast_switch_lock);
+
+static void cpufreq_list_transition_notifiers(void)
+{
+ struct notifier_block *nb;
+
+ pr_info("Registered transition notifiers:\n");
+
+ mutex_lock(&cpufreq_transition_notifier_list.mutex);
+
+ for (nb = cpufreq_transition_notifier_list.head; nb; nb = nb->next)
+ pr_info("%pF\n", nb->notifier_call);
+
+ mutex_unlock(&cpufreq_transition_notifier_list.mutex);
+}
+
+/**
+ * cpufreq_enable_fast_switch - Enable fast frequency switching for policy.
+ * @policy: cpufreq policy to enable fast frequency switching for.
+ *
+ * Try to enable fast frequency switching for @policy.
+ *
+ * The attempt will fail if there is at least one transition notifier registered
+ * at this point, as fast frequency switching is quite fundamentally at odds
+ * with transition notifiers. Thus if successful, it will make registration of
+ * transition notifiers fail going forward.
+ */
+void cpufreq_enable_fast_switch(struct cpufreq_policy *policy)
+{
+ lockdep_assert_held(&policy->rwsem);
+
+ if (!policy->fast_switch_possible)
+ return;
+
+ mutex_lock(&cpufreq_fast_switch_lock);
+ if (cpufreq_fast_switch_count >= 0) {
+ cpufreq_fast_switch_count++;
+ policy->fast_switch_enabled = true;
+ } else {
+ pr_warn("CPU%u: Fast frequency switching not enabled\n",
+ policy->cpu);
+ cpufreq_list_transition_notifiers();
+ }
+ mutex_unlock(&cpufreq_fast_switch_lock);
+}
+EXPORT_SYMBOL_GPL(cpufreq_enable_fast_switch);
+
+/**
+ * cpufreq_disable_fast_switch - Disable fast frequency switching for policy.
+ * @policy: cpufreq policy to disable fast frequency switching for.
+ */
+void cpufreq_disable_fast_switch(struct cpufreq_policy *policy)
+{
+ mutex_lock(&cpufreq_fast_switch_lock);
+ if (policy->fast_switch_enabled) {
+ policy->fast_switch_enabled = false;
+ if (!WARN_ON(cpufreq_fast_switch_count <= 0))
+ cpufreq_fast_switch_count--;
+ }
+ mutex_unlock(&cpufreq_fast_switch_lock);
+}
+EXPORT_SYMBOL_GPL(cpufreq_disable_fast_switch);
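A governor-side sketch of the intended enable/check pattern (hypothetical names, not part of the patch). The enable call must run under policy->rwsem, which the lockdep assertion above enforces; the governor then picks its update path based on the outcome:

```c
#include <linux/cpufreq.h>

static int example_gov_start(struct cpufreq_policy *policy)
{
	cpufreq_enable_fast_switch(policy);

	if (policy->fast_switch_enabled)
		pr_debug("CPU%u: using fast frequency switching\n",
			 policy->cpu);
	else
		pr_debug("CPU%u: falling back to ->target_index()\n",
			 policy->cpu);

	return 0;
}

static void example_gov_stop(struct cpufreq_policy *policy)
{
	cpufreq_disable_fast_switch(policy);	/* drops the global count */
}
```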
/*********************************************************************
* SYSFS INTERFACE *
@@ -1248,26 +1320,24 @@
*/
static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
{
+ struct cpufreq_policy *policy;
unsigned cpu = dev->id;
- int ret;
dev_dbg(dev, "%s: adding CPU%u\n", __func__, cpu);
- if (cpu_online(cpu)) {
- ret = cpufreq_online(cpu);
- } else {
- /*
- * A hotplug notifier will follow and we will handle it as CPU
- * online then. For now, just create the sysfs link, unless
- * there is no policy or the link is already present.
- */
- struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);
+ if (cpu_online(cpu))
+ return cpufreq_online(cpu);
- ret = policy && !cpumask_test_and_set_cpu(cpu, policy->real_cpus)
- ? add_cpu_dev_symlink(policy, cpu) : 0;
- }
+ /*
+ * A hotplug notifier will follow and we will handle it as CPU online
+ * then. For now, just create the sysfs link, unless there is no policy
+ * or the link is already present.
+ */
+ policy = per_cpu(cpufreq_cpu_data, cpu);
+ if (!policy || cpumask_test_and_set_cpu(cpu, policy->real_cpus))
+ return 0;
- return ret;
+ return add_cpu_dev_symlink(policy, cpu);
}
static void cpufreq_offline(unsigned int cpu)
@@ -1319,7 +1389,7 @@
/* If cpu is last user of policy, free policy */
if (has_target()) {
- ret = cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT);
+ ret = cpufreq_exit_governor(policy);
if (ret)
pr_err("%s: Failed to exit governor\n", __func__);
}
@@ -1447,8 +1517,12 @@
ret_freq = cpufreq_driver->get(policy->cpu);
- /* Updating inactive policies is invalid, so avoid doing that. */
- if (unlikely(policy_is_inactive(policy)))
+ /*
+ * Updating inactive policies is invalid, so avoid doing that. Also
+ * if fast frequency switching is used with the given policy, the check
+ * against policy->cur is pointless, so skip it in that case too.
+ */
+ if (unlikely(policy_is_inactive(policy)) || policy->fast_switch_enabled)
return ret_freq;
if (ret_freq && policy->cur &&
@@ -1679,8 +1753,18 @@
switch (list) {
case CPUFREQ_TRANSITION_NOTIFIER:
+ mutex_lock(&cpufreq_fast_switch_lock);
+
+ if (cpufreq_fast_switch_count > 0) {
+ mutex_unlock(&cpufreq_fast_switch_lock);
+ return -EBUSY;
+ }
ret = srcu_notifier_chain_register(
&cpufreq_transition_notifier_list, nb);
+ if (!ret)
+ cpufreq_fast_switch_count--;
+
+ mutex_unlock(&cpufreq_fast_switch_lock);
break;
case CPUFREQ_POLICY_NOTIFIER:
ret = blocking_notifier_chain_register(
@@ -1713,8 +1797,14 @@
switch (list) {
case CPUFREQ_TRANSITION_NOTIFIER:
+ mutex_lock(&cpufreq_fast_switch_lock);
+
ret = srcu_notifier_chain_unregister(
&cpufreq_transition_notifier_list, nb);
+ if (!ret && !WARN_ON(cpufreq_fast_switch_count >= 0))
+ cpufreq_fast_switch_count++;
+
+ mutex_unlock(&cpufreq_fast_switch_lock);
break;
case CPUFREQ_POLICY_NOTIFIER:
ret = blocking_notifier_chain_unregister(
@@ -1733,6 +1823,37 @@
* GOVERNORS *
*********************************************************************/
+/**
+ * cpufreq_driver_fast_switch - Carry out a fast CPU frequency switch.
+ * @policy: cpufreq policy to switch the frequency for.
+ * @target_freq: New frequency to set (may be approximate).
+ *
+ * Carry out a fast frequency switch without sleeping.
+ *
+ * The driver's ->fast_switch() callback invoked by this function must be
+ * suitable for being called from within RCU-sched read-side critical sections
+ * and it is expected to select the minimum available frequency greater than or
+ * equal to @target_freq (CPUFREQ_RELATION_L).
+ *
+ * This function must not be called if policy->fast_switch_enabled is unset.
+ *
+ * Governors calling this function must guarantee that it will never be invoked
+ * twice in parallel for the same policy and that it will never be called in
+ * parallel with either ->target() or ->target_index() for the same policy.
+ *
+ * If CPUFREQ_ENTRY_INVALID is returned by the driver's ->fast_switch()
+ * callback to indicate an error condition, the hardware configuration must be
+ * preserved.
+ */
+unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy,
+ unsigned int target_freq)
+{
+ target_freq = clamp_val(target_freq, policy->min, policy->max);
+
+ return cpufreq_driver->fast_switch(policy, target_freq);
+}
+EXPORT_SYMBOL_GPL(cpufreq_driver_fast_switch);
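A sketch of a conforming caller (hypothetical, not part of the patch): it runs in scheduler context on the RCU-sched read side, and the governor serializes it per policy as required above:

```c
#include <linux/cpufreq.h>

static void example_gov_update(struct cpufreq_policy *policy,
			       unsigned int next_freq)
{
	unsigned int freq;

	if (!policy->fast_switch_enabled)
		return;		/* must use the slow ->target*() paths */

	freq = cpufreq_driver_fast_switch(policy, next_freq);
	if (freq != CPUFREQ_ENTRY_INVALID)
		policy->cur = freq;	/* hardware was reprogrammed */
}
```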
+
/* Must set freqs->new to intermediate frequency */
static int __target_intermediate(struct cpufreq_policy *policy,
struct cpufreq_freqs *freqs, int index)
@@ -2108,7 +2229,7 @@
return ret;
}
- ret = cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT);
+ ret = cpufreq_exit_governor(policy);
if (ret) {
pr_err("%s: Failed to Exit Governor: %s (%d)\n",
__func__, old_gov->name, ret);
@@ -2125,7 +2246,7 @@
pr_debug("cpufreq: governor change\n");
return 0;
}
- cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT);
+ cpufreq_exit_governor(policy);
}
/* new governor failed, so re-start old one */
@@ -2193,16 +2314,13 @@
switch (action & ~CPU_TASKS_FROZEN) {
case CPU_ONLINE:
+ case CPU_DOWN_FAILED:
cpufreq_online(cpu);
break;
case CPU_DOWN_PREPARE:
cpufreq_offline(cpu);
break;
-
- case CPU_DOWN_FAILED:
- cpufreq_online(cpu);
- break;
}
return NOTIFY_OK;
}
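
For illustration of the cpufreq_driver_fast_switch() contract documented above, here is a minimal sketch of a driver-side ->fast_switch() callback. The struct example_data layout, its frequency/control tables and the example_write_perf_ctl() helper are all hypothetical; the only constraints taken from the kernel-doc are that the callback must not sleep, should pick the lowest frequency at or above target_freq (CPUFREQ_RELATION_L), and must leave the hardware untouched when returning CPUFREQ_ENTRY_INVALID.

#include <linux/cpufreq.h>

struct example_data {                   /* hypothetical per-policy driver data */
        unsigned int nr_freqs;
        unsigned int *freqs;            /* ascending order, in kHz */
        u32 *ctl;                       /* matching control register values */
};

/* Assumed non-sleeping register write helper (hypothetical). */
static void example_write_perf_ctl(unsigned int cpu, u32 ctl);

static unsigned int example_fast_switch(struct cpufreq_policy *policy,
                                        unsigned int target_freq)
{
        struct example_data *data = policy->driver_data;
        unsigned int i;

        /* Runs in scheduler context: no sleeping, no sleeping locks. */
        for (i = 0; i < data->nr_freqs; i++) {
                if (data->freqs[i] < target_freq)
                        continue;
                example_write_perf_ctl(policy->cpu, data->ctl[i]);
                return data->freqs[i];
        }

        /* On error, preserve the current hardware configuration. */
        return CPUFREQ_ENTRY_INVALID;
}
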
diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index bf4913f..316df24 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -129,9 +129,10 @@
/************************** sysfs interface ************************/
static struct dbs_governor cs_dbs_gov;
-static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data,
- const char *buf, size_t count)
+static ssize_t store_sampling_down_factor(struct gov_attr_set *attr_set,
+ const char *buf, size_t count)
{
+ struct dbs_data *dbs_data = to_dbs_data(attr_set);
unsigned int input;
int ret;
ret = sscanf(buf, "%u", &input);
@@ -143,9 +144,10 @@
return count;
}
-static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf,
- size_t count)
+static ssize_t store_up_threshold(struct gov_attr_set *attr_set,
+ const char *buf, size_t count)
{
+ struct dbs_data *dbs_data = to_dbs_data(attr_set);
struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
unsigned int input;
int ret;
@@ -158,9 +160,10 @@
return count;
}
-static ssize_t store_down_threshold(struct dbs_data *dbs_data, const char *buf,
- size_t count)
+static ssize_t store_down_threshold(struct gov_attr_set *attr_set,
+ const char *buf, size_t count)
{
+ struct dbs_data *dbs_data = to_dbs_data(attr_set);
struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
unsigned int input;
int ret;
@@ -175,9 +178,10 @@
return count;
}
-static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data,
- const char *buf, size_t count)
+static ssize_t store_ignore_nice_load(struct gov_attr_set *attr_set,
+ const char *buf, size_t count)
{
+ struct dbs_data *dbs_data = to_dbs_data(attr_set);
unsigned int input;
int ret;
@@ -199,9 +203,10 @@
return count;
}
-static ssize_t store_freq_step(struct dbs_data *dbs_data, const char *buf,
- size_t count)
+static ssize_t store_freq_step(struct gov_attr_set *attr_set, const char *buf,
+ size_t count)
{
+ struct dbs_data *dbs_data = to_dbs_data(attr_set);
struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
unsigned int input;
int ret;
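
Every store callback in this file is converted the same mechanical way: the sysfs glue now passes a struct gov_attr_set, and to_dbs_data() (container_of() underneath) recovers the dbs_data that embeds it. A hypothetical new tunable would follow the identical shape; the name and the 0..100 bound below are invented for the sketch.

static ssize_t store_example_tunable(struct gov_attr_set *attr_set,
                                     const char *buf, size_t count)
{
        struct dbs_data *dbs_data = to_dbs_data(attr_set);
        unsigned int input;
        int ret;

        ret = sscanf(buf, "%u", &input);
        if (ret != 1 || input > 100)    /* invented bound */
                return -EINVAL;

        dbs_data->up_threshold = input; /* write into the embedding dbs_data */
        return count;
}
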
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 5f1147f..be498d5 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -43,9 +43,10 @@
* This must be called with dbs_data->mutex held, otherwise traversing
* policy_dbs_list isn't safe.
*/
-ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf,
+ssize_t store_sampling_rate(struct gov_attr_set *attr_set, const char *buf,
size_t count)
{
+ struct dbs_data *dbs_data = to_dbs_data(attr_set);
struct policy_dbs_info *policy_dbs;
unsigned int rate;
int ret;
@@ -59,7 +60,7 @@
* We are operating under dbs_data->mutex and so the list and its
* entries can't be freed concurrently.
*/
- list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) {
+ list_for_each_entry(policy_dbs, &attr_set->policy_list, list) {
mutex_lock(&policy_dbs->timer_mutex);
/*
* On 32-bit architectures this may race with the
@@ -96,13 +97,13 @@
{
struct policy_dbs_info *policy_dbs;
- list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) {
+ list_for_each_entry(policy_dbs, &dbs_data->attr_set.policy_list, list) {
unsigned int j;
for_each_cpu(j, policy_dbs->policy->cpus) {
struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j);
- j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall,
+ j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_update_time,
dbs_data->io_is_busy);
if (dbs_data->ignore_nice_load)
j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
@@ -111,54 +112,6 @@
}
EXPORT_SYMBOL_GPL(gov_update_cpu_data);
-static inline struct dbs_data *to_dbs_data(struct kobject *kobj)
-{
- return container_of(kobj, struct dbs_data, kobj);
-}
-
-static inline struct governor_attr *to_gov_attr(struct attribute *attr)
-{
- return container_of(attr, struct governor_attr, attr);
-}
-
-static ssize_t governor_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct dbs_data *dbs_data = to_dbs_data(kobj);
- struct governor_attr *gattr = to_gov_attr(attr);
-
- return gattr->show(dbs_data, buf);
-}
-
-static ssize_t governor_store(struct kobject *kobj, struct attribute *attr,
- const char *buf, size_t count)
-{
- struct dbs_data *dbs_data = to_dbs_data(kobj);
- struct governor_attr *gattr = to_gov_attr(attr);
- int ret = -EBUSY;
-
- mutex_lock(&dbs_data->mutex);
-
- if (dbs_data->usage_count)
- ret = gattr->store(dbs_data, buf, count);
-
- mutex_unlock(&dbs_data->mutex);
-
- return ret;
-}
-
-/*
- * Sysfs Ops for accessing governor attributes.
- *
- * All show/store invocations for governor specific sysfs attributes, will first
- * call the below show/store callbacks and the attribute specific callback will
- * be called from within it.
- */
-static const struct sysfs_ops governor_sysfs_ops = {
- .show = governor_show,
- .store = governor_store,
-};
-
unsigned int dbs_update(struct cpufreq_policy *policy)
{
struct policy_dbs_info *policy_dbs = policy->governor_data;
@@ -184,14 +137,14 @@
/* Get Absolute Load */
for_each_cpu(j, policy->cpus) {
struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j);
- u64 cur_wall_time, cur_idle_time;
- unsigned int idle_time, wall_time;
+ u64 update_time, cur_idle_time;
+ unsigned int idle_time, time_elapsed;
unsigned int load;
- cur_idle_time = get_cpu_idle_time(j, &cur_wall_time, io_busy);
+ cur_idle_time = get_cpu_idle_time(j, &update_time, io_busy);
- wall_time = cur_wall_time - j_cdbs->prev_cpu_wall;
- j_cdbs->prev_cpu_wall = cur_wall_time;
+ time_elapsed = update_time - j_cdbs->prev_update_time;
+ j_cdbs->prev_update_time = update_time;
idle_time = cur_idle_time - j_cdbs->prev_cpu_idle;
j_cdbs->prev_cpu_idle = cur_idle_time;
@@ -203,47 +156,62 @@
j_cdbs->prev_cpu_nice = cur_nice;
}
- if (unlikely(!wall_time || wall_time < idle_time))
- continue;
-
- /*
- * If the CPU had gone completely idle, and a task just woke up
- * on this CPU now, it would be unfair to calculate 'load' the
- * usual way for this elapsed time-window, because it will show
- * near-zero load, irrespective of how CPU intensive that task
- * actually is. This is undesirable for latency-sensitive bursty
- * workloads.
- *
- * To avoid this, we reuse the 'load' from the previous
- * time-window and give this task a chance to start with a
- * reasonably high CPU frequency. (However, we shouldn't over-do
- * this copy, lest we get stuck at a high load (high frequency)
- * for too long, even when the current system load has actually
- * dropped down. So we perform the copy only once, upon the
- * first wake-up from idle.)
- *
- * Detecting this situation is easy: the governor's utilization
- * update handler would not have run during CPU-idle periods.
- * Hence, an unusually large 'wall_time' (as compared to the
- * sampling rate) indicates this scenario.
- *
- * prev_load can be zero in two cases and we must recalculate it
- * for both cases:
- * - during long idle intervals
- * - explicitly set to zero
- */
- if (unlikely(wall_time > (2 * sampling_rate) &&
- j_cdbs->prev_load)) {
- load = j_cdbs->prev_load;
-
+ if (unlikely(!time_elapsed)) {
/*
- * Perform a destructive copy, to ensure that we copy
- * the previous load only once, upon the first wake-up
- * from idle.
+ * That can only happen when this function is called
+ * twice in a row with a very short interval between the
+ * calls, so the previous load value can be used then.
*/
+ load = j_cdbs->prev_load;
+ } else if (unlikely(time_elapsed > 2 * sampling_rate &&
+ j_cdbs->prev_load)) {
+ /*
+ * If the CPU had gone completely idle and a task has
+ * just woken up on this CPU now, it would be unfair to
+ * calculate 'load' the usual way for this elapsed
+ * time-window, because it would show near-zero load,
+ * irrespective of how CPU intensive that task actually
+ * was. This is undesirable for latency-sensitive bursty
+ * workloads.
+ *
+ * To avoid this, reuse the 'load' from the previous
+ * time-window and give this task a chance to start with
+ * a reasonably high CPU frequency. However, that
+ * shouldn't be over-done, lest we get stuck at a high
+ * load (high frequency) for too long, even when the
+ * current system load has actually dropped down, so
+ * clear prev_load to guarantee that the load will be
+ * computed again next time.
+ *
+ * Detecting this situation is easy: the governor's
+ * utilization update handler would not have run during
+ * CPU-idle periods. Hence, an unusually large
+ * 'time_elapsed' (as compared to the sampling rate)
+ * indicates this scenario.
+ */
+ load = j_cdbs->prev_load;
j_cdbs->prev_load = 0;
} else {
- load = 100 * (wall_time - idle_time) / wall_time;
+ if (time_elapsed >= idle_time) {
+ load = 100 * (time_elapsed - idle_time) / time_elapsed;
+ } else {
+ /*
+ * That can happen if idle_time is returned by
+ * get_cpu_idle_time_jiffy(). In that case
+ * idle_time is roughly equal to the difference
+ * between time_elapsed and "busy time" obtained
+ * from CPU statistics. Then, the "busy time"
+ * can end up being greater than time_elapsed
+ * (for example, if jiffies_64 and the CPU
+ * statistics are updated by different CPUs),
+ * so idle_time may in fact be negative. That
+ * means, though, that the CPU was busy all
+ * the time (on the rough average) during the
+ * last sampling interval and 100 can be
+ * returned as the load.
+ */
+ load = (int)idle_time < 0 ? 100 : 0;
+ }
j_cdbs->prev_load = load;
}
@@ -254,43 +222,6 @@
}
EXPORT_SYMBOL_GPL(dbs_update);
-static void gov_set_update_util(struct policy_dbs_info *policy_dbs,
- unsigned int delay_us)
-{
- struct cpufreq_policy *policy = policy_dbs->policy;
- int cpu;
-
- gov_update_sample_delay(policy_dbs, delay_us);
- policy_dbs->last_sample_time = 0;
-
- for_each_cpu(cpu, policy->cpus) {
- struct cpu_dbs_info *cdbs = &per_cpu(cpu_dbs, cpu);
-
- cpufreq_set_update_util_data(cpu, &cdbs->update_util);
- }
-}
-
-static inline void gov_clear_update_util(struct cpufreq_policy *policy)
-{
- int i;
-
- for_each_cpu(i, policy->cpus)
- cpufreq_set_update_util_data(i, NULL);
-
- synchronize_sched();
-}
-
-static void gov_cancel_work(struct cpufreq_policy *policy)
-{
- struct policy_dbs_info *policy_dbs = policy->governor_data;
-
- gov_clear_update_util(policy_dbs->policy);
- irq_work_sync(&policy_dbs->irq_work);
- cancel_work_sync(&policy_dbs->work);
- atomic_set(&policy_dbs->work_count, 0);
- policy_dbs->work_in_progress = false;
-}
-
static void dbs_work_handler(struct work_struct *work)
{
struct policy_dbs_info *policy_dbs;
@@ -378,6 +309,44 @@
irq_work_queue(&policy_dbs->irq_work);
}
+static void gov_set_update_util(struct policy_dbs_info *policy_dbs,
+ unsigned int delay_us)
+{
+ struct cpufreq_policy *policy = policy_dbs->policy;
+ int cpu;
+
+ gov_update_sample_delay(policy_dbs, delay_us);
+ policy_dbs->last_sample_time = 0;
+
+ for_each_cpu(cpu, policy->cpus) {
+ struct cpu_dbs_info *cdbs = &per_cpu(cpu_dbs, cpu);
+
+ cpufreq_add_update_util_hook(cpu, &cdbs->update_util,
+ dbs_update_util_handler);
+ }
+}
+
+static inline void gov_clear_update_util(struct cpufreq_policy *policy)
+{
+ int i;
+
+ for_each_cpu(i, policy->cpus)
+ cpufreq_remove_update_util_hook(i);
+
+ synchronize_sched();
+}
+
+static void gov_cancel_work(struct cpufreq_policy *policy)
+{
+ struct policy_dbs_info *policy_dbs = policy->governor_data;
+
+ gov_clear_update_util(policy_dbs->policy);
+ irq_work_sync(&policy_dbs->irq_work);
+ cancel_work_sync(&policy_dbs->work);
+ atomic_set(&policy_dbs->work_count, 0);
+ policy_dbs->work_in_progress = false;
+}
+
static struct policy_dbs_info *alloc_policy_dbs_info(struct cpufreq_policy *policy,
struct dbs_governor *gov)
{
@@ -400,7 +369,6 @@
struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j);
j_cdbs->policy_dbs = policy_dbs;
- j_cdbs->update_util.func = dbs_update_util_handler;
}
return policy_dbs;
}
@@ -449,10 +417,7 @@
policy_dbs->dbs_data = dbs_data;
policy->governor_data = policy_dbs;
- mutex_lock(&dbs_data->mutex);
- dbs_data->usage_count++;
- list_add(&policy_dbs->list, &dbs_data->policy_dbs_list);
- mutex_unlock(&dbs_data->mutex);
+ gov_attr_set_get(&dbs_data->attr_set, &policy_dbs->list);
goto out;
}
@@ -462,8 +427,7 @@
goto free_policy_dbs_info;
}
- INIT_LIST_HEAD(&dbs_data->policy_dbs_list);
- mutex_init(&dbs_data->mutex);
+ gov_attr_set_init(&dbs_data->attr_set, &policy_dbs->list);
ret = gov->init(dbs_data, !policy->governor->initialized);
if (ret)
@@ -483,14 +447,11 @@
if (!have_governor_per_policy())
gov->gdbs_data = dbs_data;
+ policy_dbs->dbs_data = dbs_data;
policy->governor_data = policy_dbs;
- policy_dbs->dbs_data = dbs_data;
- dbs_data->usage_count = 1;
- list_add(&policy_dbs->list, &dbs_data->policy_dbs_list);
-
gov->kobj_type.sysfs_ops = &governor_sysfs_ops;
- ret = kobject_init_and_add(&dbs_data->kobj, &gov->kobj_type,
+ ret = kobject_init_and_add(&dbs_data->attr_set.kobj, &gov->kobj_type,
get_governor_parent_kobj(policy),
"%s", gov->gov.name);
if (!ret)
@@ -519,29 +480,21 @@
struct dbs_governor *gov = dbs_governor_of(policy);
struct policy_dbs_info *policy_dbs = policy->governor_data;
struct dbs_data *dbs_data = policy_dbs->dbs_data;
- int count;
+ unsigned int count;
/* Protect gov->gdbs_data against concurrent updates. */
mutex_lock(&gov_dbs_data_mutex);
- mutex_lock(&dbs_data->mutex);
- list_del(&policy_dbs->list);
- count = --dbs_data->usage_count;
- mutex_unlock(&dbs_data->mutex);
+ count = gov_attr_set_put(&dbs_data->attr_set, &policy_dbs->list);
+
+ policy->governor_data = NULL;
if (!count) {
- kobject_put(&dbs_data->kobj);
-
- policy->governor_data = NULL;
-
if (!have_governor_per_policy())
gov->gdbs_data = NULL;
gov->exit(dbs_data, policy->governor->initialized == 1);
- mutex_destroy(&dbs_data->mutex);
kfree(dbs_data);
- } else {
- policy->governor_data = NULL;
}
free_policy_dbs_info(policy_dbs, gov);
@@ -570,12 +523,12 @@
for_each_cpu(j, policy->cpus) {
struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j);
- unsigned int prev_load;
- j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, io_busy);
-
- prev_load = j_cdbs->prev_cpu_wall - j_cdbs->prev_cpu_idle;
- j_cdbs->prev_load = 100 * prev_load / (unsigned int)j_cdbs->prev_cpu_wall;
+ j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_update_time, io_busy);
+ /*
+ * Make the first invocation of dbs_update() compute the load.
+ */
+ j_cdbs->prev_load = 0;
if (ignore_nice)
j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
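
The reworked load computation in dbs_update() now distinguishes three cases: zero time_elapsed (back-to-back invocations), a wake-up after a long idle period (reuse prev_load once, then force a recomputation), and the normal path, which also covers the negative idle_time skew described in the comment above. A standalone sketch of just that arithmetic, with the per-CPU bookkeeping reduced to a prev_load pointer:

static unsigned int example_load(unsigned int time_elapsed,
                                 unsigned int idle_time,
                                 unsigned int sampling_rate,
                                 unsigned int *prev_load)
{
        unsigned int load;

        if (!time_elapsed) {
                /* Called twice in a row: reuse the previous value. */
                load = *prev_load;
        } else if (time_elapsed > 2 * sampling_rate && *prev_load) {
                /* First sample after long idle: reuse once, then clear. */
                load = *prev_load;
                *prev_load = 0;
        } else if (time_elapsed >= idle_time) {
                load = 100 * (time_elapsed - idle_time) / time_elapsed;
                *prev_load = load;
        } else {
                /* idle_time underflowed (jiffies vs. CPU-stats skew). */
                load = (int)idle_time < 0 ? 100 : 0;
                *prev_load = load;
        }
        return load;
}
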
diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
index 61ff82f..34eb214 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -24,20 +24,6 @@
#include <linux/module.h>
#include <linux/mutex.h>
-/*
- * The polling frequency depends on the capability of the processor. Default
- * polling frequency is 1000 times the transition latency of the processor. The
- * governor will work on any processor with transition latency <= 10ms, using
- * appropriate sampling rate.
- *
- * For CPUs with transition latency > 10ms (mostly drivers with CPUFREQ_ETERNAL)
- * this governor will not work. All times here are in us (micro seconds).
- */
-#define MIN_SAMPLING_RATE_RATIO (2)
-#define LATENCY_MULTIPLIER (1000)
-#define MIN_LATENCY_MULTIPLIER (20)
-#define TRANSITION_LATENCY_LIMIT (10 * 1000 * 1000)
-
/* Ondemand Sampling types */
enum {OD_NORMAL_SAMPLE, OD_SUB_SAMPLE};
@@ -52,7 +38,7 @@
/* Governor demand based switching data (per-policy or global). */
struct dbs_data {
- int usage_count;
+ struct gov_attr_set attr_set;
void *tuners;
unsigned int min_sampling_rate;
unsigned int ignore_nice_load;
@@ -60,37 +46,27 @@
unsigned int sampling_down_factor;
unsigned int up_threshold;
unsigned int io_is_busy;
-
- struct kobject kobj;
- struct list_head policy_dbs_list;
- /*
- * Protect concurrent updates to governor tunables from sysfs,
- * policy_dbs_list and usage_count.
- */
- struct mutex mutex;
};
-/* Governor's specific attributes */
-struct dbs_data;
-struct governor_attr {
- struct attribute attr;
- ssize_t (*show)(struct dbs_data *dbs_data, char *buf);
- ssize_t (*store)(struct dbs_data *dbs_data, const char *buf,
- size_t count);
-};
+static inline struct dbs_data *to_dbs_data(struct gov_attr_set *attr_set)
+{
+ return container_of(attr_set, struct dbs_data, attr_set);
+}
#define gov_show_one(_gov, file_name) \
static ssize_t show_##file_name \
-(struct dbs_data *dbs_data, char *buf) \
+(struct gov_attr_set *attr_set, char *buf) \
{ \
+ struct dbs_data *dbs_data = to_dbs_data(attr_set); \
struct _gov##_dbs_tuners *tuners = dbs_data->tuners; \
return sprintf(buf, "%u\n", tuners->file_name); \
}
#define gov_show_one_common(file_name) \
static ssize_t show_##file_name \
-(struct dbs_data *dbs_data, char *buf) \
+(struct gov_attr_set *attr_set, char *buf) \
{ \
+ struct dbs_data *dbs_data = to_dbs_data(attr_set); \
return sprintf(buf, "%u\n", dbs_data->file_name); \
}
@@ -135,7 +111,7 @@
/* Per cpu structures */
struct cpu_dbs_info {
u64 prev_cpu_idle;
- u64 prev_cpu_wall;
+ u64 prev_update_time;
u64 prev_cpu_nice;
/*
* Used to keep track of load in the previous interval. However, when
@@ -184,7 +160,7 @@
(struct cpufreq_policy *, unsigned int, unsigned int),
unsigned int powersave_bias);
void od_unregister_powersave_bias_handler(void);
-ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf,
+ssize_t store_sampling_rate(struct gov_attr_set *attr_set, const char *buf,
size_t count);
void gov_update_cpu_data(struct dbs_data *dbs_data);
#endif /* _CPUFREQ_GOVERNOR_H */
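
The new to_dbs_data() works only because struct gov_attr_set is embedded in struct dbs_data rather than referenced through a pointer, letting container_of() subtract the member offset to recover the enclosing object. A tiny self-contained demonstration of the idiom, with invented stand-in types and the kernel's type-checking stripped from the macro:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct attr_set { int kobj; };
struct data { int tuners; struct attr_set set; };

int main(void)
{
        struct data d = { .tuners = 42 };
        struct attr_set *ap = &d.set;   /* only the member is passed around */
        struct data *dp = container_of(ap, struct data, set);

        printf("%d\n", dp->tuners);     /* prints 42 */
        return 0;
}
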
diff --git a/drivers/cpufreq/cpufreq_governor_attr_set.c b/drivers/cpufreq/cpufreq_governor_attr_set.c
new file mode 100644
index 0000000..52841f8
--- /dev/null
+++ b/drivers/cpufreq/cpufreq_governor_attr_set.c
@@ -0,0 +1,84 @@
+/*
+ * Abstract code for CPUFreq governor tunable sysfs attributes.
+ *
+ * Copyright (C) 2016, Intel Corporation
+ * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "cpufreq_governor.h"
+
+static inline struct gov_attr_set *to_gov_attr_set(struct kobject *kobj)
+{
+ return container_of(kobj, struct gov_attr_set, kobj);
+}
+
+static inline struct governor_attr *to_gov_attr(struct attribute *attr)
+{
+ return container_of(attr, struct governor_attr, attr);
+}
+
+static ssize_t governor_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
+{
+ struct governor_attr *gattr = to_gov_attr(attr);
+
+ return gattr->show(to_gov_attr_set(kobj), buf);
+}
+
+static ssize_t governor_store(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t count)
+{
+ struct gov_attr_set *attr_set = to_gov_attr_set(kobj);
+ struct governor_attr *gattr = to_gov_attr(attr);
+ int ret;
+
+ mutex_lock(&attr_set->update_lock);
+ ret = attr_set->usage_count ? gattr->store(attr_set, buf, count) : -EBUSY;
+ mutex_unlock(&attr_set->update_lock);
+ return ret;
+}
+
+const struct sysfs_ops governor_sysfs_ops = {
+ .show = governor_show,
+ .store = governor_store,
+};
+EXPORT_SYMBOL_GPL(governor_sysfs_ops);
+
+void gov_attr_set_init(struct gov_attr_set *attr_set, struct list_head *list_node)
+{
+ INIT_LIST_HEAD(&attr_set->policy_list);
+ mutex_init(&attr_set->update_lock);
+ attr_set->usage_count = 1;
+ list_add(list_node, &attr_set->policy_list);
+}
+EXPORT_SYMBOL_GPL(gov_attr_set_init);
+
+void gov_attr_set_get(struct gov_attr_set *attr_set, struct list_head *list_node)
+{
+ mutex_lock(&attr_set->update_lock);
+ attr_set->usage_count++;
+ list_add(list_node, &attr_set->policy_list);
+ mutex_unlock(&attr_set->update_lock);
+}
+EXPORT_SYMBOL_GPL(gov_attr_set_get);
+
+unsigned int gov_attr_set_put(struct gov_attr_set *attr_set, struct list_head *list_node)
+{
+ unsigned int count;
+
+ mutex_lock(&attr_set->update_lock);
+ list_del(list_node);
+ count = --attr_set->usage_count;
+ mutex_unlock(&attr_set->update_lock);
+ if (count)
+ return count;
+
+ kobject_put(&attr_set->kobj);
+ mutex_destroy(&attr_set->update_lock);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(gov_attr_set_put);
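
Taken together, the three exported helpers give governors a reference-counted lifecycle for a shared tunable set. A sketch of how the call sites in cpufreq_governor.c above use them; note that freeing the containing dbs_data stays with the caller, since gov_attr_set_put() only drops the embedded kobject:

/* First policy to use this tunable set: init takes the first reference. */
gov_attr_set_init(&dbs_data->attr_set, &policy_dbs->list);

/* Each additional policy sharing the set: take another reference. */
gov_attr_set_get(&dbs_data->attr_set, &policy_dbs->list);

/* On governor exit: drop a reference; free only when the last user left. */
if (!gov_attr_set_put(&dbs_data->attr_set, &policy_dbs->list))
        kfree(dbs_data);
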
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index acd8027..3001634 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -207,9 +207,10 @@
/************************** sysfs interface ************************/
static struct dbs_governor od_dbs_gov;
-static ssize_t store_io_is_busy(struct dbs_data *dbs_data, const char *buf,
- size_t count)
+static ssize_t store_io_is_busy(struct gov_attr_set *attr_set, const char *buf,
+ size_t count)
{
+ struct dbs_data *dbs_data = to_dbs_data(attr_set);
unsigned int input;
int ret;
@@ -224,9 +225,10 @@
return count;
}
-static ssize_t store_up_threshold(struct dbs_data *dbs_data, const char *buf,
- size_t count)
+static ssize_t store_up_threshold(struct gov_attr_set *attr_set,
+ const char *buf, size_t count)
{
+ struct dbs_data *dbs_data = to_dbs_data(attr_set);
unsigned int input;
int ret;
ret = sscanf(buf, "%u", &input);
@@ -240,9 +242,10 @@
return count;
}
-static ssize_t store_sampling_down_factor(struct dbs_data *dbs_data,
- const char *buf, size_t count)
+static ssize_t store_sampling_down_factor(struct gov_attr_set *attr_set,
+ const char *buf, size_t count)
{
+ struct dbs_data *dbs_data = to_dbs_data(attr_set);
struct policy_dbs_info *policy_dbs;
unsigned int input;
int ret;
@@ -254,7 +257,7 @@
dbs_data->sampling_down_factor = input;
/* Reset down sampling multiplier in case it was active */
- list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) {
+ list_for_each_entry(policy_dbs, &attr_set->policy_list, list) {
/*
* Doing this without locking might lead to using different
* rate_mult values in od_update() and od_dbs_timer().
@@ -267,9 +270,10 @@
return count;
}
-static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data,
- const char *buf, size_t count)
+static ssize_t store_ignore_nice_load(struct gov_attr_set *attr_set,
+ const char *buf, size_t count)
{
+ struct dbs_data *dbs_data = to_dbs_data(attr_set);
unsigned int input;
int ret;
@@ -291,9 +295,10 @@
return count;
}
-static ssize_t store_powersave_bias(struct dbs_data *dbs_data, const char *buf,
- size_t count)
+static ssize_t store_powersave_bias(struct gov_attr_set *attr_set,
+ const char *buf, size_t count)
{
+ struct dbs_data *dbs_data = to_dbs_data(attr_set);
struct od_dbs_tuners *od_tuners = dbs_data->tuners;
struct policy_dbs_info *policy_dbs;
unsigned int input;
@@ -308,7 +313,7 @@
od_tuners->powersave_bias = input;
- list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list)
+ list_for_each_entry(policy_dbs, &attr_set->policy_list, list)
ondemand_powersave_bias_init(policy_dbs->policy);
return count;
diff --git a/drivers/cpufreq/cpufreq_userspace.c b/drivers/cpufreq/cpufreq_userspace.c
index 4d16f45..9f3dec9 100644
--- a/drivers/cpufreq/cpufreq_userspace.c
+++ b/drivers/cpufreq/cpufreq_userspace.c
@@ -17,6 +17,7 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mutex.h>
+#include <linux/slab.h>
static DEFINE_PER_CPU(unsigned int, cpu_is_managed);
static DEFINE_MUTEX(userspace_mutex);
@@ -31,6 +32,7 @@
static int cpufreq_set(struct cpufreq_policy *policy, unsigned int freq)
{
int ret = -EINVAL;
+ unsigned int *setspeed = policy->governor_data;
pr_debug("cpufreq_set for cpu %u, freq %u kHz\n", policy->cpu, freq);
@@ -38,6 +40,8 @@
if (!per_cpu(cpu_is_managed, policy->cpu))
goto err;
+ *setspeed = freq;
+
ret = __cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L);
err:
mutex_unlock(&userspace_mutex);
@@ -49,19 +53,45 @@
return sprintf(buf, "%u\n", policy->cur);
}
+static int cpufreq_userspace_policy_init(struct cpufreq_policy *policy)
+{
+ unsigned int *setspeed;
+
+ setspeed = kzalloc(sizeof(*setspeed), GFP_KERNEL);
+ if (!setspeed)
+ return -ENOMEM;
+
+ policy->governor_data = setspeed;
+ return 0;
+}
+
static int cpufreq_governor_userspace(struct cpufreq_policy *policy,
unsigned int event)
{
+ unsigned int *setspeed = policy->governor_data;
unsigned int cpu = policy->cpu;
int rc = 0;
+ if (event == CPUFREQ_GOV_POLICY_INIT)
+ return cpufreq_userspace_policy_init(policy);
+
+ if (!setspeed)
+ return -EINVAL;
+
switch (event) {
+ case CPUFREQ_GOV_POLICY_EXIT:
+ mutex_lock(&userspace_mutex);
+ policy->governor_data = NULL;
+ kfree(setspeed);
+ mutex_unlock(&userspace_mutex);
+ break;
case CPUFREQ_GOV_START:
BUG_ON(!policy->cur);
pr_debug("started managing cpu %u\n", cpu);
mutex_lock(&userspace_mutex);
per_cpu(cpu_is_managed, cpu) = 1;
+ *setspeed = policy->cur;
mutex_unlock(&userspace_mutex);
break;
case CPUFREQ_GOV_STOP:
@@ -69,20 +99,23 @@
mutex_lock(&userspace_mutex);
per_cpu(cpu_is_managed, cpu) = 0;
+ *setspeed = 0;
mutex_unlock(&userspace_mutex);
break;
case CPUFREQ_GOV_LIMITS:
mutex_lock(&userspace_mutex);
- pr_debug("limit event for cpu %u: %u - %u kHz, currently %u kHz\n",
- cpu, policy->min, policy->max,
- policy->cur);
+ pr_debug("limit event for cpu %u: %u - %u kHz, currently %u kHz, last set to %u kHz\n",
+ cpu, policy->min, policy->max, policy->cur, *setspeed);
- if (policy->max < policy->cur)
+ if (policy->max < *setspeed)
__cpufreq_driver_target(policy, policy->max,
CPUFREQ_RELATION_H);
- else if (policy->min > policy->cur)
+ else if (policy->min > *setspeed)
__cpufreq_driver_target(policy, policy->min,
CPUFREQ_RELATION_L);
+ else
+ __cpufreq_driver_target(policy, *setspeed,
+ CPUFREQ_RELATION_L);
mutex_unlock(&userspace_mutex);
break;
}
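
With the requested speed now stored per policy, the CPUFREQ_GOV_LIMITS branch effectively clamps the remembered user request into the new [policy->min, policy->max] range instead of only nudging policy->cur. The decision reduces to a few lines of plain C; this is a sketch of the logic only, with the cpufreq relations noted in comments:

static unsigned int pick_freq(unsigned int setspeed,
                              unsigned int min, unsigned int max)
{
        if (setspeed > max)
                return max;     /* CPUFREQ_RELATION_H in the real code */
        if (setspeed < min)
                return min;     /* CPUFREQ_RELATION_L */
        return setspeed;        /* re-request the user's last choice */
}
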
diff --git a/drivers/cpufreq/e_powersaver.c b/drivers/cpufreq/e_powersaver.c
index 4085244c..cdf097b 100644
--- a/drivers/cpufreq/e_powersaver.c
+++ b/drivers/cpufreq/e_powersaver.c
@@ -6,6 +6,8 @@
* BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
@@ -20,7 +22,7 @@
#include <asm/msr.h>
#include <asm/tsc.h>
-#if defined CONFIG_ACPI_PROCESSOR || defined CONFIG_ACPI_PROCESSOR_MODULE
+#if IS_ENABLED(CONFIG_ACPI_PROCESSOR)
#include <linux/acpi.h>
#include <acpi/processor.h>
#endif
@@ -33,7 +35,7 @@
struct eps_cpu_data {
u32 fsb;
-#if defined CONFIG_ACPI_PROCESSOR || defined CONFIG_ACPI_PROCESSOR_MODULE
+#if IS_ENABLED(CONFIG_ACPI_PROCESSOR)
u32 bios_limit;
#endif
struct cpufreq_frequency_table freq_table[];
@@ -46,7 +48,7 @@
static int voltage_failsafe_off;
static int set_max_voltage;
-#if defined CONFIG_ACPI_PROCESSOR || defined CONFIG_ACPI_PROCESSOR_MODULE
+#if IS_ENABLED(CONFIG_ACPI_PROCESSOR)
static int ignore_acpi_limit;
static struct acpi_processor_performance *eps_acpi_cpu_perf;
@@ -141,11 +143,9 @@
/* Print voltage and multiplier */
rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
current_voltage = lo & 0xff;
- printk(KERN_INFO "eps: Current voltage = %dmV\n",
- current_voltage * 16 + 700);
+ pr_info("Current voltage = %dmV\n", current_voltage * 16 + 700);
current_multiplier = (lo >> 8) & 0xff;
- printk(KERN_INFO "eps: Current multiplier = %d\n",
- current_multiplier);
+ pr_info("Current multiplier = %d\n", current_multiplier);
}
#endif
return 0;
@@ -166,7 +166,7 @@
dest_state = centaur->freq_table[index].driver_data & 0xffff;
ret = eps_set_state(centaur, policy, dest_state);
if (ret)
- printk(KERN_ERR "eps: Timeout!\n");
+ pr_err("Timeout!\n");
return ret;
}
@@ -186,7 +186,7 @@
int k, step, voltage;
int ret;
int states;
-#if defined CONFIG_ACPI_PROCESSOR || defined CONFIG_ACPI_PROCESSOR_MODULE
+#if IS_ENABLED(CONFIG_ACPI_PROCESSOR)
unsigned int limit;
#endif
@@ -194,36 +194,36 @@
return -ENODEV;
/* Check brand */
- printk(KERN_INFO "eps: Detected VIA ");
+ pr_info("Detected VIA ");
switch (c->x86_model) {
case 10:
rdmsr(0x1153, lo, hi);
brand = (((lo >> 2) ^ lo) >> 18) & 3;
- printk(KERN_CONT "Model A ");
+ pr_cont("Model A ");
break;
case 13:
rdmsr(0x1154, lo, hi);
brand = (((lo >> 4) ^ (lo >> 2))) & 0x000000ff;
- printk(KERN_CONT "Model D ");
+ pr_cont("Model D ");
break;
}
switch (brand) {
case EPS_BRAND_C7M:
- printk(KERN_CONT "C7-M\n");
+ pr_cont("C7-M\n");
break;
case EPS_BRAND_C7:
- printk(KERN_CONT "C7\n");
+ pr_cont("C7\n");
break;
case EPS_BRAND_EDEN:
- printk(KERN_CONT "Eden\n");
+ pr_cont("Eden\n");
break;
case EPS_BRAND_C7D:
- printk(KERN_CONT "C7-D\n");
+ pr_cont("C7-D\n");
break;
case EPS_BRAND_C3:
- printk(KERN_CONT "C3\n");
+ pr_cont("C3\n");
return -ENODEV;
break;
}
@@ -235,7 +235,7 @@
/* Can be locked at 0 */
rdmsrl(MSR_IA32_MISC_ENABLE, val);
if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
- printk(KERN_INFO "eps: Can't enable Enhanced PowerSaver\n");
+ pr_info("Can't enable Enhanced PowerSaver\n");
return -ENODEV;
}
}
@@ -243,22 +243,19 @@
/* Print voltage and multiplier */
rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
current_voltage = lo & 0xff;
- printk(KERN_INFO "eps: Current voltage = %dmV\n",
- current_voltage * 16 + 700);
+ pr_info("Current voltage = %dmV\n", current_voltage * 16 + 700);
current_multiplier = (lo >> 8) & 0xff;
- printk(KERN_INFO "eps: Current multiplier = %d\n", current_multiplier);
+ pr_info("Current multiplier = %d\n", current_multiplier);
/* Print limits */
max_voltage = hi & 0xff;
- printk(KERN_INFO "eps: Highest voltage = %dmV\n",
- max_voltage * 16 + 700);
+ pr_info("Highest voltage = %dmV\n", max_voltage * 16 + 700);
max_multiplier = (hi >> 8) & 0xff;
- printk(KERN_INFO "eps: Highest multiplier = %d\n", max_multiplier);
+ pr_info("Highest multiplier = %d\n", max_multiplier);
min_voltage = (hi >> 16) & 0xff;
- printk(KERN_INFO "eps: Lowest voltage = %dmV\n",
- min_voltage * 16 + 700);
+ pr_info("Lowest voltage = %dmV\n", min_voltage * 16 + 700);
min_multiplier = (hi >> 24) & 0xff;
- printk(KERN_INFO "eps: Lowest multiplier = %d\n", min_multiplier);
+ pr_info("Lowest multiplier = %d\n", min_multiplier);
/* Sanity checks */
if (current_multiplier == 0 || max_multiplier == 0
@@ -276,34 +273,30 @@
/* Check for systems using underclocked CPU */
if (!freq_failsafe_off && max_multiplier != current_multiplier) {
- printk(KERN_INFO "eps: Your processor is running at different "
- "frequency then its maximum. Aborting.\n");
- printk(KERN_INFO "eps: You can use freq_failsafe_off option "
- "to disable this check.\n");
+ pr_info("Your processor is running at different frequency then its maximum. Aborting.\n");
+ pr_info("You can use freq_failsafe_off option to disable this check.\n");
return -EINVAL;
}
if (!voltage_failsafe_off && max_voltage != current_voltage) {
- printk(KERN_INFO "eps: Your processor is running at different "
- "voltage then its maximum. Aborting.\n");
- printk(KERN_INFO "eps: You can use voltage_failsafe_off "
- "option to disable this check.\n");
+ pr_info("Your processor is running at different voltage then its maximum. Aborting.\n");
+ pr_info("You can use voltage_failsafe_off option to disable this check.\n");
return -EINVAL;
}
/* Calc FSB speed */
fsb = cpu_khz / current_multiplier;
-#if defined CONFIG_ACPI_PROCESSOR || defined CONFIG_ACPI_PROCESSOR_MODULE
+#if IS_ENABLED(CONFIG_ACPI_PROCESSOR)
/* Check for ACPI processor speed limit */
if (!ignore_acpi_limit && !eps_acpi_init()) {
if (!acpi_processor_get_bios_limit(policy->cpu, &limit)) {
- printk(KERN_INFO "eps: ACPI limit %u.%uGHz\n",
+ pr_info("ACPI limit %u.%uGHz\n",
limit/1000000,
(limit%1000000)/10000);
eps_acpi_exit(policy);
/* Check if max_multiplier is in BIOS limits */
if (limit && max_multiplier * fsb > limit) {
- printk(KERN_INFO "eps: Aborting.\n");
+ pr_info("Aborting\n");
return -EINVAL;
}
}
@@ -319,8 +312,7 @@
v = (set_max_voltage - 700) / 16;
/* Check if voltage is within limits */
if (v >= min_voltage && v <= max_voltage) {
- printk(KERN_INFO "eps: Setting %dmV as maximum.\n",
- v * 16 + 700);
+ pr_info("Setting %dmV as maximum\n", v * 16 + 700);
max_voltage = v;
}
}
@@ -341,7 +333,7 @@
/* Copy basic values */
centaur->fsb = fsb;
-#if defined CONFIG_ACPI_PROCESSOR || defined CONFIG_ACPI_PROCESSOR_MODULE
+#if IS_ENABLED(CONFIG_ACPI_PROCESSOR)
centaur->bios_limit = limit;
#endif
@@ -426,7 +418,7 @@
MODULE_PARM_DESC(freq_failsafe_off, "Disable current vs max frequency check");
module_param(voltage_failsafe_off, int, 0644);
MODULE_PARM_DESC(voltage_failsafe_off, "Disable current vs max voltage check");
-#if defined CONFIG_ACPI_PROCESSOR || defined CONFIG_ACPI_PROCESSOR_MODULE
+#if IS_ENABLED(CONFIG_ACPI_PROCESSOR)
module_param(ignore_acpi_limit, int, 0644);
MODULE_PARM_DESC(ignore_acpi_limit, "Don't check ACPI's processor speed limit");
#endif
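
All of the preprocessor churn in this driver is a single substitution: IS_ENABLED(CONFIG_ACPI_PROCESSOR) evaluates to 1 when the option is built in (=y) or built as a module (=m), which is exactly what the old two-macro test spelled out. Roughly:

#include <linux/kconfig.h>

/*
 * IS_ENABLED(CONFIG_FOO) is 1 if CONFIG_FOO is 'y' or 'm', 0 otherwise,
 * so this one test replaces the old pair:
 * defined(CONFIG_ACPI_PROCESSOR) || defined(CONFIG_ACPI_PROCESSOR_MODULE)
 */
#if IS_ENABLED(CONFIG_ACPI_PROCESSOR)
#include <acpi/processor.h>
static int ignore_acpi_limit;   /* ACPI-only parameter, as in the driver */
#endif
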
diff --git a/drivers/cpufreq/elanfreq.c b/drivers/cpufreq/elanfreq.c
index 1c06e78..bfce11c 100644
--- a/drivers/cpufreq/elanfreq.c
+++ b/drivers/cpufreq/elanfreq.c
@@ -16,6 +16,8 @@
*
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
@@ -185,7 +187,7 @@
static int __init elanfreq_setup(char *str)
{
max_freq = simple_strtoul(str, &str, 0);
- printk(KERN_WARNING "You're using the deprecated elanfreq command line option. Use elanfreq.max_freq instead, please!\n");
+ pr_warn("You're using the deprecated elanfreq command line option. Use elanfreq.max_freq instead, please!\n");
return 1;
}
__setup("elanfreq=", elanfreq_setup);
diff --git a/drivers/cpufreq/hisi-acpu-cpufreq.c b/drivers/cpufreq/hisi-acpu-cpufreq.c
deleted file mode 100644
index 026d5b2..0000000
--- a/drivers/cpufreq/hisi-acpu-cpufreq.c
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Hisilicon Platforms Using ACPU CPUFreq Support
- *
- * Copyright (c) 2015 Hisilicon Limited.
- * Copyright (c) 2015 Linaro Limited.
- *
- * Leo Yan <leo.yan@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed "as is" WITHOUT ANY WARRANTY of any
- * kind, whether express or implied; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/err.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/of.h>
-#include <linux/platform_device.h>
-
-static int __init hisi_acpu_cpufreq_driver_init(void)
-{
- struct platform_device *pdev;
-
- if (!of_machine_is_compatible("hisilicon,hi6220"))
- return -ENODEV;
-
- pdev = platform_device_register_simple("cpufreq-dt", -1, NULL, 0);
- return PTR_ERR_OR_ZERO(pdev);
-}
-module_init(hisi_acpu_cpufreq_driver_init);
-
-MODULE_AUTHOR("Leo Yan <leo.yan@linaro.org>");
-MODULE_DESCRIPTION("Hisilicon acpu cpufreq driver");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/cpufreq/ia64-acpi-cpufreq.c b/drivers/cpufreq/ia64-acpi-cpufreq.c
index 0202429..759612d 100644
--- a/drivers/cpufreq/ia64-acpi-cpufreq.c
+++ b/drivers/cpufreq/ia64-acpi-cpufreq.c
@@ -8,6 +8,8 @@
* Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/module.h>
@@ -118,8 +120,7 @@
if (ret) {
set_cpus_allowed_ptr(current, &saved_mask);
- printk(KERN_WARNING "get performance failed with error %d\n",
- ret);
+ pr_warn("get performance failed with error %d\n", ret);
ret = 0;
goto migrate_end;
}
@@ -177,7 +178,7 @@
ret = processor_set_pstate(value);
if (ret) {
- printk(KERN_WARNING "Transition failed with error %d\n", ret);
+ pr_warn("Transition failed with error %d\n", ret);
retval = -ENODEV;
goto migrate_end;
}
@@ -291,8 +292,7 @@
/* notify BIOS that we exist */
acpi_processor_notify_smm(THIS_MODULE);
- printk(KERN_INFO "acpi-cpufreq: CPU%u - ACPI performance management "
- "activated.\n", cpu);
+ pr_info("CPU%u - ACPI performance management activated\n", cpu);
for (i = 0; i < data->acpi_data.state_count; i++)
pr_debug(" %cP%d: %d MHz, %d mW, %d uS, %d uS, 0x%x 0x%x\n",
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index b230eba..b76a98d 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -10,6 +10,8 @@
* of the License.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/module.h>
@@ -39,10 +41,17 @@
#define ATOM_TURBO_RATIOS 0x66c
#define ATOM_TURBO_VIDS 0x66d
+#ifdef CONFIG_ACPI
+#include <acpi/processor.h>
+#endif
+
#define FRAC_BITS 8
#define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
#define fp_toint(X) ((X) >> FRAC_BITS)
+#define EXT_BITS 6
+#define EXT_FRAC_BITS (EXT_BITS + FRAC_BITS)
+
static inline int32_t mul_fp(int32_t x, int32_t y)
{
return ((int64_t)x * (int64_t)y) >> FRAC_BITS;
@@ -64,12 +73,22 @@
return ret;
}
+static inline u64 mul_ext_fp(u64 x, u64 y)
+{
+ return (x * y) >> EXT_FRAC_BITS;
+}
+
+static inline u64 div_ext_fp(u64 x, u64 y)
+{
+ return div64_u64(x << EXT_FRAC_BITS, y);
+}
+
/**
* struct sample - Store performance sample
- * @core_pct_busy: Ratio of APERF/MPERF in percent, which is actual
+ * @core_avg_perf: Ratio of APERF/MPERF which is the actual average
* performance during last sample period
* @busy_scaled: Scaled busy value which is used to calculate next
- * P state. This can be different than core_pct_busy
+ * P state. This can be different than core_avg_perf
* to account for cpu idle period
* @aperf: Difference of actual performance frequency clock count
* read from APERF MSR between last and current sample
@@ -84,7 +103,7 @@
* data for choosing next P State.
*/
struct sample {
- int32_t core_pct_busy;
+ int32_t core_avg_perf;
int32_t busy_scaled;
u64 aperf;
u64 mperf;
@@ -162,6 +181,7 @@
* struct cpudata - Per CPU instance data storage
* @cpu: CPU number for this instance data
* @update_util: CPUFreq utility callback information
+ * @update_util_set: CPUFreq utility callback is set
* @pstate: Stores P state limits for this CPU
* @vid: Stores VID limits for this CPU
* @pid: Stores PID parameters for this CPU
@@ -172,6 +192,8 @@
* @prev_cummulative_iowait: IO Wait time difference from last and
* current sample
* @sample: Storage for storing last Sample data
+ * @acpi_perf_data: Stores ACPI perf information read from _PSS
+ * @valid_pss_table: Set to true for valid ACPI _PSS entries found
*
* This structure stores per CPU instance data for all CPUs.
*/
@@ -179,6 +201,7 @@
int cpu;
struct update_util_data update_util;
+ bool update_util_set;
struct pstate_data pstate;
struct vid_data vid;
@@ -190,6 +213,10 @@
u64 prev_tsc;
u64 prev_cummulative_iowait;
struct sample sample;
+#ifdef CONFIG_ACPI
+ struct acpi_processor_performance acpi_perf_data;
+ bool valid_pss_table;
+#endif
};
static struct cpudata **all_cpu_data;
@@ -258,6 +285,9 @@
static struct pstate_funcs pstate_funcs;
static int hwp_active;
+#ifdef CONFIG_ACPI
+static bool acpi_ppc;
+#endif
/**
* struct perf_limits - Store user and policy limits
@@ -331,6 +361,124 @@
static struct perf_limits *limits = &powersave_limits;
#endif
+#ifdef CONFIG_ACPI
+
+static bool intel_pstate_get_ppc_enable_status(void)
+{
+ if (acpi_gbl_FADT.preferred_profile == PM_ENTERPRISE_SERVER ||
+ acpi_gbl_FADT.preferred_profile == PM_PERFORMANCE_SERVER)
+ return true;
+
+ return acpi_ppc;
+}
+
+/*
+ * The max target P-state ratio is an 8-bit value in both the PLATFORM_INFO
+ * MSR and the TURBO_RATIO_LIMIT MSR, which the pstate driver stores in the
+ * max_pstate and max_turbo_pstate fields. The PERF_CTL MSR contains a
+ * 16-bit value for the P-state ratio, of which only the high 8 bits are
+ * used; for example, 0x1700 sets target ratio 0x17. The _PSS control value
+ * is stored in a format that can be written directly to the PERF_CTL MSR,
+ * but in the intel_pstate driver this shift occurs during the write to
+ * PERF_CTL (e.g. in core_set_pstate() for cores). This function converts
+ * the _PSS control value to the intel_pstate driver format for comparison
+ * and assignment.
+ */
+static int convert_to_native_pstate_format(struct cpudata *cpu, int index)
+{
+ return cpu->acpi_perf_data.states[index].control >> 8;
+}
+
+static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
+{
+ struct cpudata *cpu;
+ int turbo_pss_ctl;
+ int ret;
+ int i;
+
+ if (hwp_active)
+ return;
+
+ if (!intel_pstate_get_ppc_enable_status())
+ return;
+
+ cpu = all_cpu_data[policy->cpu];
+
+ ret = acpi_processor_register_performance(&cpu->acpi_perf_data,
+ policy->cpu);
+ if (ret)
+ return;
+
+ /*
+ * Check if the control value in _PSS is for PERF_CTL MSR, which should
+ * guarantee that the states returned by it map to the states in our
+ * list directly.
+ */
+ if (cpu->acpi_perf_data.control_register.space_id !=
+ ACPI_ADR_SPACE_FIXED_HARDWARE)
+ goto err;
+
+ /*
+ * If there is only one entry in _PSS, simply ignore _PSS and continue
+ * as usual without taking it into account.
+ */
+ if (cpu->acpi_perf_data.state_count < 2)
+ goto err;
+
+ pr_debug("CPU%u - ACPI _PSS perf data\n", policy->cpu);
+ for (i = 0; i < cpu->acpi_perf_data.state_count; i++) {
+ pr_debug(" %cP%d: %u MHz, %u mW, 0x%x\n",
+ (i == cpu->acpi_perf_data.state ? '*' : ' '), i,
+ (u32) cpu->acpi_perf_data.states[i].core_frequency,
+ (u32) cpu->acpi_perf_data.states[i].power,
+ (u32) cpu->acpi_perf_data.states[i].control);
+ }
+
+ /*
+ * The _PSS table doesn't contain the whole turbo frequency range;
+ * it only lists +1 MHz above the max non-turbo frequency, with a
+ * control value corresponding to the max turbo ratio. When cpufreq
+ * set_policy is later called with that max frequency, performance
+ * would be reduced, because this driver uses the real max turbo
+ * frequency as the max frequency. So correct the frequency in the
+ * _PSS table to the max turbo frequency based on the turbo ratio,
+ * converting to MHz since _PSS frequencies are in MHz.
+ */
+ turbo_pss_ctl = convert_to_native_pstate_format(cpu, 0);
+ if (turbo_pss_ctl > cpu->pstate.max_pstate)
+ cpu->acpi_perf_data.states[0].core_frequency =
+ policy->cpuinfo.max_freq / 1000;
+ cpu->valid_pss_table = true;
+ pr_info("_PPC limits will be enforced\n");
+
+ return;
+
+ err:
+ cpu->valid_pss_table = false;
+ acpi_processor_unregister_performance(policy->cpu);
+}
+
+static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
+{
+ struct cpudata *cpu;
+
+ cpu = all_cpu_data[policy->cpu];
+ if (!cpu->valid_pss_table)
+ return;
+
+ acpi_processor_unregister_performance(policy->cpu);
+}
+
+#else
+static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
+{
+}
+
+static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
+{
+}
+#endif
+
static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
int deadband, int integral) {
pid->setpoint = int_tofp(setpoint);
@@ -341,17 +489,17 @@
static inline void pid_p_gain_set(struct _pid *pid, int percent)
{
- pid->p_gain = div_fp(int_tofp(percent), int_tofp(100));
+ pid->p_gain = div_fp(percent, 100);
}
static inline void pid_i_gain_set(struct _pid *pid, int percent)
{
- pid->i_gain = div_fp(int_tofp(percent), int_tofp(100));
+ pid->i_gain = div_fp(percent, 100);
}
static inline void pid_d_gain_set(struct _pid *pid, int percent)
{
- pid->d_gain = div_fp(int_tofp(percent), int_tofp(100));
+ pid->d_gain = div_fp(percent, 100);
}
static signed int pid_calc(struct _pid *pid, int32_t busy)
@@ -537,7 +685,7 @@
total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1;
no_turbo = cpu->pstate.max_pstate - cpu->pstate.min_pstate + 1;
- turbo_fp = div_fp(int_tofp(no_turbo), int_tofp(total));
+ turbo_fp = div_fp(no_turbo, total);
turbo_pct = 100 - fp_toint(mul_fp(turbo_fp, int_tofp(100)));
return sprintf(buf, "%u\n", turbo_pct);
}
@@ -579,7 +727,7 @@
update_turbo_state();
if (limits->turbo_disabled) {
- pr_warn("intel_pstate: Turbo disabled by BIOS or unavailable on processor\n");
+ pr_warn("Turbo disabled by BIOS or unavailable on processor\n");
return -EPERM;
}
@@ -608,8 +756,7 @@
limits->max_perf_pct);
limits->max_perf_pct = max(limits->min_perf_pct,
limits->max_perf_pct);
- limits->max_perf = div_fp(int_tofp(limits->max_perf_pct),
- int_tofp(100));
+ limits->max_perf = div_fp(limits->max_perf_pct, 100);
if (hwp_active)
intel_pstate_hwp_set_online_cpus();
@@ -633,8 +780,7 @@
limits->min_perf_pct);
limits->min_perf_pct = min(limits->max_perf_pct,
limits->min_perf_pct);
- limits->min_perf = div_fp(int_tofp(limits->min_perf_pct),
- int_tofp(100));
+ limits->min_perf = div_fp(limits->min_perf_pct, 100);
if (hwp_active)
intel_pstate_hwp_set_online_cpus();
@@ -1019,15 +1165,11 @@
intel_pstate_set_min_pstate(cpu);
}
-static inline void intel_pstate_calc_busy(struct cpudata *cpu)
+static inline void intel_pstate_calc_avg_perf(struct cpudata *cpu)
{
struct sample *sample = &cpu->sample;
- int64_t core_pct;
- core_pct = int_tofp(sample->aperf) * int_tofp(100);
- core_pct = div64_u64(core_pct, int_tofp(sample->mperf));
-
- sample->core_pct_busy = (int32_t)core_pct;
+ sample->core_avg_perf = div_ext_fp(sample->aperf, sample->mperf);
}
static inline bool intel_pstate_sample(struct cpudata *cpu, u64 time)
@@ -1070,9 +1212,14 @@
static inline int32_t get_avg_frequency(struct cpudata *cpu)
{
- return fp_toint(mul_fp(cpu->sample.core_pct_busy,
- int_tofp(cpu->pstate.max_pstate_physical *
- cpu->pstate.scaling / 100)));
+ return mul_ext_fp(cpu->sample.core_avg_perf,
+ cpu->pstate.max_pstate_physical * cpu->pstate.scaling);
+}
+
+static inline int32_t get_avg_pstate(struct cpudata *cpu)
+{
+ return mul_ext_fp(cpu->pstate.max_pstate_physical,
+ cpu->sample.core_avg_perf);
}
static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
@@ -1107,49 +1254,43 @@
cpu_load = div64_u64(int_tofp(100) * mperf, sample->tsc);
cpu->sample.busy_scaled = cpu_load;
- return cpu->pstate.current_pstate - pid_calc(&cpu->pid, cpu_load);
+ return get_avg_pstate(cpu) - pid_calc(&cpu->pid, cpu_load);
}
static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
{
- int32_t core_busy, max_pstate, current_pstate, sample_ratio;
+ int32_t perf_scaled, max_pstate, current_pstate, sample_ratio;
u64 duration_ns;
/*
- * core_busy is the ratio of actual performance to max
- * max_pstate is the max non turbo pstate available
- * current_pstate was the pstate that was requested during
- * the last sample period.
- *
- * We normalize core_busy, which was our actual percent
- * performance to what we requested during the last sample
- * period. The result will be a percentage of busy at a
- * specified pstate.
+ * perf_scaled is the average performance during the last sampling
+ * period scaled by the ratio of the maximum P-state to the P-state
+ * requested last time (in percent). That measures the system's
+ * response to the previous P-state selection.
*/
- core_busy = cpu->sample.core_pct_busy;
- max_pstate = int_tofp(cpu->pstate.max_pstate_physical);
- current_pstate = int_tofp(cpu->pstate.current_pstate);
- core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));
+ max_pstate = cpu->pstate.max_pstate_physical;
+ current_pstate = cpu->pstate.current_pstate;
+ perf_scaled = mul_ext_fp(cpu->sample.core_avg_perf,
+ div_fp(100 * max_pstate, current_pstate));
/*
* Since our utilization update callback will not run unless we are
* in C0, check if the actual elapsed time is significantly greater (3x)
* than our sample interval. If it is, then we were idle for a long
- * enough period of time to adjust our busyness.
+ * enough period of time to adjust our performance metric.
*/
duration_ns = cpu->sample.time - cpu->last_sample_time;
if ((s64)duration_ns > pid_params.sample_rate_ns * 3) {
- sample_ratio = div_fp(int_tofp(pid_params.sample_rate_ns),
- int_tofp(duration_ns));
- core_busy = mul_fp(core_busy, sample_ratio);
+ sample_ratio = div_fp(pid_params.sample_rate_ns, duration_ns);
+ perf_scaled = mul_fp(perf_scaled, sample_ratio);
} else {
sample_ratio = div_fp(100 * cpu->sample.mperf, cpu->sample.tsc);
if (sample_ratio < int_tofp(1))
- core_busy = 0;
+ perf_scaled = 0;
}
- cpu->sample.busy_scaled = core_busy;
- return cpu->pstate.current_pstate - pid_calc(&cpu->pid, core_busy);
+ cpu->sample.busy_scaled = perf_scaled;
+ return cpu->pstate.current_pstate - pid_calc(&cpu->pid, perf_scaled);
}
static inline void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)
@@ -1179,7 +1320,7 @@
intel_pstate_update_pstate(cpu, target_pstate);
sample = &cpu->sample;
- trace_pstate_sample(fp_toint(sample->core_pct_busy),
+ trace_pstate_sample(mul_ext_fp(100, sample->core_avg_perf),
fp_toint(sample->busy_scaled),
from,
cpu->pstate.current_pstate,
@@ -1199,7 +1340,7 @@
bool sample_taken = intel_pstate_sample(cpu, time);
if (sample_taken) {
- intel_pstate_calc_busy(cpu);
+ intel_pstate_calc_avg_perf(cpu);
if (!hwp_active)
intel_pstate_adjust_busy_pstate(cpu);
}
@@ -1261,23 +1402,16 @@
intel_pstate_busy_pid_reset(cpu);
- cpu->update_util.func = intel_pstate_update_util;
-
- pr_debug("intel_pstate: controlling: cpu %d\n", cpunum);
+ pr_debug("controlling: cpu %d\n", cpunum);
return 0;
}
static unsigned int intel_pstate_get(unsigned int cpu_num)
{
- struct sample *sample;
- struct cpudata *cpu;
+ struct cpudata *cpu = all_cpu_data[cpu_num];
- cpu = all_cpu_data[cpu_num];
- if (!cpu)
- return 0;
- sample = &cpu->sample;
- return get_avg_frequency(cpu);
+ return cpu ? get_avg_frequency(cpu) : 0;
}
static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
@@ -1286,12 +1420,20 @@
/* Prevent intel_pstate_update_util() from using stale data. */
cpu->sample.time = 0;
- cpufreq_set_update_util_data(cpu_num, &cpu->update_util);
+ cpufreq_add_update_util_hook(cpu_num, &cpu->update_util,
+ intel_pstate_update_util);
+ cpu->update_util_set = true;
}
static void intel_pstate_clear_update_util_hook(unsigned int cpu)
{
- cpufreq_set_update_util_data(cpu, NULL);
+ struct cpudata *cpu_data = all_cpu_data[cpu];
+
+ if (!cpu_data->update_util_set)
+ return;
+
+ cpufreq_remove_update_util_hook(cpu);
+ cpu_data->update_util_set = false;
synchronize_sched();
}
@@ -1311,20 +1453,31 @@
static int intel_pstate_set_policy(struct cpufreq_policy *policy)
{
+ struct cpudata *cpu;
+
if (!policy->cpuinfo.max_freq)
return -ENODEV;
intel_pstate_clear_update_util_hook(policy->cpu);
+ cpu = all_cpu_data[0];
+ if (cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate) {
+ if (policy->max < policy->cpuinfo.max_freq &&
+ policy->max > cpu->pstate.max_pstate * cpu->pstate.scaling) {
+ pr_debug("policy->max > max non turbo frequency\n");
+ policy->max = policy->cpuinfo.max_freq;
+ }
+ }
+
if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
limits = &performance_limits;
if (policy->max >= policy->cpuinfo.max_freq) {
- pr_debug("intel_pstate: set performance\n");
+ pr_debug("set performance\n");
intel_pstate_set_performance_limits(limits);
goto out;
}
} else {
- pr_debug("intel_pstate: set powersave\n");
+ pr_debug("set powersave\n");
limits = &powersave_limits;
}
@@ -1348,10 +1501,8 @@
/* Make sure min_perf_pct <= max_perf_pct */
limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct);
- limits->min_perf = div_fp(int_tofp(limits->min_perf_pct),
- int_tofp(100));
- limits->max_perf = div_fp(int_tofp(limits->max_perf_pct),
- int_tofp(100));
+ limits->min_perf = div_fp(limits->min_perf_pct, 100);
+ limits->max_perf = div_fp(limits->max_perf_pct, 100);
out:
intel_pstate_set_update_util_hook(policy->cpu);
@@ -1377,7 +1528,7 @@
int cpu_num = policy->cpu;
struct cpudata *cpu = all_cpu_data[cpu_num];
- pr_debug("intel_pstate: CPU %d exiting\n", cpu_num);
+ pr_debug("CPU %d exiting\n", cpu_num);
intel_pstate_clear_update_util_hook(cpu_num);
@@ -1410,12 +1561,20 @@
policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
policy->cpuinfo.max_freq =
cpu->pstate.turbo_pstate * cpu->pstate.scaling;
+ intel_pstate_init_acpi_perf_limits(policy);
policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
cpumask_set_cpu(policy->cpu, policy->cpus);
return 0;
}
+static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
+{
+ intel_pstate_exit_perf_limits(policy);
+
+ return 0;
+}
+
static struct cpufreq_driver intel_pstate_driver = {
.flags = CPUFREQ_CONST_LOOPS,
.verify = intel_pstate_verify_policy,
@@ -1423,6 +1582,7 @@
.resume = intel_pstate_hwp_set_policy,
.get = intel_pstate_get,
.init = intel_pstate_cpu_init,
+ .exit = intel_pstate_cpu_exit,
.stop_cpu = intel_pstate_stop_cpu,
.name = "intel_pstate",
};
@@ -1466,8 +1626,7 @@
}
-#if IS_ENABLED(CONFIG_ACPI)
-#include <acpi/processor.h>
+#ifdef CONFIG_ACPI
static bool intel_pstate_no_acpi_pss(void)
{
@@ -1623,7 +1782,7 @@
if (intel_pstate_platform_pwr_mgmt_exists())
return -ENODEV;
- pr_info("Intel P-state driver initializing.\n");
+ pr_info("Intel P-state driver initializing\n");
all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus());
if (!all_cpu_data)
@@ -1640,7 +1799,7 @@
intel_pstate_sysfs_expose_params();
if (hwp_active)
- pr_info("intel_pstate: HWP enabled\n");
+ pr_info("HWP enabled\n");
return rc;
out:
@@ -1666,13 +1825,19 @@
if (!strcmp(str, "disable"))
no_load = 1;
if (!strcmp(str, "no_hwp")) {
- pr_info("intel_pstate: HWP disabled\n");
+ pr_info("HWP disabled\n");
no_hwp = 1;
}
if (!strcmp(str, "force"))
force_load = 1;
if (!strcmp(str, "hwp_only"))
hwp_only = 1;
+
+#ifdef CONFIG_ACPI
+ if (!strcmp(str, "support_acpi_ppc"))
+ acpi_ppc = true;
+#endif
+
return 0;
}
early_param("intel_pstate", intel_pstate_setup);
diff --git a/drivers/cpufreq/longhaul.c b/drivers/cpufreq/longhaul.c
index 0f6b229..c46a12d 100644
--- a/drivers/cpufreq/longhaul.c
+++ b/drivers/cpufreq/longhaul.c
@@ -21,6 +21,8 @@
* BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
@@ -40,8 +42,6 @@
#include "longhaul.h"
-#define PFX "longhaul: "
-
#define TYPE_LONGHAUL_V1 1
#define TYPE_LONGHAUL_V2 2
#define TYPE_POWERSAVER 3
@@ -347,14 +347,13 @@
freqs.new = calc_speed(longhaul_get_cpu_mult());
/* Check if requested frequency is set. */
if (unlikely(freqs.new != speed)) {
- printk(KERN_INFO PFX "Failed to set requested frequency!\n");
+ pr_info("Failed to set requested frequency!\n");
/* Revision ID = 1 but processor is expecting revision key
* equal to 0. Jumpers at the bottom of processor will change
* multiplier and FSB, but will not change bits in Longhaul
* MSR nor enable voltage scaling. */
if (!revid_errata) {
- printk(KERN_INFO PFX "Enabling \"Ignore Revision ID\" "
- "option.\n");
+ pr_info("Enabling \"Ignore Revision ID\" option\n");
revid_errata = 1;
msleep(200);
goto retry_loop;
@@ -364,11 +363,10 @@
* but it doesn't change frequency. I tried poking various
* bits in northbridge registers, but without success. */
if (longhaul_flags & USE_ACPI_C3) {
- printk(KERN_INFO PFX "Disabling ACPI C3 support.\n");
+ pr_info("Disabling ACPI C3 support\n");
longhaul_flags &= ~USE_ACPI_C3;
if (revid_errata) {
- printk(KERN_INFO PFX "Disabling \"Ignore "
- "Revision ID\" option.\n");
+ pr_info("Disabling \"Ignore Revision ID\" option\n");
revid_errata = 0;
}
msleep(200);
@@ -379,7 +377,7 @@
* RevID = 1. RevID errata will make things right. Just
* to be 100% sure. */
if (longhaul_version == TYPE_LONGHAUL_V2) {
- printk(KERN_INFO PFX "Switching to Longhaul ver. 1\n");
+ pr_info("Switching to Longhaul ver. 1\n");
longhaul_version = TYPE_LONGHAUL_V1;
msleep(200);
goto retry_loop;
@@ -387,8 +385,7 @@
}
if (!bm_timeout) {
- printk(KERN_INFO PFX "Warning: Timeout while waiting for "
- "idle PCI bus.\n");
+ pr_info("Warning: Timeout while waiting for idle PCI bus\n");
return -EBUSY;
}
@@ -433,12 +430,12 @@
/* Get current frequency */
mult = longhaul_get_cpu_mult();
if (mult == -1) {
- printk(KERN_INFO PFX "Invalid (reserved) multiplier!\n");
+ pr_info("Invalid (reserved) multiplier!\n");
return -EINVAL;
}
fsb = guess_fsb(mult);
if (fsb == 0) {
- printk(KERN_INFO PFX "Invalid (reserved) FSB!\n");
+ pr_info("Invalid (reserved) FSB!\n");
return -EINVAL;
}
/* Get max multiplier - as we always did.
@@ -468,11 +465,11 @@
print_speed(highest_speed/1000));
if (lowest_speed == highest_speed) {
- printk(KERN_INFO PFX "highestspeed == lowest, aborting.\n");
+ pr_info("highestspeed == lowest, aborting\n");
return -EINVAL;
}
if (lowest_speed > highest_speed) {
- printk(KERN_INFO PFX "nonsense! lowest (%d > %d) !\n",
+ pr_info("nonsense! lowest (%d > %d) !\n",
lowest_speed, highest_speed);
return -EINVAL;
}
@@ -538,16 +535,16 @@
rdmsrl(MSR_VIA_LONGHAUL, longhaul.val);
if (!(longhaul.bits.RevisionID & 1)) {
- printk(KERN_INFO PFX "Voltage scaling not supported by CPU.\n");
+ pr_info("Voltage scaling not supported by CPU\n");
return;
}
if (!longhaul.bits.VRMRev) {
- printk(KERN_INFO PFX "VRM 8.5\n");
+ pr_info("VRM 8.5\n");
vrm_mV_table = &vrm85_mV[0];
mV_vrm_table = &mV_vrm85[0];
} else {
- printk(KERN_INFO PFX "Mobile VRM\n");
+ pr_info("Mobile VRM\n");
if (cpu_model < CPU_NEHEMIAH)
return;
vrm_mV_table = &mobilevrm_mV[0];
@@ -558,27 +555,21 @@
maxvid = vrm_mV_table[longhaul.bits.MaximumVID];
if (minvid.mV == 0 || maxvid.mV == 0 || minvid.mV > maxvid.mV) {
- printk(KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. "
- "Voltage scaling disabled.\n",
- minvid.mV/1000, minvid.mV%1000,
- maxvid.mV/1000, maxvid.mV%1000);
+ pr_info("Bogus values Min:%d.%03d Max:%d.%03d - Voltage scaling disabled\n",
+ minvid.mV/1000, minvid.mV%1000,
+ maxvid.mV/1000, maxvid.mV%1000);
return;
}
if (minvid.mV == maxvid.mV) {
- printk(KERN_INFO PFX "Claims to support voltage scaling but "
- "min & max are both %d.%03d. "
- "Voltage scaling disabled\n",
- maxvid.mV/1000, maxvid.mV%1000);
+ pr_info("Claims to support voltage scaling but min & max are both %d.%03d - Voltage scaling disabled\n",
+ maxvid.mV/1000, maxvid.mV%1000);
return;
}
/* How many voltage steps*/
numvscales = maxvid.pos - minvid.pos + 1;
- printk(KERN_INFO PFX
- "Max VID=%d.%03d "
- "Min VID=%d.%03d, "
- "%d possible voltage scales\n",
+ pr_info("Max VID=%d.%03d Min VID=%d.%03d, %d possible voltage scales\n",
maxvid.mV/1000, maxvid.mV%1000,
minvid.mV/1000, minvid.mV%1000,
numvscales);
@@ -617,12 +608,12 @@
pos = minvid.pos;
freq_pos->driver_data |= mV_vrm_table[pos] << 8;
vid = vrm_mV_table[mV_vrm_table[pos]];
- printk(KERN_INFO PFX "f: %d kHz, index: %d, vid: %d mV\n",
+ pr_info("f: %d kHz, index: %d, vid: %d mV\n",
speed, (int)(freq_pos - longhaul_table), vid.mV);
}
can_scale_voltage = 1;
- printk(KERN_INFO PFX "Voltage scaling enabled.\n");
+ pr_info("Voltage scaling enabled\n");
}
@@ -720,8 +711,7 @@
pci_write_config_byte(dev, reg, pci_cmd);
pci_read_config_byte(dev, reg, &pci_cmd);
if (!(pci_cmd & 1<<7)) {
- printk(KERN_ERR PFX
- "Can't enable access to port 0x22.\n");
+ pr_err("Can't enable access to port 0x22\n");
status = 0;
}
}
@@ -758,8 +748,7 @@
if (pci_cmd & 1 << 7) {
pci_read_config_dword(dev, 0x88, &acpi_regs_addr);
acpi_regs_addr &= 0xff00;
- printk(KERN_INFO PFX "ACPI I/O at 0x%x\n",
- acpi_regs_addr);
+ pr_info("ACPI I/O at 0x%x\n", acpi_regs_addr);
}
pci_dev_put(dev);
@@ -853,14 +842,14 @@
longhaul_version = TYPE_LONGHAUL_V1;
}
- printk(KERN_INFO PFX "VIA %s CPU detected. ", cpuname);
+ pr_info("VIA %s CPU detected. ", cpuname);
switch (longhaul_version) {
case TYPE_LONGHAUL_V1:
case TYPE_LONGHAUL_V2:
- printk(KERN_CONT "Longhaul v%d supported.\n", longhaul_version);
+ pr_cont("Longhaul v%d supported\n", longhaul_version);
break;
case TYPE_POWERSAVER:
- printk(KERN_CONT "Powersaver supported.\n");
+ pr_cont("Powersaver supported\n");
break;
};
@@ -889,15 +878,14 @@
if (!(longhaul_flags & USE_ACPI_C3
|| longhaul_flags & USE_NORTHBRIDGE)
&& ((pr == NULL) || !(pr->flags.bm_control))) {
- printk(KERN_ERR PFX
- "No ACPI support. Unsupported northbridge.\n");
+ pr_err("No ACPI support: Unsupported northbridge\n");
return -ENODEV;
}
if (longhaul_flags & USE_NORTHBRIDGE)
- printk(KERN_INFO PFX "Using northbridge support.\n");
+ pr_info("Using northbridge support\n");
if (longhaul_flags & USE_ACPI_C3)
- printk(KERN_INFO PFX "Using ACPI support.\n");
+ pr_info("Using ACPI support\n");
ret = longhaul_get_ranges();
if (ret != 0)
@@ -934,20 +922,18 @@
return -ENODEV;
if (!enable) {
- printk(KERN_ERR PFX "Option \"enable\" not set. Aborting.\n");
+ pr_err("Option \"enable\" not set - Aborting\n");
return -ENODEV;
}
#ifdef CONFIG_SMP
if (num_online_cpus() > 1) {
- printk(KERN_ERR PFX "More than 1 CPU detected, "
- "longhaul disabled.\n");
+ pr_err("More than 1 CPU detected, longhaul disabled\n");
return -ENODEV;
}
#endif
#ifdef CONFIG_X86_IO_APIC
- if (cpu_has_apic) {
- printk(KERN_ERR PFX "APIC detected. Longhaul is currently "
- "broken in this configuration.\n");
+ if (boot_cpu_has(X86_FEATURE_APIC)) {
+ pr_err("APIC detected. Longhaul is currently broken in this configuration.\n");
return -ENODEV;
}
#endif
@@ -955,7 +941,7 @@
case 6 ... 9:
return cpufreq_register_driver(&longhaul_driver);
case 10:
- printk(KERN_ERR PFX "Use acpi-cpufreq driver for VIA C7\n");
+ pr_err("Use acpi-cpufreq driver for VIA C7\n");
default:
;
}
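A pattern repeated across these hunks: the per-file PFX string is dropped in favour of a pr_fmt() macro defined before the includes, which the pr_*() helpers in <linux/printk.h> splice into every format string. A hedged sketch of the effect for this file, where KBUILD_MODNAME expands to "longhaul":

/* Must be defined before <linux/printk.h> is pulled in */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>

/* This call... */
pr_info("Voltage scaling enabled\n");
/* ...now logs "longhaul: Voltage scaling enabled" with no PFX macro. */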
diff --git a/drivers/cpufreq/loongson2_cpufreq.c b/drivers/cpufreq/loongson2_cpufreq.c
index cd593c1..6bbdac1 100644
--- a/drivers/cpufreq/loongson2_cpufreq.c
+++ b/drivers/cpufreq/loongson2_cpufreq.c
@@ -10,6 +10,9 @@
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*/
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/cpufreq.h>
#include <linux/module.h>
#include <linux/err.h>
@@ -76,7 +79,7 @@
cpuclk = clk_get(NULL, "cpu_clk");
if (IS_ERR(cpuclk)) {
- printk(KERN_ERR "cpufreq: couldn't get CPU clk\n");
+ pr_err("couldn't get CPU clk\n");
return PTR_ERR(cpuclk);
}
@@ -163,7 +166,7 @@
if (ret)
return ret;
- pr_info("cpufreq: Loongson-2F CPU frequency driver.\n");
+ pr_info("Loongson-2F CPU frequency driver\n");
cpufreq_register_notifier(&loongson2_cpufreq_notifier_block,
CPUFREQ_TRANSITION_NOTIFIER);
diff --git a/drivers/cpufreq/maple-cpufreq.c b/drivers/cpufreq/maple-cpufreq.c
index cc3408f..d9df893 100644
--- a/drivers/cpufreq/maple-cpufreq.c
+++ b/drivers/cpufreq/maple-cpufreq.c
@@ -13,6 +13,8 @@
#undef DEBUG
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/types.h>
#include <linux/errno.h>
@@ -174,7 +176,7 @@
/* Get first CPU node */
cpunode = of_cpu_device_node_get(0);
if (cpunode == NULL) {
- printk(KERN_ERR "cpufreq: Can't find any CPU 0 node\n");
+ pr_err("Can't find any CPU 0 node\n");
goto bail_noprops;
}
@@ -182,8 +184,7 @@
/* we actually don't care on which CPU to access PVR */
pvr_hi = PVR_VER(mfspr(SPRN_PVR));
if (pvr_hi != 0x3c && pvr_hi != 0x44) {
- printk(KERN_ERR "cpufreq: Unsupported CPU version (%x)\n",
- pvr_hi);
+ pr_err("Unsupported CPU version (%x)\n", pvr_hi);
goto bail_noprops;
}
@@ -222,8 +223,8 @@
maple_pmode_cur = -1;
maple_scom_switch_freq(maple_scom_query_freq());
- printk(KERN_INFO "Registering Maple CPU frequency driver\n");
- printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Cur: %d MHz\n",
+ pr_info("Registering Maple CPU frequency driver\n");
+ pr_info("Low: %d Mhz, High: %d Mhz, Cur: %d MHz\n",
maple_cpu_freqs[1].frequency/1000,
maple_cpu_freqs[0].frequency/1000,
maple_cpu_freqs[maple_pmode_cur].frequency/1000);
diff --git a/drivers/cpufreq/mt8173-cpufreq.c b/drivers/cpufreq/mt8173-cpufreq.c
index 2058e6d..6f602c7 100644
--- a/drivers/cpufreq/mt8173-cpufreq.c
+++ b/drivers/cpufreq/mt8173-cpufreq.c
@@ -59,11 +59,8 @@
static struct mtk_cpu_dvfs_info *mtk_cpu_dvfs_info_lookup(int cpu)
{
struct mtk_cpu_dvfs_info *info;
- struct list_head *list;
- list_for_each(list, &dvfs_info_list) {
- info = list_entry(list, struct mtk_cpu_dvfs_info, list_head);
-
+ list_for_each_entry(info, &dvfs_info_list, list_head) {
if (cpumask_test_cpu(cpu, &info->cpus))
return info;
}
@@ -524,8 +521,7 @@
static int mt8173_cpufreq_probe(struct platform_device *pdev)
{
- struct mtk_cpu_dvfs_info *info;
- struct list_head *list, *tmp;
+ struct mtk_cpu_dvfs_info *info, *tmp;
int cpu, ret;
for_each_possible_cpu(cpu) {
@@ -559,11 +555,9 @@
return 0;
release_dvfs_info_list:
- list_for_each_safe(list, tmp, &dvfs_info_list) {
- info = list_entry(list, struct mtk_cpu_dvfs_info, list_head);
-
+ list_for_each_entry_safe(info, tmp, &dvfs_info_list, list_head) {
mtk_cpu_dvfs_info_release(info);
- list_del(list);
+ list_del(&info->list_head);
}
return ret;
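The two hunks above swap open-coded list_for_each()/list_entry() pairs for the type-safe list_for_each_entry() helpers; the _safe variant keeps a second cursor so the current node can be unlinked and freed mid-walk. A minimal sketch of the idiom with hypothetical names, not taken from the driver:

#include <linux/list.h>
#include <linux/slab.h>

struct item {
	struct list_head node;
	int val;
};

static LIST_HEAD(items);

static void drop_all(void)
{
	struct item *it, *tmp;

	/* _safe: "tmp" caches the next node, so "it" may be freed */
	list_for_each_entry_safe(it, tmp, &items, node) {
		list_del(&it->node);
		kfree(it);
	}
}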
diff --git a/drivers/cpufreq/mvebu-cpufreq.c b/drivers/cpufreq/mvebu-cpufreq.c
new file mode 100644
index 0000000..e920889
--- /dev/null
+++ b/drivers/cpufreq/mvebu-cpufreq.c
@@ -0,0 +1,107 @@
+/*
+ * CPUFreq support for Armada 370/XP platforms.
+ *
+ * Copyright (C) 2012-2016 Marvell
+ *
+ * Yehuda Yitschak <yehuday@marvell.com>
+ * Gregory Clement <gregory.clement@free-electrons.com>
+ * Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#define pr_fmt(fmt) "mvebu-pmsu: " fmt
+
+#include <linux/clk.h>
+#include <linux/cpu.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/pm_opp.h>
+#include <linux/resource.h>
+
+static int __init armada_xp_pmsu_cpufreq_init(void)
+{
+ struct device_node *np;
+ struct resource res;
+ int ret, cpu;
+
+ if (!of_machine_is_compatible("marvell,armadaxp"))
+ return 0;
+
+ /*
+ * In order to have proper cpufreq handling, we need to ensure
+ * that the Device Tree description of the CPU clock includes
+ * the definition of the PMU DFS registers. If not, we do not
+ * register the clock notifier and the cpufreq driver. This
+ * piece of code is only for compatibility with old Device
+ * Trees.
+ */
+ np = of_find_compatible_node(NULL, NULL, "marvell,armada-xp-cpu-clock");
+ if (!np)
+ return 0;
+
+ ret = of_address_to_resource(np, 1, &res);
+ if (ret) {
+ pr_warn(FW_WARN "not enabling cpufreq, deprecated armada-xp-cpu-clock binding\n");
+ of_node_put(np);
+ return 0;
+ }
+
+ of_node_put(np);
+
+ /*
+ * For each CPU, this loop registers the operating points
+ * supported (which are the nominal CPU frequency and half of
+ * it), and registers the clock notifier that will take care
+ * of doing the PMSU part of a frequency transition.
+ */
+ for_each_possible_cpu(cpu) {
+ struct device *cpu_dev;
+ struct clk *clk;
+ int ret;
+
+ cpu_dev = get_cpu_device(cpu);
+ if (!cpu_dev) {
+ pr_err("Cannot get CPU %d\n", cpu);
+ continue;
+ }
+
+ clk = clk_get(cpu_dev, 0);
+ if (IS_ERR(clk)) {
+ pr_err("Cannot get clock for CPU %d\n", cpu);
+ return PTR_ERR(clk);
+ }
+
+ /*
+ * In case of a failure of dev_pm_opp_add(), we don't
+ * bother with cleaning up the registered OPP (there's
+ * no function to do so), and simply cancel the
+ * registration of the cpufreq device.
+ */
+ ret = dev_pm_opp_add(cpu_dev, clk_get_rate(clk), 0);
+ if (ret) {
+ clk_put(clk);
+ return ret;
+ }
+
+ ret = dev_pm_opp_add(cpu_dev, clk_get_rate(clk) / 2, 0);
+ if (ret) {
+ clk_put(clk);
+ return ret;
+ }
+
+ ret = dev_pm_opp_set_sharing_cpus(cpu_dev,
+ cpumask_of(cpu_dev->id));
+ if (ret)
+ dev_err(cpu_dev, "%s: failed to mark OPPs as shared: %d\n",
+ __func__, ret);
+ }
+
+ platform_device_register_simple("cpufreq-dt", -1, NULL, 0);
+ return 0;
+}
+device_initcall(armada_xp_pmsu_cpufreq_init);
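The loop above registers exactly two OPPs per CPU, the nominal clock rate and half of it, then hands the policy over to the generic cpufreq-dt driver. As an illustration only (not part of the patch), one of those OPPs could later be looked up as sketched below; note that OPP lookups in this kernel generation expected RCU read-side protection:

struct dev_pm_opp *opp;
unsigned long rate = clk_get_rate(clk);	/* the nominal rate added above */

rcu_read_lock();
opp = dev_pm_opp_find_freq_exact(cpu_dev, rate, true);
if (!IS_ERR(opp))
	pr_info("OPP at %lu Hz is registered\n", rate);
rcu_read_unlock();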
diff --git a/drivers/cpufreq/omap-cpufreq.c b/drivers/cpufreq/omap-cpufreq.c
index e3866e0..cead9be 100644
--- a/drivers/cpufreq/omap-cpufreq.c
+++ b/drivers/cpufreq/omap-cpufreq.c
@@ -13,6 +13,9 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
@@ -163,13 +166,13 @@
{
mpu_dev = get_cpu_device(0);
if (!mpu_dev) {
- pr_warning("%s: unable to get the mpu device\n", __func__);
+ pr_warn("%s: unable to get the MPU device\n", __func__);
return -EINVAL;
}
mpu_reg = regulator_get(mpu_dev, "vcc");
if (IS_ERR(mpu_reg)) {
- pr_warning("%s: unable to get MPU regulator\n", __func__);
+ pr_warn("%s: unable to get MPU regulator\n", __func__);
mpu_reg = NULL;
} else {
/*
diff --git a/drivers/cpufreq/p4-clockmod.c b/drivers/cpufreq/p4-clockmod.c
index 5dd95da..fd77812 100644
--- a/drivers/cpufreq/p4-clockmod.c
+++ b/drivers/cpufreq/p4-clockmod.c
@@ -20,6 +20,8 @@
*
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
@@ -35,8 +37,6 @@
#include "speedstep-lib.h"
-#define PFX "p4-clockmod: "
-
/*
* Duty Cycle (3bits), note DC_DISABLE is not specified in
* intel docs i just use it to mean disable
@@ -124,11 +124,7 @@
{
if (c->x86 == 0x06) {
if (cpu_has(c, X86_FEATURE_EST))
- printk_once(KERN_WARNING PFX "Warning: EST-capable "
- "CPU detected. The acpi-cpufreq module offers "
- "voltage scaling in addition to frequency "
- "scaling. You should use that instead of "
- "p4-clockmod, if possible.\n");
+ pr_warn_once("Warning: EST-capable CPU detected. The acpi-cpufreq module offers voltage scaling in addition to frequency scaling. You should use that instead of p4-clockmod, if possible.\n");
switch (c->x86_model) {
case 0x0E: /* Core */
case 0x0F: /* Core Duo */
@@ -152,11 +148,7 @@
p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS;
if (speedstep_detect_processor() == SPEEDSTEP_CPU_P4M) {
- printk(KERN_WARNING PFX "Warning: Pentium 4-M detected. "
- "The speedstep-ich or acpi cpufreq modules offer "
- "voltage scaling in addition of frequency scaling. "
- "You should use either one instead of p4-clockmod, "
- "if possible.\n");
+ pr_warn("Warning: Pentium 4-M detected. The speedstep-ich or acpi cpufreq modules offer voltage scaling in addition of frequency scaling. You should use either one instead of p4-clockmod, if possible.\n");
return speedstep_get_frequency(SPEEDSTEP_CPU_P4M);
}
@@ -265,8 +257,7 @@
ret = cpufreq_register_driver(&p4clockmod_driver);
if (!ret)
- printk(KERN_INFO PFX "P4/Xeon(TM) CPU On-Demand Clock "
- "Modulation available\n");
+ pr_info("P4/Xeon(TM) CPU On-Demand Clock Modulation available\n");
return ret;
}
diff --git a/drivers/cpufreq/pmac32-cpufreq.c b/drivers/cpufreq/pmac32-cpufreq.c
index 1f49d97..b7b576e 100644
--- a/drivers/cpufreq/pmac32-cpufreq.c
+++ b/drivers/cpufreq/pmac32-cpufreq.c
@@ -13,6 +13,8 @@
*
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/types.h>
#include <linux/errno.h>
@@ -481,13 +483,13 @@
freqs = of_get_property(cpunode, "bus-frequencies", &lenp);
lenp /= sizeof(u32);
if (freqs == NULL || lenp != 2) {
- printk(KERN_ERR "cpufreq: bus-frequencies incorrect or missing\n");
+ pr_err("bus-frequencies incorrect or missing\n");
return 1;
}
ratio = of_get_property(cpunode, "processor-to-bus-ratio*2",
NULL);
if (ratio == NULL) {
- printk(KERN_ERR "cpufreq: processor-to-bus-ratio*2 missing\n");
+ pr_err("processor-to-bus-ratio*2 missing\n");
return 1;
}
@@ -550,7 +552,7 @@
if (volt_gpio_np)
voltage_gpio = read_gpio(volt_gpio_np);
if (!voltage_gpio){
- printk(KERN_ERR "cpufreq: missing cpu-vcore-select gpio\n");
+ pr_err("missing cpu-vcore-select gpio\n");
return 1;
}
@@ -675,9 +677,9 @@
pmac_cpu_freqs[CPUFREQ_HIGH].frequency = hi_freq;
ppc_proc_freq = cur_freq * 1000ul;
- printk(KERN_INFO "Registering PowerMac CPU frequency driver\n");
- printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Boot: %d Mhz\n",
- low_freq/1000, hi_freq/1000, cur_freq/1000);
+ pr_info("Registering PowerMac CPU frequency driver\n");
+ pr_info("Low: %d Mhz, High: %d Mhz, Boot: %d Mhz\n",
+ low_freq/1000, hi_freq/1000, cur_freq/1000);
return cpufreq_register_driver(&pmac_cpufreq_driver);
}
diff --git a/drivers/cpufreq/pmac64-cpufreq.c b/drivers/cpufreq/pmac64-cpufreq.c
index 4ff8687..267e089 100644
--- a/drivers/cpufreq/pmac64-cpufreq.c
+++ b/drivers/cpufreq/pmac64-cpufreq.c
@@ -12,6 +12,8 @@
#undef DEBUG
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/types.h>
#include <linux/errno.h>
@@ -138,7 +140,7 @@
usleep_range(1000, 1000);
}
if (done == 0)
- printk(KERN_WARNING "cpufreq: Timeout in clock slewing !\n");
+ pr_warn("Timeout in clock slewing !\n");
}
@@ -266,7 +268,7 @@
rc = pmf_call_one(pfunc_cpu_setfreq_low, NULL);
if (rc)
- printk(KERN_WARNING "cpufreq: pfunc switch error %d\n", rc);
+ pr_warn("pfunc switch error %d\n", rc);
/* It's an irq GPIO so we should be able to just block here,
* I'll do that later after I've properly tested the IRQ code for
@@ -282,7 +284,7 @@
usleep_range(500, 500);
}
if (done == 0)
- printk(KERN_WARNING "cpufreq: Timeout in clock slewing !\n");
+ pr_warn("Timeout in clock slewing !\n");
/* If frequency is going down, last ramp the voltage */
if (speed_mode > g5_pmode_cur)
@@ -368,7 +370,7 @@
}
pvr_hi = (*valp) >> 16;
if (pvr_hi != 0x3c && pvr_hi != 0x44) {
- printk(KERN_ERR "cpufreq: Unsupported CPU version\n");
+ pr_err("Unsupported CPU version\n");
goto bail_noprops;
}
@@ -403,8 +405,7 @@
root = of_find_node_by_path("/");
if (root == NULL) {
- printk(KERN_ERR "cpufreq: Can't find root of "
- "device tree\n");
+ pr_err("Can't find root of device tree\n");
goto bail_noprops;
}
pfunc_set_vdnap0 = pmf_find_function(root, "set-vdnap0");
@@ -412,8 +413,7 @@
pmf_find_function(root, "slewing-done");
if (pfunc_set_vdnap0 == NULL ||
pfunc_vdnap0_complete == NULL) {
- printk(KERN_ERR "cpufreq: Can't find required "
- "platform function\n");
+ pr_err("Can't find required platform function\n");
goto bail_noprops;
}
@@ -453,10 +453,10 @@
g5_pmode_cur = -1;
g5_switch_freq(g5_query_freq());
- printk(KERN_INFO "Registering G5 CPU frequency driver\n");
- printk(KERN_INFO "Frequency method: %s, Voltage method: %s\n",
- freq_method, volt_method);
- printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Cur: %d MHz\n",
+ pr_info("Registering G5 CPU frequency driver\n");
+ pr_info("Frequency method: %s, Voltage method: %s\n",
+ freq_method, volt_method);
+ pr_info("Low: %d Mhz, High: %d Mhz, Cur: %d MHz\n",
g5_cpu_freqs[1].frequency/1000,
g5_cpu_freqs[0].frequency/1000,
g5_cpu_freqs[g5_pmode_cur].frequency/1000);
@@ -493,7 +493,7 @@
if (cpuid != NULL)
eeprom = of_get_property(cpuid, "cpuid", NULL);
if (eeprom == NULL) {
- printk(KERN_ERR "cpufreq: Can't find cpuid EEPROM !\n");
+ pr_err("Can't find cpuid EEPROM !\n");
rc = -ENODEV;
goto bail;
}
@@ -511,7 +511,7 @@
break;
}
if (hwclock == NULL) {
- printk(KERN_ERR "cpufreq: Can't find i2c clock chip !\n");
+ pr_err("Can't find i2c clock chip !\n");
rc = -ENODEV;
goto bail;
}
@@ -539,7 +539,7 @@
/* Check we have minimum requirements */
if (pfunc_cpu_getfreq == NULL || pfunc_cpu_setfreq_high == NULL ||
pfunc_cpu_setfreq_low == NULL || pfunc_slewing_done == NULL) {
- printk(KERN_ERR "cpufreq: Can't find platform functions !\n");
+ pr_err("Can't find platform functions !\n");
rc = -ENODEV;
goto bail;
}
@@ -567,7 +567,7 @@
/* Get max frequency from device-tree */
valp = of_get_property(cpunode, "clock-frequency", NULL);
if (!valp) {
- printk(KERN_ERR "cpufreq: Can't find CPU frequency !\n");
+ pr_err("Can't find CPU frequency !\n");
rc = -ENODEV;
goto bail;
}
@@ -583,8 +583,7 @@
/* Check for machines with no useful settings */
if (il == ih) {
- printk(KERN_WARNING "cpufreq: No low frequency mode available"
- " on this model !\n");
+ pr_warn("No low frequency mode available on this model !\n");
rc = -ENODEV;
goto bail;
}
@@ -595,7 +594,7 @@
/* Sanity check */
if (min_freq >= max_freq || min_freq < 1000) {
- printk(KERN_ERR "cpufreq: Can't calculate low frequency !\n");
+ pr_err("Can't calculate low frequency !\n");
rc = -ENXIO;
goto bail;
}
@@ -619,10 +618,10 @@
g5_pmode_cur = -1;
g5_switch_freq(g5_query_freq());
- printk(KERN_INFO "Registering G5 CPU frequency driver\n");
- printk(KERN_INFO "Frequency method: i2c/pfunc, "
- "Voltage method: %s\n", has_volt ? "i2c/pfunc" : "none");
- printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Cur: %d MHz\n",
+ pr_info("Registering G5 CPU frequency driver\n");
+ pr_info("Frequency method: i2c/pfunc, Voltage method: %s\n",
+ has_volt ? "i2c/pfunc" : "none");
+ pr_info("Low: %d Mhz, High: %d Mhz, Cur: %d MHz\n",
g5_cpu_freqs[1].frequency/1000,
g5_cpu_freqs[0].frequency/1000,
g5_cpu_freqs[g5_pmode_cur].frequency/1000);
@@ -654,7 +653,7 @@
/* Get first CPU node */
cpunode = of_cpu_device_node_get(0);
if (cpunode == NULL) {
- pr_err("cpufreq: Can't find any CPU node\n");
+ pr_err("Can't find any CPU node\n");
return -ENODEV;
}
diff --git a/drivers/cpufreq/powernow-k6.c b/drivers/cpufreq/powernow-k6.c
index e6f24b2..dedd256 100644
--- a/drivers/cpufreq/powernow-k6.c
+++ b/drivers/cpufreq/powernow-k6.c
@@ -8,6 +8,8 @@
* BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
@@ -22,7 +24,6 @@
#define POWERNOW_IOPORT 0xfff0 /* it doesn't matter where, as long
as it is unused */
-#define PFX "powernow-k6: "
static unsigned int busfreq; /* FSB, in 10 kHz */
static unsigned int max_multiplier;
@@ -141,7 +142,7 @@
{
if (clock_ratio[best_i].driver_data > max_multiplier) {
- printk(KERN_ERR PFX "invalid target frequency\n");
+ pr_err("invalid target frequency\n");
return -EINVAL;
}
@@ -175,13 +176,14 @@
max_multiplier = param_max_multiplier;
goto have_max_multiplier;
}
- printk(KERN_ERR "powernow-k6: invalid max_multiplier parameter, valid parameters 20, 30, 35, 40, 45, 50, 55, 60\n");
+ pr_err("invalid max_multiplier parameter, valid parameters 20, 30, 35, 40, 45, 50, 55, 60\n");
return -EINVAL;
}
if (!max_multiplier) {
- printk(KERN_WARNING "powernow-k6: unknown frequency %u, cannot determine current multiplier\n", khz);
- printk(KERN_WARNING "powernow-k6: use module parameters max_multiplier and bus_frequency\n");
+ pr_warn("unknown frequency %u, cannot determine current multiplier\n",
+ khz);
+ pr_warn("use module parameters max_multiplier and bus_frequency\n");
return -EOPNOTSUPP;
}
@@ -193,7 +195,7 @@
busfreq = param_busfreq / 10;
goto have_busfreq;
}
- printk(KERN_ERR "powernow-k6: invalid bus_frequency parameter, allowed range 50000 - 150000 kHz\n");
+ pr_err("invalid bus_frequency parameter, allowed range 50000 - 150000 kHz\n");
return -EINVAL;
}
@@ -275,7 +277,7 @@
return -ENODEV;
if (!request_region(POWERNOW_IOPORT, 16, "PowerNow!")) {
- printk(KERN_INFO PFX "PowerNow IOPORT region already used.\n");
+ pr_info("PowerNow IOPORT region already used\n");
return -EIO;
}
diff --git a/drivers/cpufreq/powernow-k7.c b/drivers/cpufreq/powernow-k7.c
index c1ae199..9f013ed 100644
--- a/drivers/cpufreq/powernow-k7.c
+++ b/drivers/cpufreq/powernow-k7.c
@@ -13,6 +13,8 @@
* - We disable half multipliers if ACPI is used on A0 stepping CPUs.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
@@ -35,9 +37,6 @@
#include "powernow-k7.h"
-#define PFX "powernow: "
-
-
struct psb_s {
u8 signature[10];
u8 tableversion;
@@ -127,14 +126,13 @@
maxei = cpuid_eax(0x80000000);
if (maxei < 0x80000007) { /* Any powernow info ? */
#ifdef MODULE
- printk(KERN_INFO PFX "No powernow capabilities detected\n");
+ pr_info("No powernow capabilities detected\n");
#endif
return 0;
}
if ((c->x86_model == 6) && (c->x86_mask == 0)) {
- printk(KERN_INFO PFX "K7 660[A0] core detected, "
- "enabling errata workarounds\n");
+ pr_info("K7 660[A0] core detected, enabling errata workarounds\n");
have_a0 = 1;
}
@@ -144,22 +142,22 @@
if (!(edx & (1 << 1 | 1 << 2)))
return 0;
- printk(KERN_INFO PFX "PowerNOW! Technology present. Can scale: ");
+ pr_info("PowerNOW! Technology present. Can scale: ");
if (edx & 1 << 1) {
- printk("frequency");
+ pr_cont("frequency");
can_scale_bus = 1;
}
if ((edx & (1 << 1 | 1 << 2)) == 0x6)
- printk(" and ");
+ pr_cont(" and ");
if (edx & 1 << 2) {
- printk("voltage");
+ pr_cont("voltage");
can_scale_vid = 1;
}
- printk(".\n");
+ pr_cont("\n");
return 1;
}
@@ -427,16 +425,14 @@
err05:
kfree(acpi_processor_perf);
err0:
- printk(KERN_WARNING PFX "ACPI perflib can not be used on "
- "this platform\n");
+ pr_warn("ACPI perflib can not be used on this platform\n");
acpi_processor_perf = NULL;
return retval;
}
#else
static int powernow_acpi_init(void)
{
- printk(KERN_INFO PFX "no support for ACPI processor found."
- " Please recompile your kernel with ACPI processor\n");
+ pr_info("no support for ACPI processor found - please recompile your kernel with ACPI processor\n");
return -EINVAL;
}
#endif
@@ -468,8 +464,7 @@
psb = (struct psb_s *) p;
pr_debug("Table version: 0x%x\n", psb->tableversion);
if (psb->tableversion != 0x12) {
- printk(KERN_INFO PFX "Sorry, only v1.2 tables"
- " supported right now\n");
+ pr_info("Sorry, only v1.2 tables supported right now\n");
return -ENODEV;
}
@@ -481,10 +476,8 @@
latency = psb->settlingtime;
if (latency < 100) {
- printk(KERN_INFO PFX "BIOS set settling time "
- "to %d microseconds. "
- "Should be at least 100. "
- "Correcting.\n", latency);
+ pr_info("BIOS set settling time to %d microseconds. Should be at least 100. Correcting.\n",
+ latency);
latency = 100;
}
pr_debug("Settling Time: %d microseconds.\n",
@@ -516,10 +509,9 @@
p += 2;
}
}
- printk(KERN_INFO PFX "No PST tables match this cpuid "
- "(0x%x)\n", etuple);
- printk(KERN_INFO PFX "This is indicative of a broken "
- "BIOS.\n");
+ pr_info("No PST tables match this cpuid (0x%x)\n",
+ etuple);
+ pr_info("This is indicative of a broken BIOS\n");
return -EINVAL;
}
@@ -552,7 +544,7 @@
sgtc = 100 * m * latency;
sgtc = sgtc / 3;
if (sgtc > 0xfffff) {
- printk(KERN_WARNING PFX "SGTC too large %d\n", sgtc);
+ pr_warn("SGTC too large %d\n", sgtc);
sgtc = 0xfffff;
}
return sgtc;
@@ -574,14 +566,10 @@
static int acer_cpufreq_pst(const struct dmi_system_id *d)
{
- printk(KERN_WARNING PFX
- "%s laptop with broken PST tables in BIOS detected.\n",
+ pr_warn("%s laptop with broken PST tables in BIOS detected\n",
d->ident);
- printk(KERN_WARNING PFX
- "You need to downgrade to 3A21 (09/09/2002), or try a newer "
- "BIOS than 3A71 (01/20/2003)\n");
- printk(KERN_WARNING PFX
- "cpufreq scaling has been disabled as a result of this.\n");
+ pr_warn("You need to downgrade to 3A21 (09/09/2002), or try a newer BIOS than 3A71 (01/20/2003)\n");
+ pr_warn("cpufreq scaling has been disabled as a result of this\n");
return 0;
}
@@ -616,40 +604,38 @@
fsb = (10 * cpu_khz) / fid_codes[fidvidstatus.bits.CFID];
if (!fsb) {
- printk(KERN_WARNING PFX "can not determine bus frequency\n");
+ pr_warn("can not determine bus frequency\n");
return -EINVAL;
}
pr_debug("FSB: %3dMHz\n", fsb/1000);
if (dmi_check_system(powernow_dmi_table) || acpi_force) {
- printk(KERN_INFO PFX "PSB/PST known to be broken. "
- "Trying ACPI instead\n");
+ pr_info("PSB/PST known to be broken - trying ACPI instead\n");
result = powernow_acpi_init();
} else {
result = powernow_decode_bios(fidvidstatus.bits.MFID,
fidvidstatus.bits.SVID);
if (result) {
- printk(KERN_INFO PFX "Trying ACPI perflib\n");
+ pr_info("Trying ACPI perflib\n");
maximum_speed = 0;
minimum_speed = -1;
latency = 0;
result = powernow_acpi_init();
if (result) {
- printk(KERN_INFO PFX
- "ACPI and legacy methods failed\n");
+ pr_info("ACPI and legacy methods failed\n");
}
} else {
/* SGTC use the bus clock as timer */
latency = fixup_sgtc();
- printk(KERN_INFO PFX "SGTC: %d\n", latency);
+ pr_info("SGTC: %d\n", latency);
}
}
if (result)
return result;
- printk(KERN_INFO PFX "Minimum speed %d MHz. Maximum speed %d MHz.\n",
- minimum_speed/1000, maximum_speed/1000);
+ pr_info("Minimum speed %d MHz - Maximum speed %d MHz\n",
+ minimum_speed/1000, maximum_speed/1000);
policy->cpuinfo.transition_latency =
cpufreq_scale(2000000UL, fsb, latency);
diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
index 39ac78c..54c4536 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -36,12 +36,56 @@
#include <asm/reg.h>
#include <asm/smp.h> /* Required for cpu_sibling_mask() in UP configs */
#include <asm/opal.h>
+#include <linux/timer.h>
#define POWERNV_MAX_PSTATES 256
#define PMSR_PSAFE_ENABLE (1UL << 30)
#define PMSR_SPR_EM_DISABLE (1UL << 31)
#define PMSR_MAX(x) ((x >> 32) & 0xFF)
+#define MAX_RAMP_DOWN_TIME 5120
+/*
+ * On an idle system we want the global pstate to ramp down from max value to
+ * min over a span of ~5 secs. Also we want it to initially ramp down slowly and
+ * then ramp down rapidly later on.
+ *
+ * This gives a percentage rampdown for time elapsed in milliseconds.
+ * ramp_down_percentage = ((ms * ms) >> 18)
+ * ~= 3.8 * (sec * sec)
+ *
+ * At 0 ms ramp_down_percent = 0
+ * At 5120 ms ramp_down_percent = 100
+ */
+#define ramp_down_percent(time) ((time * time) >> 18)
+
+/* Interval after which the timer is queued to bring down global pstate */
+#define GPSTATE_TIMER_INTERVAL 2000
+
+/**
+ * struct global_pstate_info - Per policy data structure to maintain history of
+ * global pstates
+ * @highest_lpstate: The local pstate from which we are ramping down
+ * @elapsed_time: Time in ms spent in ramping down from
+ * highest_lpstate
+ * @last_sampled_time: Time from boot in ms when global pstates were
+ * last set
+ * @last_lpstate: Last set value of the local pstate
+ * @last_gpstate: Last set value of the global pstate
+ * @timer: Used for ramping down if the CPU goes idle for
+ * a long time with the global pstate held high
+ * @gpstate_lock: A spinlock to maintain synchronization between
+ * routines called by the timer handler and
+ * governor's target_index calls
+ */
+struct global_pstate_info {
+ int highest_lpstate;
+ unsigned int elapsed_time;
+ unsigned int last_sampled_time;
+ int last_lpstate;
+ int last_gpstate;
+ spinlock_t gpstate_lock;
+ struct timer_list timer;
+};
+
static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES+1];
static bool rebooting, throttled, occ_reset;
@@ -94,6 +138,17 @@
int nr_pstates;
} powernv_pstate_info;
+static inline void reset_gpstates(struct cpufreq_policy *policy)
+{
+ struct global_pstate_info *gpstates = policy->driver_data;
+
+ gpstates->highest_lpstate = 0;
+ gpstates->elapsed_time = 0;
+ gpstates->last_sampled_time = 0;
+ gpstates->last_lpstate = 0;
+ gpstates->last_gpstate = 0;
+}
+
/*
* Initialize the freq table based on data obtained
* from the firmware passed via device-tree
@@ -285,6 +340,7 @@
struct powernv_smp_call_data {
unsigned int freq;
int pstate_id;
+ int gpstate_id;
};
/*
@@ -343,19 +399,21 @@
* (struct powernv_smp_call_data *) and the pstate_id which needs to be set
* on this CPU should be present in freq_data->pstate_id.
*/
-static void set_pstate(void *freq_data)
+static void set_pstate(void *data)
{
unsigned long val;
- unsigned long pstate_ul =
- ((struct powernv_smp_call_data *) freq_data)->pstate_id;
+ struct powernv_smp_call_data *freq_data = data;
+ unsigned long pstate_ul = freq_data->pstate_id;
+ unsigned long gpstate_ul = freq_data->gpstate_id;
val = get_pmspr(SPRN_PMCR);
val = val & 0x0000FFFFFFFFFFFFULL;
pstate_ul = pstate_ul & 0xFF;
+ gpstate_ul = gpstate_ul & 0xFF;
/* Set both global(bits 56..63) and local(bits 48..55) PStates */
- val = val | (pstate_ul << 56) | (pstate_ul << 48);
+ val = val | (gpstate_ul << 56) | (pstate_ul << 48);
pr_debug("Setting cpu %d pmcr to %016lX\n",
raw_smp_processor_id(), val);
@@ -424,6 +482,111 @@
}
}
+/**
+ * calc_global_pstate - Calculate global pstate
+ * @elapsed_time: Elapsed time in milliseconds
+ * @local_pstate: New local pstate
+ * @highest_lpstate: pstate from which it is ramping down
+ *
+ * Finds the appropriate global pstate based on the pstate from which it is
+ * ramping down and the time elapsed while ramping down. It follows a quadratic
+ * equation which ensures that ramping down to pmin completes within 5 seconds.
+ */
+static inline int calc_global_pstate(unsigned int elapsed_time,
+ int highest_lpstate, int local_pstate)
+{
+ int pstate_diff;
+
+ /*
+ * Using ramp_down_percent we get the percentage of ramp-down
+ * expected so far. The difference between highest_lpstate and
+ * powernv_pstate_info.min gives the absolute number of pstates that
+ * will eventually be dropped by the end of 5 seconds; scaling it by
+ * that percentage gives the number of pstates to drop now.
+ */
+ pstate_diff = ((int)ramp_down_percent(elapsed_time) *
+ (highest_lpstate - powernv_pstate_info.min)) / 100;
+
+ /* Ensure that the global pstate is >= the local pstate */
+ if (highest_lpstate - pstate_diff < local_pstate)
+ return local_pstate;
+ else
+ return highest_lpstate - pstate_diff;
+}
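With MAX_RAMP_DOWN_TIME = 5120, the >> 18 in ramp_down_percent() makes the percentage land exactly on 100 at the end of the window. A worked check of the arithmetic (editorial illustration, not part of the patch):

/*
 * ramp_down_percent(t) = (t * t) >> 18, i.e. t^2 / 262144
 *
 * t = 2560 ms (halfway): 2560 * 2560 = 6553600;  6553600 >> 18 = 25
 * t = 5120 ms (the end): 5120 * 5120 = 26214400; 26214400 >> 18 = 100
 *
 * So if highest_lpstate sits 40 pstates above powernv_pstate_info.min,
 * halfway through the ramp pstate_diff = 25 * 40 / 100 = 10, and the
 * global pstate has dropped 10 steps (unless the local pstate is higher).
 */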
+
+static inline void queue_gpstate_timer(struct global_pstate_info *gpstates)
+{
+ unsigned int timer_interval;
+
+ /*
+ * Set up the timer to fire after GPSTATE_TIMER_INTERVAL ms, but
+ * clamp it so the total ramp-down time never exceeds
+ * MAX_RAMP_DOWN_TIME ms; in that case the timer fires exactly when
+ * MAX_RAMP_DOWN_TIME ms of ramp-down time have elapsed.
+ */
+ if ((gpstates->elapsed_time + GPSTATE_TIMER_INTERVAL)
+ > MAX_RAMP_DOWN_TIME)
+ timer_interval = MAX_RAMP_DOWN_TIME - gpstates->elapsed_time;
+ else
+ timer_interval = GPSTATE_TIMER_INTERVAL;
+
+ mod_timer_pinned(&gpstates->timer, jiffies +
+ msecs_to_jiffies(timer_interval));
+}
+
+/**
+ * gpstate_timer_handler - ramp down the global pstate over time
+ *
+ * @data: pointer to cpufreq_policy on which timer was queued
+ *
+ * This handler brings the global pstate closer to the local pstate
+ * according to the quadratic equation. It queues a new timer if the
+ * global pstate is still not equal to the local pstate.
+ */
+void gpstate_timer_handler(unsigned long data)
+{
+ struct cpufreq_policy *policy = (struct cpufreq_policy *)data;
+ struct global_pstate_info *gpstates = policy->driver_data;
+ int gpstate_id;
+ unsigned int time_diff = jiffies_to_msecs(jiffies)
+ - gpstates->last_sampled_time;
+ struct powernv_smp_call_data freq_data;
+
+ if (!spin_trylock(&gpstates->gpstate_lock))
+ return;
+
+ gpstates->last_sampled_time += time_diff;
+ gpstates->elapsed_time += time_diff;
+ freq_data.pstate_id = gpstates->last_lpstate;
+
+ if ((gpstates->last_gpstate == freq_data.pstate_id) ||
+ (gpstates->elapsed_time > MAX_RAMP_DOWN_TIME)) {
+ gpstate_id = freq_data.pstate_id;
+ reset_gpstates(policy);
+ gpstates->highest_lpstate = freq_data.pstate_id;
+ } else {
+ gpstate_id = calc_global_pstate(gpstates->elapsed_time,
+ gpstates->highest_lpstate,
+ freq_data.pstate_id);
+ }
+
+ /*
+ * If the local pstate equals the global pstate, ramp-down is over,
+ * so the timer need not be queued.
+ */
+ if (gpstate_id != freq_data.pstate_id)
+ queue_gpstate_timer(gpstates);
+
+ freq_data.gpstate_id = gpstate_id;
+ gpstates->last_gpstate = freq_data.gpstate_id;
+ gpstates->last_lpstate = freq_data.pstate_id;
+
+ spin_unlock(&gpstates->gpstate_lock);
+
+ /* Timer may get migrated to a different CPU on CPU hot-unplug */
+ smp_call_function_any(policy->cpus, set_pstate, &freq_data, 1);
+}
+
/*
* powernv_cpufreq_target_index: Sets the frequency corresponding to
* the cpufreq table entry indexed by new_index on the cpus in the
@@ -433,6 +596,8 @@
unsigned int new_index)
{
struct powernv_smp_call_data freq_data;
+ unsigned int cur_msec, gpstate_id;
+ struct global_pstate_info *gpstates = policy->driver_data;
if (unlikely(rebooting) && new_index != get_nominal_index())
return 0;
@@ -440,28 +605,81 @@
if (!throttled)
powernv_cpufreq_throttle_check(NULL);
+ cur_msec = jiffies_to_msecs(get_jiffies_64());
+
+ spin_lock(&gpstates->gpstate_lock);
freq_data.pstate_id = powernv_freqs[new_index].driver_data;
+ if (!gpstates->last_sampled_time) {
+ gpstate_id = freq_data.pstate_id;
+ gpstates->highest_lpstate = freq_data.pstate_id;
+ goto gpstates_done;
+ }
+
+ if (gpstates->last_gpstate > freq_data.pstate_id) {
+ gpstates->elapsed_time += cur_msec -
+ gpstates->last_sampled_time;
+
+ /*
+ * If it has been ramping down for more than MAX_RAMP_DOWN_TIME,
+ * reset all global pstate related data and set it equal to the
+ * local pstate to start fresh.
+ */
+ if (gpstates->elapsed_time > MAX_RAMP_DOWN_TIME) {
+ reset_gpstates(policy);
+ gpstates->highest_lpstate = freq_data.pstate_id;
+ gpstate_id = freq_data.pstate_id;
+ } else {
+ /* Elapsed time is less than 5 seconds; continue to ramp down */
+ gpstate_id = calc_global_pstate(gpstates->elapsed_time,
+ gpstates->highest_lpstate,
+ freq_data.pstate_id);
+ }
+ } else {
+ reset_gpstates(policy);
+ gpstates->highest_lpstate = freq_data.pstate_id;
+ gpstate_id = freq_data.pstate_id;
+ }
+
+ /*
+ * If the local pstate equals the global pstate, ramp-down is over,
+ * so the timer need not be queued.
+ */
+ if (gpstate_id != freq_data.pstate_id)
+ queue_gpstate_timer(gpstates);
+ else
+ del_timer_sync(&gpstates->timer);
+
+gpstates_done:
+ freq_data.gpstate_id = gpstate_id;
+ gpstates->last_sampled_time = cur_msec;
+ gpstates->last_gpstate = freq_data.gpstate_id;
+ gpstates->last_lpstate = freq_data.pstate_id;
+
+ spin_unlock(&gpstates->gpstate_lock);
+
/*
* Use smp_call_function to send IPI and execute the
* mtspr on target CPU. We could do that without IPI
* if current CPU is within policy->cpus (core)
*/
smp_call_function_any(policy->cpus, set_pstate, &freq_data, 1);
-
return 0;
}
static int powernv_cpufreq_cpu_init(struct cpufreq_policy *policy)
{
- int base, i;
+ int base, i, ret;
+ struct kernfs_node *kn;
+ struct global_pstate_info *gpstates;
base = cpu_first_thread_sibling(policy->cpu);
for (i = 0; i < threads_per_core; i++)
cpumask_set_cpu(base + i, policy->cpus);
- if (!policy->driver_data) {
+ kn = kernfs_find_and_get(policy->kobj.sd, throttle_attr_grp.name);
+ if (!kn) {
int ret;
ret = sysfs_create_group(&policy->kobj, &throttle_attr_grp);
@@ -470,13 +688,37 @@
policy->cpu);
return ret;
}
- /*
- * policy->driver_data is used as a flag for one-time
- * creation of throttle sysfs files.
- */
- policy->driver_data = policy;
+ } else {
+ kernfs_put(kn);
}
- return cpufreq_table_validate_and_show(policy, powernv_freqs);
+
+ gpstates = kzalloc(sizeof(*gpstates), GFP_KERNEL);
+ if (!gpstates)
+ return -ENOMEM;
+
+ policy->driver_data = gpstates;
+
+ /* initialize timer */
+ init_timer_deferrable(&gpstates->timer);
+ gpstates->timer.data = (unsigned long)policy;
+ gpstates->timer.function = gpstate_timer_handler;
+ gpstates->timer.expires = jiffies +
+ msecs_to_jiffies(GPSTATE_TIMER_INTERVAL);
+ spin_lock_init(&gpstates->gpstate_lock);
+ ret = cpufreq_table_validate_and_show(policy, powernv_freqs);
+
+ if (ret < 0)
+ kfree(policy->driver_data);
+
+ return ret;
+}
+
+static int powernv_cpufreq_cpu_exit(struct cpufreq_policy *policy)
+{
+ /* timer is deleted in cpufreq_cpu_stop() */
+ kfree(policy->driver_data);
+
+ return 0;
}
static int powernv_cpufreq_reboot_notifier(struct notifier_block *nb,
@@ -604,15 +846,19 @@
static void powernv_cpufreq_stop_cpu(struct cpufreq_policy *policy)
{
struct powernv_smp_call_data freq_data;
+ struct global_pstate_info *gpstates = policy->driver_data;
freq_data.pstate_id = powernv_pstate_info.min;
+ freq_data.gpstate_id = powernv_pstate_info.min;
smp_call_function_single(policy->cpu, set_pstate, &freq_data, 1);
+ del_timer_sync(&gpstates->timer);
}
static struct cpufreq_driver powernv_cpufreq_driver = {
.name = "powernv-cpufreq",
.flags = CPUFREQ_CONST_LOOPS,
.init = powernv_cpufreq_cpu_init,
+ .exit = powernv_cpufreq_cpu_exit,
.verify = cpufreq_generic_frequency_table_verify,
.target_index = powernv_cpufreq_target_index,
.get = powernv_cpufreq_get,
diff --git a/drivers/cpufreq/ppc_cbe_cpufreq.h b/drivers/cpufreq/ppc_cbe_cpufreq.h
index b4c00a5..3eace72 100644
--- a/drivers/cpufreq/ppc_cbe_cpufreq.h
+++ b/drivers/cpufreq/ppc_cbe_cpufreq.h
@@ -17,7 +17,7 @@
int cbe_cpufreq_set_pmode_pmi(int cpu, unsigned int pmode);
-#if defined(CONFIG_CPU_FREQ_CBE_PMI) || defined(CONFIG_CPU_FREQ_CBE_PMI_MODULE)
+#if IS_ENABLED(CONFIG_CPU_FREQ_CBE_PMI)
extern bool cbe_cpufreq_has_pmi;
#else
#define cbe_cpufreq_has_pmi (0)
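IS_ENABLED(CONFIG_FOO) is true when the option is built in or modular, so one test replaces the defined(CONFIG_FOO) || defined(CONFIG_FOO_MODULE) pair above. Unlike plain #ifdef it also works in ordinary C expressions; a hedged sketch with a hypothetical option name:

#include <linux/kconfig.h>

#if IS_ENABLED(CONFIG_MY_FEATURE)	/* covers both =y and =m */
extern bool my_feature_present;
#endif

static void probe_feature(void)
{
	/* Also usable as a (compile-time constant) C condition */
	if (IS_ENABLED(CONFIG_MY_FEATURE))
		pr_info("feature compiled in or as a module\n");
}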
diff --git a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c
index 7969f76..7c4cd5c 100644
--- a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c
+++ b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c
@@ -23,7 +23,7 @@
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/timer.h>
-#include <linux/module.h>
+#include <linux/init.h>
#include <linux/of_platform.h>
#include <asm/processor.h>
@@ -142,15 +142,4 @@
return 0;
}
-
-static void __exit cbe_cpufreq_pmi_exit(void)
-{
- cpufreq_unregister_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER);
- pmi_unregister_handler(&cbe_pmi_handler);
-}
-
-module_init(cbe_cpufreq_pmi_init);
-module_exit(cbe_cpufreq_pmi_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>");
+device_initcall(cbe_cpufreq_pmi_init);
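With the exit path gone and <linux/module.h> swapped for <linux/init.h>, the notifier setup becomes built-in only; device_initcall() is the conventional replacement for module_init() in non-modular code since nothing will ever unregister it. A hedged sketch of the pattern with hypothetical names:

#include <linux/init.h>

/* Built-in only: runs once during boot, no matching exit path */
static int __init my_notifier_init(void)
{
	/* register notifiers / handlers here */
	return 0;
}
device_initcall(my_notifier_init);	/* was: module_init(my_notifier_init) */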
diff --git a/drivers/cpufreq/pxa2xx-cpufreq.c b/drivers/cpufreq/pxa2xx-cpufreq.c
index 46fee15..ce345bf 100644
--- a/drivers/cpufreq/pxa2xx-cpufreq.c
+++ b/drivers/cpufreq/pxa2xx-cpufreq.c
@@ -29,6 +29,8 @@
*
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
@@ -186,8 +188,7 @@
ret = regulator_set_voltage(vcc_core, vmin, vmax);
if (ret)
- pr_err("cpufreq: Failed to set vcc_core in [%dmV..%dmV]\n",
- vmin, vmax);
+ pr_err("Failed to set vcc_core in [%dmV..%dmV]\n", vmin, vmax);
return ret;
}
@@ -195,10 +196,10 @@
{
vcc_core = regulator_get(NULL, "vcc_core");
if (IS_ERR(vcc_core)) {
- pr_info("cpufreq: Didn't find vcc_core regulator\n");
+ pr_info("Didn't find vcc_core regulator\n");
vcc_core = NULL;
} else {
- pr_info("cpufreq: Found vcc_core regulator\n");
+ pr_info("Found vcc_core regulator\n");
}
}
#else
@@ -233,9 +234,8 @@
{
if (!pxa27x_maxfreq) {
pxa27x_maxfreq = 416000;
- printk(KERN_INFO "PXA CPU 27x max frequency not defined "
- "(pxa27x_maxfreq), assuming pxa271 with %dkHz maxfreq\n",
- pxa27x_maxfreq);
+ pr_info("PXA CPU 27x max frequency not defined (pxa27x_maxfreq), assuming pxa271 with %dkHz maxfreq\n",
+ pxa27x_maxfreq);
} else {
pxa27x_maxfreq *= 1000;
}
@@ -408,7 +408,7 @@
*/
if (cpu_is_pxa25x()) {
find_freq_tables(&pxa255_freq_table, &pxa255_freqs);
- pr_info("PXA255 cpufreq using %s frequency table\n",
+ pr_info("using %s frequency table\n",
pxa255_turbo_table ? "turbo" : "run");
cpufreq_table_validate_and_show(policy, pxa255_freq_table);
@@ -417,7 +417,7 @@
cpufreq_table_validate_and_show(policy, pxa27x_freq_table);
}
- printk(KERN_INFO "PXA CPU frequency change support initialized\n");
+ pr_info("frequency change support initialized\n");
return 0;
}
diff --git a/drivers/cpufreq/qoriq-cpufreq.c b/drivers/cpufreq/qoriq-cpufreq.c
index b23e525..53d8c3f 100644
--- a/drivers/cpufreq/qoriq-cpufreq.c
+++ b/drivers/cpufreq/qoriq-cpufreq.c
@@ -301,10 +301,11 @@
return -ENODEV;
}
-static int __exit qoriq_cpufreq_cpu_exit(struct cpufreq_policy *policy)
+static int qoriq_cpufreq_cpu_exit(struct cpufreq_policy *policy)
{
struct cpu_data *data = policy->driver_data;
+ cpufreq_cooling_unregister(data->cdev);
kfree(data->pclk);
kfree(data->table);
kfree(data);
@@ -333,8 +334,8 @@
cpud->cdev = of_cpufreq_cooling_register(np,
policy->related_cpus);
- if (IS_ERR(cpud->cdev)) {
- pr_err("Failed to register cooling device cpu%d: %ld\n",
+ if (IS_ERR(cpud->cdev) && PTR_ERR(cpud->cdev) != -ENOSYS) {
+ pr_err("cpu%d is not running as cooling device: %ld\n",
policy->cpu, PTR_ERR(cpud->cdev));
cpud->cdev = NULL;
@@ -348,7 +349,7 @@
.name = "qoriq_cpufreq",
.flags = CPUFREQ_CONST_LOOPS,
.init = qoriq_cpufreq_cpu_init,
- .exit = __exit_p(qoriq_cpufreq_cpu_exit),
+ .exit = qoriq_cpufreq_cpu_exit,
.verify = cpufreq_generic_frequency_table_verify,
.target_index = qoriq_cpufreq_target,
.get = cpufreq_generic_get,
diff --git a/drivers/cpufreq/s3c2412-cpufreq.c b/drivers/cpufreq/s3c2412-cpufreq.c
index eb26213..b04b6f0 100644
--- a/drivers/cpufreq/s3c2412-cpufreq.c
+++ b/drivers/cpufreq/s3c2412-cpufreq.c
@@ -10,6 +10,8 @@
* published by the Free Software Foundation.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/init.h>
#include <linux/module.h>
#include <linux/interrupt.h>
@@ -197,21 +199,20 @@
hclk = clk_get(NULL, "hclk");
if (IS_ERR(hclk)) {
- printk(KERN_ERR "%s: cannot find hclk clock\n", __func__);
+ pr_err("cannot find hclk clock\n");
return -ENOENT;
}
fclk = clk_get(NULL, "fclk");
if (IS_ERR(fclk)) {
- printk(KERN_ERR "%s: cannot find fclk clock\n", __func__);
+ pr_err("cannot find fclk clock\n");
goto err_fclk;
}
fclk_rate = clk_get_rate(fclk);
if (fclk_rate > 200000000) {
- printk(KERN_INFO
- "%s: fclk %ld MHz, assuming 266MHz capable part\n",
- __func__, fclk_rate / 1000000);
+ pr_info("fclk %ld MHz, assuming 266MHz capable part\n",
+ fclk_rate / 1000000);
s3c2412_cpufreq_info.max.fclk = 266000000;
s3c2412_cpufreq_info.max.hclk = 133000000;
s3c2412_cpufreq_info.max.pclk = 66000000;
@@ -219,13 +220,13 @@
armclk = clk_get(NULL, "armclk");
if (IS_ERR(armclk)) {
- printk(KERN_ERR "%s: cannot find arm clock\n", __func__);
+ pr_err("cannot find arm clock\n");
goto err_armclk;
}
xtal = clk_get(NULL, "xtal");
if (IS_ERR(xtal)) {
- printk(KERN_ERR "%s: cannot find xtal clock\n", __func__);
+ pr_err("cannot find xtal clock\n");
goto err_xtal;
}
diff --git a/drivers/cpufreq/s3c2440-cpufreq.c b/drivers/cpufreq/s3c2440-cpufreq.c
index 0129f5c..d0d75b6 100644
--- a/drivers/cpufreq/s3c2440-cpufreq.c
+++ b/drivers/cpufreq/s3c2440-cpufreq.c
@@ -11,6 +11,8 @@
* published by the Free Software Foundation.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/init.h>
#include <linux/module.h>
#include <linux/interrupt.h>
@@ -66,7 +68,7 @@
__func__, fclk, armclk, hclk_max);
if (armclk > fclk) {
- printk(KERN_WARNING "%s: armclk > fclk\n", __func__);
+ pr_warn("%s: armclk > fclk\n", __func__);
armclk = fclk;
}
@@ -273,7 +275,7 @@
armclk = s3c_cpufreq_clk_get(NULL, "armclk");
if (IS_ERR(xtal) || IS_ERR(hclk) || IS_ERR(fclk) || IS_ERR(armclk)) {
- printk(KERN_ERR "%s: failed to get clocks\n", __func__);
+ pr_err("%s: failed to get clocks\n", __func__);
return -ENOENT;
}
diff --git a/drivers/cpufreq/s3c24xx-cpufreq-debugfs.c b/drivers/cpufreq/s3c24xx-cpufreq-debugfs.c
index 9b7b428..4d976e8 100644
--- a/drivers/cpufreq/s3c24xx-cpufreq-debugfs.c
+++ b/drivers/cpufreq/s3c24xx-cpufreq-debugfs.c
@@ -10,6 +10,8 @@
* published by the Free Software Foundation.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/init.h>
#include <linux/export.h>
#include <linux/interrupt.h>
@@ -178,7 +180,7 @@
{
dbgfs_root = debugfs_create_dir("s3c-cpufreq", NULL);
if (IS_ERR(dbgfs_root)) {
- printk(KERN_ERR "%s: error creating debugfs root\n", __func__);
+ pr_err("%s: error creating debugfs root\n", __func__);
return PTR_ERR(dbgfs_root);
}
diff --git a/drivers/cpufreq/s3c24xx-cpufreq.c b/drivers/cpufreq/s3c24xx-cpufreq.c
index 68ef8fd..ae8eaed 100644
--- a/drivers/cpufreq/s3c24xx-cpufreq.c
+++ b/drivers/cpufreq/s3c24xx-cpufreq.c
@@ -10,6 +10,8 @@
* published by the Free Software Foundation.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/init.h>
#include <linux/module.h>
#include <linux/interrupt.h>
@@ -175,7 +177,7 @@
cpu_new.freq.fclk = cpu_new.pll.frequency;
if (s3c_cpufreq_calcdivs(&cpu_new) < 0) {
- printk(KERN_ERR "no divisors for %d\n", target_freq);
+ pr_err("no divisors for %d\n", target_freq);
goto err_notpossible;
}
@@ -187,7 +189,7 @@
if (cpu_new.freq.hclk != cpu_cur.freq.hclk) {
if (s3c_cpufreq_calcio(&cpu_new) < 0) {
- printk(KERN_ERR "%s: no IO timings\n", __func__);
+ pr_err("%s: no IO timings\n", __func__);
goto err_notpossible;
}
}
@@ -262,7 +264,7 @@
return 0;
err_notpossible:
- printk(KERN_ERR "no compatible settings for %d\n", target_freq);
+ pr_err("no compatible settings for %d\n", target_freq);
return -EINVAL;
}
@@ -331,7 +333,7 @@
&index);
if (ret < 0) {
- printk(KERN_ERR "%s: no PLL available\n", __func__);
+ pr_err("%s: no PLL available\n", __func__);
goto err_notpossible;
}
@@ -346,7 +348,7 @@
return s3c_cpufreq_settarget(policy, target_freq, pll);
err_notpossible:
- printk(KERN_ERR "no compatible settings for %d\n", target_freq);
+ pr_err("no compatible settings for %d\n", target_freq);
return -EINVAL;
}
@@ -356,7 +358,7 @@
clk = clk_get(dev, name);
if (IS_ERR(clk))
- printk(KERN_ERR "cpufreq: failed to get clock '%s'\n", name);
+ pr_err("failed to get clock '%s'\n", name);
return clk;
}
@@ -378,15 +380,16 @@
if (IS_ERR(clk_fclk) || IS_ERR(clk_hclk) || IS_ERR(clk_pclk) ||
IS_ERR(_clk_mpll) || IS_ERR(clk_arm) || IS_ERR(_clk_xtal)) {
- printk(KERN_ERR "%s: could not get clock(s)\n", __func__);
+ pr_err("%s: could not get clock(s)\n", __func__);
return -ENOENT;
}
- printk(KERN_INFO "%s: clocks f=%lu,h=%lu,p=%lu,a=%lu\n", __func__,
- clk_get_rate(clk_fclk) / 1000,
- clk_get_rate(clk_hclk) / 1000,
- clk_get_rate(clk_pclk) / 1000,
- clk_get_rate(clk_arm) / 1000);
+ pr_info("%s: clocks f=%lu,h=%lu,p=%lu,a=%lu\n",
+ __func__,
+ clk_get_rate(clk_fclk) / 1000,
+ clk_get_rate(clk_hclk) / 1000,
+ clk_get_rate(clk_pclk) / 1000,
+ clk_get_rate(clk_arm) / 1000);
return 0;
}
@@ -424,7 +427,7 @@
ret = s3c_cpufreq_settarget(NULL, suspend_freq, &suspend_pll);
if (ret) {
- printk(KERN_ERR "%s: failed to reset pll/freq\n", __func__);
+ pr_err("%s: failed to reset pll/freq\n", __func__);
return ret;
}
@@ -449,13 +452,12 @@
int s3c_cpufreq_register(struct s3c_cpufreq_info *info)
{
if (!info || !info->name) {
- printk(KERN_ERR "%s: failed to pass valid information\n",
- __func__);
+ pr_err("%s: failed to pass valid information\n", __func__);
return -EINVAL;
}
- printk(KERN_INFO "S3C24XX CPU Frequency driver, %s cpu support\n",
- info->name);
+ pr_info("S3C24XX CPU Frequency driver, %s cpu support\n",
+ info->name);
/* check our driver info has valid data */
@@ -478,7 +480,7 @@
struct s3c_cpufreq_board *ours;
if (!board) {
- printk(KERN_INFO "%s: no board data\n", __func__);
+ pr_info("%s: no board data\n", __func__);
return -EINVAL;
}
@@ -487,7 +489,7 @@
ours = kzalloc(sizeof(*ours), GFP_KERNEL);
if (ours == NULL) {
- printk(KERN_ERR "%s: no memory\n", __func__);
+ pr_err("%s: no memory\n", __func__);
return -ENOMEM;
}
@@ -502,15 +504,15 @@
int ret;
if (!cpu_cur.info->get_iotiming) {
- printk(KERN_ERR "%s: get_iotiming undefined\n", __func__);
+ pr_err("%s: get_iotiming undefined\n", __func__);
return -ENOENT;
}
- printk(KERN_INFO "%s: working out IO settings\n", __func__);
+ pr_info("%s: working out IO settings\n", __func__);
ret = (cpu_cur.info->get_iotiming)(&cpu_cur, &s3c24xx_iotiming);
if (ret)
- printk(KERN_ERR "%s: failed to get timings\n", __func__);
+ pr_err("%s: failed to get timings\n", __func__);
return ret;
}
@@ -561,7 +563,7 @@
val = calc_locktime(rate, cpu_cur.info->locktime_u) << bits;
val |= calc_locktime(rate, cpu_cur.info->locktime_m);
- printk(KERN_INFO "%s: new locktime is 0x%08x\n", __func__, val);
+ pr_info("%s: new locktime is 0x%08x\n", __func__, val);
__raw_writel(val, S3C2410_LOCKTIME);
}
@@ -580,7 +582,7 @@
ftab = kzalloc(sizeof(*ftab) * size, GFP_KERNEL);
if (!ftab) {
- printk(KERN_ERR "%s: no memory for tables\n", __func__);
+ pr_err("%s: no memory for tables\n", __func__);
return -ENOMEM;
}
@@ -608,15 +610,14 @@
if (cpu_cur.board->auto_io) {
ret = s3c_cpufreq_auto_io();
if (ret) {
- printk(KERN_ERR "%s: failed to get io timing\n",
+ pr_err("%s: failed to get io timing\n",
__func__);
goto out;
}
}
if (cpu_cur.board->need_io && !cpu_cur.info->set_iotiming) {
- printk(KERN_ERR "%s: no IO support registered\n",
- __func__);
+ pr_err("%s: no IO support registered\n", __func__);
ret = -EINVAL;
goto out;
}
@@ -666,9 +667,9 @@
vals += plls_no;
vals->frequency = CPUFREQ_TABLE_END;
- printk(KERN_INFO "cpufreq: %d PLL entries\n", plls_no);
+ pr_info("%d PLL entries\n", plls_no);
} else
- printk(KERN_ERR "cpufreq: no memory for PLL tables\n");
+ pr_err("no memory for PLL tables\n");
return vals ? 0 : -ENOMEM;
}
diff --git a/drivers/cpufreq/s5pv210-cpufreq.c b/drivers/cpufreq/s5pv210-cpufreq.c
index a145b31..06d8591 100644
--- a/drivers/cpufreq/s5pv210-cpufreq.c
+++ b/drivers/cpufreq/s5pv210-cpufreq.c
@@ -9,6 +9,8 @@
* published by the Free Software Foundation.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/init.h>
@@ -205,7 +207,7 @@
} else if (ch == DMC1) {
reg = (dmc_base[1] + 0x30);
} else {
- printk(KERN_ERR "Cannot find DMC port\n");
+ pr_err("Cannot find DMC port\n");
return;
}
@@ -534,7 +536,7 @@
mem_type = check_mem_type(dmc_base[0]);
if ((mem_type != LPDDR) && (mem_type != LPDDR2)) {
- printk(KERN_ERR "CPUFreq doesn't support this memory type\n");
+ pr_err("CPUFreq doesn't support this memory type\n");
ret = -EINVAL;
goto out_dmc1;
}
@@ -635,13 +637,13 @@
arm_regulator = regulator_get(NULL, "vddarm");
if (IS_ERR(arm_regulator)) {
- pr_err("failed to get regulator vddarm");
+ pr_err("failed to get regulator vddarm\n");
return PTR_ERR(arm_regulator);
}
int_regulator = regulator_get(NULL, "vddint");
if (IS_ERR(int_regulator)) {
- pr_err("failed to get regulator vddint");
+ pr_err("failed to get regulator vddint\n");
regulator_put(arm_regulator);
return PTR_ERR(int_regulator);
}
diff --git a/drivers/cpufreq/sc520_freq.c b/drivers/cpufreq/sc520_freq.c
index ac84e48..4225501 100644
--- a/drivers/cpufreq/sc520_freq.c
+++ b/drivers/cpufreq/sc520_freq.c
@@ -13,6 +13,8 @@
* 2005-03-30: - initial revision
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
@@ -30,8 +32,6 @@
static __u8 __iomem *cpuctl;
-#define PFX "sc520_freq: "
-
static struct cpufreq_frequency_table sc520_freq_table[] = {
{0, 0x01, 100000},
{0, 0x02, 133000},
@@ -44,8 +44,8 @@
switch (clockspeed_reg & 0x03) {
default:
- printk(KERN_ERR PFX "error: cpuctl register has unexpected "
- "value %02x\n", clockspeed_reg);
+ pr_err("error: cpuctl register has unexpected value %02x\n",
+ clockspeed_reg);
case 0x01:
return 100000;
case 0x02:
@@ -112,7 +112,7 @@
cpuctl = ioremap((unsigned long)(MMCR_BASE + OFFS_CPUCTL), 1);
if (!cpuctl) {
- printk(KERN_ERR "sc520_freq: error: failed to remap memory\n");
+ pr_err("sc520_freq: error: failed to remap memory\n");
return -ENOMEM;
}
diff --git a/drivers/cpufreq/scpi-cpufreq.c b/drivers/cpufreq/scpi-cpufreq.c
index de5e89b..e8a7bf5 100644
--- a/drivers/cpufreq/scpi-cpufreq.c
+++ b/drivers/cpufreq/scpi-cpufreq.c
@@ -18,6 +18,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/module.h>
#include <linux/platform_device.h>
@@ -38,35 +39,6 @@
return scpi_ops->dvfs_get_info(domain);
}
-static int scpi_opp_table_ops(struct device *cpu_dev, bool remove)
-{
- int idx, ret = 0;
- struct scpi_opp *opp;
- struct scpi_dvfs_info *info = scpi_get_dvfs_info(cpu_dev);
-
- if (IS_ERR(info))
- return PTR_ERR(info);
-
- if (!info->opps)
- return -EIO;
-
- for (opp = info->opps, idx = 0; idx < info->count; idx++, opp++) {
- if (remove)
- dev_pm_opp_remove(cpu_dev, opp->freq);
- else
- ret = dev_pm_opp_add(cpu_dev, opp->freq,
- opp->m_volt * 1000);
- if (ret) {
- dev_warn(cpu_dev, "failed to add opp %uHz %umV\n",
- opp->freq, opp->m_volt);
- while (idx-- > 0)
- dev_pm_opp_remove(cpu_dev, (--opp)->freq);
- return ret;
- }
- }
- return ret;
-}
-
static int scpi_get_transition_latency(struct device *cpu_dev)
{
struct scpi_dvfs_info *info = scpi_get_dvfs_info(cpu_dev);
@@ -76,21 +48,42 @@
return info->latency;
}
-static int scpi_init_opp_table(struct device *cpu_dev)
+static int scpi_init_opp_table(const struct cpumask *cpumask)
{
- return scpi_opp_table_ops(cpu_dev, false);
-}
+ int idx, ret;
+ struct scpi_opp *opp;
+ struct device *cpu_dev = get_cpu_device(cpumask_first(cpumask));
+ struct scpi_dvfs_info *info = scpi_get_dvfs_info(cpu_dev);
-static void scpi_free_opp_table(struct device *cpu_dev)
-{
- scpi_opp_table_ops(cpu_dev, true);
+ if (IS_ERR(info))
+ return PTR_ERR(info);
+
+ if (!info->opps)
+ return -EIO;
+
+ for (opp = info->opps, idx = 0; idx < info->count; idx++, opp++) {
+ ret = dev_pm_opp_add(cpu_dev, opp->freq, opp->m_volt * 1000);
+ if (ret) {
+ dev_warn(cpu_dev, "failed to add opp %uHz %umV\n",
+ opp->freq, opp->m_volt);
+ while (idx-- > 0)
+ dev_pm_opp_remove(cpu_dev, (--opp)->freq);
+ return ret;
+ }
+ }
+
+ ret = dev_pm_opp_set_sharing_cpus(cpu_dev, cpumask);
+ if (ret)
+ dev_err(cpu_dev, "%s: failed to mark OPPs as shared: %d\n",
+ __func__, ret);
+ return ret;
}
static struct cpufreq_arm_bL_ops scpi_cpufreq_ops = {
.name = "scpi",
.get_transition_latency = scpi_get_transition_latency,
.init_opp_table = scpi_init_opp_table,
- .free_opp_table = scpi_free_opp_table,
+ .free_opp_table = dev_pm_opp_cpumask_remove_table,
};
static int scpi_cpufreq_probe(struct platform_device *pdev)
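The bespoke add/remove helper is gone: scpi_init_opp_table() now populates the table once per cluster cpumask and marks it shared with dev_pm_opp_set_sharing_cpus(), while teardown goes through the generic dev_pm_opp_cpumask_remove_table() helper wired directly into .free_opp_table above. A hedged sketch of that lifecycle with hypothetical frequency and voltage values:

struct device *cpu_dev = get_cpu_device(cpumask_first(cpumask));

dev_pm_opp_add(cpu_dev, 1000000000, 1100000);	/* 1 GHz at 1.1 V (in uV) */
dev_pm_opp_set_sharing_cpus(cpu_dev, cpumask);	/* one table for the mask */
/* ... cpufreq uses the shared table ... */
dev_pm_opp_cpumask_remove_table(cpumask);	/* removes it for every CPU */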
diff --git a/drivers/cpufreq/speedstep-centrino.c b/drivers/cpufreq/speedstep-centrino.c
index 7d4a315..41bc539 100644
--- a/drivers/cpufreq/speedstep-centrino.c
+++ b/drivers/cpufreq/speedstep-centrino.c
@@ -13,6 +13,8 @@
* Copyright (C) 2003 Jeremy Fitzhardinge <jeremy@goop.org>
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
@@ -27,7 +29,6 @@
#include <asm/cpufeature.h>
#include <asm/cpu_device_id.h>
-#define PFX "speedstep-centrino: "
#define MAINTAINER "linux-pm@vger.kernel.org"
#define INTEL_MSR_RANGE (0xffff)
@@ -386,8 +387,7 @@
/* check to see if it stuck */
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
- printk(KERN_INFO PFX
- "couldn't enable Enhanced SpeedStep\n");
+ pr_info("couldn't enable Enhanced SpeedStep\n");
return -ENODEV;
}
}
diff --git a/drivers/cpufreq/speedstep-ich.c b/drivers/cpufreq/speedstep-ich.c
index 37555c6..b86953a 100644
--- a/drivers/cpufreq/speedstep-ich.c
+++ b/drivers/cpufreq/speedstep-ich.c
@@ -18,6 +18,8 @@
* SPEEDSTEP - DEFINITIONS *
*********************************************************************/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
@@ -68,13 +70,13 @@
/* get PMBASE */
pci_read_config_dword(speedstep_chipset_dev, 0x40, &pmbase);
if (!(pmbase & 0x01)) {
- printk(KERN_ERR "speedstep-ich: could not find speedstep register\n");
+ pr_err("could not find speedstep register\n");
return -ENODEV;
}
pmbase &= 0xFFFFFFFE;
if (!pmbase) {
- printk(KERN_ERR "speedstep-ich: could not find speedstep register\n");
+ pr_err("could not find speedstep register\n");
return -ENODEV;
}
@@ -136,7 +138,7 @@
pr_debug("change to %u MHz succeeded\n",
speedstep_get_frequency(speedstep_processor) / 1000);
else
- printk(KERN_ERR "cpufreq: change failed - I/O error\n");
+ pr_err("change failed - I/O error\n");
return;
}
diff --git a/drivers/cpufreq/speedstep-lib.c b/drivers/cpufreq/speedstep-lib.c
index 15d3214..1b80621 100644
--- a/drivers/cpufreq/speedstep-lib.c
+++ b/drivers/cpufreq/speedstep-lib.c
@@ -8,6 +8,8 @@
* BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
@@ -153,7 +155,7 @@
fsb = 333333;
break;
default:
- printk(KERN_ERR "PCORE - MSR_FSB_FREQ undefined value");
+ pr_err("PCORE - MSR_FSB_FREQ undefined value\n");
}
rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp);
@@ -453,11 +455,8 @@
*/
if (*transition_latency > 10000000 ||
*transition_latency < 50000) {
- printk(KERN_WARNING PFX "frequency transition "
- "measured seems out of range (%u "
- "nSec), falling back to a safe one of"
- "%u nSec.\n",
- *transition_latency, 500000);
+ pr_warn("frequency transition measured seems out of range (%u nSec), falling back to a safe one of %u nSec\n",
+ *transition_latency, 500000);
*transition_latency = 500000;
}
}
diff --git a/drivers/cpufreq/speedstep-smi.c b/drivers/cpufreq/speedstep-smi.c
index 819229e..770a9ae 100644
--- a/drivers/cpufreq/speedstep-smi.c
+++ b/drivers/cpufreq/speedstep-smi.c
@@ -12,6 +12,8 @@
* SPEEDSTEP - DEFINITIONS *
*********************************************************************/
+#define pr_fmt(fmt) "cpufreq: " fmt
+
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
@@ -204,9 +206,8 @@
(speedstep_freqs[new_state].frequency / 1000),
retry, result);
else
- printk(KERN_ERR "cpufreq: change to state %u "
- "failed with new_state %u and result %u\n",
- state, new_state, result);
+ pr_err("change to state %u failed with new_state %u and result %u\n",
+ state, new_state, result);
return;
}
diff --git a/drivers/cpufreq/tegra124-cpufreq.c b/drivers/cpufreq/tegra124-cpufreq.c
index 20bcceb..4353025 100644
--- a/drivers/cpufreq/tegra124-cpufreq.c
+++ b/drivers/cpufreq/tegra124-cpufreq.c
@@ -14,7 +14,6 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/clk.h>
-#include <linux/cpufreq-dt.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/kernel.h>
@@ -69,10 +68,6 @@
clk_set_parent(priv->cpu_clk, priv->pllx_clk);
}
-static struct cpufreq_dt_platform_data cpufreq_dt_pd = {
- .independent_clocks = false,
-};
-
static int tegra124_cpufreq_probe(struct platform_device *pdev)
{
struct tegra124_cpufreq_priv *priv;
@@ -129,8 +124,6 @@
cpufreq_dt_devinfo.name = "cpufreq-dt";
cpufreq_dt_devinfo.parent = &pdev->dev;
- cpufreq_dt_devinfo.data = &cpufreq_dt_pd;
- cpufreq_dt_devinfo.size_data = sizeof(cpufreq_dt_pd);
priv->cpufreq_dt_pdev =
platform_device_register_full(&cpufreq_dt_devinfo);
diff --git a/drivers/cpufreq/vexpress-spc-cpufreq.c b/drivers/cpufreq/vexpress-spc-cpufreq.c
index 433e93f..87e5bdc 100644
--- a/drivers/cpufreq/vexpress-spc-cpufreq.c
+++ b/drivers/cpufreq/vexpress-spc-cpufreq.c
@@ -18,6 +18,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/module.h>
#include <linux/platform_device.h>
@@ -26,8 +27,9 @@
#include "arm_big_little.h"
-static int ve_spc_init_opp_table(struct device *cpu_dev)
+static int ve_spc_init_opp_table(const struct cpumask *cpumask)
{
+ struct device *cpu_dev = get_cpu_device(cpumask_first(cpumask));
/*
* platform specific SPC code must initialise the opp table
* so just check if the OPP count is non-zero
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index f996efc..2b8e6ce 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -173,7 +173,7 @@
struct cpuidle_state *target_state = &drv->states[index];
bool broadcast = !!(target_state->flags & CPUIDLE_FLAG_TIMER_STOP);
- ktime_t time_start, time_end;
+ u64 time_start, time_end;
s64 diff;
/*
@@ -195,13 +195,13 @@
sched_idle_set_state(target_state);
trace_cpu_idle_rcuidle(index, dev->cpu);
- time_start = ktime_get();
+ time_start = local_clock();
stop_critical_timings();
entered_state = target_state->enter(dev, drv, index);
start_critical_timings();
- time_end = ktime_get();
+ time_end = local_clock();
trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu);
/* The cpu is no longer idle or about to enter idle. */
@@ -217,7 +217,11 @@
if (!cpuidle_state_is_coupled(drv, entered_state))
local_irq_enable();
- diff = ktime_to_us(ktime_sub(time_end, time_start));
+ /*
+ * local_clock() returns the time in nanoseconds, so shift right
+ * by 10 (i.e. divide by 1024) to get a microsecond-based time.
+ */
+ diff = (time_end - time_start) >> 10;
if (diff > INT_MAX)
diff = INT_MAX;
@@ -433,6 +437,8 @@
list_del(&dev->device_list);
per_cpu(cpuidle_devices, dev->cpu) = NULL;
module_put(drv->owner);
+
+ dev->registered = 0;
}
static void __cpuidle_device_init(struct cpuidle_device *dev)
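One point worth spelling out about the cpuidle hunk above: a right shift by 10 divides by 1024, not 1000, so the computed residency comes out roughly 2.3% smaller than an exact nanosecond-to-microsecond conversion. That is acceptable for idle-state accounting and avoids a 64-bit division on the idle hot path. A sketch of the two options:

/* Exact conversion: one 64-bit division per idle exit */
static inline s64 ns_to_us_exact(u64 ns)
{
        return div_u64(ns, NSEC_PER_USEC);
}

/* Approximation used above: a shift, ~2.3% low (1024 vs 1000) */
static inline s64 ns_to_us_approx(u64 ns)
{
        return ns >> 10;
}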
diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig
index 4de78c5..78dac0e 100644
--- a/drivers/devfreq/Kconfig
+++ b/drivers/devfreq/Kconfig
@@ -64,30 +64,32 @@
Otherwise, the governor does not change the frequency
given at the initialization.
+config DEVFREQ_GOV_PASSIVE
+ tristate "Passive"
+ help
+ Sets the frequency based on the frequency of its parent devfreq
+ device. This governor does not change the frequency by itself
+ through sysfs entries. The passive governor recommends that the
+ devfreq device use the OPP table to get the frequency/voltage.
+
comment "DEVFREQ Drivers"
-config ARM_EXYNOS4_BUS_DEVFREQ
- bool "ARM Exynos4210/4212/4412 Memory Bus DEVFREQ Driver"
- depends on (CPU_EXYNOS4210 || SOC_EXYNOS4212 || SOC_EXYNOS4412) && !ARCH_MULTIPLATFORM
+config ARM_EXYNOS_BUS_DEVFREQ
+ bool "ARM EXYNOS Generic Memory Bus DEVFREQ Driver"
+ depends on ARCH_EXYNOS
select DEVFREQ_GOV_SIMPLE_ONDEMAND
+ select DEVFREQ_GOV_PASSIVE
+ select DEVFREQ_EVENT_EXYNOS_PPMU
+ select PM_DEVFREQ_EVENT
select PM_OPP
help
- This adds the DEVFREQ driver for Exynos4210 memory bus (vdd_int)
- and Exynos4212/4412 memory interface and bus (vdd_mif + vdd_int).
- It reads PPMU counters of memory controllers and adjusts
- the operating frequencies and voltages with OPP support.
+ This adds the common DEVFREQ driver for the Exynos memory bus. Exynos
+ SoCs have more than one memory bus group (e.g., the MIF and INT
+ blocks), and each group can contain many memory bus blocks. It reads
+ the PPMU counters of the memory controllers via a DEVFREQ-event device
+ and adjusts the operating frequencies and voltages with OPP support.
This does not yet operate with optimal voltages.
-config ARM_EXYNOS5_BUS_DEVFREQ
- tristate "ARM Exynos5250 Bus DEVFREQ Driver"
- depends on SOC_EXYNOS5250
- select DEVFREQ_GOV_SIMPLE_ONDEMAND
- select PM_OPP
- help
- This adds the DEVFREQ driver for Exynos5250 bus interface (vdd_int).
- It reads PPMU counters of memory controllers and adjusts the
- operating frequencies and voltages with OPP support.
-
config ARM_TEGRA_DEVFREQ
tristate "Tegra DEVFREQ Driver"
depends on ARCH_TEGRA_124_SOC
diff --git a/drivers/devfreq/Makefile b/drivers/devfreq/Makefile
index 5134f9e..09f11d9 100644
--- a/drivers/devfreq/Makefile
+++ b/drivers/devfreq/Makefile
@@ -4,10 +4,10 @@
obj-$(CONFIG_DEVFREQ_GOV_PERFORMANCE) += governor_performance.o
obj-$(CONFIG_DEVFREQ_GOV_POWERSAVE) += governor_powersave.o
obj-$(CONFIG_DEVFREQ_GOV_USERSPACE) += governor_userspace.o
+obj-$(CONFIG_DEVFREQ_GOV_PASSIVE) += governor_passive.o
# DEVFREQ Drivers
-obj-$(CONFIG_ARM_EXYNOS4_BUS_DEVFREQ) += exynos/
-obj-$(CONFIG_ARM_EXYNOS5_BUS_DEVFREQ) += exynos/
+obj-$(CONFIG_ARM_EXYNOS_BUS_DEVFREQ) += exynos-bus.o
obj-$(CONFIG_ARM_TEGRA_DEVFREQ) += tegra-devfreq.o
# DEVFREQ Event Drivers
diff --git a/drivers/devfreq/devfreq-event.c b/drivers/devfreq/devfreq-event.c
index 38bf144..39b048e 100644
--- a/drivers/devfreq/devfreq-event.c
+++ b/drivers/devfreq/devfreq-event.c
@@ -235,6 +235,11 @@
mutex_lock(&devfreq_event_list_lock);
list_for_each_entry(edev, &devfreq_event_list, node) {
+ if (edev->dev.parent && edev->dev.parent->of_node == node)
+ goto out;
+ }
+
+ list_for_each_entry(edev, &devfreq_event_list, node) {
if (!strcmp(edev->desc->name, node->name))
goto out;
}
diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 984c5e9..1d6c803 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -25,6 +25,7 @@
#include <linux/list.h>
#include <linux/printk.h>
#include <linux/hrtimer.h>
+#include <linux/of.h>
#include "governor.h"
static struct class *devfreq_class;
@@ -188,6 +189,29 @@
return ERR_PTR(-ENODEV);
}
+static int devfreq_notify_transition(struct devfreq *devfreq,
+ struct devfreq_freqs *freqs, unsigned int state)
+{
+ if (!devfreq)
+ return -EINVAL;
+
+ switch (state) {
+ case DEVFREQ_PRECHANGE:
+ srcu_notifier_call_chain(&devfreq->transition_notifier_list,
+ DEVFREQ_PRECHANGE, freqs);
+ break;
+
+ case DEVFREQ_POSTCHANGE:
+ srcu_notifier_call_chain(&devfreq->transition_notifier_list,
+ DEVFREQ_POSTCHANGE, freqs);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
/* Load monitoring helper functions for governors use */
/**
@@ -199,7 +223,8 @@
*/
int update_devfreq(struct devfreq *devfreq)
{
- unsigned long freq;
+ struct devfreq_freqs freqs;
+ unsigned long freq, cur_freq;
int err = 0;
u32 flags = 0;
@@ -233,10 +258,22 @@
flags |= DEVFREQ_FLAG_LEAST_UPPER_BOUND; /* Use LUB */
}
+ if (devfreq->profile->get_cur_freq)
+ devfreq->profile->get_cur_freq(devfreq->dev.parent, &cur_freq);
+ else
+ cur_freq = devfreq->previous_freq;
+
+ freqs.old = cur_freq;
+ freqs.new = freq;
+ devfreq_notify_transition(devfreq, &freqs, DEVFREQ_PRECHANGE);
+
err = devfreq->profile->target(devfreq->dev.parent, &freq, flags);
if (err)
return err;
+ freqs.new = freq;
+ devfreq_notify_transition(devfreq, &freqs, DEVFREQ_POSTCHANGE);
+
if (devfreq->profile->freq_table)
if (devfreq_update_status(devfreq, freq))
dev_err(&devfreq->dev,
@@ -541,6 +578,8 @@
goto err_out;
}
+ srcu_init_notifier_head(&devfreq->transition_notifier_list);
+
mutex_unlock(&devfreq->lock);
mutex_lock(&devfreq_list_lock);
@@ -639,6 +678,49 @@
}
EXPORT_SYMBOL(devm_devfreq_add_device);
+#ifdef CONFIG_OF
+/**
+ * devfreq_get_devfreq_by_phandle - Get the devfreq device from devicetree
+ * @dev: instance to the given device
+ * @index: index into list of devfreq
+ *
+ * Return: the instance of devfreq device
+ */
+struct devfreq *devfreq_get_devfreq_by_phandle(struct device *dev, int index)
+{
+ struct device_node *node;
+ struct devfreq *devfreq;
+
+ if (!dev)
+ return ERR_PTR(-EINVAL);
+
+ if (!dev->of_node)
+ return ERR_PTR(-EINVAL);
+
+ node = of_parse_phandle(dev->of_node, "devfreq", index);
+ if (!node)
+ return ERR_PTR(-ENODEV);
+
+ mutex_lock(&devfreq_list_lock);
+ list_for_each_entry(devfreq, &devfreq_list, node) {
+ if (devfreq->dev.parent
+ && devfreq->dev.parent->of_node == node) {
+ mutex_unlock(&devfreq_list_lock);
+ return devfreq;
+ }
+ }
+ mutex_unlock(&devfreq_list_lock);
+
+ return ERR_PTR(-EPROBE_DEFER);
+}
+#else
+struct devfreq *devfreq_get_devfreq_by_phandle(struct device *dev, int index)
+{
+ return ERR_PTR(-ENODEV);
+}
+#endif /* CONFIG_OF */
+EXPORT_SYMBOL_GPL(devfreq_get_devfreq_by_phandle);
+
/**
* devm_devfreq_remove_device() - Resource-managed devfreq_remove_device()
* @dev: the device to add devfreq feature.
@@ -1266,6 +1348,129 @@
}
EXPORT_SYMBOL(devm_devfreq_unregister_opp_notifier);
+/**
+ * devfreq_register_notifier() - Register a driver with devfreq
+ * @devfreq: The devfreq object.
+ * @nb: The notifier block to register.
+ * @list: DEVFREQ_TRANSITION_NOTIFIER.
+ */
+int devfreq_register_notifier(struct devfreq *devfreq,
+ struct notifier_block *nb,
+ unsigned int list)
+{
+ int ret = 0;
+
+ if (!devfreq)
+ return -EINVAL;
+
+ switch (list) {
+ case DEVFREQ_TRANSITION_NOTIFIER:
+ ret = srcu_notifier_chain_register(
+ &devfreq->transition_notifier_list, nb);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(devfreq_register_notifier);
+
+/**
+ * devfreq_unregister_notifier() - Unregister a driver with devfreq
+ * @devfreq: The devfreq object.
+ * @nb: The notifier block to be unregistered.
+ * @list: DEVFREQ_TRANSITION_NOTIFIER.
+ */
+int devfreq_unregister_notifier(struct devfreq *devfreq,
+ struct notifier_block *nb,
+ unsigned int list)
+{
+ int ret = 0;
+
+ if (!devfreq)
+ return -EINVAL;
+
+ switch (list) {
+ case DEVFREQ_TRANSITION_NOTIFIER:
+ ret = srcu_notifier_chain_unregister(
+ &devfreq->transition_notifier_list, nb);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(devfreq_unregister_notifier);
+
+struct devfreq_notifier_devres {
+ struct devfreq *devfreq;
+ struct notifier_block *nb;
+ unsigned int list;
+};
+
+static void devm_devfreq_notifier_release(struct device *dev, void *res)
+{
+ struct devfreq_notifier_devres *this = res;
+
+ devfreq_unregister_notifier(this->devfreq, this->nb, this->list);
+}
+
+/**
+ * devm_devfreq_register_notifier() - Resource-managed
+ * devfreq_register_notifier()
+ * @dev: The devfreq user device. (parent of devfreq)
+ * @devfreq: The devfreq object.
+ * @nb: The notifier block to register.
+ * @list: DEVFREQ_TRANSITION_NOTIFIER.
+ */
+int devm_devfreq_register_notifier(struct device *dev,
+ struct devfreq *devfreq,
+ struct notifier_block *nb,
+ unsigned int list)
+{
+ struct devfreq_notifier_devres *ptr;
+ int ret;
+
+ ptr = devres_alloc(devm_devfreq_notifier_release, sizeof(*ptr),
+ GFP_KERNEL);
+ if (!ptr)
+ return -ENOMEM;
+
+ ret = devfreq_register_notifier(devfreq, nb, list);
+ if (ret) {
+ devres_free(ptr);
+ return ret;
+ }
+
+ ptr->devfreq = devfreq;
+ ptr->nb = nb;
+ ptr->list = list;
+ devres_add(dev, ptr);
+
+ return 0;
+}
+EXPORT_SYMBOL(devm_devfreq_register_notifier);
+
+/**
+ * devm_devfreq_unregister_notifier() - Resource-managed
+ * devfreq_unregister_notifier()
+ * @dev: The devfreq user device. (parent of devfreq)
+ * @devfreq: The devfreq object.
+ * @nb: The notifier block to be unregistered.
+ * @list: DEVFREQ_TRANSITION_NOTIFIER.
+ */
+void devm_devfreq_unregister_notifier(struct device *dev,
+ struct devfreq *devfreq,
+ struct notifier_block *nb,
+ unsigned int list)
+{
+ WARN_ON(devres_release(dev, devm_devfreq_notifier_release,
+ devm_devfreq_dev_match, devfreq));
+}
+EXPORT_SYMBOL(devm_devfreq_unregister_notifier);
+
MODULE_AUTHOR("MyungJoo Ham <myungjoo.ham@samsung.com>");
MODULE_DESCRIPTION("devfreq class support");
MODULE_LICENSE("GPL");
diff --git a/drivers/devfreq/event/Kconfig b/drivers/devfreq/event/Kconfig
index a11720a..1e8b4f4 100644
--- a/drivers/devfreq/event/Kconfig
+++ b/drivers/devfreq/event/Kconfig
@@ -13,6 +13,14 @@
if PM_DEVFREQ_EVENT
+config DEVFREQ_EVENT_EXYNOS_NOCP
+ bool "EXYNOS NoC (Network On Chip) Probe DEVFREQ event Driver"
+ depends on ARCH_EXYNOS
+ select PM_OPP
+ help
+ This adds the devfreq-event driver for the Exynos SoC. It provides
+ NoC (Network on Chip) probe counters to measure the bandwidth of the
+ AXI bus.
+
config DEVFREQ_EVENT_EXYNOS_PPMU
bool "EXYNOS PPMU (Platform Performance Monitoring Unit) DEVFREQ event Driver"
depends on ARCH_EXYNOS
diff --git a/drivers/devfreq/event/Makefile b/drivers/devfreq/event/Makefile
index be146ea..3d6afd3 100644
--- a/drivers/devfreq/event/Makefile
+++ b/drivers/devfreq/event/Makefile
@@ -1,2 +1,4 @@
# Exynos DEVFREQ Event Drivers
+
+obj-$(CONFIG_DEVFREQ_EVENT_EXYNOS_NOCP) += exynos-nocp.o
obj-$(CONFIG_DEVFREQ_EVENT_EXYNOS_PPMU) += exynos-ppmu.o
diff --git a/drivers/devfreq/event/exynos-nocp.c b/drivers/devfreq/event/exynos-nocp.c
new file mode 100644
index 0000000..6b6a5f3
--- /dev/null
+++ b/drivers/devfreq/event/exynos-nocp.c
@@ -0,0 +1,304 @@
+/*
+ * exynos-nocp.c - EXYNOS NoC (Network On Chip) Probe support
+ *
+ * Copyright (c) 2016 Samsung Electronics Co., Ltd.
+ * Author : Chanwoo Choi <cw00.choi@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/clk.h>
+#include <linux/module.h>
+#include <linux/devfreq-event.h>
+#include <linux/kernel.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+
+#include "exynos-nocp.h"
+
+struct exynos_nocp {
+ struct devfreq_event_dev *edev;
+ struct devfreq_event_desc desc;
+
+ struct device *dev;
+
+ struct regmap *regmap;
+ struct clk *clk;
+};
+
+/*
+ * The devfreq-event ops structure for nocp probe.
+ */
+static int exynos_nocp_set_event(struct devfreq_event_dev *edev)
+{
+ struct exynos_nocp *nocp = devfreq_event_get_drvdata(edev);
+ int ret;
+
+ /* Disable NoC probe */
+ ret = regmap_update_bits(nocp->regmap, NOCP_MAIN_CTL,
+ NOCP_MAIN_CTL_STATEN_MASK, 0);
+ if (ret < 0) {
+ dev_err(nocp->dev, "failed to disable the NoC probe device\n");
+ return ret;
+ }
+
+ /* Set a statistics dump period to 0 */
+ ret = regmap_write(nocp->regmap, NOCP_STAT_PERIOD, 0x0);
+ if (ret < 0)
+ goto out;
+
+ /* Set the IntEvent fields of *_SRC */
+ ret = regmap_update_bits(nocp->regmap, NOCP_COUNTERS_0_SRC,
+ NOCP_CNT_SRC_INTEVENT_MASK,
+ NOCP_CNT_SRC_INTEVENT_BYTE_MASK);
+ if (ret < 0)
+ goto out;
+
+ ret = regmap_update_bits(nocp->regmap, NOCP_COUNTERS_1_SRC,
+ NOCP_CNT_SRC_INTEVENT_MASK,
+ NOCP_CNT_SRC_INTEVENT_CHAIN_MASK);
+ if (ret < 0)
+ goto out;
+
+ ret = regmap_update_bits(nocp->regmap, NOCP_COUNTERS_2_SRC,
+ NOCP_CNT_SRC_INTEVENT_MASK,
+ NOCP_CNT_SRC_INTEVENT_CYCLE_MASK);
+ if (ret < 0)
+ goto out;
+
+ ret = regmap_update_bits(nocp->regmap, NOCP_COUNTERS_3_SRC,
+ NOCP_CNT_SRC_INTEVENT_MASK,
+ NOCP_CNT_SRC_INTEVENT_CHAIN_MASK);
+ if (ret < 0)
+ goto out;
+
+
+ /* Set an alarm with a max/min value of 0 to generate StatALARM */
+ ret = regmap_write(nocp->regmap, NOCP_STAT_ALARM_MIN, 0x0);
+ if (ret < 0)
+ goto out;
+
+ ret = regmap_write(nocp->regmap, NOCP_STAT_ALARM_MAX, 0x0);
+ if (ret < 0)
+ goto out;
+
+ /* Set AlarmMode */
+ ret = regmap_update_bits(nocp->regmap, NOCP_COUNTERS_0_ALARM_MODE,
+ NOCP_CNT_ALARM_MODE_MASK,
+ NOCP_CNT_ALARM_MODE_MIN_MAX_MASK);
+ if (ret < 0)
+ goto out;
+
+ ret = regmap_update_bits(nocp->regmap, NOCP_COUNTERS_1_ALARM_MODE,
+ NOCP_CNT_ALARM_MODE_MASK,
+ NOCP_CNT_ALARM_MODE_MIN_MAX_MASK);
+ if (ret < 0)
+ goto out;
+
+ ret = regmap_update_bits(nocp->regmap, NOCP_COUNTERS_2_ALARM_MODE,
+ NOCP_CNT_ALARM_MODE_MASK,
+ NOCP_CNT_ALARM_MODE_MIN_MAX_MASK);
+ if (ret < 0)
+ goto out;
+
+ ret = regmap_update_bits(nocp->regmap, NOCP_COUNTERS_3_ALARM_MODE,
+ NOCP_CNT_ALARM_MODE_MASK,
+ NOCP_CNT_ALARM_MODE_MIN_MAX_MASK);
+ if (ret < 0)
+ goto out;
+
+ /* Enable the measurements by setting AlarmEn and StatEn */
+ ret = regmap_update_bits(nocp->regmap, NOCP_MAIN_CTL,
+ NOCP_MAIN_CTL_STATEN_MASK | NOCP_MAIN_CTL_ALARMEN_MASK,
+ NOCP_MAIN_CTL_STATEN_MASK | NOCP_MAIN_CTL_ALARMEN_MASK);
+ if (ret < 0)
+ goto out;
+
+ /* Set GlobalEN */
+ ret = regmap_update_bits(nocp->regmap, NOCP_CFG_CTL,
+ NOCP_CFG_CTL_GLOBALEN_MASK,
+ NOCP_CFG_CTL_GLOBALEN_MASK);
+ if (ret < 0)
+ goto out;
+
+ /* Enable NoC probe */
+ ret = regmap_update_bits(nocp->regmap, NOCP_MAIN_CTL,
+ NOCP_MAIN_CTL_STATEN_MASK,
+ NOCP_MAIN_CTL_STATEN_MASK);
+ if (ret < 0)
+ goto out;
+
+ return 0;
+
+out:
+ /* Reset NoC probe */
+ if (regmap_update_bits(nocp->regmap, NOCP_MAIN_CTL,
+ NOCP_MAIN_CTL_STATEN_MASK, 0)) {
+ dev_err(nocp->dev, "Failed to reset NoC probe device\n");
+ }
+
+ return ret;
+}
+
+static int exynos_nocp_get_event(struct devfreq_event_dev *edev,
+ struct devfreq_event_data *edata)
+{
+ struct exynos_nocp *nocp = devfreq_event_get_drvdata(edev);
+ unsigned int counter[4];
+ int ret;
+
+ /* Read cycle count */
+ ret = regmap_read(nocp->regmap, NOCP_COUNTERS_0_VAL, &counter[0]);
+ if (ret < 0)
+ goto out;
+
+ ret = regmap_read(nocp->regmap, NOCP_COUNTERS_1_VAL, &counter[1]);
+ if (ret < 0)
+ goto out;
+
+ ret = regmap_read(nocp->regmap, NOCP_COUNTERS_2_VAL, &counter[2]);
+ if (ret < 0)
+ goto out;
+
+ ret = regmap_read(nocp->regmap, NOCP_COUNTERS_3_VAL, &counter[3]);
+ if (ret < 0)
+ goto out;
+
+ edata->load_count = ((counter[1] << 16) | counter[0]);
+ edata->total_count = ((counter[3] << 16) | counter[2]);
+
+ dev_dbg(&edev->dev, "%s (event: %lu/%lu)\n", edev->desc->name,
+ edata->load_count, edata->total_count);
+
+ return 0;
+
+out:
+ edata->load_count = 0;
+ edata->total_count = 0;
+
+ dev_err(nocp->dev, "Failed to read the counter of NoC probe device\n");
+
+ return ret;
+}
+
+static const struct devfreq_event_ops exynos_nocp_ops = {
+ .set_event = exynos_nocp_set_event,
+ .get_event = exynos_nocp_get_event,
+};
+
+static const struct of_device_id exynos_nocp_id_match[] = {
+ { .compatible = "samsung,exynos5420-nocp", },
+ { /* sentinel */ },
+};
+
+static struct regmap_config exynos_nocp_regmap_config = {
+ .reg_bits = 32,
+ .val_bits = 32,
+ .reg_stride = 4,
+ .max_register = NOCP_COUNTERS_3_VAL,
+};
+
+static int exynos_nocp_parse_dt(struct platform_device *pdev,
+ struct exynos_nocp *nocp)
+{
+ struct device *dev = nocp->dev;
+ struct device_node *np = dev->of_node;
+ struct resource *res;
+ void __iomem *base;
+
+ if (!np) {
+ dev_err(dev, "failed to find devicetree node\n");
+ return -EINVAL;
+ }
+
+ nocp->clk = devm_clk_get(dev, "nocp");
+ if (IS_ERR(nocp->clk))
+ nocp->clk = NULL;
+
+ /* Map the memory-mapped I/O used to control the NoC probe registers */
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (IS_ERR(res))
+ return PTR_ERR(res);
+
+ base = devm_ioremap_resource(dev, res);
+ if (IS_ERR(base))
+ return PTR_ERR(base);
+
+ exynos_nocp_regmap_config.max_register = resource_size(res) - 4;
+
+ nocp->regmap = devm_regmap_init_mmio(dev, base,
+ &exynos_nocp_regmap_config);
+ if (IS_ERR(nocp->regmap)) {
+ dev_err(dev, "failed to initialize regmap\n");
+ return PTR_ERR(nocp->regmap);
+ }
+
+ return 0;
+}
+
+static int exynos_nocp_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct device_node *np = dev->of_node;
+ struct exynos_nocp *nocp;
+ int ret;
+
+ nocp = devm_kzalloc(&pdev->dev, sizeof(*nocp), GFP_KERNEL);
+ if (!nocp)
+ return -ENOMEM;
+
+ nocp->dev = &pdev->dev;
+
+ /* Parse dt data to get resource */
+ ret = exynos_nocp_parse_dt(pdev, nocp);
+ if (ret < 0) {
+ dev_err(&pdev->dev,
+ "failed to parse devicetree for resource\n");
+ return ret;
+ }
+
+ /* Add devfreq-event device to measure the bandwidth of NoC */
+ nocp->desc.ops = &exynos_nocp_ops;
+ nocp->desc.driver_data = nocp;
+ nocp->desc.name = np->full_name;
+ nocp->edev = devm_devfreq_event_add_edev(&pdev->dev, &nocp->desc);
+ if (IS_ERR(nocp->edev)) {
+ dev_err(&pdev->dev,
+ "failed to add devfreq-event device\n");
+ return PTR_ERR(nocp->edev);
+ }
+ platform_set_drvdata(pdev, nocp);
+
+ clk_prepare_enable(nocp->clk);
+
+ pr_info("exynos-nocp: new NoC Probe device registered: %s\n",
+ dev_name(dev));
+
+ return 0;
+}
+
+static int exynos_nocp_remove(struct platform_device *pdev)
+{
+ struct exynos_nocp *nocp = platform_get_drvdata(pdev);
+
+ clk_disable_unprepare(nocp->clk);
+
+ return 0;
+}
+
+static struct platform_driver exynos_nocp_driver = {
+ .probe = exynos_nocp_probe,
+ .remove = exynos_nocp_remove,
+ .driver = {
+ .name = "exynos-nocp",
+ .of_match_table = exynos_nocp_id_match,
+ },
+};
+module_platform_driver(exynos_nocp_driver);
+
+MODULE_DESCRIPTION("Exynos NoC (Network on Chip) Probe driver");
+MODULE_AUTHOR("Chanwoo Choi <cw00.choi@samsung.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/devfreq/event/exynos-nocp.h b/drivers/devfreq/event/exynos-nocp.h
new file mode 100644
index 0000000..28564db
--- /dev/null
+++ b/drivers/devfreq/event/exynos-nocp.h
@@ -0,0 +1,78 @@
+/*
+ * exynos-nocp.h - EXYNOS NoC (Network on Chip) Probe header file
+ *
+ * Copyright (c) 2016 Samsung Electronics Co., Ltd.
+ * Author : Chanwoo Choi <cw00.choi@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __EXYNOS_NOCP_H__
+#define __EXYNOS_NOCP_H__
+
+enum nocp_reg {
+ NOCP_ID_REVISION_ID = 0x04,
+ NOCP_MAIN_CTL = 0x08,
+ NOCP_CFG_CTL = 0x0C,
+
+ NOCP_STAT_PERIOD = 0x24,
+ NOCP_STAT_GO = 0x28,
+ NOCP_STAT_ALARM_MIN = 0x2C,
+ NOCP_STAT_ALARM_MAX = 0x30,
+ NOCP_STAT_ALARM_STATUS = 0x34,
+ NOCP_STAT_ALARM_CLR = 0x38,
+
+ NOCP_COUNTERS_0_SRC = 0x138,
+ NOCP_COUNTERS_0_ALARM_MODE = 0x13C,
+ NOCP_COUNTERS_0_VAL = 0x140,
+
+ NOCP_COUNTERS_1_SRC = 0x14C,
+ NOCP_COUNTERS_1_ALARM_MODE = 0x150,
+ NOCP_COUNTERS_1_VAL = 0x154,
+
+ NOCP_COUNTERS_2_SRC = 0x160,
+ NOCP_COUNTERS_2_ALARM_MODE = 0x164,
+ NOCP_COUNTERS_2_VAL = 0x168,
+
+ NOCP_COUNTERS_3_SRC = 0x174,
+ NOCP_COUNTERS_3_ALARM_MODE = 0x178,
+ NOCP_COUNTERS_3_VAL = 0x17C,
+};
+
+/* NOCP_MAIN_CTL register */
+#define NOCP_MAIN_CTL_ERREN_MASK BIT(0)
+#define NOCP_MAIN_CTL_TRACEEN_MASK BIT(1)
+#define NOCP_MAIN_CTL_PAYLOADEN_MASK BIT(2)
+#define NOCP_MAIN_CTL_STATEN_MASK BIT(3)
+#define NOCP_MAIN_CTL_ALARMEN_MASK BIT(4)
+#define NOCP_MAIN_CTL_STATCONDDUMP_MASK BIT(5)
+#define NOCP_MAIN_CTL_INTRUSIVEMODE_MASK BIT(6)
+
+/* NOCP_CFG_CTL register */
+#define NOCP_CFG_CTL_GLOBALEN_MASK BIT(0)
+#define NOCP_CFG_CTL_ACTIVE_MASK BIT(1)
+
+/* NOCP_COUNTERS_x_SRC register */
+#define NOCP_CNT_SRC_INTEVENT_SHIFT 0
+#define NOCP_CNT_SRC_INTEVENT_MASK (0x1F << NOCP_CNT_SRC_INTEVENT_SHIFT)
+#define NOCP_CNT_SRC_INTEVENT_OFF_MASK (0x0 << NOCP_CNT_SRC_INTEVENT_SHIFT)
+#define NOCP_CNT_SRC_INTEVENT_CYCLE_MASK (0x1 << NOCP_CNT_SRC_INTEVENT_SHIFT)
+#define NOCP_CNT_SRC_INTEVENT_IDLE_MASK (0x2 << NOCP_CNT_SRC_INTEVENT_SHIFT)
+#define NOCP_CNT_SRC_INTEVENT_XFER_MASK (0x3 << NOCP_CNT_SRC_INTEVENT_SHIFT)
+#define NOCP_CNT_SRC_INTEVENT_BUSY_MASK (0x4 << NOCP_CNT_SRC_INTEVENT_SHIFT)
+#define NOCP_CNT_SRC_INTEVENT_WAIT_MASK (0x5 << NOCP_CNT_SRC_INTEVENT_SHIFT)
+#define NOCP_CNT_SRC_INTEVENT_PKT_MASK (0x6 << NOCP_CNT_SRC_INTEVENT_SHIFT)
+#define NOCP_CNT_SRC_INTEVENT_BYTE_MASK (0x8 << NOCP_CNT_SRC_INTEVENT_SHIFT)
+#define NOCP_CNT_SRC_INTEVENT_CHAIN_MASK (0x10 << NOCP_CNT_SRC_INTEVENT_SHIFT)
+
+/* NOCP_COUNTERS_x_ALARM_MODE register */
+#define NOCP_CNT_ALARM_MODE_SHIFT 0
+#define NOCP_CNT_ALARM_MODE_MASK (0x3 << NOCP_CNT_ALARM_MODE_SHIFT)
+#define NOCP_CNT_ALARM_MODE_OFF_MASK (0x0 << NOCP_CNT_ALARM_MODE_SHIFT)
+#define NOCP_CNT_ALARM_MODE_MIN_MASK (0x1 << NOCP_CNT_ALARM_MODE_SHIFT)
+#define NOCP_CNT_ALARM_MODE_MAX_MASK (0x2 << NOCP_CNT_ALARM_MODE_SHIFT)
+#define NOCP_CNT_ALARM_MODE_MIN_MAX_MASK (0x3 << NOCP_CNT_ALARM_MODE_SHIFT)
+
+#endif /* __EXYNOS_NOCP_H__ */
diff --git a/drivers/devfreq/exynos-bus.c b/drivers/devfreq/exynos-bus.c
new file mode 100644
index 0000000..2363d0a
--- /dev/null
+++ b/drivers/devfreq/exynos-bus.c
@@ -0,0 +1,570 @@
+/*
+ * Generic Exynos Bus frequency driver with DEVFREQ Framework
+ *
+ * Copyright (c) 2016 Samsung Electronics Co., Ltd.
+ * Author : Chanwoo Choi <cw00.choi@samsung.com>
+ *
+ * This driver supports the Exynos bus frequency feature by using the
+ * DEVFREQ framework and is based on drivers/devfreq/exynos/exynos4_bus.c.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/clk.h>
+#include <linux/devfreq.h>
+#include <linux/devfreq-event.h>
+#include <linux/device.h>
+#include <linux/export.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/pm_opp.h>
+#include <linux/platform_device.h>
+#include <linux/regulator/consumer.h>
+#include <linux/slab.h>
+
+#define DEFAULT_SATURATION_RATIO 40
+#define DEFAULT_VOLTAGE_TOLERANCE 2
+
+struct exynos_bus {
+ struct device *dev;
+
+ struct devfreq *devfreq;
+ struct devfreq_event_dev **edev;
+ unsigned int edev_count;
+ struct mutex lock;
+
+ struct dev_pm_opp *curr_opp;
+
+ struct regulator *regulator;
+ struct clk *clk;
+ unsigned int voltage_tolerance;
+ unsigned int ratio;
+};
+
+/*
+ * Control the devfreq-event devices to get the current state of the bus
+ */
+#define exynos_bus_ops_edev(ops) \
+static int exynos_bus_##ops(struct exynos_bus *bus) \
+{ \
+ int i, ret; \
+ \
+ for (i = 0; i < bus->edev_count; i++) { \
+ if (!bus->edev[i]) \
+ continue; \
+ ret = devfreq_event_##ops(bus->edev[i]); \
+ if (ret < 0) \
+ return ret; \
+ } \
+ \
+ return 0; \
+}
+exynos_bus_ops_edev(enable_edev);
+exynos_bus_ops_edev(disable_edev);
+exynos_bus_ops_edev(set_event);
+
+static int exynos_bus_get_event(struct exynos_bus *bus,
+ struct devfreq_event_data *edata)
+{
+ struct devfreq_event_data event_data;
+ unsigned long load_count = 0, total_count = 0;
+ int i, ret = 0;
+
+ for (i = 0; i < bus->edev_count; i++) {
+ if (!bus->edev[i])
+ continue;
+
+ ret = devfreq_event_get_event(bus->edev[i], &event_data);
+ if (ret < 0)
+ return ret;
+
+ if (i == 0 || event_data.load_count > load_count) {
+ load_count = event_data.load_count;
+ total_count = event_data.total_count;
+ }
+ }
+
+ edata->load_count = load_count;
+ edata->total_count = total_count;
+
+ return ret;
+}
+
+/*
+ * Must-have callback for the devfreq simple-ondemand governor
+ */
+static int exynos_bus_target(struct device *dev, unsigned long *freq, u32 flags)
+{
+ struct exynos_bus *bus = dev_get_drvdata(dev);
+ struct dev_pm_opp *new_opp;
+ unsigned long old_freq, new_freq, old_volt, new_volt, tol;
+ int ret = 0;
+
+ /* Get the new OPP instance for the new bus clock rate */
+ rcu_read_lock();
+ new_opp = devfreq_recommended_opp(dev, freq, flags);
+ if (IS_ERR(new_opp)) {
+ dev_err(dev, "failed to get recommended opp instance\n");
+ rcu_read_unlock();
+ return PTR_ERR(new_opp);
+ }
+
+ new_freq = dev_pm_opp_get_freq(new_opp);
+ new_volt = dev_pm_opp_get_voltage(new_opp);
+ old_freq = dev_pm_opp_get_freq(bus->curr_opp);
+ old_volt = dev_pm_opp_get_voltage(bus->curr_opp);
+ rcu_read_unlock();
+
+ if (old_freq == new_freq)
+ return 0;
+ tol = new_volt * bus->voltage_tolerance / 100;
+
+ /* Change voltage and frequency according to new OPP level */
+ mutex_lock(&bus->lock);
+
+ if (old_freq < new_freq) {
+ ret = regulator_set_voltage_tol(bus->regulator, new_volt, tol);
+ if (ret < 0) {
+ dev_err(bus->dev, "failed to set voltage\n");
+ goto out;
+ }
+ }
+
+ ret = clk_set_rate(bus->clk, new_freq);
+ if (ret < 0) {
+ dev_err(dev, "failed to change clock of bus\n");
+ clk_set_rate(bus->clk, old_freq);
+ goto out;
+ }
+
+ if (old_freq > new_freq) {
+ ret = regulator_set_voltage_tol(bus->regulator, new_volt, tol);
+ if (ret < 0) {
+ dev_err(bus->dev, "failed to set voltage\n");
+ goto out;
+ }
+ }
+ bus->curr_opp = new_opp;
+
+ dev_dbg(dev, "Set the frequency of bus (%lukHz -> %lukHz)\n",
+ old_freq/1000, new_freq/1000);
+out:
+ mutex_unlock(&bus->lock);
+
+ return ret;
+}
+
+static int exynos_bus_get_dev_status(struct device *dev,
+ struct devfreq_dev_status *stat)
+{
+ struct exynos_bus *bus = dev_get_drvdata(dev);
+ struct devfreq_event_data edata;
+ int ret;
+
+ rcu_read_lock();
+ stat->current_frequency = dev_pm_opp_get_freq(bus->curr_opp);
+ rcu_read_unlock();
+
+ ret = exynos_bus_get_event(bus, &edata);
+ if (ret < 0) {
+ stat->total_time = stat->busy_time = 0;
+ goto err;
+ }
+
+ stat->busy_time = (edata.load_count * 100) / bus->ratio;
+ stat->total_time = edata.total_count;
+
+ dev_dbg(dev, "Usage of devfreq-event : %lu/%lu\n", stat->busy_time,
+ stat->total_time);
+
+err:
+ ret = exynos_bus_set_event(bus);
+ if (ret < 0) {
+ dev_err(dev, "failed to set event to devfreq-event devices\n");
+ return ret;
+ }
+
+ return ret;
+}
+
+static void exynos_bus_exit(struct device *dev)
+{
+ struct exynos_bus *bus = dev_get_drvdata(dev);
+ int ret;
+
+ ret = exynos_bus_disable_edev(bus);
+ if (ret < 0)
+ dev_warn(dev, "failed to disable the devfreq-event devices\n");
+
+ if (bus->regulator)
+ regulator_disable(bus->regulator);
+
+ dev_pm_opp_of_remove_table(dev);
+ clk_disable_unprepare(bus->clk);
+}
+
+/*
+ * Must-have callback for the devfreq passive governor
+ */
+static int exynos_bus_passive_target(struct device *dev, unsigned long *freq,
+ u32 flags)
+{
+ struct exynos_bus *bus = dev_get_drvdata(dev);
+ struct dev_pm_opp *new_opp;
+ unsigned long old_freq, new_freq;
+ int ret = 0;
+
+ /* Get the new OPP instance for the new bus clock rate */
+ rcu_read_lock();
+ new_opp = devfreq_recommended_opp(dev, freq, flags);
+ if (IS_ERR(new_opp)) {
+ dev_err(dev, "failed to get recommended opp instance\n");
+ rcu_read_unlock();
+ return PTR_ERR(new_opp);
+ }
+
+ new_freq = dev_pm_opp_get_freq(new_opp);
+ old_freq = dev_pm_opp_get_freq(bus->curr_opp);
+ rcu_read_unlock();
+
+ if (old_freq == new_freq)
+ return 0;
+
+ /* Change the frequency according to new OPP level */
+ mutex_lock(&bus->lock);
+
+ ret = clk_set_rate(bus->clk, new_freq);
+ if (ret < 0) {
+ dev_err(dev, "failed to set the clock of bus\n");
+ goto out;
+ }
+
+ *freq = new_freq;
+ bus->curr_opp = new_opp;
+
+ dev_dbg(dev, "Set the frequency of bus (%lukHz -> %lukHz)\n",
+ old_freq/1000, new_freq/1000);
+out:
+ mutex_unlock(&bus->lock);
+
+ return ret;
+}
+
+static void exynos_bus_passive_exit(struct device *dev)
+{
+ struct exynos_bus *bus = dev_get_drvdata(dev);
+
+ dev_pm_opp_of_remove_table(dev);
+ clk_disable_unprepare(bus->clk);
+}
+
+static int exynos_bus_parent_parse_of(struct device_node *np,
+ struct exynos_bus *bus)
+{
+ struct device *dev = bus->dev;
+ int i, ret, count, size;
+
+ /* Get the regulator to provide each bus with the power */
+ bus->regulator = devm_regulator_get(dev, "vdd");
+ if (IS_ERR(bus->regulator)) {
+ dev_err(dev, "failed to get VDD regulator\n");
+ return PTR_ERR(bus->regulator);
+ }
+
+ ret = regulator_enable(bus->regulator);
+ if (ret < 0) {
+ dev_err(dev, "failed to enable VDD regulator\n");
+ return ret;
+ }
+
+ /*
+ * Get the devfreq-event devices used to read the current utilization
+ * of the buses. This raw data is used by the devfreq simple-ondemand
+ * governor.
+ */
+ count = devfreq_event_get_edev_count(dev);
+ if (count < 0) {
+ dev_err(dev, "failed to get the count of devfreq-event dev\n");
+ ret = count;
+ goto err_regulator;
+ }
+ bus->edev_count = count;
+
+ size = sizeof(*bus->edev) * count;
+ bus->edev = devm_kzalloc(dev, size, GFP_KERNEL);
+ if (!bus->edev) {
+ ret = -ENOMEM;
+ goto err_regulator;
+ }
+
+ for (i = 0; i < count; i++) {
+ bus->edev[i] = devfreq_event_get_edev_by_phandle(dev, i);
+ if (IS_ERR(bus->edev[i])) {
+ ret = -EPROBE_DEFER;
+ goto err_regulator;
+ }
+ }
+
+ /*
+ * Optionally, get the saturation ratio for the Exynos SoC.
+ * When measuring the utilization of each AXI bus with devfreq-event
+ * devices, the measured real cycles might be much lower than the
+ * total cycles of the bus during the sampling period. As a result,
+ * the devfreq simple-ondemand governor might not decide to change
+ * the current frequency because the utilization (= real cycles /
+ * total cycles) is too low. So this property is used to adjust the
+ * utilization when calculating the busy_time in
+ * exynos_bus_get_dev_status().
+ */
+ if (of_property_read_u32(np, "exynos,saturation-ratio", &bus->ratio))
+ bus->ratio = DEFAULT_SATURATION_RATIO;
+
+ if (of_property_read_u32(np, "exynos,voltage-tolerance",
+ &bus->voltage_tolerance))
+ bus->voltage_tolerance = DEFAULT_VOLTAGE_TOLERANCE;
+
+ return 0;
+
+err_regulator:
+ regulator_disable(bus->regulator);
+
+ return ret;
+}
+
+static int exynos_bus_parse_of(struct device_node *np,
+ struct exynos_bus *bus)
+{
+ struct device *dev = bus->dev;
+ unsigned long rate;
+ int ret;
+
+ /* Get the clock to provide each bus with source clock */
+ bus->clk = devm_clk_get(dev, "bus");
+ if (IS_ERR(bus->clk)) {
+ dev_err(dev, "failed to get bus clock\n");
+ return PTR_ERR(bus->clk);
+ }
+
+ ret = clk_prepare_enable(bus->clk);
+ if (ret < 0) {
+ dev_err(dev, "failed to get enable clock\n");
+ return ret;
+ }
+
+ /* Get the freq and voltage from OPP table to scale the bus freq */
+ rcu_read_lock();
+ ret = dev_pm_opp_of_add_table(dev);
+ if (ret < 0) {
+ dev_err(dev, "failed to get OPP table\n");
+ rcu_read_unlock();
+ goto err_clk;
+ }
+
+ rate = clk_get_rate(bus->clk);
+ bus->curr_opp = devfreq_recommended_opp(dev, &rate, 0);
+ if (IS_ERR(bus->curr_opp)) {
+ dev_err(dev, "failed to find dev_pm_opp\n");
+ rcu_read_unlock();
+ ret = PTR_ERR(bus->curr_opp);
+ goto err_opp;
+ }
+ rcu_read_unlock();
+
+ return 0;
+
+err_opp:
+ dev_pm_opp_of_remove_table(dev);
+err_clk:
+ clk_disable_unprepare(bus->clk);
+
+ return ret;
+}
+
+static int exynos_bus_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct device_node *np = dev->of_node;
+ struct devfreq_dev_profile *profile;
+ struct devfreq_simple_ondemand_data *ondemand_data;
+ struct devfreq_passive_data *passive_data;
+ struct devfreq *parent_devfreq;
+ struct exynos_bus *bus;
+ int ret, max_state;
+ unsigned long min_freq, max_freq;
+
+ if (!np) {
+ dev_err(dev, "failed to find devicetree node\n");
+ return -EINVAL;
+ }
+
+ bus = devm_kzalloc(&pdev->dev, sizeof(*bus), GFP_KERNEL);
+ if (!bus)
+ return -ENOMEM;
+ mutex_init(&bus->lock);
+ bus->dev = &pdev->dev;
+ platform_set_drvdata(pdev, bus);
+
+ /* Parse the device-tree to get the resource information */
+ ret = exynos_bus_parse_of(np, bus);
+ if (ret < 0)
+ goto err;
+
+ profile = devm_kzalloc(dev, sizeof(*profile), GFP_KERNEL);
+ if (!profile) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ if (of_parse_phandle(dev->of_node, "devfreq", 0))
+ goto passive;
+ else
+ ret = exynos_bus_parent_parse_of(np, bus);
+
+ if (ret < 0)
+ goto err;
+
+ /* Initialize the struct profile and governor data for parent device */
+ profile->polling_ms = 50;
+ profile->target = exynos_bus_target;
+ profile->get_dev_status = exynos_bus_get_dev_status;
+ profile->exit = exynos_bus_exit;
+
+ ondemand_data = devm_kzalloc(dev, sizeof(*ondemand_data), GFP_KERNEL);
+ if (!ondemand_data) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ ondemand_data->upthreshold = 40;
+ ondemand_data->downdifferential = 5;
+
+ /* Add devfreq device to monitor and handle the exynos bus */
+ bus->devfreq = devm_devfreq_add_device(dev, profile, "simple_ondemand",
+ ondemand_data);
+ if (IS_ERR(bus->devfreq)) {
+ dev_err(dev, "failed to add devfreq device\n");
+ ret = PTR_ERR(bus->devfreq);
+ goto err;
+ }
+
+ /* Register opp_notifier to catch the change of OPP */
+ ret = devm_devfreq_register_opp_notifier(dev, bus->devfreq);
+ if (ret < 0) {
+ dev_err(dev, "failed to register opp notifier\n");
+ goto err;
+ }
+
+ /*
+ * Enable devfreq-event to get raw data which is used to determine
+ * current bus load.
+ */
+ ret = exynos_bus_enable_edev(bus);
+ if (ret < 0) {
+ dev_err(dev, "failed to enable devfreq-event devices\n");
+ goto err;
+ }
+
+ ret = exynos_bus_set_event(bus);
+ if (ret < 0) {
+ dev_err(dev, "failed to set event to devfreq-event devices\n");
+ goto err;
+ }
+
+ goto out;
+passive:
+ /* Initialize the struct profile and governor data for passive device */
+ profile->target = exynos_bus_passive_target;
+ profile->exit = exynos_bus_passive_exit;
+
+ /* Get the instance of parent devfreq device */
+ parent_devfreq = devfreq_get_devfreq_by_phandle(dev, 0);
+ if (IS_ERR(parent_devfreq)) {
+ ret = -EPROBE_DEFER;
+ goto err;
+ }
+
+ passive_data = devm_kzalloc(dev, sizeof(*passive_data), GFP_KERNEL);
+ if (!passive_data) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ passive_data->parent = parent_devfreq;
+
+ /* Add devfreq device for exynos bus with passive governor */
+ bus->devfreq = devm_devfreq_add_device(dev, profile, "passive",
+ passive_data);
+ if (IS_ERR(bus->devfreq)) {
+ dev_err(dev,
+ "failed to add devfreq dev with passive governor\n");
+ ret = -EPROBE_DEFER;
+ goto err;
+ }
+
+out:
+ max_state = bus->devfreq->profile->max_state;
+ min_freq = (bus->devfreq->profile->freq_table[0] / 1000);
+ max_freq = (bus->devfreq->profile->freq_table[max_state - 1] / 1000);
+ pr_info("exynos-bus: new bus device registered: %s (%6ld KHz ~ %6ld KHz)\n",
+ dev_name(dev), min_freq, max_freq);
+
+ return 0;
+
+err:
+ dev_pm_opp_of_remove_table(dev);
+ clk_disable_unprepare(bus->clk);
+
+ return ret;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int exynos_bus_resume(struct device *dev)
+{
+ struct exynos_bus *bus = dev_get_drvdata(dev);
+ int ret;
+
+ ret = exynos_bus_enable_edev(bus);
+ if (ret < 0) {
+ dev_err(dev, "failed to enable the devfreq-event devices\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static int exynos_bus_suspend(struct device *dev)
+{
+ struct exynos_bus *bus = dev_get_drvdata(dev);
+ int ret;
+
+ ret = exynos_bus_disable_edev(bus);
+ if (ret < 0) {
+ dev_err(dev, "failed to disable the devfreq-event devices\n");
+ return ret;
+ }
+
+ return 0;
+}
+#endif
+
+static const struct dev_pm_ops exynos_bus_pm = {
+ SET_SYSTEM_SLEEP_PM_OPS(exynos_bus_suspend, exynos_bus_resume)
+};
+
+static const struct of_device_id exynos_bus_of_match[] = {
+ { .compatible = "samsung,exynos-bus", },
+ { /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, exynos_bus_of_match);
+
+static struct platform_driver exynos_bus_platdrv = {
+ .probe = exynos_bus_probe,
+ .driver = {
+ .name = "exynos-bus",
+ .pm = &exynos_bus_pm,
+ .of_match_table = of_match_ptr(exynos_bus_of_match),
+ },
+};
+module_platform_driver(exynos_bus_platdrv);
+
+MODULE_DESCRIPTION("Generic Exynos Bus frequency driver");
+MODULE_AUTHOR("Chanwoo Choi <cw00.choi@samsung.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/devfreq/exynos/Makefile b/drivers/devfreq/exynos/Makefile
deleted file mode 100644
index 49bc917..0000000
--- a/drivers/devfreq/exynos/Makefile
+++ /dev/null
@@ -1,3 +0,0 @@
-# Exynos DEVFREQ Drivers
-obj-$(CONFIG_ARM_EXYNOS4_BUS_DEVFREQ) += exynos_ppmu.o exynos4_bus.o
-obj-$(CONFIG_ARM_EXYNOS5_BUS_DEVFREQ) += exynos_ppmu.o exynos5_bus.o
diff --git a/drivers/devfreq/exynos/exynos4_bus.c b/drivers/devfreq/exynos/exynos4_bus.c
deleted file mode 100644
index da95092..0000000
--- a/drivers/devfreq/exynos/exynos4_bus.c
+++ /dev/null
@@ -1,1055 +0,0 @@
-/* drivers/devfreq/exynos4210_memorybus.c
- *
- * Copyright (c) 2011 Samsung Electronics Co., Ltd.
- * http://www.samsung.com/
- * MyungJoo Ham <myungjoo.ham@samsung.com>
- *
- * EXYNOS4 - Memory/Bus clock frequency scaling support in DEVFREQ framework
- * This version supports EXYNOS4210 only. This changes bus frequencies
- * and vddint voltages. Exynos4412/4212 should be able to be supported
- * with minor modifications.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-
-#include <linux/io.h>
-#include <linux/slab.h>
-#include <linux/mutex.h>
-#include <linux/suspend.h>
-#include <linux/pm_opp.h>
-#include <linux/devfreq.h>
-#include <linux/platform_device.h>
-#include <linux/regulator/consumer.h>
-#include <linux/module.h>
-
-#include <mach/map.h>
-
-#include "exynos_ppmu.h"
-#include "exynos4_bus.h"
-
-#define MAX_SAFEVOLT 1200000 /* 1.2V */
-
-enum exynos4_busf_type {
- TYPE_BUSF_EXYNOS4210,
- TYPE_BUSF_EXYNOS4x12,
-};
-
-/* Assume that the bus is saturated if the utilization is 40% */
-#define BUS_SATURATION_RATIO 40
-
-enum busclk_level_idx {
- LV_0 = 0,
- LV_1,
- LV_2,
- LV_3,
- LV_4,
- _LV_END
-};
-
-enum exynos_ppmu_idx {
- PPMU_DMC0,
- PPMU_DMC1,
- PPMU_END,
-};
-
-#define EX4210_LV_MAX LV_2
-#define EX4x12_LV_MAX LV_4
-#define EX4210_LV_NUM (LV_2 + 1)
-#define EX4x12_LV_NUM (LV_4 + 1)
-
-/**
- * struct busfreq_opp_info - opp information for bus
- * @rate: Frequency in hertz
- * @volt: Voltage in microvolts corresponding to this OPP
- */
-struct busfreq_opp_info {
- unsigned long rate;
- unsigned long volt;
-};
-
-struct busfreq_data {
- enum exynos4_busf_type type;
- struct device *dev;
- struct devfreq *devfreq;
- bool disabled;
- struct regulator *vdd_int;
- struct regulator *vdd_mif; /* Exynos4412/4212 only */
- struct busfreq_opp_info curr_oppinfo;
- struct busfreq_ppmu_data ppmu_data;
-
- struct notifier_block pm_notifier;
- struct mutex lock;
-
- /* Dividers calculated at boot/probe-time */
- unsigned int dmc_divtable[_LV_END]; /* DMC0 */
- unsigned int top_divtable[_LV_END];
-};
-
-/* 4210 controls clock of mif and voltage of int */
-static struct bus_opp_table exynos4210_busclk_table[] = {
- {LV_0, 400000, 1150000},
- {LV_1, 267000, 1050000},
- {LV_2, 133000, 1025000},
- {0, 0, 0},
-};
-
-/*
- * MIF is the main control knob clock for Exynos4x12 MIF/INT
- * clock and voltage of both mif/int are controlled.
- */
-static struct bus_opp_table exynos4x12_mifclk_table[] = {
- {LV_0, 400000, 1100000},
- {LV_1, 267000, 1000000},
- {LV_2, 160000, 950000},
- {LV_3, 133000, 950000},
- {LV_4, 100000, 950000},
- {0, 0, 0},
-};
-
-/*
- * INT is not the control knob of 4x12. LV_x is not meant to represent
- * the current performance. (MIF does)
- */
-static struct bus_opp_table exynos4x12_intclk_table[] = {
- {LV_0, 200000, 1000000},
- {LV_1, 160000, 950000},
- {LV_2, 133000, 925000},
- {LV_3, 100000, 900000},
- {0, 0, 0},
-};
-
-/* TODO: asv volt definitions are "__initdata"? */
-/* Some chips have different operating voltages */
-static unsigned int exynos4210_asv_volt[][EX4210_LV_NUM] = {
- {1150000, 1050000, 1050000},
- {1125000, 1025000, 1025000},
- {1100000, 1000000, 1000000},
- {1075000, 975000, 975000},
- {1050000, 950000, 950000},
-};
-
-static unsigned int exynos4x12_mif_step_50[][EX4x12_LV_NUM] = {
- /* 400 267 160 133 100 */
- {1050000, 950000, 900000, 900000, 900000}, /* ASV0 */
- {1050000, 950000, 900000, 900000, 900000}, /* ASV1 */
- {1050000, 950000, 900000, 900000, 900000}, /* ASV2 */
- {1050000, 900000, 900000, 900000, 900000}, /* ASV3 */
- {1050000, 900000, 900000, 900000, 850000}, /* ASV4 */
- {1050000, 900000, 900000, 850000, 850000}, /* ASV5 */
- {1050000, 900000, 850000, 850000, 850000}, /* ASV6 */
- {1050000, 900000, 850000, 850000, 850000}, /* ASV7 */
- {1050000, 900000, 850000, 850000, 850000}, /* ASV8 */
-};
-
-static unsigned int exynos4x12_int_volt[][EX4x12_LV_NUM] = {
- /* 200 160 133 100 */
- {1000000, 950000, 925000, 900000}, /* ASV0 */
- {975000, 925000, 925000, 900000}, /* ASV1 */
- {950000, 925000, 900000, 875000}, /* ASV2 */
- {950000, 900000, 900000, 875000}, /* ASV3 */
- {925000, 875000, 875000, 875000}, /* ASV4 */
- {900000, 850000, 850000, 850000}, /* ASV5 */
- {900000, 850000, 850000, 850000}, /* ASV6 */
- {900000, 850000, 850000, 850000}, /* ASV7 */
- {900000, 850000, 850000, 850000}, /* ASV8 */
-};
-
-/*** Clock Divider Data for Exynos4210 ***/
-static unsigned int exynos4210_clkdiv_dmc0[][8] = {
- /*
- * Clock divider value for following
- * { DIVACP, DIVACP_PCLK, DIVDPHY, DIVDMC, DIVDMCD
- * DIVDMCP, DIVCOPY2, DIVCORE_TIMERS }
- */
-
- /* DMC L0: 400MHz */
- { 3, 1, 1, 1, 1, 1, 3, 1 },
- /* DMC L1: 266.7MHz */
- { 4, 1, 1, 2, 1, 1, 3, 1 },
- /* DMC L2: 133MHz */
- { 5, 1, 1, 5, 1, 1, 3, 1 },
-};
-static unsigned int exynos4210_clkdiv_top[][5] = {
- /*
- * Clock divider value for following
- * { DIVACLK200, DIVACLK100, DIVACLK160, DIVACLK133, DIVONENAND }
- */
- /* ACLK200 L0: 200MHz */
- { 3, 7, 4, 5, 1 },
- /* ACLK200 L1: 160MHz */
- { 4, 7, 5, 6, 1 },
- /* ACLK200 L2: 133MHz */
- { 5, 7, 7, 7, 1 },
-};
-static unsigned int exynos4210_clkdiv_lr_bus[][2] = {
- /*
- * Clock divider value for following
- * { DIVGDL/R, DIVGPL/R }
- */
- /* ACLK_GDL/R L1: 200MHz */
- { 3, 1 },
- /* ACLK_GDL/R L2: 160MHz */
- { 4, 1 },
- /* ACLK_GDL/R L3: 133MHz */
- { 5, 1 },
-};
-
-/*** Clock Divider Data for Exynos4212/4412 ***/
-static unsigned int exynos4x12_clkdiv_dmc0[][6] = {
- /*
- * Clock divider value for following
- * { DIVACP, DIVACP_PCLK, DIVDPHY, DIVDMC, DIVDMCD
- * DIVDMCP}
- */
-
- /* DMC L0: 400MHz */
- {3, 1, 1, 1, 1, 1},
- /* DMC L1: 266.7MHz */
- {4, 1, 1, 2, 1, 1},
- /* DMC L2: 160MHz */
- {5, 1, 1, 4, 1, 1},
- /* DMC L3: 133MHz */
- {5, 1, 1, 5, 1, 1},
- /* DMC L4: 100MHz */
- {7, 1, 1, 7, 1, 1},
-};
-static unsigned int exynos4x12_clkdiv_dmc1[][6] = {
- /*
- * Clock divider value for following
- * { G2DACP, DIVC2C, DIVC2C_ACLK }
- */
-
- /* DMC L0: 400MHz */
- {3, 1, 1},
- /* DMC L1: 266.7MHz */
- {4, 2, 1},
- /* DMC L2: 160MHz */
- {5, 4, 1},
- /* DMC L3: 133MHz */
- {5, 5, 1},
- /* DMC L4: 100MHz */
- {7, 7, 1},
-};
-static unsigned int exynos4x12_clkdiv_top[][5] = {
- /*
- * Clock divider value for following
- * { DIVACLK266_GPS, DIVACLK100, DIVACLK160,
- DIVACLK133, DIVONENAND }
- */
-
- /* ACLK_GDL/R L0: 200MHz */
- {2, 7, 4, 5, 1},
- /* ACLK_GDL/R L1: 200MHz */
- {2, 7, 4, 5, 1},
- /* ACLK_GDL/R L2: 160MHz */
- {4, 7, 5, 7, 1},
- /* ACLK_GDL/R L3: 133MHz */
- {4, 7, 5, 7, 1},
- /* ACLK_GDL/R L4: 100MHz */
- {7, 7, 7, 7, 1},
-};
-static unsigned int exynos4x12_clkdiv_lr_bus[][2] = {
- /*
- * Clock divider value for following
- * { DIVGDL/R, DIVGPL/R }
- */
-
- /* ACLK_GDL/R L0: 200MHz */
- {3, 1},
- /* ACLK_GDL/R L1: 200MHz */
- {3, 1},
- /* ACLK_GDL/R L2: 160MHz */
- {4, 1},
- /* ACLK_GDL/R L3: 133MHz */
- {5, 1},
- /* ACLK_GDL/R L4: 100MHz */
- {7, 1},
-};
-static unsigned int exynos4x12_clkdiv_sclkip[][3] = {
- /*
- * Clock divider value for following
- * { DIVMFC, DIVJPEG, DIVFIMC0~3}
- */
-
- /* SCLK_MFC: 200MHz */
- {3, 3, 4},
- /* SCLK_MFC: 200MHz */
- {3, 3, 4},
- /* SCLK_MFC: 160MHz */
- {4, 4, 5},
- /* SCLK_MFC: 133MHz */
- {5, 5, 5},
- /* SCLK_MFC: 100MHz */
- {7, 7, 7},
-};
-
-
-static int exynos4210_set_busclk(struct busfreq_data *data,
- struct busfreq_opp_info *oppi)
-{
- unsigned int index;
- unsigned int tmp;
-
- for (index = LV_0; index < EX4210_LV_NUM; index++)
- if (oppi->rate == exynos4210_busclk_table[index].clk)
- break;
-
- if (index == EX4210_LV_NUM)
- return -EINVAL;
-
- /* Change Divider - DMC0 */
- tmp = data->dmc_divtable[index];
-
- __raw_writel(tmp, EXYNOS4_CLKDIV_DMC0);
-
- do {
- tmp = __raw_readl(EXYNOS4_CLKDIV_STAT_DMC0);
- } while (tmp & 0x11111111);
-
- /* Change Divider - TOP */
- tmp = data->top_divtable[index];
-
- __raw_writel(tmp, EXYNOS4_CLKDIV_TOP);
-
- do {
- tmp = __raw_readl(EXYNOS4_CLKDIV_STAT_TOP);
- } while (tmp & 0x11111);
-
- /* Change Divider - LEFTBUS */
- tmp = __raw_readl(EXYNOS4_CLKDIV_LEFTBUS);
-
- tmp &= ~(EXYNOS4_CLKDIV_BUS_GDLR_MASK | EXYNOS4_CLKDIV_BUS_GPLR_MASK);
-
- tmp |= ((exynos4210_clkdiv_lr_bus[index][0] <<
- EXYNOS4_CLKDIV_BUS_GDLR_SHIFT) |
- (exynos4210_clkdiv_lr_bus[index][1] <<
- EXYNOS4_CLKDIV_BUS_GPLR_SHIFT));
-
- __raw_writel(tmp, EXYNOS4_CLKDIV_LEFTBUS);
-
- do {
- tmp = __raw_readl(EXYNOS4_CLKDIV_STAT_LEFTBUS);
- } while (tmp & 0x11);
-
- /* Change Divider - RIGHTBUS */
- tmp = __raw_readl(EXYNOS4_CLKDIV_RIGHTBUS);
-
- tmp &= ~(EXYNOS4_CLKDIV_BUS_GDLR_MASK | EXYNOS4_CLKDIV_BUS_GPLR_MASK);
-
- tmp |= ((exynos4210_clkdiv_lr_bus[index][0] <<
- EXYNOS4_CLKDIV_BUS_GDLR_SHIFT) |
- (exynos4210_clkdiv_lr_bus[index][1] <<
- EXYNOS4_CLKDIV_BUS_GPLR_SHIFT));
-
- __raw_writel(tmp, EXYNOS4_CLKDIV_RIGHTBUS);
-
- do {
- tmp = __raw_readl(EXYNOS4_CLKDIV_STAT_RIGHTBUS);
- } while (tmp & 0x11);
-
- return 0;
-}
-
-static int exynos4x12_set_busclk(struct busfreq_data *data,
- struct busfreq_opp_info *oppi)
-{
- unsigned int index;
- unsigned int tmp;
-
- for (index = LV_0; index < EX4x12_LV_NUM; index++)
- if (oppi->rate == exynos4x12_mifclk_table[index].clk)
- break;
-
- if (index == EX4x12_LV_NUM)
- return -EINVAL;
-
- /* Change Divider - DMC0 */
- tmp = data->dmc_divtable[index];
-
- __raw_writel(tmp, EXYNOS4_CLKDIV_DMC0);
-
- do {
- tmp = __raw_readl(EXYNOS4_CLKDIV_STAT_DMC0);
- } while (tmp & 0x11111111);
-
- /* Change Divider - DMC1 */
- tmp = __raw_readl(EXYNOS4_CLKDIV_DMC1);
-
- tmp &= ~(EXYNOS4_CLKDIV_DMC1_G2D_ACP_MASK |
- EXYNOS4_CLKDIV_DMC1_C2C_MASK |
- EXYNOS4_CLKDIV_DMC1_C2CACLK_MASK);
-
- tmp |= ((exynos4x12_clkdiv_dmc1[index][0] <<
- EXYNOS4_CLKDIV_DMC1_G2D_ACP_SHIFT) |
- (exynos4x12_clkdiv_dmc1[index][1] <<
- EXYNOS4_CLKDIV_DMC1_C2C_SHIFT) |
- (exynos4x12_clkdiv_dmc1[index][2] <<
- EXYNOS4_CLKDIV_DMC1_C2CACLK_SHIFT));
-
- __raw_writel(tmp, EXYNOS4_CLKDIV_DMC1);
-
- do {
- tmp = __raw_readl(EXYNOS4_CLKDIV_STAT_DMC1);
- } while (tmp & 0x111111);
-
- /* Change Divider - TOP */
- tmp = __raw_readl(EXYNOS4_CLKDIV_TOP);
-
- tmp &= ~(EXYNOS4_CLKDIV_TOP_ACLK266_GPS_MASK |
- EXYNOS4_CLKDIV_TOP_ACLK100_MASK |
- EXYNOS4_CLKDIV_TOP_ACLK160_MASK |
- EXYNOS4_CLKDIV_TOP_ACLK133_MASK |
- EXYNOS4_CLKDIV_TOP_ONENAND_MASK);
-
- tmp |= ((exynos4x12_clkdiv_top[index][0] <<
- EXYNOS4_CLKDIV_TOP_ACLK266_GPS_SHIFT) |
- (exynos4x12_clkdiv_top[index][1] <<
- EXYNOS4_CLKDIV_TOP_ACLK100_SHIFT) |
- (exynos4x12_clkdiv_top[index][2] <<
- EXYNOS4_CLKDIV_TOP_ACLK160_SHIFT) |
- (exynos4x12_clkdiv_top[index][3] <<
- EXYNOS4_CLKDIV_TOP_ACLK133_SHIFT) |
- (exynos4x12_clkdiv_top[index][4] <<
- EXYNOS4_CLKDIV_TOP_ONENAND_SHIFT));
-
- __raw_writel(tmp, EXYNOS4_CLKDIV_TOP);
-
- do {
- tmp = __raw_readl(EXYNOS4_CLKDIV_STAT_TOP);
- } while (tmp & 0x11111);
-
- /* Change Divider - LEFTBUS */
- tmp = __raw_readl(EXYNOS4_CLKDIV_LEFTBUS);
-
- tmp &= ~(EXYNOS4_CLKDIV_BUS_GDLR_MASK | EXYNOS4_CLKDIV_BUS_GPLR_MASK);
-
- tmp |= ((exynos4x12_clkdiv_lr_bus[index][0] <<
- EXYNOS4_CLKDIV_BUS_GDLR_SHIFT) |
- (exynos4x12_clkdiv_lr_bus[index][1] <<
- EXYNOS4_CLKDIV_BUS_GPLR_SHIFT));
-
- __raw_writel(tmp, EXYNOS4_CLKDIV_LEFTBUS);
-
- do {
- tmp = __raw_readl(EXYNOS4_CLKDIV_STAT_LEFTBUS);
- } while (tmp & 0x11);
-
- /* Change Divider - RIGHTBUS */
- tmp = __raw_readl(EXYNOS4_CLKDIV_RIGHTBUS);
-
- tmp &= ~(EXYNOS4_CLKDIV_BUS_GDLR_MASK | EXYNOS4_CLKDIV_BUS_GPLR_MASK);
-
- tmp |= ((exynos4x12_clkdiv_lr_bus[index][0] <<
- EXYNOS4_CLKDIV_BUS_GDLR_SHIFT) |
- (exynos4x12_clkdiv_lr_bus[index][1] <<
- EXYNOS4_CLKDIV_BUS_GPLR_SHIFT));
-
- __raw_writel(tmp, EXYNOS4_CLKDIV_RIGHTBUS);
-
- do {
- tmp = __raw_readl(EXYNOS4_CLKDIV_STAT_RIGHTBUS);
- } while (tmp & 0x11);
-
- /* Change Divider - MFC */
- tmp = __raw_readl(EXYNOS4_CLKDIV_MFC);
-
- tmp &= ~(EXYNOS4_CLKDIV_MFC_MASK);
-
- tmp |= ((exynos4x12_clkdiv_sclkip[index][0] <<
- EXYNOS4_CLKDIV_MFC_SHIFT));
-
- __raw_writel(tmp, EXYNOS4_CLKDIV_MFC);
-
- do {
- tmp = __raw_readl(EXYNOS4_CLKDIV_STAT_MFC);
- } while (tmp & 0x1);
-
- /* Change Divider - JPEG */
- tmp = __raw_readl(EXYNOS4_CLKDIV_CAM1);
-
- tmp &= ~(EXYNOS4_CLKDIV_CAM1_JPEG_MASK);
-
- tmp |= ((exynos4x12_clkdiv_sclkip[index][1] <<
- EXYNOS4_CLKDIV_CAM1_JPEG_SHIFT));
-
- __raw_writel(tmp, EXYNOS4_CLKDIV_CAM1);
-
- do {
- tmp = __raw_readl(EXYNOS4_CLKDIV_STAT_CAM1);
- } while (tmp & 0x1);
-
- /* Change Divider - FIMC0~3 */
- tmp = __raw_readl(EXYNOS4_CLKDIV_CAM);
-
- tmp &= ~(EXYNOS4_CLKDIV_CAM_FIMC0_MASK | EXYNOS4_CLKDIV_CAM_FIMC1_MASK |
- EXYNOS4_CLKDIV_CAM_FIMC2_MASK | EXYNOS4_CLKDIV_CAM_FIMC3_MASK);
-
- tmp |= ((exynos4x12_clkdiv_sclkip[index][2] <<
- EXYNOS4_CLKDIV_CAM_FIMC0_SHIFT) |
- (exynos4x12_clkdiv_sclkip[index][2] <<
- EXYNOS4_CLKDIV_CAM_FIMC1_SHIFT) |
- (exynos4x12_clkdiv_sclkip[index][2] <<
- EXYNOS4_CLKDIV_CAM_FIMC2_SHIFT) |
- (exynos4x12_clkdiv_sclkip[index][2] <<
- EXYNOS4_CLKDIV_CAM_FIMC3_SHIFT));
-
- __raw_writel(tmp, EXYNOS4_CLKDIV_CAM);
-
- do {
- tmp = __raw_readl(EXYNOS4_CLKDIV_STAT_CAM1);
- } while (tmp & 0x1111);
-
- return 0;
-}
-
-static int exynos4x12_get_intspec(unsigned long mifclk)
-{
- int i = 0;
-
- while (exynos4x12_intclk_table[i].clk) {
- if (exynos4x12_intclk_table[i].clk <= mifclk)
- return i;
- i++;
- }
-
- return -EINVAL;
-}
-
-static int exynos4_bus_setvolt(struct busfreq_data *data,
- struct busfreq_opp_info *oppi,
- struct busfreq_opp_info *oldoppi)
-{
- int err = 0, tmp;
- unsigned long volt = oppi->volt;
-
- switch (data->type) {
- case TYPE_BUSF_EXYNOS4210:
- /* OPP represents DMC clock + INT voltage */
- err = regulator_set_voltage(data->vdd_int, volt,
- MAX_SAFEVOLT);
- break;
- case TYPE_BUSF_EXYNOS4x12:
- /* OPP represents MIF clock + MIF voltage */
- err = regulator_set_voltage(data->vdd_mif, volt,
- MAX_SAFEVOLT);
- if (err)
- break;
-
- tmp = exynos4x12_get_intspec(oppi->rate);
- if (tmp < 0) {
- err = tmp;
- regulator_set_voltage(data->vdd_mif,
- oldoppi->volt,
- MAX_SAFEVOLT);
- break;
- }
- err = regulator_set_voltage(data->vdd_int,
- exynos4x12_intclk_table[tmp].volt,
- MAX_SAFEVOLT);
- /* Try to recover */
- if (err)
- regulator_set_voltage(data->vdd_mif,
- oldoppi->volt,
- MAX_SAFEVOLT);
- break;
- default:
- err = -EINVAL;
- }
-
- return err;
-}
-
-static int exynos4_bus_target(struct device *dev, unsigned long *_freq,
- u32 flags)
-{
- int err = 0;
- struct platform_device *pdev = container_of(dev, struct platform_device,
- dev);
- struct busfreq_data *data = platform_get_drvdata(pdev);
- struct dev_pm_opp *opp;
- unsigned long freq;
- unsigned long old_freq = data->curr_oppinfo.rate;
- struct busfreq_opp_info new_oppinfo;
-
- rcu_read_lock();
- opp = devfreq_recommended_opp(dev, _freq, flags);
- if (IS_ERR(opp)) {
- rcu_read_unlock();
- return PTR_ERR(opp);
- }
- new_oppinfo.rate = dev_pm_opp_get_freq(opp);
- new_oppinfo.volt = dev_pm_opp_get_voltage(opp);
- rcu_read_unlock();
- freq = new_oppinfo.rate;
-
- if (old_freq == freq)
- return 0;
-
- dev_dbg(dev, "targeting %lukHz %luuV\n", freq, new_oppinfo.volt);
-
- mutex_lock(&data->lock);
-
- if (data->disabled)
- goto out;
-
- if (old_freq < freq)
- err = exynos4_bus_setvolt(data, &new_oppinfo,
- &data->curr_oppinfo);
- if (err)
- goto out;
-
- if (old_freq != freq) {
- switch (data->type) {
- case TYPE_BUSF_EXYNOS4210:
- err = exynos4210_set_busclk(data, &new_oppinfo);
- break;
- case TYPE_BUSF_EXYNOS4x12:
- err = exynos4x12_set_busclk(data, &new_oppinfo);
- break;
- default:
- err = -EINVAL;
- }
- }
- if (err)
- goto out;
-
- if (old_freq > freq)
- err = exynos4_bus_setvolt(data, &new_oppinfo,
- &data->curr_oppinfo);
- if (err)
- goto out;
-
- data->curr_oppinfo = new_oppinfo;
-out:
- mutex_unlock(&data->lock);
- return err;
-}
-
-static int exynos4_bus_get_dev_status(struct device *dev,
- struct devfreq_dev_status *stat)
-{
- struct busfreq_data *data = dev_get_drvdata(dev);
- struct busfreq_ppmu_data *ppmu_data = &data->ppmu_data;
- int busier;
-
- exynos_read_ppmu(ppmu_data);
- busier = exynos_get_busier_ppmu(ppmu_data);
- stat->current_frequency = data->curr_oppinfo.rate;
-
- /* Number of cycles spent on memory access */
- stat->busy_time = ppmu_data->ppmu[busier].count[PPMU_PMNCNT3];
- stat->busy_time *= 100 / BUS_SATURATION_RATIO;
- stat->total_time = ppmu_data->ppmu[busier].ccnt;
-
- /* If the counters have overflown, retry */
- if (ppmu_data->ppmu[busier].ccnt_overflow ||
- ppmu_data->ppmu[busier].count_overflow[0])
- return -EAGAIN;
-
- return 0;
-}
-
-static struct devfreq_dev_profile exynos4_devfreq_profile = {
- .initial_freq = 400000,
- .polling_ms = 50,
- .target = exynos4_bus_target,
- .get_dev_status = exynos4_bus_get_dev_status,
-};
-
-static int exynos4210_init_tables(struct busfreq_data *data)
-{
- u32 tmp;
- int mgrp;
- int i, err = 0;
-
- tmp = __raw_readl(EXYNOS4_CLKDIV_DMC0);
- for (i = LV_0; i < EX4210_LV_NUM; i++) {
- tmp &= ~(EXYNOS4_CLKDIV_DMC0_ACP_MASK |
- EXYNOS4_CLKDIV_DMC0_ACPPCLK_MASK |
- EXYNOS4_CLKDIV_DMC0_DPHY_MASK |
- EXYNOS4_CLKDIV_DMC0_DMC_MASK |
- EXYNOS4_CLKDIV_DMC0_DMCD_MASK |
- EXYNOS4_CLKDIV_DMC0_DMCP_MASK |
- EXYNOS4_CLKDIV_DMC0_COPY2_MASK |
- EXYNOS4_CLKDIV_DMC0_CORETI_MASK);
-
- tmp |= ((exynos4210_clkdiv_dmc0[i][0] <<
- EXYNOS4_CLKDIV_DMC0_ACP_SHIFT) |
- (exynos4210_clkdiv_dmc0[i][1] <<
- EXYNOS4_CLKDIV_DMC0_ACPPCLK_SHIFT) |
- (exynos4210_clkdiv_dmc0[i][2] <<
- EXYNOS4_CLKDIV_DMC0_DPHY_SHIFT) |
- (exynos4210_clkdiv_dmc0[i][3] <<
- EXYNOS4_CLKDIV_DMC0_DMC_SHIFT) |
- (exynos4210_clkdiv_dmc0[i][4] <<
- EXYNOS4_CLKDIV_DMC0_DMCD_SHIFT) |
- (exynos4210_clkdiv_dmc0[i][5] <<
- EXYNOS4_CLKDIV_DMC0_DMCP_SHIFT) |
- (exynos4210_clkdiv_dmc0[i][6] <<
- EXYNOS4_CLKDIV_DMC0_COPY2_SHIFT) |
- (exynos4210_clkdiv_dmc0[i][7] <<
- EXYNOS4_CLKDIV_DMC0_CORETI_SHIFT));
-
- data->dmc_divtable[i] = tmp;
- }
-
- tmp = __raw_readl(EXYNOS4_CLKDIV_TOP);
- for (i = LV_0; i < EX4210_LV_NUM; i++) {
- tmp &= ~(EXYNOS4_CLKDIV_TOP_ACLK200_MASK |
- EXYNOS4_CLKDIV_TOP_ACLK100_MASK |
- EXYNOS4_CLKDIV_TOP_ACLK160_MASK |
- EXYNOS4_CLKDIV_TOP_ACLK133_MASK |
- EXYNOS4_CLKDIV_TOP_ONENAND_MASK);
-
- tmp |= ((exynos4210_clkdiv_top[i][0] <<
- EXYNOS4_CLKDIV_TOP_ACLK200_SHIFT) |
- (exynos4210_clkdiv_top[i][1] <<
- EXYNOS4_CLKDIV_TOP_ACLK100_SHIFT) |
- (exynos4210_clkdiv_top[i][2] <<
- EXYNOS4_CLKDIV_TOP_ACLK160_SHIFT) |
- (exynos4210_clkdiv_top[i][3] <<
- EXYNOS4_CLKDIV_TOP_ACLK133_SHIFT) |
- (exynos4210_clkdiv_top[i][4] <<
- EXYNOS4_CLKDIV_TOP_ONENAND_SHIFT));
-
- data->top_divtable[i] = tmp;
- }
-
- /*
- * TODO: init tmp based on busfreq_data
- * (device-tree or platform-data)
- */
- tmp = 0; /* Max voltages for the reliability of the unknown */
-
- pr_debug("ASV Group of Exynos4 is %d\n", tmp);
- /* Use merged grouping for voltage */
- switch (tmp) {
- case 0:
- mgrp = 0;
- break;
- case 1:
- case 2:
- mgrp = 1;
- break;
- case 3:
- case 4:
- mgrp = 2;
- break;
- case 5:
- case 6:
- mgrp = 3;
- break;
- case 7:
- mgrp = 4;
- break;
- default:
- pr_warn("Unknown ASV Group. Use max voltage.\n");
- mgrp = 0;
- }
-
- for (i = LV_0; i < EX4210_LV_NUM; i++)
- exynos4210_busclk_table[i].volt = exynos4210_asv_volt[mgrp][i];
-
- for (i = LV_0; i < EX4210_LV_NUM; i++) {
- err = dev_pm_opp_add(data->dev, exynos4210_busclk_table[i].clk,
- exynos4210_busclk_table[i].volt);
- if (err) {
- dev_err(data->dev, "Cannot add opp entries.\n");
- return err;
- }
- }
-
-
- return 0;
-}
-
-static int exynos4x12_init_tables(struct busfreq_data *data)
-{
- unsigned int i;
- unsigned int tmp;
- int ret;
-
- /* Enable pause function for DREX2 DVFS */
- tmp = __raw_readl(EXYNOS4_DMC_PAUSE_CTRL);
- tmp |= EXYNOS4_DMC_PAUSE_ENABLE;
- __raw_writel(tmp, EXYNOS4_DMC_PAUSE_CTRL);
-
- tmp = __raw_readl(EXYNOS4_CLKDIV_DMC0);
-
- for (i = 0; i < EX4x12_LV_NUM; i++) {
- tmp &= ~(EXYNOS4_CLKDIV_DMC0_ACP_MASK |
- EXYNOS4_CLKDIV_DMC0_ACPPCLK_MASK |
- EXYNOS4_CLKDIV_DMC0_DPHY_MASK |
- EXYNOS4_CLKDIV_DMC0_DMC_MASK |
- EXYNOS4_CLKDIV_DMC0_DMCD_MASK |
- EXYNOS4_CLKDIV_DMC0_DMCP_MASK);
-
- tmp |= ((exynos4x12_clkdiv_dmc0[i][0] <<
- EXYNOS4_CLKDIV_DMC0_ACP_SHIFT) |
- (exynos4x12_clkdiv_dmc0[i][1] <<
- EXYNOS4_CLKDIV_DMC0_ACPPCLK_SHIFT) |
- (exynos4x12_clkdiv_dmc0[i][2] <<
- EXYNOS4_CLKDIV_DMC0_DPHY_SHIFT) |
- (exynos4x12_clkdiv_dmc0[i][3] <<
- EXYNOS4_CLKDIV_DMC0_DMC_SHIFT) |
- (exynos4x12_clkdiv_dmc0[i][4] <<
- EXYNOS4_CLKDIV_DMC0_DMCD_SHIFT) |
- (exynos4x12_clkdiv_dmc0[i][5] <<
- EXYNOS4_CLKDIV_DMC0_DMCP_SHIFT));
-
- data->dmc_divtable[i] = tmp;
- }
-
- tmp = 0; /* Max voltages for the reliability of the unknown */
-
- if (tmp > 8)
- tmp = 0;
- pr_debug("ASV Group of Exynos4x12 is %d\n", tmp);
-
- for (i = 0; i < EX4x12_LV_NUM; i++) {
- exynos4x12_mifclk_table[i].volt =
- exynos4x12_mif_step_50[tmp][i];
- exynos4x12_intclk_table[i].volt =
- exynos4x12_int_volt[tmp][i];
- }
-
- for (i = 0; i < EX4x12_LV_NUM; i++) {
- ret = dev_pm_opp_add(data->dev, exynos4x12_mifclk_table[i].clk,
- exynos4x12_mifclk_table[i].volt);
- if (ret) {
- dev_err(data->dev, "Fail to add opp entries.\n");
- return ret;
- }
- }
-
- return 0;
-}
-
-static int exynos4_busfreq_pm_notifier_event(struct notifier_block *this,
- unsigned long event, void *ptr)
-{
- struct busfreq_data *data = container_of(this, struct busfreq_data,
- pm_notifier);
- struct dev_pm_opp *opp;
- struct busfreq_opp_info new_oppinfo;
- unsigned long maxfreq = ULONG_MAX;
- int err = 0;
-
- switch (event) {
- case PM_SUSPEND_PREPARE:
- /* Set Fastest and Deactivate DVFS */
- mutex_lock(&data->lock);
-
- data->disabled = true;
-
- rcu_read_lock();
- opp = dev_pm_opp_find_freq_floor(data->dev, &maxfreq);
- if (IS_ERR(opp)) {
- rcu_read_unlock();
- dev_err(data->dev, "%s: unable to find a min freq\n",
- __func__);
- mutex_unlock(&data->lock);
- return PTR_ERR(opp);
- }
- new_oppinfo.rate = dev_pm_opp_get_freq(opp);
- new_oppinfo.volt = dev_pm_opp_get_voltage(opp);
- rcu_read_unlock();
-
- err = exynos4_bus_setvolt(data, &new_oppinfo,
- &data->curr_oppinfo);
- if (err)
- goto unlock;
-
- switch (data->type) {
- case TYPE_BUSF_EXYNOS4210:
- err = exynos4210_set_busclk(data, &new_oppinfo);
- break;
- case TYPE_BUSF_EXYNOS4x12:
- err = exynos4x12_set_busclk(data, &new_oppinfo);
- break;
- default:
- err = -EINVAL;
- }
- if (err)
- goto unlock;
-
- data->curr_oppinfo = new_oppinfo;
-unlock:
- mutex_unlock(&data->lock);
- if (err)
- return err;
- return NOTIFY_OK;
- case PM_POST_RESTORE:
- case PM_POST_SUSPEND:
- /* Reactivate */
- mutex_lock(&data->lock);
- data->disabled = false;
- mutex_unlock(&data->lock);
- return NOTIFY_OK;
- }
-
- return NOTIFY_DONE;
-}
-
-static int exynos4_busfreq_probe(struct platform_device *pdev)
-{
- struct busfreq_data *data;
- struct busfreq_ppmu_data *ppmu_data;
- struct dev_pm_opp *opp;
- struct device *dev = &pdev->dev;
- int err = 0;
-
- data = devm_kzalloc(&pdev->dev, sizeof(struct busfreq_data), GFP_KERNEL);
- if (data == NULL) {
- dev_err(dev, "Cannot allocate memory.\n");
- return -ENOMEM;
- }
-
- ppmu_data = &data->ppmu_data;
- ppmu_data->ppmu_end = PPMU_END;
- ppmu_data->ppmu = devm_kzalloc(dev,
- sizeof(struct exynos_ppmu) * PPMU_END,
- GFP_KERNEL);
- if (!ppmu_data->ppmu) {
- dev_err(dev, "Failed to allocate memory for exynos_ppmu\n");
- return -ENOMEM;
- }
-
- data->type = pdev->id_entry->driver_data;
- ppmu_data->ppmu[PPMU_DMC0].hw_base = S5P_VA_DMC0;
- ppmu_data->ppmu[PPMU_DMC1].hw_base = S5P_VA_DMC1;
- data->pm_notifier.notifier_call = exynos4_busfreq_pm_notifier_event;
- data->dev = dev;
- mutex_init(&data->lock);
-
- switch (data->type) {
- case TYPE_BUSF_EXYNOS4210:
- err = exynos4210_init_tables(data);
- break;
- case TYPE_BUSF_EXYNOS4x12:
- err = exynos4x12_init_tables(data);
- break;
- default:
- dev_err(dev, "Cannot determine the device id %d\n", data->type);
- err = -EINVAL;
- }
- if (err) {
- dev_err(dev, "Cannot initialize busfreq table %d\n",
- data->type);
- return err;
- }
-
- data->vdd_int = devm_regulator_get(dev, "vdd_int");
- if (IS_ERR(data->vdd_int)) {
- dev_err(dev, "Cannot get the regulator \"vdd_int\"\n");
- return PTR_ERR(data->vdd_int);
- }
- if (data->type == TYPE_BUSF_EXYNOS4x12) {
- data->vdd_mif = devm_regulator_get(dev, "vdd_mif");
- if (IS_ERR(data->vdd_mif)) {
- dev_err(dev, "Cannot get the regulator \"vdd_mif\"\n");
- return PTR_ERR(data->vdd_mif);
- }
- }
-
- rcu_read_lock();
- opp = dev_pm_opp_find_freq_floor(dev,
- &exynos4_devfreq_profile.initial_freq);
- if (IS_ERR(opp)) {
- rcu_read_unlock();
- dev_err(dev, "Invalid initial frequency %lu kHz.\n",
- exynos4_devfreq_profile.initial_freq);
- return PTR_ERR(opp);
- }
- data->curr_oppinfo.rate = dev_pm_opp_get_freq(opp);
- data->curr_oppinfo.volt = dev_pm_opp_get_voltage(opp);
- rcu_read_unlock();
-
- platform_set_drvdata(pdev, data);
-
- data->devfreq = devm_devfreq_add_device(dev, &exynos4_devfreq_profile,
- "simple_ondemand", NULL);
- if (IS_ERR(data->devfreq))
- return PTR_ERR(data->devfreq);
-
- /*
- * Start PPMU (Performance Profiling Monitoring Unit) to check
- * utilization of each IP in the Exynos4 SoC.
- */
- busfreq_mon_reset(ppmu_data);
-
- /* Register opp_notifier for Exynos4 busfreq */
- err = devm_devfreq_register_opp_notifier(dev, data->devfreq);
- if (err < 0) {
- dev_err(dev, "Failed to register opp notifier\n");
- return err;
- }
-
- /* Register pm_notifier for Exynos4 busfreq */
- err = register_pm_notifier(&data->pm_notifier);
- if (err) {
- dev_err(dev, "Failed to setup pm notifier\n");
- return err;
- }
-
- return 0;
-}
-
-static int exynos4_busfreq_remove(struct platform_device *pdev)
-{
- struct busfreq_data *data = platform_get_drvdata(pdev);
-
- /* Unregister all of notifier chain */
- unregister_pm_notifier(&data->pm_notifier);
-
- return 0;
-}
-
-#ifdef CONFIG_PM_SLEEP
-static int exynos4_busfreq_resume(struct device *dev)
-{
- struct busfreq_data *data = dev_get_drvdata(dev);
- struct busfreq_ppmu_data *ppmu_data = &data->ppmu_data;
-
- busfreq_mon_reset(ppmu_data);
- return 0;
-}
-#endif
-
-static SIMPLE_DEV_PM_OPS(exynos4_busfreq_pm_ops, NULL, exynos4_busfreq_resume);
-
-static const struct platform_device_id exynos4_busfreq_id[] = {
- { "exynos4210-busfreq", TYPE_BUSF_EXYNOS4210 },
- { "exynos4412-busfreq", TYPE_BUSF_EXYNOS4x12 },
- { "exynos4212-busfreq", TYPE_BUSF_EXYNOS4x12 },
- { },
-};
-
-static struct platform_driver exynos4_busfreq_driver = {
- .probe = exynos4_busfreq_probe,
- .remove = exynos4_busfreq_remove,
- .id_table = exynos4_busfreq_id,
- .driver = {
- .name = "exynos4-busfreq",
- .pm = &exynos4_busfreq_pm_ops,
- },
-};
-
-static int __init exynos4_busfreq_init(void)
-{
- return platform_driver_register(&exynos4_busfreq_driver);
-}
-late_initcall(exynos4_busfreq_init);
-
-static void __exit exynos4_busfreq_exit(void)
-{
- platform_driver_unregister(&exynos4_busfreq_driver);
-}
-module_exit(exynos4_busfreq_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("EXYNOS4 busfreq driver with devfreq framework");
-MODULE_AUTHOR("MyungJoo Ham <myungjoo.ham@samsung.com>");
diff --git a/drivers/devfreq/exynos/exynos4_bus.h b/drivers/devfreq/exynos/exynos4_bus.h
deleted file mode 100644
index 94c73c1..0000000
--- a/drivers/devfreq/exynos/exynos4_bus.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Copyright (c) 2013 Samsung Electronics Co., Ltd.
- * http://www.samsung.com/
- *
- * EXYNOS4 BUS header
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
-*/
-
-#ifndef __DEVFREQ_EXYNOS4_BUS_H
-#define __DEVFREQ_EXYNOS4_BUS_H __FILE__
-
-#include <mach/map.h>
-
-#define EXYNOS4_CLKDIV_LEFTBUS (S5P_VA_CMU + 0x04500)
-#define EXYNOS4_CLKDIV_STAT_LEFTBUS (S5P_VA_CMU + 0x04600)
-
-#define EXYNOS4_CLKDIV_RIGHTBUS (S5P_VA_CMU + 0x08500)
-#define EXYNOS4_CLKDIV_STAT_RIGHTBUS (S5P_VA_CMU + 0x08600)
-
-#define EXYNOS4_CLKDIV_TOP (S5P_VA_CMU + 0x0C510)
-#define EXYNOS4_CLKDIV_CAM (S5P_VA_CMU + 0x0C520)
-#define EXYNOS4_CLKDIV_MFC (S5P_VA_CMU + 0x0C528)
-
-#define EXYNOS4_CLKDIV_STAT_TOP (S5P_VA_CMU + 0x0C610)
-#define EXYNOS4_CLKDIV_STAT_MFC (S5P_VA_CMU + 0x0C628)
-
-#define EXYNOS4210_CLKGATE_IP_IMAGE (S5P_VA_CMU + 0x0C930)
-#define EXYNOS4212_CLKGATE_IP_IMAGE (S5P_VA_CMU + 0x04930)
-
-#define EXYNOS4_CLKDIV_DMC0 (S5P_VA_CMU + 0x10500)
-#define EXYNOS4_CLKDIV_DMC1 (S5P_VA_CMU + 0x10504)
-#define EXYNOS4_CLKDIV_STAT_DMC0 (S5P_VA_CMU + 0x10600)
-#define EXYNOS4_CLKDIV_STAT_DMC1 (S5P_VA_CMU + 0x10604)
-
-#define EXYNOS4_DMC_PAUSE_CTRL (S5P_VA_CMU + 0x11094)
-#define EXYNOS4_DMC_PAUSE_ENABLE (1 << 0)
-
-#define EXYNOS4_CLKDIV_DMC0_ACP_SHIFT (0)
-#define EXYNOS4_CLKDIV_DMC0_ACP_MASK (0x7 << EXYNOS4_CLKDIV_DMC0_ACP_SHIFT)
-#define EXYNOS4_CLKDIV_DMC0_ACPPCLK_SHIFT (4)
-#define EXYNOS4_CLKDIV_DMC0_ACPPCLK_MASK (0x7 << EXYNOS4_CLKDIV_DMC0_ACPPCLK_SHIFT)
-#define EXYNOS4_CLKDIV_DMC0_DPHY_SHIFT (8)
-#define EXYNOS4_CLKDIV_DMC0_DPHY_MASK (0x7 << EXYNOS4_CLKDIV_DMC0_DPHY_SHIFT)
-#define EXYNOS4_CLKDIV_DMC0_DMC_SHIFT (12)
-#define EXYNOS4_CLKDIV_DMC0_DMC_MASK (0x7 << EXYNOS4_CLKDIV_DMC0_DMC_SHIFT)
-#define EXYNOS4_CLKDIV_DMC0_DMCD_SHIFT (16)
-#define EXYNOS4_CLKDIV_DMC0_DMCD_MASK (0x7 << EXYNOS4_CLKDIV_DMC0_DMCD_SHIFT)
-#define EXYNOS4_CLKDIV_DMC0_DMCP_SHIFT (20)
-#define EXYNOS4_CLKDIV_DMC0_DMCP_MASK (0x7 << EXYNOS4_CLKDIV_DMC0_DMCP_SHIFT)
-#define EXYNOS4_CLKDIV_DMC0_COPY2_SHIFT (24)
-#define EXYNOS4_CLKDIV_DMC0_COPY2_MASK (0x7 << EXYNOS4_CLKDIV_DMC0_COPY2_SHIFT)
-#define EXYNOS4_CLKDIV_DMC0_CORETI_SHIFT (28)
-#define EXYNOS4_CLKDIV_DMC0_CORETI_MASK (0x7 << EXYNOS4_CLKDIV_DMC0_CORETI_SHIFT)
-
-#define EXYNOS4_CLKDIV_DMC1_G2D_ACP_SHIFT (0)
-#define EXYNOS4_CLKDIV_DMC1_G2D_ACP_MASK (0xf << EXYNOS4_CLKDIV_DMC1_G2D_ACP_SHIFT)
-#define EXYNOS4_CLKDIV_DMC1_C2C_SHIFT (4)
-#define EXYNOS4_CLKDIV_DMC1_C2C_MASK (0x7 << EXYNOS4_CLKDIV_DMC1_C2C_SHIFT)
-#define EXYNOS4_CLKDIV_DMC1_PWI_SHIFT (8)
-#define EXYNOS4_CLKDIV_DMC1_PWI_MASK (0xf << EXYNOS4_CLKDIV_DMC1_PWI_SHIFT)
-#define EXYNOS4_CLKDIV_DMC1_C2CACLK_SHIFT (12)
-#define EXYNOS4_CLKDIV_DMC1_C2CACLK_MASK (0x7 << EXYNOS4_CLKDIV_DMC1_C2CACLK_SHIFT)
-#define EXYNOS4_CLKDIV_DMC1_DVSEM_SHIFT (16)
-#define EXYNOS4_CLKDIV_DMC1_DVSEM_MASK (0x7f << EXYNOS4_CLKDIV_DMC1_DVSEM_SHIFT)
-#define EXYNOS4_CLKDIV_DMC1_DPM_SHIFT (24)
-#define EXYNOS4_CLKDIV_DMC1_DPM_MASK (0x7f << EXYNOS4_CLKDIV_DMC1_DPM_SHIFT)
-
-#define EXYNOS4_CLKDIV_MFC_SHIFT (0)
-#define EXYNOS4_CLKDIV_MFC_MASK (0x7 << EXYNOS4_CLKDIV_MFC_SHIFT)
-
-#define EXYNOS4_CLKDIV_TOP_ACLK200_SHIFT (0)
-#define EXYNOS4_CLKDIV_TOP_ACLK200_MASK (0x7 << EXYNOS4_CLKDIV_TOP_ACLK200_SHIFT)
-#define EXYNOS4_CLKDIV_TOP_ACLK100_SHIFT (4)
-#define EXYNOS4_CLKDIV_TOP_ACLK100_MASK (0xF << EXYNOS4_CLKDIV_TOP_ACLK100_SHIFT)
-#define EXYNOS4_CLKDIV_TOP_ACLK160_SHIFT (8)
-#define EXYNOS4_CLKDIV_TOP_ACLK160_MASK (0x7 << EXYNOS4_CLKDIV_TOP_ACLK160_SHIFT)
-#define EXYNOS4_CLKDIV_TOP_ACLK133_SHIFT (12)
-#define EXYNOS4_CLKDIV_TOP_ACLK133_MASK (0x7 << EXYNOS4_CLKDIV_TOP_ACLK133_SHIFT)
-#define EXYNOS4_CLKDIV_TOP_ONENAND_SHIFT (16)
-#define EXYNOS4_CLKDIV_TOP_ONENAND_MASK (0x7 << EXYNOS4_CLKDIV_TOP_ONENAND_SHIFT)
-#define EXYNOS4_CLKDIV_TOP_ACLK266_GPS_SHIFT (20)
-#define EXYNOS4_CLKDIV_TOP_ACLK266_GPS_MASK (0x7 << EXYNOS4_CLKDIV_TOP_ACLK266_GPS_SHIFT)
-#define EXYNOS4_CLKDIV_TOP_ACLK400_MCUISP_SHIFT (24)
-#define EXYNOS4_CLKDIV_TOP_ACLK400_MCUISP_MASK (0x7 << EXYNOS4_CLKDIV_TOP_ACLK400_MCUISP_SHIFT)
-
-#define EXYNOS4_CLKDIV_BUS_GDLR_SHIFT (0)
-#define EXYNOS4_CLKDIV_BUS_GDLR_MASK (0x7 << EXYNOS4_CLKDIV_BUS_GDLR_SHIFT)
-#define EXYNOS4_CLKDIV_BUS_GPLR_SHIFT (4)
-#define EXYNOS4_CLKDIV_BUS_GPLR_MASK (0x7 << EXYNOS4_CLKDIV_BUS_GPLR_SHIFT)
-
-#define EXYNOS4_CLKDIV_CAM_FIMC0_SHIFT (0)
-#define EXYNOS4_CLKDIV_CAM_FIMC0_MASK (0xf << EXYNOS4_CLKDIV_CAM_FIMC0_SHIFT)
-#define EXYNOS4_CLKDIV_CAM_FIMC1_SHIFT (4)
-#define EXYNOS4_CLKDIV_CAM_FIMC1_MASK (0xf << EXYNOS4_CLKDIV_CAM_FIMC1_SHIFT)
-#define EXYNOS4_CLKDIV_CAM_FIMC2_SHIFT (8)
-#define EXYNOS4_CLKDIV_CAM_FIMC2_MASK (0xf << EXYNOS4_CLKDIV_CAM_FIMC2_SHIFT)
-#define EXYNOS4_CLKDIV_CAM_FIMC3_SHIFT (12)
-#define EXYNOS4_CLKDIV_CAM_FIMC3_MASK (0xf << EXYNOS4_CLKDIV_CAM_FIMC3_SHIFT)
-
-#define EXYNOS4_CLKDIV_CAM1 (S5P_VA_CMU + 0x0C568)
-
-#define EXYNOS4_CLKDIV_STAT_CAM1 (S5P_VA_CMU + 0x0C668)
-
-#define EXYNOS4_CLKDIV_CAM1_JPEG_SHIFT (0)
-#define EXYNOS4_CLKDIV_CAM1_JPEG_MASK (0xf << EXYNOS4_CLKDIV_CAM1_JPEG_SHIFT)
-
-#endif /* __DEVFREQ_EXYNOS4_BUS_H */
diff --git a/drivers/devfreq/exynos/exynos5_bus.c b/drivers/devfreq/exynos/exynos5_bus.c
deleted file mode 100644
index 297ea30..0000000
--- a/drivers/devfreq/exynos/exynos5_bus.c
+++ /dev/null
@@ -1,431 +0,0 @@
-/*
- * Copyright (c) 2012 Samsung Electronics Co., Ltd.
- * http://www.samsung.com/
- *
- * EXYNOS5 INT clock frequency scaling support using DEVFREQ framework
- * Based on work done by Jonghwan Choi <jhbird.choi@samsung.com>
- * Support for only EXYNOS5250 is present.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-
-#include <linux/module.h>
-#include <linux/devfreq.h>
-#include <linux/io.h>
-#include <linux/pm_opp.h>
-#include <linux/slab.h>
-#include <linux/suspend.h>
-#include <linux/clk.h>
-#include <linux/delay.h>
-#include <linux/platform_device.h>
-#include <linux/pm_qos.h>
-#include <linux/regulator/consumer.h>
-#include <linux/of_address.h>
-#include <linux/of_platform.h>
-
-#include "exynos_ppmu.h"
-
-#define MAX_SAFEVOLT 1100000 /* 1.10V */
-/* Assume that the bus is saturated if the utilization is 25% */
-#define INT_BUS_SATURATION_RATIO 25
-
-enum int_level_idx {
- LV_0,
- LV_1,
- LV_2,
- LV_3,
- LV_4,
- _LV_END
-};
-
-enum exynos_ppmu_list {
- PPMU_RIGHT,
- PPMU_END,
-};
-
-struct busfreq_data_int {
- struct device *dev;
- struct devfreq *devfreq;
- struct regulator *vdd_int;
- struct busfreq_ppmu_data ppmu_data;
- unsigned long curr_freq;
- bool disabled;
-
- struct notifier_block pm_notifier;
- struct mutex lock;
- struct pm_qos_request int_req;
- struct clk *int_clk;
-};
-
-struct int_bus_opp_table {
- unsigned int idx;
- unsigned long clk;
- unsigned long volt;
-};
-
-static struct int_bus_opp_table exynos5_int_opp_table[] = {
- {LV_0, 266000, 1025000},
- {LV_1, 200000, 1025000},
- {LV_2, 160000, 1025000},
- {LV_3, 133000, 1025000},
- {LV_4, 100000, 1025000},
- {0, 0, 0},
-};
-
-static int exynos5_int_setvolt(struct busfreq_data_int *data,
- unsigned long volt)
-{
- return regulator_set_voltage(data->vdd_int, volt, MAX_SAFEVOLT);
-}
-
-static int exynos5_busfreq_int_target(struct device *dev, unsigned long *_freq,
- u32 flags)
-{
- int err = 0;
- struct platform_device *pdev = container_of(dev, struct platform_device,
- dev);
- struct busfreq_data_int *data = platform_get_drvdata(pdev);
- struct dev_pm_opp *opp;
- unsigned long old_freq, freq;
- unsigned long volt;
-
- rcu_read_lock();
- opp = devfreq_recommended_opp(dev, _freq, flags);
- if (IS_ERR(opp)) {
- rcu_read_unlock();
- dev_err(dev, "%s: Invalid OPP.\n", __func__);
- return PTR_ERR(opp);
- }
-
- freq = dev_pm_opp_get_freq(opp);
- volt = dev_pm_opp_get_voltage(opp);
- rcu_read_unlock();
-
- old_freq = data->curr_freq;
-
- if (old_freq == freq)
- return 0;
-
- dev_dbg(dev, "targeting %lukHz %luuV\n", freq, volt);
-
- mutex_lock(&data->lock);
-
- if (data->disabled)
- goto out;
-
- if (freq > exynos5_int_opp_table[0].clk)
- pm_qos_update_request(&data->int_req, freq * 16 / 1000);
- else
- pm_qos_update_request(&data->int_req, -1);
-
- if (old_freq < freq)
- err = exynos5_int_setvolt(data, volt);
- if (err)
- goto out;
-
- err = clk_set_rate(data->int_clk, freq * 1000);
-
- if (err)
- goto out;
-
- if (old_freq > freq)
- err = exynos5_int_setvolt(data, volt);
- if (err)
- goto out;
-
- data->curr_freq = freq;
-out:
- mutex_unlock(&data->lock);
- return err;
-}
-
-static int exynos5_int_get_dev_status(struct device *dev,
- struct devfreq_dev_status *stat)
-{
- struct platform_device *pdev = container_of(dev, struct platform_device,
- dev);
- struct busfreq_data_int *data = platform_get_drvdata(pdev);
- struct busfreq_ppmu_data *ppmu_data = &data->ppmu_data;
- int busier_dmc;
-
- exynos_read_ppmu(ppmu_data);
- busier_dmc = exynos_get_busier_ppmu(ppmu_data);
-
- stat->current_frequency = data->curr_freq;
-
- /* Number of cycles spent on memory access */
- stat->busy_time = ppmu_data->ppmu[busier_dmc].count[PPMU_PMNCNT3];
- stat->busy_time *= 100 / INT_BUS_SATURATION_RATIO;
- stat->total_time = ppmu_data->ppmu[busier_dmc].ccnt;
-
- return 0;
-}
-
-static struct devfreq_dev_profile exynos5_devfreq_int_profile = {
- .initial_freq = 160000,
- .polling_ms = 100,
- .target = exynos5_busfreq_int_target,
- .get_dev_status = exynos5_int_get_dev_status,
-};
-
-static int exynos5250_init_int_tables(struct busfreq_data_int *data)
-{
- int i, err = 0;
-
- for (i = LV_0; i < _LV_END; i++) {
- err = dev_pm_opp_add(data->dev, exynos5_int_opp_table[i].clk,
- exynos5_int_opp_table[i].volt);
- if (err) {
- dev_err(data->dev, "Cannot add opp entries.\n");
- return err;
- }
- }
-
- return 0;
-}
-
-static int exynos5_busfreq_int_pm_notifier_event(struct notifier_block *this,
- unsigned long event, void *ptr)
-{
- struct busfreq_data_int *data = container_of(this,
- struct busfreq_data_int, pm_notifier);
- struct dev_pm_opp *opp;
- unsigned long maxfreq = ULONG_MAX;
- unsigned long freq;
- unsigned long volt;
- int err = 0;
-
- switch (event) {
- case PM_SUSPEND_PREPARE:
- /* Set Fastest and Deactivate DVFS */
- mutex_lock(&data->lock);
-
- data->disabled = true;
-
- rcu_read_lock();
- opp = dev_pm_opp_find_freq_floor(data->dev, &maxfreq);
- if (IS_ERR(opp)) {
- rcu_read_unlock();
- err = PTR_ERR(opp);
- goto unlock;
- }
- freq = dev_pm_opp_get_freq(opp);
- volt = dev_pm_opp_get_voltage(opp);
- rcu_read_unlock();
-
- err = exynos5_int_setvolt(data, volt);
- if (err)
- goto unlock;
-
- err = clk_set_rate(data->int_clk, freq * 1000);
-
- if (err)
- goto unlock;
-
- data->curr_freq = freq;
-unlock:
- mutex_unlock(&data->lock);
- if (err)
- return NOTIFY_BAD;
- return NOTIFY_OK;
- case PM_POST_RESTORE:
- case PM_POST_SUSPEND:
- /* Reactivate */
- mutex_lock(&data->lock);
- data->disabled = false;
- mutex_unlock(&data->lock);
- return NOTIFY_OK;
- }
-
- return NOTIFY_DONE;
-}
-
-static int exynos5_busfreq_int_probe(struct platform_device *pdev)
-{
- struct busfreq_data_int *data;
- struct busfreq_ppmu_data *ppmu_data;
- struct dev_pm_opp *opp;
- struct device *dev = &pdev->dev;
- struct device_node *np;
- unsigned long initial_freq;
- unsigned long initial_volt;
- int err = 0;
- int i;
-
- data = devm_kzalloc(&pdev->dev, sizeof(struct busfreq_data_int),
- GFP_KERNEL);
- if (data == NULL) {
- dev_err(dev, "Cannot allocate memory.\n");
- return -ENOMEM;
- }
-
- ppmu_data = &data->ppmu_data;
- ppmu_data->ppmu_end = PPMU_END;
- ppmu_data->ppmu = devm_kzalloc(dev,
- sizeof(struct exynos_ppmu) * PPMU_END,
- GFP_KERNEL);
- if (!ppmu_data->ppmu) {
- dev_err(dev, "Failed to allocate memory for exynos_ppmu\n");
- return -ENOMEM;
- }
-
- np = of_find_compatible_node(NULL, NULL, "samsung,exynos5250-ppmu");
- if (np == NULL) {
- pr_err("Unable to find PPMU node\n");
- return -ENOENT;
- }
-
- for (i = 0; i < ppmu_data->ppmu_end; i++) {
- /* map PPMU memory region */
- ppmu_data->ppmu[i].hw_base = of_iomap(np, i);
- if (ppmu_data->ppmu[i].hw_base == NULL) {
- dev_err(&pdev->dev, "failed to map memory region\n");
- return -ENOMEM;
- }
- }
- data->pm_notifier.notifier_call = exynos5_busfreq_int_pm_notifier_event;
- data->dev = dev;
- mutex_init(&data->lock);
-
- err = exynos5250_init_int_tables(data);
- if (err)
- return err;
-
- data->vdd_int = devm_regulator_get(dev, "vdd_int");
- if (IS_ERR(data->vdd_int)) {
- dev_err(dev, "Cannot get the regulator \"vdd_int\"\n");
- return PTR_ERR(data->vdd_int);
- }
-
- data->int_clk = devm_clk_get(dev, "int_clk");
- if (IS_ERR(data->int_clk)) {
- dev_err(dev, "Cannot get clock \"int_clk\"\n");
- return PTR_ERR(data->int_clk);
- }
-
- rcu_read_lock();
- opp = dev_pm_opp_find_freq_floor(dev,
- &exynos5_devfreq_int_profile.initial_freq);
- if (IS_ERR(opp)) {
- rcu_read_unlock();
- dev_err(dev, "Invalid initial frequency %lu kHz.\n",
- exynos5_devfreq_int_profile.initial_freq);
- return PTR_ERR(opp);
- }
- initial_freq = dev_pm_opp_get_freq(opp);
- initial_volt = dev_pm_opp_get_voltage(opp);
- rcu_read_unlock();
- data->curr_freq = initial_freq;
-
- err = clk_set_rate(data->int_clk, initial_freq * 1000);
- if (err) {
- dev_err(dev, "Failed to set initial frequency\n");
- return err;
- }
-
- err = exynos5_int_setvolt(data, initial_volt);
- if (err)
- return err;
-
- platform_set_drvdata(pdev, data);
-
- busfreq_mon_reset(ppmu_data);
-
- data->devfreq = devm_devfreq_add_device(dev, &exynos5_devfreq_int_profile,
- "simple_ondemand", NULL);
- if (IS_ERR(data->devfreq))
- return PTR_ERR(data->devfreq);
-
- err = devm_devfreq_register_opp_notifier(dev, data->devfreq);
- if (err < 0) {
- dev_err(dev, "Failed to register opp notifier\n");
- return err;
- }
-
- err = register_pm_notifier(&data->pm_notifier);
- if (err) {
- dev_err(dev, "Failed to setup pm notifier\n");
- return err;
- }
-
- /* TODO: Add a new QOS class for int/mif bus */
- pm_qos_add_request(&data->int_req, PM_QOS_NETWORK_THROUGHPUT, -1);
-
- return 0;
-}
-
-static int exynos5_busfreq_int_remove(struct platform_device *pdev)
-{
- struct busfreq_data_int *data = platform_get_drvdata(pdev);
-
- pm_qos_remove_request(&data->int_req);
- unregister_pm_notifier(&data->pm_notifier);
-
- return 0;
-}
-
-#ifdef CONFIG_PM_SLEEP
-static int exynos5_busfreq_int_resume(struct device *dev)
-{
- struct platform_device *pdev = container_of(dev, struct platform_device,
- dev);
- struct busfreq_data_int *data = platform_get_drvdata(pdev);
- struct busfreq_ppmu_data *ppmu_data = &data->ppmu_data;
-
- busfreq_mon_reset(ppmu_data);
- return 0;
-}
-static const struct dev_pm_ops exynos5_busfreq_int_pm = {
- .resume = exynos5_busfreq_int_resume,
-};
-#endif
-static SIMPLE_DEV_PM_OPS(exynos5_busfreq_int_pm_ops, NULL,
- exynos5_busfreq_int_resume);
-
-/* platform device pointer for exynos5 devfreq device. */
-static struct platform_device *exynos5_devfreq_pdev;
-
-static struct platform_driver exynos5_busfreq_int_driver = {
- .probe = exynos5_busfreq_int_probe,
- .remove = exynos5_busfreq_int_remove,
- .driver = {
- .name = "exynos5-bus-int",
- .pm = &exynos5_busfreq_int_pm_ops,
- },
-};
-
-static int __init exynos5_busfreq_int_init(void)
-{
- int ret;
-
- ret = platform_driver_register(&exynos5_busfreq_int_driver);
- if (ret < 0)
- goto out;
-
- exynos5_devfreq_pdev =
- platform_device_register_simple("exynos5-bus-int", -1, NULL, 0);
- if (IS_ERR(exynos5_devfreq_pdev)) {
- ret = PTR_ERR(exynos5_devfreq_pdev);
- goto out1;
- }
-
- return 0;
-out1:
- platform_driver_unregister(&exynos5_busfreq_int_driver);
-out:
- return ret;
-}
-late_initcall(exynos5_busfreq_int_init);
-
-static void __exit exynos5_busfreq_int_exit(void)
-{
- platform_device_unregister(exynos5_devfreq_pdev);
- platform_driver_unregister(&exynos5_busfreq_int_driver);
-}
-module_exit(exynos5_busfreq_int_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("EXYNOS5 busfreq driver with devfreq framework");
diff --git a/drivers/devfreq/exynos/exynos_ppmu.c b/drivers/devfreq/exynos/exynos_ppmu.c
deleted file mode 100644
index 97b75e5..0000000
--- a/drivers/devfreq/exynos/exynos_ppmu.c
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * Copyright (c) 2012 Samsung Electronics Co., Ltd.
- * http://www.samsung.com/
- *
- * EXYNOS - PPMU support
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/io.h>
-
-#include "exynos_ppmu.h"
-
-void exynos_ppmu_reset(void __iomem *ppmu_base)
-{
- __raw_writel(PPMU_CYCLE_RESET | PPMU_COUNTER_RESET, ppmu_base);
- __raw_writel(PPMU_ENABLE_CYCLE |
- PPMU_ENABLE_COUNT0 |
- PPMU_ENABLE_COUNT1 |
- PPMU_ENABLE_COUNT2 |
- PPMU_ENABLE_COUNT3,
- ppmu_base + PPMU_CNTENS);
-}
-
-void exynos_ppmu_setevent(void __iomem *ppmu_base, unsigned int ch,
- unsigned int evt)
-{
- __raw_writel(evt, ppmu_base + PPMU_BEVTSEL(ch));
-}
-
-void exynos_ppmu_start(void __iomem *ppmu_base)
-{
- __raw_writel(PPMU_ENABLE, ppmu_base);
-}
-
-void exynos_ppmu_stop(void __iomem *ppmu_base)
-{
- __raw_writel(PPMU_DISABLE, ppmu_base);
-}
-
-unsigned int exynos_ppmu_read(void __iomem *ppmu_base, unsigned int ch)
-{
- unsigned int total;
-
- if (ch == PPMU_PMNCNT3)
- total = ((__raw_readl(ppmu_base + PMCNT_OFFSET(ch)) << 8) |
- __raw_readl(ppmu_base + PMCNT_OFFSET(ch + 1)));
- else
- total = __raw_readl(ppmu_base + PMCNT_OFFSET(ch));
-
- return total;
-}
-
-void busfreq_mon_reset(struct busfreq_ppmu_data *ppmu_data)
-{
- unsigned int i;
-
- for (i = 0; i < ppmu_data->ppmu_end; i++) {
- void __iomem *ppmu_base = ppmu_data->ppmu[i].hw_base;
-
- /* Reset the performance and cycle counters */
- exynos_ppmu_reset(ppmu_base);
-
- /* Setup count registers to monitor read/write transactions */
- ppmu_data->ppmu[i].event[PPMU_PMNCNT3] = RDWR_DATA_COUNT;
- exynos_ppmu_setevent(ppmu_base, PPMU_PMNCNT3,
- ppmu_data->ppmu[i].event[PPMU_PMNCNT3]);
-
- exynos_ppmu_start(ppmu_base);
- }
-}
-EXPORT_SYMBOL(busfreq_mon_reset);
-
-void exynos_read_ppmu(struct busfreq_ppmu_data *ppmu_data)
-{
- int i, j;
-
- for (i = 0; i < ppmu_data->ppmu_end; i++) {
- void __iomem *ppmu_base = ppmu_data->ppmu[i].hw_base;
-
- exynos_ppmu_stop(ppmu_base);
-
- /* Update local data from PPMU */
- ppmu_data->ppmu[i].ccnt = __raw_readl(ppmu_base + PPMU_CCNT);
-
- for (j = PPMU_PMNCNT0; j < PPMU_PMNCNT_MAX; j++) {
- if (ppmu_data->ppmu[i].event[j] == 0)
- ppmu_data->ppmu[i].count[j] = 0;
- else
- ppmu_data->ppmu[i].count[j] =
- exynos_ppmu_read(ppmu_base, j);
- }
- }
-
- busfreq_mon_reset(ppmu_data);
-}
-EXPORT_SYMBOL(exynos_read_ppmu);
-
-int exynos_get_busier_ppmu(struct busfreq_ppmu_data *ppmu_data)
-{
- unsigned int count = 0;
- int i, j, busy = 0;
-
- for (i = 0; i < ppmu_data->ppmu_end; i++) {
- for (j = PPMU_PMNCNT0; j < PPMU_PMNCNT_MAX; j++) {
- if (ppmu_data->ppmu[i].count[j] > count) {
- count = ppmu_data->ppmu[i].count[j];
- busy = i;
- }
- }
- }
-
- return busy;
-}
-EXPORT_SYMBOL(exynos_get_busier_ppmu);
diff --git a/drivers/devfreq/exynos/exynos_ppmu.h b/drivers/devfreq/exynos/exynos_ppmu.h
deleted file mode 100644
index 71f17ba..0000000
--- a/drivers/devfreq/exynos/exynos_ppmu.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (c) 2012 Samsung Electronics Co., Ltd.
- * http://www.samsung.com/
- *
- * EXYNOS PPMU header
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
-*/
-
-#ifndef __DEVFREQ_EXYNOS_PPMU_H
-#define __DEVFREQ_EXYNOS_PPMU_H __FILE__
-
-#include <linux/ktime.h>
-
-/* For PPMU Control */
-#define PPMU_ENABLE BIT(0)
-#define PPMU_DISABLE 0x0
-#define PPMU_CYCLE_RESET BIT(1)
-#define PPMU_COUNTER_RESET BIT(2)
-
-#define PPMU_ENABLE_COUNT0 BIT(0)
-#define PPMU_ENABLE_COUNT1 BIT(1)
-#define PPMU_ENABLE_COUNT2 BIT(2)
-#define PPMU_ENABLE_COUNT3 BIT(3)
-#define PPMU_ENABLE_CYCLE BIT(31)
-
-#define PPMU_CNTENS 0x10
-#define PPMU_FLAG 0x50
-#define PPMU_CCNT_OVERFLOW BIT(31)
-#define PPMU_CCNT 0x100
-
-#define PPMU_PMCNT0 0x110
-#define PPMU_PMCNT_OFFSET 0x10
-#define PMCNT_OFFSET(x) (PPMU_PMCNT0 + (PPMU_PMCNT_OFFSET * x))
-
-#define PPMU_BEVT0SEL 0x1000
-#define PPMU_BEVTSEL_OFFSET 0x100
-#define PPMU_BEVTSEL(x) (PPMU_BEVT0SEL + (ch * PPMU_BEVTSEL_OFFSET))
-
-/* For Event Selection */
-#define RD_DATA_COUNT 0x5
-#define WR_DATA_COUNT 0x6
-#define RDWR_DATA_COUNT 0x7
-
-enum ppmu_counter {
- PPMU_PMNCNT0,
- PPMU_PMCCNT1,
- PPMU_PMNCNT2,
- PPMU_PMNCNT3,
- PPMU_PMNCNT_MAX,
-};
-
-struct bus_opp_table {
- unsigned int idx;
- unsigned long clk;
- unsigned long volt;
-};
-
-struct exynos_ppmu {
- void __iomem *hw_base;
- unsigned int ccnt;
- unsigned int event[PPMU_PMNCNT_MAX];
- unsigned int count[PPMU_PMNCNT_MAX];
- unsigned long long ns;
- ktime_t reset_time;
- bool ccnt_overflow;
- bool count_overflow[PPMU_PMNCNT_MAX];
-};
-
-struct busfreq_ppmu_data {
- struct exynos_ppmu *ppmu;
- int ppmu_end;
-};
-
-void exynos_ppmu_reset(void __iomem *ppmu_base);
-void exynos_ppmu_setevent(void __iomem *ppmu_base, unsigned int ch,
- unsigned int evt);
-void exynos_ppmu_start(void __iomem *ppmu_base);
-void exynos_ppmu_stop(void __iomem *ppmu_base);
-unsigned int exynos_ppmu_read(void __iomem *ppmu_base, unsigned int ch);
-void busfreq_mon_reset(struct busfreq_ppmu_data *ppmu_data);
-void exynos_read_ppmu(struct busfreq_ppmu_data *ppmu_data);
-int exynos_get_busier_ppmu(struct busfreq_ppmu_data *ppmu_data);
-#endif /* __DEVFREQ_EXYNOS_PPMU_H */
diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c
new file mode 100644
index 0000000..9ef46e2
--- /dev/null
+++ b/drivers/devfreq/governor_passive.c
@@ -0,0 +1,205 @@
+/*
+ * linux/drivers/devfreq/governor_passive.c
+ *
+ * Copyright (C) 2016 Samsung Electronics
+ * Author: Chanwoo Choi <cw00.choi@samsung.com>
+ * Author: MyungJoo Ham <myungjoo.ham@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/devfreq.h>
+#include "governor.h"
+
+static int devfreq_passive_get_target_freq(struct devfreq *devfreq,
+ unsigned long *freq)
+{
+ struct devfreq_passive_data *p_data
+ = (struct devfreq_passive_data *)devfreq->data;
+ struct devfreq *parent_devfreq = (struct devfreq *)p_data->parent;
+ unsigned long child_freq = ULONG_MAX;
+ struct dev_pm_opp *opp;
+ int i, count, ret = 0;
+
+ /*
+ * If the devfreq device using the passive governor provides its own
+ * method to determine the next frequency, use the get_target_freq()
+ * callback of struct devfreq_passive_data.
+ */
+ if (p_data->get_target_freq) {
+ ret = p_data->get_target_freq(devfreq, freq);
+ goto out;
+ }
+
+ /*
+ * If both the parent and the passive devfreq device use OPP tables,
+ * get the next frequency from the OPP table.
+ */
+
+ /*
+ * - the parent devfreq device uses any governor except passive.
+ * - the passive devfreq device uses the passive governor.
+ *
+ * Each devfreq device has its own OPP table. After the governor of the
+ * parent devfreq device decides the new frequency, the passive governor
+ * needs to get the index of that frequency in the parent device's OPP
+ * table. The same index is then used to pick the suitable new frequency
+ * for the passive devfreq device.
+ */
+ if (!devfreq->profile || !devfreq->profile->freq_table
+ || devfreq->profile->max_state <= 0)
+ return -EINVAL;
+
+ /*
+ * The passive governor has to get the correct frequency from the OPP
+ * list of the parent device, because at this point *freq is only a
+ * temporary value decided by the parent's governor (e.g. ondemand).
+ */
+ rcu_read_lock();
+ opp = devfreq_recommended_opp(parent_devfreq->dev.parent, freq, 0);
+ rcu_read_unlock();
+ if (IS_ERR(opp)) {
+ ret = PTR_ERR(opp);
+ goto out;
+ }
+
+ /*
+ * Get the index in the parent device's OPP table of the frequency
+ * decided by the parent's governor.
+ */
+ for (i = 0; i < parent_devfreq->profile->max_state; i++)
+ if (parent_devfreq->profile->freq_table[i] == *freq)
+ break;
+
+ if (i == parent_devfreq->profile->max_state) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* Get the suitable frequency by using the index of the parent device. */
+ if (i < devfreq->profile->max_state) {
+ child_freq = devfreq->profile->freq_table[i];
+ } else {
+ count = devfreq->profile->max_state;
+ child_freq = devfreq->profile->freq_table[count - 1];
+ }
+
+ /* Return the suitable frequency for the passive device. */
+ *freq = child_freq;
+
+out:
+ return ret;
+}
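The index-to-index mapping described in the comments above is compact but easy to misread, so here is a stand-alone user-space sketch of the same rule (illustrative only, not part of the patch; the table contents are made-up values): find the parent's OPP index for the decided frequency, then take the same index in the child's table, clamped to the child's last level.

    #include <stdio.h>

    static unsigned long parent_tbl[] = { 100000, 200000, 400000 };
    static unsigned long child_tbl[]  = { 50000, 100000 };

    /* Return the child frequency for a parent frequency, 0 if not found */
    static unsigned long passive_map(unsigned long parent_freq)
    {
        unsigned int i, n_parent = 3, n_child = 2;

        /* Index of the decided frequency in the parent's table */
        for (i = 0; i < n_parent; i++)
            if (parent_tbl[i] == parent_freq)
                break;
        if (i == n_parent)
            return 0;   /* the driver returns -EINVAL here */

        /* Same index in the child's table, clamped to its last level */
        return child_tbl[i < n_child ? i : n_child - 1];
    }

    int main(void)
    {
        printf("%lu\n", passive_map(400000));   /* prints 100000 */
        return 0;
    }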
+
+static int update_devfreq_passive(struct devfreq *devfreq, unsigned long freq)
+{
+ int ret;
+
+ if (!devfreq->governor)
+ return -EINVAL;
+
+ mutex_lock_nested(&devfreq->lock, SINGLE_DEPTH_NESTING);
+
+ ret = devfreq->governor->get_target_freq(devfreq, &freq);
+ if (ret < 0)
+ goto out;
+
+ ret = devfreq->profile->target(devfreq->dev.parent, &freq, 0);
+ if (ret < 0)
+ goto out;
+
+ devfreq->previous_freq = freq;
+
+out:
+ mutex_unlock(&devfreq->lock);
+
+ return 0;
+}
+
+static int devfreq_passive_notifier_call(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct devfreq_passive_data *data
+ = container_of(nb, struct devfreq_passive_data, nb);
+ struct devfreq *devfreq = (struct devfreq *)data->this;
+ struct devfreq *parent = (struct devfreq *)data->parent;
+ struct devfreq_freqs *freqs = (struct devfreq_freqs *)ptr;
+ unsigned long freq = freqs->new;
+
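+ /*
+ * Ordering note: the child is scaled down before the parent goes down
+ * (PRECHANGE) and scaled up only after the parent has gone up
+ * (POSTCHANGE), so the child never runs faster than the parent's
+ * current level supports.
+ */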
+ switch (event) {
+ case DEVFREQ_PRECHANGE:
+ if (parent->previous_freq > freq)
+ update_devfreq_passive(devfreq, freq);
+ break;
+ case DEVFREQ_POSTCHANGE:
+ if (parent->previous_freq < freq)
+ update_devfreq_passive(devfreq, freq);
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+
+static int devfreq_passive_event_handler(struct devfreq *devfreq,
+ unsigned int event, void *data)
+{
+ struct device *dev = devfreq->dev.parent;
+ struct devfreq_passive_data *p_data
+ = (struct devfreq_passive_data *)devfreq->data;
+ struct devfreq *parent = (struct devfreq *)p_data->parent;
+ struct notifier_block *nb = &p_data->nb;
+ int ret = 0;
+
+ if (!parent)
+ return -EPROBE_DEFER;
+
+ switch (event) {
+ case DEVFREQ_GOV_START:
+ if (!p_data->this)
+ p_data->this = devfreq;
+
+ nb->notifier_call = devfreq_passive_notifier_call;
+ ret = devm_devfreq_register_notifier(dev, parent, nb,
+ DEVFREQ_TRANSITION_NOTIFIER);
+ break;
+ case DEVFREQ_GOV_STOP:
+ devm_devfreq_unregister_notifier(dev, parent, nb,
+ DEVFREQ_TRANSITION_NOTIFIER);
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+static struct devfreq_governor devfreq_passive = {
+ .name = "passive",
+ .get_target_freq = devfreq_passive_get_target_freq,
+ .event_handler = devfreq_passive_event_handler,
+};
+
+static int __init devfreq_passive_init(void)
+{
+ return devfreq_add_governor(&devfreq_passive);
+}
+subsys_initcall(devfreq_passive_init);
+
+static void __exit devfreq_passive_exit(void)
+{
+ int ret;
+
+ ret = devfreq_remove_governor(&devfreq_passive);
+ if (ret)
+ pr_err("%s: failed to remove governor %d\n", __func__, ret);
+}
+module_exit(devfreq_passive_exit);
+
+MODULE_AUTHOR("Chanwoo Choi <cw00.choi@samsung.com>");
+MODULE_AUTHOR("MyungJoo Ham <myungjoo.ham@samsung.com>");
+MODULE_DESCRIPTION("DEVFREQ Passive governor");
+MODULE_LICENSE("GPL v2");
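For context, a consumer driver would typically hook a device to this governor roughly as follows. This is a minimal sketch, not part of the patch; the function and variable names are hypothetical, and only the .parent field (plus the optional .get_target_freq override checked at the top of devfreq_passive_get_target_freq()) is consumed by the governor.

    #include <linux/devfreq.h>
    #include <linux/err.h>
    #include <linux/platform_device.h>

    /* parent_devfreq is the devfreq instance of the device this one
     * passively follows; static storage leaves .this and .nb zeroed. */
    static struct devfreq_passive_data passive_data;

    static int child_setup(struct platform_device *pdev,
                           struct devfreq *parent_devfreq,
                           struct devfreq_dev_profile *child_profile)
    {
        struct devfreq *devfreq;

        passive_data.parent = parent_devfreq;

        /* "passive" matches devfreq_passive.name registered above */
        devfreq = devm_devfreq_add_device(&pdev->dev, child_profile,
                                          "passive", &passive_data);
        return PTR_ERR_OR_ZERO(devfreq);
    }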
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 37755e6..6ca7474 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -378,12 +378,11 @@
config EDAC_ALTERA_L2C
bool "Altera L2 Cache ECC"
- depends on EDAC_ALTERA=y
- select CACHE_L2X0
+ depends on EDAC_ALTERA=y && CACHE_L2X0
help
Support for error detection and correction on the
Altera L2 cache Memory for Altera SoCs. This option
- requires L2 cache so it will force that selection.
+ requires L2 cache.
config EDAC_ALTERA_OCRAM
bool "Altera On-Chip RAM ECC"
diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c
index 63e4209..5b4d223 100644
--- a/drivers/edac/altera_edac.c
+++ b/drivers/edac/altera_edac.c
@@ -24,6 +24,7 @@
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/mfd/syscon.h>
+#include <linux/of_address.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>
#include <linux/regmap.h>
@@ -78,27 +79,6 @@
.ue_set_mask = A10_DIAGINT_TDERRA_MASK,
};
-/************************** EDAC Device Defines **************************/
-
-/* OCRAM ECC Management Group Defines */
-#define ALTR_MAN_GRP_OCRAM_ECC_OFFSET 0x04
-#define ALTR_OCR_ECC_EN BIT(0)
-#define ALTR_OCR_ECC_INJS BIT(1)
-#define ALTR_OCR_ECC_INJD BIT(2)
-#define ALTR_OCR_ECC_SERR BIT(3)
-#define ALTR_OCR_ECC_DERR BIT(4)
-
-/* L2 ECC Management Group Defines */
-#define ALTR_MAN_GRP_L2_ECC_OFFSET 0x00
-#define ALTR_L2_ECC_EN BIT(0)
-#define ALTR_L2_ECC_INJS BIT(1)
-#define ALTR_L2_ECC_INJD BIT(2)
-
-#define ALTR_UE_TRIGGER_CHAR 'U' /* Trigger for UE */
-#define ALTR_TRIGGER_READ_WRD_CNT 32 /* Line size x 4 */
-#define ALTR_TRIG_OCRAM_BYTE_SIZE 128 /* Line size x 4 */
-#define ALTR_TRIG_L2C_BYTE_SIZE 4096 /* Full Page */
-
/*********************** EDAC Memory Controller Functions ****************/
/* The SDRAM controller uses the EDAC Memory Controller framework. */
@@ -252,8 +232,8 @@
}
static const struct of_device_id altr_sdram_ctrl_of_match[] = {
- { .compatible = "altr,sdram-edac", .data = (void *)&c5_data},
- { .compatible = "altr,sdram-edac-a10", .data = (void *)&a10_data},
+ { .compatible = "altr,sdram-edac", .data = &c5_data},
+ { .compatible = "altr,sdram-edac-a10", .data = &a10_data},
{},
};
MODULE_DEVICE_TABLE(of, altr_sdram_ctrl_of_match);
@@ -570,28 +550,8 @@
const struct edac_device_prv_data ocramecc_data;
const struct edac_device_prv_data l2ecc_data;
-
-struct edac_device_prv_data {
- int (*setup)(struct platform_device *pdev, void __iomem *base);
- int ce_clear_mask;
- int ue_clear_mask;
- char dbgfs_name[20];
- void * (*alloc_mem)(size_t size, void **other);
- void (*free_mem)(void *p, size_t size, void *other);
- int ecc_enable_mask;
- int ce_set_mask;
- int ue_set_mask;
- int trig_alloc_sz;
-};
-
-struct altr_edac_device_dev {
- void __iomem *base;
- int sb_irq;
- int db_irq;
- const struct edac_device_prv_data *data;
- struct dentry *debugfs_dir;
- char *edac_dev_name;
-};
+const struct edac_device_prv_data a10_ocramecc_data;
+const struct edac_device_prv_data a10_l2ecc_data;
static irqreturn_t altr_edac_device_handler(int irq, void *dev_id)
{
@@ -665,8 +625,9 @@
if (ACCESS_ONCE(ptemp[i]))
result = -1;
/* Toggle Error bit (it is latched), leave ECC enabled */
- writel(error_mask, drvdata->base);
- writel(priv->ecc_enable_mask, drvdata->base);
+ writel(error_mask, (drvdata->base + priv->set_err_ofst));
+ writel(priv->ecc_enable_mask, (drvdata->base +
+ priv->set_err_ofst));
ptemp[i] = i;
}
/* Ensure it has been written out */
@@ -694,6 +655,16 @@
.llseek = generic_file_llseek,
};
+static ssize_t altr_edac_a10_device_trig(struct file *file,
+ const char __user *user_buf,
+ size_t count, loff_t *ppos);
+
+static const struct file_operations altr_edac_a10_device_inject_fops = {
+ .open = simple_open,
+ .write = altr_edac_a10_device_trig,
+ .llseek = generic_file_llseek,
+};
+
static void altr_create_edacdev_dbgfs(struct edac_device_ctl_info *edac_dci,
const struct edac_device_prv_data *priv)
{
@@ -708,17 +679,18 @@
if (!edac_debugfs_create_file(priv->dbgfs_name, S_IWUSR,
drvdata->debugfs_dir, edac_dci,
- &altr_edac_device_inject_fops))
+ priv->inject_fops))
debugfs_remove_recursive(drvdata->debugfs_dir);
}
static const struct of_device_id altr_edac_device_of_match[] = {
#ifdef CONFIG_EDAC_ALTERA_L2C
- { .compatible = "altr,socfpga-l2-ecc", .data = (void *)&l2ecc_data },
+ { .compatible = "altr,socfpga-l2-ecc", .data = &l2ecc_data },
+ { .compatible = "altr,socfpga-a10-l2-ecc", .data = &a10_l2ecc_data },
#endif
#ifdef CONFIG_EDAC_ALTERA_OCRAM
- { .compatible = "altr,socfpga-ocram-ecc",
- .data = (void *)&ocramecc_data },
+ { .compatible = "altr,socfpga-ocram-ecc", .data = &ocramecc_data },
+ { .compatible = "altr,socfpga-a10-ocram-ecc", .data = &a10_ocramecc_data },
#endif
{},
};
@@ -789,7 +761,7 @@
/* Check specific dependencies for the module */
if (drvdata->data->setup) {
- res = drvdata->data->setup(pdev, drvdata->base);
+ res = drvdata->data->setup(drvdata);
if (res)
goto fail1;
}
@@ -856,6 +828,25 @@
/*********************** OCRAM EDAC Device Functions *********************/
#ifdef CONFIG_EDAC_ALTERA_OCRAM
+/*
+ * Test for the memory's ECC dependencies upon entry because the
+ * platform-specific startup should have initialized the memory and
+ * enabled the ECC. We can't turn on ECC here because accessing
+ * uninitialized memory would cause CE/UE errors, possibly causing an
+ * ABORT.
+ */
+static int altr_check_ecc_deps(struct altr_edac_device_dev *device)
+{
+ void __iomem *base = device->base;
+ const struct edac_device_prv_data *prv = device->data;
+
+ if (readl(base + prv->ecc_en_ofst) & prv->ecc_enable_mask)
+ return 0;
+
+ edac_printk(KERN_ERR, EDAC_DEVICE,
+ "%s: No ECC present or ECC disabled.\n",
+ device->edac_dev_name);
+ return -ENODEV;
+}
static void *ocram_alloc_mem(size_t size, void **other)
{
@@ -891,36 +882,53 @@
gen_pool_free((struct gen_pool *)other, (u32)p, size);
}
-/*
- * altr_ocram_check_deps()
- * Test for OCRAM cache ECC dependencies upon entry because
- * platform specific startup should have initialized the
- * On-Chip RAM memory and enabled the ECC.
- * Can't turn on ECC here because accessing un-initialized
- * memory will cause CE/UE errors possibly causing an ABORT.
- */
-static int altr_ocram_check_deps(struct platform_device *pdev,
- void __iomem *base)
+static irqreturn_t altr_edac_a10_ecc_irq(struct altr_edac_device_dev *dci,
+ bool sberr)
{
- if (readl(base) & ALTR_OCR_ECC_EN)
- return 0;
+ void __iomem *base = dci->base;
- edac_printk(KERN_ERR, EDAC_DEVICE,
- "OCRAM: No ECC present or ECC disabled.\n");
- return -ENODEV;
+ if (sberr) {
+ writel(ALTR_A10_ECC_SERRPENA,
+ base + ALTR_A10_ECC_INTSTAT_OFST);
+ edac_device_handle_ce(dci->edac_dev, 0, 0, dci->edac_dev_name);
+ } else {
+ writel(ALTR_A10_ECC_DERRPENA,
+ base + ALTR_A10_ECC_INTSTAT_OFST);
+ edac_device_handle_ue(dci->edac_dev, 0, 0, dci->edac_dev_name);
+ panic("\nEDAC:ECC_DEVICE[Uncorrectable errors]\n");
+ }
+ return IRQ_HANDLED;
}
const struct edac_device_prv_data ocramecc_data = {
- .setup = altr_ocram_check_deps,
+ .setup = altr_check_ecc_deps,
.ce_clear_mask = (ALTR_OCR_ECC_EN | ALTR_OCR_ECC_SERR),
.ue_clear_mask = (ALTR_OCR_ECC_EN | ALTR_OCR_ECC_DERR),
.dbgfs_name = "altr_ocram_trigger",
.alloc_mem = ocram_alloc_mem,
.free_mem = ocram_free_mem,
.ecc_enable_mask = ALTR_OCR_ECC_EN,
+ .ecc_en_ofst = ALTR_OCR_ECC_REG_OFFSET,
.ce_set_mask = (ALTR_OCR_ECC_EN | ALTR_OCR_ECC_INJS),
.ue_set_mask = (ALTR_OCR_ECC_EN | ALTR_OCR_ECC_INJD),
+ .set_err_ofst = ALTR_OCR_ECC_REG_OFFSET,
.trig_alloc_sz = ALTR_TRIG_OCRAM_BYTE_SIZE,
+ .inject_fops = &altr_edac_device_inject_fops,
+};
+
+const struct edac_device_prv_data a10_ocramecc_data = {
+ .setup = altr_check_ecc_deps,
+ .ce_clear_mask = ALTR_A10_ECC_SERRPENA,
+ .ue_clear_mask = ALTR_A10_ECC_DERRPENA,
+ .irq_status_mask = A10_SYSMGR_ECC_INTSTAT_OCRAM,
+ .dbgfs_name = "altr_ocram_trigger",
+ .ecc_enable_mask = ALTR_A10_OCRAM_ECC_EN_CTL,
+ .ecc_en_ofst = ALTR_A10_ECC_CTRL_OFST,
+ .ce_set_mask = ALTR_A10_ECC_TSERRA,
+ .ue_set_mask = ALTR_A10_ECC_TDERRA,
+ .set_err_ofst = ALTR_A10_ECC_INTTEST_OFST,
+ .ecc_irq_handler = altr_edac_a10_ecc_irq,
+ .inject_fops = &altr_edac_a10_device_inject_fops,
};
#endif /* CONFIG_EDAC_ALTERA_OCRAM */
@@ -966,10 +974,13 @@
* Bail if ECC is not enabled.
* Note that L2 Cache Enable is forced at build time.
*/
-static int altr_l2_check_deps(struct platform_device *pdev,
- void __iomem *base)
+static int altr_l2_check_deps(struct altr_edac_device_dev *device)
{
- if (readl(base) & ALTR_L2_ECC_EN)
+ void __iomem *base = device->base;
+ const struct edac_device_prv_data *prv = device->data;
+
+ if ((readl(base) & prv->ecc_enable_mask) ==
+ prv->ecc_enable_mask)
return 0;
edac_printk(KERN_ERR, EDAC_DEVICE,
@@ -977,6 +988,24 @@
return -ENODEV;
}
+static irqreturn_t altr_edac_a10_l2_irq(struct altr_edac_device_dev *dci,
+ bool sberr)
+{
+ if (sberr) {
+ regmap_write(dci->edac->ecc_mgr_map,
+ A10_SYSGMR_MPU_CLEAR_L2_ECC_OFST,
+ A10_SYSGMR_MPU_CLEAR_L2_ECC_SB);
+ edac_device_handle_ce(dci->edac_dev, 0, 0, dci->edac_dev_name);
+ } else {
+ regmap_write(dci->edac->ecc_mgr_map,
+ A10_SYSGMR_MPU_CLEAR_L2_ECC_OFST,
+ A10_SYSGMR_MPU_CLEAR_L2_ECC_MB);
+ edac_device_handle_ue(dci->edac_dev, 0, 0, dci->edac_dev_name);
+ panic("\nEDAC:ECC_DEVICE[Uncorrectable errors]\n");
+ }
+ return IRQ_HANDLED;
+}
+
const struct edac_device_prv_data l2ecc_data = {
.setup = altr_l2_check_deps,
.ce_clear_mask = 0,
@@ -987,11 +1016,252 @@
.ecc_enable_mask = ALTR_L2_ECC_EN,
.ce_set_mask = (ALTR_L2_ECC_EN | ALTR_L2_ECC_INJS),
.ue_set_mask = (ALTR_L2_ECC_EN | ALTR_L2_ECC_INJD),
+ .set_err_ofst = ALTR_L2_ECC_REG_OFFSET,
.trig_alloc_sz = ALTR_TRIG_L2C_BYTE_SIZE,
+ .inject_fops = &altr_edac_device_inject_fops,
+};
+
+const struct edac_device_prv_data a10_l2ecc_data = {
+ .setup = altr_l2_check_deps,
+ .ce_clear_mask = ALTR_A10_L2_ECC_SERR_CLR,
+ .ue_clear_mask = ALTR_A10_L2_ECC_MERR_CLR,
+ .irq_status_mask = A10_SYSMGR_ECC_INTSTAT_L2,
+ .dbgfs_name = "altr_l2_trigger",
+ .alloc_mem = l2_alloc_mem,
+ .free_mem = l2_free_mem,
+ .ecc_enable_mask = ALTR_A10_L2_ECC_EN_CTL,
+ .ce_set_mask = ALTR_A10_L2_ECC_CE_INJ_MASK,
+ .ue_set_mask = ALTR_A10_L2_ECC_UE_INJ_MASK,
+ .set_err_ofst = ALTR_A10_L2_ECC_INJ_OFST,
+ .ecc_irq_handler = altr_edac_a10_l2_irq,
+ .trig_alloc_sz = ALTR_TRIG_L2C_BYTE_SIZE,
+ .inject_fops = &altr_edac_device_inject_fops,
};
#endif /* CONFIG_EDAC_ALTERA_L2C */
+/********************* Arria10 EDAC Device Functions *************************/
+
+/*
+ * The Arria10 EDAC Device Functions differ from the Cyclone5/Arria5
+ * ones because two IRQs are shared among all the ECC peripherals. The
+ * ECC manager manages the IRQs and the children.
+ * Based on the xgene_edac.c peripheral code.
+ */
+
+static ssize_t altr_edac_a10_device_trig(struct file *file,
+ const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ struct edac_device_ctl_info *edac_dci = file->private_data;
+ struct altr_edac_device_dev *drvdata = edac_dci->pvt_info;
+ const struct edac_device_prv_data *priv = drvdata->data;
+ void __iomem *set_addr = (drvdata->base + priv->set_err_ofst);
+ unsigned long flags;
+ u8 trig_type;
+
+ if (!user_buf || get_user(trig_type, user_buf))
+ return -EFAULT;
+
+ local_irq_save(flags);
+ if (trig_type == ALTR_UE_TRIGGER_CHAR)
+ writel(priv->ue_set_mask, set_addr);
+ else
+ writel(priv->ce_set_mask, set_addr);
+ /* Ensure the interrupt test bits are set */
+ wmb();
+ local_irq_restore(flags);
+
+ return count;
+}
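The trigger above is driven from user space by writing one byte to the debugfs file named by dbgfs_name. A minimal sketch follows (assumptions: debugfs is mounted at /sys/kernel/debug and the EDAC core places the file under an edac directory there; the exact path may differ on a given system).

    #include <fcntl.h>
    #include <unistd.h>

    int main(void)
    {
        /* Path is illustrative; see dbgfs_name ("altr_ocram_trigger") */
        int fd = open("/sys/kernel/debug/edac/altr_ocram_trigger", O_WRONLY);

        if (fd < 0)
            return 1;
        /* ALTR_UE_TRIGGER_CHAR ('U') injects a UE; any other byte a CE */
        if (write(fd, "U", 1) != 1) {
            close(fd);
            return 1;
        }
        close(fd);
        return 0;
    }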
+
+static irqreturn_t altr_edac_a10_irq_handler(int irq, void *dev_id)
+{
+ irqreturn_t rc = IRQ_NONE;
+ struct altr_arria10_edac *edac = dev_id;
+ struct altr_edac_device_dev *dci;
+ int irq_status;
+ bool sberr = (irq == edac->sb_irq) ? 1 : 0;
+ int sm_offset = sberr ? A10_SYSMGR_ECC_INTSTAT_SERR_OFST :
+ A10_SYSMGR_ECC_INTSTAT_DERR_OFST;
+
+ regmap_read(edac->ecc_mgr_map, sm_offset, &irq_status);
+
+ if ((irq != edac->sb_irq) && (irq != edac->db_irq)) {
+ WARN_ON(1);
+ } else {
+ list_for_each_entry(dci, &edac->a10_ecc_devices, next) {
+ if (irq_status & dci->data->irq_status_mask)
+ rc = dci->data->ecc_irq_handler(dci, sberr);
+ }
+ }
+
+ return rc;
+}
+
+static int altr_edac_a10_device_add(struct altr_arria10_edac *edac,
+ struct device_node *np)
+{
+ struct edac_device_ctl_info *dci;
+ struct altr_edac_device_dev *altdev;
+ char *ecc_name = (char *)np->name;
+ struct resource res;
+ int edac_idx;
+ int rc = 0;
+ const struct edac_device_prv_data *prv;
+ /* Get matching node and check for valid result */
+ const struct of_device_id *pdev_id =
+ of_match_node(altr_edac_device_of_match, np);
+ if (IS_ERR_OR_NULL(pdev_id))
+ return -ENODEV;
+
+ /* Get driver-specific data for this EDAC device */
+ prv = pdev_id->data;
+ if (IS_ERR_OR_NULL(prv))
+ return -ENODEV;
+
+ if (!devres_open_group(edac->dev, altr_edac_a10_device_add, GFP_KERNEL))
+ return -ENOMEM;
+
+ rc = of_address_to_resource(np, 0, &res);
+ if (rc < 0) {
+ edac_printk(KERN_ERR, EDAC_DEVICE,
+ "%s: no resource address\n", ecc_name);
+ goto err_release_group;
+ }
+
+ edac_idx = edac_device_alloc_index();
+ dci = edac_device_alloc_ctl_info(sizeof(*altdev), ecc_name,
+ 1, ecc_name, 1, 0, NULL, 0,
+ edac_idx);
+
+ if (!dci) {
+ edac_printk(KERN_ERR, EDAC_DEVICE,
+ "%s: Unable to allocate EDAC device\n", ecc_name);
+ rc = -ENOMEM;
+ goto err_release_group;
+ }
+
+ altdev = dci->pvt_info;
+ dci->dev = edac->dev;
+ altdev->edac_dev_name = ecc_name;
+ altdev->edac_idx = edac_idx;
+ altdev->edac = edac;
+ altdev->edac_dev = dci;
+ altdev->data = prv;
+ altdev->ddev = *edac->dev;
+ dci->dev = &altdev->ddev;
+ dci->ctl_name = "Altera ECC Manager";
+ dci->mod_name = ecc_name;
+ dci->dev_name = ecc_name;
+
+ altdev->base = devm_ioremap_resource(edac->dev, &res);
+ if (IS_ERR(altdev->base)) {
+ rc = PTR_ERR(altdev->base);
+ goto err_release_group1;
+ }
+
+ /* Check specific dependencies for the module */
+ if (altdev->data->setup) {
+ rc = altdev->data->setup(altdev);
+ if (rc)
+ goto err_release_group1;
+ }
+
+ rc = edac_device_add_device(dci);
+ if (rc) {
+ dev_err(edac->dev, "edac_device_add_device failed\n");
+ rc = -ENOMEM;
+ goto err_release_group1;
+ }
+
+ altr_create_edacdev_dbgfs(dci, prv);
+
+ list_add(&altdev->next, &edac->a10_ecc_devices);
+
+ devres_remove_group(edac->dev, altr_edac_a10_device_add);
+
+ return 0;
+
+err_release_group1:
+ edac_device_free_ctl_info(dci);
+err_release_group:
+ edac_printk(KERN_ALERT, EDAC_DEVICE, "%s: %d\n", __func__, __LINE__);
+ devres_release_group(edac->dev, NULL);
+ edac_printk(KERN_ERR, EDAC_DEVICE,
+ "%s:Error setting up EDAC device: %d\n", ecc_name, rc);
+
+ return rc;
+}
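altr_edac_a10_device_add() leans on the devres group API so that a half-initialized child can be torn down as a unit. A minimal sketch of that pattern is below (simplified and hypothetical; acquire_managed_resources() stands in for the devm_* calls made between opening and closing the group).

    #include <linux/device.h>

    /* Hypothetical stand-in for the devm_* acquisitions done in between */
    int acquire_managed_resources(struct device *dev);

    static int hypothetical_add(struct device *dev)
    {
        void *grp = devres_open_group(dev, hypothetical_add, GFP_KERNEL);

        if (!grp)
            return -ENOMEM;

        if (acquire_managed_resources(dev)) {
            /* Failure: free everything acquired since the open */
            devres_release_group(dev, grp);
            return -ENODEV;
        }

        /* Success: keep the resources, drop only the group marker */
        devres_remove_group(dev, grp);
        return 0;
    }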
+
+static int altr_edac_a10_probe(struct platform_device *pdev)
+{
+ struct altr_arria10_edac *edac;
+ struct device_node *child;
+ int rc;
+
+ edac = devm_kzalloc(&pdev->dev, sizeof(*edac), GFP_KERNEL);
+ if (!edac)
+ return -ENOMEM;
+
+ edac->dev = &pdev->dev;
+ platform_set_drvdata(pdev, edac);
+ INIT_LIST_HEAD(&edac->a10_ecc_devices);
+
+ edac->ecc_mgr_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
+ "altr,sysmgr-syscon");
+ if (IS_ERR(edac->ecc_mgr_map)) {
+ edac_printk(KERN_ERR, EDAC_DEVICE,
+ "Unable to get syscon altr,sysmgr-syscon\n");
+ return PTR_ERR(edac->ecc_mgr_map);
+ }
+
+ edac->sb_irq = platform_get_irq(pdev, 0);
+ rc = devm_request_irq(&pdev->dev, edac->sb_irq,
+ altr_edac_a10_irq_handler,
+ IRQF_SHARED, dev_name(&pdev->dev), edac);
+ if (rc) {
+ edac_printk(KERN_ERR, EDAC_DEVICE, "No SBERR IRQ resource\n");
+ return rc;
+ }
+
+ edac->db_irq = platform_get_irq(pdev, 1);
+ rc = devm_request_irq(&pdev->dev, edac->db_irq,
+ altr_edac_a10_irq_handler,
+ IRQF_SHARED, dev_name(&pdev->dev), edac);
+ if (rc) {
+ edac_printk(KERN_ERR, EDAC_DEVICE, "No DBERR IRQ resource\n");
+ return rc;
+ }
+
+ for_each_child_of_node(pdev->dev.of_node, child) {
+ if (!of_device_is_available(child))
+ continue;
+ if (of_device_is_compatible(child, "altr,socfpga-a10-l2-ecc"))
+ altr_edac_a10_device_add(edac, child);
+ else if (of_device_is_compatible(child,
+ "altr,socfpga-a10-ocram-ecc"))
+ altr_edac_a10_device_add(edac, child);
+ }
+
+ return 0;
+}
+
+static const struct of_device_id altr_edac_a10_of_match[] = {
+ { .compatible = "altr,socfpga-a10-ecc-manager" },
+ {},
+};
+MODULE_DEVICE_TABLE(of, altr_edac_a10_of_match);
+
+static struct platform_driver altr_edac_a10_driver = {
+ .probe = altr_edac_a10_probe,
+ .driver = {
+ .name = "socfpga_a10_ecc_manager",
+ .of_match_table = altr_edac_a10_of_match,
+ },
+};
+module_platform_driver(altr_edac_a10_driver);
+
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Thor Thayer");
MODULE_DESCRIPTION("EDAC Driver for Altera Memories");
diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h
index 953077d..42090f3 100644
--- a/drivers/edac/altera_edac.h
+++ b/drivers/edac/altera_edac.h
@@ -195,4 +195,132 @@
const struct altr_sdram_prv_data *data;
};
+/************************** EDAC Device Defines **************************/
+/***** General Device Trigger Defines *****/
+#define ALTR_UE_TRIGGER_CHAR 'U' /* Trigger for UE */
+#define ALTR_TRIGGER_READ_WRD_CNT 32 /* Line size x 4 */
+#define ALTR_TRIG_OCRAM_BYTE_SIZE 128 /* Line size x 4 */
+#define ALTR_TRIG_L2C_BYTE_SIZE 4096 /* Full Page */
+
+/******* Cyclone5 and Arria5 Defines *******/
+/* OCRAM ECC Management Group Defines */
+#define ALTR_MAN_GRP_OCRAM_ECC_OFFSET 0x04
+#define ALTR_OCR_ECC_REG_OFFSET 0x00
+#define ALTR_OCR_ECC_EN BIT(0)
+#define ALTR_OCR_ECC_INJS BIT(1)
+#define ALTR_OCR_ECC_INJD BIT(2)
+#define ALTR_OCR_ECC_SERR BIT(3)
+#define ALTR_OCR_ECC_DERR BIT(4)
+
+/* L2 ECC Management Group Defines */
+#define ALTR_MAN_GRP_L2_ECC_OFFSET 0x00
+#define ALTR_L2_ECC_REG_OFFSET 0x00
+#define ALTR_L2_ECC_EN BIT(0)
+#define ALTR_L2_ECC_INJS BIT(1)
+#define ALTR_L2_ECC_INJD BIT(2)
+
+/* Arria10 General ECC Block Module Defines */
+#define ALTR_A10_ECC_CTRL_OFST 0x08
+#define ALTR_A10_ECC_EN BIT(0)
+#define ALTR_A10_ECC_INITA BIT(16)
+#define ALTR_A10_ECC_INITB BIT(24)
+
+#define ALTR_A10_ECC_INITSTAT_OFST 0x0C
+#define ALTR_A10_ECC_INITCOMPLETEA BIT(0)
+#define ALTR_A10_ECC_INITCOMPLETEB BIT(8)
+
+#define ALTR_A10_ECC_ERRINTEN_OFST 0x10
+#define ALTR_A10_ECC_SERRINTEN BIT(0)
+
+#define ALTR_A10_ECC_INTSTAT_OFST 0x20
+#define ALTR_A10_ECC_SERRPENA BIT(0)
+#define ALTR_A10_ECC_DERRPENA BIT(8)
+#define ALTR_A10_ECC_ERRPENA_MASK (ALTR_A10_ECC_SERRPENA | \
+ ALTR_A10_ECC_DERRPENA)
+#define ALTR_A10_ECC_SERRPENB BIT(16)
+#define ALTR_A10_ECC_DERRPENB BIT(24)
+#define ALTR_A10_ECC_ERRPENB_MASK (ALTR_A10_ECC_SERRPENB | \
+ ALTR_A10_ECC_DERRPENB)
+
+#define ALTR_A10_ECC_INTTEST_OFST 0x24
+#define ALTR_A10_ECC_TSERRA BIT(0)
+#define ALTR_A10_ECC_TDERRA BIT(8)
+
+/* ECC Manager Defines */
+#define A10_SYSMGR_ECC_INTMASK_SET_OFST 0x94
+#define A10_SYSMGR_ECC_INTMASK_CLR_OFST 0x98
+#define A10_SYSMGR_ECC_INTMASK_OCRAM BIT(1)
+
+#define A10_SYSMGR_ECC_INTSTAT_SERR_OFST 0x9C
+#define A10_SYSMGR_ECC_INTSTAT_DERR_OFST 0xA0
+#define A10_SYSMGR_ECC_INTSTAT_L2 BIT(0)
+#define A10_SYSMGR_ECC_INTSTAT_OCRAM BIT(1)
+
+#define A10_SYSGMR_MPU_CLEAR_L2_ECC_OFST 0xA8
+#define A10_SYSGMR_MPU_CLEAR_L2_ECC_SB BIT(15)
+#define A10_SYSGMR_MPU_CLEAR_L2_ECC_MB BIT(31)
+
+/* Arria 10 L2 ECC Management Group Defines */
+#define ALTR_A10_L2_ECC_CTL_OFST 0x0
+#define ALTR_A10_L2_ECC_EN_CTL BIT(0)
+
+#define ALTR_A10_L2_ECC_STATUS 0xFFD060A4
+#define ALTR_A10_L2_ECC_STAT_OFST 0xA4
+#define ALTR_A10_L2_ECC_SERR_PEND BIT(0)
+#define ALTR_A10_L2_ECC_MERR_PEND BIT(0)
+
+#define ALTR_A10_L2_ECC_CLR_OFST 0x4
+#define ALTR_A10_L2_ECC_SERR_CLR BIT(15)
+#define ALTR_A10_L2_ECC_MERR_CLR BIT(31)
+
+#define ALTR_A10_L2_ECC_INJ_OFST ALTR_A10_L2_ECC_CTL_OFST
+#define ALTR_A10_L2_ECC_CE_INJ_MASK 0x00000101
+#define ALTR_A10_L2_ECC_UE_INJ_MASK 0x00010101
+
+/* Arria 10 OCRAM ECC Management Group Defines */
+#define ALTR_A10_OCRAM_ECC_EN_CTL (BIT(1) | BIT(0))
+
+struct altr_edac_device_dev;
+
+struct edac_device_prv_data {
+ int (*setup)(struct altr_edac_device_dev *device);
+ int ce_clear_mask;
+ int ue_clear_mask;
+ int irq_status_mask;
+ char dbgfs_name[20];
+ void * (*alloc_mem)(size_t size, void **other);
+ void (*free_mem)(void *p, size_t size, void *other);
+ int ecc_enable_mask;
+ int ecc_en_ofst;
+ int ce_set_mask;
+ int ue_set_mask;
+ int set_err_ofst;
+ irqreturn_t (*ecc_irq_handler)(struct altr_edac_device_dev *dci,
+ bool sb);
+ int trig_alloc_sz;
+ const struct file_operations *inject_fops;
+};
+
+struct altr_edac_device_dev {
+ struct list_head next;
+ void __iomem *base;
+ int sb_irq;
+ int db_irq;
+ const struct edac_device_prv_data *data;
+ struct dentry *debugfs_dir;
+ char *edac_dev_name;
+ struct altr_arria10_edac *edac;
+ struct edac_device_ctl_info *edac_dev;
+ struct device ddev;
+ int edac_idx;
+};
+
+struct altr_arria10_edac {
+ struct device *dev;
+ struct regmap *ecc_mgr_map;
+ int sb_irq;
+ int db_irq;
+ struct list_head a10_ecc_devices;
+};
+
#endif /* #ifndef _ALTERA_EDAC_H */
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index d87a475..624e2f7 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -15,11 +15,6 @@
static struct msr __percpu *msrs;
-/*
- * count successfully initialized driver instances for setup_pci_device()
- */
-static atomic_t drv_instances = ATOMIC_INIT(0);
-
/* Per-node stuff */
static struct ecc_settings **ecc_stngs;
@@ -1918,7 +1913,7 @@
[K8_CPUS] = {
.ctl_name = "K8",
.f1_id = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP,
- .f3_id = PCI_DEVICE_ID_AMD_K8_NB_MISC,
+ .f2_id = PCI_DEVICE_ID_AMD_K8_NB_MEMCTL,
.ops = {
.early_channel_count = k8_early_channel_count,
.map_sysaddr_to_csrow = k8_map_sysaddr_to_csrow,
@@ -1928,7 +1923,7 @@
[F10_CPUS] = {
.ctl_name = "F10h",
.f1_id = PCI_DEVICE_ID_AMD_10H_NB_MAP,
- .f3_id = PCI_DEVICE_ID_AMD_10H_NB_MISC,
+ .f2_id = PCI_DEVICE_ID_AMD_10H_NB_DRAM,
.ops = {
.early_channel_count = f1x_early_channel_count,
.map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
@@ -1938,7 +1933,7 @@
[F15_CPUS] = {
.ctl_name = "F15h",
.f1_id = PCI_DEVICE_ID_AMD_15H_NB_F1,
- .f3_id = PCI_DEVICE_ID_AMD_15H_NB_F3,
+ .f2_id = PCI_DEVICE_ID_AMD_15H_NB_F2,
.ops = {
.early_channel_count = f1x_early_channel_count,
.map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
@@ -1948,7 +1943,7 @@
[F15_M30H_CPUS] = {
.ctl_name = "F15h_M30h",
.f1_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F1,
- .f3_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F3,
+ .f2_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F2,
.ops = {
.early_channel_count = f1x_early_channel_count,
.map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
@@ -1958,7 +1953,7 @@
[F15_M60H_CPUS] = {
.ctl_name = "F15h_M60h",
.f1_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F1,
- .f3_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F3,
+ .f2_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F2,
.ops = {
.early_channel_count = f1x_early_channel_count,
.map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
@@ -1968,7 +1963,7 @@
[F16_CPUS] = {
.ctl_name = "F16h",
.f1_id = PCI_DEVICE_ID_AMD_16H_NB_F1,
- .f3_id = PCI_DEVICE_ID_AMD_16H_NB_F3,
+ .f2_id = PCI_DEVICE_ID_AMD_16H_NB_F2,
.ops = {
.early_channel_count = f1x_early_channel_count,
.map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
@@ -1978,7 +1973,7 @@
[F16_M30H_CPUS] = {
.ctl_name = "F16h_M30h",
.f1_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F1,
- .f3_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F3,
+ .f2_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F2,
.ops = {
.early_channel_count = f1x_early_channel_count,
.map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
@@ -2227,13 +2222,13 @@
}
/*
- * Use pvt->F2 which contains the F2 CPU PCI device to get the related
- * F1 (AddrMap) and F3 (Misc) devices. Return negative value on error.
+ * Use pvt->F3 which contains the F3 CPU PCI device to get the related
+ * F1 (AddrMap) and F2 (Dct) devices. Return negative value on error.
*/
-static int reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 f1_id, u16 f3_id)
+static int reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 f1_id, u16 f2_id)
{
/* Reserve the ADDRESS MAP Device */
- pvt->F1 = pci_get_related_function(pvt->F2->vendor, f1_id, pvt->F2);
+ pvt->F1 = pci_get_related_function(pvt->F3->vendor, f1_id, pvt->F3);
if (!pvt->F1) {
amd64_err("error address map device not found: "
"vendor %x device 0x%x (broken BIOS?)\n",
@@ -2241,15 +2236,15 @@
return -ENODEV;
}
- /* Reserve the MISC Device */
- pvt->F3 = pci_get_related_function(pvt->F2->vendor, f3_id, pvt->F2);
- if (!pvt->F3) {
+ /* Reserve the DCT Device */
+ pvt->F2 = pci_get_related_function(pvt->F3->vendor, f2_id, pvt->F3);
+ if (!pvt->F2) {
pci_dev_put(pvt->F1);
pvt->F1 = NULL;
- amd64_err("error F3 device not found: "
+ amd64_err("error F2 device not found: "
"vendor %x device 0x%x (broken BIOS?)\n",
- PCI_VENDOR_ID_AMD, f3_id);
+ PCI_VENDOR_ID_AMD, f2_id);
return -ENODEV;
}
@@ -2263,7 +2258,7 @@
static void free_mc_sibling_devs(struct amd64_pvt *pvt)
{
pci_dev_put(pvt->F1);
- pci_dev_put(pvt->F3);
+ pci_dev_put(pvt->F2);
}
/*
@@ -2778,14 +2773,14 @@
NULL
};
-static int init_one_instance(struct pci_dev *F2)
+static int init_one_instance(unsigned int nid)
{
- struct amd64_pvt *pvt = NULL;
+ struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
struct amd64_family_type *fam_type = NULL;
struct mem_ctl_info *mci = NULL;
struct edac_mc_layer layers[2];
+ struct amd64_pvt *pvt = NULL;
int err = 0, ret;
- u16 nid = amd_pci_dev_to_node_id(F2);
ret = -ENOMEM;
pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
@@ -2793,7 +2788,7 @@
goto err_ret;
pvt->mc_node_id = nid;
- pvt->F2 = F2;
+ pvt->F3 = F3;
ret = -EINVAL;
fam_type = per_family_init(pvt);
@@ -2801,7 +2796,7 @@
goto err_free;
ret = -ENODEV;
- err = reserve_mc_sibling_devs(pvt, fam_type->f1_id, fam_type->f3_id);
+ err = reserve_mc_sibling_devs(pvt, fam_type->f1_id, fam_type->f2_id);
if (err)
goto err_free;
@@ -2836,7 +2831,7 @@
goto err_siblings;
mci->pvt_info = pvt;
- mci->pdev = &pvt->F2->dev;
+ mci->pdev = &pvt->F3->dev;
setup_mci_misc_attrs(mci, fam_type);
@@ -2855,8 +2850,6 @@
amd_register_ecc_decoder(decode_bus_error);
- atomic_inc(&drv_instances);
-
return 0;
err_add_mc:
@@ -2872,19 +2865,11 @@
return ret;
}
-static int probe_one_instance(struct pci_dev *pdev,
- const struct pci_device_id *mc_type)
+static int probe_one_instance(unsigned int nid)
{
- u16 nid = amd_pci_dev_to_node_id(pdev);
struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
struct ecc_settings *s;
- int ret = 0;
-
- ret = pci_enable_device(pdev);
- if (ret < 0) {
- edac_dbg(0, "ret=%d\n", ret);
- return -EIO;
- }
+ int ret;
ret = -ENOMEM;
s = kzalloc(sizeof(struct ecc_settings), GFP_KERNEL);
@@ -2905,7 +2890,7 @@
goto err_enable;
}
- ret = init_one_instance(pdev);
+ ret = init_one_instance(nid);
if (ret < 0) {
amd64_err("Error probing instance: %d\n", nid);
restore_ecc_error_reporting(s, nid, F3);
@@ -2921,19 +2906,18 @@
return ret;
}
-static void remove_one_instance(struct pci_dev *pdev)
+static void remove_one_instance(unsigned int nid)
{
- struct mem_ctl_info *mci;
- struct amd64_pvt *pvt;
- u16 nid = amd_pci_dev_to_node_id(pdev);
struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
struct ecc_settings *s = ecc_stngs[nid];
+ struct mem_ctl_info *mci;
+ struct amd64_pvt *pvt;
- mci = find_mci_by_dev(&pdev->dev);
+ mci = find_mci_by_dev(&F3->dev);
WARN_ON(!mci);
/* Remove from EDAC CORE tracking list */
- mci = edac_mc_del_mc(&pdev->dev);
+ mci = edac_mc_del_mc(&F3->dev);
if (!mci)
return;
@@ -2957,31 +2941,6 @@
edac_mc_free(mci);
}
-/*
- * This table is part of the interface for loading drivers for PCI devices. The
- * PCI core identifies what devices are on a system during boot, and then
- * inquiry this table to see if this driver is for a given device found.
- */
-static const struct pci_device_id amd64_pci_table[] = {
- { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_K8_NB_MEMCTL) },
- { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_10H_NB_DRAM) },
- { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_NB_F2) },
- { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F2) },
- { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_M60H_NB_F2) },
- { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_16H_NB_F2) },
- { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F2) },
- {0, }
-};
-MODULE_DEVICE_TABLE(pci, amd64_pci_table);
-
-static struct pci_driver amd64_pci_driver = {
- .name = EDAC_MOD_STR,
- .probe = probe_one_instance,
- .remove = remove_one_instance,
- .id_table = amd64_pci_table,
- .driver.probe_type = PROBE_FORCE_SYNCHRONOUS,
-};
-
static void setup_pci_device(void)
{
struct mem_ctl_info *mci;
@@ -3005,8 +2964,7 @@
static int __init amd64_edac_init(void)
{
int err = -ENODEV;
-
- printk(KERN_INFO "AMD64 EDAC driver v%s\n", EDAC_AMD64_VERSION);
+ int i;
opstate_init();
@@ -3022,13 +2980,14 @@
if (!msrs)
goto err_free;
- err = pci_register_driver(&amd64_pci_driver);
- if (err)
- goto err_pci;
+ for (i = 0; i < amd_nb_num(); i++)
+ if (probe_one_instance(i)) {
+ /* unwind properly */
+ while (--i >= 0)
+ remove_one_instance(i);
- err = -ENODEV;
- if (!atomic_read(&drv_instances))
- goto err_no_instances;
+ goto err_pci;
+ }
setup_pci_device();
@@ -3036,10 +2995,9 @@
amd64_err("%s on 32-bit is unsupported. USE AT YOUR OWN RISK!\n", EDAC_MOD_STR);
#endif
- return 0;
+ printk(KERN_INFO "AMD64 EDAC driver v%s\n", EDAC_AMD64_VERSION);
-err_no_instances:
- pci_unregister_driver(&amd64_pci_driver);
+ return 0;
err_pci:
msrs_free(msrs);
@@ -3055,10 +3013,13 @@
static void __exit amd64_edac_exit(void)
{
+ int i;
+
if (pci_ctl)
edac_pci_release_generic_ctl(pci_ctl);
- pci_unregister_driver(&amd64_pci_driver);
+ for (i = 0; i < amd_nb_num(); i++)
+ remove_one_instance(i);
kfree(ecc_stngs);
ecc_stngs = NULL;
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index c0f248f..c088704 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -422,7 +422,7 @@
struct amd64_family_type {
const char *ctl_name;
- u16 f1_id, f3_id;
+ u16 f1_id, f2_id;
struct low_ops ops;
};
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 1472f48c..6aa256b0 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -923,7 +923,7 @@
mci->ue_mc += count;
if (!enable_per_layer_report) {
- mci->ce_noinfo_count += count;
+ mci->ue_noinfo_count += count;
return;
}
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index 26e65ab..10c305b 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -998,11 +998,12 @@
void edac_unregister_sysfs(struct mem_ctl_info *mci)
{
+ struct bus_type *bus = mci->bus;
const char *name = mci->bus->name;
edac_dbg(1, "Unregistering device %s\n", dev_name(&mci->dev));
device_unregister(&mci->dev);
- bus_unregister(mci->bus);
+ bus_unregister(bus);
kfree(name);
}
diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c
index 792bdae..8a68a5e 100644
--- a/drivers/edac/i7core_edac.c
+++ b/drivers/edac/i7core_edac.c
@@ -271,16 +271,6 @@
bool is_registered, enable_scrub;
- /* Fifo double buffers */
- struct mce mce_entry[MCE_LOG_LEN];
- struct mce mce_outentry[MCE_LOG_LEN];
-
- /* Fifo in/out counters */
- unsigned mce_in, mce_out;
-
- /* Count indicator to show errors not got */
- unsigned mce_overrun;
-
/* DCLK Frequency used for computing scrub rate */
int dclk_freq;
@@ -1792,56 +1782,15 @@
* i7core_check_error Retrieve and process errors reported by the
* hardware. Called by the Core module.
*/
-static void i7core_check_error(struct mem_ctl_info *mci)
+static void i7core_check_error(struct mem_ctl_info *mci, struct mce *m)
{
struct i7core_pvt *pvt = mci->pvt_info;
- int i;
- unsigned count = 0;
- struct mce *m;
- /*
- * MCE first step: Copy all mce errors into a temporary buffer
- * We use a double buffering here, to reduce the risk of
- * losing an error.
- */
- smp_rmb();
- count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
- % MCE_LOG_LEN;
- if (!count)
- goto check_ce_error;
-
- m = pvt->mce_outentry;
- if (pvt->mce_in + count > MCE_LOG_LEN) {
- unsigned l = MCE_LOG_LEN - pvt->mce_in;
-
- memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
- smp_wmb();
- pvt->mce_in = 0;
- count -= l;
- m += l;
- }
- memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
- smp_wmb();
- pvt->mce_in += count;
-
- smp_rmb();
- if (pvt->mce_overrun) {
- i7core_printk(KERN_ERR, "Lost %d memory errors\n",
- pvt->mce_overrun);
- smp_wmb();
- pvt->mce_overrun = 0;
- }
-
- /*
- * MCE second step: parse errors and display
- */
- for (i = 0; i < count; i++)
- i7core_mce_output_error(mci, &pvt->mce_outentry[i]);
+ i7core_mce_output_error(mci, m);
/*
* Now, let's increment CE error counts
*/
-check_ce_error:
if (!pvt->is_registered)
i7core_udimm_check_mc_ecc_err(mci);
else
@@ -1849,12 +1798,8 @@
}
/*
- * i7core_mce_check_error Replicates mcelog routine to get errors
- * This routine simply queues mcelog errors, and
- * return. The error itself should be handled later
- * by i7core_check_error.
- * WARNING: As this routine should be called at NMI time, extra care should
- * be taken to avoid deadlocks, and to be as fast as possible.
+ * Check that logging is enabled and that this is the right type
+ * of error for us to handle.
*/
static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
void *data)
@@ -1882,21 +1827,7 @@
if (mce->bank != 8)
return NOTIFY_DONE;
- smp_rmb();
- if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
- smp_wmb();
- pvt->mce_overrun++;
- return NOTIFY_DONE;
- }
-
- /* Copy memory error at the ringbuffer */
- memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
- smp_wmb();
- pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;
-
- /* Handle fatal errors immediately */
- if (mce->mcgstatus & 1)
- i7core_check_error(mci);
+ i7core_check_error(mci, mce);
/* Advise mcelog that the errors were handled */
return NOTIFY_STOP;
@@ -2243,8 +2174,6 @@
get_dimm_config(mci);
/* record ptr to the generic device */
mci->pdev = &i7core_dev->pdev[0]->dev;
- /* Set the function pointer to an actual operation function */
- mci->edac_check = i7core_check_error;
/* Enable scrubrate setting */
if (pvt->enable_scrub)
diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c
index 18d77ac..1c88d97 100644
--- a/drivers/edac/ie31200_edac.c
+++ b/drivers/edac/ie31200_edac.c
@@ -17,6 +17,7 @@
* 015c: Xeon E3-1200 v2/3rd Gen Core processor DRAM Controller
* 0c04: Xeon E3-1200 v3/4th Gen Core Processor DRAM Controller
* 0c08: Xeon E3-1200 v3 Processor DRAM Controller
+ * 1918: Xeon E3-1200 v5 Skylake Host Bridge/DRAM Registers
*
* Based on Intel specification:
* http://www.intel.com/content/dam/www/public/us/en/documents/datasheets/xeon-e3-1200v3-vol-2-datasheet.pdf
@@ -55,6 +56,7 @@
#define PCI_DEVICE_ID_INTEL_IE31200_HB_5 0x015c
#define PCI_DEVICE_ID_INTEL_IE31200_HB_6 0x0c04
#define PCI_DEVICE_ID_INTEL_IE31200_HB_7 0x0c08
+#define PCI_DEVICE_ID_INTEL_IE31200_HB_8 0x1918
#define IE31200_DIMMS 4
#define IE31200_RANKS 8
@@ -105,8 +107,11 @@
* 1 Multiple Bit Error Status (MERRSTS)
* 0 Correctable Error Status (CERRSTS)
*/
+
#define IE31200_C0ECCERRLOG 0x40c8
#define IE31200_C1ECCERRLOG 0x44c8
+#define IE31200_C0ECCERRLOG_SKL 0x4048
+#define IE31200_C1ECCERRLOG_SKL 0x4448
#define IE31200_ECCERRLOG_CE BIT(0)
#define IE31200_ECCERRLOG_UE BIT(1)
#define IE31200_ECCERRLOG_RANK_BITS GENMASK_ULL(28, 27)
@@ -123,17 +128,28 @@
#define IE31200_CAPID0_DDPCD BIT(6)
#define IE31200_CAPID0_ECC BIT(1)
-#define IE31200_MAD_DIMM_0_OFFSET 0x5004
-#define IE31200_MAD_DIMM_SIZE GENMASK_ULL(7, 0)
-#define IE31200_MAD_DIMM_A_RANK BIT(17)
-#define IE31200_MAD_DIMM_A_WIDTH BIT(19)
+#define IE31200_MAD_DIMM_0_OFFSET 0x5004
+#define IE31200_MAD_DIMM_0_OFFSET_SKL 0x500C
+#define IE31200_MAD_DIMM_SIZE GENMASK_ULL(7, 0)
+#define IE31200_MAD_DIMM_A_RANK BIT(17)
+#define IE31200_MAD_DIMM_A_RANK_SHIFT 17
+#define IE31200_MAD_DIMM_A_RANK_SKL BIT(10)
+#define IE31200_MAD_DIMM_A_RANK_SKL_SHIFT 10
+#define IE31200_MAD_DIMM_A_WIDTH BIT(19)
+#define IE31200_MAD_DIMM_A_WIDTH_SHIFT 19
+#define IE31200_MAD_DIMM_A_WIDTH_SKL GENMASK_ULL(9, 8)
+#define IE31200_MAD_DIMM_A_WIDTH_SKL_SHIFT 8
-#define IE31200_PAGES(n) (n << (28 - PAGE_SHIFT))
+/* Skylake reports 1GB increments, everything else is 256MB */
+#define IE31200_PAGES(n, skl) \
+ (n << (28 + (2 * skl) - PAGE_SHIFT))
static int nr_channels;
struct ie31200_priv {
void __iomem *window;
+ void __iomem *c0errlog;
+ void __iomem *c1errlog;
};
enum ie31200_chips {
@@ -157,9 +173,9 @@
};
struct dimm_data {
- u8 size; /* in 256MB multiples */
+ u8 size; /* in multiples of 256MB, except Skylake is 1GB */
u8 dual_rank : 1,
- x16_width : 1; /* 0 means x8 width */
+ x16_width : 2; /* 0 means x8 width */
};
static int how_many_channels(struct pci_dev *pdev)
@@ -197,11 +213,10 @@
return true;
}
-static int eccerrlog_row(int channel, u64 log)
+static int eccerrlog_row(u64 log)
{
- int rank = ((log & IE31200_ECCERRLOG_RANK_BITS) >>
- IE31200_ECCERRLOG_RANK_SHIFT);
- return rank | (channel * IE31200_RANKS_PER_CHANNEL);
+ return ((log & IE31200_ECCERRLOG_RANK_BITS) >>
+ IE31200_ECCERRLOG_RANK_SHIFT);
}
static void ie31200_clear_error_info(struct mem_ctl_info *mci)
@@ -219,7 +234,6 @@
{
struct pci_dev *pdev;
struct ie31200_priv *priv = mci->pvt_info;
- void __iomem *window = priv->window;
pdev = to_pci_dev(mci->pdev);
@@ -232,9 +246,9 @@
if (!(info->errsts & IE31200_ERRSTS_BITS))
return;
- info->eccerrlog[0] = lo_hi_readq(window + IE31200_C0ECCERRLOG);
+ info->eccerrlog[0] = lo_hi_readq(priv->c0errlog);
if (nr_channels == 2)
- info->eccerrlog[1] = lo_hi_readq(window + IE31200_C1ECCERRLOG);
+ info->eccerrlog[1] = lo_hi_readq(priv->c1errlog);
pci_read_config_word(pdev, IE31200_ERRSTS, &info->errsts2);
@@ -245,10 +259,10 @@
* should be UE info.
*/
if ((info->errsts ^ info->errsts2) & IE31200_ERRSTS_BITS) {
- info->eccerrlog[0] = lo_hi_readq(window + IE31200_C0ECCERRLOG);
+ info->eccerrlog[0] = lo_hi_readq(priv->c0errlog);
if (nr_channels == 2)
info->eccerrlog[1] =
- lo_hi_readq(window + IE31200_C1ECCERRLOG);
+ lo_hi_readq(priv->c1errlog);
}
ie31200_clear_error_info(mci);
@@ -274,14 +288,14 @@
if (log & IE31200_ECCERRLOG_UE) {
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
0, 0, 0,
- eccerrlog_row(channel, log),
+ eccerrlog_row(log),
channel, -1,
"ie31200 UE", "");
} else if (log & IE31200_ECCERRLOG_CE) {
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
0, 0,
IE31200_ECCERRLOG_SYNDROME(log),
- eccerrlog_row(channel, log),
+ eccerrlog_row(log),
channel, -1,
"ie31200 CE", "");
}
@@ -326,6 +340,33 @@
return window;
}
+static void __skl_populate_dimm_info(struct dimm_data *dd, u32 addr_decode,
+ int chan)
+{
+ dd->size = (addr_decode >> (chan << 4)) & IE31200_MAD_DIMM_SIZE;
+ dd->dual_rank = (addr_decode & (IE31200_MAD_DIMM_A_RANK_SKL << (chan << 4))) ? 1 : 0;
+ dd->x16_width = ((addr_decode & (IE31200_MAD_DIMM_A_WIDTH_SKL << (chan << 4))) >>
+ (IE31200_MAD_DIMM_A_WIDTH_SKL_SHIFT + (chan << 4)));
+}
+
+static void __populate_dimm_info(struct dimm_data *dd, u32 addr_decode,
+ int chan)
+{
+ dd->size = (addr_decode >> (chan << 3)) & IE31200_MAD_DIMM_SIZE;
+ dd->dual_rank = (addr_decode & (IE31200_MAD_DIMM_A_RANK << chan)) ? 1 : 0;
+ dd->x16_width = (addr_decode & (IE31200_MAD_DIMM_A_WIDTH << chan)) ? 1 : 0;
+}
+
+static void populate_dimm_info(struct dimm_data *dd, u32 addr_decode, int chan,
+ bool skl)
+{
+ if (skl)
+ __skl_populate_dimm_info(dd, addr_decode, chan);
+ else
+ __populate_dimm_info(dd, addr_decode, chan);
+}
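To make the Skylake field extraction concrete, a worked expansion for channel 1 (chan == 1, so chan << 4 == 16):

/*
 * __skl_populate_dimm_info(dd, addr_decode, 1) evaluates to:
 *   dd->size      = (addr_decode >> 16) & 0xff;      IE31200_MAD_DIMM_SIZE
 *   dd->dual_rank = !!(addr_decode & BIT(10 + 16));  rank bit, shifted up 16
 *   dd->x16_width = (addr_decode >> (8 + 16)) & 0x3; two width bits
 */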
+
+
static int ie31200_probe1(struct pci_dev *pdev, int dev_idx)
{
int i, j, ret;
@@ -334,7 +375,8 @@
struct dimm_data dimm_info[IE31200_CHANNELS][IE31200_DIMMS_PER_CHANNEL];
void __iomem *window;
struct ie31200_priv *priv;
- u32 addr_decode;
+ u32 addr_decode, mad_offset;
+ bool skl = (pdev->device == PCI_DEVICE_ID_INTEL_IE31200_HB_8);
edac_dbg(0, "MC:\n");
@@ -363,7 +405,10 @@
edac_dbg(3, "MC: init mci\n");
mci->pdev = &pdev->dev;
- mci->mtype_cap = MEM_FLAG_DDR3;
+ if (skl)
+ mci->mtype_cap = MEM_FLAG_DDR4;
+ else
+ mci->mtype_cap = MEM_FLAG_DDR3;
mci->edac_ctl_cap = EDAC_FLAG_SECDED;
mci->edac_cap = EDAC_FLAG_SECDED;
mci->mod_name = EDAC_MOD_STR;
@@ -374,19 +419,24 @@
mci->ctl_page_to_phys = NULL;
priv = mci->pvt_info;
priv->window = window;
+ if (skl) {
+ priv->c0errlog = window + IE31200_C0ECCERRLOG_SKL;
+ priv->c1errlog = window + IE31200_C1ECCERRLOG_SKL;
+ mad_offset = IE31200_MAD_DIMM_0_OFFSET_SKL;
+ } else {
+ priv->c0errlog = window + IE31200_C0ECCERRLOG;
+ priv->c1errlog = window + IE31200_C1ECCERRLOG;
+ mad_offset = IE31200_MAD_DIMM_0_OFFSET;
+ }
/* populate DIMM info */
for (i = 0; i < IE31200_CHANNELS; i++) {
- addr_decode = readl(window + IE31200_MAD_DIMM_0_OFFSET +
+ addr_decode = readl(window + mad_offset +
(i * 4));
edac_dbg(0, "addr_decode: 0x%x\n", addr_decode);
for (j = 0; j < IE31200_DIMMS_PER_CHANNEL; j++) {
- dimm_info[i][j].size = (addr_decode >> (j * 8)) &
- IE31200_MAD_DIMM_SIZE;
- dimm_info[i][j].dual_rank = (addr_decode &
- (IE31200_MAD_DIMM_A_RANK << j)) ? 1 : 0;
- dimm_info[i][j].x16_width = (addr_decode &
- (IE31200_MAD_DIMM_A_WIDTH << j)) ? 1 : 0;
+ populate_dimm_info(&dimm_info[i][j], addr_decode, j,
+ skl);
edac_dbg(0, "size: 0x%x, rank: %d, width: %d\n",
dimm_info[i][j].size,
dimm_info[i][j].dual_rank,
@@ -405,7 +455,7 @@
struct dimm_info *dimm;
unsigned long nr_pages;
- nr_pages = IE31200_PAGES(dimm_info[j][i].size);
+ nr_pages = IE31200_PAGES(dimm_info[j][i].size, skl);
if (nr_pages == 0)
continue;
@@ -417,7 +467,10 @@
dimm->nr_pages = nr_pages;
edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages);
dimm->grain = 8; /* just a guess */
- dimm->mtype = MEM_DDR3;
+ if (skl)
+ dimm->mtype = MEM_DDR4;
+ else
+ dimm->mtype = MEM_DDR3;
dimm->dtype = DEV_UNKNOWN;
dimm->edac_mode = EDAC_UNKNOWN;
}
@@ -426,7 +479,10 @@
dimm->nr_pages = nr_pages;
edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages);
dimm->grain = 8; /* same guess */
- dimm->mtype = MEM_DDR3;
+ if (skl)
+ dimm->mtype = MEM_DDR4;
+ else
+ dimm->mtype = MEM_DDR3;
dimm->dtype = DEV_UNKNOWN;
dimm->edac_mode = EDAC_UNKNOWN;
}
@@ -501,6 +557,9 @@
PCI_VEND_DEV(INTEL, IE31200_HB_7), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
IE31200},
{
+ PCI_VEND_DEV(INTEL, IE31200_HB_8), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+ IE31200},
+ {
0,
} /* 0 terminated list. */
};
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index 49768c0..9b6800a 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -1052,7 +1052,6 @@
struct mce *m = (struct mce *)data;
struct cpuinfo_x86 *c = &cpu_data(m->extcpu);
int ecc;
- u32 ebx = cpuid_ebx(0x80000007);
if (amd_filter_mce(m))
return NOTIFY_STOP;
@@ -1075,7 +1074,7 @@
((m->status & MCI_STATUS_DEFERRED) ? "Deferred" : "-"),
((m->status & MCI_STATUS_POISON) ? "Poison" : "-"));
- if (!!(ebx & BIT(3))) {
+ if (boot_cpu_has(X86_FEATURE_SMCA)) {
u32 low, high;
u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
@@ -1094,7 +1093,7 @@
if (m->status & MCI_STATUS_ADDRV)
pr_emerg(HW_ERR "MC%d Error Address: 0x%016llx\n", m->bank, m->addr);
- if (!!(ebx & BIT(3))) {
+ if (boot_cpu_has(X86_FEATURE_SMCA)) {
decode_smca_errors(m);
goto err_code;
}
@@ -1149,7 +1148,6 @@
static int __init mce_amd_init(void)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
- u32 ebx;
if (c->x86_vendor != X86_VENDOR_AMD)
return -ENODEV;
@@ -1205,9 +1203,8 @@
break;
case 0x17:
- ebx = cpuid_ebx(0x80000007);
xec_mask = 0x3f;
- if (!(ebx & BIT(3))) {
+ if (!boot_cpu_has(X86_FEATURE_SMCA)) {
printk(KERN_WARNING "Decoding supported only on Scalable MCA processors.\n");
goto err_out;
}
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index 8bf745d..b4d0bf6 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -21,6 +21,8 @@
#include <linux/smp.h>
#include <linux/bitmap.h>
#include <linux/math64.h>
+#include <linux/mod_devicetable.h>
+#include <asm/cpu_device_id.h>
#include <asm/processor.h>
#include <asm/mce.h>
@@ -28,8 +30,6 @@
/* Static vars */
static LIST_HEAD(sbridge_edac_list);
-static DEFINE_MUTEX(sbridge_edac_lock);
-static int probed;
/*
* Alter this version for the module when modifications are made
@@ -364,16 +364,6 @@
bool is_mirrored, is_lockstep, is_close_pg;
bool is_chan_hash;
- /* Fifo double buffers */
- struct mce mce_entry[MCE_LOG_LEN];
- struct mce mce_outentry[MCE_LOG_LEN];
-
- /* Fifo in/out counters */
- unsigned mce_in, mce_out;
-
- /* Count indicator to show errors not got */
- unsigned mce_overrun;
-
/* Memory description */
u64 tolm, tohm;
struct knl_pvt knl;
@@ -662,18 +652,6 @@
{0,} /* 0 terminated list. */
};
-/*
- * pci_device_id table for which devices we are looking for
- */
-static const struct pci_device_id sbridge_pci_tbl[] = {
- {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0)},
- {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA)},
- {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0)},
- {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0)},
- {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KNL_IMC_SAD0)},
- {0,} /* 0 terminated list. */
-};
-
/****************************************************************************
Ancillary status routines
@@ -3097,63 +3075,8 @@
}
/*
- * sbridge_check_error Retrieve and process errors reported by the
- * hardware. Called by the Core module.
- */
-static void sbridge_check_error(struct mem_ctl_info *mci)
-{
- struct sbridge_pvt *pvt = mci->pvt_info;
- int i;
- unsigned count = 0;
- struct mce *m;
-
- /*
- * MCE first step: Copy all mce errors into a temporary buffer
- * We use a double buffering here, to reduce the risk of
- * loosing an error.
- */
- smp_rmb();
- count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
- % MCE_LOG_LEN;
- if (!count)
- return;
-
- m = pvt->mce_outentry;
- if (pvt->mce_in + count > MCE_LOG_LEN) {
- unsigned l = MCE_LOG_LEN - pvt->mce_in;
-
- memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
- smp_wmb();
- pvt->mce_in = 0;
- count -= l;
- m += l;
- }
- memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
- smp_wmb();
- pvt->mce_in += count;
-
- smp_rmb();
- if (pvt->mce_overrun) {
- sbridge_printk(KERN_ERR, "Lost %d memory errors\n",
- pvt->mce_overrun);
- smp_wmb();
- pvt->mce_overrun = 0;
- }
-
- /*
- * MCE second step: parse errors and display
- */
- for (i = 0; i < count; i++)
- sbridge_mce_output_error(mci, &pvt->mce_outentry[i]);
-}
-
-/*
- * sbridge_mce_check_error Replicates mcelog routine to get errors
- * This routine simply queues mcelog errors, and
- * return. The error itself should be handled later
- * by sbridge_check_error.
- * WARNING: As this routine should be called at NMI time, extra care should
- * be taken to avoid deadlocks, and to be as fast as possible.
+ * Check that logging is enabled and that this is the right type
+ * of error for us to handle.
*/
static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
void *data)
@@ -3198,21 +3121,7 @@
"%u APIC %x\n", mce->cpuvendor, mce->cpuid,
mce->time, mce->socketid, mce->apicid);
- smp_rmb();
- if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
- smp_wmb();
- pvt->mce_overrun++;
- return NOTIFY_DONE;
- }
-
- /* Copy memory error at the ringbuffer */
- memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
- smp_wmb();
- pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;
-
- /* Handle fatal errors immediately */
- if (mce->mcgstatus & 1)
- sbridge_check_error(mci);
+ sbridge_mce_output_error(mci, mce);
/* Advise mcelog that the errors were handled */
return NOTIFY_STOP;
@@ -3298,9 +3207,6 @@
mci->dev_name = pci_name(pdev);
mci->ctl_page_to_phys = NULL;
- /* Set the function pointer to an actual operation function */
- mci->edac_check = sbridge_check_error;
-
pvt->info.type = type;
switch (type) {
case IVY_BRIDGE:
@@ -3448,62 +3354,40 @@
return rc;
}
+#define ICPU(model, table) \
+ { X86_VENDOR_INTEL, 6, model, 0, (unsigned long)&table }
+
+/* Order here must match "enum type" */
+static const struct x86_cpu_id sbridge_cpuids[] = {
+ ICPU(0x2d, pci_dev_descr_sbridge_table), /* SANDY_BRIDGE */
+ ICPU(0x3e, pci_dev_descr_ibridge_table), /* IVY_BRIDGE */
+ ICPU(0x3f, pci_dev_descr_haswell_table), /* HASWELL */
+ ICPU(0x4f, pci_dev_descr_broadwell_table), /* BROADWELL */
+ ICPU(0x57, pci_dev_descr_knl_table), /* KNIGHTS_LANDING */
+ { }
+};
+MODULE_DEVICE_TABLE(x86cpu, sbridge_cpuids);
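The probe path relies on table position: because sbridge_cpuids[] is declared in the same order as "enum type", the offset of the matched entry from the table base is the controller type; this is the "id - sbridge_cpuids" argument passed to sbridge_register_mci() below. An illustrative helper (not part of the patch) spelling out the idiom:

static inline enum type sbridge_cpuid_to_type(const struct x86_cpu_id *id)
{
        /* id points into sbridge_cpuids[], so the array index is the type */
        return (enum type)(id - sbridge_cpuids);
}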
+
/*
- * sbridge_probe Probe for ONE instance of device to see if it is
+ * sbridge_probe Get all devices and register memory controllers
* present.
* return:
* 0 for FOUND a device
* < 0 for error code
*/
-static int sbridge_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+static int sbridge_probe(const struct x86_cpu_id *id)
{
int rc = -ENODEV;
u8 mc, num_mc = 0;
struct sbridge_dev *sbridge_dev;
- enum type type = SANDY_BRIDGE;
+ struct pci_id_table *ptable = (struct pci_id_table *)id->driver_data;
/* get the pci devices we want to reserve for our use */
- mutex_lock(&sbridge_edac_lock);
+ rc = sbridge_get_all_devices(&num_mc, ptable);
- /*
- * All memory controllers are allocated at the first pass.
- */
- if (unlikely(probed >= 1)) {
- mutex_unlock(&sbridge_edac_lock);
- return -ENODEV;
- }
- probed++;
-
- switch (pdev->device) {
- case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA:
- rc = sbridge_get_all_devices(&num_mc,
- pci_dev_descr_ibridge_table);
- type = IVY_BRIDGE;
- break;
- case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0:
- rc = sbridge_get_all_devices(&num_mc,
- pci_dev_descr_sbridge_table);
- type = SANDY_BRIDGE;
- break;
- case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0:
- rc = sbridge_get_all_devices(&num_mc,
- pci_dev_descr_haswell_table);
- type = HASWELL;
- break;
- case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0:
- rc = sbridge_get_all_devices(&num_mc,
- pci_dev_descr_broadwell_table);
- type = BROADWELL;
- break;
- case PCI_DEVICE_ID_INTEL_KNL_IMC_SAD0:
- rc = sbridge_get_all_devices_knl(&num_mc,
- pci_dev_descr_knl_table);
- type = KNIGHTS_LANDING;
- break;
- }
if (unlikely(rc < 0)) {
- edac_dbg(0, "couldn't get all devices for 0x%x\n", pdev->device);
+ edac_dbg(0, "couldn't get all devices\n");
goto fail0;
}
@@ -3514,14 +3398,13 @@
mc, mc + 1, num_mc);
sbridge_dev->mc = mc++;
- rc = sbridge_register_mci(sbridge_dev, type);
+ rc = sbridge_register_mci(sbridge_dev, id - sbridge_cpuids);
if (unlikely(rc < 0))
goto fail1;
}
sbridge_printk(KERN_INFO, "%s\n", SBRIDGE_REVISION);
- mutex_unlock(&sbridge_edac_lock);
return 0;
fail1:
@@ -3530,74 +3413,47 @@
sbridge_put_all_devices();
fail0:
- mutex_unlock(&sbridge_edac_lock);
return rc;
}
/*
- * sbridge_remove destructor for one instance of device
+ * sbridge_remove cleanup
*
*/
-static void sbridge_remove(struct pci_dev *pdev)
+static void sbridge_remove(void)
{
struct sbridge_dev *sbridge_dev;
edac_dbg(0, "\n");
- /*
- * we have a trouble here: pdev value for removal will be wrong, since
- * it will point to the X58 register used to detect that the machine
- * is a Nehalem or upper design. However, due to the way several PCI
- * devices are grouped together to provide MC functionality, we need
- * to use a different method for releasing the devices
- */
-
- mutex_lock(&sbridge_edac_lock);
-
- if (unlikely(!probed)) {
- mutex_unlock(&sbridge_edac_lock);
- return;
- }
-
list_for_each_entry(sbridge_dev, &sbridge_edac_list, list)
sbridge_unregister_mci(sbridge_dev);
/* Release PCI resources */
sbridge_put_all_devices();
-
- probed--;
-
- mutex_unlock(&sbridge_edac_lock);
}
-MODULE_DEVICE_TABLE(pci, sbridge_pci_tbl);
-
-/*
- * sbridge_driver pci_driver structure for this module
- *
- */
-static struct pci_driver sbridge_driver = {
- .name = "sbridge_edac",
- .probe = sbridge_probe,
- .remove = sbridge_remove,
- .id_table = sbridge_pci_tbl,
-};
-
/*
* sbridge_init Module entry function
* Try to initialize this module for its devices
*/
static int __init sbridge_init(void)
{
- int pci_rc;
+ const struct x86_cpu_id *id;
+ int rc;
edac_dbg(2, "\n");
+ id = x86_match_cpu(sbridge_cpuids);
+ if (!id)
+ return -ENODEV;
+
/* Ensure that the OPSTATE is set correctly for POLL or NMI */
opstate_init();
- pci_rc = pci_register_driver(&sbridge_driver);
- if (pci_rc >= 0) {
+ rc = sbridge_probe(id);
+
+ if (rc >= 0) {
mce_register_decode_chain(&sbridge_mce_dec);
if (get_edac_report_status() == EDAC_REPORTING_DISABLED)
sbridge_printk(KERN_WARNING, "Loading driver, error reporting disabled.\n");
@@ -3605,9 +3461,9 @@
}
sbridge_printk(KERN_ERR, "Failed to register device with error %d.\n",
- pci_rc);
+ rc);
- return pci_rc;
+ return rc;
}
/*
@@ -3617,7 +3473,7 @@
static void __exit sbridge_exit(void)
{
edac_dbg(2, "\n");
- pci_unregister_driver(&sbridge_driver);
+ sbridge_remove();
mce_unregister_decode_chain(&sbridge_mce_dec);
}
diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig
index e1670d5..6394152 100644
--- a/drivers/firmware/efi/Kconfig
+++ b/drivers/firmware/efi/Kconfig
@@ -87,6 +87,31 @@
config EFI_ARMSTUB
bool
+config EFI_BOOTLOADER_CONTROL
+ tristate "EFI Bootloader Control"
+ depends on EFI_VARS
+ default n
+ ---help---
+ This module installs a reboot hook, such that if reboot() is
+ invoked with a string argument NNN, "NNN" is copied to the
+ "LoaderEntryOneShot" EFI variable, to be read by the
+ bootloader. If the string matches one of the boot labels
+ defined in its configuration, the bootloader will boot once
+ to that label. The "LoaderEntryRebootReason" EFI variable is
+ set with the reboot reason: "reboot" or "shutdown". The
+ bootloader reads this reboot reason and takes particular
+ action according to its policy.
+
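For illustration, the hook described above can be exercised from user space with the four-argument reboot syscall. A hedged sketch (the "recovery" label is an assumption; it must match a boot label in the bootloader's configuration):

#include <unistd.h>
#include <sys/syscall.h>
#include <linux/reboot.h>

int main(void)
{
        /* Ask the bootloader to boot its "recovery" entry once. */
        return syscall(SYS_reboot, LINUX_REBOOT_MAGIC1,
                       LINUX_REBOOT_MAGIC2, LINUX_REBOOT_CMD_RESTART2,
                       "recovery");
}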
+config EFI_CAPSULE_LOADER
+ tristate "EFI capsule loader"
+ depends on EFI
+ help
+ This option exposes a loader interface "/dev/efi_capsule_loader" for
+ users to load EFI capsules. This driver requires working runtime
+ capsule support in the firmware, which many OEMs do not provide.
+
+ Most users should say N.
+
endmenu
config UEFI_CPER
diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile
index 62e654f..a219640 100644
--- a/drivers/firmware/efi/Makefile
+++ b/drivers/firmware/efi/Makefile
@@ -9,7 +9,8 @@
#
KASAN_SANITIZE_runtime-wrappers.o := n
-obj-$(CONFIG_EFI) += efi.o vars.o reboot.o
+obj-$(CONFIG_EFI) += efi.o vars.o reboot.o memattr.o
+obj-$(CONFIG_EFI) += capsule.o
obj-$(CONFIG_EFI_VARS) += efivars.o
obj-$(CONFIG_EFI_ESRT) += esrt.o
obj-$(CONFIG_EFI_VARS_PSTORE) += efi-pstore.o
@@ -18,7 +19,9 @@
obj-$(CONFIG_EFI_RUNTIME_WRAPPERS) += runtime-wrappers.o
obj-$(CONFIG_EFI_STUB) += libstub/
obj-$(CONFIG_EFI_FAKE_MEMMAP) += fake_mem.o
+obj-$(CONFIG_EFI_BOOTLOADER_CONTROL) += efibc.o
arm-obj-$(CONFIG_EFI) := arm-init.o arm-runtime.o
obj-$(CONFIG_ARM) += $(arm-obj-y)
obj-$(CONFIG_ARM64) += $(arm-obj-y)
+obj-$(CONFIG_EFI_CAPSULE_LOADER) += capsule-loader.o
diff --git a/drivers/firmware/efi/arm-init.c b/drivers/firmware/efi/arm-init.c
index 8714f8c..a850cbc 100644
--- a/drivers/firmware/efi/arm-init.c
+++ b/drivers/firmware/efi/arm-init.c
@@ -11,17 +11,19 @@
*
*/
+#define pr_fmt(fmt) "efi: " fmt
+
#include <linux/efi.h>
#include <linux/init.h>
#include <linux/memblock.h>
#include <linux/mm_types.h>
#include <linux/of.h>
#include <linux/of_fdt.h>
+#include <linux/platform_device.h>
+#include <linux/screen_info.h>
#include <asm/efi.h>
-struct efi_memory_map memmap;
-
u64 efi_system_table;
static int __init is_normal_ram(efi_memory_desc_t *md)
@@ -40,7 +42,7 @@
{
efi_memory_desc_t *md;
- for_each_efi_memory_desc(&memmap, md) {
+ for_each_efi_memory_desc(md) {
if (!(md->attribute & EFI_MEMORY_RUNTIME))
continue;
if (md->virt_addr == 0)
@@ -53,6 +55,36 @@
return addr;
}
+static __initdata unsigned long screen_info_table = EFI_INVALID_TABLE_ADDR;
+
+static __initdata efi_config_table_type_t arch_tables[] = {
+ {LINUX_EFI_ARM_SCREEN_INFO_TABLE_GUID, NULL, &screen_info_table},
+ {NULL_GUID, NULL, NULL}
+};
+
+static void __init init_screen_info(void)
+{
+ struct screen_info *si;
+
+ if (screen_info_table != EFI_INVALID_TABLE_ADDR) {
+ si = early_memremap_ro(screen_info_table, sizeof(*si));
+ if (!si) {
+ pr_err("Could not map screen_info config table\n");
+ return;
+ }
+ screen_info = *si;
+ early_memunmap(si, sizeof(*si));
+
+ /* dummycon on ARM needs non-zero values for columns/lines */
+ screen_info.orig_video_cols = 80;
+ screen_info.orig_video_lines = 25;
+ }
+
+ if (screen_info.orig_video_isVGA == VIDEO_TYPE_EFI &&
+ memblock_is_map_memory(screen_info.lfb_base))
+ memblock_mark_nomap(screen_info.lfb_base, screen_info.lfb_size);
+}
+
static int __init uefi_init(void)
{
efi_char16_t *c16;
@@ -85,6 +117,8 @@
efi.systab->hdr.revision >> 16,
efi.systab->hdr.revision & 0xffff);
+ efi.runtime_version = efi.systab->hdr.revision;
+
/* Show what we know for posterity */
c16 = early_memremap_ro(efi_to_phys(efi.systab->fw_vendor),
sizeof(vendor) * sizeof(efi_char16_t));
@@ -108,7 +142,8 @@
goto out;
}
retval = efi_config_parse_tables(config_tables, efi.systab->nr_tables,
- sizeof(efi_config_table_t), NULL);
+ sizeof(efi_config_table_t),
+ arch_tables);
early_memunmap(config_tables, table_size);
out:
@@ -143,7 +178,15 @@
if (efi_enabled(EFI_DBG))
pr_info("Processing EFI memory map:\n");
- for_each_efi_memory_desc(&memmap, md) {
+ /*
+ * Discard memblocks discovered so far: if there are any at this
+ * point, they originate from memory nodes in the DT, and UEFI
+ * uses its own memory map instead.
+ */
+ memblock_dump_all();
+ memblock_remove(0, (phys_addr_t)ULLONG_MAX);
+
+ for_each_efi_memory_desc(md) {
paddr = md->phys_addr;
npages = md->num_pages;
@@ -184,9 +227,9 @@
efi_system_table = params.system_table;
- memmap.phys_map = params.mmap;
- memmap.map = early_memremap_ro(params.mmap, params.mmap_size);
- if (memmap.map == NULL) {
+ efi.memmap.phys_map = params.mmap;
+ efi.memmap.map = early_memremap_ro(params.mmap, params.mmap_size);
+ if (efi.memmap.map == NULL) {
/*
* If we are booting via UEFI, the UEFI memory map is the only
* description of memory we have, so there is little point in
@@ -194,28 +237,37 @@
*/
panic("Unable to map EFI memory map.\n");
}
- memmap.map_end = memmap.map + params.mmap_size;
- memmap.desc_size = params.desc_size;
- memmap.desc_version = params.desc_ver;
+ efi.memmap.map_end = efi.memmap.map + params.mmap_size;
+ efi.memmap.desc_size = params.desc_size;
+ efi.memmap.desc_version = params.desc_ver;
+
+ WARN(efi.memmap.desc_version != 1,
+ "Unexpected EFI_MEMORY_DESCRIPTOR version %ld",
+ efi.memmap.desc_version);
if (uefi_init() < 0)
return;
reserve_regions();
- early_memunmap(memmap.map, params.mmap_size);
+ efi_memattr_init();
+ early_memunmap(efi.memmap.map, params.mmap_size);
- if (IS_ENABLED(CONFIG_ARM)) {
- /*
- * ARM currently does not allow ioremap_cache() to be called on
- * memory regions that are covered by struct page. So remove the
- * UEFI memory map from the linear mapping.
- */
- memblock_mark_nomap(params.mmap & PAGE_MASK,
- PAGE_ALIGN(params.mmap_size +
- (params.mmap & ~PAGE_MASK)));
- } else {
- memblock_reserve(params.mmap & PAGE_MASK,
- PAGE_ALIGN(params.mmap_size +
- (params.mmap & ~PAGE_MASK)));
- }
+ memblock_reserve(params.mmap & PAGE_MASK,
+ PAGE_ALIGN(params.mmap_size +
+ (params.mmap & ~PAGE_MASK)));
+
+ init_screen_info();
}
+
+static int __init register_gop_device(void)
+{
+ void *pd;
+
+ if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI)
+ return 0;
+
+ pd = platform_device_register_data(NULL, "efi-framebuffer", 0,
+ &screen_info, sizeof(screen_info));
+ return PTR_ERR_OR_ZERO(pd);
+}
+subsys_initcall(register_gop_device);
diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c
index 6ae21e4..17ccf0a 100644
--- a/drivers/firmware/efi/arm-runtime.c
+++ b/drivers/firmware/efi/arm-runtime.c
@@ -42,11 +42,13 @@
static bool __init efi_virtmap_init(void)
{
efi_memory_desc_t *md;
+ bool systab_found;
efi_mm.pgd = pgd_alloc(&efi_mm);
init_new_context(NULL, &efi_mm);
- for_each_efi_memory_desc(&memmap, md) {
+ systab_found = false;
+ for_each_efi_memory_desc(md) {
phys_addr_t phys = md->phys_addr;
int ret;
@@ -64,7 +66,25 @@
&phys, ret);
return false;
}
+ /*
+ * If this entry covers the address of the UEFI system table,
+ * calculate and record its virtual address.
+ */
+ if (efi_system_table >= phys &&
+ efi_system_table < phys + (md->num_pages * EFI_PAGE_SIZE)) {
+ efi.systab = (void *)(unsigned long)(efi_system_table -
+ phys + md->virt_addr);
+ systab_found = true;
+ }
}
+ if (!systab_found) {
+ pr_err("No virtual mapping found for the UEFI System Table\n");
+ return false;
+ }
+
+ if (efi_memattr_apply_permissions(&efi_mm, efi_set_mapping_permissions))
+ return false;
+
return true;
}
@@ -89,26 +109,17 @@
pr_info("Remapping and enabling EFI services.\n");
- mapsize = memmap.map_end - memmap.map;
- memmap.map = (__force void *)ioremap_cache(memmap.phys_map,
- mapsize);
- if (!memmap.map) {
+ mapsize = efi.memmap.map_end - efi.memmap.map;
+
+ efi.memmap.map = memremap(efi.memmap.phys_map, mapsize, MEMREMAP_WB);
+ if (!efi.memmap.map) {
pr_err("Failed to remap EFI memory map\n");
return -ENOMEM;
}
- memmap.map_end = memmap.map + mapsize;
- efi.memmap = &memmap;
-
- efi.systab = (__force void *)ioremap_cache(efi_system_table,
- sizeof(efi_system_table_t));
- if (!efi.systab) {
- pr_err("Failed to remap EFI System Table\n");
- return -ENOMEM;
- }
- set_bit(EFI_SYSTEM_TABLES, &efi.flags);
+ efi.memmap.map_end = efi.memmap.map + mapsize;
if (!efi_virtmap_init()) {
- pr_err("No UEFI virtual mapping was installed -- runtime services will not be available\n");
+ pr_err("UEFI virtual mapping missing or invalid -- runtime services will not be available\n");
return -ENOMEM;
}
@@ -116,8 +127,6 @@
efi_native_runtime_setup();
set_bit(EFI_RUNTIME_SERVICES, &efi.flags);
- efi.runtime_version = efi.systab->hdr.revision;
-
return 0;
}
early_initcall(arm_enable_runtime_services);
diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c
new file mode 100644
index 0000000..c99c24b
--- /dev/null
+++ b/drivers/firmware/efi/capsule-loader.c
@@ -0,0 +1,343 @@
+/*
+ * EFI capsule loader driver.
+ *
+ * Copyright 2015 Intel Corporation
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2.
+ */
+
+#define pr_fmt(fmt) "efi: " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/miscdevice.h>
+#include <linux/highmem.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/efi.h>
+
+#define NO_FURTHER_WRITE_ACTION -1
+
+struct capsule_info {
+ bool header_obtained;
+ int reset_type;
+ long index;
+ size_t count;
+ size_t total_size;
+ struct page **pages;
+ size_t page_bytes_remain;
+};
+
+/**
+ * efi_free_all_buff_pages - free all previously allocated buffer pages
+ * @cap_info: pointer to current instance of capsule_info structure
+ *
+ * In addition to freeing buffer pages, it flags NO_FURTHER_WRITE_ACTION
+ * to cease processing data in subsequent write(2) calls until close(2)
+ * is called.
+ **/
+static void efi_free_all_buff_pages(struct capsule_info *cap_info)
+{
+ while (cap_info->index > 0)
+ __free_page(cap_info->pages[--cap_info->index]);
+
+ cap_info->index = NO_FURTHER_WRITE_ACTION;
+}
+
+/**
+ * efi_capsule_setup_info - parse the EFI capsule header in the binary and
+ * set up the capsule_info structure
+ * @cap_info: pointer to current instance of capsule_info structure
+ * @kbuff: a mapped first page buffer pointer
+ * @hdr_bytes: number of bytes received so far for the EFI header
+ **/
+static ssize_t efi_capsule_setup_info(struct capsule_info *cap_info,
+ void *kbuff, size_t hdr_bytes)
+{
+ efi_capsule_header_t *cap_hdr;
+ size_t pages_needed;
+ int ret;
+ void *temp_page;
+
+ /* Only process once at least a full EFI capsule header has been received */
+ if (hdr_bytes < sizeof(efi_capsule_header_t))
+ return 0;
+
+ /* Rewind to the start of the capsule header */
+ cap_hdr = kbuff - cap_info->count;
+ pages_needed = ALIGN(cap_hdr->imagesize, PAGE_SIZE) >> PAGE_SHIFT;
+
+ if (pages_needed == 0) {
+ pr_err("%s: pages count invalid\n", __func__);
+ return -EINVAL;
+ }
+
+ /* Check whether the capsule binary is supported */
+ ret = efi_capsule_supported(cap_hdr->guid, cap_hdr->flags,
+ cap_hdr->imagesize,
+ &cap_info->reset_type);
+ if (ret) {
+ pr_err("%s: efi_capsule_supported() failed\n",
+ __func__);
+ return ret;
+ }
+
+ cap_info->total_size = cap_hdr->imagesize;
+ temp_page = krealloc(cap_info->pages,
+ pages_needed * sizeof(void *),
+ GFP_KERNEL | __GFP_ZERO);
+ if (!temp_page) {
+ pr_debug("%s: krealloc() failed\n", __func__);
+ return -ENOMEM;
+ }
+
+ cap_info->pages = temp_page;
+ cap_info->header_obtained = true;
+
+ return 0;
+}
+
+/**
+ * efi_capsule_submit_update - invoke the efi_capsule_update() API once the
+ * binary upload is done
+ * @cap_info: pointer to current instance of capsule_info structure
+ **/
+static ssize_t efi_capsule_submit_update(struct capsule_info *cap_info)
+{
+ int ret;
+ void *cap_hdr_temp;
+
+ cap_hdr_temp = kmap(cap_info->pages[0]);
+ if (!cap_hdr_temp) {
+ pr_debug("%s: kmap() failed\n", __func__);
+ return -EFAULT;
+ }
+
+ ret = efi_capsule_update(cap_hdr_temp, cap_info->pages);
+ kunmap(cap_info->pages[0]);
+ if (ret) {
+ pr_err("%s: efi_capsule_update() failed\n", __func__);
+ return ret;
+ }
+
+ /* Indicate capsule binary uploading is done */
+ cap_info->index = NO_FURTHER_WRITE_ACTION;
+ pr_info("%s: Successfully upload capsule file with reboot type '%s'\n",
+ __func__, !cap_info->reset_type ? "RESET_COLD" :
+ cap_info->reset_type == 1 ? "RESET_WARM" :
+ "RESET_SHUTDOWN");
+ return 0;
+}
+
+/**
+ * efi_capsule_write - store the capsule binary and pass it to
+ * efi_capsule_update() API
+ * @file: file pointer
+ * @buff: buffer pointer
+ * @count: number of bytes in @buff
+ * @offp: not used
+ *
+ * Expectation:
+ * - A user space tool should start at the beginning of the capsule binary
+ * and pass data in sequentially.
+ * - Users should close and re-open this file node in order to upload more
+ * capsules.
+ * - After an error is returned, the user should close the file and restart
+ * the operation; otherwise -EIO will be returned until the file is
+ * closed.
+ * - An EFI capsule header must be located at the beginning of capsule
+ * binary file and passed in as first block data of write operation.
+ **/
+static ssize_t efi_capsule_write(struct file *file, const char __user *buff,
+ size_t count, loff_t *offp)
+{
+ int ret = 0;
+ struct capsule_info *cap_info = file->private_data;
+ struct page *page;
+ void *kbuff = NULL;
+ size_t write_byte;
+
+ if (count == 0)
+ return 0;
+
+ /* Return error while NO_FURTHER_WRITE_ACTION is flagged */
+ if (cap_info->index < 0)
+ return -EIO;
+
+ /* Only alloc a new page when previous page is full */
+ if (!cap_info->page_bytes_remain) {
+ page = alloc_page(GFP_KERNEL);
+ if (!page) {
+ pr_debug("%s: alloc_page() failed\n", __func__);
+ ret = -ENOMEM;
+ goto failed;
+ }
+
+ cap_info->pages[cap_info->index++] = page;
+ cap_info->page_bytes_remain = PAGE_SIZE;
+ }
+
+ page = cap_info->pages[cap_info->index - 1];
+
+ kbuff = kmap(page);
+ if (!kbuff) {
+ pr_debug("%s: kmap() failed\n", __func__);
+ ret = -EFAULT;
+ goto failed;
+ }
+ kbuff += PAGE_SIZE - cap_info->page_bytes_remain;
+
+ /* Copy capsule binary data from user space to kernel space buffer */
+ write_byte = min_t(size_t, count, cap_info->page_bytes_remain);
+ if (copy_from_user(kbuff, buff, write_byte)) {
+ pr_debug("%s: copy_from_user() failed\n", __func__);
+ ret = -EFAULT;
+ goto fail_unmap;
+ }
+ cap_info->page_bytes_remain -= write_byte;
+
+ /* Setup capsule binary info structure */
+ if (!cap_info->header_obtained) {
+ ret = efi_capsule_setup_info(cap_info, kbuff,
+ cap_info->count + write_byte);
+ if (ret)
+ goto fail_unmap;
+ }
+
+ cap_info->count += write_byte;
+ kunmap(page);
+
+ /* Submit the full binary to efi_capsule_update() API */
+ if (cap_info->header_obtained &&
+ cap_info->count >= cap_info->total_size) {
+ if (cap_info->count > cap_info->total_size) {
+ pr_err("%s: upload size exceeded header defined size\n",
+ __func__);
+ ret = -EINVAL;
+ goto failed;
+ }
+
+ ret = efi_capsule_submit_update(cap_info);
+ if (ret)
+ goto failed;
+ }
+
+ return write_byte;
+
+fail_unmap:
+ kunmap(page);
+failed:
+ efi_free_all_buff_pages(cap_info);
+ return ret;
+}
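Putting the expectations above together, a minimal user-space uploader might look like the following sketch (the capsule path comes from argv[1]; the buffer size and the abbreviated error handling are assumptions):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(int argc, char **argv)
{
        char buf[4096];
        size_t n;
        FILE *f;
        int fd;

        if (argc < 2)
                return 1;

        fd = open("/dev/efi_capsule_loader", O_WRONLY);
        f = fopen(argv[1], "rb");
        if (fd < 0 || !f)
                return 1;

        /* Stream the capsule sequentially; the header must come first. */
        while ((n = fread(buf, 1, sizeof(buf), f)) > 0) {
                if (write(fd, buf, n) != (ssize_t)n) {
                        perror("write");
                        break;
                }
        }

        fclose(f);
        close(fd);      /* triggers flush; a partial upload is cancelled */
        return 0;
}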
+
+/**
+ * efi_capsule_flush - called by file close or file flush
+ * @file: file pointer
+ * @id: not used
+ *
+ * If a capsule upload is still in progress when this function is called,
+ * the upload is treated as terminated: the buffer pages completed so far
+ * are freed and -ECANCELED is returned.
+ **/
+static int efi_capsule_flush(struct file *file, fl_owner_t id)
+{
+ int ret = 0;
+ struct capsule_info *cap_info = file->private_data;
+
+ if (cap_info->index > 0) {
+ pr_err("%s: capsule upload not complete\n", __func__);
+ efi_free_all_buff_pages(cap_info);
+ ret = -ECANCELED;
+ }
+
+ return ret;
+}
+
+/**
+ * efi_capsule_release - called by file close
+ * @inode: not used
+ * @file: file pointer
+ *
+ * We will not free successfully submitted pages since the EFI update
+ * requires the data to be maintained across the system reboot.
+ **/
+static int efi_capsule_release(struct inode *inode, struct file *file)
+{
+ struct capsule_info *cap_info = file->private_data;
+
+ kfree(cap_info->pages);
+ kfree(file->private_data);
+ file->private_data = NULL;
+ return 0;
+}
+
+/**
+ * efi_capsule_open - called by file open
+ * @inode: not used
+ * @file: file pointer
+ *
+ * Allocates a separate capsule_info structure for every open() call, so
+ * multiple users can upload capsule binaries concurrently without having
+ * to wait for one another to finish.
+ **/
+static int efi_capsule_open(struct inode *inode, struct file *file)
+{
+ struct capsule_info *cap_info;
+
+ cap_info = kzalloc(sizeof(*cap_info), GFP_KERNEL);
+ if (!cap_info)
+ return -ENOMEM;
+
+ cap_info->pages = kzalloc(sizeof(void *), GFP_KERNEL);
+ if (!cap_info->pages) {
+ kfree(cap_info);
+ return -ENOMEM;
+ }
+
+ file->private_data = cap_info;
+
+ return 0;
+}
+
+static const struct file_operations efi_capsule_fops = {
+ .owner = THIS_MODULE,
+ .open = efi_capsule_open,
+ .write = efi_capsule_write,
+ .flush = efi_capsule_flush,
+ .release = efi_capsule_release,
+ .llseek = no_llseek,
+};
+
+static struct miscdevice efi_capsule_misc = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "efi_capsule_loader",
+ .fops = &efi_capsule_fops,
+};
+
+static int __init efi_capsule_loader_init(void)
+{
+ int ret;
+
+ if (!efi_enabled(EFI_RUNTIME_SERVICES))
+ return -ENODEV;
+
+ ret = misc_register(&efi_capsule_misc);
+ if (ret)
+ pr_err("%s: Failed to register misc char file note\n",
+ __func__);
+
+ return ret;
+}
+module_init(efi_capsule_loader_init);
+
+static void __exit efi_capsule_loader_exit(void)
+{
+ misc_deregister(&efi_capsule_misc);
+}
+module_exit(efi_capsule_loader_exit);
+
+MODULE_DESCRIPTION("EFI capsule firmware binary loader");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/firmware/efi/capsule.c b/drivers/firmware/efi/capsule.c
new file mode 100644
index 0000000..53b9fd2
--- /dev/null
+++ b/drivers/firmware/efi/capsule.c
@@ -0,0 +1,308 @@
+/*
+ * EFI capsule support.
+ *
+ * Copyright 2013 Intel Corporation; author Matt Fleming
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2.
+ */
+
+#define pr_fmt(fmt) "efi: " fmt
+
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/highmem.h>
+#include <linux/efi.h>
+#include <linux/vmalloc.h>
+#include <asm/io.h>
+
+typedef struct {
+ u64 length;
+ u64 data;
+} efi_capsule_block_desc_t;
+
+static bool capsule_pending;
+static bool stop_capsules;
+static int efi_reset_type = -1;
+
+/*
+ * capsule_mutex serialises access to both capsule_pending and
+ * efi_reset_type and stop_capsules.
+ */
+static DEFINE_MUTEX(capsule_mutex);
+
+/**
+ * efi_capsule_pending - has a capsule been passed to the firmware?
+ * @reset_type: store the type of EFI reset if capsule is pending
+ *
+ * To ensure that the registered capsule is processed correctly by the
+ * firmware we need to perform a specific type of reset. If a capsule is
+ * pending return the reset type in @reset_type.
+ *
+ * This function will race with callers of efi_capsule_update(), for
+ * example, calling this function while somebody else is in
+ * efi_capsule_update() but hasn't reached efi_capsule_update_locked()
+ * will miss the updates to capsule_pending and efi_reset_type after
+ * efi_capsule_update_locked() completes.
+ *
+ * A non-racy use is from platform reboot code because we use
+ * system_state to ensure no capsules can be sent to the firmware once
+ * we're at SYSTEM_RESTART. See efi_capsule_update_locked().
+ */
+bool efi_capsule_pending(int *reset_type)
+{
+ if (!capsule_pending)
+ return false;
+
+ if (reset_type)
+ *reset_type = efi_reset_type;
+
+ return true;
+}
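For context, the intended non-racy consumer is the platform reboot path, which uses system_state to guarantee no further capsules are queued. A hedged sketch of such a caller (the function name is hypothetical):

#include <linux/efi.h>

static void machine_restart_for_capsule(void)
{
        int reset_type;

        /*
         * If a capsule was queued, the firmware dictates which reset
         * type must be used for the capsule to be processed.
         */
        if (efi_capsule_pending(&reset_type))
                efi.reset_system(reset_type, EFI_SUCCESS, 0, NULL);
}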
+
+/*
+ * Whitelist of EFI capsule flags that we support.
+ *
+ * We do not handle EFI_CAPSULE_INITIATE_RESET because that would
+ * require us to prepare the kernel for reboot. Refuse to load any
+ * capsules with that flag and any other flags that we do not know how
+ * to handle.
+ */
+#define EFI_CAPSULE_SUPPORTED_FLAG_MASK \
+ (EFI_CAPSULE_PERSIST_ACROSS_RESET | EFI_CAPSULE_POPULATE_SYSTEM_TABLE)
+
+/**
+ * efi_capsule_supported - does the firmware support the capsule?
+ * @guid: vendor guid of capsule
+ * @flags: capsule flags
+ * @size: size of capsule data
+ * @reset: the reset type required for this capsule
+ *
+ * Check whether a capsule with @flags is supported by the firmware
+ * and that @size doesn't exceed the maximum size for a capsule.
+ *
+ * No attempt is made to check @reset against the reset type required
+ * by any pending capsules because of the races involved.
+ */
+int efi_capsule_supported(efi_guid_t guid, u32 flags, size_t size, int *reset)
+{
+ efi_capsule_header_t capsule;
+ efi_capsule_header_t *cap_list[] = { &capsule };
+ efi_status_t status;
+ u64 max_size;
+
+ if (flags & ~EFI_CAPSULE_SUPPORTED_FLAG_MASK)
+ return -EINVAL;
+
+ capsule.headersize = capsule.imagesize = sizeof(capsule);
+ memcpy(&capsule.guid, &guid, sizeof(efi_guid_t));
+ capsule.flags = flags;
+
+ status = efi.query_capsule_caps(cap_list, 1, &max_size, reset);
+ if (status != EFI_SUCCESS)
+ return efi_status_to_err(status);
+
+ if (size > max_size)
+ return -ENOSPC;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(efi_capsule_supported);
+
+/*
+ * Every scatter gather list (block descriptor) page must end with a
+ * continuation pointer. The last continuation pointer of the last
+ * page must be zero to mark the end of the chain.
+ */
+#define SGLIST_PER_PAGE ((PAGE_SIZE / sizeof(efi_capsule_block_desc_t)) - 1)
+
+/*
+ * How many scatter gather list (block descriptor) pages do we need
+ * to map @count pages?
+ */
+static inline unsigned int sg_pages_num(unsigned int count)
+{
+ return DIV_ROUND_UP(count, SGLIST_PER_PAGE);
+}
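
(Editor's illustration.) The descriptor math above is easier to see with concrete numbers; a minimal standalone sketch, assuming a 4 KiB page size and the 16-byte efi_capsule_block_desc_t defined earlier:

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define DESC_SIZE	16UL	/* sizeof(efi_capsule_block_desc_t): two u64s */
#define SGLIST_PER_PAGE	((PAGE_SIZE / DESC_SIZE) - 1)	/* last slot holds the continuation pointer */

static unsigned int sg_pages_num(unsigned int count)
{
	/* open-coded DIV_ROUND_UP(count, SGLIST_PER_PAGE) */
	return (count + SGLIST_PER_PAGE - 1) / SGLIST_PER_PAGE;
}

int main(void)
{
	unsigned int count = 17;	/* data pages of a hypothetical 68 KiB capsule */

	/* 255 descriptors fit per sglist page, so one sglist page suffices */
	printf("%u data pages -> %u sglist page(s)\n", count, sg_pages_num(count));
	return 0;
}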
+
+/**
+ * efi_capsule_update_locked - pass a single capsule to the firmware
+ * @capsule: capsule to send to the firmware
+ * @sg_pages: array of scatter gather (block descriptor) pages
+ * @reset: the reset type required for @capsule
+ *
+ * This function must be called under capsule_mutex. It checks whether
+ * efi_reset_type conflicts with @reset, and atomically sets both
+ * efi_reset_type and capsule_pending if the capsule is successfully
+ * sent to the firmware.
+ *
+ * We also check to see if the system is about to restart, and if so,
+ * abort. This avoids races between efi_capsule_update() and
+ * efi_capsule_pending().
+ */
+static int
+efi_capsule_update_locked(efi_capsule_header_t *capsule,
+ struct page **sg_pages, int reset)
+{
+ efi_physical_addr_t sglist_phys;
+ efi_status_t status;
+
+ lockdep_assert_held(&capsule_mutex);
+
+ /*
+ * If someone has already registered a capsule that requires a
+ * different reset type, we're out of luck and must abort.
+ */
+ if (efi_reset_type >= 0 && efi_reset_type != reset) {
+ pr_err("Conflicting capsule reset type %d (%d).\n",
+ reset, efi_reset_type);
+ return -EINVAL;
+ }
+
+ /*
+ * If the system is getting ready to restart it may have
+ * called efi_capsule_pending() to make decisions (such as
+ * whether to force an EFI reboot), and we're racing against
+ * that call. Abort in that case.
+ */
+ if (unlikely(stop_capsules)) {
+ pr_warn("Capsule update raced with reboot, aborting.\n");
+ return -EINVAL;
+ }
+
+ sglist_phys = page_to_phys(sg_pages[0]);
+
+ status = efi.update_capsule(&capsule, 1, sglist_phys);
+ if (status == EFI_SUCCESS) {
+ capsule_pending = true;
+ efi_reset_type = reset;
+ }
+
+ return efi_status_to_err(status);
+}
+
+/**
+ * efi_capsule_update - send a capsule to the firmware
+ * @capsule: capsule to send to firmware
+ * @pages: an array of capsule data pages
+ *
+ * Build a scatter gather list with EFI capsule block descriptors to
+ * map the capsule described by @capsule with its data in @pages and
+ * send it to the firmware via the UpdateCapsule() runtime service.
+ *
+ * @capsule must be a virtual mapping of the first page in @pages
+ * (@pages[0]) in the kernel address space. That is, a
+ * capsule_header_t that describes the entire contents of the capsule
+ * must be at the start of the first data page.
+ *
+ * Even though this function will validate that the firmware supports
+ * the capsule guid, users will likely want to check that
+ * efi_capsule_supported() returns true before calling this function
+ * because it makes it easier to print helpful error messages.
+ *
+ * If the capsule is successfully submitted to the firmware, any
+ * subsequent calls to efi_capsule_pending() will return true. @pages
+ * must not be released or modified if this function returns
+ * successfully.
+ *
+ * Callers must be prepared for this function to fail, which can
+ * happen if we raced with system reboot or if there is already a
+ * pending capsule that has a reset type that conflicts with the one
+ * required by @capsule. Do NOT use efi_capsule_pending() to detect
+ * this conflict since that would be racy. Instead, submit the capsule
+ * to efi_capsule_update() and check the return value.
+ *
+ * Return 0 on success, a converted EFI status code on failure.
+ */
+int efi_capsule_update(efi_capsule_header_t *capsule, struct page **pages)
+{
+ u32 imagesize = capsule->imagesize;
+ efi_guid_t guid = capsule->guid;
+ unsigned int count, sg_count;
+ u32 flags = capsule->flags;
+ struct page **sg_pages;
+ int rv, reset_type;
+ int i, j;
+
+ rv = efi_capsule_supported(guid, flags, imagesize, &reset_type);
+ if (rv)
+ return rv;
+
+ count = DIV_ROUND_UP(imagesize, PAGE_SIZE);
+ sg_count = sg_pages_num(count);
+
+ sg_pages = kzalloc(sg_count * sizeof(*sg_pages), GFP_KERNEL);
+ if (!sg_pages)
+ return -ENOMEM;
+
+ for (i = 0; i < sg_count; i++) {
+ sg_pages[i] = alloc_page(GFP_KERNEL);
+ if (!sg_pages[i]) {
+ rv = -ENOMEM;
+ goto out;
+ }
+ }
+
+ for (i = 0; i < sg_count; i++) {
+ efi_capsule_block_desc_t *sglist;
+
+ sglist = kmap(sg_pages[i]);
+ if (!sglist) {
+ rv = -ENOMEM;
+ goto out;
+ }
+
+ for (j = 0; j < SGLIST_PER_PAGE && count > 0; j++) {
+ u64 sz = min_t(u64, imagesize, PAGE_SIZE);
+
+ sglist[j].length = sz;
+ sglist[j].data = page_to_phys(*pages++);
+
+ imagesize -= sz;
+ count--;
+ }
+
+ /* Continuation pointer */
+ sglist[j].length = 0;
+
+ if (i + 1 == sg_count)
+ sglist[j].data = 0;
+ else
+ sglist[j].data = page_to_phys(sg_pages[i + 1]);
+
+ kunmap(sg_pages[i]);
+ }
+
+ mutex_lock(&capsule_mutex);
+ rv = efi_capsule_update_locked(capsule, sg_pages, reset_type);
+ mutex_unlock(&capsule_mutex);
+
+out:
+ for (i = 0; rv && i < sg_count; i++) {
+ if (sg_pages[i])
+ __free_page(sg_pages[i]);
+ }
+
+ kfree(sg_pages);
+ return rv;
+}
+EXPORT_SYMBOL_GPL(efi_capsule_update);
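
(Editor's illustration.) A hypothetical caller of the exported function, following the contract spelled out in the kernel-doc above; send_capsule() and its page array are assumptions, not part of this series:

#include <linux/efi.h>
#include <linux/highmem.h>

static int send_capsule(struct page **pages)
{
	efi_capsule_header_t *header;
	int rv;

	header = kmap(pages[0]);	/* the header must start the first data page */
	rv = efi_capsule_update(header, pages);
	kunmap(pages[0]);

	/*
	 * On success the pages now belong to the firmware and must stay
	 * allocated until the capsule-triggered reset.
	 */
	return rv;
}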
+
+static int capsule_reboot_notify(struct notifier_block *nb, unsigned long event, void *cmd)
+{
+ mutex_lock(&capsule_mutex);
+ stop_capsules = true;
+ mutex_unlock(&capsule_mutex);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block capsule_reboot_nb = {
+ .notifier_call = capsule_reboot_notify,
+};
+
+static int __init capsule_reboot_register(void)
+{
+ return register_reboot_notifier(&capsule_reboot_nb);
+}
+core_initcall(capsule_reboot_register);
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 3a69ed5..05509f3 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -43,6 +43,7 @@
.config_table = EFI_INVALID_TABLE_ADDR,
.esrt = EFI_INVALID_TABLE_ADDR,
.properties_table = EFI_INVALID_TABLE_ADDR,
+ .mem_attr_table = EFI_INVALID_TABLE_ADDR,
};
EXPORT_SYMBOL(efi);
@@ -256,7 +257,7 @@
*/
int __init efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md)
{
- struct efi_memory_map *map = efi.memmap;
+ struct efi_memory_map *map = &efi.memmap;
phys_addr_t p, e;
if (!efi_enabled(EFI_MEMMAP)) {
@@ -338,6 +339,7 @@
{UGA_IO_PROTOCOL_GUID, "UGA", &efi.uga},
{EFI_SYSTEM_RESOURCE_TABLE_GUID, "ESRT", &efi.esrt},
{EFI_PROPERTIES_TABLE_GUID, "PROP", &efi.properties_table},
+ {EFI_MEMORY_ATTRIBUTES_TABLE_GUID, "MEMATTR", &efi.mem_attr_table},
{NULL_GUID, NULL, NULL},
};
@@ -351,8 +353,9 @@
for (i = 0; efi_guidcmp(table_types[i].guid, NULL_GUID); i++) {
if (!efi_guidcmp(*guid, table_types[i].guid)) {
*(table_types[i].ptr) = table;
- pr_cont(" %s=0x%lx ",
- table_types[i].name, table);
+ if (table_types[i].name)
+ pr_cont(" %s=0x%lx ",
+ table_types[i].name, table);
return 1;
}
}
@@ -620,16 +623,12 @@
*/
u64 __weak efi_mem_attributes(unsigned long phys_addr)
{
- struct efi_memory_map *map;
efi_memory_desc_t *md;
- void *p;
if (!efi_enabled(EFI_MEMMAP))
return 0;
- map = efi.memmap;
- for (p = map->map; p < map->map_end; p += map->desc_size) {
- md = p;
+ for_each_efi_memory_desc(md) {
if ((md->phys_addr <= phys_addr) &&
(phys_addr < (md->phys_addr +
(md->num_pages << EFI_PAGE_SHIFT))))
@@ -637,3 +636,36 @@
}
return 0;
}
+
+int efi_status_to_err(efi_status_t status)
+{
+ int err;
+
+ switch (status) {
+ case EFI_SUCCESS:
+ err = 0;
+ break;
+ case EFI_INVALID_PARAMETER:
+ err = -EINVAL;
+ break;
+ case EFI_OUT_OF_RESOURCES:
+ err = -ENOSPC;
+ break;
+ case EFI_DEVICE_ERROR:
+ err = -EIO;
+ break;
+ case EFI_WRITE_PROTECTED:
+ err = -EROFS;
+ break;
+ case EFI_SECURITY_VIOLATION:
+ err = -EACCES;
+ break;
+ case EFI_NOT_FOUND:
+ err = -ENOENT;
+ break;
+ default:
+ err = -EINVAL;
+ }
+
+ return err;
+}
diff --git a/drivers/firmware/efi/efibc.c b/drivers/firmware/efi/efibc.c
new file mode 100644
index 0000000..8dd0c70
--- /dev/null
+++ b/drivers/firmware/efi/efibc.c
@@ -0,0 +1,113 @@
+/*
+ * efibc: control EFI bootloaders which obey LoaderEntryOneShot var
+ * Copyright (c) 2013-2016, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#define pr_fmt(fmt) "efibc: " fmt
+
+#include <linux/efi.h>
+#include <linux/module.h>
+#include <linux/reboot.h>
+#include <linux/slab.h>
+
+static void efibc_str_to_str16(const char *str, efi_char16_t *str16)
+{
+ size_t i;
+
+ for (i = 0; i < strlen(str); i++)
+ str16[i] = str[i];
+
+ str16[i] = '\0';
+}
+
+static int efibc_set_variable(const char *name, const char *value)
+{
+ int ret;
+ efi_guid_t guid = LINUX_EFI_LOADER_ENTRY_GUID;
+ struct efivar_entry *entry;
+ size_t size = (strlen(value) + 1) * sizeof(efi_char16_t);
+
+ if (size > sizeof(entry->var.Data)) {
+ pr_err("value is too large");
+ return -EINVAL;
+ }
+
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry) {
+ pr_err("failed to allocate efivar entry");
+ return -ENOMEM;
+ }
+
+ efibc_str_to_str16(name, entry->var.VariableName);
+ efibc_str_to_str16(value, (efi_char16_t *)entry->var.Data);
+ memcpy(&entry->var.VendorGuid, &guid, sizeof(guid));
+
+ ret = efivar_entry_set(entry,
+ EFI_VARIABLE_NON_VOLATILE
+ | EFI_VARIABLE_BOOTSERVICE_ACCESS
+ | EFI_VARIABLE_RUNTIME_ACCESS,
+ size, entry->var.Data, NULL);
+ if (ret)
+ pr_err("failed to set %s EFI variable: 0x%x\n",
+ name, ret);
+
+ kfree(entry);
+ return ret;
+}
+
+static int efibc_reboot_notifier_call(struct notifier_block *notifier,
+ unsigned long event, void *data)
+{
+ const char *reason = "shutdown";
+ int ret;
+
+ if (event == SYS_RESTART)
+ reason = "reboot";
+
+ ret = efibc_set_variable("LoaderEntryRebootReason", reason);
+ if (ret || !data)
+ return NOTIFY_DONE;
+
+ efibc_set_variable("LoaderEntryOneShot", (char *)data);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block efibc_reboot_notifier = {
+ .notifier_call = efibc_reboot_notifier_call,
+};
+
+static int __init efibc_init(void)
+{
+ int ret;
+
+ if (!efi_enabled(EFI_RUNTIME_SERVICES))
+ return -ENODEV;
+
+ ret = register_reboot_notifier(&efibc_reboot_notifier);
+ if (ret)
+ pr_err("unable to register reboot notifier\n");
+
+ return ret;
+}
+module_init(efibc_init);
+
+static void __exit efibc_exit(void)
+{
+ unregister_reboot_notifier(&efibc_reboot_notifier);
+}
+module_exit(efibc_exit);
+
+MODULE_AUTHOR("Jeremy Compostella <jeremy.compostella@intel.com>");
+MODULE_AUTHOR("Matt Gumbel <matthew.k.gumbel@intel.com");
+MODULE_DESCRIPTION("EFI Bootloader Control");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c
index 096adcb..116b244 100644
--- a/drivers/firmware/efi/efivars.c
+++ b/drivers/firmware/efi/efivars.c
@@ -661,7 +661,7 @@
return;
err = efivar_init(efivar_update_sysfs_entry, entry,
- true, false, &efivar_sysfs_list);
+ false, &efivar_sysfs_list);
if (!err)
break;
@@ -730,8 +730,7 @@
return -ENOMEM;
}
- efivar_init(efivars_sysfs_callback, NULL, false,
- true, &efivar_sysfs_list);
+ efivar_init(efivars_sysfs_callback, NULL, true, &efivar_sysfs_list);
error = create_efivars_bin_attributes();
if (error) {
diff --git a/drivers/firmware/efi/fake_mem.c b/drivers/firmware/efi/fake_mem.c
index ed3a854..48430ab 100644
--- a/drivers/firmware/efi/fake_mem.c
+++ b/drivers/firmware/efi/fake_mem.c
@@ -57,7 +57,7 @@
void __init efi_fake_memmap(void)
{
u64 start, end, m_start, m_end, m_attr;
- int new_nr_map = memmap.nr_map;
+ int new_nr_map = efi.memmap.nr_map;
efi_memory_desc_t *md;
phys_addr_t new_memmap_phy;
void *new_memmap;
@@ -68,8 +68,7 @@
return;
/* count up the number of EFI memory descriptor */
- for (old = memmap.map; old < memmap.map_end; old += memmap.desc_size) {
- md = old;
+ for_each_efi_memory_desc(md) {
start = md->phys_addr;
end = start + (md->num_pages << EFI_PAGE_SHIFT) - 1;
@@ -95,25 +94,25 @@
}
/* allocate memory for new EFI memmap */
- new_memmap_phy = memblock_alloc(memmap.desc_size * new_nr_map,
+ new_memmap_phy = memblock_alloc(efi.memmap.desc_size * new_nr_map,
PAGE_SIZE);
if (!new_memmap_phy)
return;
/* create new EFI memmap */
new_memmap = early_memremap(new_memmap_phy,
- memmap.desc_size * new_nr_map);
+ efi.memmap.desc_size * new_nr_map);
if (!new_memmap) {
- memblock_free(new_memmap_phy, memmap.desc_size * new_nr_map);
+ memblock_free(new_memmap_phy, efi.memmap.desc_size * new_nr_map);
return;
}
- for (old = memmap.map, new = new_memmap;
- old < memmap.map_end;
- old += memmap.desc_size, new += memmap.desc_size) {
+ for (old = efi.memmap.map, new = new_memmap;
+ old < efi.memmap.map_end;
+ old += efi.memmap.desc_size, new += efi.memmap.desc_size) {
/* copy original EFI memory descriptor */
- memcpy(new, old, memmap.desc_size);
+ memcpy(new, old, efi.memmap.desc_size);
md = new;
start = md->phys_addr;
end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1;
@@ -134,8 +133,8 @@
md->num_pages = (m_end - md->phys_addr + 1) >>
EFI_PAGE_SHIFT;
/* latter part */
- new += memmap.desc_size;
- memcpy(new, old, memmap.desc_size);
+ new += efi.memmap.desc_size;
+ memcpy(new, old, efi.memmap.desc_size);
md = new;
md->phys_addr = m_end + 1;
md->num_pages = (end - md->phys_addr + 1) >>
@@ -147,16 +146,16 @@
md->num_pages = (m_start - md->phys_addr) >>
EFI_PAGE_SHIFT;
/* middle part */
- new += memmap.desc_size;
- memcpy(new, old, memmap.desc_size);
+ new += efi.memmap.desc_size;
+ memcpy(new, old, efi.memmap.desc_size);
md = new;
md->attribute |= m_attr;
md->phys_addr = m_start;
md->num_pages = (m_end - m_start + 1) >>
EFI_PAGE_SHIFT;
/* last part */
- new += memmap.desc_size;
- memcpy(new, old, memmap.desc_size);
+ new += efi.memmap.desc_size;
+ memcpy(new, old, efi.memmap.desc_size);
md = new;
md->phys_addr = m_end + 1;
md->num_pages = (end - m_end) >>
@@ -169,8 +168,8 @@
md->num_pages = (m_start - md->phys_addr) >>
EFI_PAGE_SHIFT;
/* latter part */
- new += memmap.desc_size;
- memcpy(new, old, memmap.desc_size);
+ new += efi.memmap.desc_size;
+ memcpy(new, old, efi.memmap.desc_size);
md = new;
md->phys_addr = m_start;
md->num_pages = (end - md->phys_addr + 1) >>
@@ -182,10 +181,10 @@
/* swap into new EFI memmap */
efi_unmap_memmap();
- memmap.map = new_memmap;
- memmap.phys_map = new_memmap_phy;
- memmap.nr_map = new_nr_map;
- memmap.map_end = memmap.map + memmap.nr_map * memmap.desc_size;
+ efi.memmap.map = new_memmap;
+ efi.memmap.phys_map = new_memmap_phy;
+ efi.memmap.nr_map = new_nr_map;
+ efi.memmap.map_end = efi.memmap.map + efi.memmap.nr_map * efi.memmap.desc_size;
set_bit(EFI_MEMMAP, &efi.flags);
/* print new EFI memmap */
diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index da99bbb..c069451 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -28,7 +28,7 @@
# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.
KCOV_INSTRUMENT := n
-lib-y := efi-stub-helper.o
+lib-y := efi-stub-helper.o gop.o
# include the stub's generic dependencies from lib/ when building for ARM/arm64
arm-deps := fdt_rw.c fdt_ro.c fdt_wip.c fdt.c fdt_empty_tree.c fdt_sw.c sort.c
diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c
index 414deb8..993aa56 100644
--- a/drivers/firmware/efi/libstub/arm-stub.c
+++ b/drivers/firmware/efi/libstub/arm-stub.c
@@ -20,27 +20,49 @@
bool __nokaslr;
-static int efi_secureboot_enabled(efi_system_table_t *sys_table_arg)
+static int efi_get_secureboot(efi_system_table_t *sys_table_arg)
{
- static efi_guid_t const var_guid = EFI_GLOBAL_VARIABLE_GUID;
- static efi_char16_t const var_name[] = {
+ static efi_char16_t const sb_var_name[] = {
'S', 'e', 'c', 'u', 'r', 'e', 'B', 'o', 'o', 't', 0 };
+ static efi_char16_t const sm_var_name[] = {
+ 'S', 'e', 't', 'u', 'p', 'M', 'o', 'd', 'e', 0 };
+ efi_guid_t var_guid = EFI_GLOBAL_VARIABLE_GUID;
efi_get_variable_t *f_getvar = sys_table_arg->runtime->get_variable;
- unsigned long size = sizeof(u8);
- efi_status_t status;
u8 val;
+ unsigned long size = sizeof(val);
+ efi_status_t status;
- status = f_getvar((efi_char16_t *)var_name, (efi_guid_t *)&var_guid,
+ status = f_getvar((efi_char16_t *)sb_var_name, (efi_guid_t *)&var_guid,
NULL, &size, &val);
+ if (status != EFI_SUCCESS)
+ goto out_efi_err;
+
+ if (val == 0)
+ return 0;
+
+ status = f_getvar((efi_char16_t *)sm_var_name, (efi_guid_t *)&var_guid,
+ NULL, &size, &val);
+
+ if (status != EFI_SUCCESS)
+ goto out_efi_err;
+
+ if (val == 1)
+ return 0;
+
+ return 1;
+
+out_efi_err:
switch (status) {
- case EFI_SUCCESS:
- return val;
case EFI_NOT_FOUND:
return 0;
+ case EFI_DEVICE_ERROR:
+ return -EIO;
+ case EFI_SECURITY_VIOLATION:
+ return -EACCES;
default:
- return 1;
+ return -EINVAL;
}
}
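
(Editor's illustration.) The decision implemented above, condensed into a sketch with the error paths folded away; the names here are hypothetical:

/* Secure Boot is enforced only when SecureBoot == 1 and SetupMode == 0 */
static int secure_boot_state(bool sb_found, u8 sb, bool sm_found, u8 sm)
{
	if (!sb_found || sb == 0)	/* EFI_NOT_FOUND counts as disabled */
		return 0;
	if (!sm_found || sm == 1)	/* missing SetupMode, or Setup Mode active */
		return 0;
	return 1;
}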
@@ -147,6 +169,25 @@
out->output_string(out, str);
}
+static struct screen_info *setup_graphics(efi_system_table_t *sys_table_arg)
+{
+ efi_guid_t gop_proto = EFI_GRAPHICS_OUTPUT_PROTOCOL_GUID;
+ efi_status_t status;
+ unsigned long size;
+ void **gop_handle = NULL;
+ struct screen_info *si = NULL;
+
+ size = 0;
+ status = efi_call_early(locate_handle, EFI_LOCATE_BY_PROTOCOL,
+ &gop_proto, NULL, &size, gop_handle);
+ if (status == EFI_BUFFER_TOO_SMALL) {
+ si = alloc_screen_info(sys_table_arg);
+ if (!si)
+ return NULL;
+ efi_setup_gop(sys_table_arg, si, &gop_proto, size);
+ }
+ return si;
+}
/*
* This function handles the architecture specific differences between arm and
@@ -185,6 +226,8 @@
efi_guid_t loaded_image_proto = LOADED_IMAGE_PROTOCOL_GUID;
unsigned long reserve_addr = 0;
unsigned long reserve_size = 0;
+ int secure_boot = 0;
+ struct screen_info *si;
/* Check if we were booted by the EFI firmware */
if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
@@ -237,6 +280,8 @@
__nokaslr = true;
}
+ si = setup_graphics(sys_table);
+
status = handle_kernel_image(sys_table, image_addr, &image_size,
&reserve_addr,
&reserve_size,
@@ -250,12 +295,21 @@
if (status != EFI_SUCCESS)
pr_efi_err(sys_table, "Failed to parse EFI cmdline options\n");
+ secure_boot = efi_get_secureboot(sys_table);
+ if (secure_boot > 0)
+ pr_efi(sys_table, "UEFI Secure Boot is enabled.\n");
+
+ if (secure_boot < 0) {
+ pr_efi_err(sys_table,
+ "could not determine UEFI Secure Boot status.\n");
+ }
+
/*
* Unauthenticated device tree data is a security hazard, so
* ignore 'dtb=' unless UEFI Secure Boot is disabled.
*/
- if (efi_secureboot_enabled(sys_table)) {
- pr_efi(sys_table, "UEFI Secure Boot is enabled.\n");
+ if (secure_boot != 0 && strstr(cmdline_ptr, "dtb=")) {
+ pr_efi(sys_table, "Ignoring DTB from command line.\n");
} else {
status = handle_cmdline_files(sys_table, image, cmdline_ptr,
"dtb=",
@@ -309,6 +363,7 @@
efi_free(sys_table, image_size, *image_addr);
efi_free(sys_table, reserve_size, reserve_addr);
fail_free_cmdline:
+ free_screen_info(sys_table, si);
efi_free(sys_table, cmdline_size, (unsigned long)cmdline_ptr);
fail:
return EFI_ERROR;
diff --git a/drivers/firmware/efi/libstub/arm32-stub.c b/drivers/firmware/efi/libstub/arm32-stub.c
index 6f42be4..e1f0b28 100644
--- a/drivers/firmware/efi/libstub/arm32-stub.c
+++ b/drivers/firmware/efi/libstub/arm32-stub.c
@@ -26,6 +26,43 @@
return EFI_SUCCESS;
}
+static efi_guid_t screen_info_guid = LINUX_EFI_ARM_SCREEN_INFO_TABLE_GUID;
+
+struct screen_info *alloc_screen_info(efi_system_table_t *sys_table_arg)
+{
+ struct screen_info *si;
+ efi_status_t status;
+
+ /*
+ * Unlike on arm64, where we can directly fill out the screen_info
+ * structure from the stub, we need to allocate a buffer to hold
+ * its contents while we hand over to the kernel proper from the
+ * decompressor.
+ */
+ status = efi_call_early(allocate_pool, EFI_RUNTIME_SERVICES_DATA,
+ sizeof(*si), (void **)&si);
+
+ if (status != EFI_SUCCESS)
+ return NULL;
+
+ status = efi_call_early(install_configuration_table,
+ &screen_info_guid, si);
+ if (status == EFI_SUCCESS)
+ return si;
+
+ efi_call_early(free_pool, si);
+ return NULL;
+}
+
+void free_screen_info(efi_system_table_t *sys_table_arg, struct screen_info *si)
+{
+ if (!si)
+ return;
+
+ efi_call_early(install_configuration_table, &screen_info_guid, NULL);
+ efi_call_early(free_pool, si);
+}
+
efi_status_t handle_kernel_image(efi_system_table_t *sys_table,
unsigned long *image_addr,
unsigned long *image_size,
diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c
index a90f645..eae693e 100644
--- a/drivers/firmware/efi/libstub/arm64-stub.c
+++ b/drivers/firmware/efi/libstub/arm64-stub.c
@@ -81,15 +81,24 @@
if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && phys_seed != 0) {
/*
+ * If CONFIG_DEBUG_ALIGN_RODATA is not set, produce a
+ * displacement in the interval [0, MIN_KIMG_ALIGN) that
+ * is a multiple of the minimal segment alignment (SZ_64K)
+ */
+ u32 mask = (MIN_KIMG_ALIGN - 1) & ~(SZ_64K - 1);
+ u32 offset = !IS_ENABLED(CONFIG_DEBUG_ALIGN_RODATA) ?
+ (phys_seed >> 32) & mask : TEXT_OFFSET;
+
+ /*
* If KASLR is enabled, and we have some randomness available,
* locate the kernel at a randomized offset in physical memory.
*/
- *reserve_size = kernel_memsize + TEXT_OFFSET;
+ *reserve_size = kernel_memsize + offset;
status = efi_random_alloc(sys_table_arg, *reserve_size,
MIN_KIMG_ALIGN, reserve_addr,
- phys_seed);
+ (u32)phys_seed);
- *image_addr = *reserve_addr + TEXT_OFFSET;
+ *image_addr = *reserve_addr + offset;
} else {
/*
* Else, try a straight allocation at the preferred offset.
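
(Editor's illustration.) A worked example of the displacement computed above; MIN_KIMG_ALIGN == 2 MiB is an assumption of this sketch, so the mask selects bits 16-20 of the upper seed word:

#include <stdint.h>
#include <stdio.h>

#define SZ_64K		0x10000UL
#define MIN_KIMG_ALIGN	0x200000UL	/* assumed 2 MiB */

int main(void)
{
	uint64_t phys_seed = 0xdeadbeef12345678ULL;	/* hypothetical seed */
	uint32_t mask = (MIN_KIMG_ALIGN - 1) & ~(SZ_64K - 1);	/* 0x1f0000 */
	uint32_t offset = (phys_seed >> 32) & mask;

	/* offset is 64 KiB aligned and below 2 MiB: here 0xd0000 */
	printf("mask=0x%x offset=0x%x\n", mask, offset);
	return 0;
}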
diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c
index 29ed2f9..3bd127f9 100644
--- a/drivers/firmware/efi/libstub/efi-stub-helper.c
+++ b/drivers/firmware/efi/libstub/efi-stub-helper.c
@@ -125,10 +125,12 @@
map.map_end = map.map + map_size;
- for_each_efi_memory_desc(&map, md)
- if (md->attribute & EFI_MEMORY_WB)
+ for_each_efi_memory_desc_in_map(&map, md) {
+ if (md->attribute & EFI_MEMORY_WB) {
if (membase > md->phys_addr)
membase = md->phys_addr;
+ }
+ }
efi_call_early(free_pool, map.map);
diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c
index 6dba78a..e58abfa 100644
--- a/drivers/firmware/efi/libstub/fdt.c
+++ b/drivers/firmware/efi/libstub/fdt.c
@@ -24,7 +24,7 @@
unsigned long map_size, unsigned long desc_size,
u32 desc_ver)
{
- int node, prev, num_rsv;
+ int node, num_rsv;
int status;
u32 fdt_val32;
u64 fdt_val64;
@@ -54,28 +54,6 @@
goto fdt_set_fail;
/*
- * Delete any memory nodes present. We must delete nodes which
- * early_init_dt_scan_memory may try to use.
- */
- prev = 0;
- for (;;) {
- const char *type;
- int len;
-
- node = fdt_next_node(fdt, prev, NULL);
- if (node < 0)
- break;
-
- type = fdt_getprop(fdt, node, "device_type", &len);
- if (type && strncmp(type, "memory", len) == 0) {
- fdt_del_node(fdt, node);
- continue;
- }
-
- prev = node;
- }
-
- /*
* Delete all memory reserve map entries. When booting via UEFI,
* kernel will use the UEFI memory map to find reserved regions.
*/
diff --git a/drivers/firmware/efi/libstub/gop.c b/drivers/firmware/efi/libstub/gop.c
new file mode 100644
index 0000000..932742e
--- /dev/null
+++ b/drivers/firmware/efi/libstub/gop.c
@@ -0,0 +1,354 @@
+/* -----------------------------------------------------------------------
+ *
+ * Copyright 2011 Intel Corporation; author Matt Fleming
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2.
+ *
+ * ----------------------------------------------------------------------- */
+
+#include <linux/efi.h>
+#include <linux/screen_info.h>
+#include <asm/efi.h>
+#include <asm/setup.h>
+
+static void find_bits(unsigned long mask, u8 *pos, u8 *size)
+{
+ u8 first, len;
+
+ first = 0;
+ len = 0;
+
+ if (mask) {
+ while (!(mask & 0x1)) {
+ mask = mask >> 1;
+ first++;
+ }
+
+ while (mask & 0x1) {
+ mask = mask >> 1;
+ len++;
+ }
+ }
+
+ *pos = first;
+ *size = len;
+}
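
(Editor's illustration.) find_bits() assumes the mask is a single contiguous run of ones; a standalone check of how a GOP pixel bitmask is decomposed:

#include <stdio.h>

static void find_bits(unsigned long mask, unsigned char *pos, unsigned char *size)
{
	unsigned char first = 0, len = 0;

	if (mask) {
		while (!(mask & 0x1)) {	/* skip the low zero bits */
			mask >>= 1;
			first++;
		}
		while (mask & 0x1) {	/* count the contiguous ones */
			mask >>= 1;
			len++;
		}
	}
	*pos = first;
	*size = len;
}

int main(void)
{
	unsigned char pos, size;

	find_bits(0x00ff0000UL, &pos, &size);	/* e.g. a red channel mask */
	printf("pos=%u size=%u\n", pos, size);	/* prints pos=16 size=8 */
	return 0;
}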
+
+static void
+setup_pixel_info(struct screen_info *si, u32 pixels_per_scan_line,
+ struct efi_pixel_bitmask pixel_info, int pixel_format)
+{
+ if (pixel_format == PIXEL_RGB_RESERVED_8BIT_PER_COLOR) {
+ si->lfb_depth = 32;
+ si->lfb_linelength = pixels_per_scan_line * 4;
+ si->red_size = 8;
+ si->red_pos = 0;
+ si->green_size = 8;
+ si->green_pos = 8;
+ si->blue_size = 8;
+ si->blue_pos = 16;
+ si->rsvd_size = 8;
+ si->rsvd_pos = 24;
+ } else if (pixel_format == PIXEL_BGR_RESERVED_8BIT_PER_COLOR) {
+ si->lfb_depth = 32;
+ si->lfb_linelength = pixels_per_scan_line * 4;
+ si->red_size = 8;
+ si->red_pos = 16;
+ si->green_size = 8;
+ si->green_pos = 8;
+ si->blue_size = 8;
+ si->blue_pos = 0;
+ si->rsvd_size = 8;
+ si->rsvd_pos = 24;
+ } else if (pixel_format == PIXEL_BIT_MASK) {
+ find_bits(pixel_info.red_mask, &si->red_pos, &si->red_size);
+ find_bits(pixel_info.green_mask, &si->green_pos,
+ &si->green_size);
+ find_bits(pixel_info.blue_mask, &si->blue_pos, &si->blue_size);
+ find_bits(pixel_info.reserved_mask, &si->rsvd_pos,
+ &si->rsvd_size);
+ si->lfb_depth = si->red_size + si->green_size +
+ si->blue_size + si->rsvd_size;
+ si->lfb_linelength = (pixels_per_scan_line * si->lfb_depth) / 8;
+ } else {
+ si->lfb_depth = 4;
+ si->lfb_linelength = si->lfb_width / 2;
+ si->red_size = 0;
+ si->red_pos = 0;
+ si->green_size = 0;
+ si->green_pos = 0;
+ si->blue_size = 0;
+ si->blue_pos = 0;
+ si->rsvd_size = 0;
+ si->rsvd_pos = 0;
+ }
+}
+
+static efi_status_t
+__gop_query32(efi_system_table_t *sys_table_arg,
+ struct efi_graphics_output_protocol_32 *gop32,
+ struct efi_graphics_output_mode_info **info,
+ unsigned long *size, u64 *fb_base)
+{
+ struct efi_graphics_output_protocol_mode_32 *mode;
+ efi_graphics_output_protocol_query_mode query_mode;
+ efi_status_t status;
+ unsigned long m;
+
+ m = gop32->mode;
+ mode = (struct efi_graphics_output_protocol_mode_32 *)m;
+ query_mode = (void *)(unsigned long)gop32->query_mode;
+
+ status = __efi_call_early(query_mode, (void *)gop32, mode->mode, size,
+ info);
+ if (status != EFI_SUCCESS)
+ return status;
+
+ *fb_base = mode->frame_buffer_base;
+ return status;
+}
+
+static efi_status_t
+setup_gop32(efi_system_table_t *sys_table_arg, struct screen_info *si,
+ efi_guid_t *proto, unsigned long size, void **gop_handle)
+{
+ struct efi_graphics_output_protocol_32 *gop32, *first_gop;
+ unsigned long nr_gops;
+ u16 width, height;
+ u32 pixels_per_scan_line;
+ u32 ext_lfb_base;
+ u64 fb_base;
+ struct efi_pixel_bitmask pixel_info;
+ int pixel_format;
+ efi_status_t status = EFI_NOT_FOUND;
+ u32 *handles = (u32 *)(unsigned long)gop_handle;
+ int i;
+
+ first_gop = NULL;
+ gop32 = NULL;
+
+ nr_gops = size / sizeof(u32);
+ for (i = 0; i < nr_gops; i++) {
+ struct efi_graphics_output_mode_info *info = NULL;
+ efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID;
+ bool conout_found = false;
+ void *dummy = NULL;
+ efi_handle_t h = (efi_handle_t)(unsigned long)handles[i];
+ u64 current_fb_base;
+
+ status = efi_call_early(handle_protocol, h,
+ proto, (void **)&gop32);
+ if (status != EFI_SUCCESS)
+ continue;
+
+ status = efi_call_early(handle_protocol, h,
+ &conout_proto, &dummy);
+ if (status == EFI_SUCCESS)
+ conout_found = true;
+
+ status = __gop_query32(sys_table_arg, gop32, &info, &size,
+ &current_fb_base);
+ if (status == EFI_SUCCESS && (!first_gop || conout_found)) {
+ /*
+ * Systems that use the UEFI Console Splitter may
+ * provide multiple GOP devices, not all of which are
+ * backed by real hardware. The workaround is to search
+ * for a GOP implementing the ConOut protocol, and if
+ * one isn't found, to just fall back to the first GOP.
+ */
+ width = info->horizontal_resolution;
+ height = info->vertical_resolution;
+ pixel_format = info->pixel_format;
+ pixel_info = info->pixel_information;
+ pixels_per_scan_line = info->pixels_per_scan_line;
+ fb_base = current_fb_base;
+
+ /*
+ * Once we've found a GOP supporting ConOut,
+ * don't bother looking any further.
+ */
+ first_gop = gop32;
+ if (conout_found)
+ break;
+ }
+ }
+
+ /* Did we find any GOPs? */
+ if (!first_gop)
+ goto out;
+
+ /* EFI framebuffer */
+ si->orig_video_isVGA = VIDEO_TYPE_EFI;
+
+ si->lfb_width = width;
+ si->lfb_height = height;
+ si->lfb_base = fb_base;
+
+ ext_lfb_base = (u64)(unsigned long)fb_base >> 32;
+ if (ext_lfb_base) {
+ si->capabilities |= VIDEO_CAPABILITY_64BIT_BASE;
+ si->ext_lfb_base = ext_lfb_base;
+ }
+
+ si->pages = 1;
+
+ setup_pixel_info(si, pixels_per_scan_line, pixel_info, pixel_format);
+
+ si->lfb_size = si->lfb_linelength * si->lfb_height;
+
+ si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS;
+out:
+ return status;
+}
+
+static efi_status_t
+__gop_query64(efi_system_table_t *sys_table_arg,
+ struct efi_graphics_output_protocol_64 *gop64,
+ struct efi_graphics_output_mode_info **info,
+ unsigned long *size, u64 *fb_base)
+{
+ struct efi_graphics_output_protocol_mode_64 *mode;
+ efi_graphics_output_protocol_query_mode query_mode;
+ efi_status_t status;
+ unsigned long m;
+
+ m = gop64->mode;
+ mode = (struct efi_graphics_output_protocol_mode_64 *)m;
+ query_mode = (void *)(unsigned long)gop64->query_mode;
+
+ status = __efi_call_early(query_mode, (void *)gop64, mode->mode, size,
+ info);
+ if (status != EFI_SUCCESS)
+ return status;
+
+ *fb_base = mode->frame_buffer_base;
+ return status;
+}
+
+static efi_status_t
+setup_gop64(efi_system_table_t *sys_table_arg, struct screen_info *si,
+ efi_guid_t *proto, unsigned long size, void **gop_handle)
+{
+ struct efi_graphics_output_protocol_64 *gop64, *first_gop;
+ unsigned long nr_gops;
+ u16 width, height;
+ u32 pixels_per_scan_line;
+ u32 ext_lfb_base;
+ u64 fb_base;
+ struct efi_pixel_bitmask pixel_info;
+ int pixel_format;
+ efi_status_t status = EFI_NOT_FOUND;
+ u64 *handles = (u64 *)(unsigned long)gop_handle;
+ int i;
+
+ first_gop = NULL;
+ gop64 = NULL;
+
+ nr_gops = size / sizeof(u64);
+ for (i = 0; i < nr_gops; i++) {
+ struct efi_graphics_output_mode_info *info = NULL;
+ efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID;
+ bool conout_found = false;
+ void *dummy = NULL;
+ efi_handle_t h = (efi_handle_t)(unsigned long)handles[i];
+ u64 current_fb_base;
+
+ status = efi_call_early(handle_protocol, h,
+ proto, (void **)&gop64);
+ if (status != EFI_SUCCESS)
+ continue;
+
+ status = efi_call_early(handle_protocol, h,
+ &conout_proto, &dummy);
+ if (status == EFI_SUCCESS)
+ conout_found = true;
+
+ status = __gop_query64(sys_table_arg, gop64, &info, &size,
+ &current_fb_base);
+ if (status == EFI_SUCCESS && (!first_gop || conout_found)) {
+ /*
+ * Systems that use the UEFI Console Splitter may
+ * provide multiple GOP devices, not all of which are
+ * backed by real hardware. The workaround is to search
+ * for a GOP implementing the ConOut protocol, and if
+ * one isn't found, to just fall back to the first GOP.
+ */
+ width = info->horizontal_resolution;
+ height = info->vertical_resolution;
+ pixel_format = info->pixel_format;
+ pixel_info = info->pixel_information;
+ pixels_per_scan_line = info->pixels_per_scan_line;
+ fb_base = current_fb_base;
+
+ /*
+ * Once we've found a GOP supporting ConOut,
+ * don't bother looking any further.
+ */
+ first_gop = gop64;
+ if (conout_found)
+ break;
+ }
+ }
+
+ /* Did we find any GOPs? */
+ if (!first_gop)
+ goto out;
+
+ /* EFI framebuffer */
+ si->orig_video_isVGA = VIDEO_TYPE_EFI;
+
+ si->lfb_width = width;
+ si->lfb_height = height;
+ si->lfb_base = fb_base;
+
+ ext_lfb_base = (u64)(unsigned long)fb_base >> 32;
+ if (ext_lfb_base) {
+ si->capabilities |= VIDEO_CAPABILITY_64BIT_BASE;
+ si->ext_lfb_base = ext_lfb_base;
+ }
+
+ si->pages = 1;
+
+ setup_pixel_info(si, pixels_per_scan_line, pixel_info, pixel_format);
+
+ si->lfb_size = si->lfb_linelength * si->lfb_height;
+
+ si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS;
+out:
+ return status;
+}
+
+/*
+ * See if we have Graphics Output Protocol
+ */
+efi_status_t efi_setup_gop(efi_system_table_t *sys_table_arg,
+ struct screen_info *si, efi_guid_t *proto,
+ unsigned long size)
+{
+ efi_status_t status;
+ void **gop_handle = NULL;
+
+ status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
+ size, (void **)&gop_handle);
+ if (status != EFI_SUCCESS)
+ return status;
+
+ status = efi_call_early(locate_handle,
+ EFI_LOCATE_BY_PROTOCOL,
+ proto, NULL, &size, gop_handle);
+ if (status != EFI_SUCCESS)
+ goto free_handle;
+
+ if (efi_is_64bit()) {
+ status = setup_gop64(sys_table_arg, si, proto, size,
+ gop_handle);
+ } else {
+ status = setup_gop32(sys_table_arg, si, proto, size,
+ gop_handle);
+ }
+
+free_handle:
+ efi_call_early(free_pool, gop_handle);
+ return status;
+}
diff --git a/drivers/firmware/efi/memattr.c b/drivers/firmware/efi/memattr.c
new file mode 100644
index 0000000..236004b
--- /dev/null
+++ b/drivers/firmware/efi/memattr.c
@@ -0,0 +1,182 @@
+/*
+ * Copyright (C) 2016 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#define pr_fmt(fmt) "efi: memattr: " fmt
+
+#include <linux/efi.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/memblock.h>
+
+#include <asm/early_ioremap.h>
+
+static int __initdata tbl_size;
+
+/*
+ * Reserve the memory associated with the Memory Attributes configuration
+ * table, if it exists.
+ */
+int __init efi_memattr_init(void)
+{
+ efi_memory_attributes_table_t *tbl;
+
+ if (efi.mem_attr_table == EFI_INVALID_TABLE_ADDR)
+ return 0;
+
+ tbl = early_memremap(efi.mem_attr_table, sizeof(*tbl));
+ if (!tbl) {
+ pr_err("Failed to map EFI Memory Attributes table @ 0x%lx\n",
+ efi.mem_attr_table);
+ return -ENOMEM;
+ }
+
+ if (tbl->version > 1) {
+ pr_warn("Unexpected EFI Memory Attributes table version %d\n",
+ tbl->version);
+ goto unmap;
+ }
+
+ tbl_size = sizeof(*tbl) + tbl->num_entries * tbl->desc_size;
+ memblock_reserve(efi.mem_attr_table, tbl_size);
+
+unmap:
+ early_memunmap(tbl, sizeof(*tbl));
+ return 0;
+}
+
+/*
+ * Copies the UEFI memory descriptor @in to @out, returning true if @in is
+ * covered entirely by a UEFI memory map entry with matching attributes.
+ * The virtual address of @out is set according to the matching entry found.
+ */
+static bool entry_is_valid(const efi_memory_desc_t *in, efi_memory_desc_t *out)
+{
+ u64 in_paddr = in->phys_addr;
+ u64 in_size = in->num_pages << EFI_PAGE_SHIFT;
+ efi_memory_desc_t *md;
+
+ *out = *in;
+
+ if (in->type != EFI_RUNTIME_SERVICES_CODE &&
+ in->type != EFI_RUNTIME_SERVICES_DATA) {
+ pr_warn("Entry type should be RuntimeServiceCode/Data\n");
+ return false;
+ }
+
+ if (!(in->attribute & (EFI_MEMORY_RO | EFI_MEMORY_XP))) {
+ pr_warn("Entry attributes invalid: RO and XP bits both cleared\n");
+ return false;
+ }
+
+ if (PAGE_SIZE > EFI_PAGE_SIZE &&
+ (!PAGE_ALIGNED(in->phys_addr) ||
+ !PAGE_ALIGNED(in->num_pages << EFI_PAGE_SHIFT))) {
+ /*
+ * Since arm64 may execute with page sizes of up to 64 KB, the
+ * UEFI spec mandates that RuntimeServices memory regions must
+ * be 64 KB aligned. We need to validate this here since we will
+ * not be able to tighten permissions on such regions without
+ * affecting adjacent regions.
+ */
+ pr_warn("Entry address region misaligned\n");
+ return false;
+ }
+
+ for_each_efi_memory_desc(md) {
+ u64 md_paddr = md->phys_addr;
+ u64 md_size = md->num_pages << EFI_PAGE_SHIFT;
+
+ if (!(md->attribute & EFI_MEMORY_RUNTIME))
+ continue;
+ if (md->virt_addr == 0) {
+ /* no virtual mapping has been installed by the stub */
+ break;
+ }
+
+ if (md_paddr > in_paddr || (in_paddr - md_paddr) >= md_size)
+ continue;
+
+ /*
+ * This entry covers the start of @in, check whether
+ * it covers the end as well.
+ */
+ if (md_paddr + md_size < in_paddr + in_size) {
+ pr_warn("Entry covers multiple EFI memory map regions\n");
+ return false;
+ }
+
+ if (md->type != in->type) {
+ pr_warn("Entry type deviates from EFI memory map region type\n");
+ return false;
+ }
+
+ out->virt_addr = in_paddr + (md->virt_addr - md_paddr);
+
+ return true;
+ }
+
+ pr_warn("No matching entry found in the EFI memory map\n");
+ return false;
+}
+
+/*
+ * To be called after the EFI page tables have been populated. If a memory
+ * attributes table is available, its contents will be used to update the
+ * mappings with tightened permissions as described by the table.
+ * This requires the UEFI memory map to have already been populated with
+ * virtual addresses.
+ */
+int __init efi_memattr_apply_permissions(struct mm_struct *mm,
+ efi_memattr_perm_setter fn)
+{
+ efi_memory_attributes_table_t *tbl;
+ int i, ret;
+
+ if (tbl_size <= sizeof(*tbl))
+ return 0;
+
+ /*
+ * We need the EFI memory map to be set up so we can use it to
+ * look up the virtual addresses of all entries in the EFI
+ * Memory Attributes table. If it isn't available, this
+ * function should not be called.
+ */
+ if (WARN_ON(!efi_enabled(EFI_MEMMAP)))
+ return 0;
+
+ tbl = memremap(efi.mem_attr_table, tbl_size, MEMREMAP_WB);
+ if (!tbl) {
+ pr_err("Failed to map EFI Memory Attributes table @ 0x%lx\n",
+ efi.mem_attr_table);
+ return -ENOMEM;
+ }
+
+ if (efi_enabled(EFI_DBG))
+ pr_info("Processing EFI Memory Attributes table:\n");
+
+ for (i = ret = 0; ret == 0 && i < tbl->num_entries; i++) {
+ efi_memory_desc_t md;
+ unsigned long size;
+ bool valid;
+ char buf[64];
+
+ valid = entry_is_valid((void *)tbl->entry + i * tbl->desc_size,
+ &md);
+ size = md.num_pages << EFI_PAGE_SHIFT;
+ if (efi_enabled(EFI_DBG) || !valid)
+ pr_info("%s 0x%012llx-0x%012llx %s\n",
+ valid ? "" : "!", md.phys_addr,
+ md.phys_addr + size - 1,
+ efi_md_typeattr_format(buf, sizeof(buf), &md));
+
+ if (valid)
+ ret = fn(mm, &md);
+ }
+ memunmap(tbl);
+ return ret;
+}
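
(Editor's illustration.) A sketch of the efi_memattr_perm_setter callback an arch might pass in; arch_update_mapping() is a hypothetical helper standing in for the real page-table code:

static int __init example_set_permissions(struct mm_struct *mm,
					  efi_memory_desc_t *md)
{
	bool ro = md->attribute & EFI_MEMORY_RO;
	bool xp = md->attribute & EFI_MEMORY_XP;

	/*
	 * Tighten the mapping of the (already validated) region
	 * [md->virt_addr, md->virt_addr + (md->num_pages << EFI_PAGE_SHIFT)):
	 * drop write permission when @ro, drop exec permission when @xp.
	 */
	return arch_update_mapping(mm, md, ro, xp);	/* hypothetical */
}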
diff --git a/drivers/firmware/efi/reboot.c b/drivers/firmware/efi/reboot.c
index 9c59d1c..62ead9b 100644
--- a/drivers/firmware/efi/reboot.c
+++ b/drivers/firmware/efi/reboot.c
@@ -9,7 +9,8 @@
void efi_reboot(enum reboot_mode reboot_mode, const char *__unused)
{
- int efi_mode;
+ const char *str[] = { "cold", "warm", "shutdown", "platform" };
+ int efi_mode, cap_reset_mode;
if (!efi_enabled(EFI_RUNTIME_SERVICES))
return;
@@ -30,6 +31,15 @@
if (efi_reboot_quirk_mode != -1)
efi_mode = efi_reboot_quirk_mode;
+ if (efi_capsule_pending(&cap_reset_mode)) {
+ if (efi_mode != cap_reset_mode)
+ printk(KERN_CRIT "efi: %s reset requested but pending "
+ "capsule update requires %s reset... Performing "
+ "%s reset.\n", str[efi_mode], str[cap_reset_mode],
+ str[cap_reset_mode]);
+ efi_mode = cap_reset_mode;
+ }
+
efi.reset_system(efi_mode, EFI_SUCCESS, 0, NULL);
}
diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c
index de69530..23bef6b 100644
--- a/drivers/firmware/efi/runtime-wrappers.c
+++ b/drivers/firmware/efi/runtime-wrappers.c
@@ -16,10 +16,70 @@
#include <linux/bug.h>
#include <linux/efi.h>
+#include <linux/irqflags.h>
#include <linux/mutex.h>
#include <linux/spinlock.h>
+#include <linux/stringify.h>
#include <asm/efi.h>
+static void efi_call_virt_check_flags(unsigned long flags, const char *call)
+{
+ unsigned long cur_flags, mismatch;
+
+ local_save_flags(cur_flags);
+
+ mismatch = flags ^ cur_flags;
+ if (!WARN_ON_ONCE(mismatch & ARCH_EFI_IRQ_FLAGS_MASK))
+ return;
+
+ add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_NOW_UNRELIABLE);
+ pr_err_ratelimited(FW_BUG "IRQ flags corrupted (0x%08lx=>0x%08lx) by EFI %s\n",
+ flags, cur_flags, call);
+ local_irq_restore(flags);
+}
+
+/*
+ * Arch code can implement the following three template macros, avoiding
+ * repetition for the void/non-void return cases of {__,}efi_call_virt:
+ *
+ * * arch_efi_call_virt_setup
+ *
+ * Sets up the environment for the call (e.g. switching page tables,
+ * allowing kernel-mode use of floating point, if required).
+ *
+ * * arch_efi_call_virt
+ *
+ * Performs the call. The last expression in the macro must be the call
+ * itself, allowing the logic to be shared by the void and non-void
+ * cases.
+ *
+ * * arch_efi_call_virt_teardown
+ *
+ * Restores the usual kernel environment once the call has returned.
+ */
+
+#define efi_call_virt(f, args...) \
+({ \
+ efi_status_t __s; \
+ unsigned long flags; \
+ arch_efi_call_virt_setup(); \
+ local_save_flags(flags); \
+ __s = arch_efi_call_virt(f, args); \
+ efi_call_virt_check_flags(flags, __stringify(f)); \
+ arch_efi_call_virt_teardown(); \
+ __s; \
+})
+
+#define __efi_call_virt(f, args...) \
+({ \
+ unsigned long flags; \
+ arch_efi_call_virt_setup(); \
+ local_save_flags(flags); \
+ arch_efi_call_virt(f, args); \
+ efi_call_virt_check_flags(flags, __stringify(f)); \
+ arch_efi_call_virt_teardown(); \
+})
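
(Editor's illustration.) What an architecture's three templates might look like; efi_virtmap_load()/efi_virtmap_unload() exist on ARM, but the exact shape below is a hedged sketch, not any arch's verbatim <asm/efi.h>:

#define arch_efi_call_virt_setup()	efi_virtmap_load()	/* install the EFI page tables */

#define arch_efi_call_virt(f, args...)	\
	efi.systab->runtime->f(args)	/* the call is the last (and only) expression */

#define arch_efi_call_virt_teardown()	efi_virtmap_unload()	/* restore the kernel mappings */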
+
/*
* According to section 7.1 of the UEFI spec, Runtime Services are not fully
* reentrant, and there are particular combinations of calls that need to be
diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c
index 34b7419..d3b7513 100644
--- a/drivers/firmware/efi/vars.c
+++ b/drivers/firmware/efi/vars.c
@@ -329,39 +329,6 @@
return fops->query_variable_store(attributes, size, true);
}
-static int efi_status_to_err(efi_status_t status)
-{
- int err;
-
- switch (status) {
- case EFI_SUCCESS:
- err = 0;
- break;
- case EFI_INVALID_PARAMETER:
- err = -EINVAL;
- break;
- case EFI_OUT_OF_RESOURCES:
- err = -ENOSPC;
- break;
- case EFI_DEVICE_ERROR:
- err = -EIO;
- break;
- case EFI_WRITE_PROTECTED:
- err = -EROFS;
- break;
- case EFI_SECURITY_VIOLATION:
- err = -EACCES;
- break;
- case EFI_NOT_FOUND:
- err = -ENOENT;
- break;
- default:
- err = -EINVAL;
- }
-
- return err;
-}
-
static bool variable_is_present(efi_char16_t *variable_name, efi_guid_t *vendor,
struct list_head *head)
{
@@ -452,8 +419,7 @@
* Returns 0 on success, or a kernel error code on failure.
*/
int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *),
- void *data, bool atomic, bool duplicates,
- struct list_head *head)
+ void *data, bool duplicates, struct list_head *head)
{
const struct efivar_operations *ops = __efivars->ops;
unsigned long variable_name_size = 1024;
@@ -483,7 +449,7 @@
&vendor_guid);
switch (status) {
case EFI_SUCCESS:
- if (!atomic)
+ if (duplicates)
spin_unlock_irq(&__efivars->lock);
variable_name_size = var_name_strnsize(variable_name,
@@ -498,21 +464,19 @@
* and may end up looping here forever.
*/
if (duplicates &&
- variable_is_present(variable_name, &vendor_guid, head)) {
+ variable_is_present(variable_name, &vendor_guid,
+ head)) {
dup_variable_bug(variable_name, &vendor_guid,
variable_name_size);
- if (!atomic)
- spin_lock_irq(&__efivars->lock);
-
status = EFI_NOT_FOUND;
- break;
+ } else {
+ err = func(variable_name, vendor_guid,
+ variable_name_size, data);
+ if (err)
+ status = EFI_NOT_FOUND;
}
- err = func(variable_name, vendor_guid, variable_name_size, data);
- if (err)
- status = EFI_NOT_FOUND;
-
- if (!atomic)
+ if (duplicates)
spin_lock_irq(&__efivars->lock);
break;
diff --git a/drivers/firmware/psci.c b/drivers/firmware/psci.c
index b5d0580..fa4ea22 100644
--- a/drivers/firmware/psci.c
+++ b/drivers/firmware/psci.c
@@ -355,7 +355,7 @@
/* ARM specific CPU idle operations */
#ifdef CONFIG_ARM
-static struct cpuidle_ops psci_cpuidle_ops __initdata = {
+static const struct cpuidle_ops psci_cpuidle_ops __initconst = {
.suspend = psci_cpu_suspend_enter,
.init = psci_dt_cpu_init_idle,
};
diff --git a/drivers/gpu/drm/drm_cache.c b/drivers/gpu/drm/drm_cache.c
index 6743ff7..059f7c3 100644
--- a/drivers/gpu/drm/drm_cache.c
+++ b/drivers/gpu/drm/drm_cache.c
@@ -72,7 +72,7 @@
{
#if defined(CONFIG_X86)
- if (cpu_has_clflush) {
+ if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
drm_cache_flush_clflush(pages, num_pages);
return;
}
@@ -105,7 +105,7 @@
drm_clflush_sg(struct sg_table *st)
{
#if defined(CONFIG_X86)
- if (cpu_has_clflush) {
+ if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
struct sg_page_iter sg_iter;
mb();
@@ -129,7 +129,7 @@
drm_clflush_virt_range(void *addr, unsigned long length)
{
#if defined(CONFIG_X86)
- if (cpu_has_clflush) {
+ if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
const int size = boot_cpu_data.x86_clflush_size;
void *end = addr + length;
addr = (void *)(((unsigned long)addr) & -size);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index dabc089..f2cb9a9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1732,7 +1732,7 @@
if (args->flags & ~(I915_MMAP_WC))
return -EINVAL;
- if (args->flags & I915_MMAP_WC && !cpu_has_pat)
+ if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
return -ENODEV;
obj = drm_gem_object_lookup(dev, file, args->handle);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 1328bc5..b845f46 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -488,7 +488,7 @@
ret = relocate_entry_cpu(obj, reloc, target_offset);
else if (obj->map_and_fenceable)
ret = relocate_entry_gtt(obj, reloc, target_offset);
- else if (cpu_has_clflush)
+ else if (static_cpu_has(X86_FEATURE_CLFLUSH))
ret = relocate_entry_clflush(obj, reloc, target_offset);
else {
WARN_ONCE(1, "Impossible case in relocation handling\n");
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 5c2d13a..ff94007 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -288,7 +288,7 @@
config SENSORS_FAM15H_POWER
tristate "AMD Family 15h processor power"
- depends on X86 && PCI
+ depends on X86 && PCI && CPU_SUP_AMD
help
If you say yes here you get support for processor power
information of your AMD family 15h CPU.
@@ -621,7 +621,8 @@
If you say yes here you get support for ITE IT8705F, IT8712F, IT8716F,
IT8718F, IT8720F, IT8721F, IT8726F, IT8728F, IT8732F, IT8758E,
IT8771E, IT8772E, IT8781F, IT8782F, IT8783E/F, IT8786E, IT8790E,
- IT8603E, IT8620E, and IT8623E sensor chips, and the SiS950 clone.
+ IT8603E, IT8620E, IT8623E, and IT8628E sensor chips, and the SiS950
+ clone.
This driver can also be built as a module. If so, the module
will be called it87.
@@ -821,6 +822,16 @@
This driver can also be built as a module. If so, the module
will be called max197.
+config SENSORS_MAX31722
+tristate "MAX31722 temperature sensor"
+ depends on SPI
+ help
+ Support for the Maxim Integrated MAX31722/MAX31723 digital
+ thermometers/thermostats operating over an SPI interface.
+
+ This driver can also be built as a module. If so, the module
+ will be called max31722.
+
config SENSORS_MAX6639
tristate "Maxim MAX6639 sensor chip"
depends on I2C
diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
index 58cc3ac..2ef5b7c 100644
--- a/drivers/hwmon/Makefile
+++ b/drivers/hwmon/Makefile
@@ -112,6 +112,7 @@
obj-$(CONFIG_SENSORS_MAX1619) += max1619.o
obj-$(CONFIG_SENSORS_MAX1668) += max1668.o
obj-$(CONFIG_SENSORS_MAX197) += max197.o
+obj-$(CONFIG_SENSORS_MAX31722) += max31722.o
obj-$(CONFIG_SENSORS_MAX6639) += max6639.o
obj-$(CONFIG_SENSORS_MAX6642) += max6642.o
obj-$(CONFIG_SENSORS_MAX6650) += max6650.o
diff --git a/drivers/hwmon/ads7828.c b/drivers/hwmon/ads7828.c
index 6c99ee7..ee396ff 100644
--- a/drivers/hwmon/ads7828.c
+++ b/drivers/hwmon/ads7828.c
@@ -120,6 +120,7 @@
unsigned int vref_mv = ADS7828_INT_VREF_MV;
bool diff_input = false;
bool ext_vref = false;
+ unsigned int regval;
data = devm_kzalloc(dev, sizeof(struct ads7828_data), GFP_KERNEL);
if (!data)
@@ -154,6 +155,15 @@
if (!diff_input)
data->cmd_byte |= ADS7828_CMD_SD_SE;
+ /*
+ * Datasheet specifies internal reference voltage is disabled by
+ * default. The internal reference voltage needs to be enabled and
+ * voltage needs to settle before getting valid ADC data. So perform a
+ * dummy read to enable the internal reference voltage.
+ */
+ if (!ext_vref)
+ regmap_read(data->regmap, data->cmd_byte, &regval);
+
hwmon_dev = devm_hwmon_device_register_with_groups(dev, client->name,
data,
ads7828_groups);
diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c
index 4f695d8..eb97a92 100644
--- a/drivers/hwmon/fam15h_power.c
+++ b/drivers/hwmon/fam15h_power.c
@@ -1,7 +1,7 @@
/*
* fam15h_power.c - AMD Family 15h processor power monitoring
*
- * Copyright (c) 2011 Advanced Micro Devices, Inc.
+ * Copyright (c) 2011-2016 Advanced Micro Devices, Inc.
* Author: Andreas Herrmann <herrmann.der.user@googlemail.com>
*
*
@@ -25,6 +25,10 @@
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/bitops.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/time.h>
+#include <linux/sched.h>
#include <asm/processor.h>
#include <asm/msr.h>
@@ -44,8 +48,14 @@
#define FAM15H_MIN_NUM_ATTRS 2
#define FAM15H_NUM_GROUPS 2
+#define MAX_CUS 8
+/* set maximum interval as 1 second */
+#define MAX_INTERVAL 1000
+
+#define MSR_F15H_CU_PWR_ACCUMULATOR 0xc001007a
#define MSR_F15H_CU_MAX_PWR_ACCUMULATOR 0xc001007b
+#define MSR_F15H_PTSC 0xc0010280
#define PCI_DEVICE_ID_AMD_15H_M70H_NB_F4 0x15b4
@@ -59,8 +69,20 @@
struct attribute_group group;
/* maximum accumulated power of a compute unit */
u64 max_cu_acc_power;
+ /* accumulated power of the compute units */
+ u64 cu_acc_power[MAX_CUS];
+ /* performance timestamp counter */
+ u64 cpu_sw_pwr_ptsc[MAX_CUS];
+ /* online/offline status of current compute unit */
+ int cu_on[MAX_CUS];
+ unsigned long power_period;
};
+static bool is_carrizo_or_later(void)
+{
+ return boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model >= 0x60;
+}
+
static ssize_t show_power(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -77,7 +99,7 @@
* On Carrizo and later platforms, TdpRunAvgAccCap bit field
* is extended to 4:31 from 4:25.
*/
- if (boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model >= 0x60) {
+ if (is_carrizo_or_later()) {
running_avg_capture = val >> 4;
running_avg_capture = sign_extend32(running_avg_capture, 27);
} else {
@@ -94,7 +116,7 @@
* On Carrizo and later platforms, ApmTdpLimit bit field
* is extended to 16:31 from 16:28.
*/
- if (boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model >= 0x60)
+ if (is_carrizo_or_later())
tdp_limit = val >> 16;
else
tdp_limit = (val >> 16) & 0x1fff;
@@ -125,6 +147,167 @@
}
static DEVICE_ATTR(power1_crit, S_IRUGO, show_power_crit, NULL);
+static void do_read_registers_on_cu(void *_data)
+{
+ struct fam15h_power_data *data = _data;
+ int cpu, cu;
+
+ cpu = smp_processor_id();
+
+ /*
+ * With the new x86 topology modelling, cpu core id actually
+ * is compute unit id.
+ */
+ cu = cpu_data(cpu).cpu_core_id;
+
+ rdmsrl_safe(MSR_F15H_CU_PWR_ACCUMULATOR, &data->cu_acc_power[cu]);
+ rdmsrl_safe(MSR_F15H_PTSC, &data->cpu_sw_pwr_ptsc[cu]);
+
+ data->cu_on[cu] = 1;
+}
+
+/*
+ * This function is only able to be called when CPUID
+ * Fn8000_0007:EDX[12] is set.
+ */
+static int read_registers(struct fam15h_power_data *data)
+{
+ int this_cpu, ret, cpu;
+ int core, this_core;
+ cpumask_var_t mask;
+
+ ret = zalloc_cpumask_var(&mask, GFP_KERNEL);
+ if (!ret)
+ return -ENOMEM;
+
+ memset(data->cu_on, 0, sizeof(int) * MAX_CUS);
+
+ get_online_cpus();
+ this_cpu = smp_processor_id();
+
+ /*
+ * Choose the first online core of each compute unit, and then
+ * read their MSR values of power and ptsc in a single IPI,
+ * because the MSR values of one CPU core represent the whole
+ * compute unit's.
+ */
+ core = -1;
+
+ for_each_online_cpu(cpu) {
+ this_core = topology_core_id(cpu);
+
+ if (this_core == core)
+ continue;
+
+ core = this_core;
+
+ /* get any CPU on this compute unit */
+ cpumask_set_cpu(cpumask_any(topology_sibling_cpumask(cpu)), mask);
+ }
+
+ if (cpumask_test_cpu(this_cpu, mask))
+ do_read_registers_on_cu(data);
+
+ smp_call_function_many(mask, do_read_registers_on_cu, data, true);
+ put_online_cpus();
+
+ free_cpumask_var(mask);
+
+ return 0;
+}
+
+static ssize_t acc_show_power(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct fam15h_power_data *data = dev_get_drvdata(dev);
+ u64 prev_cu_acc_power[MAX_CUS], prev_ptsc[MAX_CUS],
+ jdelta[MAX_CUS];
+ u64 tdelta, avg_acc;
+ int cu, cu_num, ret;
+ signed long leftover;
+
+ /*
+ * With the new x86 topology modelling, x86_max_cores is the
+ * compute unit number.
+ */
+ cu_num = boot_cpu_data.x86_max_cores;
+
+ ret = read_registers(data);
+ if (ret)
+ return 0;
+
+ for (cu = 0; cu < cu_num; cu++) {
+ prev_cu_acc_power[cu] = data->cu_acc_power[cu];
+ prev_ptsc[cu] = data->cpu_sw_pwr_ptsc[cu];
+ }
+
+ leftover = schedule_timeout_interruptible(msecs_to_jiffies(data->power_period));
+ if (leftover)
+ return 0;
+
+ ret = read_registers(data);
+ if (ret)
+ return 0;
+
+ for (cu = 0, avg_acc = 0; cu < cu_num; cu++) {
+ /* check if current compute unit is online */
+ if (data->cu_on[cu] == 0)
+ continue;
+
+ if (data->cu_acc_power[cu] < prev_cu_acc_power[cu]) {
+ jdelta[cu] = data->max_cu_acc_power + data->cu_acc_power[cu];
+ jdelta[cu] -= prev_cu_acc_power[cu];
+ } else {
+ jdelta[cu] = data->cu_acc_power[cu] - prev_cu_acc_power[cu];
+ }
+ tdelta = data->cpu_sw_pwr_ptsc[cu] - prev_ptsc[cu];
+ jdelta[cu] *= data->cpu_pwr_sample_ratio * 1000;
+ do_div(jdelta[cu], tdelta);
+
+ /* the unit is microWatt */
+ avg_acc += jdelta[cu];
+ }
+
+ return sprintf(buf, "%llu\n", (unsigned long long)avg_acc);
+}
+static DEVICE_ATTR(power1_average, S_IRUGO, acc_show_power, NULL);
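
(Editor's illustration.) The wraparound handling inside the jdelta[] computation above, reduced to a standalone form with made-up numbers:

#include <stdint.h>
#include <stdio.h>

/* delta between two accumulator samples, tolerating one wraparound */
static uint64_t acc_delta(uint64_t prev, uint64_t cur, uint64_t max_acc)
{
	if (cur < prev)
		return max_acc + cur - prev;	/* the accumulator wrapped */
	return cur - prev;
}

int main(void)
{
	/* hypothetical samples around a wrap at max_acc = 1000 */
	printf("%llu\n", (unsigned long long)acc_delta(950, 30, 1000));	/* 80 */
	return 0;
}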
+
+static ssize_t acc_show_power_period(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct fam15h_power_data *data = dev_get_drvdata(dev);
+
+ return sprintf(buf, "%lu\n", data->power_period);
+}
+
+static ssize_t acc_set_power_period(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct fam15h_power_data *data = dev_get_drvdata(dev);
+ unsigned long temp;
+ int ret;
+
+ ret = kstrtoul(buf, 10, &temp);
+ if (ret)
+ return ret;
+
+ if (temp > MAX_INTERVAL)
+ return -EINVAL;
+
+ /* the interval value must be non-zero */
+ if (temp == 0)
+ return -EINVAL;
+
+ data->power_period = temp;
+
+ return count;
+}
+static DEVICE_ATTR(power1_average_interval, S_IRUGO | S_IWUSR,
+ acc_show_power_period, acc_set_power_period);
+
static int fam15h_power_init_attrs(struct pci_dev *pdev,
struct fam15h_power_data *data)
{
@@ -137,6 +320,10 @@
(c->x86_model >= 0x60 && c->x86_model <= 0x7f)))
n += 1;
+ /* check if processor supports accumulated power */
+ if (boot_cpu_has(X86_FEATURE_ACC_POWER))
+ n += 2;
+
fam15h_power_attrs = devm_kcalloc(&pdev->dev, n,
sizeof(*fam15h_power_attrs),
GFP_KERNEL);
@@ -151,6 +338,11 @@
(c->x86_model >= 0x60 && c->x86_model <= 0x7f)))
fam15h_power_attrs[n++] = &dev_attr_power1_input.attr;
+ if (boot_cpu_has(X86_FEATURE_ACC_POWER)) {
+ fam15h_power_attrs[n++] = &dev_attr_power1_average.attr;
+ fam15h_power_attrs[n++] = &dev_attr_power1_average_interval.attr;
+ }
+
data->group.attrs = fam15h_power_attrs;
return 0;
@@ -216,7 +408,7 @@
static int fam15h_power_init_data(struct pci_dev *f4,
struct fam15h_power_data *data)
{
- u32 val, eax, ebx, ecx, edx;
+ u32 val;
u64 tmp;
int ret;
@@ -243,10 +435,9 @@
if (ret)
return ret;
- cpuid(0x80000007, &eax, &ebx, &ecx, &edx);
/* CPUID Fn8000_0007:EDX[12] indicates to support accumulated power */
- if (!(edx & BIT(12)))
+ if (!boot_cpu_has(X86_FEATURE_ACC_POWER))
return 0;
/*
@@ -254,7 +445,7 @@
* sample period to the PTSC counter period by executing CPUID
* Fn8000_0007:ECX
*/
- data->cpu_pwr_sample_ratio = ecx;
+ data->cpu_pwr_sample_ratio = cpuid_ecx(0x80000007);
if (rdmsrl_safe(MSR_F15H_CU_MAX_PWR_ACCUMULATOR, &tmp)) {
pr_err("Failed to read max compute unit power accumulator MSR\n");
@@ -263,7 +454,15 @@
data->max_cu_acc_power = tmp;
- return 0;
+ /*
+ * Milliseconds are a reasonable interval for the measurement.
+ * But it shouldn't be set too long, because several seconds
+ * would cause the read function to hang. So set the default
+ * interval to 10 ms.
+ */
+ data->power_period = 10;
+
+ return read_registers(data);
}
static int fam15h_power_probe(struct pci_dev *pdev,
diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c
index 1896e26..730d840 100644
--- a/drivers/hwmon/it87.c
+++ b/drivers/hwmon/it87.c
@@ -13,6 +13,7 @@
* Supports: IT8603E Super I/O chip w/LPC interface
* IT8620E Super I/O chip w/LPC interface
* IT8623E Super I/O chip w/LPC interface
+ * IT8628E Super I/O chip w/LPC interface
* IT8705F Super I/O chip w/LPC interface
* IT8712F Super I/O chip w/LPC interface
* IT8716F Super I/O chip w/LPC interface
@@ -44,14 +45,11 @@
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
@@ -72,17 +70,18 @@
enum chips { it87, it8712, it8716, it8718, it8720, it8721, it8728, it8732,
it8771, it8772, it8781, it8782, it8783, it8786, it8790, it8603,
- it8620 };
+ it8620, it8628 };
static unsigned short force_id;
module_param(force_id, ushort, 0);
MODULE_PARM_DESC(force_id, "Override the detected device ID");
-static struct platform_device *pdev;
+static struct platform_device *it87_pdev[2];
-#define REG 0x2e /* The register to read/write */
+#define REG_2E 0x2e /* The register to read/write */
+#define REG_4E 0x4e /* Secondary register to read/write */
+
#define DEV 0x07 /* Register: Logical device select */
-#define VAL 0x2f /* The value to read/write */
#define PME 0x04 /* The device with the fan registers in it */
/* The device with the IT8718F/IT8720F VID value in it */
@@ -91,54 +90,55 @@
#define DEVID 0x20 /* Register: Device ID */
#define DEVREV 0x22 /* Register: Device Revision */
-static inline int superio_inb(int reg)
+static inline int superio_inb(int ioreg, int reg)
{
- outb(reg, REG);
- return inb(VAL);
+ outb(reg, ioreg);
+ return inb(ioreg + 1);
}
-static inline void superio_outb(int reg, int val)
+static inline void superio_outb(int ioreg, int reg, int val)
{
- outb(reg, REG);
- outb(val, VAL);
+ outb(reg, ioreg);
+ outb(val, ioreg + 1);
}
-static int superio_inw(int reg)
+static int superio_inw(int ioreg, int reg)
{
int val;
- outb(reg++, REG);
- val = inb(VAL) << 8;
- outb(reg, REG);
- val |= inb(VAL);
+
+ outb(reg++, ioreg);
+ val = inb(ioreg + 1) << 8;
+ outb(reg, ioreg);
+ val |= inb(ioreg + 1);
return val;
}
-static inline void superio_select(int ldn)
+static inline void superio_select(int ioreg, int ldn)
{
- outb(DEV, REG);
- outb(ldn, VAL);
+ outb(DEV, ioreg);
+ outb(ldn, ioreg + 1);
}
-static inline int superio_enter(void)
+static inline int superio_enter(int ioreg)
{
/*
- * Try to reserve REG and REG + 1 for exclusive access.
+ * Try to reserve ioreg and ioreg + 1 for exclusive access.
*/
- if (!request_muxed_region(REG, 2, DRVNAME))
+ if (!request_muxed_region(ioreg, 2, DRVNAME))
return -EBUSY;
- outb(0x87, REG);
- outb(0x01, REG);
- outb(0x55, REG);
- outb(0x55, REG);
+ outb(0x87, ioreg);
+ outb(0x01, ioreg);
+ outb(0x55, ioreg);
+ outb(ioreg == REG_4E ? 0xaa : 0x55, ioreg);
return 0;
}
-static inline void superio_exit(void)
+static inline void superio_exit(int ioreg)
{
- outb(0x02, REG);
- outb(0x02, VAL);
- release_region(REG, 2);
+ outb(0x02, ioreg);
+ outb(0x02, ioreg + 1);
+ release_region(ioreg, 2);
}
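With the accessors parameterized on the base port, probing either standard Super-I/O address (REG_2E or REG_4E) becomes symmetric. A sketch of how they compose, assuming kernel context; this helper is hypothetical and not part of the patch:

/* hypothetical: read the chip ID from a given Super-I/O base port */
static int it87_read_chipid(int ioreg)
{
	int err, chip;

	err = superio_enter(ioreg);	/* unlock: 0x87 0x01 0x55 0x55/0xaa */
	if (err)
		return err;
	chip = superio_inw(ioreg, DEVID);
	superio_exit(ioreg);		/* relock and release the region */
	return chip;			/* e.g. 0x8628 for an IT8628E */
}
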
/* Logical device 4 registers */
@@ -161,6 +161,7 @@
#define IT8603E_DEVID 0x8603
#define IT8620E_DEVID 0x8620
#define IT8623E_DEVID 0x8623
+#define IT8628E_DEVID 0x8628
#define IT87_ACT_REG 0x30
#define IT87_BASE_REG 0x60
@@ -168,6 +169,7 @@
#define IT87_SIO_GPIO1_REG 0x25
#define IT87_SIO_GPIO2_REG 0x26
#define IT87_SIO_GPIO3_REG 0x27
+#define IT87_SIO_GPIO4_REG 0x28
#define IT87_SIO_GPIO5_REG 0x29
#define IT87_SIO_PINX1_REG 0x2a /* Pin selection */
#define IT87_SIO_PINX2_REG 0x2c /* Pin selection */
@@ -217,7 +219,12 @@
#define IT87_REG_FAN_DIV 0x0b
#define IT87_REG_FAN_16BIT 0x0c
-/* Monitors: 9 voltage (0 to 7, battery), 3 temp (1 to 3), 3 fan (1 to 3) */
+/*
+ * Monitors:
+ * - up to 13 voltage (0 to 7, battery, avcc, 10 to 12)
+ * - up to 6 temp (1 to 6)
+ * - up to 6 fan (1 to 6)
+ */
static const u8 IT87_REG_FAN[] = { 0x0d, 0x0e, 0x0f, 0x80, 0x82, 0x4c };
static const u8 IT87_REG_FAN_MIN[] = { 0x10, 0x11, 0x12, 0x84, 0x86, 0x4e };
@@ -227,10 +234,12 @@
#define IT87_REG_FAN_MAIN_CTRL 0x13
#define IT87_REG_FAN_CTL 0x14
-#define IT87_REG_PWM(nr) (0x15 + (nr))
-#define IT87_REG_PWM_DUTY(nr) (0x63 + (nr) * 8)
+static const u8 IT87_REG_PWM[] = { 0x15, 0x16, 0x17, 0x7f, 0xa7, 0xaf };
+static const u8 IT87_REG_PWM_DUTY[] = { 0x63, 0x6b, 0x73, 0x7b, 0xa3, 0xab };
-#define IT87_REG_VIN(nr) (0x20 + (nr))
+static const u8 IT87_REG_VIN[] = { 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26,
+ 0x27, 0x28, 0x2f, 0x2c, 0x2d, 0x2e };
+
#define IT87_REG_TEMP(nr) (0x29 + (nr))
#define IT87_REG_VIN_MAX(nr) (0x30 + (nr) * 2)
@@ -245,30 +254,48 @@
#define IT87_REG_CHIPID 0x58
-#define IT87_REG_AUTO_TEMP(nr, i) (0x60 + (nr) * 8 + (i))
-#define IT87_REG_AUTO_PWM(nr, i) (0x65 + (nr) * 8 + (i))
+static const u8 IT87_REG_AUTO_BASE[] = { 0x60, 0x68, 0x70, 0x78, 0xa0, 0xa8 };
+
+#define IT87_REG_AUTO_TEMP(nr, i) (IT87_REG_AUTO_BASE[nr] + (i))
+#define IT87_REG_AUTO_PWM(nr, i) (IT87_REG_AUTO_BASE[nr] + 5 + (i))
+
+#define IT87_REG_TEMP456_ENABLE 0x77
+
+#define NUM_VIN ARRAY_SIZE(IT87_REG_VIN)
+#define NUM_VIN_LIMIT 8
+#define NUM_TEMP 6
+#define NUM_TEMP_OFFSET ARRAY_SIZE(IT87_REG_TEMP_OFFSET)
+#define NUM_TEMP_LIMIT 3
+#define NUM_FAN ARRAY_SIZE(IT87_REG_FAN)
+#define NUM_FAN_DIV 3
+#define NUM_PWM ARRAY_SIZE(IT87_REG_PWM)
+#define NUM_AUTO_PWM ARRAY_SIZE(IT87_REG_PWM)
struct it87_devices {
const char *name;
const char * const suffix;
- u16 features;
+ u32 features;
u8 peci_mask;
u8 old_peci_mask;
};
-#define FEAT_12MV_ADC (1 << 0)
-#define FEAT_NEWER_AUTOPWM (1 << 1)
-#define FEAT_OLD_AUTOPWM (1 << 2)
-#define FEAT_16BIT_FANS (1 << 3)
-#define FEAT_TEMP_OFFSET (1 << 4)
-#define FEAT_TEMP_PECI (1 << 5)
-#define FEAT_TEMP_OLD_PECI (1 << 6)
-#define FEAT_FAN16_CONFIG (1 << 7) /* Need to enable 16-bit fans */
-#define FEAT_FIVE_FANS (1 << 8) /* Supports five fans */
-#define FEAT_VID (1 << 9) /* Set if chip supports VID */
-#define FEAT_IN7_INTERNAL (1 << 10) /* Set if in7 is internal */
-#define FEAT_SIX_FANS (1 << 11) /* Supports six fans */
-#define FEAT_10_9MV_ADC (1 << 12)
+#define FEAT_12MV_ADC BIT(0)
+#define FEAT_NEWER_AUTOPWM BIT(1)
+#define FEAT_OLD_AUTOPWM BIT(2)
+#define FEAT_16BIT_FANS BIT(3)
+#define FEAT_TEMP_OFFSET BIT(4)
+#define FEAT_TEMP_PECI BIT(5)
+#define FEAT_TEMP_OLD_PECI BIT(6)
+#define FEAT_FAN16_CONFIG BIT(7) /* Need to enable 16-bit fans */
+#define FEAT_FIVE_FANS BIT(8) /* Supports five fans */
+#define FEAT_VID BIT(9) /* Set if chip supports VID */
+#define FEAT_IN7_INTERNAL BIT(10) /* Set if in7 is internal */
+#define FEAT_SIX_FANS BIT(11) /* Supports six fans */
+#define FEAT_10_9MV_ADC BIT(12)
+#define FEAT_AVCC3 BIT(13) /* Chip supports in9/AVCC3 */
+#define FEAT_SIX_PWM BIT(14) /* Chip supports 6 pwm chn */
+#define FEAT_PWM_FREQ2 BIT(15) /* Separate pwm freq 2 */
+#define FEAT_SIX_TEMP BIT(16) /* Up to 6 temp sensors */
static const struct it87_devices it87_devices[] = {
[it87] = {
@@ -286,20 +313,22 @@
.name = "it8716",
.suffix = "F",
.features = FEAT_16BIT_FANS | FEAT_TEMP_OFFSET | FEAT_VID
- | FEAT_FAN16_CONFIG | FEAT_FIVE_FANS,
+ | FEAT_FAN16_CONFIG | FEAT_FIVE_FANS | FEAT_PWM_FREQ2,
},
[it8718] = {
.name = "it8718",
.suffix = "F",
.features = FEAT_16BIT_FANS | FEAT_TEMP_OFFSET | FEAT_VID
- | FEAT_TEMP_OLD_PECI | FEAT_FAN16_CONFIG | FEAT_FIVE_FANS,
+ | FEAT_TEMP_OLD_PECI | FEAT_FAN16_CONFIG | FEAT_FIVE_FANS
+ | FEAT_PWM_FREQ2,
.old_peci_mask = 0x4,
},
[it8720] = {
.name = "it8720",
.suffix = "F",
.features = FEAT_16BIT_FANS | FEAT_TEMP_OFFSET | FEAT_VID
- | FEAT_TEMP_OLD_PECI | FEAT_FAN16_CONFIG | FEAT_FIVE_FANS,
+ | FEAT_TEMP_OLD_PECI | FEAT_FAN16_CONFIG | FEAT_FIVE_FANS
+ | FEAT_PWM_FREQ2,
.old_peci_mask = 0x4,
},
[it8721] = {
@@ -307,7 +336,8 @@
.suffix = "F",
.features = FEAT_NEWER_AUTOPWM | FEAT_12MV_ADC | FEAT_16BIT_FANS
| FEAT_TEMP_OFFSET | FEAT_TEMP_OLD_PECI | FEAT_TEMP_PECI
- | FEAT_FAN16_CONFIG | FEAT_FIVE_FANS | FEAT_IN7_INTERNAL,
+ | FEAT_FAN16_CONFIG | FEAT_FIVE_FANS | FEAT_IN7_INTERNAL
+ | FEAT_PWM_FREQ2,
.peci_mask = 0x05,
.old_peci_mask = 0x02, /* Actually reports PCH */
},
@@ -316,7 +346,7 @@
.suffix = "F",
.features = FEAT_NEWER_AUTOPWM | FEAT_12MV_ADC | FEAT_16BIT_FANS
| FEAT_TEMP_OFFSET | FEAT_TEMP_PECI | FEAT_FIVE_FANS
- | FEAT_IN7_INTERNAL,
+ | FEAT_IN7_INTERNAL | FEAT_PWM_FREQ2,
.peci_mask = 0x07,
},
[it8732] = {
@@ -332,7 +362,8 @@
.name = "it8771",
.suffix = "E",
.features = FEAT_NEWER_AUTOPWM | FEAT_12MV_ADC | FEAT_16BIT_FANS
- | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI | FEAT_IN7_INTERNAL,
+ | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI | FEAT_IN7_INTERNAL
+ | FEAT_PWM_FREQ2,
/* PECI: guesswork */
/* 12mV ADC (OHM) */
/* 16 bit fans (OHM) */
@@ -343,7 +374,8 @@
.name = "it8772",
.suffix = "E",
.features = FEAT_NEWER_AUTOPWM | FEAT_12MV_ADC | FEAT_16BIT_FANS
- | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI | FEAT_IN7_INTERNAL,
+ | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI | FEAT_IN7_INTERNAL
+ | FEAT_PWM_FREQ2,
/* PECI (coreboot) */
/* 12mV ADC (HWSensors4, OHM) */
/* 16 bit fans (HWSensors4, OHM) */
@@ -354,42 +386,45 @@
.name = "it8781",
.suffix = "F",
.features = FEAT_16BIT_FANS | FEAT_TEMP_OFFSET
- | FEAT_TEMP_OLD_PECI | FEAT_FAN16_CONFIG,
+ | FEAT_TEMP_OLD_PECI | FEAT_FAN16_CONFIG | FEAT_PWM_FREQ2,
.old_peci_mask = 0x4,
},
[it8782] = {
.name = "it8782",
.suffix = "F",
.features = FEAT_16BIT_FANS | FEAT_TEMP_OFFSET
- | FEAT_TEMP_OLD_PECI | FEAT_FAN16_CONFIG,
+ | FEAT_TEMP_OLD_PECI | FEAT_FAN16_CONFIG | FEAT_PWM_FREQ2,
.old_peci_mask = 0x4,
},
[it8783] = {
.name = "it8783",
.suffix = "E/F",
.features = FEAT_16BIT_FANS | FEAT_TEMP_OFFSET
- | FEAT_TEMP_OLD_PECI | FEAT_FAN16_CONFIG,
+ | FEAT_TEMP_OLD_PECI | FEAT_FAN16_CONFIG | FEAT_PWM_FREQ2,
.old_peci_mask = 0x4,
},
[it8786] = {
.name = "it8786",
.suffix = "E",
.features = FEAT_NEWER_AUTOPWM | FEAT_12MV_ADC | FEAT_16BIT_FANS
- | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI | FEAT_IN7_INTERNAL,
+ | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI | FEAT_IN7_INTERNAL
+ | FEAT_PWM_FREQ2,
.peci_mask = 0x07,
},
[it8790] = {
.name = "it8790",
.suffix = "E",
.features = FEAT_NEWER_AUTOPWM | FEAT_12MV_ADC | FEAT_16BIT_FANS
- | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI | FEAT_IN7_INTERNAL,
+ | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI | FEAT_IN7_INTERNAL
+ | FEAT_PWM_FREQ2,
.peci_mask = 0x07,
},
[it8603] = {
.name = "it8603",
.suffix = "E",
.features = FEAT_NEWER_AUTOPWM | FEAT_12MV_ADC | FEAT_16BIT_FANS
- | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI | FEAT_IN7_INTERNAL,
+ | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI | FEAT_IN7_INTERNAL
+ | FEAT_AVCC3 | FEAT_PWM_FREQ2,
.peci_mask = 0x07,
},
[it8620] = {
@@ -397,7 +432,17 @@
.suffix = "E",
.features = FEAT_NEWER_AUTOPWM | FEAT_12MV_ADC | FEAT_16BIT_FANS
| FEAT_TEMP_OFFSET | FEAT_TEMP_PECI | FEAT_SIX_FANS
- | FEAT_IN7_INTERNAL,
+ | FEAT_IN7_INTERNAL | FEAT_SIX_PWM | FEAT_PWM_FREQ2
+ | FEAT_SIX_TEMP,
+ .peci_mask = 0x07,
+ },
+ [it8628] = {
+ .name = "it8628",
+ .suffix = "E",
+ .features = FEAT_NEWER_AUTOPWM | FEAT_12MV_ADC | FEAT_16BIT_FANS
+ | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI | FEAT_SIX_FANS
+ | FEAT_IN7_INTERNAL | FEAT_SIX_PWM | FEAT_PWM_FREQ2
+ | FEAT_SIX_TEMP,
.peci_mask = 0x07,
},
};
@@ -409,16 +454,20 @@
#define has_old_autopwm(data) ((data)->features & FEAT_OLD_AUTOPWM)
#define has_temp_offset(data) ((data)->features & FEAT_TEMP_OFFSET)
#define has_temp_peci(data, nr) (((data)->features & FEAT_TEMP_PECI) && \
- ((data)->peci_mask & (1 << nr)))
+ ((data)->peci_mask & BIT(nr)))
#define has_temp_old_peci(data, nr) \
(((data)->features & FEAT_TEMP_OLD_PECI) && \
- ((data)->old_peci_mask & (1 << nr)))
+ ((data)->old_peci_mask & BIT(nr)))
#define has_fan16_config(data) ((data)->features & FEAT_FAN16_CONFIG)
#define has_five_fans(data) ((data)->features & (FEAT_FIVE_FANS | \
FEAT_SIX_FANS))
#define has_vid(data) ((data)->features & FEAT_VID)
#define has_in7_internal(data) ((data)->features & FEAT_IN7_INTERNAL)
#define has_six_fans(data) ((data)->features & FEAT_SIX_FANS)
+#define has_avcc3(data) ((data)->features & FEAT_AVCC3)
+#define has_six_pwm(data) ((data)->features & FEAT_SIX_PWM)
+#define has_pwm_freq2(data) ((data)->features & FEAT_PWM_FREQ2)
+#define has_six_temp(data) ((data)->features & FEAT_SIX_TEMP)
struct it87_sio_data {
enum chips type;
@@ -440,7 +489,7 @@
* The structure is dynamically allocated.
*/
struct it87_data {
- struct device *hwmon_dev;
+ const struct attribute_group *groups[7];
enum chips type;
u16 features;
u8 peci_mask;
@@ -453,17 +502,21 @@
unsigned long last_updated; /* In jiffies */
u16 in_scaled; /* Internal voltage sensors are scaled */
- u8 in[10][3]; /* [nr][0]=in, [1]=min, [2]=max */
+ u16 in_internal; /* Bitfield, internal sensors (for labels) */
+ u16 has_in; /* Bitfield, voltage sensors enabled */
+ u8 in[NUM_VIN][3]; /* [nr][0]=in, [1]=min, [2]=max */
u8 has_fan; /* Bitfield, fans enabled */
- u16 fan[6][2]; /* Register values, [nr][0]=fan, [1]=min */
+ u16 fan[NUM_FAN][2]; /* Register values, [nr][0]=fan, [1]=min */
u8 has_temp; /* Bitfield, temp sensors enabled */
- s8 temp[3][4]; /* [nr][0]=temp, [1]=min, [2]=max, [3]=offset */
+ s8 temp[NUM_TEMP][4]; /* [nr][0]=temp, [1]=min, [2]=max, [3]=offset */
u8 sensor; /* Register value (IT87_REG_TEMP_ENABLE) */
u8 extra; /* Register value (IT87_REG_TEMP_EXTRA) */
- u8 fan_div[3]; /* Register encoding, shifted right */
+ u8 fan_div[NUM_FAN_DIV];/* Register encoding, shifted right */
+ bool has_vid; /* True if VID supported */
u8 vid; /* Register encoding, combined */
u8 vrm;
u32 alarms; /* Register encoding, combined */
+ bool has_beep; /* true if beep supported */
u8 beeps; /* Register encoding */
u8 fan_main_ctrl; /* Register value */
u8 fan_ctl; /* Register value */
@@ -478,13 +531,14 @@
* is no longer needed, but it is still done to keep the driver
* simple.
*/
- u8 pwm_ctrl[3]; /* Register value */
- u8 pwm_duty[3]; /* Manual PWM value set by user */
- u8 pwm_temp_map[3]; /* PWM to temp. chan. mapping (bits 1-0) */
+ u8 has_pwm; /* Bitfield, pwm control enabled */
+ u8 pwm_ctrl[NUM_PWM]; /* Register value */
+ u8 pwm_duty[NUM_PWM]; /* Manual PWM value set by user */
+ u8 pwm_temp_map[NUM_PWM];/* PWM to temp. chan. mapping (bits 1-0) */
/* Automatic fan speed control registers */
- u8 auto_pwm[3][4]; /* [nr][3] is hard-coded */
- s8 auto_temp[3][5]; /* [nr][0] is point1_temp_hyst */
+ u8 auto_pwm[NUM_AUTO_PWM][4]; /* [nr][3] is hard-coded */
+ s8 auto_temp[NUM_AUTO_PWM][5]; /* [nr][0] is point1_temp_hyst */
};
static int adc_lsb(const struct it87_data *data, int nr)
@@ -497,7 +551,7 @@
lsb = 109;
else
lsb = 160;
- if (data->in_scaled & (1 << nr))
+ if (data->in_scaled & BIT(nr))
lsb <<= 1;
return lsb;
}
@@ -554,15 +608,16 @@
return (reg & 0x7f) << 1;
}
-
static int DIV_TO_REG(int val)
{
int answer = 0;
+
while (answer < 7 && (val >>= 1))
answer++;
return answer;
}
-#define DIV_FROM_REG(val) (1 << (val))
+
+#define DIV_FROM_REG(val) BIT(val)
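DIV_TO_REG() above returns floor(log2(val)) capped at 7, so a requested divider is rounded down to the nearest power of two, and DIV_FROM_REG() inverts the encoding. A standalone sanity check, with the two definitions copied in so it compiles on its own:

#include <assert.h>

static int DIV_TO_REG(int val)
{
	int answer = 0;

	while (answer < 7 && (val >>= 1))
		answer++;
	return answer;
}

#define DIV_FROM_REG(val)	(1UL << (val))

int main(void)
{
	assert(DIV_TO_REG(1) == 0 && DIV_FROM_REG(0) == 1);
	assert(DIV_TO_REG(8) == 3 && DIV_FROM_REG(3) == 8);
	assert(DIV_TO_REG(5) == 2);	/* 5 rounds down to 4 */
	assert(DIV_TO_REG(1024) == 7);	/* capped at divider 128 */
	return 0;
}
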
/*
* PWM base frequencies. The frequency has to be divided by either 128 or 256,
@@ -585,32 +640,204 @@
750000,
};
-static int it87_probe(struct platform_device *pdev);
-static int it87_remove(struct platform_device *pdev);
+/*
+ * Must be called with data->update_lock held, except during initialization.
+ * We ignore the IT87 BUSY flag at this moment - waiting for it could lead
+ * to deadlocks, would slow down IT87 accesses, and should not be necessary.
+ */
+static int it87_read_value(struct it87_data *data, u8 reg)
+{
+ outb_p(reg, data->addr + IT87_ADDR_REG_OFFSET);
+ return inb_p(data->addr + IT87_DATA_REG_OFFSET);
+}
-static int it87_read_value(struct it87_data *data, u8 reg);
-static void it87_write_value(struct it87_data *data, u8 reg, u8 value);
-static struct it87_data *it87_update_device(struct device *dev);
-static int it87_check_pwm(struct device *dev);
-static void it87_init_device(struct platform_device *pdev);
+/*
+ * Must be called with data->update_lock held, except during initialization.
+ * We ignore the IT87 BUSY flag at this moment - waiting for it could lead
+ * to deadlocks, would slow down IT87 accesses, and should not be necessary.
+ */
+static void it87_write_value(struct it87_data *data, u8 reg, u8 value)
+{
+ outb_p(reg, data->addr + IT87_ADDR_REG_OFFSET);
+ outb_p(value, data->addr + IT87_DATA_REG_OFFSET);
+}
+static void it87_update_pwm_ctrl(struct it87_data *data, int nr)
+{
+ data->pwm_ctrl[nr] = it87_read_value(data, IT87_REG_PWM[nr]);
+ if (has_newer_autopwm(data)) {
+ data->pwm_temp_map[nr] = data->pwm_ctrl[nr] & 0x03;
+ data->pwm_duty[nr] = it87_read_value(data,
+ IT87_REG_PWM_DUTY[nr]);
+ } else {
+ if (data->pwm_ctrl[nr] & 0x80) /* Automatic mode */
+ data->pwm_temp_map[nr] = data->pwm_ctrl[nr] & 0x03;
+ else /* Manual mode */
+ data->pwm_duty[nr] = data->pwm_ctrl[nr] & 0x7f;
+ }
-static struct platform_driver it87_driver = {
- .driver = {
- .name = DRVNAME,
- },
- .probe = it87_probe,
- .remove = it87_remove,
-};
+ if (has_old_autopwm(data)) {
+ int i;
+
+ for (i = 0; i < 5 ; i++)
+ data->auto_temp[nr][i] = it87_read_value(data,
+ IT87_REG_AUTO_TEMP(nr, i));
+ for (i = 0; i < 3 ; i++)
+ data->auto_pwm[nr][i] = it87_read_value(data,
+ IT87_REG_AUTO_PWM(nr, i));
+ } else if (has_newer_autopwm(data)) {
+ int i;
+
+ /*
+ * 0: temperature hysteresis (base + 5)
+ * 1: fan off temperature (base + 0)
+ * 2: fan start temperature (base + 1)
+ * 3: fan max temperature (base + 2)
+ */
+ data->auto_temp[nr][0] =
+ it87_read_value(data, IT87_REG_AUTO_TEMP(nr, 5));
+
+ for (i = 0; i < 3 ; i++)
+ data->auto_temp[nr][i + 1] =
+ it87_read_value(data,
+ IT87_REG_AUTO_TEMP(nr, i));
+ /*
+ * 0: start pwm value (base + 3)
+ * 1: pwm slope (base + 4, 1/8th pwm)
+ */
+ data->auto_pwm[nr][0] =
+ it87_read_value(data, IT87_REG_AUTO_TEMP(nr, 3));
+ data->auto_pwm[nr][1] =
+ it87_read_value(data, IT87_REG_AUTO_TEMP(nr, 4));
+ }
+}
+
+static struct it87_data *it87_update_device(struct device *dev)
+{
+ struct it87_data *data = dev_get_drvdata(dev);
+ int i;
+
+ mutex_lock(&data->update_lock);
+
+ if (time_after(jiffies, data->last_updated + HZ + HZ / 2) ||
+ !data->valid) {
+ if (update_vbat) {
+ /*
+			 * Cleared after each update, so reenable. The value
+			 * returned by this read will be the previous value.
+ */
+ it87_write_value(data, IT87_REG_CONFIG,
+ it87_read_value(data, IT87_REG_CONFIG) | 0x40);
+ }
+ for (i = 0; i < NUM_VIN; i++) {
+ if (!(data->has_in & BIT(i)))
+ continue;
+
+ data->in[i][0] =
+ it87_read_value(data, IT87_REG_VIN[i]);
+
+			/* in8 and above (VBAT, AVCC, ...) have no limit registers */
+ if (i >= NUM_VIN_LIMIT)
+ continue;
+
+ data->in[i][1] =
+ it87_read_value(data, IT87_REG_VIN_MIN(i));
+ data->in[i][2] =
+ it87_read_value(data, IT87_REG_VIN_MAX(i));
+ }
+
+ for (i = 0; i < NUM_FAN; i++) {
+ /* Skip disabled fans */
+ if (!(data->has_fan & BIT(i)))
+ continue;
+
+ data->fan[i][1] =
+ it87_read_value(data, IT87_REG_FAN_MIN[i]);
+ data->fan[i][0] = it87_read_value(data,
+ IT87_REG_FAN[i]);
+ /* Add high byte if in 16-bit mode */
+ if (has_16bit_fans(data)) {
+ data->fan[i][0] |= it87_read_value(data,
+ IT87_REG_FANX[i]) << 8;
+ data->fan[i][1] |= it87_read_value(data,
+ IT87_REG_FANX_MIN[i]) << 8;
+ }
+ }
+ for (i = 0; i < NUM_TEMP; i++) {
+ if (!(data->has_temp & BIT(i)))
+ continue;
+ data->temp[i][0] =
+ it87_read_value(data, IT87_REG_TEMP(i));
+
+ if (has_temp_offset(data) && i < NUM_TEMP_OFFSET)
+ data->temp[i][3] =
+ it87_read_value(data,
+ IT87_REG_TEMP_OFFSET[i]);
+
+ if (i >= NUM_TEMP_LIMIT)
+ continue;
+
+ data->temp[i][1] =
+ it87_read_value(data, IT87_REG_TEMP_LOW(i));
+ data->temp[i][2] =
+ it87_read_value(data, IT87_REG_TEMP_HIGH(i));
+ }
+
+ /* Newer chips don't have clock dividers */
+ if ((data->has_fan & 0x07) && !has_16bit_fans(data)) {
+ i = it87_read_value(data, IT87_REG_FAN_DIV);
+ data->fan_div[0] = i & 0x07;
+ data->fan_div[1] = (i >> 3) & 0x07;
+ data->fan_div[2] = (i & 0x40) ? 3 : 1;
+ }
+
+ data->alarms =
+ it87_read_value(data, IT87_REG_ALARM1) |
+ (it87_read_value(data, IT87_REG_ALARM2) << 8) |
+ (it87_read_value(data, IT87_REG_ALARM3) << 16);
+ data->beeps = it87_read_value(data, IT87_REG_BEEP_ENABLE);
+
+ data->fan_main_ctrl = it87_read_value(data,
+ IT87_REG_FAN_MAIN_CTRL);
+ data->fan_ctl = it87_read_value(data, IT87_REG_FAN_CTL);
+ for (i = 0; i < NUM_PWM; i++) {
+ if (!(data->has_pwm & BIT(i)))
+ continue;
+ it87_update_pwm_ctrl(data, i);
+ }
+
+ data->sensor = it87_read_value(data, IT87_REG_TEMP_ENABLE);
+ data->extra = it87_read_value(data, IT87_REG_TEMP_EXTRA);
+ /*
+ * The IT8705F does not have VID capability.
+ * The IT8718F and later don't use IT87_REG_VID for the
+ * same purpose.
+ */
+ if (data->type == it8712 || data->type == it8716) {
+ data->vid = it87_read_value(data, IT87_REG_VID);
+ /*
+ * The older IT8712F revisions had only 5 VID pins,
+ * but we assume it is always safe to read 6 bits.
+ */
+ data->vid &= 0x3f;
+ }
+ data->last_updated = jiffies;
+ data->valid = 1;
+ }
+
+ mutex_unlock(&data->update_lock);
+
+ return data;
+}
static ssize_t show_in(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr);
- int nr = sattr->nr;
- int index = sattr->index;
-
struct it87_data *data = it87_update_device(dev);
+ int index = sattr->index;
+ int nr = sattr->nr;
+
return sprintf(buf, "%d\n", in_from_reg(data, nr, data->in[nr][index]));
}
@@ -618,10 +845,9 @@
const char *buf, size_t count)
{
struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr);
- int nr = sattr->nr;
- int index = sattr->index;
-
struct it87_data *data = dev_get_drvdata(dev);
+ int index = sattr->index;
+ int nr = sattr->nr;
unsigned long val;
if (kstrtoul(buf, 10, &val) < 0)
@@ -687,8 +913,11 @@
static SENSOR_DEVICE_ATTR_2(in8_input, S_IRUGO, show_in, NULL, 8, 0);
static SENSOR_DEVICE_ATTR_2(in9_input, S_IRUGO, show_in, NULL, 9, 0);
+static SENSOR_DEVICE_ATTR_2(in10_input, S_IRUGO, show_in, NULL, 10, 0);
+static SENSOR_DEVICE_ATTR_2(in11_input, S_IRUGO, show_in, NULL, 11, 0);
+static SENSOR_DEVICE_ATTR_2(in12_input, S_IRUGO, show_in, NULL, 12, 0);
-/* 3 temperatures */
+/* Up to 6 temperatures */
static ssize_t show_temp(struct device *dev, struct device_attribute *attr,
char *buf)
{
@@ -761,6 +990,9 @@
2, 2);
static SENSOR_DEVICE_ATTR_2(temp3_offset, S_IRUGO | S_IWUSR, show_temp,
set_temp, 2, 3);
+static SENSOR_DEVICE_ATTR_2(temp4_input, S_IRUGO, show_temp, NULL, 3, 0);
+static SENSOR_DEVICE_ATTR_2(temp5_input, S_IRUGO, show_temp, NULL, 4, 0);
+static SENSOR_DEVICE_ATTR_2(temp6_input, S_IRUGO, show_temp, NULL, 5, 0);
static ssize_t show_temp_type(struct device *dev, struct device_attribute *attr,
char *buf)
@@ -771,8 +1003,8 @@
u8 reg = data->sensor; /* In case value is updated while used */
u8 extra = data->extra;
- if ((has_temp_peci(data, nr) && (reg >> 6 == nr + 1))
- || (has_temp_old_peci(data, nr) && (extra & 0x80)))
+ if ((has_temp_peci(data, nr) && (reg >> 6 == nr + 1)) ||
+ (has_temp_old_peci(data, nr) && (extra & 0x80)))
return sprintf(buf, "6\n"); /* Intel PECI */
if (reg & (1 << nr))
return sprintf(buf, "3\n"); /* thermal diode */
@@ -837,18 +1069,19 @@
static SENSOR_DEVICE_ATTR(temp3_type, S_IRUGO | S_IWUSR, show_temp_type,
set_temp_type, 2);
-/* 3 Fans */
+/* 6 Fans */
static int pwm_mode(const struct it87_data *data, int nr)
{
- int ctrl = data->fan_main_ctrl & (1 << nr);
+ if (data->type != it8603 && nr < 3 && !(data->fan_main_ctrl & BIT(nr)))
+ return 0; /* Full speed */
+ if (data->pwm_ctrl[nr] & 0x80)
+ return 2; /* Automatic mode */
+ if ((data->type == it8603 || nr >= 3) &&
+ data->pwm_duty[nr] == pwm_to_reg(data, 0xff))
+ return 0; /* Full speed */
- if (ctrl == 0 && data->type != it8603) /* Full speed */
- return 0;
- if (data->pwm_ctrl[nr] & 0x80) /* Automatic mode */
- return 2;
- else /* Manual mode */
- return 1;
+ return 1; /* Manual mode */
}
static ssize_t show_fan(struct device *dev, struct device_attribute *attr,
@@ -868,39 +1101,49 @@
}
static ssize_t show_fan_div(struct device *dev, struct device_attribute *attr,
- char *buf)
+ char *buf)
{
struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
+ struct it87_data *data = it87_update_device(dev);
int nr = sensor_attr->index;
- struct it87_data *data = it87_update_device(dev);
- return sprintf(buf, "%d\n", DIV_FROM_REG(data->fan_div[nr]));
+ return sprintf(buf, "%lu\n", DIV_FROM_REG(data->fan_div[nr]));
}
+
static ssize_t show_pwm_enable(struct device *dev,
- struct device_attribute *attr, char *buf)
+ struct device_attribute *attr, char *buf)
{
struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
+ struct it87_data *data = it87_update_device(dev);
int nr = sensor_attr->index;
- struct it87_data *data = it87_update_device(dev);
return sprintf(buf, "%d\n", pwm_mode(data, nr));
}
+
static ssize_t show_pwm(struct device *dev, struct device_attribute *attr,
- char *buf)
+ char *buf)
{
struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
+ struct it87_data *data = it87_update_device(dev);
int nr = sensor_attr->index;
- struct it87_data *data = it87_update_device(dev);
return sprintf(buf, "%d\n",
pwm_from_reg(data, data->pwm_duty[nr]));
}
+
static ssize_t show_pwm_freq(struct device *dev, struct device_attribute *attr,
- char *buf)
+ char *buf)
{
+ struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
struct it87_data *data = it87_update_device(dev);
- int index = (data->fan_ctl >> 4) & 0x07;
+ int nr = sensor_attr->index;
unsigned int freq;
+ int index;
+
+ if (has_pwm_freq2(data) && nr == 1)
+ index = (data->extra >> 4) & 0x07;
+ else
+ index = (data->fan_ctl >> 4) & 0x07;
freq = pwm_freq[index] / (has_newer_autopwm(data) ? 256 : 128);
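Worked example of the final division: with the 750 kHz entry from the pwm_freq table above, chips with the newer 8-bit PWM resolution report 750000 / 256 = 2929 Hz, while older chips report 750000 / 128 = 5859 Hz (integer division in both cases).

#include <stdio.h>

int main(void)
{
	unsigned int base = 750000;	/* one pwm_freq[] entry */

	printf("newer chips: %u Hz, older chips: %u Hz\n",
	       base / 256, base / 128);
	return 0;
}
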
@@ -953,12 +1196,11 @@
}
static ssize_t set_fan_div(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count)
+ const char *buf, size_t count)
{
struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
- int nr = sensor_attr->index;
-
struct it87_data *data = dev_get_drvdata(dev);
+ int nr = sensor_attr->index;
unsigned long val;
int min;
u8 old;
@@ -1013,6 +1255,11 @@
if (data->auto_pwm[nr][i] > data->auto_pwm[nr][i + 1])
err = -EINVAL;
}
+ } else if (has_newer_autopwm(data)) {
+ for (i = 1; i < 3; i++) {
+ if (data->auto_temp[nr][i] > data->auto_temp[nr][i + 1])
+ err = -EINVAL;
+ }
}
if (err) {
@@ -1023,13 +1270,12 @@
return err;
}
-static ssize_t set_pwm_enable(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t count)
+static ssize_t set_pwm_enable(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
{
struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
- int nr = sensor_attr->index;
-
struct it87_data *data = dev_get_drvdata(dev);
+ int nr = sensor_attr->index;
long val;
if (kstrtol(buf, 10, &val) < 0 || val < 0 || val > 2)
@@ -1041,21 +1287,30 @@
return -EINVAL;
}
- /* IT8603E does not have on/off mode */
- if (val == 0 && data->type == it8603)
- return -EINVAL;
-
mutex_lock(&data->update_lock);
if (val == 0) {
- int tmp;
- /* make sure the fan is on when in on/off mode */
- tmp = it87_read_value(data, IT87_REG_FAN_CTL);
- it87_write_value(data, IT87_REG_FAN_CTL, tmp | (1 << nr));
- /* set on/off mode */
- data->fan_main_ctrl &= ~(1 << nr);
- it87_write_value(data, IT87_REG_FAN_MAIN_CTRL,
- data->fan_main_ctrl);
+ if (nr < 3 && data->type != it8603) {
+ int tmp;
+ /* make sure the fan is on when in on/off mode */
+ tmp = it87_read_value(data, IT87_REG_FAN_CTL);
+ it87_write_value(data, IT87_REG_FAN_CTL, tmp | BIT(nr));
+ /* set on/off mode */
+ data->fan_main_ctrl &= ~BIT(nr);
+ it87_write_value(data, IT87_REG_FAN_MAIN_CTRL,
+ data->fan_main_ctrl);
+ } else {
+ /* No on/off mode, set maximum pwm value */
+ data->pwm_duty[nr] = pwm_to_reg(data, 0xff);
+ it87_write_value(data, IT87_REG_PWM_DUTY[nr],
+ data->pwm_duty[nr]);
+ /* and set manual mode */
+ data->pwm_ctrl[nr] = has_newer_autopwm(data) ?
+ data->pwm_temp_map[nr] :
+ data->pwm_duty[nr];
+ it87_write_value(data, IT87_REG_PWM[nr],
+ data->pwm_ctrl[nr]);
+ }
} else {
if (val == 1) /* Manual mode */
data->pwm_ctrl[nr] = has_newer_autopwm(data) ?
@@ -1063,11 +1318,11 @@
data->pwm_duty[nr];
else /* Automatic mode */
data->pwm_ctrl[nr] = 0x80 | data->pwm_temp_map[nr];
- it87_write_value(data, IT87_REG_PWM(nr), data->pwm_ctrl[nr]);
+ it87_write_value(data, IT87_REG_PWM[nr], data->pwm_ctrl[nr]);
- if (data->type != it8603) {
+ if (data->type != it8603 && nr < 3) {
/* set SmartGuardian mode */
- data->fan_main_ctrl |= (1 << nr);
+ data->fan_main_ctrl |= BIT(nr);
it87_write_value(data, IT87_REG_FAN_MAIN_CTRL,
data->fan_main_ctrl);
}
@@ -1076,13 +1331,13 @@
mutex_unlock(&data->update_lock);
return count;
}
+
static ssize_t set_pwm(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count)
+ const char *buf, size_t count)
{
struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
- int nr = sensor_attr->index;
-
struct it87_data *data = dev_get_drvdata(dev);
+ int nr = sensor_attr->index;
long val;
if (kstrtol(buf, 10, &val) < 0 || val < 0 || val > 255)
@@ -1099,7 +1354,7 @@
return -EBUSY;
}
data->pwm_duty[nr] = pwm_to_reg(data, val);
- it87_write_value(data, IT87_REG_PWM_DUTY(nr),
+ it87_write_value(data, IT87_REG_PWM_DUTY[nr],
data->pwm_duty[nr]);
} else {
data->pwm_duty[nr] = pwm_to_reg(data, val);
@@ -1109,17 +1364,20 @@
*/
if (!(data->pwm_ctrl[nr] & 0x80)) {
data->pwm_ctrl[nr] = data->pwm_duty[nr];
- it87_write_value(data, IT87_REG_PWM(nr),
+ it87_write_value(data, IT87_REG_PWM[nr],
data->pwm_ctrl[nr]);
}
}
mutex_unlock(&data->update_lock);
return count;
}
-static ssize_t set_pwm_freq(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t count)
+
+static ssize_t set_pwm_freq(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
{
+ struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
struct it87_data *data = dev_get_drvdata(dev);
+ int nr = sensor_attr->index;
unsigned long val;
int i;
@@ -1131,63 +1389,66 @@
/* Search for the nearest available frequency */
for (i = 0; i < 7; i++) {
- if (val > (pwm_freq[i] + pwm_freq[i+1]) / 2)
+ if (val > (pwm_freq[i] + pwm_freq[i + 1]) / 2)
break;
}
mutex_lock(&data->update_lock);
- data->fan_ctl = it87_read_value(data, IT87_REG_FAN_CTL) & 0x8f;
- data->fan_ctl |= i << 4;
- it87_write_value(data, IT87_REG_FAN_CTL, data->fan_ctl);
+ if (nr == 0) {
+ data->fan_ctl = it87_read_value(data, IT87_REG_FAN_CTL) & 0x8f;
+ data->fan_ctl |= i << 4;
+ it87_write_value(data, IT87_REG_FAN_CTL, data->fan_ctl);
+ } else {
+ data->extra = it87_read_value(data, IT87_REG_TEMP_EXTRA) & 0x8f;
+ data->extra |= i << 4;
+ it87_write_value(data, IT87_REG_TEMP_EXTRA, data->extra);
+ }
mutex_unlock(&data->update_lock);
return count;
}
+
static ssize_t show_pwm_temp_map(struct device *dev,
- struct device_attribute *attr, char *buf)
+ struct device_attribute *attr, char *buf)
{
struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
- int nr = sensor_attr->index;
-
struct it87_data *data = it87_update_device(dev);
+ int nr = sensor_attr->index;
int map;
- if (data->pwm_temp_map[nr] < 3)
- map = 1 << data->pwm_temp_map[nr];
- else
- map = 0; /* Should never happen */
- return sprintf(buf, "%d\n", map);
+ map = data->pwm_temp_map[nr];
+ if (map >= 3)
+ map = 0; /* Should never happen */
+ if (nr >= 3) /* pwm channels 3..6 map to temp4..6 */
+ map += 3;
+
+ return sprintf(buf, "%d\n", (int)BIT(map));
}
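The mapping exposed above turns the 2-bit register selector into a one-hot sysfs bitmask, shifted up by three temperature channels for pwm4..pwm6. Enumerating every combination in a standalone sketch:

#include <stdio.h>

int main(void)
{
	int nr, sel;

	for (nr = 0; nr < 6; nr++) {
		for (sel = 0; sel < 3; sel++) {
			int map = sel + (nr >= 3 ? 3 : 0);

			printf("pwm%d selector %d -> auto_channels_temp %d\n",
			       nr + 1, sel, 1 << map);
		}
	}
	return 0;
}
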
+
static ssize_t set_pwm_temp_map(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t count)
+ struct device_attribute *attr, const char *buf,
+ size_t count)
{
struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
- int nr = sensor_attr->index;
-
struct it87_data *data = dev_get_drvdata(dev);
+ int nr = sensor_attr->index;
long val;
u8 reg;
- /*
- * This check can go away if we ever support automatic fan speed
- * control on newer chips.
- */
- if (!has_old_autopwm(data)) {
- dev_notice(dev, "Mapping change disabled for safety reasons\n");
- return -EINVAL;
- }
-
if (kstrtol(buf, 10, &val) < 0)
return -EINVAL;
+ if (nr >= 3)
+ val -= 3;
+
switch (val) {
- case (1 << 0):
+ case BIT(0):
reg = 0x00;
break;
- case (1 << 1):
+ case BIT(1):
reg = 0x01;
break;
- case (1 << 2):
+ case BIT(2):
reg = 0x02;
break;
default:
@@ -1202,14 +1463,14 @@
*/
if (data->pwm_ctrl[nr] & 0x80) {
data->pwm_ctrl[nr] = 0x80 | data->pwm_temp_map[nr];
- it87_write_value(data, IT87_REG_PWM(nr), data->pwm_ctrl[nr]);
+ it87_write_value(data, IT87_REG_PWM[nr], data->pwm_ctrl[nr]);
}
mutex_unlock(&data->update_lock);
return count;
}
-static ssize_t show_auto_pwm(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t show_auto_pwm(struct device *dev, struct device_attribute *attr,
+ char *buf)
{
struct it87_data *data = it87_update_device(dev);
struct sensor_device_attribute_2 *sensor_attr =
@@ -1221,14 +1482,15 @@
pwm_from_reg(data, data->auto_pwm[nr][point]));
}
-static ssize_t set_auto_pwm(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t count)
+static ssize_t set_auto_pwm(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
{
struct it87_data *data = dev_get_drvdata(dev);
struct sensor_device_attribute_2 *sensor_attr =
to_sensor_dev_attr_2(attr);
int nr = sensor_attr->nr;
int point = sensor_attr->index;
+ int regaddr;
long val;
if (kstrtol(buf, 10, &val) < 0 || val < 0 || val > 255)
@@ -1236,26 +1498,65 @@
mutex_lock(&data->update_lock);
data->auto_pwm[nr][point] = pwm_to_reg(data, val);
- it87_write_value(data, IT87_REG_AUTO_PWM(nr, point),
- data->auto_pwm[nr][point]);
+ if (has_newer_autopwm(data))
+ regaddr = IT87_REG_AUTO_TEMP(nr, 3);
+ else
+ regaddr = IT87_REG_AUTO_PWM(nr, point);
+ it87_write_value(data, regaddr, data->auto_pwm[nr][point]);
mutex_unlock(&data->update_lock);
return count;
}
-static ssize_t show_auto_temp(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t show_auto_pwm_slope(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct it87_data *data = it87_update_device(dev);
+ struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
+ int nr = sensor_attr->index;
+
+ return sprintf(buf, "%d\n", data->auto_pwm[nr][1] & 0x7f);
+}
+
+static ssize_t set_auto_pwm_slope(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct it87_data *data = dev_get_drvdata(dev);
+ struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr);
+ int nr = sensor_attr->index;
+ unsigned long val;
+
+ if (kstrtoul(buf, 10, &val) < 0 || val > 127)
+ return -EINVAL;
+
+ mutex_lock(&data->update_lock);
+ data->auto_pwm[nr][1] = (data->auto_pwm[nr][1] & 0x80) | val;
+ it87_write_value(data, IT87_REG_AUTO_TEMP(nr, 4),
+ data->auto_pwm[nr][1]);
+ mutex_unlock(&data->update_lock);
+ return count;
+}
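To illustrate the units of the 7-bit slope field (1/8th PWM count per degree, per the register layout comment earlier in this patch): a stored slope of 12 raises the duty cycle by 12/8 = 1.5 counts per degree above the start temperature. The sketch below only demonstrates the encoding, not the chip's exact ramp behavior:

#include <stdio.h>

int main(void)
{
	int start = 64;		/* pwmN_auto_start, 0..255 */
	int slope = 12;		/* pwmN_auto_slope, 0..127 */
	int dt;

	for (dt = 0; dt <= 10; dt += 5)
		printf("dT=%2d degC -> duty ~ %d\n",
		       dt, start + slope * dt / 8);
	return 0;
}
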
+
+static ssize_t show_auto_temp(struct device *dev, struct device_attribute *attr,
+ char *buf)
{
struct it87_data *data = it87_update_device(dev);
struct sensor_device_attribute_2 *sensor_attr =
to_sensor_dev_attr_2(attr);
int nr = sensor_attr->nr;
int point = sensor_attr->index;
+ int reg;
- return sprintf(buf, "%d\n", TEMP_FROM_REG(data->auto_temp[nr][point]));
+ if (has_old_autopwm(data) || point)
+ reg = data->auto_temp[nr][point];
+ else
+ reg = data->auto_temp[nr][1] - (data->auto_temp[nr][0] & 0x1f);
+
+ return sprintf(buf, "%d\n", TEMP_FROM_REG(reg));
}
-static ssize_t set_auto_temp(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t count)
+static ssize_t set_auto_temp(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
{
struct it87_data *data = dev_get_drvdata(dev);
struct sensor_device_attribute_2 *sensor_attr =
@@ -1263,14 +1564,24 @@
int nr = sensor_attr->nr;
int point = sensor_attr->index;
long val;
+ int reg;
if (kstrtol(buf, 10, &val) < 0 || val < -128000 || val > 127000)
return -EINVAL;
mutex_lock(&data->update_lock);
- data->auto_temp[nr][point] = TEMP_TO_REG(val);
- it87_write_value(data, IT87_REG_AUTO_TEMP(nr, point),
- data->auto_temp[nr][point]);
+ if (has_newer_autopwm(data) && !point) {
+ reg = data->auto_temp[nr][1] - TEMP_TO_REG(val);
+ reg = clamp_val(reg, 0, 0x1f) | (data->auto_temp[nr][0] & 0xe0);
+ data->auto_temp[nr][0] = reg;
+ it87_write_value(data, IT87_REG_AUTO_TEMP(nr, 5), reg);
+ } else {
+ reg = TEMP_TO_REG(val);
+ data->auto_temp[nr][point] = reg;
+ if (has_newer_autopwm(data))
+ point--;
+ it87_write_value(data, IT87_REG_AUTO_TEMP(nr, point), reg);
+ }
mutex_unlock(&data->update_lock);
return count;
}
@@ -1308,8 +1619,9 @@
static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR,
show_pwm_enable, set_pwm_enable, 0);
static SENSOR_DEVICE_ATTR(pwm1, S_IRUGO | S_IWUSR, show_pwm, set_pwm, 0);
-static DEVICE_ATTR(pwm1_freq, S_IRUGO | S_IWUSR, show_pwm_freq, set_pwm_freq);
-static SENSOR_DEVICE_ATTR(pwm1_auto_channels_temp, S_IRUGO | S_IWUSR,
+static SENSOR_DEVICE_ATTR(pwm1_freq, S_IRUGO | S_IWUSR, show_pwm_freq,
+ set_pwm_freq, 0);
+static SENSOR_DEVICE_ATTR(pwm1_auto_channels_temp, S_IRUGO,
show_pwm_temp_map, set_pwm_temp_map, 0);
static SENSOR_DEVICE_ATTR_2(pwm1_auto_point1_pwm, S_IRUGO | S_IWUSR,
show_auto_pwm, set_auto_pwm, 0, 0);
@@ -1329,12 +1641,16 @@
show_auto_temp, set_auto_temp, 0, 3);
static SENSOR_DEVICE_ATTR_2(pwm1_auto_point4_temp, S_IRUGO | S_IWUSR,
show_auto_temp, set_auto_temp, 0, 4);
+static SENSOR_DEVICE_ATTR_2(pwm1_auto_start, S_IRUGO | S_IWUSR,
+ show_auto_pwm, set_auto_pwm, 0, 0);
+static SENSOR_DEVICE_ATTR(pwm1_auto_slope, S_IRUGO | S_IWUSR,
+ show_auto_pwm_slope, set_auto_pwm_slope, 0);
static SENSOR_DEVICE_ATTR(pwm2_enable, S_IRUGO | S_IWUSR,
show_pwm_enable, set_pwm_enable, 1);
static SENSOR_DEVICE_ATTR(pwm2, S_IRUGO | S_IWUSR, show_pwm, set_pwm, 1);
-static DEVICE_ATTR(pwm2_freq, S_IRUGO, show_pwm_freq, NULL);
-static SENSOR_DEVICE_ATTR(pwm2_auto_channels_temp, S_IRUGO | S_IWUSR,
+static SENSOR_DEVICE_ATTR(pwm2_freq, S_IRUGO, show_pwm_freq, set_pwm_freq, 1);
+static SENSOR_DEVICE_ATTR(pwm2_auto_channels_temp, S_IRUGO,
show_pwm_temp_map, set_pwm_temp_map, 1);
static SENSOR_DEVICE_ATTR_2(pwm2_auto_point1_pwm, S_IRUGO | S_IWUSR,
show_auto_pwm, set_auto_pwm, 1, 0);
@@ -1354,12 +1670,16 @@
show_auto_temp, set_auto_temp, 1, 3);
static SENSOR_DEVICE_ATTR_2(pwm2_auto_point4_temp, S_IRUGO | S_IWUSR,
show_auto_temp, set_auto_temp, 1, 4);
+static SENSOR_DEVICE_ATTR_2(pwm2_auto_start, S_IRUGO | S_IWUSR,
+ show_auto_pwm, set_auto_pwm, 1, 0);
+static SENSOR_DEVICE_ATTR(pwm2_auto_slope, S_IRUGO | S_IWUSR,
+ show_auto_pwm_slope, set_auto_pwm_slope, 1);
static SENSOR_DEVICE_ATTR(pwm3_enable, S_IRUGO | S_IWUSR,
show_pwm_enable, set_pwm_enable, 2);
static SENSOR_DEVICE_ATTR(pwm3, S_IRUGO | S_IWUSR, show_pwm, set_pwm, 2);
-static DEVICE_ATTR(pwm3_freq, S_IRUGO, show_pwm_freq, NULL);
-static SENSOR_DEVICE_ATTR(pwm3_auto_channels_temp, S_IRUGO | S_IWUSR,
+static SENSOR_DEVICE_ATTR(pwm3_freq, S_IRUGO, show_pwm_freq, NULL, 2);
+static SENSOR_DEVICE_ATTR(pwm3_auto_channels_temp, S_IRUGO,
show_pwm_temp_map, set_pwm_temp_map, 2);
static SENSOR_DEVICE_ATTR_2(pwm3_auto_point1_pwm, S_IRUGO | S_IWUSR,
show_auto_pwm, set_auto_pwm, 2, 0);
@@ -1379,30 +1699,94 @@
show_auto_temp, set_auto_temp, 2, 3);
static SENSOR_DEVICE_ATTR_2(pwm3_auto_point4_temp, S_IRUGO | S_IWUSR,
show_auto_temp, set_auto_temp, 2, 4);
+static SENSOR_DEVICE_ATTR_2(pwm3_auto_start, S_IRUGO | S_IWUSR,
+ show_auto_pwm, set_auto_pwm, 2, 0);
+static SENSOR_DEVICE_ATTR(pwm3_auto_slope, S_IRUGO | S_IWUSR,
+ show_auto_pwm_slope, set_auto_pwm_slope, 2);
+
+static SENSOR_DEVICE_ATTR(pwm4_enable, S_IRUGO | S_IWUSR,
+ show_pwm_enable, set_pwm_enable, 3);
+static SENSOR_DEVICE_ATTR(pwm4, S_IRUGO | S_IWUSR, show_pwm, set_pwm, 3);
+static SENSOR_DEVICE_ATTR(pwm4_freq, S_IRUGO, show_pwm_freq, NULL, 3);
+static SENSOR_DEVICE_ATTR(pwm4_auto_channels_temp, S_IRUGO,
+ show_pwm_temp_map, set_pwm_temp_map, 3);
+static SENSOR_DEVICE_ATTR_2(pwm4_auto_point1_temp, S_IRUGO | S_IWUSR,
+			    show_auto_temp, set_auto_temp, 3, 1);
+static SENSOR_DEVICE_ATTR_2(pwm4_auto_point1_temp_hyst, S_IRUGO | S_IWUSR,
+			    show_auto_temp, set_auto_temp, 3, 0);
+static SENSOR_DEVICE_ATTR_2(pwm4_auto_point2_temp, S_IRUGO | S_IWUSR,
+			    show_auto_temp, set_auto_temp, 3, 2);
+static SENSOR_DEVICE_ATTR_2(pwm4_auto_point3_temp, S_IRUGO | S_IWUSR,
+			    show_auto_temp, set_auto_temp, 3, 3);
+static SENSOR_DEVICE_ATTR_2(pwm4_auto_start, S_IRUGO | S_IWUSR,
+ show_auto_pwm, set_auto_pwm, 3, 0);
+static SENSOR_DEVICE_ATTR(pwm4_auto_slope, S_IRUGO | S_IWUSR,
+ show_auto_pwm_slope, set_auto_pwm_slope, 3);
+
+static SENSOR_DEVICE_ATTR(pwm5_enable, S_IRUGO | S_IWUSR,
+ show_pwm_enable, set_pwm_enable, 4);
+static SENSOR_DEVICE_ATTR(pwm5, S_IRUGO | S_IWUSR, show_pwm, set_pwm, 4);
+static SENSOR_DEVICE_ATTR(pwm5_freq, S_IRUGO, show_pwm_freq, NULL, 4);
+static SENSOR_DEVICE_ATTR(pwm5_auto_channels_temp, S_IRUGO,
+ show_pwm_temp_map, set_pwm_temp_map, 4);
+static SENSOR_DEVICE_ATTR_2(pwm5_auto_point1_temp, S_IRUGO | S_IWUSR,
+			    show_auto_temp, set_auto_temp, 4, 1);
+static SENSOR_DEVICE_ATTR_2(pwm5_auto_point1_temp_hyst, S_IRUGO | S_IWUSR,
+			    show_auto_temp, set_auto_temp, 4, 0);
+static SENSOR_DEVICE_ATTR_2(pwm5_auto_point2_temp, S_IRUGO | S_IWUSR,
+			    show_auto_temp, set_auto_temp, 4, 2);
+static SENSOR_DEVICE_ATTR_2(pwm5_auto_point3_temp, S_IRUGO | S_IWUSR,
+			    show_auto_temp, set_auto_temp, 4, 3);
+static SENSOR_DEVICE_ATTR_2(pwm5_auto_start, S_IRUGO | S_IWUSR,
+ show_auto_pwm, set_auto_pwm, 4, 0);
+static SENSOR_DEVICE_ATTR(pwm5_auto_slope, S_IRUGO | S_IWUSR,
+ show_auto_pwm_slope, set_auto_pwm_slope, 4);
+
+static SENSOR_DEVICE_ATTR(pwm6_enable, S_IRUGO | S_IWUSR,
+ show_pwm_enable, set_pwm_enable, 5);
+static SENSOR_DEVICE_ATTR(pwm6, S_IRUGO | S_IWUSR, show_pwm, set_pwm, 5);
+static SENSOR_DEVICE_ATTR(pwm6_freq, S_IRUGO, show_pwm_freq, NULL, 5);
+static SENSOR_DEVICE_ATTR(pwm6_auto_channels_temp, S_IRUGO,
+ show_pwm_temp_map, set_pwm_temp_map, 5);
+static SENSOR_DEVICE_ATTR_2(pwm6_auto_point1_temp, S_IRUGO | S_IWUSR,
+			    show_auto_temp, set_auto_temp, 5, 1);
+static SENSOR_DEVICE_ATTR_2(pwm6_auto_point1_temp_hyst, S_IRUGO | S_IWUSR,
+			    show_auto_temp, set_auto_temp, 5, 0);
+static SENSOR_DEVICE_ATTR_2(pwm6_auto_point2_temp, S_IRUGO | S_IWUSR,
+			    show_auto_temp, set_auto_temp, 5, 2);
+static SENSOR_DEVICE_ATTR_2(pwm6_auto_point3_temp, S_IRUGO | S_IWUSR,
+			    show_auto_temp, set_auto_temp, 5, 3);
+static SENSOR_DEVICE_ATTR_2(pwm6_auto_start, S_IRUGO | S_IWUSR,
+ show_auto_pwm, set_auto_pwm, 5, 0);
+static SENSOR_DEVICE_ATTR(pwm6_auto_slope, S_IRUGO | S_IWUSR,
+ show_auto_pwm_slope, set_auto_pwm_slope, 5);
/* Alarms */
static ssize_t show_alarms(struct device *dev, struct device_attribute *attr,
- char *buf)
+ char *buf)
{
struct it87_data *data = it87_update_device(dev);
+
return sprintf(buf, "%u\n", data->alarms);
}
static DEVICE_ATTR(alarms, S_IRUGO, show_alarms, NULL);
static ssize_t show_alarm(struct device *dev, struct device_attribute *attr,
- char *buf)
+ char *buf)
{
- int bitnr = to_sensor_dev_attr(attr)->index;
struct it87_data *data = it87_update_device(dev);
+ int bitnr = to_sensor_dev_attr(attr)->index;
+
return sprintf(buf, "%u\n", (data->alarms >> bitnr) & 1);
}
-static ssize_t clear_intrusion(struct device *dev, struct device_attribute
- *attr, const char *buf, size_t count)
+static ssize_t clear_intrusion(struct device *dev,
+ struct device_attribute *attr, const char *buf,
+ size_t count)
{
struct it87_data *data = dev_get_drvdata(dev);
- long val;
int config;
+ long val;
if (kstrtol(buf, 10, &val) < 0 || val != 0)
return -EINVAL;
@@ -1412,7 +1796,7 @@
if (config < 0) {
count = config;
} else {
- config |= 1 << 5;
+ config |= BIT(5);
it87_write_value(data, IT87_REG_CONFIG, config);
/* Invalidate cache to force re-read */
data->valid = 0;
@@ -1443,29 +1827,30 @@
show_alarm, clear_intrusion, 4);
static ssize_t show_beep(struct device *dev, struct device_attribute *attr,
- char *buf)
+ char *buf)
{
- int bitnr = to_sensor_dev_attr(attr)->index;
struct it87_data *data = it87_update_device(dev);
+ int bitnr = to_sensor_dev_attr(attr)->index;
+
return sprintf(buf, "%u\n", (data->beeps >> bitnr) & 1);
}
+
static ssize_t set_beep(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count)
+ const char *buf, size_t count)
{
int bitnr = to_sensor_dev_attr(attr)->index;
struct it87_data *data = dev_get_drvdata(dev);
long val;
- if (kstrtol(buf, 10, &val) < 0
- || (val != 0 && val != 1))
+ if (kstrtol(buf, 10, &val) < 0 || (val != 0 && val != 1))
return -EINVAL;
mutex_lock(&data->update_lock);
data->beeps = it87_read_value(data, IT87_REG_BEEP_ENABLE);
if (val)
- data->beeps |= (1 << bitnr);
+ data->beeps |= BIT(bitnr);
else
- data->beeps &= ~(1 << bitnr);
+ data->beeps &= ~BIT(bitnr);
it87_write_value(data, IT87_REG_BEEP_ENABLE, data->beeps);
mutex_unlock(&data->update_lock);
return count;
@@ -1493,13 +1878,15 @@
static SENSOR_DEVICE_ATTR(temp3_beep, S_IRUGO, show_beep, NULL, 2);
static ssize_t show_vrm_reg(struct device *dev, struct device_attribute *attr,
- char *buf)
+ char *buf)
{
struct it87_data *data = dev_get_drvdata(dev);
+
return sprintf(buf, "%u\n", data->vrm);
}
+
static ssize_t store_vrm_reg(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count)
+ const char *buf, size_t count)
{
struct it87_data *data = dev_get_drvdata(dev);
unsigned long val;
@@ -1514,15 +1901,16 @@
static DEVICE_ATTR(vrm, S_IRUGO | S_IWUSR, show_vrm_reg, store_vrm_reg);
static ssize_t show_vid_reg(struct device *dev, struct device_attribute *attr,
- char *buf)
+ char *buf)
{
struct it87_data *data = it87_update_device(dev);
- return sprintf(buf, "%ld\n", (long) vid_from_reg(data->vid, data->vrm));
+
+ return sprintf(buf, "%ld\n", (long)vid_from_reg(data->vid, data->vrm));
}
static DEVICE_ATTR(cpu0_vid, S_IRUGO, show_vid_reg, NULL);
static ssize_t show_label(struct device *dev, struct device_attribute *attr,
- char *buf)
+ char *buf)
{
static const char * const labels[] = {
"+5V",
@@ -1548,227 +1936,348 @@
static SENSOR_DEVICE_ATTR(in3_label, S_IRUGO, show_label, NULL, 0);
static SENSOR_DEVICE_ATTR(in7_label, S_IRUGO, show_label, NULL, 1);
static SENSOR_DEVICE_ATTR(in8_label, S_IRUGO, show_label, NULL, 2);
-/* special AVCC3 IT8603E in9 */
+/* AVCC3 */
static SENSOR_DEVICE_ATTR(in9_label, S_IRUGO, show_label, NULL, 0);
-static ssize_t show_name(struct device *dev, struct device_attribute
- *devattr, char *buf)
+static umode_t it87_in_is_visible(struct kobject *kobj,
+ struct attribute *attr, int index)
{
+ struct device *dev = container_of(kobj, struct device, kobj);
struct it87_data *data = dev_get_drvdata(dev);
- return sprintf(buf, "%s\n", data->name);
-}
-static DEVICE_ATTR(name, S_IRUGO, show_name, NULL);
+ int i = index / 5; /* voltage index */
+ int a = index % 5; /* attribute index */
-static struct attribute *it87_attributes_in[10][5] = {
-{
+ if (index >= 40) { /* in8 and higher only have input attributes */
+ i = index - 40 + 8;
+ a = 0;
+ }
+
+ if (!(data->has_in & BIT(i)))
+ return 0;
+
+ if (a == 4 && !data->has_beep)
+ return 0;
+
+ return attr->mode;
+}
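The flat is_visible index decodes back into (voltage, attribute) coordinates exactly as above: five attributes per channel for in0..in7 occupy indices 0..39, and in8..in12 contribute single input attributes at indices 40..44. A standalone enumeration of the decoding:

#include <stdio.h>

int main(void)
{
	int index;

	for (index = 0; index <= 44; index++) {
		int i = index / 5;	/* voltage index */
		int a = index % 5;	/* attribute index */

		if (index >= 40) {	/* in8+ have only an input attr */
			i = index - 40 + 8;
			a = 0;
		}
		printf("index %2d -> in%d, attribute %d\n", index, i, a);
	}
	return 0;
}
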
+
+static struct attribute *it87_attributes_in[] = {
&sensor_dev_attr_in0_input.dev_attr.attr,
&sensor_dev_attr_in0_min.dev_attr.attr,
&sensor_dev_attr_in0_max.dev_attr.attr,
&sensor_dev_attr_in0_alarm.dev_attr.attr,
- NULL
-}, {
+ &sensor_dev_attr_in0_beep.dev_attr.attr, /* 4 */
+
&sensor_dev_attr_in1_input.dev_attr.attr,
&sensor_dev_attr_in1_min.dev_attr.attr,
&sensor_dev_attr_in1_max.dev_attr.attr,
&sensor_dev_attr_in1_alarm.dev_attr.attr,
- NULL
-}, {
+ &sensor_dev_attr_in1_beep.dev_attr.attr, /* 9 */
+
&sensor_dev_attr_in2_input.dev_attr.attr,
&sensor_dev_attr_in2_min.dev_attr.attr,
&sensor_dev_attr_in2_max.dev_attr.attr,
&sensor_dev_attr_in2_alarm.dev_attr.attr,
- NULL
-}, {
+ &sensor_dev_attr_in2_beep.dev_attr.attr, /* 14 */
+
&sensor_dev_attr_in3_input.dev_attr.attr,
&sensor_dev_attr_in3_min.dev_attr.attr,
&sensor_dev_attr_in3_max.dev_attr.attr,
&sensor_dev_attr_in3_alarm.dev_attr.attr,
- NULL
-}, {
+ &sensor_dev_attr_in3_beep.dev_attr.attr, /* 19 */
+
&sensor_dev_attr_in4_input.dev_attr.attr,
&sensor_dev_attr_in4_min.dev_attr.attr,
&sensor_dev_attr_in4_max.dev_attr.attr,
&sensor_dev_attr_in4_alarm.dev_attr.attr,
- NULL
-}, {
+ &sensor_dev_attr_in4_beep.dev_attr.attr, /* 24 */
+
&sensor_dev_attr_in5_input.dev_attr.attr,
&sensor_dev_attr_in5_min.dev_attr.attr,
&sensor_dev_attr_in5_max.dev_attr.attr,
&sensor_dev_attr_in5_alarm.dev_attr.attr,
- NULL
-}, {
+ &sensor_dev_attr_in5_beep.dev_attr.attr, /* 29 */
+
&sensor_dev_attr_in6_input.dev_attr.attr,
&sensor_dev_attr_in6_min.dev_attr.attr,
&sensor_dev_attr_in6_max.dev_attr.attr,
&sensor_dev_attr_in6_alarm.dev_attr.attr,
- NULL
-}, {
+ &sensor_dev_attr_in6_beep.dev_attr.attr, /* 34 */
+
&sensor_dev_attr_in7_input.dev_attr.attr,
&sensor_dev_attr_in7_min.dev_attr.attr,
&sensor_dev_attr_in7_max.dev_attr.attr,
&sensor_dev_attr_in7_alarm.dev_attr.attr,
- NULL
-}, {
- &sensor_dev_attr_in8_input.dev_attr.attr,
- NULL
-}, {
- &sensor_dev_attr_in9_input.dev_attr.attr,
- NULL
-} };
+ &sensor_dev_attr_in7_beep.dev_attr.attr, /* 39 */
-static const struct attribute_group it87_group_in[10] = {
- { .attrs = it87_attributes_in[0] },
- { .attrs = it87_attributes_in[1] },
- { .attrs = it87_attributes_in[2] },
- { .attrs = it87_attributes_in[3] },
- { .attrs = it87_attributes_in[4] },
- { .attrs = it87_attributes_in[5] },
- { .attrs = it87_attributes_in[6] },
- { .attrs = it87_attributes_in[7] },
- { .attrs = it87_attributes_in[8] },
- { .attrs = it87_attributes_in[9] },
+	&sensor_dev_attr_in8_input.dev_attr.attr,	/* 40 */
+	&sensor_dev_attr_in9_input.dev_attr.attr,	/* 41 */
+	&sensor_dev_attr_in10_input.dev_attr.attr,	/* 42 */
+	&sensor_dev_attr_in11_input.dev_attr.attr,	/* 43 */
+	&sensor_dev_attr_in12_input.dev_attr.attr,	/* 44 */
+	NULL
};
-static struct attribute *it87_attributes_temp[3][6] = {
+static const struct attribute_group it87_group_in = {
+ .attrs = it87_attributes_in,
+ .is_visible = it87_in_is_visible,
+};
+
+static umode_t it87_temp_is_visible(struct kobject *kobj,
+ struct attribute *attr, int index)
{
+ struct device *dev = container_of(kobj, struct device, kobj);
+ struct it87_data *data = dev_get_drvdata(dev);
+ int i = index / 7; /* temperature index */
+ int a = index % 7; /* attribute index */
+
+ if (index >= 21) {
+ i = index - 21 + 3;
+ a = 0;
+ }
+
+ if (!(data->has_temp & BIT(i)))
+ return 0;
+
+ if (a == 5 && !has_temp_offset(data))
+ return 0;
+
+ if (a == 6 && !data->has_beep)
+ return 0;
+
+ return attr->mode;
+}
+
+static struct attribute *it87_attributes_temp[] = {
&sensor_dev_attr_temp1_input.dev_attr.attr,
&sensor_dev_attr_temp1_max.dev_attr.attr,
&sensor_dev_attr_temp1_min.dev_attr.attr,
&sensor_dev_attr_temp1_type.dev_attr.attr,
&sensor_dev_attr_temp1_alarm.dev_attr.attr,
- NULL
-} , {
- &sensor_dev_attr_temp2_input.dev_attr.attr,
+ &sensor_dev_attr_temp1_offset.dev_attr.attr, /* 5 */
+ &sensor_dev_attr_temp1_beep.dev_attr.attr, /* 6 */
+
+ &sensor_dev_attr_temp2_input.dev_attr.attr, /* 7 */
&sensor_dev_attr_temp2_max.dev_attr.attr,
&sensor_dev_attr_temp2_min.dev_attr.attr,
&sensor_dev_attr_temp2_type.dev_attr.attr,
&sensor_dev_attr_temp2_alarm.dev_attr.attr,
- NULL
-} , {
- &sensor_dev_attr_temp3_input.dev_attr.attr,
+ &sensor_dev_attr_temp2_offset.dev_attr.attr,
+ &sensor_dev_attr_temp2_beep.dev_attr.attr,
+
+ &sensor_dev_attr_temp3_input.dev_attr.attr, /* 14 */
&sensor_dev_attr_temp3_max.dev_attr.attr,
&sensor_dev_attr_temp3_min.dev_attr.attr,
&sensor_dev_attr_temp3_type.dev_attr.attr,
&sensor_dev_attr_temp3_alarm.dev_attr.attr,
- NULL
-} };
-
-static const struct attribute_group it87_group_temp[3] = {
- { .attrs = it87_attributes_temp[0] },
- { .attrs = it87_attributes_temp[1] },
- { .attrs = it87_attributes_temp[2] },
-};
-
-static struct attribute *it87_attributes_temp_offset[] = {
- &sensor_dev_attr_temp1_offset.dev_attr.attr,
- &sensor_dev_attr_temp2_offset.dev_attr.attr,
&sensor_dev_attr_temp3_offset.dev_attr.attr,
+ &sensor_dev_attr_temp3_beep.dev_attr.attr,
+
+ &sensor_dev_attr_temp4_input.dev_attr.attr, /* 21 */
+ &sensor_dev_attr_temp5_input.dev_attr.attr,
+ &sensor_dev_attr_temp6_input.dev_attr.attr,
+ NULL
};
+static const struct attribute_group it87_group_temp = {
+ .attrs = it87_attributes_temp,
+ .is_visible = it87_temp_is_visible,
+};
+
+static umode_t it87_is_visible(struct kobject *kobj,
+ struct attribute *attr, int index)
+{
+ struct device *dev = container_of(kobj, struct device, kobj);
+ struct it87_data *data = dev_get_drvdata(dev);
+
+ if ((index == 2 || index == 3) && !data->has_vid)
+ return 0;
+
+ if (index > 3 && !(data->in_internal & BIT(index - 4)))
+ return 0;
+
+ return attr->mode;
+}
+
static struct attribute *it87_attributes[] = {
&dev_attr_alarms.attr,
&sensor_dev_attr_intrusion0_alarm.dev_attr.attr,
- &dev_attr_name.attr,
+ &dev_attr_vrm.attr, /* 2 */
+ &dev_attr_cpu0_vid.attr, /* 3 */
+ &sensor_dev_attr_in3_label.dev_attr.attr, /* 4 .. 7 */
+ &sensor_dev_attr_in7_label.dev_attr.attr,
+ &sensor_dev_attr_in8_label.dev_attr.attr,
+ &sensor_dev_attr_in9_label.dev_attr.attr,
NULL
};
static const struct attribute_group it87_group = {
.attrs = it87_attributes,
+ .is_visible = it87_is_visible,
};
-static struct attribute *it87_attributes_in_beep[] = {
- &sensor_dev_attr_in0_beep.dev_attr.attr,
- &sensor_dev_attr_in1_beep.dev_attr.attr,
- &sensor_dev_attr_in2_beep.dev_attr.attr,
- &sensor_dev_attr_in3_beep.dev_attr.attr,
- &sensor_dev_attr_in4_beep.dev_attr.attr,
- &sensor_dev_attr_in5_beep.dev_attr.attr,
- &sensor_dev_attr_in6_beep.dev_attr.attr,
- &sensor_dev_attr_in7_beep.dev_attr.attr,
- NULL,
- NULL,
-};
+static umode_t it87_fan_is_visible(struct kobject *kobj,
+ struct attribute *attr, int index)
+{
+ struct device *dev = container_of(kobj, struct device, kobj);
+ struct it87_data *data = dev_get_drvdata(dev);
+ int i = index / 5; /* fan index */
+ int a = index % 5; /* attribute index */
-static struct attribute *it87_attributes_temp_beep[] = {
- &sensor_dev_attr_temp1_beep.dev_attr.attr,
- &sensor_dev_attr_temp2_beep.dev_attr.attr,
- &sensor_dev_attr_temp3_beep.dev_attr.attr,
-};
+ if (index >= 15) { /* fan 4..6 don't have divisor attributes */
+ i = (index - 15) / 4 + 3;
+ a = (index - 15) % 4;
+ }
-static struct attribute *it87_attributes_fan[6][3+1] = { {
+ if (!(data->has_fan & BIT(i)))
+ return 0;
+
+ if (a == 3) { /* beep */
+ if (!data->has_beep)
+ return 0;
+ /* first fan beep attribute is writable */
+ if (i == __ffs(data->has_fan))
+ return attr->mode | S_IWUSR;
+ }
+
+ if (a == 4 && has_16bit_fans(data)) /* divisor */
+ return 0;
+
+ return attr->mode;
+}
+
+static struct attribute *it87_attributes_fan[] = {
&sensor_dev_attr_fan1_input.dev_attr.attr,
&sensor_dev_attr_fan1_min.dev_attr.attr,
&sensor_dev_attr_fan1_alarm.dev_attr.attr,
- NULL
-}, {
+ &sensor_dev_attr_fan1_beep.dev_attr.attr, /* 3 */
+ &sensor_dev_attr_fan1_div.dev_attr.attr, /* 4 */
+
&sensor_dev_attr_fan2_input.dev_attr.attr,
&sensor_dev_attr_fan2_min.dev_attr.attr,
&sensor_dev_attr_fan2_alarm.dev_attr.attr,
- NULL
-}, {
+ &sensor_dev_attr_fan2_beep.dev_attr.attr,
+ &sensor_dev_attr_fan2_div.dev_attr.attr, /* 9 */
+
&sensor_dev_attr_fan3_input.dev_attr.attr,
&sensor_dev_attr_fan3_min.dev_attr.attr,
&sensor_dev_attr_fan3_alarm.dev_attr.attr,
- NULL
-}, {
- &sensor_dev_attr_fan4_input.dev_attr.attr,
+ &sensor_dev_attr_fan3_beep.dev_attr.attr,
+ &sensor_dev_attr_fan3_div.dev_attr.attr, /* 14 */
+
+ &sensor_dev_attr_fan4_input.dev_attr.attr, /* 15 */
&sensor_dev_attr_fan4_min.dev_attr.attr,
&sensor_dev_attr_fan4_alarm.dev_attr.attr,
- NULL
-}, {
- &sensor_dev_attr_fan5_input.dev_attr.attr,
+ &sensor_dev_attr_fan4_beep.dev_attr.attr,
+
+ &sensor_dev_attr_fan5_input.dev_attr.attr, /* 19 */
&sensor_dev_attr_fan5_min.dev_attr.attr,
&sensor_dev_attr_fan5_alarm.dev_attr.attr,
- NULL
-}, {
- &sensor_dev_attr_fan6_input.dev_attr.attr,
+ &sensor_dev_attr_fan5_beep.dev_attr.attr,
+
+ &sensor_dev_attr_fan6_input.dev_attr.attr, /* 23 */
&sensor_dev_attr_fan6_min.dev_attr.attr,
&sensor_dev_attr_fan6_alarm.dev_attr.attr,
+ &sensor_dev_attr_fan6_beep.dev_attr.attr,
NULL
-} };
-
-static const struct attribute_group it87_group_fan[6] = {
- { .attrs = it87_attributes_fan[0] },
- { .attrs = it87_attributes_fan[1] },
- { .attrs = it87_attributes_fan[2] },
- { .attrs = it87_attributes_fan[3] },
- { .attrs = it87_attributes_fan[4] },
- { .attrs = it87_attributes_fan[5] },
};
-static const struct attribute *it87_attributes_fan_div[] = {
- &sensor_dev_attr_fan1_div.dev_attr.attr,
- &sensor_dev_attr_fan2_div.dev_attr.attr,
- &sensor_dev_attr_fan3_div.dev_attr.attr,
+static const struct attribute_group it87_group_fan = {
+ .attrs = it87_attributes_fan,
+ .is_visible = it87_fan_is_visible,
};
-static struct attribute *it87_attributes_pwm[3][4+1] = { {
+static umode_t it87_pwm_is_visible(struct kobject *kobj,
+ struct attribute *attr, int index)
+{
+ struct device *dev = container_of(kobj, struct device, kobj);
+ struct it87_data *data = dev_get_drvdata(dev);
+ int i = index / 4; /* pwm index */
+ int a = index % 4; /* attribute index */
+
+ if (!(data->has_pwm & BIT(i)))
+ return 0;
+
+ /* pwmX_auto_channels_temp is only writable if auto pwm is supported */
+ if (a == 3 && (has_old_autopwm(data) || has_newer_autopwm(data)))
+ return attr->mode | S_IWUSR;
+
+ /* pwm2_freq is writable if there are two pwm frequency selects */
+ if (has_pwm_freq2(data) && i == 1 && a == 2)
+ return attr->mode | S_IWUSR;
+
+ return attr->mode;
+}
+
+static struct attribute *it87_attributes_pwm[] = {
&sensor_dev_attr_pwm1_enable.dev_attr.attr,
&sensor_dev_attr_pwm1.dev_attr.attr,
- &dev_attr_pwm1_freq.attr,
+ &sensor_dev_attr_pwm1_freq.dev_attr.attr,
&sensor_dev_attr_pwm1_auto_channels_temp.dev_attr.attr,
- NULL
-}, {
+
&sensor_dev_attr_pwm2_enable.dev_attr.attr,
&sensor_dev_attr_pwm2.dev_attr.attr,
- &dev_attr_pwm2_freq.attr,
+ &sensor_dev_attr_pwm2_freq.dev_attr.attr,
&sensor_dev_attr_pwm2_auto_channels_temp.dev_attr.attr,
- NULL
-}, {
+
&sensor_dev_attr_pwm3_enable.dev_attr.attr,
&sensor_dev_attr_pwm3.dev_attr.attr,
- &dev_attr_pwm3_freq.attr,
+ &sensor_dev_attr_pwm3_freq.dev_attr.attr,
&sensor_dev_attr_pwm3_auto_channels_temp.dev_attr.attr,
- NULL
-} };
-static const struct attribute_group it87_group_pwm[3] = {
- { .attrs = it87_attributes_pwm[0] },
- { .attrs = it87_attributes_pwm[1] },
- { .attrs = it87_attributes_pwm[2] },
+ &sensor_dev_attr_pwm4_enable.dev_attr.attr,
+ &sensor_dev_attr_pwm4.dev_attr.attr,
+ &sensor_dev_attr_pwm4_freq.dev_attr.attr,
+ &sensor_dev_attr_pwm4_auto_channels_temp.dev_attr.attr,
+
+ &sensor_dev_attr_pwm5_enable.dev_attr.attr,
+ &sensor_dev_attr_pwm5.dev_attr.attr,
+ &sensor_dev_attr_pwm5_freq.dev_attr.attr,
+ &sensor_dev_attr_pwm5_auto_channels_temp.dev_attr.attr,
+
+ &sensor_dev_attr_pwm6_enable.dev_attr.attr,
+ &sensor_dev_attr_pwm6.dev_attr.attr,
+ &sensor_dev_attr_pwm6_freq.dev_attr.attr,
+ &sensor_dev_attr_pwm6_auto_channels_temp.dev_attr.attr,
+
+ NULL
};
-static struct attribute *it87_attributes_autopwm[3][9+1] = { {
+static const struct attribute_group it87_group_pwm = {
+ .attrs = it87_attributes_pwm,
+ .is_visible = it87_pwm_is_visible,
+};
+
+static umode_t it87_auto_pwm_is_visible(struct kobject *kobj,
+ struct attribute *attr, int index)
+{
+ struct device *dev = container_of(kobj, struct device, kobj);
+ struct it87_data *data = dev_get_drvdata(dev);
+ int i = index / 11; /* pwm index */
+ int a = index % 11; /* attribute index */
+
+ if (index >= 33) { /* pwm 4..6 */
+ i = (index - 33) / 6 + 3;
+ a = (index - 33) % 6 + 4;
+ }
+
+ if (!(data->has_pwm & BIT(i)))
+ return 0;
+
+ if (has_newer_autopwm(data)) {
+ if (a < 4) /* no auto point pwm */
+ return 0;
+ if (a == 8) /* no auto_point4 */
+ return 0;
+ }
+ if (has_old_autopwm(data)) {
+ if (a >= 9) /* no pwm_auto_start, pwm_auto_slope */
+ return 0;
+ }
+
+ return attr->mode;
+}
+
+static struct attribute *it87_attributes_auto_pwm[] = {
&sensor_dev_attr_pwm1_auto_point1_pwm.dev_attr.attr,
&sensor_dev_attr_pwm1_auto_point2_pwm.dev_attr.attr,
&sensor_dev_attr_pwm1_auto_point3_pwm.dev_attr.attr,
@@ -1778,9 +2287,10 @@
&sensor_dev_attr_pwm1_auto_point2_temp.dev_attr.attr,
&sensor_dev_attr_pwm1_auto_point3_temp.dev_attr.attr,
&sensor_dev_attr_pwm1_auto_point4_temp.dev_attr.attr,
- NULL
-}, {
- &sensor_dev_attr_pwm2_auto_point1_pwm.dev_attr.attr,
+ &sensor_dev_attr_pwm1_auto_start.dev_attr.attr,
+ &sensor_dev_attr_pwm1_auto_slope.dev_attr.attr,
+
+ &sensor_dev_attr_pwm2_auto_point1_pwm.dev_attr.attr, /* 11 */
&sensor_dev_attr_pwm2_auto_point2_pwm.dev_attr.attr,
&sensor_dev_attr_pwm2_auto_point3_pwm.dev_attr.attr,
&sensor_dev_attr_pwm2_auto_point4_pwm.dev_attr.attr,
@@ -1789,9 +2299,10 @@
&sensor_dev_attr_pwm2_auto_point2_temp.dev_attr.attr,
&sensor_dev_attr_pwm2_auto_point3_temp.dev_attr.attr,
&sensor_dev_attr_pwm2_auto_point4_temp.dev_attr.attr,
- NULL
-}, {
- &sensor_dev_attr_pwm3_auto_point1_pwm.dev_attr.attr,
+ &sensor_dev_attr_pwm2_auto_start.dev_attr.attr,
+ &sensor_dev_attr_pwm2_auto_slope.dev_attr.attr,
+
+ &sensor_dev_attr_pwm3_auto_point1_pwm.dev_attr.attr, /* 22 */
&sensor_dev_attr_pwm3_auto_point2_pwm.dev_attr.attr,
&sensor_dev_attr_pwm3_auto_point3_pwm.dev_attr.attr,
&sensor_dev_attr_pwm3_auto_point4_pwm.dev_attr.attr,
@@ -1800,61 +2311,53 @@
&sensor_dev_attr_pwm3_auto_point2_temp.dev_attr.attr,
&sensor_dev_attr_pwm3_auto_point3_temp.dev_attr.attr,
&sensor_dev_attr_pwm3_auto_point4_temp.dev_attr.attr,
- NULL
-} };
+ &sensor_dev_attr_pwm3_auto_start.dev_attr.attr,
+ &sensor_dev_attr_pwm3_auto_slope.dev_attr.attr,
-static const struct attribute_group it87_group_autopwm[3] = {
- { .attrs = it87_attributes_autopwm[0] },
- { .attrs = it87_attributes_autopwm[1] },
- { .attrs = it87_attributes_autopwm[2] },
+ &sensor_dev_attr_pwm4_auto_point1_temp.dev_attr.attr, /* 33 */
+ &sensor_dev_attr_pwm4_auto_point1_temp_hyst.dev_attr.attr,
+ &sensor_dev_attr_pwm4_auto_point2_temp.dev_attr.attr,
+ &sensor_dev_attr_pwm4_auto_point3_temp.dev_attr.attr,
+ &sensor_dev_attr_pwm4_auto_start.dev_attr.attr,
+ &sensor_dev_attr_pwm4_auto_slope.dev_attr.attr,
+
+ &sensor_dev_attr_pwm5_auto_point1_temp.dev_attr.attr,
+ &sensor_dev_attr_pwm5_auto_point1_temp_hyst.dev_attr.attr,
+ &sensor_dev_attr_pwm5_auto_point2_temp.dev_attr.attr,
+ &sensor_dev_attr_pwm5_auto_point3_temp.dev_attr.attr,
+ &sensor_dev_attr_pwm5_auto_start.dev_attr.attr,
+ &sensor_dev_attr_pwm5_auto_slope.dev_attr.attr,
+
+ &sensor_dev_attr_pwm6_auto_point1_temp.dev_attr.attr,
+ &sensor_dev_attr_pwm6_auto_point1_temp_hyst.dev_attr.attr,
+ &sensor_dev_attr_pwm6_auto_point2_temp.dev_attr.attr,
+ &sensor_dev_attr_pwm6_auto_point3_temp.dev_attr.attr,
+ &sensor_dev_attr_pwm6_auto_start.dev_attr.attr,
+ &sensor_dev_attr_pwm6_auto_slope.dev_attr.attr,
+
+ NULL,
};
-static struct attribute *it87_attributes_fan_beep[] = {
- &sensor_dev_attr_fan1_beep.dev_attr.attr,
- &sensor_dev_attr_fan2_beep.dev_attr.attr,
- &sensor_dev_attr_fan3_beep.dev_attr.attr,
- &sensor_dev_attr_fan4_beep.dev_attr.attr,
- &sensor_dev_attr_fan5_beep.dev_attr.attr,
- &sensor_dev_attr_fan6_beep.dev_attr.attr,
-};
-
-static struct attribute *it87_attributes_vid[] = {
- &dev_attr_vrm.attr,
- &dev_attr_cpu0_vid.attr,
- NULL
-};
-
-static const struct attribute_group it87_group_vid = {
- .attrs = it87_attributes_vid,
-};
-
-static struct attribute *it87_attributes_label[] = {
- &sensor_dev_attr_in3_label.dev_attr.attr,
- &sensor_dev_attr_in7_label.dev_attr.attr,
- &sensor_dev_attr_in8_label.dev_attr.attr,
- &sensor_dev_attr_in9_label.dev_attr.attr,
- NULL
-};
-
-static const struct attribute_group it87_group_label = {
- .attrs = it87_attributes_label,
+static const struct attribute_group it87_group_auto_pwm = {
+ .attrs = it87_attributes_auto_pwm,
+ .is_visible = it87_auto_pwm_is_visible,
};
/* SuperIO detection - will change isa_address if a chip is found */
-static int __init it87_find(unsigned short *address,
- struct it87_sio_data *sio_data)
+static int __init it87_find(int sioaddr, unsigned short *address,
+ struct it87_sio_data *sio_data)
{
int err;
u16 chip_type;
const char *board_vendor, *board_name;
const struct it87_devices *config;
- err = superio_enter();
+ err = superio_enter(sioaddr);
if (err)
return err;
err = -ENODEV;
- chip_type = force_id ? force_id : superio_inw(DEVID);
+ chip_type = force_id ? force_id : superio_inw(sioaddr, DEVID);
switch (chip_type) {
case IT8705F_DEVID:
@@ -1910,6 +2413,9 @@
case IT8620E_DEVID:
sio_data->type = it8620;
break;
+ case IT8628E_DEVID:
+ sio_data->type = it8628;
+ break;
case 0xffff: /* No device at all */
goto exit;
default:
@@ -1917,20 +2423,20 @@
goto exit;
}
- superio_select(PME);
- if (!(superio_inb(IT87_ACT_REG) & 0x01)) {
+ superio_select(sioaddr, PME);
+ if (!(superio_inb(sioaddr, IT87_ACT_REG) & 0x01)) {
pr_info("Device not activated, skipping\n");
goto exit;
}
- *address = superio_inw(IT87_BASE_REG) & ~(IT87_EXTENT - 1);
+ *address = superio_inw(sioaddr, IT87_BASE_REG) & ~(IT87_EXTENT - 1);
if (*address == 0) {
pr_info("Base address not set, skipping\n");
goto exit;
}
err = 0;
- sio_data->revision = superio_inb(DEVREV) & 0x0f;
+ sio_data->revision = superio_inb(sioaddr, DEVREV) & 0x0f;
pr_info("Found IT%04x%s chip at 0x%x, revision %d\n", chip_type,
it87_devices[sio_data->type].suffix,
*address, sio_data->revision);
@@ -1939,14 +2445,19 @@
/* in7 (VSB or VCCH5V) is always internal on some chips */
if (has_in7_internal(config))
- sio_data->internal |= (1 << 1);
+ sio_data->internal |= BIT(1);
/* in8 (Vbat) is always internal */
- sio_data->internal |= (1 << 2);
+ sio_data->internal |= BIT(2);
- /* Only the IT8603E has in9 */
- if (sio_data->type != it8603)
- sio_data->skip_in |= (1 << 9);
+ /* in9 (AVCC3), always internal if supported */
+ if (has_avcc3(config))
+ sio_data->internal |= BIT(3); /* in9 is AVCC */
+ else
+ sio_data->skip_in |= BIT(9);
+
+ if (!has_six_pwm(config))
+ sio_data->skip_pwm |= BIT(3) | BIT(4) | BIT(5);
if (!has_vid(config))
sio_data->skip_vid = 1;
@@ -1954,45 +2465,46 @@
/* Read GPIO config and VID value from LDN 7 (GPIO) */
if (sio_data->type == it87) {
/* The IT8705F has a different LD number for GPIO */
- superio_select(5);
- sio_data->beep_pin = superio_inb(IT87_SIO_BEEP_PIN_REG) & 0x3f;
+ superio_select(sioaddr, 5);
+ sio_data->beep_pin = superio_inb(sioaddr,
+ IT87_SIO_BEEP_PIN_REG) & 0x3f;
} else if (sio_data->type == it8783) {
int reg25, reg27, reg2a, reg2c, regef;
- superio_select(GPIO);
+ superio_select(sioaddr, GPIO);
- reg25 = superio_inb(IT87_SIO_GPIO1_REG);
- reg27 = superio_inb(IT87_SIO_GPIO3_REG);
- reg2a = superio_inb(IT87_SIO_PINX1_REG);
- reg2c = superio_inb(IT87_SIO_PINX2_REG);
- regef = superio_inb(IT87_SIO_SPI_REG);
+ reg25 = superio_inb(sioaddr, IT87_SIO_GPIO1_REG);
+ reg27 = superio_inb(sioaddr, IT87_SIO_GPIO3_REG);
+ reg2a = superio_inb(sioaddr, IT87_SIO_PINX1_REG);
+ reg2c = superio_inb(sioaddr, IT87_SIO_PINX2_REG);
+ regef = superio_inb(sioaddr, IT87_SIO_SPI_REG);
/* Check if fan3 is there or not */
- if ((reg27 & (1 << 0)) || !(reg2c & (1 << 2)))
- sio_data->skip_fan |= (1 << 2);
- if ((reg25 & (1 << 4))
- || (!(reg2a & (1 << 1)) && (regef & (1 << 0))))
- sio_data->skip_pwm |= (1 << 2);
+ if ((reg27 & BIT(0)) || !(reg2c & BIT(2)))
+ sio_data->skip_fan |= BIT(2);
+ if ((reg25 & BIT(4)) ||
+ (!(reg2a & BIT(1)) && (regef & BIT(0))))
+ sio_data->skip_pwm |= BIT(2);
/* Check if fan2 is there or not */
- if (reg27 & (1 << 7))
- sio_data->skip_fan |= (1 << 1);
- if (reg27 & (1 << 3))
- sio_data->skip_pwm |= (1 << 1);
+ if (reg27 & BIT(7))
+ sio_data->skip_fan |= BIT(1);
+ if (reg27 & BIT(3))
+ sio_data->skip_pwm |= BIT(1);
/* VIN5 */
- if ((reg27 & (1 << 0)) || (reg2c & (1 << 2)))
- sio_data->skip_in |= (1 << 5); /* No VIN5 */
+ if ((reg27 & BIT(0)) || (reg2c & BIT(2)))
+ sio_data->skip_in |= BIT(5); /* No VIN5 */
/* VIN6 */
- if (reg27 & (1 << 1))
- sio_data->skip_in |= (1 << 6); /* No VIN6 */
+ if (reg27 & BIT(1))
+ sio_data->skip_in |= BIT(6); /* No VIN6 */
/*
* VIN7
* Does not depend on bit 2 of Reg2C, contrary to datasheet.
*/
- if (reg27 & (1 << 2)) {
+ if (reg27 & BIT(2)) {
/*
* The data sheet is a bit unclear regarding the
* internal voltage divider for VCCH5V. It says
@@ -2006,81 +2518,121 @@
* not the case, and ask the user to report if the
* resulting voltage is sane.
*/
- if (!(reg2c & (1 << 1))) {
- reg2c |= (1 << 1);
- superio_outb(IT87_SIO_PINX2_REG, reg2c);
+ if (!(reg2c & BIT(1))) {
+ reg2c |= BIT(1);
+ superio_outb(sioaddr, IT87_SIO_PINX2_REG,
+ reg2c);
pr_notice("Routing internal VCCH5V to in7.\n");
}
pr_notice("in7 routed to internal voltage divider, with external pin disabled.\n");
pr_notice("Please report if it displays a reasonable voltage.\n");
}
- if (reg2c & (1 << 0))
- sio_data->internal |= (1 << 0);
- if (reg2c & (1 << 1))
- sio_data->internal |= (1 << 1);
+ if (reg2c & BIT(0))
+ sio_data->internal |= BIT(0);
+ if (reg2c & BIT(1))
+ sio_data->internal |= BIT(1);
- sio_data->beep_pin = superio_inb(IT87_SIO_BEEP_PIN_REG) & 0x3f;
+ sio_data->beep_pin = superio_inb(sioaddr,
+ IT87_SIO_BEEP_PIN_REG) & 0x3f;
} else if (sio_data->type == it8603) {
int reg27, reg29;
- superio_select(GPIO);
+ superio_select(sioaddr, GPIO);
- reg27 = superio_inb(IT87_SIO_GPIO3_REG);
+ reg27 = superio_inb(sioaddr, IT87_SIO_GPIO3_REG);
/* Check if fan3 is there or not */
- if (reg27 & (1 << 6))
- sio_data->skip_pwm |= (1 << 2);
- if (reg27 & (1 << 7))
- sio_data->skip_fan |= (1 << 2);
+ if (reg27 & BIT(6))
+ sio_data->skip_pwm |= BIT(2);
+ if (reg27 & BIT(7))
+ sio_data->skip_fan |= BIT(2);
/* Check if fan2 is there or not */
- reg29 = superio_inb(IT87_SIO_GPIO5_REG);
- if (reg29 & (1 << 1))
- sio_data->skip_pwm |= (1 << 1);
- if (reg29 & (1 << 2))
- sio_data->skip_fan |= (1 << 1);
+ reg29 = superio_inb(sioaddr, IT87_SIO_GPIO5_REG);
+ if (reg29 & BIT(1))
+ sio_data->skip_pwm |= BIT(1);
+ if (reg29 & BIT(2))
+ sio_data->skip_fan |= BIT(1);
- sio_data->skip_in |= (1 << 5); /* No VIN5 */
- sio_data->skip_in |= (1 << 6); /* No VIN6 */
+ sio_data->skip_in |= BIT(5); /* No VIN5 */
+ sio_data->skip_in |= BIT(6); /* No VIN6 */
- sio_data->internal |= (1 << 3); /* in9 is AVCC */
-
- sio_data->beep_pin = superio_inb(IT87_SIO_BEEP_PIN_REG) & 0x3f;
- } else if (sio_data->type == it8620) {
+ sio_data->beep_pin = superio_inb(sioaddr,
+ IT87_SIO_BEEP_PIN_REG) & 0x3f;
+ } else if (sio_data->type == it8620 || sio_data->type == it8628) {
int reg;
- superio_select(GPIO);
+ superio_select(sioaddr, GPIO);
+
+ /* Check for pwm5 */
+ reg = superio_inb(sioaddr, IT87_SIO_GPIO1_REG);
+ if (reg & BIT(6))
+ sio_data->skip_pwm |= BIT(4);
/* Check for fan4, fan5 */
- reg = superio_inb(IT87_SIO_GPIO2_REG);
- if (!(reg & (1 << 5)))
- sio_data->skip_fan |= (1 << 3);
- if (!(reg & (1 << 4)))
- sio_data->skip_fan |= (1 << 4);
+ reg = superio_inb(sioaddr, IT87_SIO_GPIO2_REG);
+ if (!(reg & BIT(5)))
+ sio_data->skip_fan |= BIT(3);
+ if (!(reg & BIT(4)))
+ sio_data->skip_fan |= BIT(4);
/* Check for pwm3, fan3 */
- reg = superio_inb(IT87_SIO_GPIO3_REG);
- if (reg & (1 << 6))
- sio_data->skip_pwm |= (1 << 2);
- if (reg & (1 << 7))
- sio_data->skip_fan |= (1 << 2);
+ reg = superio_inb(sioaddr, IT87_SIO_GPIO3_REG);
+ if (reg & BIT(6))
+ sio_data->skip_pwm |= BIT(2);
+ if (reg & BIT(7))
+ sio_data->skip_fan |= BIT(2);
+
+ /* Check for pwm4 */
+ reg = superio_inb(sioaddr, IT87_SIO_GPIO4_REG);
+ if (!(reg & BIT(2)))
+ sio_data->skip_pwm |= BIT(3);
/* Check for pwm2, fan2 */
- reg = superio_inb(IT87_SIO_GPIO5_REG);
- if (reg & (1 << 1))
- sio_data->skip_pwm |= (1 << 1);
- if (reg & (1 << 2))
- sio_data->skip_fan |= (1 << 1);
+ reg = superio_inb(sioaddr, IT87_SIO_GPIO5_REG);
+ if (reg & BIT(1))
+ sio_data->skip_pwm |= BIT(1);
+ if (reg & BIT(2))
+ sio_data->skip_fan |= BIT(1);
+ /* Check for pwm6, fan6 */
+ if (!(reg & BIT(7))) {
+ sio_data->skip_pwm |= BIT(5);
+ sio_data->skip_fan |= BIT(5);
+ }
- sio_data->beep_pin = superio_inb(IT87_SIO_BEEP_PIN_REG) & 0x3f;
+ sio_data->beep_pin = superio_inb(sioaddr,
+ IT87_SIO_BEEP_PIN_REG) & 0x3f;
} else {
int reg;
bool uart6;
- superio_select(GPIO);
+ superio_select(sioaddr, GPIO);
- reg = superio_inb(IT87_SIO_GPIO3_REG);
+ /* Check for fan4, fan5 */
+ if (has_five_fans(config)) {
+ reg = superio_inb(sioaddr, IT87_SIO_GPIO2_REG);
+ switch (sio_data->type) {
+ case it8718:
+ if (reg & BIT(5))
+ sio_data->skip_fan |= BIT(3);
+ if (reg & BIT(4))
+ sio_data->skip_fan |= BIT(4);
+ break;
+ case it8720:
+ case it8721:
+ case it8728:
+ if (!(reg & BIT(5)))
+ sio_data->skip_fan |= BIT(3);
+ if (!(reg & BIT(4)))
+ sio_data->skip_fan |= BIT(4);
+ break;
+ default:
+ break;
+ }
+ }
+
+ reg = superio_inb(sioaddr, IT87_SIO_GPIO3_REG);
if (!sio_data->skip_vid) {
/* We need at least 4 VID pins */
if (reg & 0x0f) {
@@ -2090,25 +2642,26 @@
}
/* Check if fan3 is there or not */
- if (reg & (1 << 6))
- sio_data->skip_pwm |= (1 << 2);
- if (reg & (1 << 7))
- sio_data->skip_fan |= (1 << 2);
+ if (reg & BIT(6))
+ sio_data->skip_pwm |= BIT(2);
+ if (reg & BIT(7))
+ sio_data->skip_fan |= BIT(2);
/* Check if fan2 is there or not */
- reg = superio_inb(IT87_SIO_GPIO5_REG);
- if (reg & (1 << 1))
- sio_data->skip_pwm |= (1 << 1);
- if (reg & (1 << 2))
- sio_data->skip_fan |= (1 << 1);
+ reg = superio_inb(sioaddr, IT87_SIO_GPIO5_REG);
+ if (reg & BIT(1))
+ sio_data->skip_pwm |= BIT(1);
+ if (reg & BIT(2))
+ sio_data->skip_fan |= BIT(1);
- if ((sio_data->type == it8718 || sio_data->type == it8720)
- && !(sio_data->skip_vid))
- sio_data->vid_value = superio_inb(IT87_SIO_VID_REG);
+ if ((sio_data->type == it8718 || sio_data->type == it8720) &&
+ !(sio_data->skip_vid))
+ sio_data->vid_value = superio_inb(sioaddr,
+ IT87_SIO_VID_REG);
- reg = superio_inb(IT87_SIO_PINX2_REG);
+ reg = superio_inb(sioaddr, IT87_SIO_PINX2_REG);
- uart6 = sio_data->type == it8782 && (reg & (1 << 2));
+ uart6 = sio_data->type == it8782 && (reg & BIT(2));
/*
* The IT8720F has no VIN7 pin, so VCCH should always be
@@ -2124,15 +2677,15 @@
* If UART6 is enabled, re-route VIN7 to the internal divider
* if that is not already the case.
*/
- if ((sio_data->type == it8720 || uart6) && !(reg & (1 << 1))) {
- reg |= (1 << 1);
- superio_outb(IT87_SIO_PINX2_REG, reg);
+ if ((sio_data->type == it8720 || uart6) && !(reg & BIT(1))) {
+ reg |= BIT(1);
+ superio_outb(sioaddr, IT87_SIO_PINX2_REG, reg);
pr_notice("Routing internal VCCH to in7\n");
}
- if (reg & (1 << 0))
- sio_data->internal |= (1 << 0);
- if (reg & (1 << 1))
- sio_data->internal |= (1 << 1);
+ if (reg & BIT(0))
+ sio_data->internal |= BIT(0);
+ if (reg & BIT(1))
+ sio_data->internal |= BIT(1);
/*
* On IT8782F, UART6 pins overlap with VIN5, VIN6, and VIN7.
@@ -2144,11 +2697,12 @@
* temperature source here, skip_temp is preliminary.
*/
if (uart6) {
- sio_data->skip_in |= (1 << 5) | (1 << 6);
- sio_data->skip_temp |= (1 << 2);
+ sio_data->skip_in |= BIT(5) | BIT(6);
+ sio_data->skip_temp |= BIT(2);
}
- sio_data->beep_pin = superio_inb(IT87_SIO_BEEP_PIN_REG) & 0x3f;
+ sio_data->beep_pin = superio_inb(sioaddr,
+ IT87_SIO_BEEP_PIN_REG) & 0x3f;
}
if (sio_data->beep_pin)
pr_info("Beeping is supported\n");
@@ -2157,8 +2711,8 @@
board_vendor = dmi_get_system_info(DMI_BOARD_VENDOR);
board_name = dmi_get_system_info(DMI_BOARD_NAME);
if (board_vendor && board_name) {
- if (strcmp(board_vendor, "nVIDIA") == 0
- && strcmp(board_name, "FN68PT") == 0) {
+ if (strcmp(board_vendor, "nVIDIA") == 0 &&
+ strcmp(board_name, "FN68PT") == 0) {
/*
* On the Shuttle SN68PT, FAN_CTL2 is apparently not
* connected to a fan, but to something else. One user
@@ -2168,373 +2722,15 @@
* the same board is ever used in other systems.
*/
pr_info("Disabling pwm2 due to hardware constraints\n");
- sio_data->skip_pwm = (1 << 1);
+ sio_data->skip_pwm = BIT(1);
}
}
exit:
- superio_exit();
+ superio_exit(sioaddr);
return err;
}
-static void it87_remove_files(struct device *dev)
-{
- struct it87_data *data = platform_get_drvdata(pdev);
- struct it87_sio_data *sio_data = dev_get_platdata(dev);
- int i;
-
- sysfs_remove_group(&dev->kobj, &it87_group);
- for (i = 0; i < 10; i++) {
- if (sio_data->skip_in & (1 << i))
- continue;
- sysfs_remove_group(&dev->kobj, &it87_group_in[i]);
- if (it87_attributes_in_beep[i])
- sysfs_remove_file(&dev->kobj,
- it87_attributes_in_beep[i]);
- }
- for (i = 0; i < 3; i++) {
- if (!(data->has_temp & (1 << i)))
- continue;
- sysfs_remove_group(&dev->kobj, &it87_group_temp[i]);
- if (has_temp_offset(data))
- sysfs_remove_file(&dev->kobj,
- it87_attributes_temp_offset[i]);
- if (sio_data->beep_pin)
- sysfs_remove_file(&dev->kobj,
- it87_attributes_temp_beep[i]);
- }
- for (i = 0; i < 6; i++) {
- if (!(data->has_fan & (1 << i)))
- continue;
- sysfs_remove_group(&dev->kobj, &it87_group_fan[i]);
- if (sio_data->beep_pin)
- sysfs_remove_file(&dev->kobj,
- it87_attributes_fan_beep[i]);
- if (i < 3 && !has_16bit_fans(data))
- sysfs_remove_file(&dev->kobj,
- it87_attributes_fan_div[i]);
- }
- for (i = 0; i < 3; i++) {
- if (sio_data->skip_pwm & (1 << i))
- continue;
- sysfs_remove_group(&dev->kobj, &it87_group_pwm[i]);
- if (has_old_autopwm(data))
- sysfs_remove_group(&dev->kobj,
- &it87_group_autopwm[i]);
- }
- if (!sio_data->skip_vid)
- sysfs_remove_group(&dev->kobj, &it87_group_vid);
- sysfs_remove_group(&dev->kobj, &it87_group_label);
-}
-
-static int it87_probe(struct platform_device *pdev)
-{
- struct it87_data *data;
- struct resource *res;
- struct device *dev = &pdev->dev;
- struct it87_sio_data *sio_data = dev_get_platdata(dev);
- int err = 0, i;
- int enable_pwm_interface;
- int fan_beep_need_rw;
-
- res = platform_get_resource(pdev, IORESOURCE_IO, 0);
- if (!devm_request_region(&pdev->dev, res->start, IT87_EC_EXTENT,
- DRVNAME)) {
- dev_err(dev, "Failed to request region 0x%lx-0x%lx\n",
- (unsigned long)res->start,
- (unsigned long)(res->start + IT87_EC_EXTENT - 1));
- return -EBUSY;
- }
-
- data = devm_kzalloc(&pdev->dev, sizeof(struct it87_data), GFP_KERNEL);
- if (!data)
- return -ENOMEM;
-
- data->addr = res->start;
- data->type = sio_data->type;
- data->features = it87_devices[sio_data->type].features;
- data->peci_mask = it87_devices[sio_data->type].peci_mask;
- data->old_peci_mask = it87_devices[sio_data->type].old_peci_mask;
- data->name = it87_devices[sio_data->type].name;
- /*
- * IT8705F Datasheet 0.4.1, 3h == Version G.
- * IT8712F Datasheet 0.9.1, section 8.3.5 indicates 8h == Version J.
- * These are the first revisions with 16-bit tachometer support.
- */
- switch (data->type) {
- case it87:
- if (sio_data->revision >= 0x03) {
- data->features &= ~FEAT_OLD_AUTOPWM;
- data->features |= FEAT_FAN16_CONFIG | FEAT_16BIT_FANS;
- }
- break;
- case it8712:
- if (sio_data->revision >= 0x08) {
- data->features &= ~FEAT_OLD_AUTOPWM;
- data->features |= FEAT_FAN16_CONFIG | FEAT_16BIT_FANS |
- FEAT_FIVE_FANS;
- }
- break;
- default:
- break;
- }
-
- /* Now, we do the remaining detection. */
- if ((it87_read_value(data, IT87_REG_CONFIG) & 0x80)
- || it87_read_value(data, IT87_REG_CHIPID) != 0x90)
- return -ENODEV;
-
- platform_set_drvdata(pdev, data);
-
- mutex_init(&data->update_lock);
-
- /* Check PWM configuration */
- enable_pwm_interface = it87_check_pwm(dev);
-
- /* Starting with IT8721F, we handle scaling of internal voltages */
- if (has_12mv_adc(data)) {
- if (sio_data->internal & (1 << 0))
- data->in_scaled |= (1 << 3); /* in3 is AVCC */
- if (sio_data->internal & (1 << 1))
- data->in_scaled |= (1 << 7); /* in7 is VSB */
- if (sio_data->internal & (1 << 2))
- data->in_scaled |= (1 << 8); /* in8 is Vbat */
- if (sio_data->internal & (1 << 3))
- data->in_scaled |= (1 << 9); /* in9 is AVCC */
- } else if (sio_data->type == it8781 || sio_data->type == it8782 ||
- sio_data->type == it8783) {
- if (sio_data->internal & (1 << 0))
- data->in_scaled |= (1 << 3); /* in3 is VCC5V */
- if (sio_data->internal & (1 << 1))
- data->in_scaled |= (1 << 7); /* in7 is VCCH5V */
- }
-
- data->has_temp = 0x07;
- if (sio_data->skip_temp & (1 << 2)) {
- if (sio_data->type == it8782
- && !(it87_read_value(data, IT87_REG_TEMP_EXTRA) & 0x80))
- data->has_temp &= ~(1 << 2);
- }
-
- /* Initialize the IT87 chip */
- it87_init_device(pdev);
-
- /* Register sysfs hooks */
- err = sysfs_create_group(&dev->kobj, &it87_group);
- if (err)
- return err;
-
- for (i = 0; i < 10; i++) {
- if (sio_data->skip_in & (1 << i))
- continue;
- err = sysfs_create_group(&dev->kobj, &it87_group_in[i]);
- if (err)
- goto error;
- if (sio_data->beep_pin && it87_attributes_in_beep[i]) {
- err = sysfs_create_file(&dev->kobj,
- it87_attributes_in_beep[i]);
- if (err)
- goto error;
- }
- }
-
- for (i = 0; i < 3; i++) {
- if (!(data->has_temp & (1 << i)))
- continue;
- err = sysfs_create_group(&dev->kobj, &it87_group_temp[i]);
- if (err)
- goto error;
- if (has_temp_offset(data)) {
- err = sysfs_create_file(&dev->kobj,
- it87_attributes_temp_offset[i]);
- if (err)
- goto error;
- }
- if (sio_data->beep_pin) {
- err = sysfs_create_file(&dev->kobj,
- it87_attributes_temp_beep[i]);
- if (err)
- goto error;
- }
- }
-
- /* Do not create fan files for disabled fans */
- fan_beep_need_rw = 1;
- for (i = 0; i < 6; i++) {
- if (!(data->has_fan & (1 << i)))
- continue;
- err = sysfs_create_group(&dev->kobj, &it87_group_fan[i]);
- if (err)
- goto error;
-
- if (i < 3 && !has_16bit_fans(data)) {
- err = sysfs_create_file(&dev->kobj,
- it87_attributes_fan_div[i]);
- if (err)
- goto error;
- }
-
- if (sio_data->beep_pin) {
- err = sysfs_create_file(&dev->kobj,
- it87_attributes_fan_beep[i]);
- if (err)
- goto error;
- if (!fan_beep_need_rw)
- continue;
-
- /*
- * As we have a single beep enable bit for all fans,
- * only the first enabled fan has a writable attribute
- * for it.
- */
- if (sysfs_chmod_file(&dev->kobj,
- it87_attributes_fan_beep[i],
- S_IRUGO | S_IWUSR))
- dev_dbg(dev, "chmod +w fan%d_beep failed\n",
- i + 1);
- fan_beep_need_rw = 0;
- }
- }
-
- if (enable_pwm_interface) {
- for (i = 0; i < 3; i++) {
- if (sio_data->skip_pwm & (1 << i))
- continue;
- err = sysfs_create_group(&dev->kobj,
- &it87_group_pwm[i]);
- if (err)
- goto error;
-
- if (!has_old_autopwm(data))
- continue;
- err = sysfs_create_group(&dev->kobj,
- &it87_group_autopwm[i]);
- if (err)
- goto error;
- }
- }
-
- if (!sio_data->skip_vid) {
- data->vrm = vid_which_vrm();
- /* VID reading from Super-I/O config space if available */
- data->vid = sio_data->vid_value;
- err = sysfs_create_group(&dev->kobj, &it87_group_vid);
- if (err)
- goto error;
- }
-
- /* Export labels for internal sensors */
- for (i = 0; i < 4; i++) {
- if (!(sio_data->internal & (1 << i)))
- continue;
- err = sysfs_create_file(&dev->kobj,
- it87_attributes_label[i]);
- if (err)
- goto error;
- }
-
- data->hwmon_dev = hwmon_device_register(dev);
- if (IS_ERR(data->hwmon_dev)) {
- err = PTR_ERR(data->hwmon_dev);
- goto error;
- }
-
- return 0;
-
-error:
- it87_remove_files(dev);
- return err;
-}
-
-static int it87_remove(struct platform_device *pdev)
-{
- struct it87_data *data = platform_get_drvdata(pdev);
-
- hwmon_device_unregister(data->hwmon_dev);
- it87_remove_files(&pdev->dev);
-
- return 0;
-}
-
-/*
- * Must be called with data->update_lock held, except during initialization.
- * We ignore the IT87 BUSY flag at this moment - it could lead to deadlocks,
- * would slow down the IT87 access and should not be necessary.
- */
-static int it87_read_value(struct it87_data *data, u8 reg)
-{
- outb_p(reg, data->addr + IT87_ADDR_REG_OFFSET);
- return inb_p(data->addr + IT87_DATA_REG_OFFSET);
-}
-
-/*
- * Must be called with data->update_lock held, except during initialization.
- * We ignore the IT87 BUSY flag at this moment - it could lead to deadlocks,
- * would slow down the IT87 access and should not be necessary.
- */
-static void it87_write_value(struct it87_data *data, u8 reg, u8 value)
-{
- outb_p(reg, data->addr + IT87_ADDR_REG_OFFSET);
- outb_p(value, data->addr + IT87_DATA_REG_OFFSET);
-}
-
-/* Return 1 if and only if the PWM interface is safe to use */
-static int it87_check_pwm(struct device *dev)
-{
- struct it87_data *data = dev_get_drvdata(dev);
- /*
- * Some BIOSes fail to correctly configure the IT87 fans. All fans off
- * and polarity set to active low is sign that this is the case so we
- * disable pwm control to protect the user.
- */
- int tmp = it87_read_value(data, IT87_REG_FAN_CTL);
- if ((tmp & 0x87) == 0) {
- if (fix_pwm_polarity) {
- /*
- * The user asks us to attempt a chip reconfiguration.
- * This means switching to active high polarity and
- * inverting all fan speed values.
- */
- int i;
- u8 pwm[3];
-
- for (i = 0; i < 3; i++)
- pwm[i] = it87_read_value(data,
- IT87_REG_PWM(i));
-
- /*
- * If any fan is in automatic pwm mode, the polarity
- * might be correct, as suspicious as it seems, so we
- * better don't change anything (but still disable the
- * PWM interface).
- */
- if (!((pwm[0] | pwm[1] | pwm[2]) & 0x80)) {
- dev_info(dev,
- "Reconfiguring PWM to active high polarity\n");
- it87_write_value(data, IT87_REG_FAN_CTL,
- tmp | 0x87);
- for (i = 0; i < 3; i++)
- it87_write_value(data,
- IT87_REG_PWM(i),
- 0x7f & ~pwm[i]);
- return 1;
- }
-
- dev_info(dev,
- "PWM configuration is too broken to be fixed\n");
- }
-
- dev_info(dev,
- "Detected broken BIOS defaults, disabling PWM interface\n");
- return 0;
- } else if (fix_pwm_polarity) {
- dev_info(dev,
- "PWM configuration looks sane, won't touch\n");
- }
-
- return 1;
-}
-
/* Called when we have found a new IT87. */
static void it87_init_device(struct platform_device *pdev)
{
@@ -2556,7 +2752,7 @@
* these have separate registers for the temperature mapping and the
* manual duty cycle.
*/
- for (i = 0; i < 3; i++) {
+ for (i = 0; i < NUM_AUTO_PWM; i++) {
data->pwm_temp_map[i] = i;
data->pwm_duty[i] = 0x7f; /* Full speed */
data->auto_pwm[i][3] = 0x7f; /* Full speed, hard-coded */
@@ -2569,12 +2765,12 @@
* means -1 degree C, which surprisingly doesn't trigger an alarm,
* but is still confusing, so change to 127 degrees C.
*/
- for (i = 0; i < 8; i++) {
+ for (i = 0; i < NUM_VIN_LIMIT; i++) {
tmp = it87_read_value(data, IT87_REG_VIN_MIN(i));
if (tmp == 0xff)
it87_write_value(data, IT87_REG_VIN_MIN(i), 0);
}
- for (i = 0; i < 3; i++) {
+ for (i = 0; i < NUM_TEMP_LIMIT; i++) {
tmp = it87_read_value(data, IT87_REG_TEMP_HIGH(i));
if (tmp == 0xff)
it87_write_value(data, IT87_REG_TEMP_HIGH(i), 127);
@@ -2619,158 +2815,245 @@
/* Check for additional fans */
if (has_five_fans(data)) {
- if (tmp & (1 << 4))
- data->has_fan |= (1 << 3); /* fan4 enabled */
- if (tmp & (1 << 5))
- data->has_fan |= (1 << 4); /* fan5 enabled */
- if (has_six_fans(data) && (tmp & (1 << 2)))
- data->has_fan |= (1 << 5); /* fan6 enabled */
+ if (tmp & BIT(4))
+ data->has_fan |= BIT(3); /* fan4 enabled */
+ if (tmp & BIT(5))
+ data->has_fan |= BIT(4); /* fan5 enabled */
+ if (has_six_fans(data) && (tmp & BIT(2)))
+ data->has_fan |= BIT(5); /* fan6 enabled */
}
/* Fan input pins may be used for alternative functions */
data->has_fan &= ~sio_data->skip_fan;
+ /* Check if pwm5, pwm6 are enabled */
+ if (has_six_pwm(data)) {
+ /* The following code may be IT8620E specific */
+ tmp = it87_read_value(data, IT87_REG_FAN_DIV);
+ if ((tmp & 0xc0) == 0xc0)
+ sio_data->skip_pwm |= BIT(4);
+ if (!(tmp & BIT(3)))
+ sio_data->skip_pwm |= BIT(5);
+ }
+
/* Start monitoring */
it87_write_value(data, IT87_REG_CONFIG,
(it87_read_value(data, IT87_REG_CONFIG) & 0x3e)
| (update_vbat ? 0x41 : 0x01));
}
-static void it87_update_pwm_ctrl(struct it87_data *data, int nr)
-{
- data->pwm_ctrl[nr] = it87_read_value(data, IT87_REG_PWM(nr));
- if (has_newer_autopwm(data)) {
- data->pwm_temp_map[nr] = data->pwm_ctrl[nr] & 0x03;
- data->pwm_duty[nr] = it87_read_value(data,
- IT87_REG_PWM_DUTY(nr));
- } else {
- if (data->pwm_ctrl[nr] & 0x80) /* Automatic mode */
- data->pwm_temp_map[nr] = data->pwm_ctrl[nr] & 0x03;
- else /* Manual mode */
- data->pwm_duty[nr] = data->pwm_ctrl[nr] & 0x7f;
- }
-
- if (has_old_autopwm(data)) {
- int i;
-
- for (i = 0; i < 5 ; i++)
- data->auto_temp[nr][i] = it87_read_value(data,
- IT87_REG_AUTO_TEMP(nr, i));
- for (i = 0; i < 3 ; i++)
- data->auto_pwm[nr][i] = it87_read_value(data,
- IT87_REG_AUTO_PWM(nr, i));
- }
-}
-
-static struct it87_data *it87_update_device(struct device *dev)
+/* Return 1 if and only if the PWM interface is safe to use */
+static int it87_check_pwm(struct device *dev)
{
struct it87_data *data = dev_get_drvdata(dev);
- int i;
+ /*
+ * Some BIOSes fail to correctly configure the IT87 fans. All fans off
+ * and polarity set to active low is sign that this is the case so we
+ * disable pwm control to protect the user.
+ */
+ int tmp = it87_read_value(data, IT87_REG_FAN_CTL);
- mutex_lock(&data->update_lock);
-
- if (time_after(jiffies, data->last_updated + HZ + HZ / 2)
- || !data->valid) {
- if (update_vbat) {
+ if ((tmp & 0x87) == 0) {
+ if (fix_pwm_polarity) {
/*
- * Cleared after each update, so reenable. Value
- * returned by this read will be previous value
+ * The user asks us to attempt a chip reconfiguration.
+ * This means switching to active high polarity and
+ * inverting all fan speed values.
*/
- it87_write_value(data, IT87_REG_CONFIG,
- it87_read_value(data, IT87_REG_CONFIG) | 0x40);
- }
- for (i = 0; i <= 7; i++) {
- data->in[i][0] =
- it87_read_value(data, IT87_REG_VIN(i));
- data->in[i][1] =
- it87_read_value(data, IT87_REG_VIN_MIN(i));
- data->in[i][2] =
- it87_read_value(data, IT87_REG_VIN_MAX(i));
- }
- /* in8 (battery) has no limit registers */
- data->in[8][0] = it87_read_value(data, IT87_REG_VIN(8));
- if (data->type == it8603)
- data->in[9][0] = it87_read_value(data, 0x2f);
+ int i;
+ u8 pwm[3];
- for (i = 0; i < 6; i++) {
- /* Skip disabled fans */
- if (!(data->has_fan & (1 << i)))
- continue;
+ for (i = 0; i < ARRAY_SIZE(pwm); i++)
+ pwm[i] = it87_read_value(data,
+ IT87_REG_PWM[i]);
- data->fan[i][1] =
- it87_read_value(data, IT87_REG_FAN_MIN[i]);
- data->fan[i][0] = it87_read_value(data,
- IT87_REG_FAN[i]);
- /* Add high byte if in 16-bit mode */
- if (has_16bit_fans(data)) {
- data->fan[i][0] |= it87_read_value(data,
- IT87_REG_FANX[i]) << 8;
- data->fan[i][1] |= it87_read_value(data,
- IT87_REG_FANX_MIN[i]) << 8;
+ /*
+ * If any fan is in automatic pwm mode, the polarity
+ * might be correct, as suspicious as it seems, so we
+ * better don't change anything (but still disable the
+ * PWM interface).
+ */
+ if (!((pwm[0] | pwm[1] | pwm[2]) & 0x80)) {
+ dev_info(dev,
+ "Reconfiguring PWM to active high polarity\n");
+ it87_write_value(data, IT87_REG_FAN_CTL,
+ tmp | 0x87);
+ for (i = 0; i < 3; i++)
+ it87_write_value(data,
+ IT87_REG_PWM[i],
+ 0x7f & ~pwm[i]);
+ return 1;
}
- }
- for (i = 0; i < 3; i++) {
- if (!(data->has_temp & (1 << i)))
- continue;
- data->temp[i][0] =
- it87_read_value(data, IT87_REG_TEMP(i));
- data->temp[i][1] =
- it87_read_value(data, IT87_REG_TEMP_LOW(i));
- data->temp[i][2] =
- it87_read_value(data, IT87_REG_TEMP_HIGH(i));
- if (has_temp_offset(data))
- data->temp[i][3] =
- it87_read_value(data,
- IT87_REG_TEMP_OFFSET[i]);
+
+ dev_info(dev,
+ "PWM configuration is too broken to be fixed\n");
}
- /* Newer chips don't have clock dividers */
- if ((data->has_fan & 0x07) && !has_16bit_fans(data)) {
- i = it87_read_value(data, IT87_REG_FAN_DIV);
- data->fan_div[0] = i & 0x07;
- data->fan_div[1] = (i >> 3) & 0x07;
- data->fan_div[2] = (i & 0x40) ? 3 : 1;
- }
-
- data->alarms =
- it87_read_value(data, IT87_REG_ALARM1) |
- (it87_read_value(data, IT87_REG_ALARM2) << 8) |
- (it87_read_value(data, IT87_REG_ALARM3) << 16);
- data->beeps = it87_read_value(data, IT87_REG_BEEP_ENABLE);
-
- data->fan_main_ctrl = it87_read_value(data,
- IT87_REG_FAN_MAIN_CTRL);
- data->fan_ctl = it87_read_value(data, IT87_REG_FAN_CTL);
- for (i = 0; i < 3; i++)
- it87_update_pwm_ctrl(data, i);
-
- data->sensor = it87_read_value(data, IT87_REG_TEMP_ENABLE);
- data->extra = it87_read_value(data, IT87_REG_TEMP_EXTRA);
- /*
- * The IT8705F does not have VID capability.
- * The IT8718F and later don't use IT87_REG_VID for the
- * same purpose.
- */
- if (data->type == it8712 || data->type == it8716) {
- data->vid = it87_read_value(data, IT87_REG_VID);
- /*
- * The older IT8712F revisions had only 5 VID pins,
- * but we assume it is always safe to read 6 bits.
- */
- data->vid &= 0x3f;
- }
- data->last_updated = jiffies;
- data->valid = 1;
+ dev_info(dev,
+ "Detected broken BIOS defaults, disabling PWM interface\n");
+ return 0;
+ } else if (fix_pwm_polarity) {
+ dev_info(dev,
+ "PWM configuration looks sane, won't touch\n");
}
- mutex_unlock(&data->update_lock);
-
- return data;
+ return 1;
}
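
The 0x7f & ~pwm[i] write above mirrors each 7-bit manual duty value when the polarity is switched to active high, so the effective fan speed is unchanged. A host-side sketch of that identity (illustrative only):

	#include <assert.h>

	int main(void)
	{
		unsigned int pwm;

		/* an active-low duty of x drives the fan like an active-high
		 * duty of 0x7f - x; the driver computes it as 0x7f & ~x */
		for (pwm = 0; pwm <= 0x7f; pwm++)
			assert((0x7f & ~pwm) == 0x7f - pwm);
		return 0;
	}
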
-static int __init it87_device_add(unsigned short address,
+static int it87_probe(struct platform_device *pdev)
+{
+ struct it87_data *data;
+ struct resource *res;
+ struct device *dev = &pdev->dev;
+ struct it87_sio_data *sio_data = dev_get_platdata(dev);
+ int enable_pwm_interface;
+ struct device *hwmon_dev;
+
+ res = platform_get_resource(pdev, IORESOURCE_IO, 0);
+ if (!devm_request_region(&pdev->dev, res->start, IT87_EC_EXTENT,
+ DRVNAME)) {
+ dev_err(dev, "Failed to request region 0x%lx-0x%lx\n",
+ (unsigned long)res->start,
+ (unsigned long)(res->start + IT87_EC_EXTENT - 1));
+ return -EBUSY;
+ }
+
+ data = devm_kzalloc(&pdev->dev, sizeof(struct it87_data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ data->addr = res->start;
+ data->type = sio_data->type;
+ data->features = it87_devices[sio_data->type].features;
+ data->peci_mask = it87_devices[sio_data->type].peci_mask;
+ data->old_peci_mask = it87_devices[sio_data->type].old_peci_mask;
+ /*
+ * IT8705F Datasheet 0.4.1, 3h == Version G.
+ * IT8712F Datasheet 0.9.1, section 8.3.5 indicates 8h == Version J.
+ * These are the first revisions with 16-bit tachometer support.
+ */
+ switch (data->type) {
+ case it87:
+ if (sio_data->revision >= 0x03) {
+ data->features &= ~FEAT_OLD_AUTOPWM;
+ data->features |= FEAT_FAN16_CONFIG | FEAT_16BIT_FANS;
+ }
+ break;
+ case it8712:
+ if (sio_data->revision >= 0x08) {
+ data->features &= ~FEAT_OLD_AUTOPWM;
+ data->features |= FEAT_FAN16_CONFIG | FEAT_16BIT_FANS |
+ FEAT_FIVE_FANS;
+ }
+ break;
+ default:
+ break;
+ }
+
+ /* Now, we do the remaining detection. */
+ if ((it87_read_value(data, IT87_REG_CONFIG) & 0x80) ||
+ it87_read_value(data, IT87_REG_CHIPID) != 0x90)
+ return -ENODEV;
+
+ platform_set_drvdata(pdev, data);
+
+ mutex_init(&data->update_lock);
+
+ /* Check PWM configuration */
+ enable_pwm_interface = it87_check_pwm(dev);
+
+ /* Starting with IT8721F, we handle scaling of internal voltages */
+ if (has_12mv_adc(data)) {
+ if (sio_data->internal & BIT(0))
+ data->in_scaled |= BIT(3); /* in3 is AVCC */
+ if (sio_data->internal & BIT(1))
+ data->in_scaled |= BIT(7); /* in7 is VSB */
+ if (sio_data->internal & BIT(2))
+ data->in_scaled |= BIT(8); /* in8 is Vbat */
+ if (sio_data->internal & BIT(3))
+ data->in_scaled |= BIT(9); /* in9 is AVCC */
+ } else if (sio_data->type == it8781 || sio_data->type == it8782 ||
+ sio_data->type == it8783) {
+ if (sio_data->internal & BIT(0))
+ data->in_scaled |= BIT(3); /* in3 is VCC5V */
+ if (sio_data->internal & BIT(1))
+ data->in_scaled |= BIT(7); /* in7 is VCCH5V */
+ }
+
+ data->has_temp = 0x07;
+ if (sio_data->skip_temp & BIT(2)) {
+ if (sio_data->type == it8782 &&
+ !(it87_read_value(data, IT87_REG_TEMP_EXTRA) & 0x80))
+ data->has_temp &= ~BIT(2);
+ }
+
+ data->in_internal = sio_data->internal;
+ data->has_in = 0x3ff & ~sio_data->skip_in;
+
+ if (has_six_temp(data)) {
+ u8 reg = it87_read_value(data, IT87_REG_TEMP456_ENABLE);
+
+ /* Check for additional temperature sensors */
+ if ((reg & 0x03) >= 0x02)
+ data->has_temp |= BIT(3);
+ if (((reg >> 2) & 0x03) >= 0x02)
+ data->has_temp |= BIT(4);
+ if (((reg >> 4) & 0x03) >= 0x02)
+ data->has_temp |= BIT(5);
+
+ /* Check for additional voltage sensors */
+ if ((reg & 0x03) == 0x01)
+ data->has_in |= BIT(10);
+ if (((reg >> 2) & 0x03) == 0x01)
+ data->has_in |= BIT(11);
+ if (((reg >> 4) & 0x03) == 0x01)
+ data->has_in |= BIT(12);
+ }
+
+ data->has_beep = !!sio_data->beep_pin;
+
+ /* Initialize the IT87 chip */
+ it87_init_device(pdev);
+
+ if (!sio_data->skip_vid) {
+ data->has_vid = true;
+ data->vrm = vid_which_vrm();
+ /* VID reading from Super-I/O config space if available */
+ data->vid = sio_data->vid_value;
+ }
+
+ /* Prepare for sysfs hooks */
+ data->groups[0] = &it87_group;
+ data->groups[1] = &it87_group_in;
+ data->groups[2] = &it87_group_temp;
+ data->groups[3] = &it87_group_fan;
+
+ if (enable_pwm_interface) {
+ data->has_pwm = BIT(ARRAY_SIZE(IT87_REG_PWM)) - 1;
+ data->has_pwm &= ~sio_data->skip_pwm;
+
+ data->groups[4] = &it87_group_pwm;
+ if (has_old_autopwm(data) || has_newer_autopwm(data))
+ data->groups[5] = &it87_group_auto_pwm;
+ }
+
+ hwmon_dev = devm_hwmon_device_register_with_groups(dev,
+ it87_devices[sio_data->type].name,
+ data, data->groups);
+ return PTR_ERR_OR_ZERO(hwmon_dev);
+}
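
With this, the driver registers everything through devm_hwmon_device_register_with_groups() and lets the is_visible callbacks prune attributes per chip, replacing the removed sysfs_create_group()/sysfs_create_file() error unwinding. A compressed sketch of the pattern for a hypothetical foo driver (foo_data, foo_attrs, and the present bitmap are illustrative, not it87 code):

	#include <linux/bits.h>
	#include <linux/device.h>
	#include <linux/err.h>
	#include <linux/hwmon.h>
	#include <linux/sysfs.h>

	struct foo_data {
		unsigned long present;	/* bitmap of populated channels */
	};

	static struct attribute *foo_attrs[] = {
		/* real sensor attributes elided */
		NULL,
	};

	static umode_t foo_is_visible(struct kobject *kobj,
				      struct attribute *attr, int index)
	{
		struct device *dev = container_of(kobj, struct device, kobj);
		struct foo_data *data = dev_get_drvdata(dev);

		/* hide the attribute entirely when the channel is absent */
		return (data->present & BIT(index)) ? attr->mode : 0;
	}

	static const struct attribute_group foo_group = {
		.attrs = foo_attrs,
		.is_visible = foo_is_visible,
	};
	ATTRIBUTE_GROUPS(foo);

	static int foo_probe(struct device *dev, struct foo_data *data)
	{
		struct device *hwmon_dev;

		hwmon_dev = devm_hwmon_device_register_with_groups(dev, "foo",
								   data, foo_groups);
		return PTR_ERR_OR_ZERO(hwmon_dev);
	}
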
+
+static struct platform_driver it87_driver = {
+ .driver = {
+ .name = DRVNAME,
+ },
+ .probe = it87_probe,
+};
+
+static int __init it87_device_add(int index, unsigned short address,
const struct it87_sio_data *sio_data)
{
+ struct platform_device *pdev;
struct resource res = {
.start = address + IT87_EC_OFFSET,
.end = address + IT87_EC_OFFSET + IT87_EC_EXTENT - 1,
@@ -2781,14 +3064,11 @@
err = acpi_check_resource_conflict(&res);
if (err)
- goto exit;
+ return err;
pdev = platform_device_alloc(DRVNAME, address);
- if (!pdev) {
- err = -ENOMEM;
- pr_err("Device allocation failed\n");
- goto exit;
- }
+ if (!pdev)
+ return -ENOMEM;
err = platform_device_add_resources(pdev, &res, 1);
if (err) {
@@ -2809,44 +3089,61 @@
goto exit_device_put;
}
+ it87_pdev[index] = pdev;
return 0;
exit_device_put:
platform_device_put(pdev);
-exit:
return err;
}
static int __init sm_it87_init(void)
{
- int err;
- unsigned short isa_address = 0;
+ int sioaddr[2] = { REG_2E, REG_4E };
struct it87_sio_data sio_data;
+ unsigned short isa_address;
+ bool found = false;
+ int i, err;
- memset(&sio_data, 0, sizeof(struct it87_sio_data));
- err = it87_find(&isa_address, &sio_data);
- if (err)
- return err;
err = platform_driver_register(&it87_driver);
if (err)
return err;
- err = it87_device_add(isa_address, &sio_data);
- if (err) {
- platform_driver_unregister(&it87_driver);
- return err;
+ for (i = 0; i < ARRAY_SIZE(sioaddr); i++) {
+ memset(&sio_data, 0, sizeof(struct it87_sio_data));
+ isa_address = 0;
+ err = it87_find(sioaddr[i], &isa_address, &sio_data);
+ if (err || isa_address == 0)
+ continue;
+
+ err = it87_device_add(i, isa_address, &sio_data);
+ if (err)
+ goto exit_dev_unregister;
+ found = true;
}
+ if (!found) {
+ err = -ENODEV;
+ goto exit_unregister;
+ }
return 0;
+
+exit_dev_unregister:
+ /* NULL check handled by platform_device_unregister */
+ platform_device_unregister(it87_pdev[0]);
+exit_unregister:
+ platform_driver_unregister(&it87_driver);
+ return err;
}
static void __exit sm_it87_exit(void)
{
- platform_device_unregister(pdev);
+ /* NULL check handled by platform_device_unregister */
+ platform_device_unregister(it87_pdev[1]);
+ platform_device_unregister(it87_pdev[0]);
platform_driver_unregister(&it87_driver);
}
-
MODULE_AUTHOR("Chris Gauthron, Jean Delvare <jdelvare@suse.de>");
MODULE_DESCRIPTION("IT8705F/IT871xF/IT872xF hardware monitoring driver");
module_param(update_vbat, bool, 0);
diff --git a/drivers/hwmon/max31722.c b/drivers/hwmon/max31722.c
new file mode 100644
index 0000000..30a100e
--- /dev/null
+++ b/drivers/hwmon/max31722.c
@@ -0,0 +1,165 @@
+/*
+ * max31722 - hwmon driver for Maxim Integrated MAX31722/MAX31723 SPI
+ * digital thermometer and thermostats.
+ *
+ * Copyright (c) 2016, Intel Corporation.
+ *
+ * This file is subject to the terms and conditions of version 2 of
+ * the GNU General Public License. See the file COPYING in the main
+ * directory of this archive for more details.
+ */
+
+#include <linux/acpi.h>
+#include <linux/hwmon.h>
+#include <linux/hwmon-sysfs.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/spi/spi.h>
+
+#define MAX31722_REG_CFG 0x00
+#define MAX31722_REG_TEMP_LSB 0x01
+
+#define MAX31722_MODE_CONTINUOUS 0x00
+#define MAX31722_MODE_STANDBY 0x01
+#define MAX31722_MODE_MASK 0xFE
+#define MAX31722_RESOLUTION_12BIT 0x06
+#define MAX31722_WRITE_MASK 0x80
+
+struct max31722_data {
+ struct device *hwmon_dev;
+ struct spi_device *spi_device;
+ u8 mode;
+};
+
+static int max31722_set_mode(struct max31722_data *data, u8 mode)
+{
+ int ret;
+ struct spi_device *spi = data->spi_device;
+ u8 buf[2] = {
+ MAX31722_REG_CFG | MAX31722_WRITE_MASK,
+ (data->mode & MAX31722_MODE_MASK) | mode
+ };
+
+ ret = spi_write(spi, &buf, sizeof(buf));
+ if (ret < 0) {
+ dev_err(&spi->dev, "failed to set sensor mode.\n");
+ return ret;
+ }
+ data->mode = (data->mode & MAX31722_MODE_MASK) | mode;
+
+ return 0;
+}
+
+static ssize_t max31722_show_temp(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ ssize_t ret;
+ struct max31722_data *data = dev_get_drvdata(dev);
+
+ ret = spi_w8r16(data->spi_device, MAX31722_REG_TEMP_LSB);
+ if (ret < 0)
+ return ret;
+ /* Keep 12 bits and multiply by the scale of 62.5 millidegrees/bit. */
+ return sprintf(buf, "%d\n", (s16)le16_to_cpu(ret) * 125 / 32);
+}
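
The temperature register is 1/256 degree C per LSB (integer degrees in the high byte, sixteenths in the top nibble of the low byte), so millidegrees = raw * 1000 / 256, reduced above to raw * 125 / 32. A host-side check with sample values (illustrative):

	#include <assert.h>
	#include <stdint.h>

	int main(void)
	{
		int16_t raw = 0x1990;		/* 25 + 9/16 degrees C */

		assert(raw * 125 / 32 == 25562);	/* 25562.5, truncated */
		raw = (int16_t)0xE700;			/* -25 degrees C */
		assert(raw * 125 / 32 == -25000);
		return 0;
	}
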
+
+static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
+ max31722_show_temp, NULL, 0);
+
+static struct attribute *max31722_attrs[] = {
+ &sensor_dev_attr_temp1_input.dev_attr.attr,
+ NULL,
+};
+
+ATTRIBUTE_GROUPS(max31722);
+
+static int max31722_probe(struct spi_device *spi)
+{
+ int ret;
+ struct max31722_data *data;
+
+ data = devm_kzalloc(&spi->dev, sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ spi_set_drvdata(spi, data);
+ data->spi_device = spi;
+ /*
+ * Set SD bit to 0 so we can have continuous measurements.
+ * Set resolution to 12 bits for maximum precision.
+ */
+ data->mode = MAX31722_MODE_CONTINUOUS | MAX31722_RESOLUTION_12BIT;
+ ret = max31722_set_mode(data, MAX31722_MODE_CONTINUOUS);
+ if (ret < 0)
+ return ret;
+
+ data->hwmon_dev = hwmon_device_register_with_groups(&spi->dev,
+ spi->modalias,
+ data,
+ max31722_groups);
+ if (IS_ERR(data->hwmon_dev)) {
+ max31722_set_mode(data, MAX31722_MODE_STANDBY);
+ return PTR_ERR(data->hwmon_dev);
+ }
+
+ return 0;
+}
+
+static int max31722_remove(struct spi_device *spi)
+{
+ struct max31722_data *data = spi_get_drvdata(spi);
+
+ hwmon_device_unregister(data->hwmon_dev);
+
+ return max31722_set_mode(data, MAX31722_MODE_STANDBY);
+}
+
+static int __maybe_unused max31722_suspend(struct device *dev)
+{
+ struct spi_device *spi_device = to_spi_device(dev);
+ struct max31722_data *data = spi_get_drvdata(spi_device);
+
+ return max31722_set_mode(data, MAX31722_MODE_STANDBY);
+}
+
+static int __maybe_unused max31722_resume(struct device *dev)
+{
+ struct spi_device *spi_device = to_spi_device(dev);
+ struct max31722_data *data = spi_get_drvdata(spi_device);
+
+ return max31722_set_mode(data, MAX31722_MODE_CONTINUOUS);
+}
+
+static SIMPLE_DEV_PM_OPS(max31722_pm_ops, max31722_suspend, max31722_resume);
+
+static const struct spi_device_id max31722_spi_id[] = {
+ {"max31722", 0},
+ {"max31723", 0},
+ {}
+};
+
+static const struct acpi_device_id __maybe_unused max31722_acpi_id[] = {
+ {"MAX31722", 0},
+ {"MAX31723", 0},
+ {}
+};
+
+MODULE_DEVICE_TABLE(spi, max31722_spi_id);
+
+static struct spi_driver max31722_driver = {
+ .driver = {
+ .name = "max31722",
+ .pm = &max31722_pm_ops,
+ .acpi_match_table = ACPI_PTR(max31722_acpi_id),
+ },
+ .probe = max31722_probe,
+ .remove = max31722_remove,
+ .id_table = max31722_spi_id,
+};
+
+module_spi_driver(max31722_driver);
+
+MODULE_AUTHOR("Tiberiu Breana <tiberiu.a.breana@intel.com>");
+MODULE_DESCRIPTION("max31722 sensor driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/hwmon/sch5636.c b/drivers/hwmon/sch5636.c
index 131a281..d24d7b6 100644
--- a/drivers/hwmon/sch5636.c
+++ b/drivers/hwmon/sch5636.c
@@ -449,7 +449,7 @@
}
revision[i] = val;
}
- pr_info("Found %s chip at %#hx, revison: %d.%02d\n", DEVNAME,
+ pr_info("Found %s chip at %#hx, revision: %d.%02d\n", DEVNAME,
data->addr, revision[0], revision[1]);
/* Read all temp + fan ctrl registers to determine which are active */
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index c6935de..c966492 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -766,6 +766,67 @@
.enter = NULL }
};
+static struct cpuidle_state bxt_cstates[] = {
+ {
+ .name = "C1-BXT",
+ .desc = "MWAIT 0x00",
+ .flags = MWAIT2flg(0x00),
+ .exit_latency = 2,
+ .target_residency = 2,
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
+ {
+ .name = "C1E-BXT",
+ .desc = "MWAIT 0x01",
+ .flags = MWAIT2flg(0x01),
+ .exit_latency = 10,
+ .target_residency = 20,
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
+ {
+ .name = "C6-BXT",
+ .desc = "MWAIT 0x20",
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 133,
+ .target_residency = 133,
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
+ {
+ .name = "C7s-BXT",
+ .desc = "MWAIT 0x31",
+ .flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 155,
+ .target_residency = 155,
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
+ {
+ .name = "C8-BXT",
+ .desc = "MWAIT 0x40",
+ .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 1000,
+ .target_residency = 1000,
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
+ {
+ .name = "C9-BXT",
+ .desc = "MWAIT 0x50",
+ .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 2000,
+ .target_residency = 2000,
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
+ {
+ .name = "C10-BXT",
+ .desc = "MWAIT 0x60",
+ .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 10000,
+ .target_residency = 10000,
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
+ {
+ .enter = NULL }
+};
+
/**
* intel_idle
* @dev: cpuidle_device
@@ -950,6 +1011,11 @@
.state_table = knl_cstates,
};
+static const struct idle_cpu idle_cpu_bxt = {
+ .state_table = bxt_cstates,
+ .disable_promotion_to_c1e = true,
+};
+
#define ICPU(model, cpu) \
{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_MWAIT, (unsigned long)&cpu }
@@ -985,6 +1051,7 @@
ICPU(0x9e, idle_cpu_skl),
ICPU(0x55, idle_cpu_skx),
ICPU(0x57, idle_cpu_knl),
+ ICPU(0x5c, idle_cpu_bxt),
{}
};
MODULE_DEVICE_TABLE(x86cpu, intel_idle_ids);
@@ -1075,6 +1142,73 @@
/* else, 1 and 2 socket systems use default ivt_cstates */
}
+
+/*
+ * Translate IRTL (Interrupt Response Time Limit) MSR to usec
+ */
+
+static unsigned int irtl_ns_units[] = {
+ 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };
+
+static unsigned long long irtl_2_usec(unsigned long long irtl)
+{
+ unsigned long long ns;
+
+ ns = irtl_ns_units[(irtl >> 10) & 0x7];
+
+ return div64_u64((irtl & 0x3FF) * ns, 1000);
+}
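
An IRTL MSR packs a 10-bit time value with a 3-bit time-unit exponent in bits 12:10 (which is why the mask above is 0x7 and why irtl_ns_units has eight entries); the unit table gives nanoseconds per count, and the product is divided down to microseconds. A host-side rehearsal, with div64_u64 replaced by plain division:

	#include <assert.h>

	static unsigned long long irtl_ns_units[] = {
		1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };

	static unsigned long long irtl_2_usec(unsigned long long irtl)
	{
		unsigned long long ns = irtl_ns_units[(irtl >> 10) & 0x7];

		return (irtl & 0x3FF) * ns / 1000;
	}

	int main(void)
	{
		/* unit field 2 (1024 ns/count), count 500 -> 512 usec */
		assert(irtl_2_usec(2ULL << 10 | 500) == 512);
		/* unit field 0 (1 ns/count), count 999 -> truncates to 0 */
		assert(irtl_2_usec(999) == 0);
		return 0;
	}
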
+/*
+ * bxt_idle_state_table_update(void)
+ *
+ * On BXT, we trust the IRTL to show the definitive maximum latency.
+ * We use the same value for target_residency.
+ */
+static void bxt_idle_state_table_update(void)
+{
+ unsigned long long msr;
+
+ rdmsrl(MSR_PKGC6_IRTL, msr);
+ if (msr) {
+ unsigned int usec = irtl_2_usec(msr);
+
+ bxt_cstates[2].exit_latency = usec;
+ bxt_cstates[2].target_residency = usec;
+ }
+
+ rdmsrl(MSR_PKGC7_IRTL, msr);
+ if (msr) {
+ unsigned int usec = irtl_2_usec(msr);
+
+ bxt_cstates[3].exit_latency = usec;
+ bxt_cstates[3].target_residency = usec;
+ }
+
+ rdmsrl(MSR_PKGC8_IRTL, msr);
+ if (msr) {
+ unsigned int usec = irtl_2_usec(msr);
+
+ bxt_cstates[4].exit_latency = usec;
+ bxt_cstates[4].target_residency = usec;
+ }
+
+ rdmsrl(MSR_PKGC9_IRTL, msr);
+ if (msr) {
+ unsigned int usec = irtl_2_usec(msr);
+
+ bxt_cstates[5].exit_latency = usec;
+ bxt_cstates[5].target_residency = usec;
+ }
+
+ rdmsrl(MSR_PKGC10_IRTL, msr);
+ if (msr) {
+ unsigned int usec = irtl_2_usec(msr);
+
+ bxt_cstates[6].exit_latency = usec;
+ bxt_cstates[6].target_residency = usec;
+ }
+}
/*
* sklh_idle_state_table_update(void)
*
@@ -1130,6 +1264,9 @@
case 0x3e: /* IVT */
ivt_idle_state_table_update();
break;
+ case 0x5c: /* BXT */
+ bxt_idle_state_table_update();
+ break;
case 0x5e: /* SKL-H */
sklh_idle_state_table_update();
break;
diff --git a/drivers/input/joystick/analog.c b/drivers/input/joystick/analog.c
index 6f8b084..3d8ff09 100644
--- a/drivers/input/joystick/analog.c
+++ b/drivers/input/joystick/analog.c
@@ -143,9 +143,9 @@
#include <linux/i8253.h>
-#define GET_TIME(x) do { if (cpu_has_tsc) x = (unsigned int)rdtsc(); else x = get_time_pit(); } while (0)
-#define DELTA(x,y) (cpu_has_tsc ? ((y) - (x)) : ((x) - (y) + ((x) < (y) ? PIT_TICK_RATE / HZ : 0)))
-#define TIME_NAME (cpu_has_tsc?"TSC":"PIT")
+#define GET_TIME(x) do { if (boot_cpu_has(X86_FEATURE_TSC)) x = (unsigned int)rdtsc(); else x = get_time_pit(); } while (0)
+#define DELTA(x,y) (boot_cpu_has(X86_FEATURE_TSC) ? ((y) - (x)) : ((x) - (y) + ((x) < (y) ? PIT_TICK_RATE / HZ : 0)))
+#define TIME_NAME (boot_cpu_has(X86_FEATURE_TSC)?"TSC":"PIT")
static unsigned int get_time_pit(void)
{
unsigned long flags;
diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
index 8adaaea..49721b4 100644
--- a/drivers/iommu/irq_remapping.c
+++ b/drivers/iommu/irq_remapping.c
@@ -36,7 +36,7 @@
* As this gets called during crash dump, keep this simple for
* now.
*/
- if (cpu_has_apic || apic_from_smp_config())
+ if (boot_cpu_has(X86_FEATURE_APIC) || apic_from_smp_config())
disconnect_bsp_APIC(0);
}
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 282344b..095bb5b 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -55,7 +55,7 @@
static void gic_check_cpu_features(void)
{
- WARN_TAINT_ONCE(cpus_have_cap(ARM64_HAS_SYSREG_GIC_CPUIF),
+ WARN_TAINT_ONCE(this_cpu_has_cap(ARM64_HAS_SYSREG_GIC_CPUIF),
TAINT_CPU_OUT_OF_SPEC,
"GICv3 system registers enabled, broken firmware!\n");
}
@@ -490,6 +490,7 @@
* Get what the GIC says our CPU mask is.
*/
BUG_ON(cpu >= NR_GIC_CPU_IF);
+ gic_check_cpu_features();
cpu_mask = gic_get_cpumask(gic);
gic_cpu_map[cpu] = cpu_mask;
@@ -1021,8 +1022,6 @@
BUG_ON(gic_nr >= CONFIG_ARM_GIC_MAX_NR);
- gic_check_cpu_features();
-
gic = &gic_data[gic_nr];
/* Initialize irq_chip */
diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index 2251478..5ae2834 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig
@@ -413,10 +413,11 @@
tristate "LED driver for Intel NAS SS4200 series"
depends on LEDS_CLASS
depends on PCI && DMI
+ depends on X86
help
This option enables support for the Intel SS4200 series of
- Network Attached Storage servers. You may control the hard
- drive or power LEDs on the front panel. Using this driver
+ Network Attached Storage servers. You may control the hard
+ drive or power LEDs on the front panel. Using this driver
can stop the front LED from blinking after startup.
config LEDS_LT3593
diff --git a/drivers/leds/led-triggers.c b/drivers/leds/led-triggers.c
index 2181581..55fa65e 100644
--- a/drivers/leds/led-triggers.c
+++ b/drivers/leds/led-triggers.c
@@ -26,7 +26,7 @@
* Nests outside led_cdev->trigger_lock
*/
static DECLARE_RWSEM(triggers_list_lock);
-static LIST_HEAD(trigger_list);
+LIST_HEAD(trigger_list);
/* Used by LED Class */
diff --git a/drivers/leds/leds-gpio.c b/drivers/leds/leds-gpio.c
index 61143f5..8229f06 100644
--- a/drivers/leds/leds-gpio.c
+++ b/drivers/leds/leds-gpio.c
@@ -127,6 +127,8 @@
led_dat->cdev.brightness = state ? LED_FULL : LED_OFF;
if (!template->retain_state_suspended)
led_dat->cdev.flags |= LED_CORE_SUSPENDRESUME;
+ if (template->panic_indicator)
+ led_dat->cdev.flags |= LED_PANIC_INDICATOR;
ret = gpiod_direction_output(led_dat->gpiod, state);
if (ret < 0)
@@ -200,6 +202,8 @@
if (fwnode_property_present(child, "retain-state-suspended"))
led.retain_state_suspended = 1;
+ if (fwnode_property_present(child, "panic-indicator"))
+ led.panic_indicator = 1;
ret = create_gpio_led(&led, &priv->leds[priv->num_leds],
dev, NULL);
diff --git a/drivers/leds/leds-ss4200.c b/drivers/leds/leds-ss4200.c
index 046cb70..732eb86 100644
--- a/drivers/leds/leds-ss4200.c
+++ b/drivers/leds/leds-ss4200.c
@@ -101,6 +101,19 @@
DMI_MATCH(DMI_PRODUCT_VERSION, "1.00.00")
}
},
+ {
+ /*
+ * FUJITSU SIEMENS SCALEO Home Server/SS4200-E
+ * BIOS V090L 12/19/2007
+ */
+ .callback = ss4200_led_dmi_callback,
+ .ident = "Fujitsu Siemens SCALEO Home Server",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "SCALEO Home Server"),
+ DMI_MATCH(DMI_PRODUCT_VERSION, "1.00.00")
+ }
+ },
{}
};
diff --git a/drivers/leds/leds-tca6507.c b/drivers/leds/leds-tca6507.c
index c548ea1..45222a7 100644
--- a/drivers/leds/leds-tca6507.c
+++ b/drivers/leds/leds-tca6507.c
@@ -327,6 +327,8 @@
int result;
result = choose_times(tca->bank[bank].ontime, &c1, &c2);
+ if (result < 0)
+ return;
dev_dbg(&tca->client->dev,
"Chose on times %d(%d) %d(%d) for %dms\n",
c1, time_codes[c1],
diff --git a/drivers/leds/leds.h b/drivers/leds/leds.h
index db3f20d..7d38e6b 100644
--- a/drivers/leds/leds.h
+++ b/drivers/leds/leds.h
@@ -30,5 +30,6 @@
extern struct rw_semaphore leds_list_lock;
extern struct list_head leds_list;
+extern struct list_head trigger_list;
#endif /* __LEDS_H_INCLUDED */
diff --git a/drivers/leds/trigger/Kconfig b/drivers/leds/trigger/Kconfig
index 5bda6a9..9893d91 100644
--- a/drivers/leds/trigger/Kconfig
+++ b/drivers/leds/trigger/Kconfig
@@ -41,6 +41,14 @@
This allows LEDs to be controlled by IDE disk activity.
If unsure, say Y.
+config LEDS_TRIGGER_MTD
+ bool "LED MTD (NAND/NOR) Trigger"
+ depends on MTD
+ depends on LEDS_TRIGGERS
+ help
+ This allows LEDs to be controlled by MTD activity.
+ If unsure, say N.
+
config LEDS_TRIGGER_HEARTBEAT
tristate "LED Heartbeat Trigger"
depends on LEDS_TRIGGERS
@@ -108,4 +116,14 @@
This enables direct flash/torch on/off by the driver, kernel space.
If unsure, say Y.
+config LEDS_TRIGGER_PANIC
+ bool "LED Panic Trigger"
+ depends on LEDS_TRIGGERS
+ help
+ This allows LEDs to be configured to blink on a kernel panic.
+ Enabling this option will allow marking certain LEDs as panic
+ indicators, so that they blink on a kernel panic even if they
+ are set to a different trigger.
+ If unsure, say Y.
+
endif # LEDS_TRIGGERS
diff --git a/drivers/leds/trigger/Makefile b/drivers/leds/trigger/Makefile
index 1abf48d..8cc64a4 100644
--- a/drivers/leds/trigger/Makefile
+++ b/drivers/leds/trigger/Makefile
@@ -1,6 +1,7 @@
obj-$(CONFIG_LEDS_TRIGGER_TIMER) += ledtrig-timer.o
obj-$(CONFIG_LEDS_TRIGGER_ONESHOT) += ledtrig-oneshot.o
obj-$(CONFIG_LEDS_TRIGGER_IDE_DISK) += ledtrig-ide-disk.o
+obj-$(CONFIG_LEDS_TRIGGER_MTD) += ledtrig-mtd.o
obj-$(CONFIG_LEDS_TRIGGER_HEARTBEAT) += ledtrig-heartbeat.o
obj-$(CONFIG_LEDS_TRIGGER_BACKLIGHT) += ledtrig-backlight.o
obj-$(CONFIG_LEDS_TRIGGER_GPIO) += ledtrig-gpio.o
@@ -8,3 +9,4 @@
obj-$(CONFIG_LEDS_TRIGGER_DEFAULT_ON) += ledtrig-default-on.o
obj-$(CONFIG_LEDS_TRIGGER_TRANSIENT) += ledtrig-transient.o
obj-$(CONFIG_LEDS_TRIGGER_CAMERA) += ledtrig-camera.o
+obj-$(CONFIG_LEDS_TRIGGER_PANIC) += ledtrig-panic.o
diff --git a/drivers/leds/trigger/ledtrig-ide-disk.c b/drivers/leds/trigger/ledtrig-ide-disk.c
index c02a3ac..15123d3 100644
--- a/drivers/leds/trigger/ledtrig-ide-disk.c
+++ b/drivers/leds/trigger/ledtrig-ide-disk.c
@@ -18,10 +18,11 @@
#define BLINK_DELAY 30
DEFINE_LED_TRIGGER(ledtrig_ide);
-static unsigned long ide_blink_delay = BLINK_DELAY;
void ledtrig_ide_activity(void)
{
+ unsigned long ide_blink_delay = BLINK_DELAY;
+
led_trigger_blink_oneshot(ledtrig_ide,
&ide_blink_delay, &ide_blink_delay, 0);
}
diff --git a/drivers/leds/trigger/ledtrig-mtd.c b/drivers/leds/trigger/ledtrig-mtd.c
new file mode 100644
index 0000000..99b5b0a
--- /dev/null
+++ b/drivers/leds/trigger/ledtrig-mtd.c
@@ -0,0 +1,45 @@
+/*
+ * LED MTD trigger
+ *
+ * Copyright 2016 Ezequiel Garcia <ezequiel@vanguardiasur.com.ar>
+ *
+ * Based on LED IDE-Disk Activity Trigger
+ *
+ * Copyright 2006 Openedhand Ltd.
+ *
+ * Author: Richard Purdie <rpurdie@openedhand.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/leds.h>
+
+#define BLINK_DELAY 30
+
+DEFINE_LED_TRIGGER(ledtrig_mtd);
+DEFINE_LED_TRIGGER(ledtrig_nand);
+
+void ledtrig_mtd_activity(void)
+{
+ unsigned long blink_delay = BLINK_DELAY;
+
+ led_trigger_blink_oneshot(ledtrig_mtd,
+ &blink_delay, &blink_delay, 0);
+ led_trigger_blink_oneshot(ledtrig_nand,
+ &blink_delay, &blink_delay, 0);
+}
+EXPORT_SYMBOL(ledtrig_mtd_activity);
+
+static int __init ledtrig_mtd_init(void)
+{
+ led_trigger_register_simple("mtd", &ledtrig_mtd);
+ led_trigger_register_simple("nand-disk", &ledtrig_nand);
+
+ return 0;
+}
+device_initcall(ledtrig_mtd_init);
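A note on wiring this trigger up: callers outside the LED core need a declaration for ledtrig_mtd_activity() that degrades to a no-op when the trigger is compiled out. A minimal sketch of that glue and of a call site follows; neither is part of the diff above, and my_flash_io() is purely illustrative:

#ifdef CONFIG_LEDS_TRIGGER_MTD
void ledtrig_mtd_activity(void);
#else
static inline void ledtrig_mtd_activity(void) {}
#endif

/* then, from an MTD driver's I/O path: */
static void my_flash_io(void)
{
	ledtrig_mtd_activity();	/* one-shot blink of the "mtd" and "nand-disk" LEDs */
	/* ...perform the actual flash read/write... */
}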
diff --git a/drivers/leds/trigger/ledtrig-panic.c b/drivers/leds/trigger/ledtrig-panic.c
new file mode 100644
index 0000000..d735526
--- /dev/null
+++ b/drivers/leds/trigger/ledtrig-panic.c
@@ -0,0 +1,77 @@
+/*
+ * Kernel Panic LED Trigger
+ *
+ * Copyright 2016 Ezequiel Garcia <ezequiel@vanguardiasur.com.ar>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/notifier.h>
+#include <linux/leds.h>
+#include "../leds.h"
+
+static struct led_trigger *trigger;
+
+/*
+ * This is called in a special context by the atomic panic
+ * notifier. This means the trigger can be changed without
+ * worrying about locking.
+ */
+static void led_trigger_set_panic(struct led_classdev *led_cdev)
+{
+ struct led_trigger *trig;
+
+ list_for_each_entry(trig, &trigger_list, next_trig) {
+ if (strcmp("panic", trig->name))
+ continue;
+ if (led_cdev->trigger)
+ list_del(&led_cdev->trig_list);
+ list_add_tail(&led_cdev->trig_list, &trig->led_cdevs);
+
+ /* Avoid the delayed blink path */
+ led_cdev->blink_delay_on = 0;
+ led_cdev->blink_delay_off = 0;
+
+ led_cdev->trigger = trig;
+ if (trig->activate)
+ trig->activate(led_cdev);
+ break;
+ }
+}
+
+static int led_trigger_panic_notifier(struct notifier_block *nb,
+ unsigned long code, void *unused)
+{
+ struct led_classdev *led_cdev;
+
+ list_for_each_entry(led_cdev, &leds_list, node)
+ if (led_cdev->flags & LED_PANIC_INDICATOR)
+ led_trigger_set_panic(led_cdev);
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block led_trigger_panic_nb = {
+ .notifier_call = led_trigger_panic_notifier,
+};
+
+static long led_panic_blink(int state)
+{
+ led_trigger_event(trigger, state ? LED_FULL : LED_OFF);
+ return 0;
+}
+
+static int __init ledtrig_panic_init(void)
+{
+ atomic_notifier_chain_register(&panic_notifier_list,
+ &led_trigger_panic_nb);
+
+ led_trigger_register_simple("panic", &trigger);
+ panic_blink = led_panic_blink;
+ return 0;
+}
+device_initcall(ledtrig_panic_init);
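The trigger only acts on LEDs that carry LED_PANIC_INDICATOR, which leds-gpio sets above when the firmware node has a "panic-indicator" property. A driver can also opt in directly when registering its classdev; a minimal sketch, with every my_* name hypothetical:

static void my_led_set(struct led_classdev *cdev, enum led_brightness value)
{
	/* drive the hardware (hypothetical) */
}

static struct led_classdev my_led = {
	.name		= "status:red:panic",
	.brightness_set	= my_led_set,
	.flags		= LED_PANIC_INDICATOR,	/* picked up by led_trigger_set_panic() */
};

/* in probe: */
ret = led_classdev_register(dev, &my_led);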
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
index adc162c..6e9042e 100644
--- a/drivers/lguest/x86/core.c
+++ b/drivers/lguest/x86/core.c
@@ -603,7 +603,7 @@
* doing this.
*/
get_online_cpus();
- if (cpu_has_pge) { /* We have a broader idea of "global". */
+ if (boot_cpu_has(X86_FEATURE_PGE)) { /* We have a broader idea of "global". */
/* Remember that this was originally set (for cleanup). */
cpu_had_pge = 1;
/*
diff --git a/drivers/misc/sgi-gru/grukservices.c b/drivers/misc/sgi-gru/grukservices.c
index 967b9dd..0307690 100644
--- a/drivers/misc/sgi-gru/grukservices.c
+++ b/drivers/misc/sgi-gru/grukservices.c
@@ -718,8 +718,8 @@
static int send_message_put_nacked(void *cb, struct gru_message_queue_desc *mqd,
void *mesg, int lines)
{
- unsigned long m, *val = mesg, gpa, save;
- int ret;
+ unsigned long m;
+ int ret, loops = 200; /* experimentally determined */
m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6);
if (lines == 2) {
@@ -735,22 +735,28 @@
return MQE_OK;
/*
- * Send a cross-partition interrupt to the SSI that contains the target
- * message queue. Normally, the interrupt is automatically delivered by
- * hardware but some error conditions require explicit delivery.
- * Use the GRU to deliver the interrupt. Otherwise partition failures
+ * Send a noop message in order to deliver a cross-partition interrupt
+ * to the SSI that contains the target message queue. Normally, the
+ * interrupt is automatically delivered by hardware following mesq
+ * operations, but some error conditions require explicit delivery.
+ * The noop message will trigger delivery. Otherwise partition failures
* could cause unrecovered errors.
*/
- gpa = uv_global_gru_mmr_address(mqd->interrupt_pnode, UVH_IPI_INT);
- save = *val;
- *val = uv_hub_ipi_value(mqd->interrupt_apicid, mqd->interrupt_vector,
- dest_Fixed);
- gru_vstore_phys(cb, gpa, gru_get_tri(mesg), IAA_REGISTER, IMA);
- ret = gru_wait(cb);
- *val = save;
- if (ret != CBS_IDLE)
- return MQE_UNEXPECTED_CB_ERR;
- return MQE_OK;
+ do {
+ ret = send_noop_message(cb, mqd, mesg);
+ } while ((ret == MQIE_AGAIN || ret == MQE_CONGESTION) && (loops-- > 0));
+
+ if (ret == MQIE_AGAIN || ret == MQE_CONGESTION) {
+ /*
+ * Don't tell the app to resend the message, as it has
+ * already been successfully sent. We simply return OK
+ * (rather than fail the send with MQE_UNEXPECTED_CB_ERR),
+ * assuming that the other side is receiving enough
+ * interrupts to get this message processed anyway.
+ */
+ ret = MQE_OK;
+ }
+ return ret;
}
/*
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index 8a0147d..5f2a3d69 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -35,6 +35,7 @@
#include <linux/capability.h>
#include <linux/compat.h>
#include <linux/pm_runtime.h>
+#include <linux/idr.h>
#include <linux/mmc/ioctl.h>
#include <linux/mmc/card.h>
@@ -78,14 +79,14 @@
/*
* We've only got one major, so number of mmcblk devices is
* limited to (1 << 20) / number of minors per device. It is also
- * currently limited by the size of the static bitmaps below.
+ * limited by the MAX_DEVICES below.
*/
static int max_devices;
#define MAX_DEVICES 256
-/* TODO: Replace these with struct ida */
-static DECLARE_BITMAP(dev_use, MAX_DEVICES);
+static DEFINE_IDA(mmc_blk_ida);
+static DEFINE_SPINLOCK(mmc_blk_lock);
/*
* There is one mmc_blk_data per slot.
@@ -178,7 +179,9 @@
int devidx = mmc_get_devidx(md->disk);
blk_cleanup_queue(md->queue.queue);
- __clear_bit(devidx, dev_use);
+ spin_lock(&mmc_blk_lock);
+ ida_remove(&mmc_blk_ida, devidx);
+ spin_unlock(&mmc_blk_lock);
put_disk(md->disk);
kfree(md);
@@ -945,16 +948,22 @@
req->rq_disk->disk_name, "timed out", name, status);
/* If the status cmd initially failed, retry the r/w cmd */
- if (!status_valid)
+ if (!status_valid) {
+ pr_err("%s: status not valid, retrying timeout\n",
+ req->rq_disk->disk_name);
return ERR_RETRY;
+ }
/*
* If it was a r/w cmd crc error, or illegal command
* (eg, issued in wrong state) then retry - we should
* have corrected the state problem above.
*/
- if (status & (R1_COM_CRC_ERROR | R1_ILLEGAL_COMMAND))
+ if (status & (R1_COM_CRC_ERROR | R1_ILLEGAL_COMMAND)) {
+ pr_err("%s: command error, retrying timeout\n",
+ req->rq_disk->disk_name);
return ERR_RETRY;
+ }
/* Otherwise abort the command */
return ERR_ABORT;
@@ -2189,10 +2198,23 @@
struct mmc_blk_data *md;
int devidx, ret;
- devidx = find_first_zero_bit(dev_use, max_devices);
- if (devidx >= max_devices)
- return ERR_PTR(-ENOSPC);
- __set_bit(devidx, dev_use);
+again:
+ if (!ida_pre_get(&mmc_blk_ida, GFP_KERNEL))
+ return ERR_PTR(-ENOMEM);
+
+ spin_lock(&mmc_blk_lock);
+ ret = ida_get_new(&mmc_blk_ida, &devidx);
+ spin_unlock(&mmc_blk_lock);
+
+ if (ret == -EAGAIN)
+ goto again;
+ else if (ret)
+ return ERR_PTR(ret);
+
+ if (devidx >= max_devices) {
+ ret = -ENOSPC;
+ goto out;
+ }
md = kzalloc(sizeof(struct mmc_blk_data), GFP_KERNEL);
if (!md) {
@@ -2289,6 +2311,9 @@
err_kfree:
kfree(md);
out:
+ spin_lock(&mmc_blk_lock);
+ ida_remove(&mmc_blk_ida, devidx);
+ spin_unlock(&mmc_blk_lock);
return ERR_PTR(ret);
}
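The two-step allocation above is the standard pattern for this generation of the IDA API: preallocate memory outside the spinlock with ida_pre_get(), allocate the id under the lock with ida_get_new(), and retry on -EAGAIN when a racing allocator consumed the preallocated node. Stripped of the mmcblk specifics, the shape is (names illustrative):

static DEFINE_IDA(my_ida);
static DEFINE_SPINLOCK(my_lock);

static int my_alloc_id(void)
{
	int id, ret;

again:
	if (!ida_pre_get(&my_ida, GFP_KERNEL))
		return -ENOMEM;

	spin_lock(&my_lock);
	ret = ida_get_new(&my_ida, &id);
	spin_unlock(&my_lock);

	if (ret == -EAGAIN)	/* preallocated node stolen by a racer */
		goto again;

	return ret ? ret : id;
}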
diff --git a/drivers/mmc/core/Kconfig b/drivers/mmc/core/Kconfig
index 4c33d76..250f223 100644
--- a/drivers/mmc/core/Kconfig
+++ b/drivers/mmc/core/Kconfig
@@ -1,3 +1,24 @@
#
# MMC core configuration
#
+config PWRSEQ_EMMC
+ tristate "HW reset support for eMMC"
+ default y
+ depends on OF
+ help
+ This selects hardware reset support (pwrseq-emmc) for eMMC
+ devices. By default this option is set to y.
+
+ This driver can also be built as a module. If so, the module
+ will be called pwrseq_emmc.
+
+config PWRSEQ_SIMPLE
+ tristate "Simple HW reset support for MMC"
+ default y
+ depends on OF
+ help
+ This selects simple hardware reset support (pwrseq-simple) for MMC
+ devices. By default this option is set to y.
+
+ This driver can also be built as a module. If so, the module
+ will be called pwrseq_simple.
diff --git a/drivers/mmc/core/Makefile b/drivers/mmc/core/Makefile
index 2c25138..f007151 100644
--- a/drivers/mmc/core/Makefile
+++ b/drivers/mmc/core/Makefile
@@ -8,5 +8,7 @@
sdio.o sdio_ops.o sdio_bus.o \
sdio_cis.o sdio_io.o sdio_irq.o \
quirks.o slot-gpio.o
-mmc_core-$(CONFIG_OF) += pwrseq.o pwrseq_simple.o pwrseq_emmc.o
+mmc_core-$(CONFIG_OF) += pwrseq.o
+obj-$(CONFIG_PWRSEQ_SIMPLE) += pwrseq_simple.o
+obj-$(CONFIG_PWRSEQ_EMMC) += pwrseq_emmc.o
mmc_core-$(CONFIG_DEBUG_FS) += debugfs.o
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 41b1e76..99275e4 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -36,6 +36,9 @@
#include <linux/mmc/sd.h>
#include <linux/mmc/slot-gpio.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/mmc.h>
+
#include "core.h"
#include "bus.h"
#include "host.h"
@@ -140,6 +143,8 @@
cmd->retries = 0;
}
+ trace_mmc_request_done(host, mrq);
+
if (err && cmd->retries && !mmc_card_removed(host->card)) {
/*
* Request starter must handle retries - see
@@ -215,6 +220,8 @@
}
}
+ trace_mmc_request_start(host, mrq);
+
host->ops->request(host, mrq);
}
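A usage note on the two tracepoints added above: assuming the new trace/events/mmc.h header declares its TRACE_SYSTEM as mmc, the mmc_request_start and mmc_request_done events should appear under tracefs (typically /sys/kernel/debug/tracing/events/mmc/) and can be enabled individually or as a group like any other event.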
@@ -2449,8 +2456,9 @@
ret = host->bus_ops->reset(host);
mmc_bus_put(host);
- if (ret != -EOPNOTSUPP)
- pr_warn("%s: tried to reset card\n", mmc_hostname(host));
+ if (ret)
+ pr_warn("%s: tried to reset card, got error %d\n",
+ mmc_hostname(host), ret);
return ret;
}
diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
index 6e4c55a..e0a3ee1 100644
--- a/drivers/mmc/core/host.c
+++ b/drivers/mmc/core/host.c
@@ -33,14 +33,14 @@
#define cls_dev_to_mmc_host(d) container_of(d, struct mmc_host, class_dev)
-static DEFINE_IDR(mmc_host_idr);
+static DEFINE_IDA(mmc_host_ida);
static DEFINE_SPINLOCK(mmc_host_lock);
static void mmc_host_classdev_release(struct device *dev)
{
struct mmc_host *host = cls_dev_to_mmc_host(dev);
spin_lock(&mmc_host_lock);
- idr_remove(&mmc_host_idr, host->index);
+ ida_remove(&mmc_host_ida, host->index);
spin_unlock(&mmc_host_lock);
kfree(host);
}
@@ -321,14 +321,20 @@
/* scanning will be enabled when we're ready */
host->rescan_disable = 1;
- idr_preload(GFP_KERNEL);
+
+again:
+ if (!ida_pre_get(&mmc_host_ida, GFP_KERNEL)) {
+ kfree(host);
+ return NULL;
+ }
+
spin_lock(&mmc_host_lock);
- err = idr_alloc(&mmc_host_idr, host, 0, 0, GFP_NOWAIT);
- if (err >= 0)
- host->index = err;
+ err = ida_get_new(&mmc_host_ida, &host->index);
spin_unlock(&mmc_host_lock);
- idr_preload_end();
- if (err < 0) {
+
+ if (err == -EAGAIN) {
+ goto again;
+ } else if (err) {
kfree(host);
return NULL;
}
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 4dbe3df..b81b08f 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -333,6 +333,9 @@
}
}
+/* Minimum partition switch timeout in milliseconds */
+#define MMC_MIN_PART_SWITCH_TIME 300
+
/*
* Decode extended CSD.
*/
@@ -397,6 +400,10 @@
/* EXT_CSD value is in units of 10ms, but we store in ms */
card->ext_csd.part_time = 10 * ext_csd[EXT_CSD_PART_SWITCH_TIME];
+ /* Some eMMC devices set the value too low, so enforce a minimum */
+ if (card->ext_csd.part_time &&
+ card->ext_csd.part_time < MMC_MIN_PART_SWITCH_TIME)
+ card->ext_csd.part_time = MMC_MIN_PART_SWITCH_TIME;
/* Sleep / awake timeout in 100ns units */
if (sa_shift > 0 && sa_shift <= 0x17)
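A worked example of the clamp: an EXT_CSD PARTITION_SWITCH_TIME of 10 decodes to 10 * 10 ms = 100 ms, which is below MMC_MIN_PART_SWITCH_TIME and is therefore raised to 300 ms; a raw value of 40 (400 ms) passes through unchanged.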
@@ -1244,10 +1251,11 @@
{
struct mmc_host *host = card->host;
bool send_status = true;
- unsigned int old_timing;
+ unsigned int old_timing, old_signal_voltage;
int err = -EINVAL;
u8 val;
+ old_signal_voltage = host->ios.signal_voltage;
if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS200_1_2V)
err = __mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_120);
@@ -1256,7 +1264,7 @@
/* If fails try again during next card power cycle */
if (err)
- goto err;
+ return err;
mmc_select_driver_type(card);
@@ -1290,9 +1298,14 @@
}
}
err:
- if (err)
+ if (err) {
+ /* fall back to the old signal voltage; if that fails, report an error */
+ if (__mmc_set_signal_voltage(host, old_signal_voltage))
+ err = -EIO;
+
pr_err("%s: %s failed, error %d\n", mmc_hostname(card->host),
__func__, err);
+ }
return err;
}
@@ -1314,21 +1327,13 @@
if (err && err != -EBADMSG)
return err;
- if (err) {
- pr_warn("%s: switch to %s failed\n",
- mmc_card_hs(card) ? "high-speed" :
- (mmc_card_hs200(card) ? "hs200" : ""),
- mmc_hostname(card->host));
- err = 0;
- }
-
bus_speed:
/*
* Set the bus speed to the selected bus timing.
* If timing is not selected, backward compatible is the default.
*/
mmc_set_bus_speed(card);
- return err;
+ return 0;
}
/*
@@ -1483,12 +1488,13 @@
if (err)
goto free_card;
- /* If doing byte addressing, check if required to do sector
+ /*
+ * If doing byte addressing, check if required to do sector
* addressing. Handle the case of <2GB cards needing sector
* addressing. See section 8.1 JEDEC Standard JED84-A441;
* ocr register has bit 30 set for sector addressing.
*/
- if (!(mmc_card_blockaddr(card)) && (rocr & (1<<30)))
+ if (rocr & BIT(30))
mmc_card_set_blockaddr(card);
/* Erase size depends on CSD and Extended CSD */
@@ -1957,19 +1963,23 @@
{
struct mmc_card *card = host->card;
- if (!(host->caps & MMC_CAP_HW_RESET) || !host->ops->hw_reset)
- return -EOPNOTSUPP;
+ /*
+ * In the case of recovery, we can't expect the cache flush to always
+ * work, but we try anyway and ignore any errors.
+ */
+ mmc_flush_cache(host->card);
- if (!mmc_can_reset(card))
- return -EOPNOTSUPP;
-
- mmc_set_clock(host, host->f_init);
-
- host->ops->hw_reset(host);
-
- /* Set initial state and call mmc_set_ios */
- mmc_set_initial_state(host);
-
+ if ((host->caps & MMC_CAP_HW_RESET) && host->ops->hw_reset &&
+ mmc_can_reset(card)) {
+ /* If the card accepts the RST_n signal, send it. */
+ mmc_set_clock(host, host->f_init);
+ host->ops->hw_reset(host);
+ /* Set initial state and call mmc_set_ios */
+ mmc_set_initial_state(host);
+ } else {
+ /* Do a brute force power cycle */
+ mmc_power_cycle(host, card->ocr);
+ }
return mmc_init_card(host, card->ocr, card);
}
diff --git a/drivers/mmc/core/pwrseq.c b/drivers/mmc/core/pwrseq.c
index 4c1d175..9386c47 100644
--- a/drivers/mmc/core/pwrseq.c
+++ b/drivers/mmc/core/pwrseq.c
@@ -8,88 +8,55 @@
* MMC power sequence management
*/
#include <linux/kernel.h>
-#include <linux/platform_device.h>
#include <linux/err.h>
+#include <linux/module.h>
#include <linux/of.h>
-#include <linux/of_platform.h>
#include <linux/mmc/host.h>
#include "pwrseq.h"
-struct mmc_pwrseq_match {
- const char *compatible;
- struct mmc_pwrseq *(*alloc)(struct mmc_host *host, struct device *dev);
-};
-
-static struct mmc_pwrseq_match pwrseq_match[] = {
- {
- .compatible = "mmc-pwrseq-simple",
- .alloc = mmc_pwrseq_simple_alloc,
- }, {
- .compatible = "mmc-pwrseq-emmc",
- .alloc = mmc_pwrseq_emmc_alloc,
- },
-};
-
-static struct mmc_pwrseq_match *mmc_pwrseq_find(struct device_node *np)
-{
- struct mmc_pwrseq_match *match = ERR_PTR(-ENODEV);
- int i;
-
- for (i = 0; i < ARRAY_SIZE(pwrseq_match); i++) {
- if (of_device_is_compatible(np, pwrseq_match[i].compatible)) {
- match = &pwrseq_match[i];
- break;
- }
- }
-
- return match;
-}
+static DEFINE_MUTEX(pwrseq_list_mutex);
+static LIST_HEAD(pwrseq_list);
int mmc_pwrseq_alloc(struct mmc_host *host)
{
- struct platform_device *pdev;
struct device_node *np;
- struct mmc_pwrseq_match *match;
- struct mmc_pwrseq *pwrseq;
- int ret = 0;
+ struct mmc_pwrseq *p;
np = of_parse_phandle(host->parent->of_node, "mmc-pwrseq", 0);
if (!np)
return 0;
- pdev = of_find_device_by_node(np);
- if (!pdev) {
- ret = -ENODEV;
- goto err;
+ mutex_lock(&pwrseq_list_mutex);
+ list_for_each_entry(p, &pwrseq_list, pwrseq_node) {
+ if (p->dev->of_node == np) {
+ if (!try_module_get(p->owner))
+ dev_err(host->parent,
+ "increasing module refcount failed\n");
+ else
+ host->pwrseq = p;
+
+ break;
+ }
}
- match = mmc_pwrseq_find(np);
- if (IS_ERR(match)) {
- ret = PTR_ERR(match);
- goto err;
- }
+ of_node_put(np);
+ mutex_unlock(&pwrseq_list_mutex);
- pwrseq = match->alloc(host, &pdev->dev);
- if (IS_ERR(pwrseq)) {
- ret = PTR_ERR(pwrseq);
- goto err;
- }
+ if (!host->pwrseq)
+ return -EPROBE_DEFER;
- host->pwrseq = pwrseq;
dev_info(host->parent, "allocated mmc-pwrseq\n");
-err:
- of_node_put(np);
- return ret;
+ return 0;
}
void mmc_pwrseq_pre_power_on(struct mmc_host *host)
{
struct mmc_pwrseq *pwrseq = host->pwrseq;
- if (pwrseq && pwrseq->ops && pwrseq->ops->pre_power_on)
+ if (pwrseq && pwrseq->ops->pre_power_on)
pwrseq->ops->pre_power_on(host);
}
@@ -97,7 +64,7 @@
{
struct mmc_pwrseq *pwrseq = host->pwrseq;
- if (pwrseq && pwrseq->ops && pwrseq->ops->post_power_on)
+ if (pwrseq && pwrseq->ops->post_power_on)
pwrseq->ops->post_power_on(host);
}
@@ -105,7 +72,7 @@
{
struct mmc_pwrseq *pwrseq = host->pwrseq;
- if (pwrseq && pwrseq->ops && pwrseq->ops->power_off)
+ if (pwrseq && pwrseq->ops->power_off)
pwrseq->ops->power_off(host);
}
@@ -113,8 +80,31 @@
{
struct mmc_pwrseq *pwrseq = host->pwrseq;
- if (pwrseq && pwrseq->ops && pwrseq->ops->free)
- pwrseq->ops->free(host);
-
- host->pwrseq = NULL;
+ if (pwrseq) {
+ module_put(pwrseq->owner);
+ host->pwrseq = NULL;
+ }
}
+
+int mmc_pwrseq_register(struct mmc_pwrseq *pwrseq)
+{
+ if (!pwrseq || !pwrseq->ops || !pwrseq->dev)
+ return -EINVAL;
+
+ mutex_lock(&pwrseq_list_mutex);
+ list_add(&pwrseq->pwrseq_node, &pwrseq_list);
+ mutex_unlock(&pwrseq_list_mutex);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mmc_pwrseq_register);
+
+void mmc_pwrseq_unregister(struct mmc_pwrseq *pwrseq)
+{
+ if (pwrseq) {
+ mutex_lock(&pwrseq_list_mutex);
+ list_del(&pwrseq->pwrseq_node);
+ mutex_unlock(&pwrseq_list_mutex);
+ }
+}
+EXPORT_SYMBOL_GPL(mmc_pwrseq_unregister);
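Under the new model a power-sequence provider is an ordinary platform driver that fills in struct mmc_pwrseq and registers it; the host side then matches it by OF node and takes a module reference. A minimal provider sketch (the foo_* names and the omitted of_device_id/module boilerplate are illustrative, not from this series):

#include <linux/module.h>
#include <linux/platform_device.h>
#include "pwrseq.h"

struct foo_pwrseq {
	struct mmc_pwrseq pwrseq;
};

static void foo_pre_power_on(struct mmc_host *host)
{
	/* toggle supply rails or reset lines here (hypothetical) */
}

static const struct mmc_pwrseq_ops foo_pwrseq_ops = {
	.pre_power_on = foo_pre_power_on,
};

static int foo_pwrseq_probe(struct platform_device *pdev)
{
	struct foo_pwrseq *p;

	p = devm_kzalloc(&pdev->dev, sizeof(*p), GFP_KERNEL);
	if (!p)
		return -ENOMEM;

	p->pwrseq.dev = &pdev->dev;
	p->pwrseq.ops = &foo_pwrseq_ops;
	p->pwrseq.owner = THIS_MODULE;
	platform_set_drvdata(pdev, p);

	return mmc_pwrseq_register(&p->pwrseq);
}

static int foo_pwrseq_remove(struct platform_device *pdev)
{
	struct foo_pwrseq *p = platform_get_drvdata(pdev);

	mmc_pwrseq_unregister(&p->pwrseq);
	return 0;
}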
diff --git a/drivers/mmc/core/pwrseq.h b/drivers/mmc/core/pwrseq.h
index 133de04..d69e751 100644
--- a/drivers/mmc/core/pwrseq.h
+++ b/drivers/mmc/core/pwrseq.h
@@ -8,32 +8,39 @@
#ifndef _MMC_CORE_PWRSEQ_H
#define _MMC_CORE_PWRSEQ_H
+#include <linux/mmc/host.h>
+
struct mmc_pwrseq_ops {
void (*pre_power_on)(struct mmc_host *host);
void (*post_power_on)(struct mmc_host *host);
void (*power_off)(struct mmc_host *host);
- void (*free)(struct mmc_host *host);
};
struct mmc_pwrseq {
const struct mmc_pwrseq_ops *ops;
+ struct device *dev;
+ struct list_head pwrseq_node;
+ struct module *owner;
};
#ifdef CONFIG_OF
+int mmc_pwrseq_register(struct mmc_pwrseq *pwrseq);
+void mmc_pwrseq_unregister(struct mmc_pwrseq *pwrseq);
+
int mmc_pwrseq_alloc(struct mmc_host *host);
void mmc_pwrseq_pre_power_on(struct mmc_host *host);
void mmc_pwrseq_post_power_on(struct mmc_host *host);
void mmc_pwrseq_power_off(struct mmc_host *host);
void mmc_pwrseq_free(struct mmc_host *host);
-struct mmc_pwrseq *mmc_pwrseq_simple_alloc(struct mmc_host *host,
- struct device *dev);
-struct mmc_pwrseq *mmc_pwrseq_emmc_alloc(struct mmc_host *host,
- struct device *dev);
-
#else
+static inline int mmc_pwrseq_register(struct mmc_pwrseq *pwrseq)
+{
+ return -ENOSYS;
+}
+static inline void mmc_pwrseq_unregister(struct mmc_pwrseq *pwrseq) {}
static inline int mmc_pwrseq_alloc(struct mmc_host *host) { return 0; }
static inline void mmc_pwrseq_pre_power_on(struct mmc_host *host) {}
static inline void mmc_pwrseq_post_power_on(struct mmc_host *host) {}
diff --git a/drivers/mmc/core/pwrseq_emmc.c b/drivers/mmc/core/pwrseq_emmc.c
index 4a82bc7..adc9c0c 100644
--- a/drivers/mmc/core/pwrseq_emmc.c
+++ b/drivers/mmc/core/pwrseq_emmc.c
@@ -9,6 +9,9 @@
*/
#include <linux/delay.h>
#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/module.h>
#include <linux/slab.h>
#include <linux/device.h>
#include <linux/err.h>
@@ -25,6 +28,8 @@
struct gpio_desc *reset_gpio;
};
+#define to_pwrseq_emmc(p) container_of(p, struct mmc_pwrseq_emmc, pwrseq)
+
static void __mmc_pwrseq_emmc_reset(struct mmc_pwrseq_emmc *pwrseq)
{
gpiod_set_value(pwrseq->reset_gpio, 1);
@@ -35,27 +40,11 @@
static void mmc_pwrseq_emmc_reset(struct mmc_host *host)
{
- struct mmc_pwrseq_emmc *pwrseq = container_of(host->pwrseq,
- struct mmc_pwrseq_emmc, pwrseq);
+ struct mmc_pwrseq_emmc *pwrseq = to_pwrseq_emmc(host->pwrseq);
__mmc_pwrseq_emmc_reset(pwrseq);
}
-static void mmc_pwrseq_emmc_free(struct mmc_host *host)
-{
- struct mmc_pwrseq_emmc *pwrseq = container_of(host->pwrseq,
- struct mmc_pwrseq_emmc, pwrseq);
-
- unregister_restart_handler(&pwrseq->reset_nb);
- gpiod_put(pwrseq->reset_gpio);
- kfree(pwrseq);
-}
-
-static const struct mmc_pwrseq_ops mmc_pwrseq_emmc_ops = {
- .post_power_on = mmc_pwrseq_emmc_reset,
- .free = mmc_pwrseq_emmc_free,
-};
-
static int mmc_pwrseq_emmc_reset_nb(struct notifier_block *this,
unsigned long mode, void *cmd)
{
@@ -66,21 +55,22 @@
return NOTIFY_DONE;
}
-struct mmc_pwrseq *mmc_pwrseq_emmc_alloc(struct mmc_host *host,
- struct device *dev)
+static const struct mmc_pwrseq_ops mmc_pwrseq_emmc_ops = {
+ .post_power_on = mmc_pwrseq_emmc_reset,
+};
+
+static int mmc_pwrseq_emmc_probe(struct platform_device *pdev)
{
struct mmc_pwrseq_emmc *pwrseq;
- int ret = 0;
+ struct device *dev = &pdev->dev;
- pwrseq = kzalloc(sizeof(struct mmc_pwrseq_emmc), GFP_KERNEL);
+ pwrseq = devm_kzalloc(dev, sizeof(*pwrseq), GFP_KERNEL);
if (!pwrseq)
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
- pwrseq->reset_gpio = gpiod_get(dev, "reset", GPIOD_OUT_LOW);
- if (IS_ERR(pwrseq->reset_gpio)) {
- ret = PTR_ERR(pwrseq->reset_gpio);
- goto free;
- }
+ pwrseq->reset_gpio = devm_gpiod_get(dev, "reset", GPIOD_OUT_LOW);
+ if (IS_ERR(pwrseq->reset_gpio))
+ return PTR_ERR(pwrseq->reset_gpio);
/*
* register reset handler to ensure emmc reset also from
@@ -92,9 +82,38 @@
register_restart_handler(&pwrseq->reset_nb);
pwrseq->pwrseq.ops = &mmc_pwrseq_emmc_ops;
+ pwrseq->pwrseq.dev = dev;
+ pwrseq->pwrseq.owner = THIS_MODULE;
+ platform_set_drvdata(pdev, pwrseq);
- return &pwrseq->pwrseq;
-free:
- kfree(pwrseq);
- return ERR_PTR(ret);
+ return mmc_pwrseq_register(&pwrseq->pwrseq);
}
+
+static int mmc_pwrseq_emmc_remove(struct platform_device *pdev)
+{
+ struct mmc_pwrseq_emmc *pwrseq = platform_get_drvdata(pdev);
+
+ unregister_restart_handler(&pwrseq->reset_nb);
+ mmc_pwrseq_unregister(&pwrseq->pwrseq);
+
+ return 0;
+}
+
+static const struct of_device_id mmc_pwrseq_emmc_of_match[] = {
+ { .compatible = "mmc-pwrseq-emmc",},
+ {/* sentinel */},
+};
+
+MODULE_DEVICE_TABLE(of, mmc_pwrseq_emmc_of_match);
+
+static struct platform_driver mmc_pwrseq_emmc_driver = {
+ .probe = mmc_pwrseq_emmc_probe,
+ .remove = mmc_pwrseq_emmc_remove,
+ .driver = {
+ .name = "pwrseq_emmc",
+ .of_match_table = mmc_pwrseq_emmc_of_match,
+ },
+};
+
+module_platform_driver(mmc_pwrseq_emmc_driver);
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/mmc/core/pwrseq_simple.c b/drivers/mmc/core/pwrseq_simple.c
index bc173e1..450d907 100644
--- a/drivers/mmc/core/pwrseq_simple.c
+++ b/drivers/mmc/core/pwrseq_simple.c
@@ -8,7 +8,10 @@
* Simple MMC power sequence management
*/
#include <linux/clk.h>
+#include <linux/init.h>
#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/module.h>
#include <linux/slab.h>
#include <linux/device.h>
#include <linux/err.h>
@@ -25,6 +28,8 @@
struct gpio_descs *reset_gpios;
};
+#define to_pwrseq_simple(p) container_of(p, struct mmc_pwrseq_simple, pwrseq)
+
static void mmc_pwrseq_simple_set_gpios_value(struct mmc_pwrseq_simple *pwrseq,
int value)
{
@@ -44,8 +49,7 @@
static void mmc_pwrseq_simple_pre_power_on(struct mmc_host *host)
{
- struct mmc_pwrseq_simple *pwrseq = container_of(host->pwrseq,
- struct mmc_pwrseq_simple, pwrseq);
+ struct mmc_pwrseq_simple *pwrseq = to_pwrseq_simple(host->pwrseq);
if (!IS_ERR(pwrseq->ext_clk) && !pwrseq->clk_enabled) {
clk_prepare_enable(pwrseq->ext_clk);
@@ -57,16 +61,14 @@
static void mmc_pwrseq_simple_post_power_on(struct mmc_host *host)
{
- struct mmc_pwrseq_simple *pwrseq = container_of(host->pwrseq,
- struct mmc_pwrseq_simple, pwrseq);
+ struct mmc_pwrseq_simple *pwrseq = to_pwrseq_simple(host->pwrseq);
mmc_pwrseq_simple_set_gpios_value(pwrseq, 0);
}
static void mmc_pwrseq_simple_power_off(struct mmc_host *host)
{
- struct mmc_pwrseq_simple *pwrseq = container_of(host->pwrseq,
- struct mmc_pwrseq_simple, pwrseq);
+ struct mmc_pwrseq_simple *pwrseq = to_pwrseq_simple(host->pwrseq);
mmc_pwrseq_simple_set_gpios_value(pwrseq, 1);
@@ -76,59 +78,64 @@
}
}
-static void mmc_pwrseq_simple_free(struct mmc_host *host)
-{
- struct mmc_pwrseq_simple *pwrseq = container_of(host->pwrseq,
- struct mmc_pwrseq_simple, pwrseq);
-
- if (!IS_ERR(pwrseq->reset_gpios))
- gpiod_put_array(pwrseq->reset_gpios);
-
- if (!IS_ERR(pwrseq->ext_clk))
- clk_put(pwrseq->ext_clk);
-
- kfree(pwrseq);
-}
-
static const struct mmc_pwrseq_ops mmc_pwrseq_simple_ops = {
.pre_power_on = mmc_pwrseq_simple_pre_power_on,
.post_power_on = mmc_pwrseq_simple_post_power_on,
.power_off = mmc_pwrseq_simple_power_off,
- .free = mmc_pwrseq_simple_free,
};
-struct mmc_pwrseq *mmc_pwrseq_simple_alloc(struct mmc_host *host,
- struct device *dev)
+static const struct of_device_id mmc_pwrseq_simple_of_match[] = {
+ { .compatible = "mmc-pwrseq-simple",},
+ {/* sentinel */},
+};
+MODULE_DEVICE_TABLE(of, mmc_pwrseq_simple_of_match);
+
+static int mmc_pwrseq_simple_probe(struct platform_device *pdev)
{
struct mmc_pwrseq_simple *pwrseq;
- int ret = 0;
+ struct device *dev = &pdev->dev;
- pwrseq = kzalloc(sizeof(*pwrseq), GFP_KERNEL);
+ pwrseq = devm_kzalloc(dev, sizeof(*pwrseq), GFP_KERNEL);
if (!pwrseq)
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
- pwrseq->ext_clk = clk_get(dev, "ext_clock");
- if (IS_ERR(pwrseq->ext_clk) &&
- PTR_ERR(pwrseq->ext_clk) != -ENOENT) {
- ret = PTR_ERR(pwrseq->ext_clk);
- goto free;
- }
+ pwrseq->ext_clk = devm_clk_get(dev, "ext_clock");
+ if (IS_ERR(pwrseq->ext_clk) && PTR_ERR(pwrseq->ext_clk) != -ENOENT)
+ return PTR_ERR(pwrseq->ext_clk);
- pwrseq->reset_gpios = gpiod_get_array(dev, "reset", GPIOD_OUT_HIGH);
+ pwrseq->reset_gpios = devm_gpiod_get_array(dev, "reset",
+ GPIOD_OUT_HIGH);
if (IS_ERR(pwrseq->reset_gpios) &&
PTR_ERR(pwrseq->reset_gpios) != -ENOENT &&
PTR_ERR(pwrseq->reset_gpios) != -ENOSYS) {
- ret = PTR_ERR(pwrseq->reset_gpios);
- goto clk_put;
+ return PTR_ERR(pwrseq->reset_gpios);
}
+ pwrseq->pwrseq.dev = dev;
pwrseq->pwrseq.ops = &mmc_pwrseq_simple_ops;
+ pwrseq->pwrseq.owner = THIS_MODULE;
+ platform_set_drvdata(pdev, pwrseq);
- return &pwrseq->pwrseq;
-clk_put:
- if (!IS_ERR(pwrseq->ext_clk))
- clk_put(pwrseq->ext_clk);
-free:
- kfree(pwrseq);
- return ERR_PTR(ret);
+ return mmc_pwrseq_register(&pwrseq->pwrseq);
}
+
+static int mmc_pwrseq_simple_remove(struct platform_device *pdev)
+{
+ struct mmc_pwrseq_simple *pwrseq = platform_get_drvdata(pdev);
+
+ mmc_pwrseq_unregister(&pwrseq->pwrseq);
+
+ return 0;
+}
+
+static struct platform_driver mmc_pwrseq_simple_driver = {
+ .probe = mmc_pwrseq_simple_probe,
+ .remove = mmc_pwrseq_simple_remove,
+ .driver = {
+ .name = "pwrseq_simple",
+ .of_match_table = mmc_pwrseq_simple_of_match,
+ },
+};
+
+module_platform_driver(mmc_pwrseq_simple_driver);
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/mmc/core/sdio_cis.c b/drivers/mmc/core/sdio_cis.c
index 6f6fc52..dcb3dee 100644
--- a/drivers/mmc/core/sdio_cis.c
+++ b/drivers/mmc/core/sdio_cis.c
@@ -177,8 +177,13 @@
vsn = func->card->cccr.sdio_vsn;
min_size = (vsn == SDIO_SDIO_REV_1_00) ? 28 : 42;
- if (size < min_size)
+ if (size == 28 && vsn == SDIO_SDIO_REV_1_10) {
+ pr_warn("%s: card has broken SDIO 1.1 CIS, forcing SDIO 1.0\n",
+ mmc_hostname(card->host));
+ vsn = SDIO_SDIO_REV_1_00;
+ } else if (size < min_size) {
return -EINVAL;
+ }
/* TPLFE_MAX_BLK_SIZE */
func->max_blksize = buf[12] | (buf[13] << 8);
diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index e657af0..0aa484c 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -677,9 +677,9 @@
depends on HAS_DMA
depends on SUPERH || ARCH_RENESAS || COMPILE_TEST
help
- This selects the MMC Host Interface controller (MMCIF).
+ This selects the MMC Host Interface controller (MMCIF) found in various
+ Renesas SoCs for SH and ARM architectures.
- This driver supports MMCIF in sh7724/sh7757/sh7372.
config MMC_JZ4740
tristate "JZ4740 SD/Multimedia Card Interface support"
diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c
index 9268c41..0ad8ef5 100644
--- a/drivers/mmc/host/atmel-mci.c
+++ b/drivers/mmc/host/atmel-mci.c
@@ -1410,8 +1410,6 @@
WARN_ON(slot->mrq);
dev_dbg(&host->pdev->dev, "MRQ: cmd %u\n", mrq->cmd->opcode);
- pm_runtime_get_sync(&host->pdev->dev);
-
/*
* We may "know" the card is gone even though there's still an
* electrical connection. If so, we really need to communicate
@@ -1442,8 +1440,6 @@
struct atmel_mci *host = slot->host;
unsigned int i;
- pm_runtime_get_sync(&host->pdev->dev);
-
slot->sdc_reg &= ~ATMCI_SDCBUS_MASK;
switch (ios->bus_width) {
case MMC_BUS_WIDTH_1:
@@ -1576,8 +1572,6 @@
break;
}
- pm_runtime_mark_last_busy(&host->pdev->dev);
- pm_runtime_put_autosuspend(&host->pdev->dev);
}
static int atmci_get_ro(struct mmc_host *mmc)
@@ -1669,9 +1663,6 @@
spin_unlock(&host->lock);
mmc_request_done(prev_mmc, mrq);
spin_lock(&host->lock);
-
- pm_runtime_mark_last_busy(&host->pdev->dev);
- pm_runtime_put_autosuspend(&host->pdev->dev);
}
static void atmci_command_complete(struct atmel_mci *host,
diff --git a/drivers/mmc/host/davinci_mmc.c b/drivers/mmc/host/davinci_mmc.c
index 693144e..a56373c 100644
--- a/drivers/mmc/host/davinci_mmc.c
+++ b/drivers/mmc/host/davinci_mmc.c
@@ -32,12 +32,10 @@
#include <linux/delay.h>
#include <linux/dmaengine.h>
#include <linux/dma-mapping.h>
-#include <linux/edma.h>
#include <linux/mmc/mmc.h>
#include <linux/of.h>
#include <linux/of_device.h>
-#include <linux/platform_data/edma.h>
#include <linux/platform_data/mmc-davinci.h>
/*
@@ -202,7 +200,6 @@
u32 buffer_bytes_left;
u32 bytes_left;
- u32 rxdma, txdma;
struct dma_chan *dma_tx;
struct dma_chan *dma_rx;
bool use_dma;
@@ -513,35 +510,20 @@
static int __init davinci_acquire_dma_channels(struct mmc_davinci_host *host)
{
- int r;
- dma_cap_mask_t mask;
-
- dma_cap_zero(mask);
- dma_cap_set(DMA_SLAVE, mask);
-
- host->dma_tx =
- dma_request_slave_channel_compat(mask, edma_filter_fn,
- &host->txdma, mmc_dev(host->mmc), "tx");
- if (!host->dma_tx) {
+ host->dma_tx = dma_request_chan(mmc_dev(host->mmc), "tx");
+ if (IS_ERR(host->dma_tx)) {
dev_err(mmc_dev(host->mmc), "Can't get dma_tx channel\n");
- return -ENODEV;
+ return PTR_ERR(host->dma_tx);
}
- host->dma_rx =
- dma_request_slave_channel_compat(mask, edma_filter_fn,
- &host->rxdma, mmc_dev(host->mmc), "rx");
- if (!host->dma_rx) {
+ host->dma_rx = dma_request_chan(mmc_dev(host->mmc), "rx");
+ if (IS_ERR(host->dma_rx)) {
dev_err(mmc_dev(host->mmc), "Can't get dma_rx channel\n");
- r = -ENODEV;
- goto free_master_write;
+ dma_release_channel(host->dma_tx);
+ return PTR_ERR(host->dma_rx);
}
return 0;
-
-free_master_write:
- dma_release_channel(host->dma_tx);
-
- return r;
}
/*----------------------------------------------------------------------*/
@@ -1223,7 +1205,7 @@
struct mmc_davinci_host *host = NULL;
struct mmc_host *mmc = NULL;
struct resource *r, *mem = NULL;
- int ret = 0, irq = 0;
+ int ret, irq;
size_t mem_size;
const struct platform_device_id *id_entry;
@@ -1233,50 +1215,40 @@
return -ENOENT;
}
- ret = -ENODEV;
r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
irq = platform_get_irq(pdev, 0);
if (!r || irq == NO_IRQ)
- goto out;
+ return -ENODEV;
- ret = -EBUSY;
mem_size = resource_size(r);
- mem = request_mem_region(r->start, mem_size, pdev->name);
+ mem = devm_request_mem_region(&pdev->dev, r->start, mem_size,
+ pdev->name);
if (!mem)
- goto out;
+ return -EBUSY;
- ret = -ENOMEM;
mmc = mmc_alloc_host(sizeof(struct mmc_davinci_host), &pdev->dev);
if (!mmc)
- goto out;
+ return -ENOMEM;
host = mmc_priv(mmc);
host->mmc = mmc; /* Important */
- r = platform_get_resource(pdev, IORESOURCE_DMA, 0);
- if (!r)
- dev_warn(&pdev->dev, "RX DMA resource not specified\n");
- else
- host->rxdma = r->start;
-
- r = platform_get_resource(pdev, IORESOURCE_DMA, 1);
- if (!r)
- dev_warn(&pdev->dev, "TX DMA resource not specified\n");
- else
- host->txdma = r->start;
-
host->mem_res = mem;
- host->base = ioremap(mem->start, mem_size);
- if (!host->base)
- goto out;
+ host->base = devm_ioremap(&pdev->dev, mem->start, mem_size);
+ if (!host->base) {
+ ret = -ENOMEM;
+ goto ioremap_fail;
+ }
- ret = -ENXIO;
- host->clk = clk_get(&pdev->dev, "MMCSDCLK");
+ host->clk = devm_clk_get(&pdev->dev, NULL);
if (IS_ERR(host->clk)) {
ret = PTR_ERR(host->clk);
- goto out;
+ goto clk_get_fail;
}
- clk_enable(host->clk);
+ ret = clk_prepare_enable(host->clk);
+ if (ret)
+ goto clk_prepare_enable_fail;
+
host->mmc_input_clk = clk_get_rate(host->clk);
init_mmcsd_host(host);
@@ -1291,8 +1263,13 @@
host->mmc_irq = irq;
host->sdio_irq = platform_get_irq(pdev, 1);
- if (host->use_dma && davinci_acquire_dma_channels(host) != 0)
- host->use_dma = 0;
+ if (host->use_dma) {
+ ret = davinci_acquire_dma_channels(host);
+ if (ret == -EPROBE_DEFER)
+ goto dma_probe_defer;
+ else if (ret)
+ host->use_dma = 0;
+ }
/* REVISIT: someday, support IRQ-driven card detection. */
mmc->caps |= MMC_CAP_NEEDS_POLL;
@@ -1346,15 +1323,17 @@
ret = mmc_add_host(mmc);
if (ret < 0)
- goto out;
+ goto mmc_add_host_fail;
- ret = request_irq(irq, mmc_davinci_irq, 0, mmc_hostname(mmc), host);
+ ret = devm_request_irq(&pdev->dev, irq, mmc_davinci_irq, 0,
+ mmc_hostname(mmc), host);
if (ret)
- goto out;
+ goto request_irq_fail;
if (host->sdio_irq >= 0) {
- ret = request_irq(host->sdio_irq, mmc_davinci_sdio_irq, 0,
- mmc_hostname(mmc), host);
+ ret = devm_request_irq(&pdev->dev, host->sdio_irq,
+ mmc_davinci_sdio_irq, 0,
+ mmc_hostname(mmc), host);
if (!ret)
mmc->caps |= MMC_CAP_SDIO_IRQ;
}
@@ -1367,28 +1346,18 @@
return 0;
-out:
+request_irq_fail:
+ mmc_remove_host(mmc);
+mmc_add_host_fail:
mmc_davinci_cpufreq_deregister(host);
cpu_freq_fail:
- if (host) {
- davinci_release_dma_channels(host);
-
- if (host->clk) {
- clk_disable(host->clk);
- clk_put(host->clk);
- }
-
- if (host->base)
- iounmap(host->base);
- }
-
- if (mmc)
- mmc_free_host(mmc);
-
- if (mem)
- release_resource(mem);
-
- dev_dbg(&pdev->dev, "probe err %d\n", ret);
+ davinci_release_dma_channels(host);
+dma_probe_defer:
+ clk_disable_unprepare(host->clk);
+clk_prepare_enable_fail:
+clk_get_fail:
+ioremap_fail:
+ mmc_free_host(mmc);
return ret;
}
@@ -1397,25 +1366,11 @@
{
struct mmc_davinci_host *host = platform_get_drvdata(pdev);
- if (host) {
- mmc_davinci_cpufreq_deregister(host);
-
- mmc_remove_host(host->mmc);
- free_irq(host->mmc_irq, host);
- if (host->mmc->caps & MMC_CAP_SDIO_IRQ)
- free_irq(host->sdio_irq, host);
-
- davinci_release_dma_channels(host);
-
- clk_disable(host->clk);
- clk_put(host->clk);
-
- iounmap(host->base);
-
- release_resource(host->mem_res);
-
- mmc_free_host(host->mmc);
- }
+ mmc_remove_host(host->mmc);
+ mmc_davinci_cpufreq_deregister(host);
+ davinci_release_dma_channels(host);
+ clk_disable_unprepare(host->clk);
+ mmc_free_host(host->mmc);
return 0;
}
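The davinci conversion above, like the omap and omap_hsmmc conversions later in this series, follows one dma_request_chan() idiom: propagate -EPROBE_DEFER so the probe is retried once the DMA provider shows up, and treat any other failure as "no channel". Condensed to its essentials (closest to the omap variant; names illustrative):

struct dma_chan *chan;
int ret;

chan = dma_request_chan(dev, "tx");
if (IS_ERR(chan)) {
	ret = PTR_ERR(chan);
	if (ret == -EPROBE_DEFER)
		return ret;		/* DMA provider not ready yet */
	dev_warn(dev, "TX DMA channel request failed\n");
	chan = NULL;			/* fall back to PIO */
}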
diff --git a/drivers/mmc/host/dw_mmc-exynos.c b/drivers/mmc/host/dw_mmc-exynos.c
index 8790f2a..7e3a324 100644
--- a/drivers/mmc/host/dw_mmc-exynos.c
+++ b/drivers/mmc/host/dw_mmc-exynos.c
@@ -91,10 +91,14 @@
return SDMMC_CLKSEL_GET_DIV(mci_readl(host, CLKSEL)) + 1;
}
-static int dw_mci_exynos_priv_init(struct dw_mci *host)
+static void dw_mci_exynos_config_smu(struct dw_mci *host)
{
struct dw_mci_exynos_priv_data *priv = host->priv;
+ /*
+ * If the Exynos SoC provides security management (SMU),
+ * configure it for non-encryption mode at this time.
+ */
if (priv->ctrl_type == DW_MCI_TYPE_EXYNOS5420_SMU ||
priv->ctrl_type == DW_MCI_TYPE_EXYNOS7_SMU) {
mci_writel(host, MPSBEGIN0, 0);
@@ -104,6 +108,13 @@
SDMMC_MPSCTRL_VALID |
SDMMC_MPSCTRL_NON_SECURE_WRITE_BIT);
}
+}
+
+static int dw_mci_exynos_priv_init(struct dw_mci *host)
+{
+ struct dw_mci_exynos_priv_data *priv = host->priv;
+
+ dw_mci_exynos_config_smu(host);
if (priv->ctrl_type >= DW_MCI_TYPE_EXYNOS5420) {
priv->saved_strobe_ctrl = mci_readl(host, HS400_DLINE_CTRL);
@@ -115,13 +126,6 @@
DQS_CTRL_GET_RD_DELAY(priv->saved_strobe_ctrl);
}
- return 0;
-}
-
-static int dw_mci_exynos_setup_clock(struct dw_mci *host)
-{
- struct dw_mci_exynos_priv_data *priv = host->priv;
-
host->bus_hz /= (priv->ciu_div + 1);
return 0;
@@ -169,7 +173,7 @@
{
struct dw_mci *host = dev_get_drvdata(dev);
- dw_mci_exynos_priv_init(host);
+ dw_mci_exynos_config_smu(host);
return dw_mci_resume(host);
}
@@ -489,7 +493,6 @@
static const struct dw_mci_drv_data exynos_drv_data = {
.caps = exynos_dwmmc_caps,
.init = dw_mci_exynos_priv_init,
- .setup_clock = dw_mci_exynos_setup_clock,
.set_ios = dw_mci_exynos_set_ios,
.parse_dt = dw_mci_exynos_parse_dt,
.execute_tuning = dw_mci_exynos_execute_tuning,
diff --git a/drivers/mmc/host/dw_mmc-rockchip.c b/drivers/mmc/host/dw_mmc-rockchip.c
index 84e50f3..8c20b81 100644
--- a/drivers/mmc/host/dw_mmc-rockchip.c
+++ b/drivers/mmc/host/dw_mmc-rockchip.c
@@ -26,13 +26,6 @@
int default_sample_phase;
};
-static int dw_mci_rk3288_setup_clock(struct dw_mci *host)
-{
- host->bus_hz /= RK3288_CLKGEN_DIV;
-
- return 0;
-}
-
static void dw_mci_rk3288_set_ios(struct dw_mci *host, struct mmc_ios *ios)
{
struct dw_mci_rockchip_priv_data *priv = host->priv;
@@ -231,18 +224,30 @@
/* It needs this quirk on all Rockchip SoCs */
host->pdata->quirks |= DW_MCI_QUIRK_BROKEN_DTO;
+ if (of_device_is_compatible(host->dev->of_node,
+ "rockchip,rk3288-dw-mshc"))
+ host->bus_hz /= RK3288_CLKGEN_DIV;
+
return 0;
}
+/* Common capabilities of RK3288 SoC (one entry per mshc controller id) */
+static unsigned long dw_mci_rk3288_dwmmc_caps[4] = {
+ MMC_CAP_ERASE,
+ MMC_CAP_ERASE,
+ MMC_CAP_ERASE,
+ MMC_CAP_ERASE,
+};
+
static const struct dw_mci_drv_data rk2928_drv_data = {
.init = dw_mci_rockchip_init,
};
static const struct dw_mci_drv_data rk3288_drv_data = {
+ .caps = dw_mci_rk3288_dwmmc_caps,
.set_ios = dw_mci_rk3288_set_ios,
.execute_tuning = dw_mci_rk3288_execute_tuning,
.parse_dt = dw_mci_rk3288_parse_dt,
- .setup_clock = dw_mci_rk3288_setup_clock,
.init = dw_mci_rockchip_init,
};
@@ -269,33 +274,13 @@
return dw_mci_pltfm_register(pdev, drv_data);
}
-#ifdef CONFIG_PM_SLEEP
-static int dw_mci_rockchip_suspend(struct device *dev)
-{
- struct dw_mci *host = dev_get_drvdata(dev);
-
- return dw_mci_suspend(host);
-}
-
-static int dw_mci_rockchip_resume(struct device *dev)
-{
- struct dw_mci *host = dev_get_drvdata(dev);
-
- return dw_mci_resume(host);
-}
-#endif /* CONFIG_PM_SLEEP */
-
-static SIMPLE_DEV_PM_OPS(dw_mci_rockchip_pmops,
- dw_mci_rockchip_suspend,
- dw_mci_rockchip_resume);
-
static struct platform_driver dw_mci_rockchip_pltfm_driver = {
.probe = dw_mci_rockchip_probe,
.remove = dw_mci_pltfm_remove,
.driver = {
.name = "dwmmc_rockchip",
.of_match_table = dw_mci_rockchip_match,
- .pm = &dw_mci_rockchip_pmops,
+ .pm = &dw_mci_pltfm_pmops,
},
};
diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index 242f9a0..9dd1bd3 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -680,7 +680,7 @@
static void dw_mci_edmac_stop_dma(struct dw_mci *host)
{
- dmaengine_terminate_all(host->dms->ch);
+ dmaengine_terminate_async(host->dms->ch);
}
static int dw_mci_edmac_start_dma(struct dw_mci *host,
@@ -3003,15 +3003,6 @@
}
}
- if (drv_data && drv_data->setup_clock) {
- ret = drv_data->setup_clock(host);
- if (ret) {
- dev_err(host->dev,
- "implementation specific clock setup failed\n");
- goto err_clk_ciu;
- }
- }
-
setup_timer(&host->cmd11_timer,
dw_mci_cmd11_timer, (unsigned long)host);
diff --git a/drivers/mmc/host/dw_mmc.h b/drivers/mmc/host/dw_mmc.h
index 68d5da2..1e8d838 100644
--- a/drivers/mmc/host/dw_mmc.h
+++ b/drivers/mmc/host/dw_mmc.h
@@ -277,7 +277,6 @@
* dw_mci driver data - dw-mshc implementation specific driver data.
* @caps: mmc subsystem specified capabilities of the controller(s).
* @init: early implementation specific initialization.
- * @setup_clock: implementation specific clock configuration.
* @set_ios: handle bus specific extensions.
* @parse_dt: parse implementation specific device tree properties.
* @execute_tuning: implementation specific tuning procedure.
@@ -289,7 +288,6 @@
struct dw_mci_drv_data {
unsigned long *caps;
int (*init)(struct dw_mci *host);
- int (*setup_clock)(struct dw_mci *host);
void (*set_ios)(struct dw_mci *host, struct mmc_ios *ios);
int (*parse_dt)(struct dw_mci *host);
int (*execute_tuning)(struct dw_mci_slot *slot, u32 opcode);
diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 2e6c968..df990bb 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -226,16 +226,11 @@
unsigned long flags;
int busy = 0;
- pm_runtime_get_sync(mmc_dev(mmc));
-
spin_lock_irqsave(&host->lock, flags);
if (readl(host->base + MMCISTATUS) & MCI_ST_CARDBUSY)
busy = 1;
spin_unlock_irqrestore(&host->lock, flags);
- pm_runtime_mark_last_busy(mmc_dev(mmc));
- pm_runtime_put_autosuspend(mmc_dev(mmc));
-
return busy;
}
@@ -381,9 +376,6 @@
host->cmd = NULL;
mmc_request_done(host->mmc, mrq);
-
- pm_runtime_mark_last_busy(mmc_dev(host->mmc));
- pm_runtime_put_autosuspend(mmc_dev(host->mmc));
}
static void mmci_set_mask1(struct mmci_host *host, unsigned int mask)
@@ -1290,8 +1282,6 @@
return;
}
- pm_runtime_get_sync(mmc_dev(mmc));
-
spin_lock_irqsave(&host->lock, flags);
host->mrq = mrq;
@@ -1318,8 +1308,6 @@
unsigned long flags;
int ret;
- pm_runtime_get_sync(mmc_dev(mmc));
-
if (host->plat->ios_handler &&
host->plat->ios_handler(mmc_dev(mmc), ios))
dev_err(mmc_dev(mmc), "platform ios_handler failed\n");
@@ -1414,9 +1402,6 @@
mmci_reg_delay(host);
spin_unlock_irqrestore(&host->lock, flags);
-
- pm_runtime_mark_last_busy(mmc_dev(mmc));
- pm_runtime_put_autosuspend(mmc_dev(mmc));
}
static int mmci_get_cd(struct mmc_host *mmc)
@@ -1440,8 +1425,6 @@
if (!IS_ERR(mmc->supply.vqmmc)) {
- pm_runtime_get_sync(mmc_dev(mmc));
-
switch (ios->signal_voltage) {
case MMC_SIGNAL_VOLTAGE_330:
ret = regulator_set_voltage(mmc->supply.vqmmc,
@@ -1459,9 +1442,6 @@
if (ret)
dev_warn(mmc_dev(mmc), "Voltage switch failed\n");
-
- pm_runtime_mark_last_busy(mmc_dev(mmc));
- pm_runtime_put_autosuspend(mmc_dev(mmc));
}
return ret;
diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c
index b17f30d..5642f71 100644
--- a/drivers/mmc/host/mtk-sd.c
+++ b/drivers/mmc/host/mtk-sd.c
@@ -736,9 +736,6 @@
if (mrq->data)
msdc_unprepare_data(host, mrq);
mmc_request_done(host->mmc, mrq);
-
- pm_runtime_mark_last_busy(host->dev);
- pm_runtime_put_autosuspend(host->dev);
}
/* returns true if command is fully handled; returns false otherwise */
@@ -886,8 +883,6 @@
WARN_ON(host->mrq);
host->mrq = mrq;
- pm_runtime_get_sync(host->dev);
-
if (mrq->data)
msdc_prepare_data(host, mrq);
@@ -1201,8 +1196,6 @@
struct msdc_host *host = mmc_priv(mmc);
int ret;
- pm_runtime_get_sync(host->dev);
-
msdc_set_buswidth(host, ios->bus_width);
/* Suspend/Resume will do power off/on */
@@ -1214,7 +1207,7 @@
ios->vdd);
if (ret) {
dev_err(host->dev, "Failed to set vmmc power!\n");
- goto end;
+ return;
}
}
break;
@@ -1242,10 +1235,6 @@
if (host->mclk != ios->clock || host->timing != ios->timing)
msdc_set_mclk(host, ios->timing, ios->clock);
-
-end:
- pm_runtime_mark_last_busy(host->dev);
- pm_runtime_put_autosuspend(host->dev);
}
static u32 test_delay_bit(u32 delay, u32 bit)
@@ -1408,19 +1397,15 @@
struct msdc_host *host = mmc_priv(mmc);
int ret;
- pm_runtime_get_sync(host->dev);
ret = msdc_tune_response(mmc, opcode);
if (ret == -EIO) {
dev_err(host->dev, "Tune response fail!\n");
- goto out;
+ return ret;
}
ret = msdc_tune_data(mmc, opcode);
if (ret == -EIO)
dev_err(host->dev, "Tune data fail!\n");
-out:
- pm_runtime_mark_last_busy(host->dev);
- pm_runtime_put_autosuspend(host->dev);
return ret;
}
diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c
index b9958a1..f23d65e 100644
--- a/drivers/mmc/host/omap.c
+++ b/drivers/mmc/host/omap.c
@@ -23,7 +23,6 @@
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/of.h>
-#include <linux/omap-dma.h>
#include <linux/mmc/host.h>
#include <linux/mmc/card.h>
#include <linux/mmc/mmc.h>
@@ -1321,8 +1320,6 @@
struct omap_mmc_platform_data *pdata = pdev->dev.platform_data;
struct mmc_omap_host *host = NULL;
struct resource *res;
- dma_cap_mask_t mask;
- unsigned sig = 0;
int i, ret = 0;
int irq;
@@ -1382,29 +1379,34 @@
goto err_free_iclk;
}
- dma_cap_zero(mask);
- dma_cap_set(DMA_SLAVE, mask);
-
host->dma_tx_burst = -1;
host->dma_rx_burst = -1;
- res = platform_get_resource_byname(pdev, IORESOURCE_DMA, "tx");
- if (res)
- sig = res->start;
- host->dma_tx = dma_request_slave_channel_compat(mask,
- omap_dma_filter_fn, &sig, &pdev->dev, "tx");
- if (!host->dma_tx)
- dev_warn(host->dev, "unable to obtain TX DMA engine channel %u\n",
- sig);
+ host->dma_tx = dma_request_chan(&pdev->dev, "tx");
+ if (IS_ERR(host->dma_tx)) {
+ ret = PTR_ERR(host->dma_tx);
+ if (ret == -EPROBE_DEFER) {
+ clk_put(host->fclk);
+ goto err_free_iclk;
+ }
- res = platform_get_resource_byname(pdev, IORESOURCE_DMA, "rx");
- if (res)
- sig = res->start;
- host->dma_rx = dma_request_slave_channel_compat(mask,
- omap_dma_filter_fn, &sig, &pdev->dev, "rx");
- if (!host->dma_rx)
- dev_warn(host->dev, "unable to obtain RX DMA engine channel %u\n",
- sig);
+ host->dma_tx = NULL;
+ dev_warn(host->dev, "TX DMA channel request failed\n");
+ }
+
+ host->dma_rx = dma_request_chan(&pdev->dev, "rx");
+ if (IS_ERR(host->dma_rx)) {
+ ret = PTR_ERR(host->dma_rx);
+ if (ret == -EPROBE_DEFER) {
+ if (host->dma_tx)
+ dma_release_channel(host->dma_tx);
+ clk_put(host->fclk);
+ goto err_free_iclk;
+ }
+
+ host->dma_rx = NULL;
+ dev_warn(host->dev, "RX DMA channel request failed\n");
+ }
ret = request_irq(host->irq, mmc_omap_irq, 0, DRIVER_NAME, host);
if (ret)
diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index f9ac3bb..24ebc9a 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -32,7 +32,6 @@
#include <linux/of_irq.h>
#include <linux/of_gpio.h>
#include <linux/of_device.h>
-#include <linux/omap-dmaengine.h>
#include <linux/mmc/host.h>
#include <linux/mmc/core.h>
#include <linux/mmc/mmc.h>
@@ -351,15 +350,14 @@
return 0;
}
-static int omap_hsmmc_set_power(struct device *dev, int power_on, int vdd)
+static int omap_hsmmc_set_power(struct omap_hsmmc_host *host, int power_on,
+ int vdd)
{
- struct omap_hsmmc_host *host =
- platform_get_drvdata(to_platform_device(dev));
struct mmc_host *mmc = host->mmc;
int ret = 0;
if (mmc_pdata(host)->set_power)
- return mmc_pdata(host)->set_power(dev, power_on, vdd);
+ return mmc_pdata(host)->set_power(host->dev, power_on, vdd);
/*
* If we don't see a Vcc regulator, assume it's a fixed
@@ -369,7 +367,7 @@
return 0;
if (mmc_pdata(host)->before_set_reg)
- mmc_pdata(host)->before_set_reg(dev, power_on, vdd);
+ mmc_pdata(host)->before_set_reg(host->dev, power_on, vdd);
ret = omap_hsmmc_set_pbias(host, false, 0);
if (ret)
@@ -403,7 +401,7 @@
}
if (mmc_pdata(host)->after_set_reg)
- mmc_pdata(host)->after_set_reg(dev, power_on, vdd);
+ mmc_pdata(host)->after_set_reg(host->dev, power_on, vdd);
return 0;
@@ -968,8 +966,6 @@
return;
host->mrq = NULL;
mmc_request_done(host->mmc, mrq);
- pm_runtime_mark_last_busy(host->dev);
- pm_runtime_put_autosuspend(host->dev);
}
/*
@@ -1250,17 +1246,15 @@
int ret;
/* Disable the clocks */
- pm_runtime_put_sync(host->dev);
if (host->dbclk)
clk_disable_unprepare(host->dbclk);
/* Turn the power off */
- ret = omap_hsmmc_set_power(host->dev, 0, 0);
+ ret = omap_hsmmc_set_power(host, 0, 0);
/* Turn the power ON with given VDD 1.8 or 3.0v */
if (!ret)
- ret = omap_hsmmc_set_power(host->dev, 1, vdd);
- pm_runtime_get_sync(host->dev);
+ ret = omap_hsmmc_set_power(host, 1, vdd);
if (host->dbclk)
clk_prepare_enable(host->dbclk);
@@ -1368,8 +1362,6 @@
host->mrq = NULL;
mmc_request_done(host->mmc, mrq);
- pm_runtime_mark_last_busy(host->dev);
- pm_runtime_put_autosuspend(host->dev);
}
}
@@ -1602,7 +1594,6 @@
BUG_ON(host->req_in_progress);
BUG_ON(host->dma_ch != -1);
- pm_runtime_get_sync(host->dev);
if (host->protect_card) {
if (host->reqs_blocked < 3) {
/*
@@ -1619,8 +1610,6 @@
req->data->error = -EBADF;
req->cmd->retries = 0;
mmc_request_done(mmc, req);
- pm_runtime_mark_last_busy(host->dev);
- pm_runtime_put_autosuspend(host->dev);
return;
} else if (host->reqs_blocked)
host->reqs_blocked = 0;
@@ -1634,8 +1623,6 @@
req->data->error = err;
host->mrq = NULL;
mmc_request_done(mmc, req);
- pm_runtime_mark_last_busy(host->dev);
- pm_runtime_put_autosuspend(host->dev);
return;
}
if (req->sbc && !(host->flags & AUTO_CMD23)) {
@@ -1653,15 +1640,13 @@
struct omap_hsmmc_host *host = mmc_priv(mmc);
int do_send_init_stream = 0;
- pm_runtime_get_sync(host->dev);
-
if (ios->power_mode != host->power_mode) {
switch (ios->power_mode) {
case MMC_POWER_OFF:
- omap_hsmmc_set_power(host->dev, 0, 0);
+ omap_hsmmc_set_power(host, 0, 0);
break;
case MMC_POWER_UP:
- omap_hsmmc_set_power(host->dev, 1, ios->vdd);
+ omap_hsmmc_set_power(host, 1, ios->vdd);
break;
case MMC_POWER_ON:
do_send_init_stream = 1;
@@ -1698,8 +1683,6 @@
send_init_stream(host);
omap_hsmmc_set_bus_mode(host);
-
- pm_runtime_put_autosuspend(host->dev);
}
static int omap_hsmmc_get_cd(struct mmc_host *mmc)
@@ -1962,13 +1945,17 @@
static struct omap_hsmmc_platform_data *of_get_hsmmc_pdata(struct device *dev)
{
- struct omap_hsmmc_platform_data *pdata;
+ struct omap_hsmmc_platform_data *pdata, *legacy;
struct device_node *np = dev->of_node;
pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL);
if (!pdata)
return ERR_PTR(-ENOMEM); /* out of memory */
+ legacy = dev_get_platdata(dev);
+ if (legacy && legacy->name)
+ pdata->name = legacy->name;
+
if (of_find_property(np, "ti,dual-volt", NULL))
pdata->controller_flags |= OMAP_HSMMC_SUPPORTS_DUAL_VOLT;
@@ -2005,8 +1992,6 @@
struct resource *res;
int ret, irq;
const struct of_device_id *match;
- dma_cap_mask_t mask;
- unsigned tx_req, rx_req;
const struct omap_mmc_of_data *data;
void __iomem *base;
@@ -2136,44 +2121,17 @@
omap_hsmmc_conf_bus_power(host);
- if (!pdev->dev.of_node) {
- res = platform_get_resource_byname(pdev, IORESOURCE_DMA, "tx");
- if (!res) {
- dev_err(mmc_dev(host->mmc), "cannot get DMA TX channel\n");
- ret = -ENXIO;
- goto err_irq;
- }
- tx_req = res->start;
-
- res = platform_get_resource_byname(pdev, IORESOURCE_DMA, "rx");
- if (!res) {
- dev_err(mmc_dev(host->mmc), "cannot get DMA RX channel\n");
- ret = -ENXIO;
- goto err_irq;
- }
- rx_req = res->start;
- }
-
- dma_cap_zero(mask);
- dma_cap_set(DMA_SLAVE, mask);
-
- host->rx_chan =
- dma_request_slave_channel_compat(mask, omap_dma_filter_fn,
- &rx_req, &pdev->dev, "rx");
-
- if (!host->rx_chan) {
- dev_err(mmc_dev(host->mmc), "unable to obtain RX DMA engine channel\n");
- ret = -ENXIO;
+ host->rx_chan = dma_request_chan(&pdev->dev, "rx");
+ if (IS_ERR(host->rx_chan)) {
+ dev_err(mmc_dev(host->mmc), "RX DMA channel request failed\n");
+ ret = PTR_ERR(host->rx_chan);
goto err_irq;
}
- host->tx_chan =
- dma_request_slave_channel_compat(mask, omap_dma_filter_fn,
- &tx_req, &pdev->dev, "tx");
-
- if (!host->tx_chan) {
- dev_err(mmc_dev(host->mmc), "unable to obtain TX DMA engine channel\n");
- ret = -ENXIO;
+ host->tx_chan = dma_request_chan(&pdev->dev, "tx");
+ if (IS_ERR(host->tx_chan)) {
+ dev_err(mmc_dev(host->mmc), "TX DMA channel request failed\n");
+ ret = PTR_ERR(host->tx_chan);
goto err_irq;
}
@@ -2231,9 +2189,9 @@
mmc_remove_host(mmc);
err_irq:
device_init_wakeup(&pdev->dev, false);
- if (host->tx_chan)
+ if (!IS_ERR_OR_NULL(host->tx_chan))
dma_release_channel(host->tx_chan);
- if (host->rx_chan)
+ if (!IS_ERR_OR_NULL(host->rx_chan))
dma_release_channel(host->rx_chan);
pm_runtime_dont_use_autosuspend(host->dev);
pm_runtime_put_sync(host->dev);
diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c
index bed6a49..b2d70ba 100644
--- a/drivers/mmc/host/sdhci-acpi.c
+++ b/drivers/mmc/host/sdhci-acpi.c
@@ -200,8 +200,6 @@
if (!gpio_cd)
return 0;
- pm_runtime_get_sync(mmc->parent);
-
spin_lock_irqsave(&host->lock, flags);
if (host->flags & SDHCI_DEVICE_DEAD)
@@ -211,9 +209,6 @@
out:
spin_unlock_irqrestore(&host->lock, flags);
- pm_runtime_mark_last_busy(mmc->parent);
- pm_runtime_put_autosuspend(mmc->parent);
-
return ret;
}
@@ -267,8 +262,10 @@
/* Platform specific code during sd probe slot goes here */
- if (hid && !strcmp(hid, "80865ACA"))
+ if (hid && !strcmp(hid, "80865ACA")) {
host->mmc_host_ops.get_cd = bxt_get_cd;
+ host->mmc->caps |= MMC_CAP_AGGRESSIVE_PM;
+ }
return 0;
}
diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c
index 2e482b1..b6f4c1d 100644
--- a/drivers/mmc/host/sdhci-of-arasan.c
+++ b/drivers/mmc/host/sdhci-of-arasan.c
@@ -55,8 +55,32 @@
return freq;
}
+static void sdhci_arasan_set_clock(struct sdhci_host *host, unsigned int clock)
+{
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+ struct sdhci_arasan_data *sdhci_arasan = sdhci_pltfm_priv(pltfm_host);
+ bool ctrl_phy = false;
+
+ if (clock > MMC_HIGH_52_MAX_DTR && (!IS_ERR(sdhci_arasan->phy)))
+ ctrl_phy = true;
+
+ if (ctrl_phy) {
+ spin_unlock_irq(&host->lock);
+ phy_power_off(sdhci_arasan->phy);
+ spin_lock_irq(&host->lock);
+ }
+
+ sdhci_set_clock(host, clock);
+
+ if (ctrl_phy) {
+ spin_unlock_irq(&host->lock);
+ phy_power_on(sdhci_arasan->phy);
+ spin_lock_irq(&host->lock);
+ }
+}
+
static struct sdhci_ops sdhci_arasan_ops = {
- .set_clock = sdhci_set_clock,
+ .set_clock = sdhci_arasan_set_clock,
.get_max_clock = sdhci_pltfm_clk_get_max_clock,
.get_timeout_clock = sdhci_arasan_get_timeout_clock,
.set_bus_width = sdhci_set_bus_width,
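One detail worth calling out in the new sdhci_arasan_set_clock(): host->lock is released around phy_power_off()/phy_power_on() because generic PHY operations may sleep, which is not permitted under a spinlock; the clock itself is still reprogrammed through the standard sdhci_set_clock() helper in between.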
diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c
index 2703aa9..25f779e 100644
--- a/drivers/mmc/host/sdhci-of-at91.c
+++ b/drivers/mmc/host/sdhci-of-at91.c
@@ -15,8 +15,10 @@
*/
#include <linux/clk.h>
+#include <linux/delay.h>
#include <linux/err.h>
#include <linux/io.h>
+#include <linux/kernel.h>
#include <linux/mmc/host.h>
#include <linux/mmc/slot-gpio.h>
#include <linux/module.h>
@@ -31,14 +33,60 @@
#define SDMMC_CACR_CAPWREN BIT(0)
#define SDMMC_CACR_KEY (0x46 << 8)
+#define SDHCI_AT91_PRESET_COMMON_CONF 0x400 /* drv type B, programmable clock mode */
+
struct sdhci_at91_priv {
struct clk *hclock;
struct clk *gck;
struct clk *mainck;
};
+static void sdhci_at91_set_clock(struct sdhci_host *host, unsigned int clock)
+{
+ u16 clk;
+ unsigned long timeout;
+
+ host->mmc->actual_clock = 0;
+
+ /*
+ * There is no requirement to disable the internal clock before
+ * changing the SD clock configuration. Moreover, disabling the
+ * internal clock, changing the configuration and re-enabling the
+ * internal clock causes some bugs. It can prevent the internal clock
+ * stable flag from becoming ready and can trigger an unexpected
+ * switch to the base clock when using presets.
+ */
+ clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL);
+ clk &= SDHCI_CLOCK_INT_EN;
+ sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL);
+
+ if (clock == 0)
+ return;
+
+ clk = sdhci_calc_clk(host, clock, &host->mmc->actual_clock);
+
+ clk |= SDHCI_CLOCK_INT_EN;
+ sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL);
+
+ /* Wait max 20 ms */
+ timeout = 20;
+ while (!((clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL))
+ & SDHCI_CLOCK_INT_STABLE)) {
+ if (timeout == 0) {
+ pr_err("%s: Internal clock never stabilised.\n",
+ mmc_hostname(host->mmc));
+ return;
+ }
+ timeout--;
+ mdelay(1);
+ }
+
+ clk |= SDHCI_CLOCK_CARD_EN;
+ sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL);
+}
+
static const struct sdhci_ops sdhci_at91_sama5d2_ops = {
- .set_clock = sdhci_set_clock,
+ .set_clock = sdhci_at91_set_clock,
.set_bus_width = sdhci_set_bus_width,
.reset = sdhci_reset,
.set_uhs_signaling = sdhci_set_uhs_signaling,
@@ -46,7 +94,6 @@
static const struct sdhci_pltfm_data soc_data_sama5d2 = {
.ops = &sdhci_at91_sama5d2_ops,
- .quirks2 = SDHCI_QUIRK2_NEED_DELAY_AFTER_INT_CLK_RST,
};
static const struct of_device_id sdhci_at91_dt_match[] = {
@@ -119,6 +166,7 @@
unsigned int clk_base, clk_mul;
unsigned int gck_rate, real_gck_rate;
int ret;
+ unsigned int preset_div;
match = of_match_device(sdhci_at91_dt_match, &pdev->dev);
if (!match)
@@ -186,6 +234,28 @@
clk_mul, real_gck_rate);
}
+ /*
+ * We have to set preset values because they depend on the clk_mul
+ * value. Moreover, SDR104 is supported in a degraded mode since the
+ * maximum sd clock value is 120 MHz instead of 208 MHz. For that
+ * reason, we need to use presets to support SDR104.
+ */
+ preset_div = DIV_ROUND_UP(real_gck_rate, 24000000) - 1;
+ writew(SDHCI_AT91_PRESET_COMMON_CONF | preset_div,
+ host->ioaddr + SDHCI_PRESET_FOR_SDR12);
+ preset_div = DIV_ROUND_UP(real_gck_rate, 50000000) - 1;
+ writew(SDHCI_AT91_PRESET_COMMON_CONF | preset_div,
+ host->ioaddr + SDHCI_PRESET_FOR_SDR25);
+ preset_div = DIV_ROUND_UP(real_gck_rate, 100000000) - 1;
+ writew(SDHCI_AT91_PRESET_COMMON_CONF | preset_div,
+ host->ioaddr + SDHCI_PRESET_FOR_SDR50);
+ preset_div = DIV_ROUND_UP(real_gck_rate, 120000000) - 1;
+ writew(SDHCI_AT91_PRESET_COMMON_CONF | preset_div,
+ host->ioaddr + SDHCI_PRESET_FOR_SDR104);
+ preset_div = DIV_ROUND_UP(real_gck_rate, 50000000) - 1;
+ writew(SDHCI_AT91_PRESET_COMMON_CONF | preset_div,
+ host->ioaddr + SDHCI_PRESET_FOR_DDR50);
+
clk_prepare_enable(priv->mainck);
clk_prepare_enable(priv->gck);
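The preset registers encode a divider such that the resulting SD clock is real_gck_rate / (preset_div + 1), so DIV_ROUND_UP(rate, target) - 1 yields the smallest divider whose output does not exceed the target speed-mode clock. A worked userspace example, assuming an illustrative generated-clock rate of 480 MHz:

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	unsigned int gck = 480000000;	/* assumed rate, for illustration */
	unsigned int targets[] = { 24000000, 50000000, 100000000, 120000000 };
	const char *names[] = { "SDR12", "SDR25/DDR50", "SDR50", "SDR104" };

	for (int i = 0; i < 4; i++) {
		unsigned int div = DIV_ROUND_UP(gck, targets[i]) - 1;

		/* resulting clock is gck / (div + 1), never above target */
		printf("%-12s div=%2u -> %u Hz (target %u Hz)\n",
		       names[i], div, gck / (div + 1), targets[i]);
	}
	return 0;
}

For 480 MHz this prints div=19 for SDR12 (24 MHz exactly) and div=9 for SDR25/DDR50 (48 MHz, just under the 50 MHz target), matching the never-overshoot property the rounding-up division guarantees.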
diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c
index 79e1901..97d4eeb 100644
--- a/drivers/mmc/host/sdhci-pci-core.c
+++ b/drivers/mmc/host/sdhci-pci-core.c
@@ -340,8 +340,6 @@
if (!gpio_cd)
return 0;
- pm_runtime_get_sync(mmc->parent);
-
spin_lock_irqsave(&host->lock, flags);
if (host->flags & SDHCI_DEVICE_DEAD)
@@ -351,9 +349,6 @@
out:
spin_unlock_irqrestore(&host->lock, flags);
- pm_runtime_mark_last_busy(mmc->parent);
- pm_runtime_put_autosuspend(mmc->parent);
-
return ret;
}
@@ -391,8 +386,10 @@
slot->cd_override_level = true;
if (slot->chip->pdev->device == PCI_DEVICE_ID_INTEL_BXT_SD ||
slot->chip->pdev->device == PCI_DEVICE_ID_INTEL_BXTM_SD ||
- slot->chip->pdev->device == PCI_DEVICE_ID_INTEL_APL_SD)
+ slot->chip->pdev->device == PCI_DEVICE_ID_INTEL_APL_SD) {
slot->host->mmc_host_ops.get_cd = bxt_get_cd;
+ slot->host->mmc->caps |= MMC_CAP_AGGRESSIVE_PM;
+ }
return 0;
}
diff --git a/drivers/mmc/host/sdhci-pic32.c b/drivers/mmc/host/sdhci-pic32.c
index 059df70..72c13b6 100644
--- a/drivers/mmc/host/sdhci-pic32.c
+++ b/drivers/mmc/host/sdhci-pic32.c
@@ -243,7 +243,6 @@
static struct platform_driver pic32_sdhci_driver = {
.driver = {
.name = "pic32-sdhci",
- .owner = THIS_MODULE,
.of_match_table = of_match_ptr(pic32_sdhci_id_table),
},
.probe = pic32_sdhci_probe,
diff --git a/drivers/mmc/host/sdhci-pltfm.c b/drivers/mmc/host/sdhci-pltfm.c
index 072bb27..64f287a 100644
--- a/drivers/mmc/host/sdhci-pltfm.c
+++ b/drivers/mmc/host/sdhci-pltfm.c
@@ -119,16 +119,22 @@
{
struct sdhci_host *host;
struct resource *iomem;
- int ret;
+ void __iomem *ioaddr;
+ int irq, ret;
iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!iomem) {
- ret = -ENOMEM;
+ ioaddr = devm_ioremap_resource(&pdev->dev, iomem);
+ if (IS_ERR(ioaddr)) {
+ ret = PTR_ERR(ioaddr);
goto err;
}
- if (resource_size(iomem) < 0x100)
- dev_err(&pdev->dev, "Invalid iomem size!\n");
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0) {
+ dev_err(&pdev->dev, "failed to get IRQ number\n");
+ ret = irq;
+ goto err;
+ }
host = sdhci_alloc_host(&pdev->dev,
sizeof(struct sdhci_pltfm_host) + priv_size);
@@ -138,6 +144,8 @@
goto err;
}
+ host->ioaddr = ioaddr;
+ host->irq = irq;
host->hw_name = dev_name(&pdev->dev);
if (pdata && pdata->ops)
host->ops = pdata->ops;
@@ -148,22 +156,6 @@
host->quirks2 = pdata->quirks2;
}
- host->irq = platform_get_irq(pdev, 0);
-
- if (!request_mem_region(iomem->start, resource_size(iomem),
- mmc_hostname(host->mmc))) {
- dev_err(&pdev->dev, "cannot request region\n");
- ret = -EBUSY;
- goto err_request;
- }
-
- host->ioaddr = ioremap(iomem->start, resource_size(iomem));
- if (!host->ioaddr) {
- dev_err(&pdev->dev, "failed to remap registers\n");
- ret = -ENOMEM;
- goto err_remap;
- }
-
/*
* Some platforms need to probe the controller to be able to
* determine which caps should be used.
@@ -174,11 +166,6 @@
platform_set_drvdata(pdev, host);
return host;
-
-err_remap:
- release_mem_region(iomem->start, resource_size(iomem));
-err_request:
- sdhci_free_host(host);
err:
dev_err(&pdev->dev, "%s failed %d\n", __func__, ret);
return ERR_PTR(ret);
@@ -188,10 +175,7 @@
void sdhci_pltfm_free(struct platform_device *pdev)
{
struct sdhci_host *host = platform_get_drvdata(pdev);
- struct resource *iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- iounmap(host->ioaddr);
- release_mem_region(iomem->start, resource_size(iomem));
sdhci_free_host(host);
}
EXPORT_SYMBOL_GPL(sdhci_pltfm_free);
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 6bd3d17..e010ea4 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -38,11 +38,6 @@
#define DBG(f, x...) \
pr_debug(DRIVER_NAME " [%s()]: " f, __func__,## x)
-#if defined(CONFIG_LEDS_CLASS) || (defined(CONFIG_LEDS_CLASS_MODULE) && \
- defined(CONFIG_MMC_SDHCI_MODULE))
-#define SDHCI_USE_LEDS_CLASS
-#endif
-
#define MAX_TUNING_LOOP 40
static unsigned int debug_quirks = 0;
@@ -53,29 +48,7 @@
static void sdhci_finish_command(struct sdhci_host *);
static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode);
static void sdhci_enable_preset_value(struct sdhci_host *host, bool enable);
-static int sdhci_do_get_cd(struct sdhci_host *host);
-
-#ifdef CONFIG_PM
-static int sdhci_runtime_pm_get(struct sdhci_host *host);
-static int sdhci_runtime_pm_put(struct sdhci_host *host);
-static void sdhci_runtime_pm_bus_on(struct sdhci_host *host);
-static void sdhci_runtime_pm_bus_off(struct sdhci_host *host);
-#else
-static inline int sdhci_runtime_pm_get(struct sdhci_host *host)
-{
- return 0;
-}
-static inline int sdhci_runtime_pm_put(struct sdhci_host *host)
-{
- return 0;
-}
-static void sdhci_runtime_pm_bus_on(struct sdhci_host *host)
-{
-}
-static void sdhci_runtime_pm_bus_off(struct sdhci_host *host)
-{
-}
-#endif
+static int sdhci_get_cd(struct mmc_host *mmc);
static void sdhci_dumpregs(struct sdhci_host *host)
{
@@ -171,6 +144,22 @@
sdhci_set_card_detection(host, false);
}
+static void sdhci_runtime_pm_bus_on(struct sdhci_host *host)
+{
+ if (host->bus_on)
+ return;
+ host->bus_on = true;
+ pm_runtime_get_noresume(host->mmc->parent);
+}
+
+static void sdhci_runtime_pm_bus_off(struct sdhci_host *host)
+{
+ if (!host->bus_on)
+ return;
+ host->bus_on = false;
+ pm_runtime_put_noidle(host->mmc->parent);
+}
+
void sdhci_reset(struct sdhci_host *host, u8 mask)
{
unsigned long timeout;
@@ -204,7 +193,7 @@
static void sdhci_do_reset(struct sdhci_host *host, u8 mask)
{
if (host->quirks & SDHCI_QUIRK_NO_CARD_NO_RESET) {
- if (!sdhci_do_get_cd(host))
+ if (!sdhci_get_cd(host->mmc))
return;
}
@@ -252,7 +241,7 @@
sdhci_enable_card_detection(host);
}
-static void sdhci_activate_led(struct sdhci_host *host)
+static void __sdhci_led_activate(struct sdhci_host *host)
{
u8 ctrl;
@@ -261,7 +250,7 @@
sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
}
-static void sdhci_deactivate_led(struct sdhci_host *host)
+static void __sdhci_led_deactivate(struct sdhci_host *host)
{
u8 ctrl;
@@ -270,9 +259,9 @@
sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
}
-#ifdef SDHCI_USE_LEDS_CLASS
+#if IS_REACHABLE(CONFIG_LEDS_CLASS)
static void sdhci_led_control(struct led_classdev *led,
- enum led_brightness brightness)
+ enum led_brightness brightness)
{
struct sdhci_host *host = container_of(led, struct sdhci_host, led);
unsigned long flags;
@@ -283,12 +272,62 @@
goto out;
if (brightness == LED_OFF)
- sdhci_deactivate_led(host);
+ __sdhci_led_deactivate(host);
else
- sdhci_activate_led(host);
+ __sdhci_led_activate(host);
out:
spin_unlock_irqrestore(&host->lock, flags);
}
+
+static int sdhci_led_register(struct sdhci_host *host)
+{
+ struct mmc_host *mmc = host->mmc;
+
+ snprintf(host->led_name, sizeof(host->led_name),
+ "%s::", mmc_hostname(mmc));
+
+ host->led.name = host->led_name;
+ host->led.brightness = LED_OFF;
+ host->led.default_trigger = mmc_hostname(mmc);
+ host->led.brightness_set = sdhci_led_control;
+
+ return led_classdev_register(mmc_dev(mmc), &host->led);
+}
+
+static void sdhci_led_unregister(struct sdhci_host *host)
+{
+ led_classdev_unregister(&host->led);
+}
+
+static inline void sdhci_led_activate(struct sdhci_host *host)
+{
+}
+
+static inline void sdhci_led_deactivate(struct sdhci_host *host)
+{
+}
+
+#else
+
+static inline int sdhci_led_register(struct sdhci_host *host)
+{
+ return 0;
+}
+
+static inline void sdhci_led_unregister(struct sdhci_host *host)
+{
+}
+
+static inline void sdhci_led_activate(struct sdhci_host *host)
+{
+ __sdhci_led_activate(host);
+}
+
+static inline void sdhci_led_deactivate(struct sdhci_host *host)
+{
+ __sdhci_led_deactivate(host);
+}
+
#endif
/*****************************************************************************\
@@ -1091,23 +1130,14 @@
return preset;
}
-void sdhci_set_clock(struct sdhci_host *host, unsigned int clock)
+u16 sdhci_calc_clk(struct sdhci_host *host, unsigned int clock,
+ unsigned int *actual_clock)
{
int div = 0; /* Initialized for compiler warning */
int real_div = div, clk_mul = 1;
u16 clk = 0;
- unsigned long timeout;
bool switch_base_clk = false;
- host->mmc->actual_clock = 0;
-
- sdhci_writew(host, 0, SDHCI_CLOCK_CONTROL);
- if (host->quirks2 & SDHCI_QUIRK2_NEED_DELAY_AFTER_INT_CLK_RST)
- mdelay(1);
-
- if (clock == 0)
- return;
-
if (host->version >= SDHCI_SPEC_300) {
if (host->preset_enabled) {
u16 pre_val;
@@ -1184,10 +1214,29 @@
clock_set:
if (real_div)
- host->mmc->actual_clock = (host->max_clk * clk_mul) / real_div;
+ *actual_clock = (host->max_clk * clk_mul) / real_div;
clk |= (div & SDHCI_DIV_MASK) << SDHCI_DIVIDER_SHIFT;
clk |= ((div & SDHCI_DIV_HI_MASK) >> SDHCI_DIV_MASK_LEN)
<< SDHCI_DIVIDER_HI_SHIFT;
+
+ return clk;
+}
+EXPORT_SYMBOL_GPL(sdhci_calc_clk);
+
+void sdhci_set_clock(struct sdhci_host *host, unsigned int clock)
+{
+ u16 clk;
+ unsigned long timeout;
+
+ host->mmc->actual_clock = 0;
+
+ sdhci_writew(host, 0, SDHCI_CLOCK_CONTROL);
+
+ if (clock == 0)
+ return;
+
+ clk = sdhci_calc_clk(host, clock, &host->mmc->actual_clock);
+
clk |= SDHCI_CLOCK_INT_EN;
sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL);
@@ -1319,8 +1368,6 @@
host = mmc_priv(mmc);
- sdhci_runtime_pm_get(host);
-
/* Firstly check card presence */
present = mmc->ops->get_cd(mmc);
@@ -1328,9 +1375,7 @@
WARN_ON(host->mrq != NULL);
-#ifndef SDHCI_USE_LEDS_CLASS
- sdhci_activate_led(host);
-#endif
+ sdhci_led_activate(host);
/*
* Ensure we don't send the STOP for non-SET_BLOCK_COUNTED
@@ -1405,11 +1450,11 @@
}
EXPORT_SYMBOL_GPL(sdhci_set_uhs_signaling);
-static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios)
+static void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
{
+ struct sdhci_host *host = mmc_priv(mmc);
unsigned long flags;
u8 ctrl;
- struct mmc_host *mmc = host->mmc;
spin_lock_irqsave(&host->lock, flags);
@@ -1563,18 +1608,10 @@
spin_unlock_irqrestore(&host->lock, flags);
}
-static void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
+static int sdhci_get_cd(struct mmc_host *mmc)
{
struct sdhci_host *host = mmc_priv(mmc);
-
- sdhci_runtime_pm_get(host);
- sdhci_do_set_ios(host, ios);
- sdhci_runtime_pm_put(host);
-}
-
-static int sdhci_do_get_cd(struct sdhci_host *host)
-{
- int gpio_cd = mmc_gpio_get_cd(host->mmc);
+ int gpio_cd = mmc_gpio_get_cd(mmc);
if (host->flags & SDHCI_DEVICE_DEAD)
return 0;
@@ -1598,17 +1635,6 @@
return !!(sdhci_readl(host, SDHCI_PRESENT_STATE) & SDHCI_CARD_PRESENT);
}
-static int sdhci_get_cd(struct mmc_host *mmc)
-{
- struct sdhci_host *host = mmc_priv(mmc);
- int ret;
-
- sdhci_runtime_pm_get(host);
- ret = sdhci_do_get_cd(host);
- sdhci_runtime_pm_put(host);
- return ret;
-}
-
static int sdhci_check_ro(struct sdhci_host *host)
{
unsigned long flags;
@@ -1633,8 +1659,9 @@
#define SAMPLE_COUNT 5
-static int sdhci_do_get_ro(struct sdhci_host *host)
+static int sdhci_get_ro(struct mmc_host *mmc)
{
+ struct sdhci_host *host = mmc_priv(mmc);
int i, ro_count;
if (!(host->quirks & SDHCI_QUIRK_UNSTABLE_RO_DETECT))
@@ -1659,17 +1686,6 @@
host->ops->hw_reset(host);
}
-static int sdhci_get_ro(struct mmc_host *mmc)
-{
- struct sdhci_host *host = mmc_priv(mmc);
- int ret;
-
- sdhci_runtime_pm_get(host);
- ret = sdhci_do_get_ro(host);
- sdhci_runtime_pm_put(host);
- return ret;
-}
-
static void sdhci_enable_sdio_irq_nolock(struct sdhci_host *host, int enable)
{
if (!(host->flags & SDHCI_DEVICE_DEAD)) {
@@ -1689,8 +1705,6 @@
struct sdhci_host *host = mmc_priv(mmc);
unsigned long flags;
- sdhci_runtime_pm_get(host);
-
spin_lock_irqsave(&host->lock, flags);
if (enable)
host->flags |= SDHCI_SDIO_IRQ_ENABLED;
@@ -1699,14 +1713,12 @@
sdhci_enable_sdio_irq_nolock(host, enable);
spin_unlock_irqrestore(&host->lock, flags);
-
- sdhci_runtime_pm_put(host);
}
-static int sdhci_do_start_signal_voltage_switch(struct sdhci_host *host,
- struct mmc_ios *ios)
+static int sdhci_start_signal_voltage_switch(struct mmc_host *mmc,
+ struct mmc_ios *ios)
{
- struct mmc_host *mmc = host->mmc;
+ struct sdhci_host *host = mmc_priv(mmc);
u16 ctrl;
int ret;
@@ -1794,29 +1806,13 @@
}
}
-static int sdhci_start_signal_voltage_switch(struct mmc_host *mmc,
- struct mmc_ios *ios)
-{
- struct sdhci_host *host = mmc_priv(mmc);
- int err;
-
- if (host->version < SDHCI_SPEC_300)
- return 0;
- sdhci_runtime_pm_get(host);
- err = sdhci_do_start_signal_voltage_switch(host, ios);
- sdhci_runtime_pm_put(host);
- return err;
-}
-
static int sdhci_card_busy(struct mmc_host *mmc)
{
struct sdhci_host *host = mmc_priv(mmc);
u32 present_state;
- sdhci_runtime_pm_get(host);
/* Check whether DAT[3:0] is 0000 */
present_state = sdhci_readl(host, SDHCI_PRESENT_STATE);
- sdhci_runtime_pm_put(host);
return !(present_state & SDHCI_DATA_LVL_MASK);
}
@@ -1843,7 +1839,6 @@
unsigned int tuning_count = 0;
bool hs400_tuning;
- sdhci_runtime_pm_get(host);
spin_lock_irqsave(&host->lock, flags);
hs400_tuning = host->flags & SDHCI_HS400_TUNING;
@@ -1879,8 +1874,7 @@
break;
case MMC_TIMING_UHS_SDR50:
- if (host->flags & SDHCI_SDR50_NEEDS_TUNING ||
- host->flags & SDHCI_SDR104_NEEDS_TUNING)
+ if (host->flags & SDHCI_SDR50_NEEDS_TUNING)
break;
/* FALLTHROUGH */
@@ -1891,7 +1885,6 @@
if (host->ops->platform_execute_tuning) {
spin_unlock_irqrestore(&host->lock, flags);
err = host->ops->platform_execute_tuning(host, opcode);
- sdhci_runtime_pm_put(host);
return err;
}
@@ -2023,8 +2016,6 @@
sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
out_unlock:
spin_unlock_irqrestore(&host->lock, flags);
- sdhci_runtime_pm_put(host);
-
return err;
}
@@ -2105,7 +2096,7 @@
if (host->ops->card_event)
host->ops->card_event(host);
- present = sdhci_do_get_cd(host);
+ present = sdhci_get_cd(host->mmc);
spin_lock_irqsave(&host->lock, flags);
@@ -2214,15 +2205,12 @@
host->cmd = NULL;
host->data = NULL;
-#ifndef SDHCI_USE_LEDS_CLASS
- sdhci_deactivate_led(host);
-#endif
+ sdhci_led_deactivate(host);
mmiowb();
spin_unlock_irqrestore(&host->lock, flags);
mmc_request_done(host->mmc, mrq);
- sdhci_runtime_pm_put(host);
}
static void sdhci_timeout_timer(unsigned long data)
@@ -2679,7 +2667,7 @@
sdhci_init(host, 0);
host->pwr = 0;
host->clock = 0;
- sdhci_do_set_ios(host, &host->mmc->ios);
+ sdhci_set_ios(host->mmc, &host->mmc->ios);
} else {
sdhci_init(host, (host->mmc->pm_flags & MMC_PM_KEEP_POWER));
mmiowb();
@@ -2703,33 +2691,6 @@
EXPORT_SYMBOL_GPL(sdhci_resume_host);
-static int sdhci_runtime_pm_get(struct sdhci_host *host)
-{
- return pm_runtime_get_sync(host->mmc->parent);
-}
-
-static int sdhci_runtime_pm_put(struct sdhci_host *host)
-{
- pm_runtime_mark_last_busy(host->mmc->parent);
- return pm_runtime_put_autosuspend(host->mmc->parent);
-}
-
-static void sdhci_runtime_pm_bus_on(struct sdhci_host *host)
-{
- if (host->bus_on)
- return;
- host->bus_on = true;
- pm_runtime_get_noresume(host->mmc->parent);
-}
-
-static void sdhci_runtime_pm_bus_off(struct sdhci_host *host)
-{
- if (!host->bus_on)
- return;
- host->bus_on = false;
- pm_runtime_put_noidle(host->mmc->parent);
-}
-
int sdhci_runtime_suspend_host(struct sdhci_host *host)
{
unsigned long flags;
@@ -2768,8 +2729,8 @@
/* Force clock and power re-program */
host->pwr = 0;
host->clock = 0;
- sdhci_do_start_signal_voltage_switch(host, &host->mmc->ios);
- sdhci_do_set_ios(host, &host->mmc->ios);
+ sdhci_start_signal_voltage_switch(host->mmc, &host->mmc->ios);
+ sdhci_set_ios(host->mmc, &host->mmc->ios);
if ((host_flags & SDHCI_PV_ENABLED) &&
!(host->quirks2 & SDHCI_QUIRK2_PRESET_VALUE_BROKEN)) {
@@ -3014,7 +2975,8 @@
if (!host->ops->get_max_clock) {
pr_err("%s: Hardware doesn't specify base clock frequency.\n",
mmc_hostname(mmc));
- return -ENODEV;
+ ret = -ENODEV;
+ goto undma;
}
host->max_clk = host->ops->get_max_clock(host);
}
@@ -3051,7 +3013,7 @@
} else
mmc->f_min = host->max_clk / SDHCI_MAX_DIV_SPEC_200;
- if (!mmc->f_max || (mmc->f_max && (mmc->f_max > max_clk)))
+ if (!mmc->f_max || mmc->f_max > max_clk)
mmc->f_max = max_clk;
if (!(host->quirks & SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK)) {
@@ -3064,7 +3026,8 @@
} else {
pr_err("%s: Hardware doesn't specify timeout clock frequency.\n",
mmc_hostname(mmc));
- return -ENODEV;
+ ret = -ENODEV;
+ goto undma;
}
}
@@ -3118,8 +3081,9 @@
mmc->caps |= MMC_CAP_NEEDS_POLL;
/* If there are external regulators, get them */
- if (mmc_regulator_get_supply(mmc) == -EPROBE_DEFER)
- return -EPROBE_DEFER;
+ ret = mmc_regulator_get_supply(mmc);
+ if (ret == -EPROBE_DEFER)
+ goto undma;
/* If vqmmc regulator and no 1.8V signalling, then there's no UHS */
if (!IS_ERR(mmc->supply.vqmmc)) {
@@ -3174,10 +3138,6 @@
if (caps[1] & SDHCI_USE_SDR50_TUNING)
host->flags |= SDHCI_SDR50_NEEDS_TUNING;
- /* Does the host need tuning for SDR104 / HS200? */
- if (mmc->caps2 & MMC_CAP2_HS200)
- host->flags |= SDHCI_SDR104_NEEDS_TUNING;
-
/* Driver Type(s) (A, C, D) supported by the host */
if (caps[1] & SDHCI_DRIVER_TYPE_A)
mmc->caps |= MMC_CAP_DRIVER_TYPE_A;
@@ -3276,7 +3236,8 @@
if (mmc->ocr_avail == 0) {
pr_err("%s: Hardware doesn't report any support voltages.\n",
mmc_hostname(mmc));
- return -ENODEV;
+ ret = -ENODEV;
+ goto unreg;
}
spin_lock_init(&host->lock);
@@ -3360,25 +3321,18 @@
sdhci_dumpregs(host);
#endif
-#ifdef SDHCI_USE_LEDS_CLASS
- snprintf(host->led_name, sizeof(host->led_name),
- "%s::", mmc_hostname(mmc));
- host->led.name = host->led_name;
- host->led.brightness = LED_OFF;
- host->led.default_trigger = mmc_hostname(mmc);
- host->led.brightness_set = sdhci_led_control;
-
- ret = led_classdev_register(mmc_dev(mmc), &host->led);
+ ret = sdhci_led_register(host);
if (ret) {
pr_err("%s: Failed to register LED device: %d\n",
mmc_hostname(mmc), ret);
- goto reset;
+ goto unirq;
}
-#endif
mmiowb();
- mmc_add_host(mmc);
+ ret = mmc_add_host(mmc);
+ if (ret)
+ goto unled;
pr_info("%s: SDHCI controller on %s [%s] using %s\n",
mmc_hostname(mmc), host->hw_name, dev_name(mmc_dev(mmc)),
@@ -3390,15 +3344,25 @@
return 0;
-#ifdef SDHCI_USE_LEDS_CLASS
-reset:
+unled:
+ sdhci_led_unregister(host);
+unirq:
sdhci_do_reset(host, SDHCI_RESET_ALL);
sdhci_writel(host, 0, SDHCI_INT_ENABLE);
sdhci_writel(host, 0, SDHCI_SIGNAL_ENABLE);
free_irq(host->irq, host);
-#endif
untasklet:
tasklet_kill(&host->finish_tasklet);
+unreg:
+ if (!IS_ERR(mmc->supply.vqmmc))
+ regulator_disable(mmc->supply.vqmmc);
+undma:
+ if (host->align_buffer)
+ dma_free_coherent(mmc_dev(mmc), host->align_buffer_sz +
+ host->adma_table_sz, host->align_buffer,
+ host->align_addr);
+ host->adma_table = NULL;
+ host->align_buffer = NULL;
return ret;
}
@@ -3430,9 +3394,7 @@
mmc_remove_host(mmc);
-#ifdef SDHCI_USE_LEDS_CLASS
- led_classdev_unregister(&host->led);
-#endif
+ sdhci_led_unregister(host);
if (!dead)
sdhci_do_reset(host, SDHCI_RESET_ALL);
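The LED rework above replaces the SDHCI_USE_LEDS_CLASS ifdef blocks scattered through sdhci.c with one register/unregister pair plus activate/deactivate stubs, selected once by IS_REACHABLE(CONFIG_LEDS_CLASS): when the LED class is reachable, activation is left to the LED trigger and only registration does real work; when it is not, registration is a no-op and activation pokes the host register directly. A compressed userspace sketch of the stub pattern, with a plain -DHAVE_LEDS compile flag standing in for the kconfig test:

#include <stdio.h>

/* Compile with -DHAVE_LEDS to take the "LED class reachable" branch. */
#ifdef HAVE_LEDS
static int led_register(void) { printf("LED classdev registered\n"); return 0; }
static void led_activate(void) { /* no-op: LED core drives the trigger */ }
#else
static int led_register(void) { return 0; }	/* nothing to register */
static void led_activate(void) { printf("drive busy bit directly\n"); }
#endif

int main(void)
{
	if (led_register())
		return 1;
	led_activate();		/* callers stay ifdef-free either way */
	return 0;
}

The payoff is visible in the request and finish paths above: the #ifndef SDHCI_USE_LEDS_CLASS guards disappear and every caller just calls sdhci_led_activate()/sdhci_led_deactivate().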
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index 0f39f4f..609f87c 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -417,11 +417,6 @@
#define SDHCI_QUIRK2_ACMD23_BROKEN (1<<14)
/* Broken Clock divider zero in controller */
#define SDHCI_QUIRK2_CLOCK_DIV_ZERO_BROKEN (1<<15)
-/*
- * When internal clock is disabled, a delay is needed before modifying the
- * SD clock frequency or enabling back the internal clock.
- */
-#define SDHCI_QUIRK2_NEED_DELAY_AFTER_INT_CLK_RST (1<<16)
int irq; /* Device IRQ */
void __iomem *ioaddr; /* Mapped address */
@@ -433,7 +428,7 @@
struct mmc_host_ops mmc_host_ops; /* MMC host ops */
u64 dma_mask; /* custom DMA mask */
-#if defined(CONFIG_LEDS_CLASS) || defined(CONFIG_LEDS_CLASS_MODULE)
+#if IS_ENABLED(CONFIG_LEDS_CLASS)
struct led_classdev led; /* LED control */
char led_name[32];
#endif
@@ -450,7 +445,6 @@
#define SDHCI_AUTO_CMD23 (1<<7) /* Auto CMD23 support */
#define SDHCI_PV_ENABLED (1<<8) /* Preset value enabled */
#define SDHCI_SDIO_IRQ_ENABLED (1<<9) /* SDIO irq enabled */
-#define SDHCI_SDR104_NEEDS_TUNING (1<<10) /* SDR104/HS200 needs tuning */
#define SDHCI_USE_64_BIT_DMA (1<<12) /* Use 64-bit DMA */
#define SDHCI_HS400_TUNING (1<<13) /* Tuning for HS400 */
@@ -661,6 +655,8 @@
return !!(host->flags & SDHCI_SDIO_IRQ_ENABLED);
}
+u16 sdhci_calc_clk(struct sdhci_host *host, unsigned int clock,
+ unsigned int *actual_clock);
void sdhci_set_clock(struct sdhci_host *host, unsigned int clock);
void sdhci_set_power(struct sdhci_host *host, unsigned char mode,
unsigned short vdd);
diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c
index d9a655f..dd64b86 100644
--- a/drivers/mmc/host/sh_mmcif.c
+++ b/drivers/mmc/host/sh_mmcif.c
@@ -248,7 +248,6 @@
int sg_idx;
int sg_blkidx;
bool power;
- bool card_present;
bool ccs_enable; /* Command Completion Signal support */
bool clk_ctrl2_enable;
struct mutex thread_lock;
@@ -1064,16 +1063,6 @@
host->mmc->f_max, host->mmc->f_min);
}
-static void sh_mmcif_set_power(struct sh_mmcif_host *host, struct mmc_ios *ios)
-{
- struct mmc_host *mmc = host->mmc;
-
- if (!IS_ERR(mmc->supply.vmmc))
- /* Errors ignored... */
- mmc_regulator_set_ocr(mmc, mmc->supply.vmmc,
- ios->power_mode ? ios->vdd : 0);
-}
-
static void sh_mmcif_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
{
struct sh_mmcif_host *host = mmc_priv(mmc);
@@ -1091,42 +1080,32 @@
host->state = STATE_IOS;
spin_unlock_irqrestore(&host->lock, flags);
- if (ios->power_mode == MMC_POWER_UP) {
- if (!host->card_present) {
- /* See if we also get DMA */
- sh_mmcif_request_dma(host);
- host->card_present = true;
- }
- sh_mmcif_set_power(host, ios);
- } else if (ios->power_mode == MMC_POWER_OFF || !ios->clock) {
- /* clock stop */
- sh_mmcif_clock_control(host, 0);
- if (ios->power_mode == MMC_POWER_OFF) {
- if (host->card_present) {
- sh_mmcif_release_dma(host);
- host->card_present = false;
- }
- }
- if (host->power) {
- pm_runtime_put_sync(dev);
- clk_disable_unprepare(host->clk);
- host->power = false;
- if (ios->power_mode == MMC_POWER_OFF)
- sh_mmcif_set_power(host, ios);
- }
- host->state = STATE_IDLE;
- return;
- }
-
- if (ios->clock) {
+ switch (ios->power_mode) {
+ case MMC_POWER_UP:
+ if (!IS_ERR(mmc->supply.vmmc))
+ mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, ios->vdd);
if (!host->power) {
clk_prepare_enable(host->clk);
-
pm_runtime_get_sync(dev);
- host->power = true;
sh_mmcif_sync_reset(host);
+ sh_mmcif_request_dma(host);
+ host->power = true;
}
+ break;
+ case MMC_POWER_OFF:
+ if (!IS_ERR(mmc->supply.vmmc))
+ mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, 0);
+ if (host->power) {
+ sh_mmcif_clock_control(host, 0);
+ sh_mmcif_release_dma(host);
+ pm_runtime_put(dev);
+ clk_disable_unprepare(host->clk);
+ host->power = false;
+ }
+ break;
+ case MMC_POWER_ON:
sh_mmcif_clock_control(host, ios->clock);
+ break;
}
host->timing = ios->timing;
@@ -1519,23 +1498,23 @@
platform_set_drvdata(pdev, host);
- pm_runtime_enable(dev);
- host->power = false;
-
host->clk = devm_clk_get(dev, NULL);
if (IS_ERR(host->clk)) {
ret = PTR_ERR(host->clk);
dev_err(dev, "cannot get clock: %d\n", ret);
- goto err_pm;
+ goto err_host;
}
ret = clk_prepare_enable(host->clk);
if (ret < 0)
- goto err_pm;
+ goto err_host;
sh_mmcif_clk_setup(host);
- ret = pm_runtime_resume(dev);
+ pm_runtime_enable(dev);
+ host->power = false;
+
+ ret = pm_runtime_get_sync(dev);
if (ret < 0)
goto err_clk;
@@ -1579,12 +1558,13 @@
sh_mmcif_readl(host->addr, MMCIF_CE_VERSION) & 0xffff,
clk_get_rate(host->clk) / 1000000UL);
+ pm_runtime_put(dev);
clk_disable_unprepare(host->clk);
return ret;
err_clk:
clk_disable_unprepare(host->clk);
-err_pm:
+ pm_runtime_put_sync(dev);
pm_runtime_disable(dev);
err_host:
mmc_free_host(mmc);
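The sh_mmcif_set_ios() rework above flattens the nested power handling into one switch over ios->power_mode: MMC_POWER_UP brings up the regulator, clock, and DMA exactly once, MMC_POWER_OFF tears them down exactly once, and MMC_POWER_ON only programs the bus clock. A minimal userspace sketch of that state-machine shape, with printfs standing in for the regulator/clock/DMA calls:

#include <stdio.h>
#include <stdbool.h>

enum power_mode { POWER_OFF, POWER_UP, POWER_ON };

static bool powered;

/* Shape of the reworked power handling: each power_mode is one
 * self-contained case instead of nested if-chains. */
static void set_power(enum power_mode mode, unsigned int clock)
{
	switch (mode) {
	case POWER_UP:
		if (!powered) {
			printf("enable regulator + clock, reset, request DMA\n");
			powered = true;
		}
		break;
	case POWER_OFF:
		if (powered) {
			printf("stop clock, release DMA, disable regulator\n");
			powered = false;
		}
		break;
	case POWER_ON:
		printf("program bus clock to %u Hz\n", clock);
		break;
	}
}

int main(void)
{
	set_power(POWER_UP, 0);
	set_power(POWER_ON, 400000);
	set_power(POWER_OFF, 0);
	return 0;
}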
diff --git a/drivers/mmc/host/sh_mobile_sdhi.c b/drivers/mmc/host/sh_mobile_sdhi.c
index 9aa1479..f750f94 100644
--- a/drivers/mmc/host/sh_mobile_sdhi.c
+++ b/drivers/mmc/host/sh_mobile_sdhi.c
@@ -28,10 +28,12 @@
#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/mmc/host.h>
-#include <linux/mmc/sh_mobile_sdhi.h>
#include <linux/mfd/tmio.h>
#include <linux/sh_dma.h>
#include <linux/delay.h>
+#include <linux/pinctrl/consumer.h>
+#include <linux/pinctrl/pinctrl-state.h>
+#include <linux/regulator/consumer.h>
#include "tmio_mmc.h"
@@ -48,10 +50,8 @@
unsigned bus_shift;
};
-static const struct sh_mobile_sdhi_of_data sh_mobile_sdhi_of_cfg[] = {
- {
- .tmio_flags = TMIO_MMC_HAS_IDLE_WAIT,
- },
+static const struct sh_mobile_sdhi_of_data of_default_cfg = {
+ .tmio_flags = TMIO_MMC_HAS_IDLE_WAIT,
};
static const struct sh_mobile_sdhi_of_data of_rcar_gen1_compatible = {
@@ -62,7 +62,7 @@
static const struct sh_mobile_sdhi_of_data of_rcar_gen2_compatible = {
.tmio_flags = TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_WRPROTECT_DISABLE |
- TMIO_MMC_CLK_ACTUAL | TMIO_MMC_FAST_CLK_CHG,
+ TMIO_MMC_CLK_ACTUAL | TMIO_MMC_MIN_RCAR2,
.capabilities = MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ,
.dma_buswidth = DMA_SLAVE_BUSWIDTH_4_BYTES,
.dma_rx_offset = 0x2000,
@@ -70,17 +70,16 @@
static const struct sh_mobile_sdhi_of_data of_rcar_gen3_compatible = {
.tmio_flags = TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_WRPROTECT_DISABLE |
- TMIO_MMC_CLK_ACTUAL | TMIO_MMC_FAST_CLK_CHG,
- .capabilities = MMC_CAP_SD_HIGHSPEED,
+ TMIO_MMC_CLK_ACTUAL | TMIO_MMC_MIN_RCAR2,
+ .capabilities = MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ,
.bus_shift = 2,
};
static const struct of_device_id sh_mobile_sdhi_of_match[] = {
{ .compatible = "renesas,sdhi-shmobile" },
- { .compatible = "renesas,sdhi-sh7372" },
- { .compatible = "renesas,sdhi-sh73a0", .data = &sh_mobile_sdhi_of_cfg[0], },
- { .compatible = "renesas,sdhi-r8a73a4", .data = &sh_mobile_sdhi_of_cfg[0], },
- { .compatible = "renesas,sdhi-r8a7740", .data = &sh_mobile_sdhi_of_cfg[0], },
+ { .compatible = "renesas,sdhi-sh73a0", .data = &of_default_cfg, },
+ { .compatible = "renesas,sdhi-r8a73a4", .data = &of_default_cfg, },
+ { .compatible = "renesas,sdhi-r8a7740", .data = &of_default_cfg, },
{ .compatible = "renesas,sdhi-r8a7778", .data = &of_rcar_gen1_compatible, },
{ .compatible = "renesas,sdhi-r8a7779", .data = &of_rcar_gen1_compatible, },
{ .compatible = "renesas,sdhi-r8a7790", .data = &of_rcar_gen2_compatible, },
@@ -97,6 +96,8 @@
struct clk *clk;
struct tmio_mmc_data mmc_data;
struct tmio_mmc_dma dma_priv;
+ struct pinctrl *pinctrl;
+ struct pinctrl_state *pins_default, *pins_uhs;
};
static void sh_mobile_sdhi_sdbuf_width(struct tmio_mmc_host *host, int width)
@@ -131,16 +132,28 @@
sd_ctrl_write16(host, EXT_ACC, val);
}
-static int sh_mobile_sdhi_clk_enable(struct platform_device *pdev, unsigned int *f)
+static int sh_mobile_sdhi_clk_enable(struct tmio_mmc_host *host)
{
- struct mmc_host *mmc = platform_get_drvdata(pdev);
- struct tmio_mmc_host *host = mmc_priv(mmc);
+ struct mmc_host *mmc = host->mmc;
struct sh_mobile_sdhi *priv = host_to_priv(host);
int ret = clk_prepare_enable(priv->clk);
if (ret < 0)
return ret;
- *f = clk_get_rate(priv->clk);
+ /*
+ * The clock driver may not know what maximum frequency
+ * actually works, so it should be set with the max-frequency
+ * property, which will already have been read into f_max. If it
+ * was missing, assume the current frequency is the maximum.
+ */
+ if (!mmc->f_max)
+ mmc->f_max = clk_get_rate(priv->clk);
+
+ /*
+ * Minimum frequency is the minimum input clock frequency
+ * divided by our maximum divider.
+ */
+ mmc->f_min = max(clk_round_rate(priv->clk, 1) / 512, 1L);
/* enable 16bit data access on SDBUF as default */
sh_mobile_sdhi_sdbuf_width(host, 16);
@@ -148,19 +161,92 @@
return 0;
}
-static void sh_mobile_sdhi_clk_disable(struct platform_device *pdev)
+static unsigned int sh_mobile_sdhi_clk_update(struct tmio_mmc_host *host,
+ unsigned int new_clock)
{
- struct mmc_host *mmc = platform_get_drvdata(pdev);
+ struct sh_mobile_sdhi *priv = host_to_priv(host);
+ unsigned int freq, diff, best_freq = 0, diff_min = ~0;
+ int i, ret;
+
+ /* tested only on RCar Gen2+ currently; may work for others */
+ if (!(host->pdata->flags & TMIO_MMC_MIN_RCAR2))
+ return clk_get_rate(priv->clk);
+
+ /*
+ * We want the bus clock to be as close as possible to, but no
+ * greater than, new_clock. As we can divide by 1 << i for
+ * any i in [0, 9] we want the input clock to be as close as
+ * possible, but no greater than, new_clock << i.
+ */
+ for (i = min(9, ilog2(UINT_MAX / new_clock)); i >= 0; i--) {
+ freq = clk_round_rate(priv->clk, new_clock << i);
+ if (freq > (new_clock << i)) {
+ /* Too fast; look for a slightly slower option */
+ freq = clk_round_rate(priv->clk,
+ (new_clock << i) / 4 * 3);
+ if (freq > (new_clock << i))
+ continue;
+ }
+
+ diff = new_clock - (freq >> i);
+ if (diff <= diff_min) {
+ best_freq = freq;
+ diff_min = diff;
+ }
+ }
+
+ ret = clk_set_rate(priv->clk, best_freq);
+
+ return ret == 0 ? best_freq : clk_get_rate(priv->clk);
+}
+
+static void sh_mobile_sdhi_clk_disable(struct tmio_mmc_host *host)
+{
+ struct sh_mobile_sdhi *priv = host_to_priv(host);
+
+ clk_disable_unprepare(priv->clk);
+}
+
+static int sh_mobile_sdhi_start_signal_voltage_switch(struct mmc_host *mmc,
+ struct mmc_ios *ios)
+{
struct tmio_mmc_host *host = mmc_priv(mmc);
struct sh_mobile_sdhi *priv = host_to_priv(host);
- clk_disable_unprepare(priv->clk);
+ struct pinctrl_state *pin_state;
+ int ret;
+
+ switch (ios->signal_voltage) {
+ case MMC_SIGNAL_VOLTAGE_330:
+ pin_state = priv->pins_default;
+ break;
+ case MMC_SIGNAL_VOLTAGE_180:
+ pin_state = priv->pins_uhs;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /*
+ * If anything is missing, assume signal voltage is fixed at
+ * 3.3V and succeed/fail accordingly.
+ */
+ if (IS_ERR(priv->pinctrl) || IS_ERR(pin_state))
+ return ios->signal_voltage ==
+ MMC_SIGNAL_VOLTAGE_330 ? 0 : -EINVAL;
+
+ ret = mmc_regulator_set_vqmmc(host->mmc, ios);
+ if (ret)
+ return ret;
+
+ return pinctrl_select_state(priv->pinctrl, pin_state);
}
static int sh_mobile_sdhi_wait_idle(struct tmio_mmc_host *host)
{
int timeout = 1000;
- while (--timeout && !(sd_ctrl_read16(host, CTL_STATUS2) & (1 << 13)))
+ while (--timeout && !(sd_ctrl_read16_and_16_as_32(host, CTL_STATUS)
+ & TMIO_STAT_SCLKDIVEN))
udelay(1);
if (!timeout) {
@@ -226,7 +312,6 @@
struct tmio_mmc_host *host;
struct resource *res;
int irq, ret, i = 0;
- bool multiplexed_isr = true;
struct tmio_mmc_dma *dma_priv;
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -247,6 +332,14 @@
goto eprobe;
}
+ priv->pinctrl = devm_pinctrl_get(&pdev->dev);
+ if (!IS_ERR(priv->pinctrl)) {
+ priv->pins_default = pinctrl_lookup_state(priv->pinctrl,
+ PINCTRL_STATE_DEFAULT);
+ priv->pins_uhs = pinctrl_lookup_state(priv->pinctrl,
+ "state_uhs");
+ }
+
host = tmio_mmc_host_alloc(pdev);
if (!host) {
ret = -ENOMEM;
@@ -267,8 +360,10 @@
host->dma = dma_priv;
host->write16_hook = sh_mobile_sdhi_write16_hook;
host->clk_enable = sh_mobile_sdhi_clk_enable;
+ host->clk_update = sh_mobile_sdhi_clk_update;
host->clk_disable = sh_mobile_sdhi_clk_disable;
host->multi_io_quirk = sh_mobile_sdhi_multi_io_quirk;
+ host->start_signal_voltage_switch = sh_mobile_sdhi_start_signal_voltage_switch;
/* Originally registers were 16 bit apart, could be 32 or 64 nowadays */
if (!host->bus_shift && resource_size(res) > 0x100) /* old way to determine the shift */
@@ -308,63 +403,24 @@
if (ret < 0)
goto efree;
- /*
- * Allow one or more specific (named) ISRs or
- * one or more multiplexed (un-named) ISRs.
- */
-
- irq = platform_get_irq_byname(pdev, SH_MOBILE_SDHI_IRQ_CARD_DETECT);
- if (irq >= 0) {
- multiplexed_isr = false;
- ret = devm_request_irq(&pdev->dev, irq, tmio_mmc_card_detect_irq, 0,
+ while (1) {
+ irq = platform_get_irq(pdev, i);
+ if (irq < 0)
+ break;
+ i++;
+ ret = devm_request_irq(&pdev->dev, irq, tmio_mmc_irq, 0,
dev_name(&pdev->dev), host);
if (ret)
goto eirq;
}
- irq = platform_get_irq_byname(pdev, SH_MOBILE_SDHI_IRQ_SDIO);
- if (irq >= 0) {
- multiplexed_isr = false;
- ret = devm_request_irq(&pdev->dev, irq, tmio_mmc_sdio_irq, 0,
- dev_name(&pdev->dev), host);
- if (ret)
- goto eirq;
- }
-
- irq = platform_get_irq_byname(pdev, SH_MOBILE_SDHI_IRQ_SDCARD);
- if (irq >= 0) {
- multiplexed_isr = false;
- ret = devm_request_irq(&pdev->dev, irq, tmio_mmc_sdcard_irq, 0,
- dev_name(&pdev->dev), host);
- if (ret)
- goto eirq;
- } else if (!multiplexed_isr) {
- dev_err(&pdev->dev,
- "Principal SD-card IRQ is missing among named interrupts\n");
+ /* There must be at least one IRQ source */
+ if (!i) {
ret = irq;
goto eirq;
}
- if (multiplexed_isr) {
- while (1) {
- irq = platform_get_irq(pdev, i);
- if (irq < 0)
- break;
- i++;
- ret = devm_request_irq(&pdev->dev, irq, tmio_mmc_irq, 0,
- dev_name(&pdev->dev), host);
- if (ret)
- goto eirq;
- }
-
- /* There must be at least one IRQ source */
- if (!i) {
- ret = irq;
- goto eirq;
- }
- }
-
- dev_info(&pdev->dev, "%s base at 0x%08lx clock rate %u MHz\n",
+ dev_info(&pdev->dev, "%s base at 0x%08lx max clock rate %u MHz\n",
mmc_hostname(host->mmc), (unsigned long)
(platform_get_resource(pdev, IORESOURCE_MEM, 0)->start),
host->mmc->f_max / 1000000);
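The new sh_mobile_sdhi_clk_update() above walks shift values i from 9 down to 0, asks the clock driver for a rate near new_clock << i, and keeps the candidate whose divided-down rate (freq >> i) lands closest to new_clock without exceeding it; the 3/4 retry handles parents whose rounding would otherwise overshoot. A userspace simulation of the search, with a mock clk_round_rate() that snaps to an assumed 13 MHz granularity:

#include <stdio.h>
#include <limits.h>

/* Mock clk_round_rate(): assume the parent clock can only generate
 * multiples of 13 MHz, rounded to the nearest step (illustrative). */
static unsigned long clk_round_rate(unsigned long rate)
{
	unsigned long step = 13000000;

	return ((rate + step / 2) / step) * step;
}

int main(void)
{
	unsigned int new_clock = 400000;	/* 400 kHz card clock */
	unsigned int freq, diff, best_freq = 0, diff_min = UINT_MAX;
	int i, best_i = 0;

	for (i = 9; i >= 0; i--) {
		freq = clk_round_rate((unsigned long)new_clock << i);
		if (freq > (new_clock << i)) {
			/* too fast; retry at 3/4 of the target */
			freq = clk_round_rate(((unsigned long)new_clock << i) / 4 * 3);
			if (freq > (new_clock << i))
				continue;
		}
		diff = new_clock - (freq >> i);	/* freq >> i never exceeds new_clock */
		if (diff <= diff_min) {
			best_freq = freq;
			diff_min = diff;
			best_i = i;
		}
	}
	printf("best input %u Hz, divider 1<<%d -> bus %u Hz\n",
	       best_freq, best_i, best_freq >> best_i);
	return 0;
}

Because the overshoot test runs before the subtraction, the unsigned diff can never underflow, which is the same invariant the driver loop relies on.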
diff --git a/drivers/mmc/host/tmio_mmc.h b/drivers/mmc/host/tmio_mmc.h
index 4a597f5a..1aac2ad 100644
--- a/drivers/mmc/host/tmio_mmc.h
+++ b/drivers/mmc/host/tmio_mmc.h
@@ -1,6 +1,8 @@
/*
* linux/drivers/mmc/host/tmio_mmc.h
*
+ * Copyright (C) 2016 Sang Engineering, Wolfram Sang
+ * Copyright (C) 2015-16 Renesas Electronics Corporation
* Copyright (C) 2007 Ian Molton
* Copyright (C) 2004 Ian Molton
*
@@ -18,12 +20,67 @@
#include <linux/dmaengine.h>
#include <linux/highmem.h>
-#include <linux/mmc/tmio.h>
#include <linux/mutex.h>
#include <linux/pagemap.h>
#include <linux/scatterlist.h>
#include <linux/spinlock.h>
+#define CTL_SD_CMD 0x00
+#define CTL_ARG_REG 0x04
+#define CTL_STOP_INTERNAL_ACTION 0x08
+#define CTL_XFER_BLK_COUNT 0xa
+#define CTL_RESPONSE 0x0c
+/* driver merges STATUS and following STATUS2 */
+#define CTL_STATUS 0x1c
+/* driver merges IRQ_MASK and following IRQ_MASK2 */
+#define CTL_IRQ_MASK 0x20
+#define CTL_SD_CARD_CLK_CTL 0x24
+#define CTL_SD_XFER_LEN 0x26
+#define CTL_SD_MEM_CARD_OPT 0x28
+#define CTL_SD_ERROR_DETAIL_STATUS 0x2c
+#define CTL_SD_DATA_PORT 0x30
+#define CTL_TRANSACTION_CTL 0x34
+#define CTL_SDIO_STATUS 0x36
+#define CTL_SDIO_IRQ_MASK 0x38
+#define CTL_DMA_ENABLE 0xd8
+#define CTL_RESET_SD 0xe0
+#define CTL_VERSION 0xe2
+#define CTL_SDIO_REGS 0x100
+#define CTL_CLK_AND_WAIT_CTL 0x138
+#define CTL_RESET_SDIO 0x1e0
+
+/* Definitions for values the CTRL_STATUS register can take. */
+#define TMIO_STAT_CMDRESPEND BIT(0)
+#define TMIO_STAT_DATAEND BIT(2)
+#define TMIO_STAT_CARD_REMOVE BIT(3)
+#define TMIO_STAT_CARD_INSERT BIT(4)
+#define TMIO_STAT_SIGSTATE BIT(5)
+#define TMIO_STAT_WRPROTECT BIT(7)
+#define TMIO_STAT_CARD_REMOVE_A BIT(8)
+#define TMIO_STAT_CARD_INSERT_A BIT(9)
+#define TMIO_STAT_SIGSTATE_A BIT(10)
+
+/* These belong technically to CTRL_STATUS2, but the driver merges them */
+#define TMIO_STAT_CMD_IDX_ERR BIT(16)
+#define TMIO_STAT_CRCFAIL BIT(17)
+#define TMIO_STAT_STOPBIT_ERR BIT(18)
+#define TMIO_STAT_DATATIMEOUT BIT(19)
+#define TMIO_STAT_RXOVERFLOW BIT(20)
+#define TMIO_STAT_TXUNDERRUN BIT(21)
+#define TMIO_STAT_CMDTIMEOUT BIT(22)
+#define TMIO_STAT_DAT0 BIT(23) /* only known on R-Car so far */
+#define TMIO_STAT_RXRDY BIT(24)
+#define TMIO_STAT_TXRQ BIT(25)
+#define TMIO_STAT_ILL_FUNC BIT(29) /* only when !TMIO_MMC_HAS_IDLE_WAIT */
+#define TMIO_STAT_SCLKDIVEN BIT(29) /* only when TMIO_MMC_HAS_IDLE_WAIT */
+#define TMIO_STAT_CMD_BUSY BIT(30)
+#define TMIO_STAT_ILL_ACCESS BIT(31)
+
+#define CLK_CTL_DIV_MASK 0xff
+#define CLK_CTL_SCLKEN BIT(8)
+
+#define TMIO_BBS 512 /* Boot block size */
+
/* Definitions for values the CTRL_SDIO_STATUS register can take. */
#define TMIO_SDIO_STAT_IOIRQ 0x0001
#define TMIO_SDIO_STAT_EXPUB52 0x4000
@@ -95,10 +152,14 @@
bool sdio_irq_enabled;
int (*write16_hook)(struct tmio_mmc_host *host, int addr);
- int (*clk_enable)(struct platform_device *pdev, unsigned int *f);
- void (*clk_disable)(struct platform_device *pdev);
+ int (*clk_enable)(struct tmio_mmc_host *host);
+ unsigned int (*clk_update)(struct tmio_mmc_host *host,
+ unsigned int new_clock);
+ void (*clk_disable)(struct tmio_mmc_host *host);
int (*multi_io_quirk)(struct mmc_card *card,
unsigned int direction, int blk_size);
+ int (*start_signal_voltage_switch)(struct mmc_host *mmc,
+ struct mmc_ios *ios);
};
struct tmio_mmc_host *tmio_mmc_host_alloc(struct platform_device *pdev);
@@ -111,9 +172,6 @@
void tmio_mmc_enable_mmc_irqs(struct tmio_mmc_host *host, u32 i);
void tmio_mmc_disable_mmc_irqs(struct tmio_mmc_host *host, u32 i);
irqreturn_t tmio_mmc_irq(int irq, void *devid);
-irqreturn_t tmio_mmc_sdcard_irq(int irq, void *devid);
-irqreturn_t tmio_mmc_card_detect_irq(int irq, void *devid);
-irqreturn_t tmio_mmc_sdio_irq(int irq, void *devid);
static inline char *tmio_mmc_kmap_atomic(struct scatterlist *sg,
unsigned long *flags)
@@ -177,7 +235,7 @@
readsw(host->ctl + (addr << host->bus_shift), buf, count);
}
-static inline u32 sd_ctrl_read32(struct tmio_mmc_host *host, int addr)
+static inline u32 sd_ctrl_read16_and_16_as_32(struct tmio_mmc_host *host, int addr)
{
return readw(host->ctl + (addr << host->bus_shift)) |
readw(host->ctl + ((addr + 2) << host->bus_shift)) << 16;
@@ -199,11 +257,10 @@
writesw(host->ctl + (addr << host->bus_shift), buf, count);
}
-static inline void sd_ctrl_write32(struct tmio_mmc_host *host, int addr, u32 val)
+static inline void sd_ctrl_write32_as_16_and_16(struct tmio_mmc_host *host, int addr, u32 val)
{
writew(val, host->ctl + (addr << host->bus_shift));
writew(val >> 16, host->ctl + ((addr + 2) << host->bus_shift));
}
-
#endif
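The renames above (sd_ctrl_read32 to sd_ctrl_read16_and_16_as_32, and the write counterpart) make explicit that CTL_STATUS/CTL_IRQ_MASK are really pairs of adjacent 16-bit registers the driver merges into one 32-bit value, low word first. A userspace sketch of the split access against a fake register file (a uint16_t array; bus_shift and the MMIO accessors are omitted for brevity):

#include <stdio.h>
#include <stdint.h>

static uint16_t regs[0x100];	/* fake 16-bit register file */

/* Read two adjacent 16-bit registers as one 32-bit value, low word
 * first: the shape of sd_ctrl_read16_and_16_as_32(). */
static uint32_t read16_and_16_as_32(unsigned int addr)
{
	return (uint32_t)regs[addr / 2] | (uint32_t)regs[addr / 2 + 1] << 16;
}

/* Mirror image of the read: sd_ctrl_write32_as_16_and_16(). */
static void write32_as_16_and_16(unsigned int addr, uint32_t val)
{
	regs[addr / 2] = val & 0xffff;
	regs[addr / 2 + 1] = val >> 16;
}

int main(void)
{
	write32_as_16_and_16(0x1c, 0x00020011);	/* STATUS + STATUS2 */
	printf("status = 0x%08x\n", read16_and_16_as_32(0x1c));
	return 0;
}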
diff --git a/drivers/mmc/host/tmio_mmc_dma.c b/drivers/mmc/host/tmio_mmc_dma.c
index 7fb0c03..fa8a936 100644
--- a/drivers/mmc/host/tmio_mmc_dma.c
+++ b/drivers/mmc/host/tmio_mmc_dma.c
@@ -15,7 +15,6 @@
#include <linux/dmaengine.h>
#include <linux/mfd/tmio.h>
#include <linux/mmc/host.h>
-#include <linux/mmc/tmio.h>
#include <linux/pagemap.h>
#include <linux/scatterlist.h>
diff --git a/drivers/mmc/host/tmio_mmc_pio.c b/drivers/mmc/host/tmio_mmc_pio.c
index 0521b46..f44e2ab 100644
--- a/drivers/mmc/host/tmio_mmc_pio.c
+++ b/drivers/mmc/host/tmio_mmc_pio.c
@@ -39,7 +39,6 @@
#include <linux/mmc/host.h>
#include <linux/mmc/mmc.h>
#include <linux/mmc/slot-gpio.h>
-#include <linux/mmc/tmio.h>
#include <linux/module.h>
#include <linux/pagemap.h>
#include <linux/platform_device.h>
@@ -56,18 +55,18 @@
void tmio_mmc_enable_mmc_irqs(struct tmio_mmc_host *host, u32 i)
{
host->sdcard_irq_mask &= ~(i & TMIO_MASK_IRQ);
- sd_ctrl_write32(host, CTL_IRQ_MASK, host->sdcard_irq_mask);
+ sd_ctrl_write32_as_16_and_16(host, CTL_IRQ_MASK, host->sdcard_irq_mask);
}
void tmio_mmc_disable_mmc_irqs(struct tmio_mmc_host *host, u32 i)
{
host->sdcard_irq_mask |= (i & TMIO_MASK_IRQ);
- sd_ctrl_write32(host, CTL_IRQ_MASK, host->sdcard_irq_mask);
+ sd_ctrl_write32_as_16_and_16(host, CTL_IRQ_MASK, host->sdcard_irq_mask);
}
static void tmio_mmc_ack_mmc_irqs(struct tmio_mmc_host *host, u32 i)
{
- sd_ctrl_write32(host, CTL_STATUS, ~i);
+ sd_ctrl_write32_as_16_and_16(host, CTL_STATUS, ~i);
}
static void tmio_mmc_init_sg(struct tmio_mmc_host *host, struct mmc_data *data)
@@ -154,31 +153,16 @@
}
}
-static void tmio_mmc_set_clock(struct tmio_mmc_host *host,
- unsigned int new_clock)
+static void tmio_mmc_clk_start(struct tmio_mmc_host *host)
{
- u32 clk = 0, clock;
+ sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, CLK_CTL_SCLKEN |
+ sd_ctrl_read16(host, CTL_SD_CARD_CLK_CTL));
+ msleep(host->pdata->flags & TMIO_MMC_MIN_RCAR2 ? 1 : 10);
- if (new_clock) {
- for (clock = host->mmc->f_min, clk = 0x80000080;
- new_clock >= (clock << 1);
- clk >>= 1)
- clock <<= 1;
-
- /* 1/1 clock is option */
- if ((host->pdata->flags & TMIO_MMC_CLK_ACTUAL) &&
- ((clk >> 22) & 0x1))
- clk |= 0xff;
- }
-
- if (host->set_clk_div)
- host->set_clk_div(host->pdev, (clk >> 22) & 1);
-
- sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, ~CLK_CTL_SCLKEN &
- sd_ctrl_read16(host, CTL_SD_CARD_CLK_CTL));
- sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, clk & CLK_CTL_DIV_MASK);
- if (!(host->pdata->flags & TMIO_MMC_FAST_CLK_CHG))
+ if (host->pdata->flags & TMIO_MMC_HAVE_HIGH_REG) {
+ sd_ctrl_write16(host, CTL_CLK_AND_WAIT_CTL, 0x0100);
msleep(10);
+ }
}
static void tmio_mmc_clk_stop(struct tmio_mmc_host *host)
@@ -190,19 +174,41 @@
sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, ~CLK_CTL_SCLKEN &
sd_ctrl_read16(host, CTL_SD_CARD_CLK_CTL));
- msleep(host->pdata->flags & TMIO_MMC_FAST_CLK_CHG ? 5 : 10);
+ msleep(host->pdata->flags & TMIO_MMC_MIN_RCAR2 ? 5 : 10);
}
-static void tmio_mmc_clk_start(struct tmio_mmc_host *host)
+static void tmio_mmc_set_clock(struct tmio_mmc_host *host,
+ unsigned int new_clock)
{
- sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, CLK_CTL_SCLKEN |
- sd_ctrl_read16(host, CTL_SD_CARD_CLK_CTL));
- msleep(host->pdata->flags & TMIO_MMC_FAST_CLK_CHG ? 1 : 10);
+ u32 clk = 0, clock;
- if (host->pdata->flags & TMIO_MMC_HAVE_HIGH_REG) {
- sd_ctrl_write16(host, CTL_CLK_AND_WAIT_CTL, 0x0100);
- msleep(10);
+ if (new_clock == 0) {
+ tmio_mmc_clk_stop(host);
+ return;
}
+
+ if (host->clk_update)
+ clock = host->clk_update(host, new_clock) / 512;
+ else
+ clock = host->mmc->f_min;
+
+ for (clk = 0x80000080; new_clock >= (clock << 1); clk >>= 1)
+ clock <<= 1;
+
+ /* 1/1 clock is an option */
+ if ((host->pdata->flags & TMIO_MMC_CLK_ACTUAL) && ((clk >> 22) & 0x1))
+ clk |= 0xff;
+
+ if (host->set_clk_div)
+ host->set_clk_div(host->pdev, (clk >> 22) & 1);
+
+ sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, ~CLK_CTL_SCLKEN &
+ sd_ctrl_read16(host, CTL_SD_CARD_CLK_CTL));
+ sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, clk & CLK_CTL_DIV_MASK);
+ if (!(host->pdata->flags & TMIO_MMC_MIN_RCAR2))
+ msleep(10);
+
+ tmio_mmc_clk_start(host);
}
static void tmio_mmc_reset(struct tmio_mmc_host *host)
@@ -264,9 +270,6 @@
tmio_mmc_abort_dma(host);
mmc_request_done(host->mmc, mrq);
-
- pm_runtime_mark_last_busy(mmc_dev(host->mmc));
- pm_runtime_put_autosuspend(mmc_dev(host->mmc));
}
/* called with host->lock held, interrupts disabled */
@@ -296,9 +299,6 @@
tmio_mmc_abort_dma(host);
mmc_request_done(host->mmc, mrq);
-
- pm_runtime_mark_last_busy(mmc_dev(host->mmc));
- pm_runtime_put_autosuspend(mmc_dev(host->mmc));
}
static void tmio_mmc_done_work(struct work_struct *work)
@@ -375,7 +375,7 @@
tmio_mmc_enable_mmc_irqs(host, irq_mask);
/* Fire off the command */
- sd_ctrl_write32(host, CTL_ARG_REG, cmd->arg);
+ sd_ctrl_write32_as_16_and_16(host, CTL_ARG_REG, cmd->arg);
sd_ctrl_write16(host, CTL_SD_CMD, c);
return 0;
@@ -530,7 +530,7 @@
goto out;
if (host->chan_tx && (data->flags & MMC_DATA_WRITE) && !host->force_pio) {
- u32 status = sd_ctrl_read32(host, CTL_STATUS);
+ u32 status = sd_ctrl_read16_and_16_as_32(host, CTL_STATUS);
bool done = false;
/*
@@ -542,7 +542,7 @@
* waiting for one more interrupt fixes the problem.
*/
if (host->pdata->flags & TMIO_MMC_HAS_IDLE_WAIT) {
- if (status & TMIO_STAT_ILL_FUNC)
+ if (status & TMIO_STAT_SCLKDIVEN)
done = true;
} else {
if (!(status & TMIO_STAT_CMD_BUSY))
@@ -585,7 +585,7 @@
*/
for (i = 3, addr = CTL_RESPONSE ; i >= 0 ; i--, addr += 4)
- cmd->resp[i] = sd_ctrl_read32(host, addr);
+ cmd->resp[i] = sd_ctrl_read16_and_16_as_32(host, addr);
if (cmd->flags & MMC_RSP_136) {
cmd->resp[0] = (cmd->resp[0] << 8) | (cmd->resp[1] >> 24);
@@ -625,19 +625,6 @@
spin_unlock(&host->lock);
}
-static void tmio_mmc_card_irq_status(struct tmio_mmc_host *host,
- int *ireg, int *status)
-{
- *status = sd_ctrl_read32(host, CTL_STATUS);
- *ireg = *status & TMIO_MASK_IRQ & ~host->sdcard_irq_mask;
-
- pr_debug_status(*status);
- pr_debug_status(*ireg);
-
- /* Clear the status except the interrupt status */
- sd_ctrl_write32(host, CTL_STATUS, TMIO_MASK_IRQ);
-}
-
static bool __tmio_mmc_card_detect_irq(struct tmio_mmc_host *host,
int ireg, int status)
{
@@ -657,18 +644,6 @@
return false;
}
-irqreturn_t tmio_mmc_card_detect_irq(int irq, void *devid)
-{
- unsigned int ireg, status;
- struct tmio_mmc_host *host = devid;
-
- tmio_mmc_card_irq_status(host, &ireg, &status);
- __tmio_mmc_card_detect_irq(host, ireg, status);
-
- return IRQ_HANDLED;
-}
-EXPORT_SYMBOL(tmio_mmc_card_detect_irq);
-
static bool __tmio_mmc_sdcard_irq(struct tmio_mmc_host *host,
int ireg, int status)
{
@@ -698,19 +673,7 @@
return false;
}
-irqreturn_t tmio_mmc_sdcard_irq(int irq, void *devid)
-{
- unsigned int ireg, status;
- struct tmio_mmc_host *host = devid;
-
- tmio_mmc_card_irq_status(host, &ireg, &status);
- __tmio_mmc_sdcard_irq(host, ireg, status);
-
- return IRQ_HANDLED;
-}
-EXPORT_SYMBOL(tmio_mmc_sdcard_irq);
-
-irqreturn_t tmio_mmc_sdio_irq(int irq, void *devid)
+static void tmio_mmc_sdio_irq(int irq, void *devid)
{
struct tmio_mmc_host *host = devid;
struct mmc_host *mmc = host->mmc;
@@ -719,7 +682,7 @@
unsigned int sdio_status;
if (!(pdata->flags & TMIO_MMC_SDIO_IRQ))
- return IRQ_HANDLED;
+ return;
status = sd_ctrl_read16(host, CTL_SDIO_STATUS);
ireg = status & TMIO_SDIO_MASK_ALL & ~host->sdcard_irq_mask;
@@ -732,19 +695,22 @@
if (mmc->caps & MMC_CAP_SDIO_IRQ && ireg & TMIO_SDIO_STAT_IOIRQ)
mmc_signal_sdio_irq(mmc);
-
- return IRQ_HANDLED;
}
-EXPORT_SYMBOL(tmio_mmc_sdio_irq);
irqreturn_t tmio_mmc_irq(int irq, void *devid)
{
struct tmio_mmc_host *host = devid;
unsigned int ireg, status;
- pr_debug("MMC IRQ begin\n");
+ status = sd_ctrl_read16_and_16_as_32(host, CTL_STATUS);
+ ireg = status & TMIO_MASK_IRQ & ~host->sdcard_irq_mask;
- tmio_mmc_card_irq_status(host, &ireg, &status);
+ pr_debug_status(status);
+ pr_debug_status(ireg);
+
+ /* Clear the status except the interrupt status */
+ sd_ctrl_write32_as_16_and_16(host, CTL_STATUS, TMIO_MASK_IRQ);
+
if (__tmio_mmc_card_detect_irq(host, ireg, status))
return IRQ_HANDLED;
if (__tmio_mmc_sdcard_irq(host, ireg, status))
@@ -812,8 +778,6 @@
spin_unlock_irqrestore(&host->lock, flags);
- pm_runtime_get_sync(mmc_dev(mmc));
-
if (mrq->data) {
ret = tmio_mmc_start_data(host, mrq->data);
if (ret)
@@ -832,24 +796,14 @@
host->mrq = NULL;
mrq->cmd->error = ret;
mmc_request_done(mmc, mrq);
-
- pm_runtime_mark_last_busy(mmc_dev(mmc));
- pm_runtime_put_autosuspend(mmc_dev(mmc));
}
-static int tmio_mmc_clk_update(struct tmio_mmc_host *host)
+static int tmio_mmc_clk_enable(struct tmio_mmc_host *host)
{
- struct mmc_host *mmc = host->mmc;
- int ret;
-
if (!host->clk_enable)
return -ENOTSUPP;
- ret = host->clk_enable(host->pdev, &mmc->f_max);
- if (!ret)
- mmc->f_min = mmc->f_max / 512;
-
- return ret;
+ return host->clk_enable(host);
}
static void tmio_mmc_power_on(struct tmio_mmc_host *host, unsigned short vdd)
@@ -925,8 +879,6 @@
struct device *dev = &host->pdev->dev;
unsigned long flags;
- pm_runtime_get_sync(mmc_dev(mmc));
-
mutex_lock(&host->ios_lock);
spin_lock_irqsave(&host->lock, flags);
@@ -959,14 +911,12 @@
tmio_mmc_clk_stop(host);
break;
case MMC_POWER_UP:
- tmio_mmc_set_clock(host, ios->clock);
tmio_mmc_power_on(host, ios->vdd);
- tmio_mmc_clk_start(host);
+ tmio_mmc_set_clock(host, ios->clock);
tmio_mmc_set_bus_width(host, ios->bus_width);
break;
case MMC_POWER_ON:
tmio_mmc_set_clock(host, ios->clock);
- tmio_mmc_clk_start(host);
tmio_mmc_set_bus_width(host, ios->bus_width);
break;
}
@@ -983,9 +933,6 @@
host->clk_cache = ios->clock;
mutex_unlock(&host->ios_lock);
-
- pm_runtime_mark_last_busy(mmc_dev(mmc));
- pm_runtime_put_autosuspend(mmc_dev(mmc));
}
static int tmio_mmc_get_ro(struct mmc_host *mmc)
@@ -996,11 +943,8 @@
if (ret >= 0)
return ret;
- pm_runtime_get_sync(mmc_dev(mmc));
ret = !((pdata->flags & TMIO_MMC_WRPROTECT_DISABLE) ||
- (sd_ctrl_read32(host, CTL_STATUS) & TMIO_STAT_WRPROTECT));
- pm_runtime_mark_last_busy(mmc_dev(mmc));
- pm_runtime_put_autosuspend(mmc_dev(mmc));
+ (sd_ctrl_read16_and_16_as_32(host, CTL_STATUS) & TMIO_STAT_WRPROTECT));
return ret;
}
@@ -1016,12 +960,20 @@
return blk_size;
}
-static const struct mmc_host_ops tmio_mmc_ops = {
+static int tmio_mmc_card_busy(struct mmc_host *mmc)
+{
+ struct tmio_mmc_host *host = mmc_priv(mmc);
+
+ return !(sd_ctrl_read16_and_16_as_32(host, CTL_STATUS) & TMIO_STAT_DAT0);
+}
+
+static struct mmc_host_ops tmio_mmc_ops = {
.request = tmio_mmc_request,
.set_ios = tmio_mmc_set_ios,
.get_ro = tmio_mmc_get_ro,
.get_cd = mmc_gpio_get_cd,
.enable_sdio_irq = tmio_mmc_enable_sdio_irq,
+ .card_busy = tmio_mmc_card_busy,
.multi_io_quirk = tmio_multi_io_quirk,
};
@@ -1120,7 +1072,9 @@
goto host_free;
}
+ tmio_mmc_ops.start_signal_voltage_switch = _host->start_signal_voltage_switch;
mmc->ops = &tmio_mmc_ops;
+
mmc->caps |= MMC_CAP_4_BIT_DATA | pdata->capabilities;
mmc->caps2 |= pdata->capabilities2;
mmc->max_segs = 32;
@@ -1135,7 +1089,7 @@
mmc->caps & MMC_CAP_NONREMOVABLE ||
mmc->slot.cd_irq >= 0);
- if (tmio_mmc_clk_update(_host) < 0) {
+ if (tmio_mmc_clk_enable(_host) < 0) {
mmc->f_max = pdata->hclk;
mmc->f_min = mmc->f_max / 512;
}
@@ -1159,7 +1113,7 @@
tmio_mmc_clk_stop(_host);
tmio_mmc_reset(_host);
- _host->sdcard_irq_mask = sd_ctrl_read32(_host, CTL_IRQ_MASK);
+ _host->sdcard_irq_mask = sd_ctrl_read16_and_16_as_32(_host, CTL_IRQ_MASK);
tmio_mmc_disable_mmc_irqs(_host, TMIO_MASK_ALL);
/* Unmask the IRQs we want to know about */
@@ -1251,7 +1205,7 @@
tmio_mmc_clk_stop(host);
if (host->clk_disable)
- host->clk_disable(host->pdev);
+ host->clk_disable(host);
return 0;
}
@@ -1263,12 +1217,10 @@
struct tmio_mmc_host *host = mmc_priv(mmc);
tmio_mmc_reset(host);
- tmio_mmc_clk_update(host);
+ tmio_mmc_clk_enable(host);
- if (host->clk_cache) {
+ if (host->clk_cache)
tmio_mmc_set_clock(host, host->clk_cache);
- tmio_mmc_clk_start(host);
- }
tmio_mmc_enable_dma(host, true);
diff --git a/drivers/mmc/host/toshsd.c b/drivers/mmc/host/toshsd.c
index e2cdd5f..553ef41 100644
--- a/drivers/mmc/host/toshsd.c
+++ b/drivers/mmc/host/toshsd.c
@@ -21,6 +21,7 @@
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/pm.h>
+#include <linux/pm_runtime.h>
#include <linux/mmc/host.h>
#include <linux/mmc/mmc.h>
diff --git a/drivers/mmc/host/usdhi6rol0.c b/drivers/mmc/host/usdhi6rol0.c
index 807c06e..1bd5f1a 100644
--- a/drivers/mmc/host/usdhi6rol0.c
+++ b/drivers/mmc/host/usdhi6rol0.c
@@ -22,6 +22,7 @@
#include <linux/mmc/sdio.h>
#include <linux/module.h>
#include <linux/pagemap.h>
+#include <linux/pinctrl/consumer.h>
#include <linux/platform_device.h>
#include <linux/scatterlist.h>
#include <linux/string.h>
@@ -198,6 +199,11 @@
struct dma_chan *chan_rx;
struct dma_chan *chan_tx;
bool dma_active;
+
+ /* Pin control */
+ struct pinctrl *pinctrl;
+ struct pinctrl_state *pins_default;
+ struct pinctrl_state *pins_uhs;
};
/* I/O primitives */
@@ -1147,12 +1153,45 @@
}
}
+static int usdhi6_set_pinstates(struct usdhi6_host *host, int voltage)
+{
+ if (IS_ERR(host->pins_uhs))
+ return 0;
+
+ switch (voltage) {
+ case MMC_SIGNAL_VOLTAGE_180:
+ case MMC_SIGNAL_VOLTAGE_120:
+ return pinctrl_select_state(host->pinctrl,
+ host->pins_uhs);
+
+ default:
+ return pinctrl_select_state(host->pinctrl,
+ host->pins_default);
+ }
+}
+
+static int usdhi6_sig_volt_switch(struct mmc_host *mmc, struct mmc_ios *ios)
+{
+ int ret;
+
+ ret = mmc_regulator_set_vqmmc(mmc, ios);
+ if (ret < 0)
+ return ret;
+
+ ret = usdhi6_set_pinstates(mmc_priv(mmc), ios->signal_voltage);
+ if (ret)
+ dev_warn_once(mmc_dev(mmc),
+ "Failed to set pinstate err=%d\n", ret);
+ return ret;
+}
+
static struct mmc_host_ops usdhi6_ops = {
.request = usdhi6_request,
.set_ios = usdhi6_set_ios,
.get_cd = usdhi6_get_cd,
.get_ro = usdhi6_get_ro,
.enable_sdio_irq = usdhi6_enable_sdio_irq,
+ .start_signal_voltage_switch = usdhi6_sig_volt_switch,
};
/* State machine handlers */
@@ -1730,6 +1769,25 @@
host->wait = USDHI6_WAIT_FOR_REQUEST;
host->timeout = msecs_to_jiffies(4000);
+ host->pinctrl = devm_pinctrl_get(&pdev->dev);
+ if (IS_ERR(host->pinctrl)) {
+ ret = PTR_ERR(host->pinctrl);
+ goto e_free_mmc;
+ }
+
+ host->pins_uhs = pinctrl_lookup_state(host->pinctrl, "state_uhs");
+ if (!IS_ERR(host->pins_uhs)) {
+ host->pins_default = pinctrl_lookup_state(host->pinctrl,
+ PINCTRL_STATE_DEFAULT);
+
+ if (IS_ERR(host->pins_default)) {
+ dev_err(dev,
+ "UHS pinctrl requires a default pin state.\n");
+ ret = PTR_ERR(host->pins_default);
+ goto e_free_mmc;
+ }
+ }
+
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
host->base = devm_ioremap_resource(dev, res);
if (IS_ERR(host->base)) {
@@ -1785,7 +1843,7 @@
mmc->ops = &usdhi6_ops;
mmc->caps |= MMC_CAP_SD_HIGHSPEED | MMC_CAP_MMC_HIGHSPEED |
- MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_DDR50 | MMC_CAP_SDIO_IRQ;
+ MMC_CAP_SDIO_IRQ;
/* Set .max_segs to some random number. Feel free to adjust. */
mmc->max_segs = 32;
mmc->max_blk_size = 512;
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 3096251..bee180bd 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -40,6 +40,7 @@
#include <linux/slab.h>
#include <linux/reboot.h>
#include <linux/kconfig.h>
+#include <linux/leds.h>
#include <linux/mtd/mtd.h>
#include <linux/mtd/partitions.h>
@@ -862,6 +863,7 @@
mtd_erase_callback(instr);
return 0;
}
+ ledtrig_mtd_activity();
return mtd->_erase(mtd, instr);
}
EXPORT_SYMBOL_GPL(mtd_erase);
@@ -925,6 +927,7 @@
if (!len)
return 0;
+ ledtrig_mtd_activity();
/*
* In the absence of an error, drivers return a non-negative integer
* representing the maximum number of bitflips that were corrected on
@@ -949,6 +952,7 @@
return -EROFS;
if (!len)
return 0;
+ ledtrig_mtd_activity();
return mtd->_write(mtd, to, len, retlen, buf);
}
EXPORT_SYMBOL_GPL(mtd_write);
@@ -982,6 +986,8 @@
ops->retlen = ops->oobretlen = 0;
if (!mtd->_read_oob)
return -EOPNOTSUPP;
+
+ ledtrig_mtd_activity();
/*
* In cases where ops->datbuf != NULL, mtd->_read_oob() has semantics
* similar to mtd->_read(), returning a non-negative integer
@@ -997,6 +1003,19 @@
}
EXPORT_SYMBOL_GPL(mtd_read_oob);
+int mtd_write_oob(struct mtd_info *mtd, loff_t to,
+ struct mtd_oob_ops *ops)
+{
+ ops->retlen = ops->oobretlen = 0;
+ if (!mtd->_write_oob)
+ return -EOPNOTSUPP;
+ if (!(mtd->flags & MTD_WRITEABLE))
+ return -EROFS;
+ ledtrig_mtd_activity();
+ return mtd->_write_oob(mtd, to, ops);
+}
+EXPORT_SYMBOL_GPL(mtd_write_oob);
+
/*
* Method to access the protection register area, present in some flash
* devices. The user data is one time programmable but the factory data is read
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 557b846..ba4f603 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -43,7 +43,6 @@
#include <linux/mtd/nand_bch.h>
#include <linux/interrupt.h>
#include <linux/bitops.h>
-#include <linux/leds.h>
#include <linux/io.h>
#include <linux/mtd/partitions.h>
#include <linux/of_mtd.h>
@@ -97,12 +96,6 @@
static int nand_do_write_oob(struct mtd_info *mtd, loff_t to,
struct mtd_oob_ops *ops);
-/*
- * For devices which display every fart in the system on a separate LED. Is
- * compiled away when LED support is disabled.
- */
-DEFINE_LED_TRIGGER(nand_led_trigger);
-
static int check_offs_len(struct mtd_info *mtd,
loff_t ofs, uint64_t len)
{
@@ -540,19 +533,16 @@
if (in_interrupt() || oops_in_progress)
return panic_nand_wait_ready(mtd, timeo);
- led_trigger_event(nand_led_trigger, LED_FULL);
/* Wait until command is processed or timeout occurs */
timeo = jiffies + msecs_to_jiffies(timeo);
do {
if (chip->dev_ready(mtd))
- goto out;
+ return;
cond_resched();
} while (time_before(jiffies, timeo));
if (!chip->dev_ready(mtd))
pr_warn_ratelimited("timeout while waiting for chip to become ready\n");
-out:
- led_trigger_event(nand_led_trigger, LED_OFF);
}
EXPORT_SYMBOL_GPL(nand_wait_ready);
@@ -885,8 +875,6 @@
int status;
unsigned long timeo = 400;
- led_trigger_event(nand_led_trigger, LED_FULL);
-
/*
* Apply this short delay always to ensure that we do wait tWB in any
* case on any machine.
@@ -910,7 +898,6 @@
cond_resched();
} while (time_before(jiffies, timeo));
}
- led_trigger_event(nand_led_trigger, LED_OFF);
status = (int)chip->read_byte(mtd);
/* This can happen in case of timeout or buggy dev_ready */
@@ -4466,20 +4453,6 @@
}
EXPORT_SYMBOL_GPL(nand_release);
-static int __init nand_base_init(void)
-{
- led_trigger_register_simple("nand-disk", &nand_led_trigger);
- return 0;
-}
-
-static void __exit nand_base_exit(void)
-{
- led_trigger_unregister_simple(nand_led_trigger);
-}
-
-module_init(nand_base_init);
-module_exit(nand_base_exit);
-
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Steven J. Hill <sjhill@realitydiluted.com>");
MODULE_AUTHOR("Thomas Gleixner <tglx@linutronix.de>");
diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c
index 72c9f1f..7c78307 100644
--- a/drivers/net/hamradio/baycom_epp.c
+++ b/drivers/net/hamradio/baycom_epp.c
@@ -635,10 +635,10 @@
#ifdef __i386__
#include <asm/msr.h>
-#define GETTICK(x) \
-({ \
- if (cpu_has_tsc) \
- x = (unsigned int)rdtsc(); \
+#define GETTICK(x) \
+({ \
+ if (boot_cpu_has(X86_FEATURE_TSC)) \
+ x = (unsigned int)rdtsc(); \
})
#else /* __i386__ */
#define GETTICK(x)
diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig
index e2a4841..b3bec3a 100644
--- a/drivers/of/Kconfig
+++ b/drivers/of/Kconfig
@@ -112,4 +112,7 @@
While this option is selected automatically when needed, you can
enable it manually to improve device tree unit test coverage.
+config OF_NUMA
+ bool
+
endif # OF
diff --git a/drivers/of/Makefile b/drivers/of/Makefile
index 156c072..bee3fa9 100644
--- a/drivers/of/Makefile
+++ b/drivers/of/Makefile
@@ -14,5 +14,6 @@
obj-$(CONFIG_OF_RESERVED_MEM) += of_reserved_mem.o
obj-$(CONFIG_OF_RESOLVE) += resolver.o
obj-$(CONFIG_OF_OVERLAY) += overlay.o
+obj-$(CONFIG_OF_NUMA) += of_numa.o
obj-$(CONFIG_OF_UNITTEST) += unittest-data/
diff --git a/drivers/of/of_numa.c b/drivers/of/of_numa.c
new file mode 100644
index 0000000..0f2784b
--- /dev/null
+++ b/drivers/of/of_numa.c
@@ -0,0 +1,211 @@
+/*
+ * OF NUMA Parsing support.
+ *
+ * Copyright (C) 2015 - 2016 Cavium Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/nodemask.h>
+
+#include <asm/numa.h>
+
+/* define default numa node to 0 */
+#define DEFAULT_NODE 0
+
+/*
+ * Even though we connect cpus to numa domains later in SMP
+ * init, we need to know the node ids now for all cpus.
+ */
+static void __init of_numa_parse_cpu_nodes(void)
+{
+ u32 nid;
+ int r;
+ struct device_node *cpus;
+ struct device_node *np = NULL;
+
+ cpus = of_find_node_by_path("/cpus");
+ if (!cpus)
+ return;
+
+ for_each_child_of_node(cpus, np) {
+ /* Skip things that are not CPUs */
+ if (of_node_cmp(np->type, "cpu") != 0)
+ continue;
+
+ r = of_property_read_u32(np, "numa-node-id", &nid);
+ if (r)
+ continue;
+
+ pr_debug("NUMA: CPU on %u\n", nid);
+ if (nid >= MAX_NUMNODES)
+ pr_warn("NUMA: Node id %u exceeds maximum value\n",
+ nid);
+ else
+ node_set(nid, numa_nodes_parsed);
+ }
+}
+
+static int __init of_numa_parse_memory_nodes(void)
+{
+ struct device_node *np = NULL;
+ struct resource rsrc;
+ u32 nid;
+ int r = 0;
+
+ for (;;) {
+ np = of_find_node_by_type(np, "memory");
+ if (!np)
+ break;
+
+ r = of_property_read_u32(np, "numa-node-id", &nid);
+ if (r == -EINVAL)
+ /*
+ * -EINVAL means the property doesn't exist; keep
+ * looking for more memory nodes with a
+ * "numa-node-id" property
+ */
+ continue;
+ else if (r)
+ /* some other error */
+ break;
+
+ r = of_address_to_resource(np, 0, &rsrc);
+ if (r) {
+ pr_err("NUMA: bad reg property in memory node\n");
+ break;
+ }
+
+ pr_debug("NUMA: base = %llx len = %llx, node = %u\n",
+ rsrc.start, rsrc.end - rsrc.start + 1, nid);
+
+ r = numa_add_memblk(nid, rsrc.start,
+ rsrc.end - rsrc.start + 1);
+ if (r)
+ break;
+ }
+ of_node_put(np);
+
+ return r;
+}
+
+static int __init of_numa_parse_distance_map_v1(struct device_node *map)
+{
+ const __be32 *matrix;
+ int entry_count;
+ int i;
+
+ pr_info("NUMA: parsing numa-distance-map-v1\n");
+
+ matrix = of_get_property(map, "distance-matrix", NULL);
+ if (!matrix) {
+ pr_err("NUMA: No distance-matrix property in distance-map\n");
+ return -EINVAL;
+ }
+
+ entry_count = of_property_count_u32_elems(map, "distance-matrix");
+ if (entry_count <= 0) {
+ pr_err("NUMA: Invalid distance-matrix\n");
+ return -EINVAL;
+ }
+
+ for (i = 0; i + 2 < entry_count; i += 3) {
+ u32 nodea, nodeb, distance;
+
+ nodea = of_read_number(matrix, 1);
+ matrix++;
+ nodeb = of_read_number(matrix, 1);
+ matrix++;
+ distance = of_read_number(matrix, 1);
+ matrix++;
+
+ numa_set_distance(nodea, nodeb, distance);
+ pr_debug("NUMA: distance[node%d -> node%d] = %d\n",
+ nodea, nodeb, distance);
+
+ /* Set default distance of node B->A same as A->B */
+ if (nodeb > nodea)
+ numa_set_distance(nodeb, nodea, distance);
+ }
+
+ return 0;
+}
+
+static int __init of_numa_parse_distance_map(void)
+{
+ int ret = 0;
+ struct device_node *np;
+
+ np = of_find_compatible_node(NULL, NULL,
+ "numa-distance-map-v1");
+ if (np)
+ ret = of_numa_parse_distance_map_v1(np);
+
+ of_node_put(np);
+ return ret;
+}
+
+int of_node_to_nid(struct device_node *device)
+{
+ struct device_node *np;
+ u32 nid;
+ int r = -ENODATA;
+
+ np = of_node_get(device);
+
+ while (np) {
+ struct device_node *parent;
+
+ r = of_property_read_u32(np, "numa-node-id", &nid);
+ /*
+ * -EINVAL indicates the property was not found, and
+ * we walk up the tree trying to find a parent with a
+ * "numa-node-id". Any other type of error indicates
+ * a bad device tree and we give up.
+ */
+ if (r != -EINVAL)
+ break;
+
+ parent = of_get_parent(np);
+ of_node_put(np);
+ np = parent;
+ }
+ if (np && r)
+ pr_warn("NUMA: Invalid \"numa-node-id\" property in node %s\n",
+ np->name);
+ of_node_put(np);
+
+ if (!r) {
+ if (nid >= MAX_NUMNODES)
+ pr_warn("NUMA: Node id %u exceeds maximum value\n",
+ nid);
+ else
+ return nid;
+ }
+
+ return NUMA_NO_NODE;
+}
+EXPORT_SYMBOL(of_node_to_nid);
+
+int __init of_numa_init(void)
+{
+ int r;
+
+ of_numa_parse_cpu_nodes();
+ r = of_numa_parse_memory_nodes();
+ if (r)
+ return r;
+ return of_numa_parse_distance_map();
+}
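
Two details worth calling out: the "distance-matrix" property parsed above is a flat list of (nodeA, nodeB, distance) u32 triples, and of_node_to_nid() walks up the tree, so a "numa-node-id" on any ancestor of a device node applies to it. A minimal consumer sketch, with illustrative names, of how a driver might use the result for node-local allocation:

int nid = of_node_to_nid(pdev->dev.of_node);

if (nid == NUMA_NO_NODE)
	nid = 0;	/* no affinity information available */
buf = kzalloc_node(buf_size, GFP_KERNEL, nid);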
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index f700908..f2d01d4 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -847,6 +847,14 @@
if (!platform_get_irq(cpu_pmu->plat_device, 0))
cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+ /*
+ * This is a CPU PMU potentially in a heterogeneous configuration (e.g.
+ * big.LITTLE). This is not an uncore PMU, and we have taken ctx
+ * sharing into account (e.g. with our pmu::filter_match callback and
+ * pmu::event_init group validation).
+ */
+ cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_HETEROGENEOUS_CPUS;
+
return 0;
out_unregister:
diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c
index 1062fa4..79d64ea 100644
--- a/drivers/platform/x86/acer-wmi.c
+++ b/drivers/platform/x86/acer-wmi.c
@@ -793,15 +793,6 @@
return AE_OK;
}
-static int AMW0_set_cap_acpi_check_device_found __initdata;
-
-static acpi_status __init AMW0_set_cap_acpi_check_device_cb(acpi_handle handle,
- u32 level, void *context, void **retval)
-{
- AMW0_set_cap_acpi_check_device_found = 1;
- return AE_OK;
-}
-
static const struct acpi_device_id norfkill_ids[] __initconst = {
{ "VPC2004", 0},
{ "IBM0068", 0},
@@ -816,9 +807,10 @@
const struct acpi_device_id *id;
for (id = norfkill_ids; id->id[0]; id++)
- acpi_get_devices(id->id, AMW0_set_cap_acpi_check_device_cb,
- NULL, NULL);
- return AMW0_set_cap_acpi_check_device_found;
+ if (acpi_dev_found(id->id))
+ return true;
+
+ return false;
}
static acpi_status __init AMW0_set_capabilities(void)
diff --git a/drivers/platform/x86/eeepc-wmi.c b/drivers/platform/x86/eeepc-wmi.c
index 14fd2ec..17b365f 100644
--- a/drivers/platform/x86/eeepc-wmi.c
+++ b/drivers/platform/x86/eeepc-wmi.c
@@ -204,30 +204,10 @@
}
}
-static acpi_status eeepc_wmi_parse_device(acpi_handle handle, u32 level,
- void *context, void **retval)
-{
- pr_warn("Found legacy ATKD device (%s)\n", EEEPC_ACPI_HID);
- *(bool *)context = true;
- return AE_CTRL_TERMINATE;
-}
-
-static int eeepc_wmi_check_atkd(void)
-{
- acpi_status status;
- bool found = false;
-
- status = acpi_get_devices(EEEPC_ACPI_HID, eeepc_wmi_parse_device,
- &found, NULL);
-
- if (ACPI_FAILURE(status) || !found)
- return 0;
- return -1;
-}
-
static int eeepc_wmi_probe(struct platform_device *pdev)
{
- if (eeepc_wmi_check_atkd()) {
+ if (acpi_dev_found(EEEPC_ACPI_HID)) {
+ pr_warn("Found legacy ATKD device (%s)\n", EEEPC_ACPI_HID);
pr_warn("WMI device present, but legacy ATKD device is also "
"present and enabled\n");
pr_warn("You probably booted with acpi_osi=\"Linux\" or "
diff --git a/drivers/pnp/pnpbios/core.c b/drivers/pnp/pnpbios/core.c
index facd43b..81603d9 100644
--- a/drivers/pnp/pnpbios/core.c
+++ b/drivers/pnp/pnpbios/core.c
@@ -521,10 +521,11 @@
int ret;
if (pnpbios_disabled || dmi_check_system(pnpbios_dmi_table) ||
- paravirt_enabled()) {
+ arch_pnpbios_disabled()) {
printk(KERN_INFO "PnPBIOS: Disabled\n");
return -ENODEV;
}
+
#ifdef CONFIG_PNPACPI
if (!acpi_disabled && !pnpacpi_disabled) {
pnpbios_disabled = 1;
diff --git a/drivers/power/avs/rockchip-io-domain.c b/drivers/power/avs/rockchip-io-domain.c
index 8986382..01b6d3f 100644
--- a/drivers/power/avs/rockchip-io-domain.c
+++ b/drivers/power/avs/rockchip-io-domain.c
@@ -336,6 +336,7 @@
struct device_node *np = pdev->dev.of_node;
const struct of_device_id *match;
struct rockchip_iodomain *iod;
+ struct device *parent;
int i, ret = 0;
if (!np)
@@ -351,7 +352,14 @@
match = of_match_node(rockchip_iodomain_match, np);
iod->soc_data = (struct rockchip_iodomain_soc_data *)match->data;
- iod->grf = syscon_regmap_lookup_by_phandle(np, "rockchip,grf");
+ parent = pdev->dev.parent;
+ if (parent && parent->of_node) {
+ iod->grf = syscon_node_to_regmap(parent->of_node);
+ } else {
+ dev_dbg(&pdev->dev, "falling back to old binding\n");
+ iod->grf = syscon_regmap_lookup_by_phandle(np, "rockchip,grf");
+ }
+
if (IS_ERR(iod->grf)) {
dev_err(&pdev->dev, "couldn't find grf regmap\n");
return PTR_ERR(iod->grf);
diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c
index 8fad0a7..b2766b8 100644
--- a/drivers/powercap/intel_rapl.c
+++ b/drivers/powercap/intel_rapl.c
@@ -34,6 +34,9 @@
#include <asm/processor.h>
#include <asm/cpu_device_id.h>
+/* Local defines */
+#define MSR_PLATFORM_POWER_LIMIT 0x0000065C
+
/* bitmasks for RAPL MSRs, used by primitive access functions */
#define ENERGY_STATUS_MASK 0xffffffff
@@ -86,6 +89,7 @@
RAPL_DOMAIN_PP0, /* core power plane */
RAPL_DOMAIN_PP1, /* graphics uncore */
RAPL_DOMAIN_DRAM,/* DRAM control_type */
+ RAPL_DOMAIN_PLATFORM, /* PSys control_type */
RAPL_DOMAIN_MAX,
};
@@ -251,9 +255,11 @@
"core",
"uncore",
"dram",
+ "psys",
};
static struct powercap_control_type *control_type; /* PowerCap Controller */
+static struct rapl_domain *platform_rapl_domain; /* Platform (PSys) domain */
/* caller to ensure CPU hotplug lock is held */
static struct rapl_package *find_package_by_id(int id)
@@ -409,6 +415,14 @@
.set_enable = set_domain_enable,
.get_enable = get_domain_enable,
},
+ /* RAPL_DOMAIN_PLATFORM */
+ {
+ .get_energy_uj = get_energy_counter,
+ .get_max_energy_range_uj = get_max_energy_counter,
+ .release = release_zone,
+ .set_enable = set_domain_enable,
+ .get_enable = get_domain_enable,
+ },
};
static int set_power_limit(struct powercap_zone *power_zone, int id,
@@ -1101,6 +1115,8 @@
RAPL_CPU(0X5C, rapl_defaults_core),/* Broxton */
RAPL_CPU(0x5E, rapl_defaults_core),/* Skylake-H/S */
RAPL_CPU(0x57, rapl_defaults_hsw_server),/* Knights Landing */
+ RAPL_CPU(0x8E, rapl_defaults_core),/* Kabylake */
+ RAPL_CPU(0x9E, rapl_defaults_core),/* Kabylake */
{}
};
MODULE_DEVICE_TABLE(x86cpu, rapl_ids);
@@ -1160,6 +1176,13 @@
powercap_unregister_zone(control_type,
&rd_package->power_zone);
}
+
+ if (platform_rapl_domain) {
+ powercap_unregister_zone(control_type,
+ &platform_rapl_domain->power_zone);
+ kfree(platform_rapl_domain);
+ }
+
powercap_unregister_control_type(control_type);
return 0;
@@ -1239,6 +1262,47 @@
return ret;
}
+static int rapl_register_psys(void)
+{
+ struct rapl_domain *rd;
+ struct powercap_zone *power_zone;
+ u64 val;
+
+ if (rdmsrl_safe_on_cpu(0, MSR_PLATFORM_ENERGY_STATUS, &val) || !val)
+ return -ENODEV;
+
+ if (rdmsrl_safe_on_cpu(0, MSR_PLATFORM_POWER_LIMIT, &val) || !val)
+ return -ENODEV;
+
+ rd = kzalloc(sizeof(*rd), GFP_KERNEL);
+ if (!rd)
+ return -ENOMEM;
+
+ rd->name = rapl_domain_names[RAPL_DOMAIN_PLATFORM];
+ rd->id = RAPL_DOMAIN_PLATFORM;
+ rd->msrs[0] = MSR_PLATFORM_POWER_LIMIT;
+ rd->msrs[1] = MSR_PLATFORM_ENERGY_STATUS;
+ rd->rpl[0].prim_id = PL1_ENABLE;
+ rd->rpl[0].name = pl1_name;
+ rd->rpl[1].prim_id = PL2_ENABLE;
+ rd->rpl[1].name = pl2_name;
+ rd->rp = find_package_by_id(0);
+
+ power_zone = powercap_register_zone(&rd->power_zone, control_type,
+ "psys", NULL,
+ &zone_ops[RAPL_DOMAIN_PLATFORM],
+ 2, &constraint_ops);
+
+ if (IS_ERR(power_zone)) {
+ kfree(rd);
+ return PTR_ERR(power_zone);
+ }
+
+ platform_rapl_domain = rd;
+
+ return 0;
+}
+
static int rapl_register_powercap(void)
{
struct rapl_domain *rd;
@@ -1255,6 +1319,10 @@
list_for_each_entry(rp, &rapl_packages, plist)
if (rapl_package_register_powercap(rp))
goto err_cleanup_package;
+
+ /* Don't bail out if PSys is not supported */
+ rapl_register_psys();
+
return ret;
err_cleanup_package:
@@ -1289,6 +1357,9 @@
case RAPL_DOMAIN_DRAM:
msr = MSR_DRAM_ENERGY_STATUS;
break;
+ case RAPL_DOMAIN_PLATFORM:
+ /* PSYS(PLATFORM) is not a CPU domain, so avoid printing an error */
+ return -EINVAL;
default:
pr_err("invalid domain id %d\n", domain);
return -EINVAL;
diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c
index 7831bc6..680fbc7 100644
--- a/drivers/pwm/core.c
+++ b/drivers/pwm/core.c
@@ -128,6 +128,13 @@
set_bit(PWMF_REQUESTED, &pwm->flags);
pwm->label = label;
+ /*
+ * FIXME: This should be removed once all PWM users properly make use
+ * of struct pwm_args to initialize the PWM device. As long as this is
+ * here, the PWM state and hardware state can get out of sync.
+ */
+ pwm_apply_args(pwm);
+
return 0;
}
@@ -146,12 +153,12 @@
if (IS_ERR(pwm))
return pwm;
- pwm_set_period(pwm, args->args[1]);
+ pwm->args.period = args->args[1];
if (args->args[2] & PWM_POLARITY_INVERTED)
- pwm_set_polarity(pwm, PWM_POLARITY_INVERSED);
+ pwm->args.polarity = PWM_POLARITY_INVERSED;
else
- pwm_set_polarity(pwm, PWM_POLARITY_NORMAL);
+ pwm->args.polarity = PWM_POLARITY_NORMAL;
return pwm;
}
@@ -172,7 +179,7 @@
if (IS_ERR(pwm))
return pwm;
- pwm_set_period(pwm, args->args[1]);
+ pwm->args.period = args->args[1];
return pwm;
}
@@ -747,13 +754,13 @@
if (!chip)
goto out;
+ pwm->args.period = chosen->period;
+ pwm->args.polarity = chosen->polarity;
+
pwm = pwm_request_from_chip(chip, chosen->index, con_id ?: dev_id);
if (IS_ERR(pwm))
goto out;
- pwm_set_period(pwm, chosen->period);
- pwm_set_polarity(pwm, chosen->polarity);
-
out:
mutex_unlock(&pwm_lookup_lock);
return pwm;
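
The board- or DT-supplied reference period and polarity now land in pwm->args rather than being written into the live PWM state; the pwm_apply_args() call in pwm_device_request() keeps unconverted users working in the meantime. A sketch of how a converted consumer is expected to pick the reference values up, assuming the pwm_get_args() helper from this series:

struct pwm_args args;
int ret;

pwm_get_args(pwm, &args);
/* request a 50% duty cycle relative to the reference period */
ret = pwm_config(pwm, args.period / 2, args.period);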
diff --git a/drivers/pwm/pwm-clps711x.c b/drivers/pwm/pwm-clps711x.c
index a80c108..7d33542 100644
--- a/drivers/pwm/pwm-clps711x.c
+++ b/drivers/pwm/pwm-clps711x.c
@@ -60,7 +60,7 @@
return -EINVAL;
/* Store constant period value */
- pwm_set_period(pwm, DIV_ROUND_CLOSEST(NSEC_PER_SEC, freq));
+ pwm->args.period = DIV_ROUND_CLOSEST(NSEC_PER_SEC, freq);
return 0;
}
diff --git a/drivers/pwm/pwm-pxa.c b/drivers/pwm/pwm-pxa.c
index cb2f702..58b709f 100644
--- a/drivers/pwm/pwm-pxa.c
+++ b/drivers/pwm/pwm-pxa.c
@@ -160,7 +160,7 @@
if (IS_ERR(pwm))
return pwm;
- pwm_set_period(pwm, args->args[0]);
+ pwm->args.period = args->args[0];
return pwm;
}
diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index c77dc08..144cbf5 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -321,6 +321,15 @@
help
This driver supports LP8720/LP8725 PMIC
+config REGULATOR_LP873X
+ tristate "TI LP873X Power regulators"
+ depends on MFD_LP873X && OF
+ help
+ This driver supports LP873X voltage regulator chips. LP873X
+ provides two step-down converters and two general-purpose LDO
+ voltage regulators. It supports software-based voltage control
+ for different voltage domains.
+
config REGULATOR_LP8755
tristate "TI LP8755 High Performance PMU driver"
depends on I2C
@@ -409,6 +418,7 @@
config REGULATOR_MAX8973
tristate "Maxim MAX8973 voltage regulator "
depends on I2C
+ depends on THERMAL && THERMAL_OF
select REGMAP_I2C
help
The MAXIM MAX8973 high-efficiency, three-phase, DC-DC step-down
@@ -548,6 +558,13 @@
Say y here to support the voltage regulators and convertors
PV88060
+config REGULATOR_PV88080
+ tristate "Powerventure Semiconductor PV88080 regulator"
+ depends on I2C
+ select REGMAP_I2C
+ help
+ Say y here to support the buck converters on PV88080
+
config REGULATOR_PV88090
tristate "Powerventure Semiconductor PV88090 regulator"
depends on I2C
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index 61bfbb9..85a1d44 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -42,11 +42,12 @@
obj-$(CONFIG_REGULATOR_LP3971) += lp3971.o
obj-$(CONFIG_REGULATOR_LP3972) += lp3972.o
obj-$(CONFIG_REGULATOR_LP872X) += lp872x.o
+obj-$(CONFIG_REGULATOR_LP873X) += lp873x-regulator.o
obj-$(CONFIG_REGULATOR_LP8788) += lp8788-buck.o
obj-$(CONFIG_REGULATOR_LP8788) += lp8788-ldo.o
obj-$(CONFIG_REGULATOR_LP8755) += lp8755.o
obj-$(CONFIG_REGULATOR_LTC3589) += ltc3589.o
-obj-$(CONFIG_REGULATOR_MAX14577) += max14577.o
+obj-$(CONFIG_REGULATOR_MAX14577) += max14577-regulator.o
obj-$(CONFIG_REGULATOR_MAX1586) += max1586.o
obj-$(CONFIG_REGULATOR_MAX77620) += max77620-regulator.o
obj-$(CONFIG_REGULATOR_MAX8649) += max8649.o
@@ -55,10 +56,10 @@
obj-$(CONFIG_REGULATOR_MAX8925) += max8925-regulator.o
obj-$(CONFIG_REGULATOR_MAX8952) += max8952.o
obj-$(CONFIG_REGULATOR_MAX8973) += max8973-regulator.o
-obj-$(CONFIG_REGULATOR_MAX8997) += max8997.o
+obj-$(CONFIG_REGULATOR_MAX8997) += max8997-regulator.o
obj-$(CONFIG_REGULATOR_MAX8998) += max8998.o
obj-$(CONFIG_REGULATOR_MAX77686) += max77686-regulator.o
-obj-$(CONFIG_REGULATOR_MAX77693) += max77693.o
+obj-$(CONFIG_REGULATOR_MAX77693) += max77693-regulator.o
obj-$(CONFIG_REGULATOR_MAX77802) += max77802-regulator.o
obj-$(CONFIG_REGULATOR_MC13783) += mc13783-regulator.o
obj-$(CONFIG_REGULATOR_MC13892) += mc13892-regulator.o
@@ -71,6 +72,7 @@
obj-$(CONFIG_REGULATOR_PALMAS) += palmas-regulator.o
obj-$(CONFIG_REGULATOR_PFUZE100) += pfuze100-regulator.o
obj-$(CONFIG_REGULATOR_PV88060) += pv88060-regulator.o
+obj-$(CONFIG_REGULATOR_PV88080) += pv88080-regulator.o
obj-$(CONFIG_REGULATOR_PV88090) += pv88090-regulator.o
obj-$(CONFIG_REGULATOR_PWM) += pwm-regulator.o
obj-$(CONFIG_REGULATOR_TPS51632) += tps51632-regulator.o
diff --git a/drivers/regulator/act8865-regulator.c b/drivers/regulator/act8865-regulator.c
index 000d566..a1cd0d4 100644
--- a/drivers/regulator/act8865-regulator.c
+++ b/drivers/regulator/act8865-regulator.c
@@ -139,6 +139,74 @@
int off_mask;
};
+static const struct regmap_range act8600_reg_ranges[] = {
+ regmap_reg_range(0x00, 0x01),
+ regmap_reg_range(0x10, 0x10),
+ regmap_reg_range(0x12, 0x12),
+ regmap_reg_range(0x20, 0x20),
+ regmap_reg_range(0x22, 0x22),
+ regmap_reg_range(0x30, 0x30),
+ regmap_reg_range(0x32, 0x32),
+ regmap_reg_range(0x40, 0x41),
+ regmap_reg_range(0x50, 0x51),
+ regmap_reg_range(0x60, 0x61),
+ regmap_reg_range(0x70, 0x71),
+ regmap_reg_range(0x80, 0x81),
+ regmap_reg_range(0x91, 0x91),
+ regmap_reg_range(0xA1, 0xA1),
+ regmap_reg_range(0xA8, 0xAA),
+ regmap_reg_range(0xB0, 0xB0),
+ regmap_reg_range(0xB2, 0xB2),
+ regmap_reg_range(0xC1, 0xC1),
+};
+
+static const struct regmap_range act8600_reg_ro_ranges[] = {
+ regmap_reg_range(0xAA, 0xAA),
+ regmap_reg_range(0xC1, 0xC1),
+};
+
+static const struct regmap_range act8600_reg_volatile_ranges[] = {
+ regmap_reg_range(0x00, 0x01),
+ regmap_reg_range(0x12, 0x12),
+ regmap_reg_range(0x22, 0x22),
+ regmap_reg_range(0x32, 0x32),
+ regmap_reg_range(0x41, 0x41),
+ regmap_reg_range(0x51, 0x51),
+ regmap_reg_range(0x61, 0x61),
+ regmap_reg_range(0x71, 0x71),
+ regmap_reg_range(0x81, 0x81),
+ regmap_reg_range(0xA8, 0xA8),
+ regmap_reg_range(0xAA, 0xAA),
+ regmap_reg_range(0xB0, 0xB0),
+ regmap_reg_range(0xC1, 0xC1),
+};
+
+static const struct regmap_access_table act8600_write_ranges_table = {
+ .yes_ranges = act8600_reg_ranges,
+ .n_yes_ranges = ARRAY_SIZE(act8600_reg_ranges),
+ .no_ranges = act8600_reg_ro_ranges,
+ .n_no_ranges = ARRAY_SIZE(act8600_reg_ro_ranges),
+};
+
+static const struct regmap_access_table act8600_read_ranges_table = {
+ .yes_ranges = act8600_reg_ranges,
+ .n_yes_ranges = ARRAY_SIZE(act8600_reg_ranges),
+};
+
+static const struct regmap_access_table act8600_volatile_ranges_table = {
+ .yes_ranges = act8600_reg_volatile_ranges,
+ .n_yes_ranges = ARRAY_SIZE(act8600_reg_volatile_ranges),
+};
+
+static const struct regmap_config act8600_regmap_config = {
+ .reg_bits = 8,
+ .val_bits = 8,
+ .max_register = 0xFF,
+ .wr_table = &act8600_write_ranges_table,
+ .rd_table = &act8600_read_ranges_table,
+ .volatile_table = &act8600_volatile_ranges_table,
+};
+
static const struct regmap_config act8865_regmap_config = {
.reg_bits = 8,
.val_bits = 8,
@@ -319,7 +387,6 @@
};
static int act8865_pdata_from_dt(struct device *dev,
- struct device_node **of_node,
struct act8865_platform_data *pdata,
unsigned long type)
{
@@ -370,7 +437,7 @@
regulator->id = i;
regulator->name = matches[i].name;
regulator->init_data = matches[i].init_data;
- of_node[i] = matches[i].of_node;
+ regulator->of_node = matches[i].of_node;
regulator++;
}
@@ -378,7 +445,6 @@
}
#else
static inline int act8865_pdata_from_dt(struct device *dev,
- struct device_node **of_node,
struct act8865_platform_data *pdata,
unsigned long type)
{
@@ -386,8 +452,8 @@
}
#endif
-static struct regulator_init_data
-*act8865_get_init_data(int id, struct act8865_platform_data *pdata)
+static struct act8865_regulator_data *act8865_get_regulator_data(
+ int id, struct act8865_platform_data *pdata)
{
int i;
@@ -396,7 +462,7 @@
for (i = 0; i < pdata->num_regulators; i++) {
if (pdata->regulators[i].id == id)
- return pdata->regulators[i].init_data;
+ return &pdata->regulators[i];
}
return NULL;
@@ -418,9 +484,9 @@
const struct regulator_desc *regulators;
struct act8865_platform_data pdata_of, *pdata;
struct device *dev = &client->dev;
- struct device_node **of_node;
int i, ret, num_regulators;
struct act8865 *act8865;
+ const struct regmap_config *regmap_config;
unsigned long type;
int off_reg, off_mask;
int voltage_select = 0;
@@ -447,12 +513,14 @@
case ACT8600:
regulators = act8600_regulators;
num_regulators = ARRAY_SIZE(act8600_regulators);
+ regmap_config = &act8600_regmap_config;
off_reg = -1;
off_mask = -1;
break;
case ACT8846:
regulators = act8846_regulators;
num_regulators = ARRAY_SIZE(act8846_regulators);
+ regmap_config = &act8865_regmap_config;
off_reg = ACT8846_GLB_OFF_CTRL;
off_mask = ACT8846_OFF_SYSMASK;
break;
@@ -464,6 +532,7 @@
regulators = act8865_regulators;
num_regulators = ARRAY_SIZE(act8865_regulators);
}
+ regmap_config = &act8865_regmap_config;
off_reg = ACT8865_SYS_CTRL;
off_mask = ACT8865_MSTROFF;
break;
@@ -472,34 +541,22 @@
return -EINVAL;
}
- of_node = devm_kzalloc(dev, sizeof(struct device_node *) *
- num_regulators, GFP_KERNEL);
- if (!of_node)
- return -ENOMEM;
-
if (dev->of_node && !pdata) {
- ret = act8865_pdata_from_dt(dev, of_node, &pdata_of, type);
+ ret = act8865_pdata_from_dt(dev, &pdata_of, type);
if (ret < 0)
return ret;
pdata = &pdata_of;
}
- if (pdata->num_regulators > num_regulators) {
- dev_err(dev, "too many regulators: %d\n",
- pdata->num_regulators);
- return -EINVAL;
- }
-
act8865 = devm_kzalloc(dev, sizeof(struct act8865), GFP_KERNEL);
if (!act8865)
return -ENOMEM;
- act8865->regmap = devm_regmap_init_i2c(client, &act8865_regmap_config);
+ act8865->regmap = devm_regmap_init_i2c(client, regmap_config);
if (IS_ERR(act8865->regmap)) {
ret = PTR_ERR(act8865->regmap);
- dev_err(&client->dev, "Failed to allocate register map: %d\n",
- ret);
+ dev_err(dev, "Failed to allocate register map: %d\n", ret);
return ret;
}
@@ -518,15 +575,20 @@
for (i = 0; i < num_regulators; i++) {
const struct regulator_desc *desc = &regulators[i];
struct regulator_config config = { };
+ struct act8865_regulator_data *rdata;
struct regulator_dev *rdev;
config.dev = dev;
- config.init_data = act8865_get_init_data(desc->id, pdata);
- config.of_node = of_node[i];
config.driver_data = act8865;
config.regmap = act8865->regmap;
- rdev = devm_regulator_register(&client->dev, desc, &config);
+ rdata = act8865_get_regulator_data(desc->id, pdata);
+ if (rdata) {
+ config.init_data = rdata->init_data;
+ config.of_node = rdata->of_node;
+ }
+
+ rdev = devm_regulator_register(dev, desc, &config);
if (IS_ERR(rdev)) {
dev_err(dev, "failed to register %s\n", desc->name);
return PTR_ERR(rdev);
@@ -534,7 +596,6 @@
}
i2c_set_clientdata(client, act8865);
- devm_kfree(dev, of_node);
return 0;
}
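
A note on the act8600 access tables above: regmap gives no_ranges precedence over yes_ranges, so 0xAA and 0xC1, which appear in both the write yes-list and the read-only list, end up readable and volatile but not writeable. Roughly, the check regmap applies is:

/* sketch of regmap's access-table evaluation (no_ranges wins) */
writeable = !regmap_reg_in_ranges(reg, t->no_ranges, t->n_no_ranges) &&
	    regmap_reg_in_ranges(reg, t->yes_ranges, t->n_yes_ranges);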
diff --git a/drivers/regulator/as3722-regulator.c b/drivers/regulator/as3722-regulator.c
index 8b046ee..66337e1 100644
--- a/drivers/regulator/as3722-regulator.c
+++ b/drivers/regulator/as3722-regulator.c
@@ -372,7 +372,7 @@
AS3722_LDO_ILIMIT_MASK, reg);
}
-static struct regulator_ops as3722_ldo0_ops = {
+static const struct regulator_ops as3722_ldo0_ops = {
.is_enabled = regulator_is_enabled_regmap,
.enable = regulator_enable_regmap,
.disable = regulator_disable_regmap,
@@ -383,7 +383,7 @@
.set_current_limit = as3722_ldo_set_current_limit,
};
-static struct regulator_ops as3722_ldo0_extcntrl_ops = {
+static const struct regulator_ops as3722_ldo0_extcntrl_ops = {
.list_voltage = regulator_list_voltage_linear,
.get_voltage_sel = regulator_get_voltage_sel_regmap,
.set_voltage_sel = regulator_set_voltage_sel_regmap,
@@ -415,7 +415,7 @@
return 150000;
}
-static struct regulator_ops as3722_ldo3_ops = {
+static const struct regulator_ops as3722_ldo3_ops = {
.is_enabled = regulator_is_enabled_regmap,
.enable = regulator_enable_regmap,
.disable = regulator_disable_regmap,
@@ -425,20 +425,45 @@
.get_current_limit = as3722_ldo3_get_current_limit,
};
-static struct regulator_ops as3722_ldo3_extcntrl_ops = {
+static const struct regulator_ops as3722_ldo3_extcntrl_ops = {
.list_voltage = regulator_list_voltage_linear,
.get_voltage_sel = regulator_get_voltage_sel_regmap,
.set_voltage_sel = regulator_set_voltage_sel_regmap,
.get_current_limit = as3722_ldo3_get_current_limit,
};
+static const struct regulator_ops as3722_ldo6_ops = {
+ .is_enabled = regulator_is_enabled_regmap,
+ .enable = regulator_enable_regmap,
+ .disable = regulator_disable_regmap,
+ .map_voltage = regulator_map_voltage_linear_range,
+ .set_voltage_sel = regulator_set_voltage_sel_regmap,
+ .get_voltage_sel = regulator_get_voltage_sel_regmap,
+ .list_voltage = regulator_list_voltage_linear_range,
+ .get_current_limit = as3722_ldo_get_current_limit,
+ .set_current_limit = as3722_ldo_set_current_limit,
+ .get_bypass = regulator_get_bypass_regmap,
+ .set_bypass = regulator_set_bypass_regmap,
+};
+
+static const struct regulator_ops as3722_ldo6_extcntrl_ops = {
+ .map_voltage = regulator_map_voltage_linear_range,
+ .set_voltage_sel = regulator_set_voltage_sel_regmap,
+ .get_voltage_sel = regulator_get_voltage_sel_regmap,
+ .list_voltage = regulator_list_voltage_linear_range,
+ .get_current_limit = as3722_ldo_get_current_limit,
+ .set_current_limit = as3722_ldo_set_current_limit,
+ .get_bypass = regulator_get_bypass_regmap,
+ .set_bypass = regulator_set_bypass_regmap,
+};
+
static const struct regulator_linear_range as3722_ldo_ranges[] = {
REGULATOR_LINEAR_RANGE(0, 0x00, 0x00, 0),
REGULATOR_LINEAR_RANGE(825000, 0x01, 0x24, 25000),
REGULATOR_LINEAR_RANGE(1725000, 0x40, 0x7F, 25000),
};
-static struct regulator_ops as3722_ldo_ops = {
+static const struct regulator_ops as3722_ldo_ops = {
.is_enabled = regulator_is_enabled_regmap,
.enable = regulator_enable_regmap,
.disable = regulator_disable_regmap,
@@ -450,7 +475,7 @@
.set_current_limit = as3722_ldo_set_current_limit,
};
-static struct regulator_ops as3722_ldo_extcntrl_ops = {
+static const struct regulator_ops as3722_ldo_extcntrl_ops = {
.map_voltage = regulator_map_voltage_linear_range,
.set_voltage_sel = regulator_set_voltage_sel_regmap,
.get_voltage_sel = regulator_get_voltage_sel_regmap,
@@ -616,7 +641,7 @@
REGULATOR_LINEAR_RANGE(2650000, 0x71, 0x7F, 50000),
};
-static struct regulator_ops as3722_sd016_ops = {
+static const struct regulator_ops as3722_sd016_ops = {
.is_enabled = regulator_is_enabled_regmap,
.enable = regulator_enable_regmap,
.disable = regulator_disable_regmap,
@@ -630,7 +655,7 @@
.set_mode = as3722_sd_set_mode,
};
-static struct regulator_ops as3722_sd016_extcntrl_ops = {
+static const struct regulator_ops as3722_sd016_extcntrl_ops = {
.list_voltage = regulator_list_voltage_linear,
.map_voltage = regulator_map_voltage_linear,
.get_voltage_sel = regulator_get_voltage_sel_regmap,
@@ -641,7 +666,7 @@
.set_mode = as3722_sd_set_mode,
};
-static struct regulator_ops as3722_sd2345_ops = {
+static const struct regulator_ops as3722_sd2345_ops = {
.is_enabled = regulator_is_enabled_regmap,
.enable = regulator_enable_regmap,
.disable = regulator_disable_regmap,
@@ -653,7 +678,7 @@
.set_mode = as3722_sd_set_mode,
};
-static struct regulator_ops as3722_sd2345_extcntrl_ops = {
+static const struct regulator_ops as3722_sd2345_extcntrl_ops = {
.list_voltage = regulator_list_voltage_linear_range,
.map_voltage = regulator_map_voltage_linear_range,
.set_voltage_sel = regulator_set_voltage_sel_regmap,
@@ -760,7 +785,7 @@
struct as3722_regulator_config_data *reg_config;
struct regulator_dev *rdev;
struct regulator_config config = { };
- struct regulator_ops *ops;
+ const struct regulator_ops *ops;
int id;
int ret;
@@ -829,6 +854,24 @@
}
}
break;
+ case AS3722_REGULATOR_ID_LDO6:
+ if (reg_config->ext_control)
+ ops = &as3722_ldo6_extcntrl_ops;
+ else
+ ops = &as3722_ldo6_ops;
+ as3722_regs->desc[id].enable_time = 500;
+ as3722_regs->desc[id].bypass_reg =
+ AS3722_LDO6_VOLTAGE_REG;
+ as3722_regs->desc[id].bypass_mask =
+ AS3722_LDO_VSEL_MASK;
+ as3722_regs->desc[id].bypass_val_on =
+ AS3722_LDO6_VSEL_BYPASS;
+ as3722_regs->desc[id].bypass_val_off =
+ AS3722_LDO6_VSEL_BYPASS;
+ as3722_regs->desc[id].linear_ranges = as3722_ldo_ranges;
+ as3722_regs->desc[id].n_linear_ranges =
+ ARRAY_SIZE(as3722_ldo_ranges);
+ break;
case AS3722_REGULATOR_ID_SD0:
case AS3722_REGULATOR_ID_SD1:
case AS3722_REGULATOR_ID_SD6:
diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index e0b7642..ec8184d5 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -132,6 +132,19 @@
return has_full_constraints || of_have_populated_dt();
}
+static bool regulator_ops_is_valid(struct regulator_dev *rdev, int ops)
+{
+ if (!rdev->constraints) {
+ rdev_err(rdev, "no constraints\n");
+ return false;
+ }
+
+ if (rdev->constraints->valid_ops_mask & ops)
+ return true;
+
+ return false;
+}
+
static inline struct regulator_dev *rdev_get_supply(struct regulator_dev *rdev)
{
if (rdev && rdev->supply)
@@ -198,28 +211,13 @@
return regnode;
}
-static int _regulator_can_change_status(struct regulator_dev *rdev)
-{
- if (!rdev->constraints)
- return 0;
-
- if (rdev->constraints->valid_ops_mask & REGULATOR_CHANGE_STATUS)
- return 1;
- else
- return 0;
-}
-
/* Platform voltage constraint check */
static int regulator_check_voltage(struct regulator_dev *rdev,
int *min_uV, int *max_uV)
{
BUG_ON(*min_uV > *max_uV);
- if (!rdev->constraints) {
- rdev_err(rdev, "no constraints\n");
- return -ENODEV;
- }
- if (!(rdev->constraints->valid_ops_mask & REGULATOR_CHANGE_VOLTAGE)) {
+ if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_VOLTAGE)) {
rdev_err(rdev, "voltage operation not allowed\n");
return -EPERM;
}
@@ -275,11 +273,7 @@
{
BUG_ON(*min_uA > *max_uA);
- if (!rdev->constraints) {
- rdev_err(rdev, "no constraints\n");
- return -ENODEV;
- }
- if (!(rdev->constraints->valid_ops_mask & REGULATOR_CHANGE_CURRENT)) {
+ if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_CURRENT)) {
rdev_err(rdev, "current operation not allowed\n");
return -EPERM;
}
@@ -312,11 +306,7 @@
return -EINVAL;
}
- if (!rdev->constraints) {
- rdev_err(rdev, "no constraints\n");
- return -ENODEV;
- }
- if (!(rdev->constraints->valid_ops_mask & REGULATOR_CHANGE_MODE)) {
+ if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_MODE)) {
rdev_err(rdev, "mode operation not allowed\n");
return -EPERM;
}
@@ -333,20 +323,6 @@
return -EINVAL;
}
-/* dynamic regulator mode switching constraint check */
-static int regulator_check_drms(struct regulator_dev *rdev)
-{
- if (!rdev->constraints) {
- rdev_err(rdev, "no constraints\n");
- return -ENODEV;
- }
- if (!(rdev->constraints->valid_ops_mask & REGULATOR_CHANGE_DRMS)) {
- rdev_dbg(rdev, "drms operation not allowed\n");
- return -EPERM;
- }
- return 0;
-}
-
static ssize_t regulator_uV_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -692,8 +668,7 @@
* first check to see if we can set modes at all, otherwise just
* tell the consumer everything is OK.
*/
- err = regulator_check_drms(rdev);
- if (err < 0)
+ if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_DRMS))
return 0;
if (!rdev->desc->ops->get_optimum_mode &&
@@ -808,8 +783,6 @@
/* locks held by caller */
static int suspend_prepare(struct regulator_dev *rdev, suspend_state_t state)
{
- lockdep_assert_held_once(&rdev->mutex);
-
if (!rdev->constraints)
return -EINVAL;
@@ -893,7 +866,7 @@
rdev_dbg(rdev, "%s\n", buf);
if ((constraints->min_uV != constraints->max_uV) &&
- !(constraints->valid_ops_mask & REGULATOR_CHANGE_VOLTAGE))
+ !regulator_ops_is_valid(rdev, REGULATOR_CHANGE_VOLTAGE))
rdev_warn(rdev,
"Voltage range but no REGULATOR_CHANGE_VOLTAGE\n");
}
@@ -906,7 +879,8 @@
/* do we need to apply the constraint voltage */
if (rdev->constraints->apply_uV &&
- rdev->constraints->min_uV == rdev->constraints->max_uV) {
+ rdev->constraints->min_uV && rdev->constraints->max_uV) {
+ int target_min, target_max;
int current_uV = _regulator_get_voltage(rdev);
if (current_uV < 0) {
rdev_err(rdev,
@@ -914,15 +888,34 @@
current_uV);
return current_uV;
}
- if (current_uV < rdev->constraints->min_uV ||
- current_uV > rdev->constraints->max_uV) {
+
+ /*
+ * If we're below the minimum voltage move up to the
+ * minimum voltage, if we're above the maximum voltage
+ * then move down to the maximum.
+ */
+ target_min = current_uV;
+ target_max = current_uV;
+
+ if (current_uV < rdev->constraints->min_uV) {
+ target_min = rdev->constraints->min_uV;
+ target_max = rdev->constraints->min_uV;
+ }
+
+ if (current_uV > rdev->constraints->max_uV) {
+ target_min = rdev->constraints->max_uV;
+ target_max = rdev->constraints->max_uV;
+ }
+
+ if (target_min != current_uV || target_max != current_uV) {
+ rdev_info(rdev, "Bringing %duV into %d-%duV\n",
+ current_uV, target_min, target_max);
ret = _regulator_do_set_voltage(
- rdev, rdev->constraints->min_uV,
- rdev->constraints->max_uV);
+ rdev, target_min, target_max);
if (ret < 0) {
rdev_err(rdev,
- "failed to apply %duV constraint(%d)\n",
- rdev->constraints->min_uV, ret);
+ "failed to apply %d-%duV constraint(%d)\n",
+ target_min, target_max, ret);
return ret;
}
}
@@ -1150,17 +1143,6 @@
}
}
- if (rdev->constraints->active_discharge && ops->set_active_discharge) {
- bool ad_state = (rdev->constraints->active_discharge ==
- REGULATOR_ACTIVE_DISCHARGE_ENABLE) ? true : false;
-
- ret = ops->set_active_discharge(rdev, ad_state);
- if (ret < 0) {
- rdev_err(rdev, "failed to set active discharge\n");
- return ret;
- }
- }
-
print_constraints(rdev);
return 0;
}
@@ -1272,6 +1254,55 @@
}
}
+#ifdef CONFIG_DEBUG_FS
+static ssize_t constraint_flags_read_file(struct file *file,
+ char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ const struct regulator *regulator = file->private_data;
+ const struct regulation_constraints *c = regulator->rdev->constraints;
+ char *buf;
+ ssize_t ret;
+
+ if (!c)
+ return 0;
+
+ buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ ret = snprintf(buf, PAGE_SIZE,
+ "always_on: %u\n"
+ "boot_on: %u\n"
+ "apply_uV: %u\n"
+ "ramp_disable: %u\n"
+ "soft_start: %u\n"
+ "pull_down: %u\n"
+ "over_current_protection: %u\n",
+ c->always_on,
+ c->boot_on,
+ c->apply_uV,
+ c->ramp_disable,
+ c->soft_start,
+ c->pull_down,
+ c->over_current_protection);
+
+ ret = simple_read_from_buffer(user_buf, count, ppos, buf, ret);
+ kfree(buf);
+
+ return ret;
+}
+
+#endif
+
+static const struct file_operations constraint_flags_fops = {
+#ifdef CONFIG_DEBUG_FS
+ .open = simple_open,
+ .read = constraint_flags_read_file,
+ .llseek = default_llseek,
+#endif
+};
+
#define REG_STR_SIZE 64
static struct regulator *create_regulator(struct regulator_dev *rdev,
@@ -1327,6 +1358,9 @@
&regulator->min_uV);
debugfs_create_u32("max_uV", 0444, regulator->debugfs,
&regulator->max_uV);
+ debugfs_create_file("constraint_flags", 0444,
+ regulator->debugfs, regulator,
+ &constraint_flags_fops);
}
/*
@@ -1334,7 +1368,7 @@
* it is then we don't need to do nearly so much work for
* enable/disable calls.
*/
- if (!_regulator_can_change_status(rdev) &&
+ if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_STATUS) &&
_regulator_is_enabled(rdev))
regulator->always_on = true;
@@ -1532,10 +1566,11 @@
}
/* Cascade always-on state to supply */
- if (_regulator_is_enabled(rdev) && rdev->supply) {
+ if (_regulator_is_enabled(rdev)) {
ret = regulator_enable(rdev->supply);
if (ret < 0) {
_regulator_put(rdev->supply);
+ rdev->supply = NULL;
return ret;
}
}
@@ -2111,15 +2146,15 @@
lockdep_assert_held_once(&rdev->mutex);
/* check voltage and requested load before enabling */
- if (rdev->constraints &&
- (rdev->constraints->valid_ops_mask & REGULATOR_CHANGE_DRMS))
+ if (regulator_ops_is_valid(rdev, REGULATOR_CHANGE_DRMS))
drms_uA_update(rdev);
if (rdev->use_count == 0) {
/* The regulator may be on if it's not switchable or left on */
ret = _regulator_is_enabled(rdev);
if (ret == -EINVAL || ret == 0) {
- if (!_regulator_can_change_status(rdev))
+ if (!regulator_ops_is_valid(rdev,
+ REGULATOR_CHANGE_STATUS))
return -EPERM;
ret = _regulator_do_enable(rdev);
@@ -2221,7 +2256,7 @@
(rdev->constraints && !rdev->constraints->always_on)) {
/* we are last user */
- if (_regulator_can_change_status(rdev)) {
+ if (regulator_ops_is_valid(rdev, REGULATOR_CHANGE_STATUS)) {
ret = _notifier_call_chain(rdev,
REGULATOR_EVENT_PRE_DISABLE,
NULL);
@@ -2242,10 +2277,7 @@
rdev->use_count = 0;
} else if (rdev->use_count > 1) {
-
- if (rdev->constraints &&
- (rdev->constraints->valid_ops_mask &
- REGULATOR_CHANGE_DRMS))
+ if (regulator_ops_is_valid(rdev, REGULATOR_CHANGE_DRMS))
drms_uA_update(rdev);
rdev->use_count--;
@@ -2489,8 +2521,7 @@
{
struct regulator_dev *rdev = regulator->rdev;
- if (rdev->constraints &&
- (rdev->constraints->valid_ops_mask & REGULATOR_CHANGE_VOLTAGE)) {
+ if (regulator_ops_is_valid(rdev, REGULATOR_CHANGE_VOLTAGE)) {
if (rdev->desc->n_voltages - rdev->desc->linear_min_sel > 1)
return 1;
@@ -2644,7 +2675,7 @@
int i, voltages, ret;
/* If we can't change voltage check the current voltage */
- if (!(rdev->constraints->valid_ops_mask & REGULATOR_CHANGE_VOLTAGE)) {
+ if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_VOLTAGE)) {
ret = regulator_get_voltage(regulator);
if (ret >= 0)
return min_uV <= ret && ret <= max_uV;
@@ -2850,7 +2881,7 @@
* return successfully even though the regulator does not support
* changing the voltage.
*/
- if (!(rdev->constraints->valid_ops_mask & REGULATOR_CHANGE_VOLTAGE)) {
+ if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_VOLTAGE)) {
current_uV = _regulator_get_voltage(rdev);
if (min_uV <= current_uV && current_uV <= max_uV) {
regulator->min_uV = min_uV;
@@ -3109,6 +3140,23 @@
static int _regulator_get_voltage(struct regulator_dev *rdev)
{
int sel, ret;
+ bool bypassed;
+
+ if (rdev->desc->ops->get_bypass) {
+ ret = rdev->desc->ops->get_bypass(rdev, &bypassed);
+ if (ret < 0)
+ return ret;
+ if (bypassed) {
+ /* if bypassed the regulator must have a supply */
+ if (!rdev->supply) {
+ rdev_err(rdev,
+ "bypassed regulator has no supply!\n");
+ return -EPROBE_DEFER;
+ }
+
+ return _regulator_get_voltage(rdev->supply->rdev);
+ }
+ }
if (rdev->desc->ops->get_voltage_sel) {
sel = rdev->desc->ops->get_voltage_sel(rdev);
@@ -3365,8 +3413,7 @@
if (!rdev->desc->ops->set_bypass)
return 0;
- if (rdev->constraints &&
- !(rdev->constraints->valid_ops_mask & REGULATOR_CHANGE_BYPASS))
+ if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_BYPASS))
return 0;
mutex_lock(&rdev->mutex);
@@ -3840,6 +3887,16 @@
&rdev->bypass_count);
}
+static int regulator_register_resolve_supply(struct device *dev, void *data)
+{
+ struct regulator_dev *rdev = dev_to_rdev(dev);
+
+ if (regulator_resolve_supply(rdev))
+ rdev_dbg(rdev, "unable to resolve supply\n");
+
+ return 0;
+}
+
/**
* regulator_register - register regulator
* @regulator_desc: regulator to register
@@ -3911,8 +3968,6 @@
rdev->dev.of_node = of_node_get(config->of_node);
}
- mutex_lock(&regulator_list_mutex);
-
mutex_init(&rdev->mutex);
rdev->reg_data = config->driver_data;
rdev->owner = regulator_desc->owner;
@@ -3937,7 +3992,9 @@
if ((config->ena_gpio || config->ena_gpio_initialized) &&
gpio_is_valid(config->ena_gpio)) {
+ mutex_lock(&regulator_list_mutex);
ret = regulator_ena_gpio_request(rdev, config);
+ mutex_unlock(&regulator_list_mutex);
if (ret != 0) {
rdev_err(rdev, "Failed to request enable GPIO%d: %d\n",
config->ena_gpio, ret);
@@ -3950,63 +4007,73 @@
rdev->dev.parent = dev;
dev_set_name(&rdev->dev, "regulator.%lu",
(unsigned long) atomic_inc_return(&regulator_no));
- ret = device_register(&rdev->dev);
- if (ret != 0) {
- put_device(&rdev->dev);
- goto wash;
- }
-
- dev_set_drvdata(&rdev->dev, rdev);
/* set regulator constraints */
if (init_data)
constraints = &init_data->constraints;
- ret = set_machine_constraints(rdev, constraints);
- if (ret < 0)
- goto scrub;
-
if (init_data && init_data->supply_regulator)
rdev->supply_name = init_data->supply_regulator;
else if (regulator_desc->supply_name)
rdev->supply_name = regulator_desc->supply_name;
+ /*
+ * Attempt to resolve the regulator supply, if specified,
+ * but don't return an error if we fail because we will try
+ * to resolve it again later as more regulators are added.
+ */
+ if (regulator_resolve_supply(rdev))
+ rdev_dbg(rdev, "unable to resolve supply\n");
+
+ ret = set_machine_constraints(rdev, constraints);
+ if (ret < 0)
+ goto wash;
+
/* add consumers devices */
if (init_data) {
+ mutex_lock(&regulator_list_mutex);
for (i = 0; i < init_data->num_consumer_supplies; i++) {
ret = set_consumer_device_supply(rdev,
init_data->consumer_supplies[i].dev_name,
init_data->consumer_supplies[i].supply);
if (ret < 0) {
+ mutex_unlock(&regulator_list_mutex);
dev_err(dev, "Failed to set supply %s\n",
init_data->consumer_supplies[i].supply);
goto unset_supplies;
}
}
+ mutex_unlock(&regulator_list_mutex);
}
+ ret = device_register(&rdev->dev);
+ if (ret != 0) {
+ put_device(&rdev->dev);
+ goto unset_supplies;
+ }
+
+ dev_set_drvdata(&rdev->dev, rdev);
rdev_init_debugfs(rdev);
-out:
- mutex_unlock(&regulator_list_mutex);
+
+ /* try to resolve each regulator's supply, since a new one was registered */
+ class_for_each_device(&regulator_class, NULL, NULL,
+ regulator_register_resolve_supply);
kfree(config);
return rdev;
unset_supplies:
+ mutex_lock(&regulator_list_mutex);
unset_regulator_supplies(rdev);
-
-scrub:
- regulator_ena_gpio_free(rdev);
- device_unregister(&rdev->dev);
- /* device core frees rdev */
- rdev = ERR_PTR(ret);
- goto out;
-
+ mutex_unlock(&regulator_list_mutex);
wash:
+ kfree(rdev->constraints);
+ mutex_lock(&regulator_list_mutex);
regulator_ena_gpio_free(rdev);
+ mutex_unlock(&regulator_list_mutex);
clean:
kfree(rdev);
- rdev = ERR_PTR(ret);
- goto out;
+ kfree(config);
+ return ERR_PTR(ret);
}
EXPORT_SYMBOL_GPL(regulator_register);
@@ -4032,8 +4099,8 @@
WARN_ON(rdev->open_count);
unset_regulator_supplies(rdev);
list_del(&rdev->list);
- mutex_unlock(&regulator_list_mutex);
regulator_ena_gpio_free(rdev);
+ mutex_unlock(&regulator_list_mutex);
device_unregister(&rdev->dev);
}
EXPORT_SYMBOL_GPL(regulator_unregister);
@@ -4386,7 +4453,7 @@
if (c && c->always_on)
return 0;
- if (c && !(c->valid_ops_mask & REGULATOR_CHANGE_STATUS))
+ if (!regulator_ops_is_valid(rdev, REGULATOR_CHANGE_STATUS))
return 0;
mutex_lock(&rdev->mutex);
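
For reference, reading the constraint_flags file added above produces one boolean per line in the order of the snprintf() format, e.g.:

always_on: 1
boot_on: 0
apply_uV: 1
ramp_disable: 0
soft_start: 0
pull_down: 0
over_current_protection: 0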
diff --git a/drivers/regulator/fan53555.c b/drivers/regulator/fan53555.c
index 2cb5cc3..d7da81a 100644
--- a/drivers/regulator/fan53555.c
+++ b/drivers/regulator/fan53555.c
@@ -65,6 +65,13 @@
FAN53555_CHIP_ID_03,
FAN53555_CHIP_ID_04,
FAN53555_CHIP_ID_05,
+ FAN53555_CHIP_ID_08 = 8,
+};
+
+/* IC mask revision */
+enum {
+ FAN53555_CHIP_REV_00 = 0x3,
+ FAN53555_CHIP_REV_13 = 0xf,
};
enum {
@@ -217,9 +224,26 @@
/* Init voltage range and step */
switch (di->chip_id) {
case FAN53555_CHIP_ID_00:
+ switch (di->chip_rev) {
+ case FAN53555_CHIP_REV_00:
+ di->vsel_min = 600000;
+ di->vsel_step = 10000;
+ break;
+ case FAN53555_CHIP_REV_13:
+ di->vsel_min = 800000;
+ di->vsel_step = 10000;
+ break;
+ default:
+ dev_err(di->dev,
+ "Chip ID %d with rev %d not supported!\n",
+ di->chip_id, di->chip_rev);
+ return -EINVAL;
+ }
+ break;
case FAN53555_CHIP_ID_01:
case FAN53555_CHIP_ID_03:
case FAN53555_CHIP_ID_05:
+ case FAN53555_CHIP_ID_08:
di->vsel_min = 600000;
di->vsel_step = 10000;
break;
diff --git a/drivers/regulator/helpers.c b/drivers/regulator/helpers.c
index b1e32e7..bcf38fd 100644
--- a/drivers/regulator/helpers.c
+++ b/drivers/regulator/helpers.c
@@ -460,7 +460,7 @@
if (ret != 0)
return ret;
- *enable = val & rdev->desc->bypass_mask;
+ *enable = (val & rdev->desc->bypass_mask) == rdev->desc->bypass_val_on;
return 0;
}
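
The as3722 LDO6 bypass support earlier in this series is exactly the case this fix targets: there bypass_mask covers the whole VSEL field and bypass_val_on is a single code (AS3722_LDO6_VSEL_BYPASS), so the old test reported bypass for any non-zero selector:

/* old: true for any non-zero bits under the mask */
*enable = val & mask;
/* new: true only for the designated bypass code */
*enable = (val & mask) == bypass_val_on;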
diff --git a/drivers/regulator/lp3971.c b/drivers/regulator/lp3971.c
index 15c25c6..204b5c5 100644
--- a/drivers/regulator/lp3971.c
+++ b/drivers/regulator/lp3971.c
@@ -365,8 +365,8 @@
mutex_lock(&lp3971->io_lock);
ret = lp3971_i2c_read(lp3971->i2c, reg, 1, &tmp);
- tmp = (tmp & ~mask) | val;
if (ret == 0) {
+ tmp = (tmp & ~mask) | val;
ret = lp3971_i2c_write(lp3971->i2c, reg, 1, &tmp);
dev_dbg(lp3971->dev, "reg write 0x%02x -> 0x%02x\n", (int)reg,
(unsigned)val&0xff);
diff --git a/drivers/regulator/lp3972.c b/drivers/regulator/lp3972.c
index 3a7e96e..ff0c275 100644
--- a/drivers/regulator/lp3972.c
+++ b/drivers/regulator/lp3972.c
@@ -211,8 +211,8 @@
mutex_lock(&lp3972->io_lock);
ret = lp3972_i2c_read(lp3972->i2c, reg, 1, &tmp);
- tmp = (tmp & ~mask) | val;
if (ret == 0) {
+ tmp = (tmp & ~mask) | val;
ret = lp3972_i2c_write(lp3972->i2c, reg, 1, &tmp);
dev_dbg(lp3972->dev, "reg write 0x%02x -> 0x%02x\n", (int)reg,
(unsigned)val & 0xff);
diff --git a/drivers/regulator/lp873x-regulator.c b/drivers/regulator/lp873x-regulator.c
new file mode 100644
index 0000000..b4ffd11
--- /dev/null
+++ b/drivers/regulator/lp873x-regulator.c
@@ -0,0 +1,241 @@
+/*
+ * Regulator driver for LP873X PMIC
+ *
+ * Copyright (C) 2016 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether expressed or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License version 2 for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+
+#include <linux/mfd/lp873x.h>
+
+#define LP873X_REGULATOR(_name, _id, _of, _ops, _n, _vr, _vm, _er, _em, \
+ _delay, _lr, _nlr, _cr) \
+ [_id] = { \
+ .desc = { \
+ .name = _name, \
+ .id = _id, \
+ .of_match = of_match_ptr(_of), \
+ .regulators_node = of_match_ptr("regulators"),\
+ .ops = &_ops, \
+ .n_voltages = _n, \
+ .type = REGULATOR_VOLTAGE, \
+ .owner = THIS_MODULE, \
+ .vsel_reg = _vr, \
+ .vsel_mask = _vm, \
+ .enable_reg = _er, \
+ .enable_mask = _em, \
+ .ramp_delay = _delay, \
+ .linear_ranges = _lr, \
+ .n_linear_ranges = _nlr, \
+ }, \
+ .ctrl2_reg = _cr, \
+ }
+
+struct lp873x_regulator {
+ struct regulator_desc desc;
+ unsigned int ctrl2_reg;
+};
+
+static const struct lp873x_regulator regulators[];
+
+static const struct regulator_linear_range buck0_buck1_ranges[] = {
+ REGULATOR_LINEAR_RANGE(0, 0x0, 0x13, 0),
+ REGULATOR_LINEAR_RANGE(700000, 0x14, 0x17, 10000),
+ REGULATOR_LINEAR_RANGE(735000, 0x18, 0x9d, 5000),
+ REGULATOR_LINEAR_RANGE(1420000, 0x9e, 0xff, 20000),
+};
+
+static const struct regulator_linear_range ldo0_ldo1_ranges[] = {
+ REGULATOR_LINEAR_RANGE(800000, 0x0, 0x19, 100000),
+};
+
+static unsigned int lp873x_buck_ramp_delay[] = {
+ 30000, 15000, 10000, 7500, 3800, 1900, 940, 470
+};
+
+/* LP873X BUCK current limit */
+static const unsigned int lp873x_buck_uA[] = {
+ 1500000, 2000000, 2500000, 3000000, 3500000, 4000000,
+};
+
+static int lp873x_buck_set_ramp_delay(struct regulator_dev *rdev,
+ int ramp_delay)
+{
+ int id = rdev_get_id(rdev);
+ struct lp873x *lp873 = rdev_get_drvdata(rdev);
+ unsigned int reg;
+ int ret;
+
+ if (ramp_delay <= 470)
+ reg = 7;
+ else if (ramp_delay <= 940)
+ reg = 6;
+ else if (ramp_delay <= 1900)
+ reg = 5;
+ else if (ramp_delay <= 3800)
+ reg = 4;
+ else if (ramp_delay <= 7500)
+ reg = 3;
+ else if (ramp_delay <= 10000)
+ reg = 2;
+ else if (ramp_delay <= 15000)
+ reg = 1;
+ else
+ reg = 0;
+
+ ret = regmap_update_bits(lp873->regmap, regulators[id].ctrl2_reg,
+ LP873X_BUCK0_CTRL_2_BUCK0_SLEW_RATE,
+ reg << __ffs(LP873X_BUCK0_CTRL_2_BUCK0_SLEW_RATE));
+ if (ret) {
+ dev_err(lp873->dev, "SLEW RATE write failed: %d\n", ret);
+ return ret;
+ }
+
+ rdev->constraints->ramp_delay = lp873x_buck_ramp_delay[reg];
+
+ return 0;
+}
+
+static int lp873x_buck_set_current_limit(struct regulator_dev *rdev,
+ int min_uA, int max_uA)
+{
+ int id = rdev_get_id(rdev);
+ struct lp873x *lp873 = rdev_get_drvdata(rdev);
+ int i;
+
+ for (i = ARRAY_SIZE(lp873x_buck_uA) - 1; i >= 0; i--) {
+ if (lp873x_buck_uA[i] >= min_uA &&
+ lp873x_buck_uA[i] <= max_uA)
+ return regmap_update_bits(lp873->regmap,
+ regulators[id].ctrl2_reg,
+ LP873X_BUCK0_CTRL_2_BUCK0_ILIM,
+ i << __ffs(LP873X_BUCK0_CTRL_2_BUCK0_ILIM));
+ }
+
+ return -EINVAL;
+}
+
+static int lp873x_buck_get_current_limit(struct regulator_dev *rdev)
+{
+ int id = rdev_get_id(rdev);
+ struct lp873x *lp873 = rdev_get_drvdata(rdev);
+ int ret;
+ unsigned int val;
+
+ ret = regmap_read(lp873->regmap, regulators[id].ctrl2_reg, &val);
+ if (ret)
+ return ret;
+
+ val = (val & LP873X_BUCK0_CTRL_2_BUCK0_ILIM) >>
+ __ffs(LP873X_BUCK0_CTRL_2_BUCK0_ILIM);
+
+ return (val < ARRAY_SIZE(lp873x_buck_uA)) ?
+ lp873x_buck_uA[val] : -EINVAL;
+}
+
+/* Operations permitted on BUCK0, BUCK1 */
+static struct regulator_ops lp873x_buck01_ops = {
+ .is_enabled = regulator_is_enabled_regmap,
+ .enable = regulator_enable_regmap,
+ .disable = regulator_disable_regmap,
+ .get_voltage_sel = regulator_get_voltage_sel_regmap,
+ .set_voltage_sel = regulator_set_voltage_sel_regmap,
+ .list_voltage = regulator_list_voltage_linear_range,
+ .map_voltage = regulator_map_voltage_linear_range,
+ .set_voltage_time_sel = regulator_set_voltage_time_sel,
+ .set_ramp_delay = lp873x_buck_set_ramp_delay,
+ .set_current_limit = lp873x_buck_set_current_limit,
+ .get_current_limit = lp873x_buck_get_current_limit,
+};
+
+/* Operations permitted on LDO0 and LDO1 */
+static struct regulator_ops lp873x_ldo01_ops = {
+ .is_enabled = regulator_is_enabled_regmap,
+ .enable = regulator_enable_regmap,
+ .disable = regulator_disable_regmap,
+ .get_voltage_sel = regulator_get_voltage_sel_regmap,
+ .set_voltage_sel = regulator_set_voltage_sel_regmap,
+ .list_voltage = regulator_list_voltage_linear_range,
+ .map_voltage = regulator_map_voltage_linear_range,
+};
+
+static const struct lp873x_regulator regulators[] = {
+ LP873X_REGULATOR("BUCK0", LP873X_BUCK_0, "buck0", lp873x_buck01_ops,
+ 256, LP873X_REG_BUCK0_VOUT,
+ LP873X_BUCK0_VOUT_BUCK0_VSET, LP873X_REG_BUCK0_CTRL_1,
+ LP873X_BUCK0_CTRL_1_BUCK0_EN, 10000,
+ buck0_buck1_ranges, 4, LP873X_REG_BUCK0_CTRL_2),
+ LP873X_REGULATOR("BUCK1", LP873X_BUCK_1, "buck1", lp873x_buck01_ops,
+ 256, LP873X_REG_BUCK1_VOUT,
+ LP873X_BUCK1_VOUT_BUCK1_VSET, LP873X_REG_BUCK1_CTRL_1,
+ LP873X_BUCK1_CTRL_1_BUCK1_EN, 10000,
+ buck0_buck1_ranges, 4, LP873X_REG_BUCK1_CTRL_2),
+ LP873X_REGULATOR("LDO0", LP873X_LDO_0, "ldo0", lp873x_ldo01_ops, 26,
+ LP873X_REG_LDO0_VOUT, LP873X_LDO0_VOUT_LDO0_VSET,
+ LP873X_REG_LDO0_CTRL,
+ LP873X_LDO0_CTRL_LDO0_EN, 0, ldo0_ldo1_ranges, 1,
+ 0xFF),
+ LP873X_REGULATOR("LDO1", LP873X_LDO_1, "ldo1", lp873x_ldo01_ops, 26,
+ LP873X_REG_LDO1_VOUT, LP873X_LDO1_VOUT_LDO1_VSET,
+ LP873X_REG_LDO1_CTRL,
+ LP873X_LDO1_CTRL_LDO1_EN, 0, ldo0_ldo1_ranges, 1,
+ 0xFF),
+};
+
+static int lp873x_regulator_probe(struct platform_device *pdev)
+{
+ struct lp873x *lp873 = dev_get_drvdata(pdev->dev.parent);
+ struct regulator_config config = { };
+ struct regulator_dev *rdev;
+ int i;
+
+ platform_set_drvdata(pdev, lp873);
+
+ config.dev = &pdev->dev;
+ config.dev->of_node = lp873->dev->of_node;
+ config.driver_data = lp873;
+ config.regmap = lp873->regmap;
+
+ for (i = 0; i < ARRAY_SIZE(regulators); i++) {
rdev = devm_regulator_register(&pdev->dev, &regulators[i].desc,
+ &config);
+ if (IS_ERR(rdev)) {
+ dev_err(lp873->dev, "failed to register %s regulator\n",
+ pdev->name);
+ return PTR_ERR(rdev);
+ }
+ }
+
+ return 0;
+}
+
+static const struct platform_device_id lp873x_regulator_id_table[] = {
+ { "lp873x-regulator", },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(platform, lp873x_regulator_id_table);
+
+static struct platform_driver lp873x_regulator_driver = {
+ .driver = {
+ .name = "lp873x-pmic",
+ },
+ .probe = lp873x_regulator_probe,
+ .id_table = lp873x_regulator_id_table,
+};
+module_platform_driver(lp873x_regulator_driver);
+
+MODULE_AUTHOR("J Keerthy <j-keerthy@ti.com>");
+MODULE_DESCRIPTION("LP873X voltage regulator driver");
+MODULE_ALIAS("platform:lp873x-pmic");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/regulator/max14577.c b/drivers/regulator/max14577-regulator.c
similarity index 100%
rename from drivers/regulator/max14577.c
rename to drivers/regulator/max14577-regulator.c
diff --git a/drivers/regulator/max77620-regulator.c b/drivers/regulator/max77620-regulator.c
index 73a3356..321e804 100644
--- a/drivers/regulator/max77620-regulator.c
+++ b/drivers/regulator/max77620-regulator.c
@@ -81,6 +81,7 @@
int suspend_fps_pd_slot;
int suspend_fps_pu_slot;
int current_mode;
+ int ramp_rate_setting;
};
struct max77620_regulator {
@@ -307,6 +308,43 @@
return 0;
}
+static int max77620_set_slew_rate(struct max77620_regulator *pmic, int id,
+ int slew_rate)
+{
+ struct max77620_regulator_info *rinfo = pmic->rinfo[id];
+ unsigned int val;
+ int ret;
+ u8 mask;
+
+ if (rinfo->type == MAX77620_REGULATOR_TYPE_SD) {
+ if (slew_rate <= 13750)
+ val = 0;
+ else if (slew_rate <= 27500)
+ val = 1;
+ else if (slew_rate <= 55000)
+ val = 2;
+ else
+ val = 3;
+ val <<= MAX77620_SD_SR_SHIFT;
+ mask = MAX77620_SD_SR_MASK;
+ } else {
+ if (slew_rate <= 5000)
+ val = 1;
+ else
+ val = 0;
+ mask = MAX77620_LDO_SLEW_RATE_MASK;
+ }
+
+ ret = regmap_update_bits(pmic->rmap, rinfo->cfg_addr, mask, val);
+ if (ret < 0) {
+ dev_err(pmic->dev, "Regulator %d slew rate set failed: %d\n",
+ id, ret);
+ return ret;
+ }
+
+ return 0;
+}
+
static int max77620_init_pmic(struct max77620_regulator *pmic, int id)
{
struct max77620_regulator_pdata *rpdata = &pmic->reg_pdata[id];
@@ -351,6 +389,13 @@
if (ret < 0)
return ret;
+ if (rpdata->ramp_rate_setting) {
+ ret = max77620_set_slew_rate(pmic, id,
+ rpdata->ramp_rate_setting);
+ if (ret < 0)
+ return ret;
+ }
+
return 0;
}
@@ -502,35 +547,16 @@
{
struct max77620_regulator *pmic = rdev_get_drvdata(rdev);
int id = rdev_get_id(rdev);
- struct max77620_regulator_info *rinfo = pmic->rinfo[id];
- int ret, val;
- u8 mask;
+ struct max77620_regulator_pdata *rpdata = &pmic->reg_pdata[id];
- if (rinfo->type == MAX77620_REGULATOR_TYPE_SD) {
- if (ramp_delay <= 13750)
- val = 0;
- else if (ramp_delay <= 27500)
- val = 1;
- else if (ramp_delay <= 55000)
- val = 2;
- else
- val = 3;
- val <<= MAX77620_SD_SR_SHIFT;
- mask = MAX77620_SD_SR_MASK;
- } else {
- if (ramp_delay <= 5000)
- val = 1;
- else
- val = 0;
- mask = MAX77620_LDO_SLEW_RATE_MASK;
- }
+ /* A device-specific ramp rate setting indicates that the platform's
+ * actual ramp rate differs from the advertised value. In that case,
+ * do not configure anything and just return success.
+ */
+ if (rpdata->ramp_rate_setting)
+ return 0;
- ret = regmap_update_bits(pmic->rmap, rinfo->cfg_addr, mask, val);
- if (ret < 0)
- dev_err(pmic->dev, "Reg 0x%02x update failed: %d\n",
- rinfo->cfg_addr, ret);
-
- return ret;
+ return max77620_set_slew_rate(pmic, id, ramp_delay);
}
static int max77620_of_parse_cb(struct device_node *np,
@@ -563,6 +589,9 @@
np, "maxim,suspend-fps-power-down-slot", &pval);
rpdata->suspend_fps_pd_slot = (!ret) ? pval : -1;
+ ret = of_property_read_u32(np, "maxim,ramp-rate-setting", &pval);
+ rpdata->ramp_rate_setting = (!ret) ? pval : 0;
+
return max77620_init_pmic(pmic, desc->id);
}
diff --git a/drivers/regulator/max77686-regulator.c b/drivers/regulator/max77686-regulator.c
index 17ccf36..ac4fa58 100644
--- a/drivers/regulator/max77686-regulator.c
+++ b/drivers/regulator/max77686-regulator.c
@@ -41,6 +41,8 @@
#define MAX77686_LDO_LOW_UVSTEP 25000
#define MAX77686_BUCK_MINUV 750000
#define MAX77686_BUCK_UVSTEP 50000
+#define MAX77686_BUCK_ENABLE_TIME 40 /* us */
+#define MAX77686_DVS_ENABLE_TIME 22 /* us */
#define MAX77686_RAMP_DELAY 100000 /* uV/us */
#define MAX77686_DVS_RAMP_DELAY 27500 /* uV/us */
#define MAX77686_DVS_MINUV 600000
@@ -422,6 +424,7 @@
.min_uV = MAX77686_BUCK_MINUV, \
.uV_step = MAX77686_BUCK_UVSTEP, \
.ramp_delay = MAX77686_RAMP_DELAY, \
+ .enable_time = MAX77686_BUCK_ENABLE_TIME, \
.n_voltages = MAX77686_VSEL_MASK + 1, \
.vsel_reg = MAX77686_REG_BUCK5OUT + (num - 5) * 2, \
.vsel_mask = MAX77686_VSEL_MASK, \
@@ -439,6 +442,7 @@
.min_uV = MAX77686_BUCK_MINUV, \
.uV_step = MAX77686_BUCK_UVSTEP, \
.ramp_delay = MAX77686_RAMP_DELAY, \
+ .enable_time = MAX77686_BUCK_ENABLE_TIME, \
.n_voltages = MAX77686_VSEL_MASK + 1, \
.vsel_reg = MAX77686_REG_BUCK1OUT, \
.vsel_mask = MAX77686_VSEL_MASK, \
@@ -456,6 +460,7 @@
.min_uV = MAX77686_DVS_MINUV, \
.uV_step = MAX77686_DVS_UVSTEP, \
.ramp_delay = MAX77686_DVS_RAMP_DELAY, \
+ .enable_time = MAX77686_DVS_ENABLE_TIME, \
.n_voltages = MAX77686_DVS_VSEL_MASK + 1, \
.vsel_reg = MAX77686_REG_BUCK2DVS1 + (num - 2) * 10, \
.vsel_mask = MAX77686_DVS_VSEL_MASK, \
@@ -553,17 +558,7 @@
.id_table = max77686_pmic_id,
};
-static int __init max77686_pmic_init(void)
-{
- return platform_driver_register(&max77686_pmic_driver);
-}
-subsys_initcall(max77686_pmic_init);
-
-static void __exit max77686_pmic_cleanup(void)
-{
- platform_driver_unregister(&max77686_pmic_driver);
-}
-module_exit(max77686_pmic_cleanup);
+module_platform_driver(max77686_pmic_driver);
MODULE_DESCRIPTION("MAXIM 77686 Regulator Driver");
MODULE_AUTHOR("Chiwoong Byun <woong.byun@samsung.com>");
diff --git a/drivers/regulator/max77693.c b/drivers/regulator/max77693-regulator.c
similarity index 100%
rename from drivers/regulator/max77693.c
rename to drivers/regulator/max77693-regulator.c
diff --git a/drivers/regulator/max77802-regulator.c b/drivers/regulator/max77802-regulator.c
index c07ee13..1d35393 100644
--- a/drivers/regulator/max77802-regulator.c
+++ b/drivers/regulator/max77802-regulator.c
@@ -5,7 +5,7 @@
* Simon Glass <sjg@chromium.org>
*
* Copyright (C) 2012 Samsung Electronics
- * Chiwoong Byun <woong.byun@smasung.com>
+ * Chiwoong Byun <woong.byun@samsung.com>
* Jonghwa Lee <jonghwa3.lee@samsung.com>
*
* This program is free software; you can redistribute it and/or modify
diff --git a/drivers/regulator/max8973-regulator.c b/drivers/regulator/max8973-regulator.c
index 5b75b7c..08d2f13 100644
--- a/drivers/regulator/max8973-regulator.c
+++ b/drivers/regulator/max8973-regulator.c
@@ -38,6 +38,9 @@
#include <linux/i2c.h>
#include <linux/slab.h>
#include <linux/regmap.h>
+#include <linux/thermal.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
/* Register definitions */
#define MAX8973_VOUT 0x0
@@ -74,6 +77,7 @@
#define MAX8973_WDTMR_ENABLE BIT(6)
#define MAX8973_DISCH_ENBABLE BIT(5)
#define MAX8973_FT_ENABLE BIT(4)
+#define MAX77621_T_JUNCTION_120 BIT(7)
#define MAX8973_CKKADV_TRIP_MASK 0xC
#define MAX8973_CKKADV_TRIP_DISABLE 0xC
@@ -93,6 +97,12 @@
#define MAX8973_VOLATGE_STEP 6250
#define MAX8973_BUCK_N_VOLTAGE 0x80
+#define MAX77621_CHIPID_TJINT_S BIT(0)
+
+#define MAX77621_NORMAL_OPERATING_TEMP 100000
+#define MAX77621_TJINT_WARNING_TEMP_120 120000
+#define MAX77621_TJINT_WARNING_TEMP_140 140000
+
enum device_id {
MAX8973,
MAX77621
@@ -112,6 +122,9 @@
int curr_gpio_val;
struct regulator_ops ops;
enum device_id id;
+ int junction_temp_warning;
+ int irq;
+ struct thermal_zone_device *tz_device;
};
/*
@@ -391,6 +404,10 @@
if (pdata->control_flags & MAX8973_CONTROL_FREQ_SHIFT_9PER_ENABLE)
control1 |= MAX8973_FREQSHIFT_9PER;
+ if ((pdata->junction_temp_warning == MAX77621_TJINT_WARNING_TEMP_120) &&
+ (max->id == MAX77621))
+ control2 |= MAX77621_T_JUNCTION_120;
+
if (!(pdata->control_flags & MAX8973_CONTROL_PULL_DOWN_ENABLE))
control2 |= MAX8973_DISCH_ENBABLE;
@@ -457,6 +474,79 @@
return ret;
}
+static int max8973_thermal_read_temp(void *data, int *temp)
+{
+ struct max8973_chip *mchip = data;
+ unsigned int val;
+ int ret;
+
+ ret = regmap_read(mchip->regmap, MAX8973_CHIPID1, &val);
+ if (ret < 0) {
+ dev_err(mchip->dev, "Failed to read register CHIPID1: %d\n", ret);
+ return ret;
+ }
+
+ /* Report +1 degC above the threshold to trigger the cooling device */
+ if (val & MAX77621_CHIPID_TJINT_S)
+ *temp = mchip->junction_temp_warning + 1000;
+ else
+ *temp = MAX77621_NORMAL_OPERATING_TEMP;
+
+ return 0;
+}
+
+static irqreturn_t max8973_thermal_irq(int irq, void *data)
+{
+ struct max8973_chip *mchip = data;
+
+ thermal_zone_device_update(mchip->tz_device);
+
+ return IRQ_HANDLED;
+}
+
+static const struct thermal_zone_of_device_ops max77621_tz_ops = {
+ .get_temp = max8973_thermal_read_temp,
+};
+
+static int max8973_thermal_init(struct max8973_chip *mchip)
+{
+ struct thermal_zone_device *tzd;
+ struct irq_data *irq_data;
+ unsigned long irq_flags = 0;
+ int ret;
+
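+ /* Junction thermal warning is only supported on the MAX77621 */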
+ if (mchip->id != MAX77621)
+ return 0;
+
+ tzd = devm_thermal_zone_of_sensor_register(mchip->dev, 0, mchip,
+ &max77621_tz_ops);
+ if (IS_ERR(tzd)) {
+ ret = PTR_ERR(tzd);
+ dev_err(mchip->dev, "Failed to register thermal sensor: %d\n",
+ ret);
+ return ret;
+ }
+
+ mchip->tz_device = tzd; /* used by the threaded IRQ handler below */
+
+ if (mchip->irq <= 0)
+ return 0;
+
+ irq_data = irq_get_irq_data(mchip->irq);
+ if (irq_data)
+ irq_flags = irqd_get_trigger_type(irq_data);
+
+ ret = devm_request_threaded_irq(mchip->dev, mchip->irq, NULL,
+ max8973_thermal_irq,
+ IRQF_ONESHOT | IRQF_SHARED | irq_flags,
+ dev_name(mchip->dev), mchip);
+ if (ret < 0) {
+ dev_err(mchip->dev, "Failed to request irq %d, %d\n",
+ mchip->irq, ret);
+ return ret;
+ }
+
+ return 0;
+}
+
static const struct regmap_config max8973_regmap_config = {
.reg_bits = 8,
.val_bits = 8,
@@ -521,6 +611,11 @@
pdata->control_flags |= MAX8973_CONTROL_CLKADV_TRIP_DISABLED;
}
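+
+ /* Only 120 degC and 140 degC warning thresholds exist; default to 140 degC */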
+ pdata->junction_temp_warning = MAX77621_TJINT_WARNING_TEMP_140;
+ ret = of_property_read_u32(np, "junction-warn-millicelsius", &pval);
+ if (!ret && (pval <= MAX77621_TJINT_WARNING_TEMP_120))
+ pdata->junction_temp_warning = MAX77621_TJINT_WARNING_TEMP_120;
+
return pdata;
}
@@ -608,6 +703,7 @@
max->enable_external_control = pdata->enable_ext_control;
max->curr_gpio_val = pdata->dvs_def_state;
max->curr_vout_reg = MAX8973_VOUT + pdata->dvs_def_state;
+ max->junction_temp_warning = pdata->junction_temp_warning;
if (gpio_is_valid(max->enable_gpio))
max->enable_external_control = true;
@@ -718,6 +814,7 @@
return ret;
}
+ max8973_thermal_init(max);
return 0;
}
diff --git a/drivers/regulator/max8997.c b/drivers/regulator/max8997-regulator.c
similarity index 99%
rename from drivers/regulator/max8997.c
rename to drivers/regulator/max8997-regulator.c
index ea0196d..efabc0e 100644
--- a/drivers/regulator/max8997.c
+++ b/drivers/regulator/max8997-regulator.c
@@ -2,7 +2,7 @@
* max8997.c - Regulator driver for the Maxim 8997/8966
*
* Copyright (C) 2011 Samsung Electronics
- * MyungJoo Ham <myungjoo.ham@smasung.com>
+ * MyungJoo Ham <myungjoo.ham@samsung.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
diff --git a/drivers/regulator/of_regulator.c b/drivers/regulator/of_regulator.c
index 6b0aa80..cd828db 100644
--- a/drivers/regulator/of_regulator.c
+++ b/drivers/regulator/of_regulator.c
@@ -45,9 +45,9 @@
/* Voltage change possible? */
if (constraints->min_uV != constraints->max_uV)
constraints->valid_ops_mask |= REGULATOR_CHANGE_VOLTAGE;
- /* Only one voltage? Then make sure it's set. */
- if (constraints->min_uV && constraints->max_uV &&
- constraints->min_uV == constraints->max_uV)
+
+ /* Do we have a voltage range? If so, try to apply it. */
+ if (constraints->min_uV && constraints->max_uV)
constraints->apply_uV = true;
if (!of_property_read_u32(np, "regulator-microvolt-offset", &pval))
diff --git a/drivers/regulator/palmas-regulator.c b/drivers/regulator/palmas-regulator.c
index 6efc7ee..f11d41d 100644
--- a/drivers/regulator/palmas-regulator.c
+++ b/drivers/regulator/palmas-regulator.c
@@ -944,6 +944,8 @@
if (id == PALMAS_REG_LDO9) {
desc->ops = &palmas_ops_ldo9;
desc->bypass_reg = desc->enable_reg;
+ desc->bypass_val_on =
+ PALMAS_LDO9_CTRL_LDO_BYPASS_EN;
desc->bypass_mask =
PALMAS_LDO9_CTRL_LDO_BYPASS_EN;
}
@@ -1055,6 +1057,8 @@
id == TPS65917_REG_LDO2) {
desc->ops = &tps65917_ops_ldo_1_2;
desc->bypass_reg = desc->enable_reg;
+ desc->bypass_val_on =
+ TPS65917_LDO1_CTRL_BYPASS_EN;
desc->bypass_mask =
TPS65917_LDO1_CTRL_BYPASS_EN;
}
@@ -1206,6 +1210,7 @@
desc->enable_mask = SMPS10_BOOST_EN;
desc->bypass_reg = PALMAS_BASE_TO_REG(PALMAS_SMPS_BASE,
PALMAS_SMPS10_CTRL);
+ desc->bypass_val_on = SMPS10_BYPASS_EN;
desc->bypass_mask = SMPS10_BYPASS_EN;
desc->min_uV = 3750000;
desc->uV_step = 1250000;
@@ -1462,10 +1467,10 @@
.ldo_register = tps65917_ldo_registration,
};
-static void palmas_dt_to_pdata(struct device *dev,
- struct device_node *node,
- struct palmas_pmic_platform_data *pdata,
- struct palmas_pmic_driver_data *ddata)
+static int palmas_dt_to_pdata(struct device *dev,
+ struct device_node *node,
+ struct palmas_pmic_platform_data *pdata,
+ struct palmas_pmic_driver_data *ddata)
{
struct device_node *regulators;
u32 prop;
@@ -1474,7 +1479,7 @@
regulators = of_get_child_by_name(node, "regulators");
if (!regulators) {
dev_info(dev, "regulator node not found\n");
- return;
+ return 0;
}
ret = of_regulator_match(dev, regulators, ddata->palmas_matches,
@@ -1482,25 +1487,29 @@
of_node_put(regulators);
if (ret < 0) {
dev_err(dev, "Error parsing regulator init data: %d\n", ret);
- return;
+ return 0;
}
for (idx = 0; idx < ddata->max_reg; idx++) {
- if (!ddata->palmas_matches[idx].init_data ||
- !ddata->palmas_matches[idx].of_node)
+ struct of_regulator_match *match;
+ struct palmas_reg_init *rinit;
+ struct device_node *np;
+
+ match = &ddata->palmas_matches[idx];
+ np = match->of_node;
+
+ if (!match->init_data || !np)
continue;
- pdata->reg_data[idx] = ddata->palmas_matches[idx].init_data;
+ rinit = devm_kzalloc(dev, sizeof(*rinit), GFP_KERNEL);
+ if (!rinit)
+ return -ENOMEM;
- pdata->reg_init[idx] = devm_kzalloc(dev,
- sizeof(struct palmas_reg_init), GFP_KERNEL);
+ pdata->reg_data[idx] = match->init_data;
+ pdata->reg_init[idx] = rinit;
- pdata->reg_init[idx]->warm_reset =
- of_property_read_bool(ddata->palmas_matches[idx].of_node,
- "ti,warm-reset");
-
- ret = of_property_read_u32(ddata->palmas_matches[idx].of_node,
- "ti,roof-floor", &prop);
+ rinit->warm_reset = of_property_read_bool(np, "ti,warm-reset");
+ ret = of_property_read_u32(np, "ti,roof-floor", &prop);
/* EINVAL: Property not found */
if (ret != -EINVAL) {
int econtrol;
@@ -1522,31 +1531,29 @@
WARN_ON(1);
dev_warn(dev,
"%s: Invalid roof-floor option: %u\n",
- palmas_matches[idx].name, prop);
+ match->name, prop);
break;
}
}
- pdata->reg_init[idx]->roof_floor = econtrol;
+ rinit->roof_floor = econtrol;
}
- ret = of_property_read_u32(ddata->palmas_matches[idx].of_node,
- "ti,mode-sleep", &prop);
+ ret = of_property_read_u32(np, "ti,mode-sleep", &prop);
if (!ret)
- pdata->reg_init[idx]->mode_sleep = prop;
+ rinit->mode_sleep = prop;
- ret = of_property_read_bool(ddata->palmas_matches[idx].of_node,
- "ti,smps-range");
+ ret = of_property_read_bool(np, "ti,smps-range");
if (ret)
- pdata->reg_init[idx]->vsel =
- PALMAS_SMPS12_VOLTAGE_RANGE;
+ rinit->vsel = PALMAS_SMPS12_VOLTAGE_RANGE;
if (idx == PALMAS_REG_LDO8)
pdata->enable_ldo8_tracking = of_property_read_bool(
- ddata->palmas_matches[idx].of_node,
- "ti,enable-ldo8-tracking");
+ np, "ti,enable-ldo8-tracking");
}
pdata->ldo6_vibrator = of_property_read_bool(node, "ti,ldo6-vibrator");
+
+ return 0;
}
static const struct of_device_id of_palmas_match_tbl[] = {
@@ -1628,7 +1635,9 @@
platform_set_drvdata(pdev, pmic);
pmic->palmas->pmic_ddata = driver_data;
- palmas_dt_to_pdata(&pdev->dev, node, pdata, driver_data);
+ ret = palmas_dt_to_pdata(&pdev->dev, node, pdata, driver_data);
+ if (ret)
+ return ret;
ret = palmas_smps_read(palmas, PALMAS_SMPS_CTRL, &reg);
if (ret)
diff --git a/drivers/regulator/pv88080-regulator.c b/drivers/regulator/pv88080-regulator.c
new file mode 100644
index 0000000..d710756
--- /dev/null
+++ b/drivers/regulator/pv88080-regulator.c
@@ -0,0 +1,419 @@
+/*
+ * pv88080-regulator.c - Regulator device driver for PV88080
+ * Copyright (C) 2016 Powerventure Semiconductor Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/err.h>
+#include <linux/gpio.h>
+#include <linux/i2c.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/machine.h>
+#include <linux/regmap.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/regulator/of_regulator.h>
+#include <linux/proc_fs.h>
+#include <linux/uaccess.h>
+#include "pv88080-regulator.h"
+
+#define PV88080_MAX_REGULATORS 3
+
+/* PV88080 REGULATOR IDs */
+enum {
+ /* BUCKs */
+ PV88080_ID_BUCK1,
+ PV88080_ID_BUCK2,
+ PV88080_ID_BUCK3,
+};
+
+struct pv88080_regulator {
+ struct regulator_desc desc;
+ /* Current limiting */
+ unsigned int n_current_limits;
+ const int *current_limits;
+ unsigned int limit_mask;
+ unsigned int conf;
+ unsigned int conf2;
+ unsigned int conf5;
+};
+
+struct pv88080 {
+ struct device *dev;
+ struct regmap *regmap;
+ struct regulator_dev *rdev[PV88080_MAX_REGULATORS];
+};
+
+struct pv88080_buck_voltage {
+ int min_uV;
+ int max_uV;
+ int uV_step;
+};
+
+static const struct regmap_config pv88080_regmap_config = {
+ .reg_bits = 8,
+ .val_bits = 8,
+};
+
+/* Current limit arrays (in uA) for BUCK1, BUCK2 and BUCK3.
+ * Entry indices correspond to register values.
+ */
+
+static const int pv88080_buck1_limits[] = {
+ 3230000, 5130000, 6960000, 8790000
+};
+
+static const int pv88080_buck23_limits[] = {
+ 1496000, 2393000, 3291000, 4189000
+};
+
+static const struct pv88080_buck_voltage pv88080_buck_vol[2] = {
+ {
+ .min_uV = 600000,
+ .max_uV = 1393750,
+ .uV_step = 6250,
+ },
+ {
+ .min_uV = 1400000,
+ .max_uV = 2193750,
+ .uV_step = 6250,
+ },
+};
+
+static unsigned int pv88080_buck_get_mode(struct regulator_dev *rdev)
+{
+ struct pv88080_regulator *info = rdev_get_drvdata(rdev);
+ unsigned int data;
+ int ret, mode = 0;
+
+ ret = regmap_read(rdev->regmap, info->conf, &data);
+ if (ret < 0)
+ return ret;
+
+ switch (data & PV88080_BUCK1_MODE_MASK) {
+ case PV88080_BUCK_MODE_SYNC:
+ mode = REGULATOR_MODE_FAST;
+ break;
+ case PV88080_BUCK_MODE_AUTO:
+ mode = REGULATOR_MODE_NORMAL;
+ break;
+ case PV88080_BUCK_MODE_SLEEP:
+ mode = REGULATOR_MODE_STANDBY;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return mode;
+}
+
+static int pv88080_buck_set_mode(struct regulator_dev *rdev,
+ unsigned int mode)
+{
+ struct pv88080_regulator *info = rdev_get_drvdata(rdev);
+ int val = 0;
+
+ switch (mode) {
+ case REGULATOR_MODE_FAST:
+ val = PV88080_BUCK_MODE_SYNC;
+ break;
+ case REGULATOR_MODE_NORMAL:
+ val = PV88080_BUCK_MODE_AUTO;
+ break;
+ case REGULATOR_MODE_STANDBY:
+ val = PV88080_BUCK_MODE_SLEEP;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return regmap_update_bits(rdev->regmap, info->conf,
+ PV88080_BUCK1_MODE_MASK, val);
+}
+
+static int pv88080_set_current_limit(struct regulator_dev *rdev, int min,
+ int max)
+{
+ struct pv88080_regulator *info = rdev_get_drvdata(rdev);
+ int i;
+
+ /* Pick the highest current limit that falls within [min, max] */
+ for (i = info->n_current_limits - 1; i >= 0; i--) {
+ if (min <= info->current_limits[i]
+ && max >= info->current_limits[i]) {
+ return regmap_update_bits(rdev->regmap,
+ info->conf,
+ info->limit_mask,
+ i << PV88080_BUCK1_ILIM_SHIFT);
+ }
+ }
+
+ return -EINVAL;
+}
+
+static int pv88080_get_current_limit(struct regulator_dev *rdev)
+{
+ struct pv88080_regulator *info = rdev_get_drvdata(rdev);
+ unsigned int data;
+ int ret;
+
+ ret = regmap_read(rdev->regmap, info->conf, &data);
+ if (ret < 0)
+ return ret;
+
+ data = (data & info->limit_mask) >> PV88080_BUCK1_ILIM_SHIFT;
+ return info->current_limits[data];
+}
+
+static struct regulator_ops pv88080_buck_ops = {
+ .get_mode = pv88080_buck_get_mode,
+ .set_mode = pv88080_buck_set_mode,
+ .enable = regulator_enable_regmap,
+ .disable = regulator_disable_regmap,
+ .is_enabled = regulator_is_enabled_regmap,
+ .set_voltage_sel = regulator_set_voltage_sel_regmap,
+ .get_voltage_sel = regulator_get_voltage_sel_regmap,
+ .list_voltage = regulator_list_voltage_linear,
+ .set_current_limit = pv88080_set_current_limit,
+ .get_current_limit = pv88080_get_current_limit,
+};
+
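+/* Expand to a pv88080_regulator initializer: a regulator_desc plus the
+ * chip-specific current-limit and configuration register info.
+ */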
+#define PV88080_BUCK(chip, regl_name, min, step, max, limits_array) \
+{\
+ .desc = {\
+ .id = chip##_ID_##regl_name,\
+ .name = __stringify(chip##_##regl_name),\
+ .of_match = of_match_ptr(#regl_name),\
+ .regulators_node = of_match_ptr("regulators"),\
+ .type = REGULATOR_VOLTAGE,\
+ .owner = THIS_MODULE,\
+ .ops = &pv88080_buck_ops,\
+ .min_uV = min, \
+ .uV_step = step, \
+ .n_voltages = ((max) - (min))/(step) + 1, \
+ .enable_reg = PV88080_REG_##regl_name##_CONF0, \
+ .enable_mask = PV88080_##regl_name##_EN, \
+ .vsel_reg = PV88080_REG_##regl_name##_CONF0, \
+ .vsel_mask = PV88080_V##regl_name##_MASK, \
+ },\
+ .current_limits = limits_array, \
+ .n_current_limits = ARRAY_SIZE(limits_array), \
+ .limit_mask = PV88080_##regl_name##_ILIM_MASK, \
+ .conf = PV88080_REG_##regl_name##_CONF1, \
+ .conf2 = PV88080_REG_##regl_name##_CONF2, \
+ .conf5 = PV88080_REG_##regl_name##_CONF5, \
+}
+
+static struct pv88080_regulator pv88080_regulator_info[] = {
+ PV88080_BUCK(PV88080, BUCK1, 600000, 6250, 1393750,
+ pv88080_buck1_limits),
+ PV88080_BUCK(PV88080, BUCK2, 600000, 6250, 1393750,
+ pv88080_buck23_limits),
+ PV88080_BUCK(PV88080, BUCK3, 600000, 6250, 1393750,
+ pv88080_buck23_limits),
+};
+
+static irqreturn_t pv88080_irq_handler(int irq, void *data)
+{
+ struct pv88080 *chip = data;
+ int i, reg_val, err, ret = IRQ_NONE;
+
+ err = regmap_read(chip->regmap, PV88080_REG_EVENT_A, &reg_val);
+ if (err < 0)
+ goto error_i2c;
+
+ if (reg_val & PV88080_E_VDD_FLT) {
+ for (i = 0; i < PV88080_MAX_REGULATORS; i++) {
+ if (chip->rdev[i] != NULL) {
+ regulator_notifier_call_chain(chip->rdev[i],
+ REGULATOR_EVENT_UNDER_VOLTAGE,
+ NULL);
+ }
+ }
+
+ err = regmap_write(chip->regmap, PV88080_REG_EVENT_A,
+ PV88080_E_VDD_FLT);
+ if (err < 0)
+ goto error_i2c;
+
+ ret = IRQ_HANDLED;
+ }
+
+ if (reg_val & PV88080_E_OVER_TEMP) {
+ for (i = 0; i < PV88080_MAX_REGULATORS; i++) {
+ if (chip->rdev[i] != NULL) {
+ regulator_notifier_call_chain(chip->rdev[i],
+ REGULATOR_EVENT_OVER_TEMP,
+ NULL);
+ }
+ }
+
+ err = regmap_write(chip->regmap, PV88080_REG_EVENT_A,
+ PV88080_E_OVER_TEMP);
+ if (err < 0)
+ goto error_i2c;
+
+ ret = IRQ_HANDLED;
+ }
+
+ return ret;
+
+error_i2c:
+ dev_err(chip->dev, "I2C error : %d\n", err);
+ return IRQ_NONE;
+}
+
+/*
+ * I2C driver interface functions
+ */
+static int pv88080_i2c_probe(struct i2c_client *i2c,
+ const struct i2c_device_id *id)
+{
+ struct regulator_init_data *init_data = dev_get_platdata(&i2c->dev);
+ struct pv88080 *chip;
+ struct regulator_config config = { };
+ int i, error, ret;
+ unsigned int conf2, conf5;
+
+ chip = devm_kzalloc(&i2c->dev, sizeof(struct pv88080), GFP_KERNEL);
+ if (!chip)
+ return -ENOMEM;
+
+ chip->dev = &i2c->dev;
+ chip->regmap = devm_regmap_init_i2c(i2c, &pv88080_regmap_config);
+ if (IS_ERR(chip->regmap)) {
+ error = PTR_ERR(chip->regmap);
+ dev_err(chip->dev, "Failed to allocate register map: %d\n",
+ error);
+ return error;
+ }
+
+ i2c_set_clientdata(i2c, chip);
+
+ if (i2c->irq != 0) {
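+ /* Mask all event sources first; the handled ones are unmasked
+ * again after the IRQ has been requested.
+ */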
+ ret = regmap_write(chip->regmap, PV88080_REG_MASK_A, 0xFF);
+ if (ret < 0) {
+ dev_err(chip->dev,
+ "Failed to mask A reg: %d\n", ret);
+ return ret;
+ }
+ ret = regmap_write(chip->regmap, PV88080_REG_MASK_B, 0xFF);
+ if (ret < 0) {
+ dev_err(chip->dev,
+ "Failed to mask B reg: %d\n", ret);
+ return ret;
+ }
+ ret = regmap_write(chip->regmap, PV88080_REG_MASK_C, 0xFF);
+ if (ret < 0) {
+ dev_err(chip->dev,
+ "Failed to mask C reg: %d\n", ret);
+ return ret;
+ }
+
+ ret = devm_request_threaded_irq(&i2c->dev, i2c->irq, NULL,
+ pv88080_irq_handler,
+ IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+ "pv88080", chip);
+ if (ret != 0) {
+ dev_err(chip->dev, "Failed to request IRQ: %d\n",
+ i2c->irq);
+ return ret;
+ }
+
+ ret = regmap_update_bits(chip->regmap, PV88080_REG_MASK_A,
+ PV88080_M_VDD_FLT | PV88080_M_OVER_TEMP, 0);
+ if (ret < 0) {
+ dev_err(chip->dev,
+ "Failed to update mask reg: %d\n", ret);
+ return ret;
+ }
+
+ } else {
+ dev_warn(chip->dev, "No IRQ configured\n");
+ }
+
+ config.dev = chip->dev;
+ config.regmap = chip->regmap;
+
+ for (i = 0; i < PV88080_MAX_REGULATORS; i++) {
+ if (init_data)
+ config.init_data = &init_data[i];
+
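+ /* CONF2 selects the VDAC voltage range and CONF5 the range gain;
+ * together they scale this buck's minimum voltage and step size.
+ */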
+ ret = regmap_read(chip->regmap,
+ pv88080_regulator_info[i].conf2, &conf2);
+ if (ret < 0)
+ return ret;
+
+ conf2 = ((conf2 >> PV88080_BUCK_VDAC_RANGE_SHIFT) &
+ PV88080_BUCK_VDAC_RANGE_MASK);
+
+ ret = regmap_read(chip->regmap,
+ pv88080_regulator_info[i].conf5, &conf5);
+ if (ret < 0)
+ return ret;
+
+ conf5 = ((conf5 >> PV88080_BUCK_VRANGE_GAIN_SHIFT) &
+ PV88080_BUCK_VRANGE_GAIN_MASK);
+
+ pv88080_regulator_info[i].desc.min_uV =
+ pv88080_buck_vol[conf2].min_uV * (conf5 + 1);
+ pv88080_regulator_info[i].desc.uV_step =
+ pv88080_buck_vol[conf2].uV_step * (conf5 + 1);
+ pv88080_regulator_info[i].desc.n_voltages =
+ ((pv88080_buck_vol[conf2].max_uV * (conf5 + 1))
+ - (pv88080_regulator_info[i].desc.min_uV))
+ / (pv88080_regulator_info[i].desc.uV_step) + 1;
+
+ config.driver_data = (void *)&pv88080_regulator_info[i];
+ chip->rdev[i] = devm_regulator_register(chip->dev,
+ &pv88080_regulator_info[i].desc, &config);
+ if (IS_ERR(chip->rdev[i])) {
+ dev_err(chip->dev,
+ "Failed to register PV88080 regulator\n");
+ return PTR_ERR(chip->rdev[i]);
+ }
+ }
+
+ return 0;
+}
+
+static const struct i2c_device_id pv88080_i2c_id[] = {
+ {"pv88080", 0},
+ {},
+};
+MODULE_DEVICE_TABLE(i2c, pv88080_i2c_id);
+
+#ifdef CONFIG_OF
+static const struct of_device_id pv88080_dt_ids[] = {
+ { .compatible = "pvs,pv88080", .data = &pv88080_i2c_id[0] },
+ {},
+};
+MODULE_DEVICE_TABLE(of, pv88080_dt_ids);
+#endif
+
+static struct i2c_driver pv88080_regulator_driver = {
+ .driver = {
+ .name = "pv88080",
+ .of_match_table = of_match_ptr(pv88080_dt_ids),
+ },
+ .probe = pv88080_i2c_probe,
+ .id_table = pv88080_i2c_id,
+};
+
+module_i2c_driver(pv88080_regulator_driver);
+
+MODULE_AUTHOR("James Ban <James.Ban.opensource@diasemi.com>");
+MODULE_DESCRIPTION("Regulator device driver for Powerventure PV88080");
+MODULE_LICENSE("GPL");
diff --git a/drivers/regulator/pv88080-regulator.h b/drivers/regulator/pv88080-regulator.h
new file mode 100644
index 0000000..5e9afde
--- /dev/null
+++ b/drivers/regulator/pv88080-regulator.h
@@ -0,0 +1,92 @@
+/*
+ * pv88080-regulator.h - Regulator definitions for PV88080
+ * Copyright (C) 2016 Powerventure Semiconductor Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __PV88080_REGISTERS_H__
+#define __PV88080_REGISTERS_H__
+
+/* System Control and Event Registers */
+#define PV88080_REG_EVENT_A 0x04
+#define PV88080_REG_MASK_A 0x09
+#define PV88080_REG_MASK_B 0x0a
+#define PV88080_REG_MASK_C 0x0b
+
+/* Regulator Registers */
+#define PV88080_REG_BUCK1_CONF0 0x27
+#define PV88080_REG_BUCK1_CONF1 0x28
+#define PV88080_REG_BUCK1_CONF2 0x59
+#define PV88080_REG_BUCK1_CONF5 0x5c
+#define PV88080_REG_BUCK2_CONF0 0x29
+#define PV88080_REG_BUCK2_CONF1 0x2a
+#define PV88080_REG_BUCK2_CONF2 0x61
+#define PV88080_REG_BUCK2_CONF5 0x64
+#define PV88080_REG_BUCK3_CONF0 0x2b
+#define PV88080_REG_BUCK3_CONF1 0x2c
+#define PV88080_REG_BUCK3_CONF2 0x69
+#define PV88080_REG_BUCK3_CONF5 0x6c
+
+/* PV88080_REG_EVENT_A (addr=0x04) */
+#define PV88080_E_VDD_FLT 0x01
+#define PV88080_E_OVER_TEMP 0x02
+
+/* PV88080_REG_MASK_A (addr=0x09) */
+#define PV88080_M_VDD_FLT 0x01
+#define PV88080_M_OVER_TEMP 0x02
+
+/* PV88080_REG_BUCK1_CONF0 (addr=0x27) */
+#define PV88080_BUCK1_EN 0x80
+#define PV88080_VBUCK1_MASK 0x7F
+/* PV88080_REG_BUCK2_CONF0 (addr=0x29) */
+#define PV88080_BUCK2_EN 0x80
+#define PV88080_VBUCK2_MASK 0x7F
+/* PV88080_REG_BUCK3_CONF0 (addr=0x2b) */
+#define PV88080_BUCK3_EN 0x80
+#define PV88080_VBUCK3_MASK 0x7F
+
+/* PV88080_REG_BUCK1_CONF1 (addr=0x28) */
+#define PV88080_BUCK1_ILIM_SHIFT 2
+#define PV88080_BUCK1_ILIM_MASK 0x0C
+#define PV88080_BUCK1_MODE_MASK 0x03
+
+/* PV88080_REG_BUCK2_CONF1 (addr=0x2a) */
+#define PV88080_BUCK2_ILIM_SHIFT 2
+#define PV88080_BUCK2_ILIM_MASK 0x0C
+#define PV88080_BUCK2_MODE_MASK 0x03
+
+/* PV88080_REG_BUCK3_CONF1 (addr=0x2c) */
+#define PV88080_BUCK3_ILIM_SHIFT 2
+#define PV88080_BUCK3_ILIM_MASK 0x0C
+#define PV88080_BUCK3_MODE_MASK 0x03
+
+#define PV88080_BUCK_MODE_SLEEP 0x00
+#define PV88080_BUCK_MODE_AUTO 0x01
+#define PV88080_BUCK_MODE_SYNC 0x02
+
+/* PV88080_REG_BUCK2_CONF2 (addr=0x61) */
+/* PV88080_REG_BUCK3_CONF2 (addr=0x69) */
+#define PV88080_BUCK_VDAC_RANGE_SHIFT 7
+#define PV88080_BUCK_VDAC_RANGE_MASK 0x01
+
+#define PV88080_BUCK_VDAC_RANGE_1 0x00
+#define PV88080_BUCK_VDAC_RANGE_2 0x01
+
+/* PV88080_REG_BUCK2_CONF5 (addr=0x64) */
+/* PV88080_REG_BUCK3_CONF5 (addr=0x6c) */
+#define PV88080_BUCK_VRANGE_GAIN_SHIFT 0
+#define PV88080_BUCK_VRANGE_GAIN_MASK 0x01
+
+#define PV88080_BUCK_VRANGE_GAIN_1 0x00
+#define PV88080_BUCK_VRANGE_GAIN_2 0x01
+
+#endif /* __PV88080_REGISTERS_H__ */
diff --git a/drivers/regulator/pwm-regulator.c b/drivers/regulator/pwm-regulator.c
index 4689d62..fafa348 100644
--- a/drivers/regulator/pwm-regulator.c
+++ b/drivers/regulator/pwm-regulator.c
@@ -59,18 +59,18 @@
unsigned selector)
{
struct pwm_regulator_data *drvdata = rdev_get_drvdata(rdev);
- unsigned int pwm_reg_period;
+ struct pwm_args pargs;
int dutycycle;
int ret;
- pwm_reg_period = pwm_get_period(drvdata->pwm);
+ pwm_get_args(drvdata->pwm, &pargs);
- dutycycle = (pwm_reg_period *
+ dutycycle = (pargs.period *
drvdata->duty_cycle_table[selector].dutycycle) / 100;
- ret = pwm_config(drvdata->pwm, dutycycle, pwm_reg_period);
+ ret = pwm_config(drvdata->pwm, dutycycle, pargs.period);
if (ret) {
- dev_err(&rdev->dev, "Failed to configure PWM\n");
+ dev_err(&rdev->dev, "Failed to configure PWM: %d\n", ret);
return ret;
}
@@ -113,18 +113,6 @@
return pwm_is_enabled(drvdata->pwm);
}
-/**
- * Continuous voltage call-backs
- */
-static int pwm_voltage_to_duty_cycle_percentage(struct regulator_dev *rdev, int req_uV)
-{
- int min_uV = rdev->constraints->min_uV;
- int max_uV = rdev->constraints->max_uV;
- int diff = max_uV - min_uV;
-
- return ((req_uV * 100) - (min_uV * 100)) / diff;
-}
-
static int pwm_regulator_get_voltage(struct regulator_dev *rdev)
{
struct pwm_regulator_data *drvdata = rdev_get_drvdata(rdev);
@@ -138,21 +126,42 @@
{
struct pwm_regulator_data *drvdata = rdev_get_drvdata(rdev);
unsigned int ramp_delay = rdev->constraints->ramp_delay;
- unsigned int period = pwm_get_period(drvdata->pwm);
- int duty_cycle;
+ struct pwm_args pargs;
+ unsigned int req_diff = min_uV - rdev->constraints->min_uV;
+ unsigned int diff;
+ unsigned int duty_pulse;
+ u64 req_period;
+ u32 rem;
int ret;
- duty_cycle = pwm_voltage_to_duty_cycle_percentage(rdev, min_uV);
+ pwm_get_args(drvdata->pwm, &pargs);
+ diff = rdev->constraints->max_uV - rdev->constraints->min_uV;
- ret = pwm_config(drvdata->pwm, (period / 100) * duty_cycle, period);
+ /* First check whether we can get a duty-cycle time that is an
+ * exact factor of the PWM period: if (req_diff * pwm_period)
+ * divides evenly by the voltage range difference, the duty pulse
+ * can be computed without rounding loss, which brings the output
+ * voltage closer to the requested value.
+ */
+ req_period = req_diff * pargs.period;
+ div_u64_rem(req_period, diff, &rem);
+ if (!rem) {
+ do_div(req_period, diff);
+ duty_pulse = (unsigned int)req_period;
+ } else {
+ duty_pulse = (pargs.period / 100) * ((req_diff * 100) / diff);
+ }
+
+ ret = pwm_config(drvdata->pwm, duty_pulse, pargs.period);
if (ret) {
- dev_err(&rdev->dev, "Failed to configure PWM\n");
+ dev_err(&rdev->dev, "Failed to configure PWM: %d\n", ret);
return ret;
}
ret = pwm_enable(drvdata->pwm);
if (ret) {
- dev_err(&rdev->dev, "Failed to enable PWM\n");
+ dev_err(&rdev->dev, "Failed to enable PWM: %d\n", ret);
return ret;
}
drvdata->volt_uV = min_uV;
@@ -200,8 +209,7 @@
if ((length < sizeof(*duty_cycle_table)) ||
(length % sizeof(*duty_cycle_table))) {
- dev_err(&pdev->dev,
- "voltage-table length(%d) is invalid\n",
+ dev_err(&pdev->dev, "voltage-table length(%d) is invalid\n",
length);
return -EINVAL;
}
@@ -214,7 +222,7 @@
(u32 *)duty_cycle_table,
length / sizeof(u32));
if (ret) {
- dev_err(&pdev->dev, "Failed to read voltage-table\n");
+ dev_err(&pdev->dev, "Failed to read voltage-table: %d\n", ret);
return ret;
}
@@ -277,16 +285,24 @@
drvdata->pwm = devm_pwm_get(&pdev->dev, NULL);
if (IS_ERR(drvdata->pwm)) {
- dev_err(&pdev->dev, "Failed to get PWM\n");
- return PTR_ERR(drvdata->pwm);
+ ret = PTR_ERR(drvdata->pwm);
+ dev_err(&pdev->dev, "Failed to get PWM: %d\n", ret);
+ return ret;
}
+ /*
+ * FIXME: pwm_apply_args() should be removed when switching to the
+ * atomic PWM API.
+ */
+ pwm_apply_args(drvdata->pwm);
+
regulator = devm_regulator_register(&pdev->dev,
&drvdata->desc, &config);
if (IS_ERR(regulator)) {
- dev_err(&pdev->dev, "Failed to register regulator %s\n",
- drvdata->desc.name);
- return PTR_ERR(regulator);
+ ret = PTR_ERR(regulator);
+ dev_err(&pdev->dev, "Failed to register regulator %s: %d\n",
+ drvdata->desc.name, ret);
+ return ret;
}
return 0;
diff --git a/drivers/regulator/qcom_spmi-regulator.c b/drivers/regulator/qcom_spmi-regulator.c
index 88a5dc8..84cce21 100644
--- a/drivers/regulator/qcom_spmi-regulator.c
+++ b/drivers/regulator/qcom_spmi-regulator.c
@@ -246,6 +246,7 @@
/* Minimum voltage stepper delay for each step. */
#define SPMI_FTSMPS_STEP_DELAY 8
+#define SPMI_DEFAULT_STEP_DELAY 20
/*
* The ratio SPMI_FTSMPS_STEP_MARGIN_NUM/SPMI_FTSMPS_STEP_MARGIN_DEN is used to
@@ -254,13 +255,6 @@
#define SPMI_FTSMPS_STEP_MARGIN_NUM 4
#define SPMI_FTSMPS_STEP_MARGIN_DEN 5
-/*
- * This voltage in uV is returned by get_voltage functions when there is no way
- * to determine the current voltage level. It is needed because the regulator
- * framework treats a 0 uV voltage as an error.
- */
-#define VOLTAGE_UNKNOWN 1
-
/* VSET value to decide the range of ULT SMPS */
#define ULT_SMPS_RANGE_SPLIT 0x60
@@ -539,12 +533,12 @@
}
static int spmi_regulator_select_voltage(struct spmi_regulator *vreg,
- int min_uV, int max_uV, u8 *range_sel, u8 *voltage_sel,
- unsigned *selector)
+ int min_uV, int max_uV)
{
const struct spmi_voltage_range *range;
int uV = min_uV;
int lim_min_uV, lim_max_uV, i, range_id, range_max_uV;
+ int selector, voltage_sel;
/* Check if request voltage is outside of physically settable range. */
lim_min_uV = vreg->set_points->range[0].set_point_min_uV;
@@ -570,14 +564,13 @@
range_id = i;
range = &vreg->set_points->range[range_id];
- *range_sel = range->range_sel;
/*
* Force uV to be an allowed set point by applying a ceiling function to
* the uV value.
*/
- *voltage_sel = DIV_ROUND_UP(uV - range->min_uV, range->step_uV);
- uV = *voltage_sel * range->step_uV + range->min_uV;
+ voltage_sel = DIV_ROUND_UP(uV - range->min_uV, range->step_uV);
+ uV = voltage_sel * range->step_uV + range->min_uV;
if (uV > max_uV) {
dev_err(vreg->dev,
@@ -587,12 +580,48 @@
return -EINVAL;
}
- *selector = 0;
+ selector = 0;
for (i = 0; i < range_id; i++)
- *selector += vreg->set_points->range[i].n_voltages;
- *selector += (uV - range->set_point_min_uV) / range->step_uV;
+ selector += vreg->set_points->range[i].n_voltages;
+ selector += (uV - range->set_point_min_uV) / range->step_uV;
- return 0;
+ return selector;
+}
+
+static int spmi_sw_selector_to_hw(struct spmi_regulator *vreg,
+ unsigned selector, u8 *range_sel,
+ u8 *voltage_sel)
+{
+ const struct spmi_voltage_range *range, *end;
+
+ range = vreg->set_points->range;
+ end = range + vreg->set_points->count;
+
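+ /* Walk the ranges in order, consuming each range's voltage count
+ * until the software selector falls inside one; the remainder is
+ * the hardware selector within that range.
+ */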
+ for (; range < end; range++) {
+ if (selector < range->n_voltages) {
+ *voltage_sel = selector;
+ *range_sel = range->range_sel;
+ return 0;
+ }
+
+ selector -= range->n_voltages;
+ }
+
+ return -EINVAL;
+}
+
+static int spmi_hw_selector_to_sw(struct spmi_regulator *vreg, u8 hw_sel,
+ const struct spmi_voltage_range *range)
+{
+ int sw_sel = hw_sel;
+ const struct spmi_voltage_range *r = vreg->set_points->range;
+
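+ /* Offset the hardware selector by the voltage counts of all preceding ranges */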
+ while (r != range) {
+ sw_sel += r->n_voltages;
+ r++;
+ }
+
+ return sw_sel;
}
static const struct spmi_voltage_range *
@@ -614,12 +643,11 @@
}
static int spmi_regulator_select_voltage_same_range(struct spmi_regulator *vreg,
- int min_uV, int max_uV, u8 *range_sel, u8 *voltage_sel,
- unsigned *selector)
+ int min_uV, int max_uV)
{
const struct spmi_voltage_range *range;
int uV = min_uV;
- int i;
+ int i, selector;
range = spmi_regulator_find_range(vreg);
if (!range)
@@ -637,8 +665,8 @@
* Force uV to be an allowed set point by applying a ceiling function to
* the uV value.
*/
- *voltage_sel = DIV_ROUND_UP(uV - range->min_uV, range->step_uV);
- uV = *voltage_sel * range->step_uV + range->min_uV;
+ uV = DIV_ROUND_UP(uV - range->min_uV, range->step_uV);
+ uV = uV * range->step_uV + range->min_uV;
if (uV > max_uV) {
/*
@@ -648,43 +676,49 @@
goto different_range;
}
- *selector = 0;
+ selector = 0;
for (i = 0; i < vreg->set_points->count; i++) {
if (uV >= vreg->set_points->range[i].set_point_min_uV
&& uV <= vreg->set_points->range[i].set_point_max_uV) {
- *selector +=
+ selector +=
(uV - vreg->set_points->range[i].set_point_min_uV)
/ vreg->set_points->range[i].step_uV;
break;
}
- *selector += vreg->set_points->range[i].n_voltages;
+ selector += vreg->set_points->range[i].n_voltages;
}
- if (*selector >= vreg->set_points->n_voltages)
+ if (selector >= vreg->set_points->n_voltages)
goto different_range;
- return 0;
+ return selector;
different_range:
- return spmi_regulator_select_voltage(vreg, min_uV, max_uV,
- range_sel, voltage_sel, selector);
+ return spmi_regulator_select_voltage(vreg, min_uV, max_uV);
}
-static int spmi_regulator_common_set_voltage(struct regulator_dev *rdev,
- int min_uV, int max_uV, unsigned *selector)
+static int spmi_regulator_common_map_voltage(struct regulator_dev *rdev,
+ int min_uV, int max_uV)
+{
+ struct spmi_regulator *vreg = rdev_get_drvdata(rdev);
+
+ /*
+ * Favor staying in the current voltage range if possible. This avoids
+ * voltage spikes that occur when changing the voltage range.
+ */
+ return spmi_regulator_select_voltage_same_range(vreg, min_uV, max_uV);
+}
+
+static int
+spmi_regulator_common_set_voltage(struct regulator_dev *rdev, unsigned selector)
{
struct spmi_regulator *vreg = rdev_get_drvdata(rdev);
int ret;
u8 buf[2];
u8 range_sel, voltage_sel;
- /*
- * Favor staying in the current voltage range if possible. This avoids
- * voltage spikes that occur when changing the voltage range.
- */
- ret = spmi_regulator_select_voltage_same_range(vreg, min_uV, max_uV,
- &range_sel, &voltage_sel, selector);
+ ret = spmi_sw_selector_to_hw(vreg, selector, &range_sel, &voltage_sel);
if (ret)
return ret;
@@ -719,24 +753,24 @@
range = spmi_regulator_find_range(vreg);
if (!range)
- return VOLTAGE_UNKNOWN;
+ return -EINVAL;
- return range->step_uV * voltage_sel + range->min_uV;
+ return spmi_hw_selector_to_sw(vreg, voltage_sel, range);
+}
+
+static int spmi_regulator_single_map_voltage(struct regulator_dev *rdev,
+ int min_uV, int max_uV)
+{
+ struct spmi_regulator *vreg = rdev_get_drvdata(rdev);
+
+ return spmi_regulator_select_voltage(vreg, min_uV, max_uV);
}
static int spmi_regulator_single_range_set_voltage(struct regulator_dev *rdev,
- int min_uV, int max_uV, unsigned *selector)
+ unsigned selector)
{
struct spmi_regulator *vreg = rdev_get_drvdata(rdev);
- int ret;
- u8 range_sel, sel;
-
- ret = spmi_regulator_select_voltage(vreg, min_uV, max_uV, &range_sel,
- &sel, selector);
- if (ret) {
- dev_err(vreg->dev, "could not set voltage, ret=%d\n", ret);
- return ret;
- }
+ u8 sel = selector;
/*
* Certain types of regulators do not have a range select register so
@@ -748,27 +782,24 @@
static int spmi_regulator_single_range_get_voltage(struct regulator_dev *rdev)
{
struct spmi_regulator *vreg = rdev_get_drvdata(rdev);
- const struct spmi_voltage_range *range = vreg->set_points->range;
- u8 voltage_sel;
+ u8 selector;
+ int ret;
- spmi_vreg_read(vreg, SPMI_COMMON_REG_VOLTAGE_SET, &voltage_sel, 1);
+ ret = spmi_vreg_read(vreg, SPMI_COMMON_REG_VOLTAGE_SET, &selector, 1);
+ if (ret)
+ return ret;
- return range->step_uV * voltage_sel + range->min_uV;
+ return selector;
}
static int spmi_regulator_ult_lo_smps_set_voltage(struct regulator_dev *rdev,
- int min_uV, int max_uV, unsigned *selector)
+ unsigned selector)
{
struct spmi_regulator *vreg = rdev_get_drvdata(rdev);
int ret;
u8 range_sel, voltage_sel;
- /*
- * Favor staying in the current voltage range if possible. This avoids
- * voltage spikes that occur when changing the voltage range.
- */
- ret = spmi_regulator_select_voltage_same_range(vreg, min_uV, max_uV,
- &range_sel, &voltage_sel, selector);
+ ret = spmi_sw_selector_to_hw(vreg, selector, &range_sel, &voltage_sel);
if (ret)
return ret;
@@ -783,7 +814,7 @@
voltage_sel |= ULT_SMPS_RANGE_SPLIT;
return spmi_vreg_update_bits(vreg, SPMI_COMMON_REG_VOLTAGE_SET,
- voltage_sel, 0xff);
+ voltage_sel, 0xff);
}
static int spmi_regulator_ult_lo_smps_get_voltage(struct regulator_dev *rdev)
@@ -796,12 +827,12 @@
range = spmi_regulator_find_range(vreg);
if (!range)
- return VOLTAGE_UNKNOWN;
+ return -EINVAL;
if (range->range_sel == 1)
voltage_sel &= ~ULT_SMPS_RANGE_SPLIT;
- return range->step_uV * voltage_sel + range->min_uV;
+ return spmi_hw_selector_to_sw(vreg, voltage_sel, range);
}
static int spmi_regulator_common_list_voltage(struct regulator_dev *rdev,
@@ -1007,8 +1038,10 @@
.enable = spmi_regulator_common_enable,
.disable = spmi_regulator_common_disable,
.is_enabled = spmi_regulator_common_is_enabled,
- .set_voltage = spmi_regulator_common_set_voltage,
- .get_voltage = spmi_regulator_common_get_voltage,
+ .set_voltage_sel = spmi_regulator_common_set_voltage,
+ .set_voltage_time_sel = spmi_regulator_set_voltage_time_sel,
+ .get_voltage_sel = spmi_regulator_common_get_voltage,
+ .map_voltage = spmi_regulator_common_map_voltage,
.list_voltage = spmi_regulator_common_list_voltage,
.set_mode = spmi_regulator_common_set_mode,
.get_mode = spmi_regulator_common_get_mode,
@@ -1020,8 +1053,9 @@
.enable = spmi_regulator_common_enable,
.disable = spmi_regulator_common_disable,
.is_enabled = spmi_regulator_common_is_enabled,
- .set_voltage = spmi_regulator_common_set_voltage,
- .get_voltage = spmi_regulator_common_get_voltage,
+ .set_voltage_sel = spmi_regulator_common_set_voltage,
+ .get_voltage_sel = spmi_regulator_common_get_voltage,
+ .map_voltage = spmi_regulator_common_map_voltage,
.list_voltage = spmi_regulator_common_list_voltage,
.set_mode = spmi_regulator_common_set_mode,
.get_mode = spmi_regulator_common_get_mode,
@@ -1036,8 +1070,9 @@
.enable = spmi_regulator_common_enable,
.disable = spmi_regulator_common_disable,
.is_enabled = spmi_regulator_common_is_enabled,
- .set_voltage = spmi_regulator_common_set_voltage,
- .get_voltage = spmi_regulator_common_get_voltage,
+ .set_voltage_sel = spmi_regulator_common_set_voltage,
+ .get_voltage_sel = spmi_regulator_common_get_voltage,
+ .map_voltage = spmi_regulator_common_map_voltage,
.list_voltage = spmi_regulator_common_list_voltage,
.set_bypass = spmi_regulator_common_set_bypass,
.get_bypass = spmi_regulator_common_get_bypass,
@@ -1056,8 +1091,9 @@
.enable = spmi_regulator_common_enable,
.disable = spmi_regulator_common_disable,
.is_enabled = spmi_regulator_common_is_enabled,
- .set_voltage = spmi_regulator_single_range_set_voltage,
- .get_voltage = spmi_regulator_single_range_get_voltage,
+ .set_voltage_sel = spmi_regulator_single_range_set_voltage,
+ .get_voltage_sel = spmi_regulator_single_range_get_voltage,
+ .map_voltage = spmi_regulator_single_map_voltage,
.list_voltage = spmi_regulator_common_list_voltage,
.set_input_current_limit = spmi_regulator_set_ilim,
};
@@ -1066,9 +1102,10 @@
.enable = spmi_regulator_common_enable,
.disable = spmi_regulator_common_disable,
.is_enabled = spmi_regulator_common_is_enabled,
- .set_voltage = spmi_regulator_common_set_voltage,
+ .set_voltage_sel = spmi_regulator_common_set_voltage,
.set_voltage_time_sel = spmi_regulator_set_voltage_time_sel,
- .get_voltage = spmi_regulator_common_get_voltage,
+ .get_voltage_sel = spmi_regulator_common_get_voltage,
+ .map_voltage = spmi_regulator_common_map_voltage,
.list_voltage = spmi_regulator_common_list_voltage,
.set_mode = spmi_regulator_common_set_mode,
.get_mode = spmi_regulator_common_get_mode,
@@ -1080,8 +1117,9 @@
.enable = spmi_regulator_common_enable,
.disable = spmi_regulator_common_disable,
.is_enabled = spmi_regulator_common_is_enabled,
- .set_voltage = spmi_regulator_ult_lo_smps_set_voltage,
- .get_voltage = spmi_regulator_ult_lo_smps_get_voltage,
+ .set_voltage_sel = spmi_regulator_ult_lo_smps_set_voltage,
+ .set_voltage_time_sel = spmi_regulator_set_voltage_time_sel,
+ .get_voltage_sel = spmi_regulator_ult_lo_smps_get_voltage,
.list_voltage = spmi_regulator_common_list_voltage,
.set_mode = spmi_regulator_common_set_mode,
.get_mode = spmi_regulator_common_get_mode,
@@ -1093,8 +1131,10 @@
.enable = spmi_regulator_common_enable,
.disable = spmi_regulator_common_disable,
.is_enabled = spmi_regulator_common_is_enabled,
- .set_voltage = spmi_regulator_single_range_set_voltage,
- .get_voltage = spmi_regulator_single_range_get_voltage,
+ .set_voltage_sel = spmi_regulator_single_range_set_voltage,
+ .set_voltage_time_sel = spmi_regulator_set_voltage_time_sel,
+ .get_voltage_sel = spmi_regulator_single_range_get_voltage,
+ .map_voltage = spmi_regulator_single_map_voltage,
.list_voltage = spmi_regulator_common_list_voltage,
.set_mode = spmi_regulator_common_set_mode,
.get_mode = spmi_regulator_common_get_mode,
@@ -1106,8 +1146,9 @@
.enable = spmi_regulator_common_enable,
.disable = spmi_regulator_common_disable,
.is_enabled = spmi_regulator_common_is_enabled,
- .set_voltage = spmi_regulator_single_range_set_voltage,
- .get_voltage = spmi_regulator_single_range_get_voltage,
+ .set_voltage_sel = spmi_regulator_single_range_set_voltage,
+ .get_voltage_sel = spmi_regulator_single_range_get_voltage,
+ .map_voltage = spmi_regulator_single_map_voltage,
.list_voltage = spmi_regulator_common_list_voltage,
.set_mode = spmi_regulator_common_set_mode,
.get_mode = spmi_regulator_common_get_mode,
@@ -1201,7 +1242,7 @@
ret = spmi_vreg_read(vreg, SPMI_COMMON_REG_DIG_MAJOR_REV, version,
ARRAY_SIZE(version));
if (ret) {
- dev_err(vreg->dev, "could not read version registers\n");
+ dev_dbg(vreg->dev, "could not read version registers\n");
return ret;
}
dig_major_rev = version[SPMI_COMMON_REG_DIG_MAJOR_REV
@@ -1245,11 +1286,11 @@
return 0;
}
-static int spmi_regulator_ftsmps_init_slew_rate(struct spmi_regulator *vreg)
+static int spmi_regulator_init_slew_rate(struct spmi_regulator *vreg)
{
int ret;
u8 reg = 0;
- int step, delay, slew_rate;
+ int step, delay, slew_rate, step_delay;
const struct spmi_voltage_range *range;
ret = spmi_vreg_read(vreg, SPMI_COMMON_REG_STEP_CTRL, &reg, 1);
@@ -1262,6 +1303,15 @@
if (!range)
return -EINVAL;
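+
+ /* FTSMPS regulators use a shorter per-step delay than the other types */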
+ switch (vreg->logical_type) {
+ case SPMI_REGULATOR_LOGICAL_TYPE_FTSMPS:
+ step_delay = SPMI_FTSMPS_STEP_DELAY;
+ break;
+ default:
+ step_delay = SPMI_DEFAULT_STEP_DELAY;
+ break;
+ }
+
step = reg & SPMI_FTSMPS_STEP_CTRL_STEP_MASK;
step >>= SPMI_FTSMPS_STEP_CTRL_STEP_SHIFT;
@@ -1270,7 +1320,7 @@
/* slew_rate has units of uV/us */
slew_rate = SPMI_FTSMPS_CLOCK_RATE * range->step_uV * (1 << step);
- slew_rate /= 1000 * (SPMI_FTSMPS_STEP_DELAY << delay);
+ slew_rate /= 1000 * (step_delay << delay);
slew_rate *= SPMI_FTSMPS_STEP_MARGIN_NUM;
slew_rate /= SPMI_FTSMPS_STEP_MARGIN_DEN;
@@ -1411,10 +1461,16 @@
return ret;
}
- if (vreg->logical_type == SPMI_REGULATOR_LOGICAL_TYPE_FTSMPS) {
- ret = spmi_regulator_ftsmps_init_slew_rate(vreg);
+ switch (vreg->logical_type) {
+ case SPMI_REGULATOR_LOGICAL_TYPE_FTSMPS:
+ case SPMI_REGULATOR_LOGICAL_TYPE_ULT_LO_SMPS:
+ case SPMI_REGULATOR_LOGICAL_TYPE_ULT_HO_SMPS:
+ case SPMI_REGULATOR_LOGICAL_TYPE_SMPS:
+ ret = spmi_regulator_init_slew_rate(vreg);
if (ret)
return ret;
+ break;
+ default:
+ break;
}
if (vreg->logical_type != SPMI_REGULATOR_LOGICAL_TYPE_VS)
@@ -1510,10 +1566,61 @@
{ }
};
+static const struct spmi_regulator_data pm8994_regulators[] = {
+ { "s1", 0x1400, "vdd_s1", },
+ { "s2", 0x1700, "vdd_s2", },
+ { "s3", 0x1a00, "vdd_s3", },
+ { "s4", 0x1d00, "vdd_s4", },
+ { "s5", 0x2000, "vdd_s5", },
+ { "s6", 0x2300, "vdd_s6", },
+ { "s7", 0x2600, "vdd_s7", },
+ { "s8", 0x2900, "vdd_s8", },
+ { "s9", 0x2c00, "vdd_s9", },
+ { "s10", 0x2f00, "vdd_s10", },
+ { "s11", 0x3200, "vdd_s11", },
+ { "s12", 0x3500, "vdd_s12", },
+ { "l1", 0x4000, "vdd_l1", },
+ { "l2", 0x4100, "vdd_l2_l26_l28", },
+ { "l3", 0x4200, "vdd_l3_l11", },
+ { "l4", 0x4300, "vdd_l4_l27_l31", },
+ { "l5", 0x4400, "vdd_l5_l7", },
+ { "l6", 0x4500, "vdd_l6_l12_l32", },
+ { "l7", 0x4600, "vdd_l5_l7", },
+ { "l8", 0x4700, "vdd_l8_l16_l30", },
+ { "l9", 0x4800, "vdd_l9_l10_l18_l22", },
+ { "l10", 0x4900, "vdd_l9_l10_l18_l22", },
+ { "l11", 0x4a00, "vdd_l3_l11", },
+ { "l12", 0x4b00, "vdd_l6_l12_l32", },
+ { "l13", 0x4c00, "vdd_l13_l19_l23_l24", },
+ { "l14", 0x4d00, "vdd_l14_l15", },
+ { "l15", 0x4e00, "vdd_l14_l15", },
+ { "l16", 0x4f00, "vdd_l8_l16_l30", },
+ { "l17", 0x5000, "vdd_l17_l29", },
+ { "l18", 0x5100, "vdd_l9_l10_l18_l22", },
+ { "l19", 0x5200, "vdd_l13_l19_l23_l24", },
+ { "l20", 0x5300, "vdd_l20_l21", },
+ { "l21", 0x5400, "vdd_l20_l21", },
+ { "l22", 0x5500, "vdd_l9_l10_l18_l22", },
+ { "l23", 0x5600, "vdd_l13_l19_l23_l24", },
+ { "l24", 0x5700, "vdd_l13_l19_l23_l24", },
+ { "l25", 0x5800, "vdd_l25", },
+ { "l26", 0x5900, "vdd_l2_l26_l28", },
+ { "l27", 0x5a00, "vdd_l4_l27_l31", },
+ { "l28", 0x5b00, "vdd_l2_l26_l28", },
+ { "l29", 0x5c00, "vdd_l17_l29", },
+ { "l30", 0x5d00, "vdd_l8_l16_l30", },
+ { "l31", 0x5e00, "vdd_l4_l27_l31", },
+ { "l32", 0x5f00, "vdd_l6_l12_l32", },
+ { "lvs1", 0x8000, "vdd_lvs_1_2", },
+ { "lvs2", 0x8100, "vdd_lvs_1_2", },
+ { }
+};
+
static const struct of_device_id qcom_spmi_regulator_match[] = {
{ .compatible = "qcom,pm8841-regulators", .data = &pm8841_regulators },
{ .compatible = "qcom,pm8916-regulators", .data = &pm8916_regulators },
{ .compatible = "qcom,pm8941-regulators", .data = &pm8941_regulators },
+ { .compatible = "qcom,pm8994-regulators", .data = &pm8994_regulators },
{ }
};
MODULE_DEVICE_TABLE(of, qcom_spmi_regulator_match);
@@ -1573,7 +1680,7 @@
ret = spmi_regulator_match(vreg, reg->force_type);
if (ret)
- goto err;
+ continue;
config.dev = dev;
config.driver_data = vreg;
diff --git a/drivers/regulator/rk808-regulator.c b/drivers/regulator/rk808-regulator.c
index d86a3dc..40d07ba0 100644
--- a/drivers/regulator/rk808-regulator.c
+++ b/drivers/regulator/rk808-regulator.c
@@ -55,6 +55,42 @@
/* max steps for increasing the voltage of Buck1/2, equal to 100mV */
#define MAX_STEPS_ONE_TIME 8
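+
+/* Shorthand regulator_desc initializer; _min, _max and _step are in mV */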
+#define RK8XX_DESC(_id, _match, _supply, _min, _max, _step, _vreg, \
+ _vmask, _ereg, _emask, _etime) \
+ [_id] = { \
+ .name = (_match), \
+ .supply_name = (_supply), \
+ .of_match = of_match_ptr(_match), \
+ .regulators_node = of_match_ptr("regulators"), \
+ .type = REGULATOR_VOLTAGE, \
+ .id = (_id), \
+ .n_voltages = (((_max) - (_min)) / (_step) + 1), \
+ .owner = THIS_MODULE, \
+ .min_uV = (_min) * 1000, \
+ .uV_step = (_step) * 1000, \
+ .vsel_reg = (_vreg), \
+ .vsel_mask = (_vmask), \
+ .enable_reg = (_ereg), \
+ .enable_mask = (_emask), \
+ .enable_time = (_etime), \
+ .ops = &rk808_reg_ops, \
+ }
+
+#define RK8XX_DESC_SWITCH(_id, _match, _supply, _ereg, _emask) \
+ [_id] = { \
+ .name = (_match), \
+ .supply_name = (_supply), \
+ .of_match = of_match_ptr(_match), \
+ .regulators_node = of_match_ptr("regulators"), \
+ .type = REGULATOR_VOLTAGE, \
+ .id = (_id), \
+ .enable_reg = (_ereg), \
+ .enable_mask = (_emask), \
+ .owner = THIS_MODULE, \
+ .ops = &rk808_switch_ops \
+ }
+
+
struct rk808_regulator_data {
struct gpio_desc *dvs_gpio[2];
};
@@ -66,27 +102,11 @@
RK808_BUCK4_CONFIG_REG,
};
-static const struct regulator_linear_range rk808_buck_voltage_ranges[] = {
- REGULATOR_LINEAR_RANGE(712500, 0, 63, 12500),
-};
-
-static const struct regulator_linear_range rk808_buck4_voltage_ranges[] = {
- REGULATOR_LINEAR_RANGE(1800000, 0, 15, 100000),
-};
-
-static const struct regulator_linear_range rk808_ldo_voltage_ranges[] = {
- REGULATOR_LINEAR_RANGE(1800000, 0, 16, 100000),
-};
-
static const struct regulator_linear_range rk808_ldo3_voltage_ranges[] = {
REGULATOR_LINEAR_RANGE(800000, 0, 13, 100000),
REGULATOR_LINEAR_RANGE(2500000, 15, 15, 0),
};
-static const struct regulator_linear_range rk808_ldo6_voltage_ranges[] = {
- REGULATOR_LINEAR_RANGE(800000, 0, 17, 100000),
-};
-
static int rk808_buck1_2_get_voltage_sel_regmap(struct regulator_dev *rdev)
{
struct rk808_regulator_data *pdata = rdev_get_drvdata(rdev);
@@ -242,6 +262,21 @@
static int rk808_set_suspend_voltage(struct regulator_dev *rdev, int uv)
{
unsigned int reg;
+ int sel = regulator_map_voltage_linear(rdev, uv, uv);
+
+ if (sel < 0)
+ return -EINVAL;
+
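+ /* The sleep-mode voltage register sits at a fixed offset from the run-time vsel register */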
+ reg = rdev->desc->vsel_reg + RK808_SLP_REG_OFFSET;
+
+ return regmap_update_bits(rdev->regmap, reg,
+ rdev->desc->vsel_mask,
+ sel);
+}
+
+static int rk808_set_suspend_voltage_range(struct regulator_dev *rdev, int uv)
+{
+ unsigned int reg;
int sel = regulator_map_voltage_linear_range(rdev, uv, uv);
if (sel < 0)
@@ -277,8 +312,8 @@
}
static struct regulator_ops rk808_buck1_2_ops = {
- .list_voltage = regulator_list_voltage_linear_range,
- .map_voltage = regulator_map_voltage_linear_range,
+ .list_voltage = regulator_list_voltage_linear,
+ .map_voltage = regulator_map_voltage_linear,
.get_voltage_sel = rk808_buck1_2_get_voltage_sel_regmap,
.set_voltage_sel = rk808_buck1_2_set_voltage_sel,
.set_voltage_time_sel = rk808_buck1_2_set_voltage_time_sel,
@@ -292,6 +327,19 @@
};
static struct regulator_ops rk808_reg_ops = {
+ .list_voltage = regulator_list_voltage_linear,
+ .map_voltage = regulator_map_voltage_linear,
+ .get_voltage_sel = regulator_get_voltage_sel_regmap,
+ .set_voltage_sel = regulator_set_voltage_sel_regmap,
+ .enable = regulator_enable_regmap,
+ .disable = regulator_disable_regmap,
+ .is_enabled = regulator_is_enabled_regmap,
+ .set_suspend_voltage = rk808_set_suspend_voltage,
+ .set_suspend_enable = rk808_set_suspend_enable,
+ .set_suspend_disable = rk808_set_suspend_disable,
+};
+
+static struct regulator_ops rk808_reg_ops_ranges = {
.list_voltage = regulator_list_voltage_linear_range,
.map_voltage = regulator_map_voltage_linear_range,
.get_voltage_sel = regulator_get_voltage_sel_regmap,
@@ -299,7 +347,7 @@
.enable = regulator_enable_regmap,
.disable = regulator_disable_regmap,
.is_enabled = regulator_is_enabled_regmap,
- .set_suspend_voltage = rk808_set_suspend_voltage,
+ .set_suspend_voltage = rk808_set_suspend_voltage_range,
.set_suspend_enable = rk808_set_suspend_enable,
.set_suspend_disable = rk808_set_suspend_disable,
};
@@ -316,12 +364,14 @@
{
.name = "DCDC_REG1",
.supply_name = "vcc1",
+ .of_match = of_match_ptr("DCDC_REG1"),
+ .regulators_node = of_match_ptr("regulators"),
.id = RK808_ID_DCDC1,
.ops = &rk808_buck1_2_ops,
.type = REGULATOR_VOLTAGE,
+ .min_uV = 712500,
+ .uV_step = 12500,
.n_voltages = 64,
- .linear_ranges = rk808_buck_voltage_ranges,
- .n_linear_ranges = ARRAY_SIZE(rk808_buck_voltage_ranges),
.vsel_reg = RK808_BUCK1_ON_VSEL_REG,
.vsel_mask = RK808_BUCK_VSEL_MASK,
.enable_reg = RK808_DCDC_EN_REG,
@@ -330,12 +380,14 @@
}, {
.name = "DCDC_REG2",
.supply_name = "vcc2",
+ .of_match = of_match_ptr("DCDC_REG2"),
+ .regulators_node = of_match_ptr("regulators"),
.id = RK808_ID_DCDC2,
.ops = &rk808_buck1_2_ops,
.type = REGULATOR_VOLTAGE,
+ .min_uV = 712500,
+ .uV_step = 12500,
.n_voltages = 64,
- .linear_ranges = rk808_buck_voltage_ranges,
- .n_linear_ranges = ARRAY_SIZE(rk808_buck_voltage_ranges),
.vsel_reg = RK808_BUCK2_ON_VSEL_REG,
.vsel_mask = RK808_BUCK_VSEL_MASK,
.enable_reg = RK808_DCDC_EN_REG,
@@ -344,6 +396,8 @@
}, {
.name = "DCDC_REG3",
.supply_name = "vcc3",
+ .of_match = of_match_ptr("DCDC_REG3"),
+ .regulators_node = of_match_ptr("regulators"),
.id = RK808_ID_DCDC3,
.ops = &rk808_switch_ops,
.type = REGULATOR_VOLTAGE,
@@ -351,55 +405,23 @@
.enable_reg = RK808_DCDC_EN_REG,
.enable_mask = BIT(2),
.owner = THIS_MODULE,
- }, {
- .name = "DCDC_REG4",
- .supply_name = "vcc4",
- .id = RK808_ID_DCDC4,
- .ops = &rk808_reg_ops,
- .type = REGULATOR_VOLTAGE,
- .n_voltages = 16,
- .linear_ranges = rk808_buck4_voltage_ranges,
- .n_linear_ranges = ARRAY_SIZE(rk808_buck4_voltage_ranges),
- .vsel_reg = RK808_BUCK4_ON_VSEL_REG,
- .vsel_mask = RK808_BUCK4_VSEL_MASK,
- .enable_reg = RK808_DCDC_EN_REG,
- .enable_mask = BIT(3),
- .owner = THIS_MODULE,
- }, {
- .name = "LDO_REG1",
- .supply_name = "vcc6",
- .id = RK808_ID_LDO1,
- .ops = &rk808_reg_ops,
- .type = REGULATOR_VOLTAGE,
- .n_voltages = 17,
- .linear_ranges = rk808_ldo_voltage_ranges,
- .n_linear_ranges = ARRAY_SIZE(rk808_ldo_voltage_ranges),
- .vsel_reg = RK808_LDO1_ON_VSEL_REG,
- .vsel_mask = RK808_LDO_VSEL_MASK,
- .enable_reg = RK808_LDO_EN_REG,
- .enable_mask = BIT(0),
- .enable_time = 400,
- .owner = THIS_MODULE,
- }, {
- .name = "LDO_REG2",
- .supply_name = "vcc6",
- .id = RK808_ID_LDO2,
- .ops = &rk808_reg_ops,
- .type = REGULATOR_VOLTAGE,
- .n_voltages = 17,
- .linear_ranges = rk808_ldo_voltage_ranges,
- .n_linear_ranges = ARRAY_SIZE(rk808_ldo_voltage_ranges),
- .vsel_reg = RK808_LDO2_ON_VSEL_REG,
- .vsel_mask = RK808_LDO_VSEL_MASK,
- .enable_reg = RK808_LDO_EN_REG,
- .enable_mask = BIT(1),
- .enable_time = 400,
- .owner = THIS_MODULE,
- }, {
+ },
+ RK8XX_DESC(RK808_ID_DCDC4, "DCDC_REG4", "vcc4", 1800, 3300, 100,
+ RK808_BUCK4_ON_VSEL_REG, RK808_BUCK4_VSEL_MASK,
+ RK808_DCDC_EN_REG, BIT(3), 0),
+ RK8XX_DESC(RK808_ID_LDO1, "LDO_REG1", "vcc6", 1800, 3400, 100,
+ RK808_LDO1_ON_VSEL_REG, RK808_LDO_VSEL_MASK, RK808_LDO_EN_REG,
+ BIT(0), 400),
+ RK8XX_DESC(RK808_ID_LDO2, "LDO_REG2", "vcc6", 1800, 3400, 100,
+ RK808_LDO2_ON_VSEL_REG, RK808_LDO_VSEL_MASK, RK808_LDO_EN_REG,
+ BIT(1), 400),
+ {
.name = "LDO_REG3",
.supply_name = "vcc7",
+ .of_match = of_match_ptr("LDO_REG3"),
+ .regulators_node = of_match_ptr("regulators"),
.id = RK808_ID_LDO3,
- .ops = &rk808_reg_ops,
+ .ops = &rk808_reg_ops_ranges,
.type = REGULATOR_VOLTAGE,
.n_voltages = 16,
.linear_ranges = rk808_ldo3_voltage_ranges,
@@ -410,117 +432,26 @@
.enable_mask = BIT(2),
.enable_time = 400,
.owner = THIS_MODULE,
- }, {
- .name = "LDO_REG4",
- .supply_name = "vcc9",
- .id = RK808_ID_LDO4,
- .ops = &rk808_reg_ops,
- .type = REGULATOR_VOLTAGE,
- .n_voltages = 17,
- .linear_ranges = rk808_ldo_voltage_ranges,
- .n_linear_ranges = ARRAY_SIZE(rk808_ldo_voltage_ranges),
- .vsel_reg = RK808_LDO4_ON_VSEL_REG,
- .vsel_mask = RK808_LDO_VSEL_MASK,
- .enable_reg = RK808_LDO_EN_REG,
- .enable_mask = BIT(3),
- .enable_time = 400,
- .owner = THIS_MODULE,
- }, {
- .name = "LDO_REG5",
- .supply_name = "vcc9",
- .id = RK808_ID_LDO5,
- .ops = &rk808_reg_ops,
- .type = REGULATOR_VOLTAGE,
- .n_voltages = 17,
- .linear_ranges = rk808_ldo_voltage_ranges,
- .n_linear_ranges = ARRAY_SIZE(rk808_ldo_voltage_ranges),
- .vsel_reg = RK808_LDO5_ON_VSEL_REG,
- .vsel_mask = RK808_LDO_VSEL_MASK,
- .enable_reg = RK808_LDO_EN_REG,
- .enable_mask = BIT(4),
- .enable_time = 400,
- .owner = THIS_MODULE,
- }, {
- .name = "LDO_REG6",
- .supply_name = "vcc10",
- .id = RK808_ID_LDO6,
- .ops = &rk808_reg_ops,
- .type = REGULATOR_VOLTAGE,
- .n_voltages = 18,
- .linear_ranges = rk808_ldo6_voltage_ranges,
- .n_linear_ranges = ARRAY_SIZE(rk808_ldo6_voltage_ranges),
- .vsel_reg = RK808_LDO6_ON_VSEL_REG,
- .vsel_mask = RK808_LDO_VSEL_MASK,
- .enable_reg = RK808_LDO_EN_REG,
- .enable_mask = BIT(5),
- .enable_time = 400,
- .owner = THIS_MODULE,
- }, {
- .name = "LDO_REG7",
- .supply_name = "vcc7",
- .id = RK808_ID_LDO7,
- .ops = &rk808_reg_ops,
- .type = REGULATOR_VOLTAGE,
- .n_voltages = 18,
- .linear_ranges = rk808_ldo6_voltage_ranges,
- .n_linear_ranges = ARRAY_SIZE(rk808_ldo6_voltage_ranges),
- .vsel_reg = RK808_LDO7_ON_VSEL_REG,
- .vsel_mask = RK808_LDO_VSEL_MASK,
- .enable_reg = RK808_LDO_EN_REG,
- .enable_mask = BIT(6),
- .enable_time = 400,
- .owner = THIS_MODULE,
- }, {
- .name = "LDO_REG8",
- .supply_name = "vcc11",
- .id = RK808_ID_LDO8,
- .ops = &rk808_reg_ops,
- .type = REGULATOR_VOLTAGE,
- .n_voltages = 17,
- .linear_ranges = rk808_ldo_voltage_ranges,
- .n_linear_ranges = ARRAY_SIZE(rk808_ldo_voltage_ranges),
- .vsel_reg = RK808_LDO8_ON_VSEL_REG,
- .vsel_mask = RK808_LDO_VSEL_MASK,
- .enable_reg = RK808_LDO_EN_REG,
- .enable_mask = BIT(7),
- .enable_time = 400,
- .owner = THIS_MODULE,
- }, {
- .name = "SWITCH_REG1",
- .supply_name = "vcc8",
- .id = RK808_ID_SWITCH1,
- .ops = &rk808_switch_ops,
- .type = REGULATOR_VOLTAGE,
- .enable_reg = RK808_DCDC_EN_REG,
- .enable_mask = BIT(5),
- .owner = THIS_MODULE,
- }, {
- .name = "SWITCH_REG2",
- .supply_name = "vcc12",
- .id = RK808_ID_SWITCH2,
- .ops = &rk808_switch_ops,
- .type = REGULATOR_VOLTAGE,
- .enable_reg = RK808_DCDC_EN_REG,
- .enable_mask = BIT(6),
- .owner = THIS_MODULE,
},
-};
-
-static struct of_regulator_match rk808_reg_matches[] = {
- [RK808_ID_DCDC1] = { .name = "DCDC_REG1" },
- [RK808_ID_DCDC2] = { .name = "DCDC_REG2" },
- [RK808_ID_DCDC3] = { .name = "DCDC_REG3" },
- [RK808_ID_DCDC4] = { .name = "DCDC_REG4" },
- [RK808_ID_LDO1] = { .name = "LDO_REG1" },
- [RK808_ID_LDO2] = { .name = "LDO_REG2" },
- [RK808_ID_LDO3] = { .name = "LDO_REG3" },
- [RK808_ID_LDO4] = { .name = "LDO_REG4" },
- [RK808_ID_LDO5] = { .name = "LDO_REG5" },
- [RK808_ID_LDO6] = { .name = "LDO_REG6" },
- [RK808_ID_LDO7] = { .name = "LDO_REG7" },
- [RK808_ID_LDO8] = { .name = "LDO_REG8" },
- [RK808_ID_SWITCH1] = { .name = "SWITCH_REG1" },
- [RK808_ID_SWITCH2] = { .name = "SWITCH_REG2" },
+ RK8XX_DESC(RK808_ID_LDO4, "LDO_REG4", "vcc9", 1800, 3400, 100,
+ RK808_LDO4_ON_VSEL_REG, RK808_LDO_VSEL_MASK, RK808_LDO_EN_REG,
+ BIT(3), 400),
+ RK8XX_DESC(RK808_ID_LDO5, "LDO_REG5", "vcc9", 1800, 3400, 100,
+ RK808_LDO5_ON_VSEL_REG, RK808_LDO_VSEL_MASK, RK808_LDO_EN_REG,
+ BIT(4), 400),
+ RK8XX_DESC(RK808_ID_LDO6, "LDO_REG6", "vcc10", 800, 2500, 100,
+ RK808_LDO6_ON_VSEL_REG, RK808_LDO_VSEL_MASK, RK808_LDO_EN_REG,
+ BIT(5), 400),
+ RK8XX_DESC(RK808_ID_LDO7, "LDO_REG7", "vcc7", 800, 2500, 100,
+ RK808_LDO7_ON_VSEL_REG, RK808_LDO_VSEL_MASK, RK808_LDO_EN_REG,
+ BIT(6), 400),
+ RK8XX_DESC(RK808_ID_LDO8, "LDO_REG8", "vcc11", 1800, 3400, 100,
+ RK808_LDO8_ON_VSEL_REG, RK808_LDO_VSEL_MASK, RK808_LDO_EN_REG,
+ BIT(7), 400),
+ RK8XX_DESC_SWITCH(RK808_ID_SWITCH1, "SWITCH_REG1", "vcc8",
+ RK808_DCDC_EN_REG, BIT(5)),
+ RK8XX_DESC_SWITCH(RK808_ID_SWITCH2, "SWITCH_REG2", "vcc12",
+ RK808_DCDC_EN_REG, BIT(6)),
};
static int rk808_regulator_dt_parse_pdata(struct device *dev,
@@ -529,17 +460,12 @@
struct rk808_regulator_data *pdata)
{
struct device_node *np;
- int tmp, ret, i;
+ int tmp, ret = 0, i;
np = of_get_child_by_name(client_dev->of_node, "regulators");
if (!np)
return -ENXIO;
- ret = of_regulator_match(dev, np, rk808_reg_matches,
- RK808_NUM_REGULATORS);
- if (ret < 0)
- goto dt_parse_end;
-
for (i = 0; i < ARRAY_SIZE(pdata->dvs_gpio); i++) {
pdata->dvs_gpio[i] =
devm_gpiod_get_index_optional(client_dev, "dvs", i,
@@ -586,18 +512,12 @@
platform_set_drvdata(pdev, pdata);
+ config.dev = &client->dev;
+ config.driver_data = pdata;
+ config.regmap = rk808->regmap;
+
/* Instantiate the regulators */
for (i = 0; i < RK808_NUM_REGULATORS; i++) {
- if (!rk808_reg_matches[i].init_data ||
- !rk808_reg_matches[i].of_node)
- continue;
-
- config.dev = &client->dev;
- config.driver_data = pdata;
- config.regmap = rk808->regmap;
- config.of_node = rk808_reg_matches[i].of_node;
- config.init_data = rk808_reg_matches[i].init_data;
-
rk808_rdev = devm_regulator_register(&pdev->dev,
&rk808_reg[i], &config);
if (IS_ERR(rk808_rdev)) {
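
The fourteen open-coded initializers above collapse into RK8XX_DESC() entries. The macro definition itself sits outside this hunk; a plausible sketch, inferred from the argument lists used here (min/max/step in mV, scaled to uV, with n_voltages derivable from the range):

/*
 * Sketch of an RK8XX_DESC()-style initializer; field names not visible
 * in the hunk above are assumptions. For DCDC_REG4 (1800, 3300, 100)
 * this yields n_voltages = 16, matching the deleted descriptor.
 */
#define RK8XX_DESC(_id, _match, _supply, _min, _max, _step, _vreg,	\
	_vmask, _ereg, _emask, _etime)					\
	{								\
		.name		= (_match),				\
		.supply_name	= (_supply),				\
		.of_match	= of_match_ptr(_match),			\
		.regulators_node = of_match_ptr("regulators"),		\
		.type		= REGULATOR_VOLTAGE,			\
		.id		= (_id),				\
		.n_voltages	= (((_max) - (_min)) / (_step) + 1),	\
		.owner		= THIS_MODULE,				\
		.min_uV		= (_min) * 1000,			\
		.uV_step	= (_step) * 1000,			\
		.vsel_reg	= (_vreg),				\
		.vsel_mask	= (_vmask),				\
		.enable_reg	= (_ereg),				\
		.enable_mask	= (_emask),				\
		.enable_time	= (_etime),				\
		.ops		= &rk808_reg_ops,			\
	}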
diff --git a/drivers/regulator/s2mps11.c b/drivers/regulator/s2mps11.c
index 6dfa350..02fb6b4 100644
--- a/drivers/regulator/s2mps11.c
+++ b/drivers/regulator/s2mps11.c
@@ -267,6 +267,7 @@
.ops = &s2mps11_ldo_ops, \
.type = REGULATOR_VOLTAGE, \
.owner = THIS_MODULE, \
+ .ramp_delay = RAMP_DELAY_12_MVUS, \
.min_uV = MIN_800_MV, \
.uV_step = step, \
.n_voltages = S2MPS11_LDO_N_VOLTAGES, \
@@ -1237,17 +1238,7 @@
.id_table = s2mps11_pmic_id,
};
-static int __init s2mps11_pmic_init(void)
-{
- return platform_driver_register(&s2mps11_pmic_driver);
-}
-subsys_initcall(s2mps11_pmic_init);
-
-static void __exit s2mps11_pmic_exit(void)
-{
- platform_driver_unregister(&s2mps11_pmic_driver);
-}
-module_exit(s2mps11_pmic_exit);
+module_platform_driver(s2mps11_pmic_driver);
/* Module information */
MODULE_AUTHOR("Sangbeom Kim <sbkim73@samsung.com>");
diff --git a/drivers/regulator/tps6524x-regulator.c b/drivers/regulator/tps6524x-regulator.c
index 9d6ea3a..67cac26 100644
--- a/drivers/regulator/tps6524x-regulator.c
+++ b/drivers/regulator/tps6524x-regulator.c
@@ -600,7 +600,7 @@
memset(hw, 0, sizeof(struct tps6524x));
hw->dev = dev;
- hw->spi = spi_dev_get(spi);
+ hw->spi = spi;
mutex_init(&hw->lock);
for (i = 0; i < N_REGULATORS; i++, info++, init_data++) {
diff --git a/drivers/regulator/twl-regulator.c b/drivers/regulator/twl-regulator.c
index 955a6fb..faeb5ee 100644
--- a/drivers/regulator/twl-regulator.c
+++ b/drivers/regulator/twl-regulator.c
@@ -21,7 +21,7 @@
#include <linux/regulator/machine.h>
#include <linux/regulator/of_regulator.h>
#include <linux/i2c/twl.h>
-
+#include <linux/delay.h>
/*
 * The TWL4030/TWL5030/TPS659x0/TWL6030 family chips include power management, a
@@ -188,6 +188,74 @@
return grp && (val == TWL6030_CFG_STATE_ON);
}
+#define PB_I2C_BUSY BIT(0)
+#define PB_I2C_BWEN BIT(1)
+
+/* Wait until buffer empty/ready to send a word on power bus. */
+static int twl4030_wait_pb_ready(void)
+{
+ int ret;
+ int timeout = 10;
+ u8 val;
+
+ do {
+ ret = twl_i2c_read_u8(TWL_MODULE_PM_MASTER, &val,
+ TWL4030_PM_MASTER_PB_CFG);
+ if (ret < 0)
+ return ret;
+
+ if (!(val & PB_I2C_BUSY))
+ return 0;
+
+ mdelay(1);
+ timeout--;
+ } while (timeout);
+
+ return -ETIMEDOUT;
+}
+
+/* Send a word over the powerbus */
+static int twl4030_send_pb_msg(unsigned msg)
+{
+ u8 val;
+ int ret;
+
+ /* save powerbus configuration */
+ ret = twl_i2c_read_u8(TWL_MODULE_PM_MASTER, &val,
+ TWL4030_PM_MASTER_PB_CFG);
+ if (ret < 0)
+ return ret;
+
+ /* Enable i2c access to powerbus */
+ ret = twl_i2c_write_u8(TWL_MODULE_PM_MASTER, val | PB_I2C_BWEN,
+ TWL4030_PM_MASTER_PB_CFG);
+ if (ret < 0)
+ return ret;
+
+ ret = twl4030_wait_pb_ready();
+ if (ret < 0)
+ return ret;
+
+ ret = twl_i2c_write_u8(TWL_MODULE_PM_MASTER, msg >> 8,
+ TWL4030_PM_MASTER_PB_WORD_MSB);
+ if (ret < 0)
+ return ret;
+
+ ret = twl_i2c_write_u8(TWL_MODULE_PM_MASTER, msg & 0xff,
+ TWL4030_PM_MASTER_PB_WORD_LSB);
+ if (ret < 0)
+ return ret;
+
+ ret = twl4030_wait_pb_ready();
+ if (ret < 0)
+ return ret;
+
+ /* Restore powerbus configuration */
+ return twl_i2c_write_u8(TWL_MODULE_PM_MASTER, val,
+ TWL4030_PM_MASTER_PB_CFG);
+}
+
static int twl4030reg_enable(struct regulator_dev *rdev)
{
struct twlreg_info *info = rdev_get_drvdata(rdev);
@@ -303,7 +371,6 @@
{
struct twlreg_info *info = rdev_get_drvdata(rdev);
unsigned message;
- int status;
/* We can only set the mode through state machine commands... */
switch (mode) {
@@ -317,20 +384,19 @@
return -EINVAL;
}
- /* Ensure the resource is associated with some group */
- status = twlreg_grp(rdev);
- if (status < 0)
- return status;
- if (!(status & (P3_GRP_4030 | P2_GRP_4030 | P1_GRP_4030)))
- return -EACCES;
+ return twl4030_send_pb_msg(message);
+}
- status = twl_i2c_write_u8(TWL_MODULE_PM_MASTER,
- message >> 8, TWL4030_PM_MASTER_PB_WORD_MSB);
- if (status < 0)
- return status;
-
- return twl_i2c_write_u8(TWL_MODULE_PM_MASTER,
- message & 0xff, TWL4030_PM_MASTER_PB_WORD_LSB);
+static inline unsigned int twl4030reg_map_mode(unsigned int mode)
+{
+ switch (mode) {
+ case RES_STATE_ACTIVE:
+ return REGULATOR_MODE_NORMAL;
+ case RES_STATE_SLEEP:
+ return REGULATOR_MODE_STANDBY;
+ default:
+ return -EINVAL;
+ }
}
static int twl6030reg_set_mode(struct regulator_dev *rdev, unsigned mode)
@@ -835,10 +901,11 @@
#define TWL4030_FIXED_LDO(label, offset, mVolts, num, turnon_delay, \
remap_conf) \
TWL_FIXED_LDO(label, offset, mVolts, num, turnon_delay, \
- remap_conf, TWL4030, twl4030fixed_ops)
+ remap_conf, TWL4030, twl4030fixed_ops, \
+ twl4030reg_map_mode)
#define TWL6030_FIXED_LDO(label, offset, mVolts, turnon_delay) \
TWL_FIXED_LDO(label, offset, mVolts, 0x0, turnon_delay, \
- 0x0, TWL6030, twl6030fixed_ops)
+ 0x0, TWL6030, twl6030fixed_ops, 0x0)
#define TWL4030_ADJUSTABLE_LDO(label, offset, num, turnon_delay, remap_conf) \
static const struct twlreg_info TWL4030_INFO_##label = { \
@@ -855,6 +922,7 @@
.type = REGULATOR_VOLTAGE, \
.owner = THIS_MODULE, \
.enable_time = turnon_delay, \
+ .of_map_mode = twl4030reg_map_mode, \
}, \
}
@@ -870,6 +938,7 @@
.type = REGULATOR_VOLTAGE, \
.owner = THIS_MODULE, \
.enable_time = turnon_delay, \
+ .of_map_mode = twl4030reg_map_mode, \
}, \
}
@@ -915,7 +984,7 @@
}
#define TWL_FIXED_LDO(label, offset, mVolts, num, turnon_delay, remap_conf, \
- family, operations) \
+ family, operations, map_mode) \
static const struct twlreg_info TWLFIXED_INFO_##label = { \
.base = offset, \
.id = num, \
@@ -930,6 +999,7 @@
.owner = THIS_MODULE, \
.min_uV = mVolts * 1000, \
.enable_time = turnon_delay, \
+ .of_map_mode = map_mode, \
}, \
}
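
The powerbus helpers added in this file centralise the MSB/LSB handshake that twl4030reg_set_mode() previously open-coded. A caller-side sketch; MSG_SINGULAR() and DEV_GRP_P1 are assumed to come from <linux/i2c/twl.h> (already included above), and only the RES_STATE_* names appear in the hunks themselves:

/*
 * Hypothetical caller of twl4030_send_pb_msg(): pack a singular power
 * message for one resource and let the helper do the busy-wait and the
 * two 8-bit writes (message >> 8, then message & 0xff).
 */
static int example_put_resource_to_sleep(u8 res_id)
{
	unsigned message = MSG_SINGULAR(DEV_GRP_P1, res_id, RES_STATE_SLEEP);

	return twl4030_send_pb_msg(message);
}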
diff --git a/drivers/soc/qcom/spm.c b/drivers/soc/qcom/spm.c
index 5548a31..1fcbb22 100644
--- a/drivers/soc/qcom/spm.c
+++ b/drivers/soc/qcom/spm.c
@@ -274,7 +274,7 @@
return per_cpu(cpu_spm_drv, cpu) ? 0 : -ENXIO;
}
-static struct cpuidle_ops qcom_cpuidle_ops __initdata = {
+static const struct cpuidle_ops qcom_cpuidle_ops __initconst = {
.suspend = qcom_idle_enter,
.init = qcom_cpuidle_init,
};
diff --git a/drivers/staging/unisys/visorbus/visorchipset.c b/drivers/staging/unisys/visorbus/visorchipset.c
index 5fbda7b..9cf4f84 100644
--- a/drivers/staging/unisys/visorbus/visorchipset.c
+++ b/drivers/staging/unisys/visorbus/visorchipset.c
@@ -2425,7 +2425,7 @@
{
unsigned int eax, ebx, ecx, edx;
- if (cpu_has_hypervisor) {
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
/* check the ID */
cpuid(UNISYS_SPAR_LEAF_ID, &eax, &ebx, &ecx, &edx);
return (ebx == UNISYS_SPAR_ID_EBX) &&
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index 3c3dc4a..d89d60c 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -338,31 +338,9 @@
hot & critical. The critical trip point default value is set by
underlying BIOS/Firmware.
-config INT340X_THERMAL
- tristate "ACPI INT340X thermal drivers"
- depends on X86 && ACPI
- select THERMAL_GOV_USER_SPACE
- select ACPI_THERMAL_REL
- select ACPI_FAN
- select INTEL_SOC_DTS_IOSF_CORE
- select THERMAL_WRITABLE_TRIPS
- help
- Newer laptops and tablets that use ACPI may have thermal sensors and
- other devices with thermal control capabilities outside the core
- CPU/SOC, for thermal safety reasons.
- They are exposed for the OS to use via the INT3400 ACPI device object
- as the master, and INT3401~INT340B ACPI device objects as the slaves.
- Enable this to expose the temperature information and cooling ability
- from these objects to userspace via the normal thermal framework.
- This means that a wide range of applications and GUI widgets can show
- the information to the user or use this information for making
- decisions. For example, the Intel Thermal Daemon can use this
- information to allow the user to select his laptop to run without
- turning on the fans.
-
-config ACPI_THERMAL_REL
- tristate
- depends on ACPI
+menu "ACPI INT340X thermal drivers"
+source drivers/thermal/int340x_thermal/Kconfig
+endmenu
config INTEL_PCH_THERMAL
tristate "Intel PCH Thermal Reporting Driver"
diff --git a/drivers/thermal/int340x_thermal/Kconfig b/drivers/thermal/int340x_thermal/Kconfig
new file mode 100644
index 0000000..0582bd1
--- /dev/null
+++ b/drivers/thermal/int340x_thermal/Kconfig
@@ -0,0 +1,42 @@
+#
+# ACPI INT340x thermal drivers configuration
+#
+
+config INT340X_THERMAL
+ tristate "ACPI INT340X thermal drivers"
+ depends on X86 && ACPI
+ select THERMAL_GOV_USER_SPACE
+ select ACPI_THERMAL_REL
+ select ACPI_FAN
+ select INTEL_SOC_DTS_IOSF_CORE
+ help
+ Newer laptops and tablets that use ACPI may have thermal sensors and
+ other devices with thermal control capabilities outside the core
+ CPU/SOC, for thermal safety reasons.
+ They are exposed for the OS to use via the INT3400 ACPI device object
+ as the master, and INT3401~INT340B ACPI device objects as the slaves.
+ Enable this to expose the temperature information and cooling ability
+ from these objects to userspace via the normal thermal framework.
+ This means that a wide range of applications and GUI widgets can show
+ the information to the user or use this information for making
+ decisions. For example, the Intel Thermal Daemon can use this
+ information to allow the user to run the laptop without
+ turning on the fans.
+
+config ACPI_THERMAL_REL
+ tristate
+ depends on ACPI
+
+if INT340X_THERMAL
+
+config INT3406_THERMAL
+ tristate "ACPI INT3406 display thermal driver"
+ depends on ACPI_VIDEO
+ help
+ The display thermal device represents the LED/LCD display panel
+ that may or may not include touch support. The main function of
+ the display thermal device is to allow control of the display
+ brightness in order to address a thermal condition or to reduce
+ power consumed by the display device.
+
+endif
diff --git a/drivers/thermal/int340x_thermal/Makefile b/drivers/thermal/int340x_thermal/Makefile
index ba77a34..df0df05 100644
--- a/drivers/thermal/int340x_thermal/Makefile
+++ b/drivers/thermal/int340x_thermal/Makefile
@@ -3,4 +3,5 @@
obj-$(CONFIG_INT340X_THERMAL) += int3402_thermal.o
obj-$(CONFIG_INT340X_THERMAL) += int3403_thermal.o
obj-$(CONFIG_INT340X_THERMAL) += processor_thermal_device.o
+obj-$(CONFIG_INT3406_THERMAL) += int3406_thermal.o
obj-$(CONFIG_ACPI_THERMAL_REL) += acpi_thermal_rel.o
diff --git a/drivers/thermal/int340x_thermal/int3406_thermal.c b/drivers/thermal/int340x_thermal/int3406_thermal.c
new file mode 100644
index 0000000..13d431c
--- /dev/null
+++ b/drivers/thermal/int340x_thermal/int3406_thermal.c
@@ -0,0 +1,236 @@
+/*
+ * INT3406 thermal driver for display participant device
+ *
+ * Copyright (C) 2016, Intel Corporation
+ * Authors: Aaron Lu <aaron.lu@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/acpi.h>
+#include <linux/backlight.h>
+#include <linux/thermal.h>
+#include <acpi/video.h>
+
+#define INT3406_BRIGHTNESS_LIMITS_CHANGED 0x80
+
+struct int3406_thermal_data {
+ int upper_limit;
+ int upper_limit_index;
+ int lower_limit;
+ int lower_limit_index;
+ acpi_handle handle;
+ struct acpi_video_device_brightness *br;
+ struct backlight_device *raw_bd;
+ struct thermal_cooling_device *cooling_dev;
+};
+
+static int int3406_thermal_to_raw(int level, struct int3406_thermal_data *d)
+{
+ int max_level = d->br->levels[d->br->count - 1];
+ int raw_max = d->raw_bd->props.max_brightness;
+
+ return level * raw_max / max_level;
+}
+
+static int int3406_thermal_to_acpi(int level, struct int3406_thermal_data *d)
+{
+ int raw_max = d->raw_bd->props.max_brightness;
+ int max_level = d->br->levels[d->br->count - 1];
+
+ return level * max_level / raw_max;
+}
+
+static int
+int3406_thermal_get_max_state(struct thermal_cooling_device *cooling_dev,
+ unsigned long *state)
+{
+ struct int3406_thermal_data *d = cooling_dev->devdata;
+ int index = d->lower_limit_index ? d->lower_limit_index : 2;
+
+ *state = d->br->count - 1 - index;
+ return 0;
+}
+
+static int
+int3406_thermal_set_cur_state(struct thermal_cooling_device *cooling_dev,
+ unsigned long state)
+{
+ struct int3406_thermal_data *d = cooling_dev->devdata;
+ int level, raw_level;
+
+ if (state > d->br->count - 3)
+ return -EINVAL;
+
+ state = d->br->count - 1 - state;
+ level = d->br->levels[state];
+
+ if ((d->upper_limit && level > d->upper_limit) ||
+ (d->lower_limit && level < d->lower_limit))
+ return -EINVAL;
+
+ raw_level = int3406_thermal_to_raw(level, d);
+ return backlight_device_set_brightness(d->raw_bd, raw_level);
+}
+
+static int
+int3406_thermal_get_cur_state(struct thermal_cooling_device *cooling_dev,
+ unsigned long *state)
+{
+ struct int3406_thermal_data *d = cooling_dev->devdata;
+ int raw_level, level, i;
+ int *levels = d->br->levels;
+
+ raw_level = d->raw_bd->props.brightness;
+ level = int3406_thermal_to_acpi(raw_level, d);
+
+ /*
+ * There is no 1:1 mapping between the firmware interface level and
+ * the raw interface level, so we have to find one that is close enough.
+ */
+ for (i = 2; i < d->br->count; i++) {
+ if (level < levels[i]) {
+ if (i == 2)
+ break;
+ if ((level - levels[i - 1]) < (levels[i] - level))
+ i--;
+ break;
+ }
+ }
+
+ *state = d->br->count - 1 - i;
+ return 0;
+}
+
+static const struct thermal_cooling_device_ops video_cooling_ops = {
+ .get_max_state = int3406_thermal_get_max_state,
+ .get_cur_state = int3406_thermal_get_cur_state,
+ .set_cur_state = int3406_thermal_set_cur_state,
+};
+
+static int int3406_thermal_get_index(int *array, int nr, int value)
+{
+ int i;
+
+ for (i = 0; i < nr; i++) {
+ if (array[i] == value)
+ break;
+ }
+ return i == nr ? -ENOENT : i;
+}
+
+static void int3406_thermal_get_limit(struct int3406_thermal_data *d)
+{
+ acpi_status status;
+ unsigned long long lower_limit, upper_limit;
+ int index;
+
+ status = acpi_evaluate_integer(d->handle, "DDDL", NULL, &lower_limit);
+ if (ACPI_SUCCESS(status)) {
+ index = int3406_thermal_get_index(d->br->levels, d->br->count,
+ lower_limit);
+ if (index > 0) {
+ d->lower_limit = (int)lower_limit;
+ d->lower_limit_index = index;
+ }
+ }
+
+ status = acpi_evaluate_integer(d->handle, "DDPC", NULL, &upper_limit);
+ if (ACPI_SUCCESS(status)) {
+ index = int3406_thermal_get_index(d->br->levels, d->br->count,
+ upper_limit);
+ if (index > 0) {
+ d->upper_limit = (int)upper_limit;
+ d->upper_limit_index = index;
+ }
+ }
+}
+
+static void int3406_notify(acpi_handle handle, u32 event, void *data)
+{
+ if (event == INT3406_BRIGHTNESS_LIMITS_CHANGED)
+ int3406_thermal_get_limit(data);
+}
+
+static int int3406_thermal_probe(struct platform_device *pdev)
+{
+ struct acpi_device *adev = ACPI_COMPANION(&pdev->dev);
+ struct int3406_thermal_data *d;
+ struct backlight_device *bd;
+ int ret;
+
+ if (!ACPI_HANDLE(&pdev->dev))
+ return -ENODEV;
+
+ d = devm_kzalloc(&pdev->dev, sizeof(*d), GFP_KERNEL);
+ if (!d)
+ return -ENOMEM;
+ d->handle = ACPI_HANDLE(&pdev->dev);
+
+ bd = backlight_device_get_by_type(BACKLIGHT_RAW);
+ if (!bd)
+ return -ENODEV;
+ d->raw_bd = bd;
+
+ ret = acpi_video_get_levels(ACPI_COMPANION(&pdev->dev), &d->br);
+ if (ret)
+ return ret;
+
+ int3406_thermal_get_limit(d);
+
+ d->cooling_dev = thermal_cooling_device_register(acpi_device_bid(adev),
+ d, &video_cooling_ops);
+ if (IS_ERR(d->cooling_dev))
+ goto err;
+
+ ret = acpi_install_notify_handler(adev->handle, ACPI_DEVICE_NOTIFY,
+ int3406_notify, d);
+ if (ret)
+ goto err_cdev;
+
+ platform_set_drvdata(pdev, d);
+
+ return 0;
+
+err_cdev:
+ thermal_cooling_device_unregister(d->cooling_dev);
+err:
+ kfree(d->br);
+ return -ENODEV;
+}
+
+static int int3406_thermal_remove(struct platform_device *pdev)
+{
+ struct int3406_thermal_data *d = platform_get_drvdata(pdev);
+
+ thermal_cooling_device_unregister(d->cooling_dev);
+ kfree(d->br);
+ return 0;
+}
+
+static const struct acpi_device_id int3406_thermal_match[] = {
+ {"INT3406", 0},
+ {}
+};
+
+MODULE_DEVICE_TABLE(acpi, int3406_thermal_match);
+
+static struct platform_driver int3406_thermal_driver = {
+ .probe = int3406_thermal_probe,
+ .remove = int3406_thermal_remove,
+ .driver = {
+ .name = "int3406 thermal",
+ .owner = THIS_MODULE,
+ .acpi_match_table = int3406_thermal_match,
+ },
+};
+
+module_platform_driver(int3406_thermal_driver);
+
+MODULE_DESCRIPTION("INT3406 Thermal driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/video/backlight/backlight.c b/drivers/video/backlight/backlight.c
index bddc8b1..288318a 100644
--- a/drivers/video/backlight/backlight.c
+++ b/drivers/video/backlight/backlight.c
@@ -164,6 +164,30 @@
return sprintf(buf, "%d\n", bd->props.brightness);
}
+int backlight_device_set_brightness(struct backlight_device *bd,
+ unsigned long brightness)
+{
+ int rc = -ENXIO;
+
+ mutex_lock(&bd->ops_lock);
+ if (bd->ops) {
+ if (brightness > bd->props.max_brightness)
+ rc = -EINVAL;
+ else {
+ pr_debug("set brightness to %lu\n", brightness);
+ bd->props.brightness = brightness;
+ backlight_update_status(bd);
+ rc = 0;
+ }
+ }
+ mutex_unlock(&bd->ops_lock);
+
+ backlight_generate_event(bd, BACKLIGHT_UPDATE_SYSFS);
+
+ return rc;
+}
+EXPORT_SYMBOL(backlight_device_set_brightness);
+
static ssize_t brightness_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
@@ -175,24 +199,9 @@
if (rc)
return rc;
- rc = -ENXIO;
+ rc = backlight_device_set_brightness(bd, brightness);
- mutex_lock(&bd->ops_lock);
- if (bd->ops) {
- if (brightness > bd->props.max_brightness)
- rc = -EINVAL;
- else {
- pr_debug("set brightness to %lu\n", brightness);
- bd->props.brightness = brightness;
- backlight_update_status(bd);
- rc = count;
- }
- }
- mutex_unlock(&bd->ops_lock);
-
- backlight_generate_event(bd, BACKLIGHT_UPDATE_SYSFS);
-
- return rc;
+ return rc ? rc : count;
}
static DEVICE_ATTR_RW(brightness);
@@ -380,7 +389,7 @@
}
EXPORT_SYMBOL(backlight_device_register);
-bool backlight_device_registered(enum backlight_type type)
+struct backlight_device *backlight_device_get_by_type(enum backlight_type type)
{
bool found = false;
struct backlight_device *bd;
@@ -394,9 +403,9 @@
}
mutex_unlock(&backlight_dev_list_mutex);
- return found;
+ return found ? bd : NULL;
}
-EXPORT_SYMBOL(backlight_device_registered);
+EXPORT_SYMBOL(backlight_device_get_by_type);
/**
* backlight_device_unregister - unregisters a backlight device object.
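
Both helpers exported above exist to serve the INT3406 driver earlier in this series. A minimal usage sketch, mirroring what that driver does:

/*
 * Look up the raw (hardware-interface) backlight device and drive its
 * brightness directly; backlight_device_set_brightness() rejects values
 * above props.max_brightness with -EINVAL.
 */
static int dim_raw_backlight(unsigned long level)
{
	struct backlight_device *bd;

	bd = backlight_device_get_by_type(BACKLIGHT_RAW);
	if (!bd)
		return -ENODEV;

	return backlight_device_set_brightness(bd, level);
}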
diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig
index 983280e..e5a391a 100644
--- a/drivers/video/fbdev/Kconfig
+++ b/drivers/video/fbdev/Kconfig
@@ -761,7 +761,7 @@
config FB_EFI
bool "EFI-based Framebuffer Support"
- depends on (FB = y) && X86 && EFI
+ depends on (FB = y) && !IA64 && EFI
select FB_CFB_FILLRECT
select FB_CFB_COPYAREA
select FB_CFB_IMAGEBLIT
diff --git a/drivers/video/fbdev/efifb.c b/drivers/video/fbdev/efifb.c
index 95d293b..f4c045c 100644
--- a/drivers/video/fbdev/efifb.c
+++ b/drivers/video/fbdev/efifb.c
@@ -6,16 +6,14 @@
*
*/
-#include <linux/module.h>
#include <linux/kernel.h>
+#include <linux/efi.h>
#include <linux/errno.h>
#include <linux/fb.h>
#include <linux/platform_device.h>
#include <linux/screen_info.h>
-#include <linux/dmi.h>
-#include <linux/pci.h>
#include <video/vga.h>
-#include <asm/sysfb.h>
+#include <asm/efi.h>
static bool request_mem_succeeded = false;
@@ -85,21 +83,13 @@
static int efifb_setup(char *options)
{
char *this_opt;
- int i;
if (options && *options) {
while ((this_opt = strsep(&options, ",")) != NULL) {
if (!*this_opt) continue;
- for (i = 0; i < M_UNKNOWN; i++) {
- if (efifb_dmi_list[i].base != 0 &&
- !strcmp(this_opt, efifb_dmi_list[i].optname)) {
- screen_info.lfb_base = efifb_dmi_list[i].base;
- screen_info.lfb_linelength = efifb_dmi_list[i].stride;
- screen_info.lfb_width = efifb_dmi_list[i].width;
- screen_info.lfb_height = efifb_dmi_list[i].height;
- }
- }
+ efifb_setup_from_dmi(&screen_info, this_opt);
+
if (!strncmp(this_opt, "base:", 5))
screen_info.lfb_base = simple_strtoul(this_opt+5, NULL, 0);
else if (!strncmp(this_opt, "stride:", 7))
@@ -338,5 +328,4 @@
.remove = efifb_remove,
};
-module_platform_driver(efifb_driver);
-MODULE_LICENSE("GPL");
+builtin_platform_driver(efifb_driver);
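
builtin_platform_driver() is the non-modular counterpart of module_platform_driver(): it emits only the init half and no exit path, matching the driver's bool (FB=y only) Kconfig entry. Roughly:

/*
 * Approximate expansion of builtin_platform_driver(efifb_driver);
 * there is no module_exit(), since the code can no longer be built
 * as a module.
 */
static int __init efifb_driver_init(void)
{
	return platform_driver_register(&efifb_driver);
}
device_initcall(efifb_driver_init);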
diff --git a/drivers/xen/efi.c b/drivers/xen/efi.c
index be7e56a..e9d2135 100644
--- a/drivers/xen/efi.c
+++ b/drivers/xen/efi.c
@@ -316,7 +316,6 @@
.get_next_high_mono_count = xen_efi_get_next_high_mono_count,
.reset_system = NULL, /* Functionality provided by Xen. */
.set_virtual_address_map = NULL, /* Not used under Xen. */
- .memmap = NULL, /* Not used under Xen. */
.flags = 0 /* Initialized later. */
};
diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c
index d48e0d2..5f22e74 100644
--- a/fs/efivarfs/file.c
+++ b/fs/efivarfs/file.c
@@ -157,7 +157,7 @@
return 0;
}
-long
+static long
efivarfs_file_ioctl(struct file *file, unsigned int cmd, unsigned long p)
{
void __user *arg = (void __user *)p;
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index 553c5d2..9cb54a3 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -216,8 +216,7 @@
INIT_LIST_HEAD(&efivarfs_list);
- err = efivar_init(efivarfs_callback, (void *)sb, false,
- true, &efivarfs_list);
+ err = efivar_init(efivarfs_callback, (void *)sb, true, &efivarfs_list);
if (err)
__efivar_entry_iter(efivarfs_destroy, &efivarfs_list, NULL, NULL);
diff --git a/fs/namei.c b/fs/namei.c
index 30145f8..42f8ca0 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1794,30 +1794,49 @@
return hash_64(hash, 32);
}
+/*
+ * This is George Marsaglia's XORSHIFT generator.
+ * It implements a maximum-period LFSR in only a few
+ * instructions. It also has the property (required
+ * by hash_name()) that mix_hash(0) = 0.
+ */
+static inline unsigned long mix_hash(unsigned long hash)
+{
+ hash ^= hash << 13;
+ hash ^= hash >> 7;
+ hash ^= hash << 17;
+ return hash;
+}
+
#else /* 32-bit case */
#define fold_hash(x) (x)
+static inline unsigned long mix_hash(unsigned long hash)
+{
+ hash ^= hash << 13;
+ hash ^= hash >> 17;
+ hash ^= hash << 5;
+ return hash;
+}
+
#endif
unsigned int full_name_hash(const unsigned char *name, unsigned int len)
{
- unsigned long a, mask;
- unsigned long hash = 0;
+ unsigned long a, hash = 0;
for (;;) {
a = load_unaligned_zeropad(name);
if (len < sizeof(unsigned long))
break;
- hash += a;
- hash *= 9;
+ hash = mix_hash(hash + a);
name += sizeof(unsigned long);
len -= sizeof(unsigned long);
if (!len)
goto done;
}
- mask = bytemask_from_count(len);
- hash += mask & a;
+ hash += a & bytemask_from_count(len);
done:
return fold_hash(hash);
}
@@ -1835,7 +1854,7 @@
hash = a = 0;
len = -sizeof(unsigned long);
do {
- hash = (hash + a) * 9;
+ hash = mix_hash(hash + a);
len += sizeof(unsigned long);
a = load_unaligned_zeropad(name+len);
b = a ^ REPEAT_BYTE('/');
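
A standalone demonstration of the 64-bit mix_hash() above; the (13, 7, 17) shift triple is one of Marsaglia's full-period choices, so any non-zero state cycles through all 2^64 - 1 non-zero values, and zero is the only fixed point (the property hash_name() relies on). Buildable as a plain userspace program on a 64-bit machine:

#include <stdio.h>

static unsigned long mix_hash(unsigned long hash)
{
	hash ^= hash << 13;
	hash ^= hash >> 7;
	hash ^= hash << 17;
	return hash;
}

int main(void)
{
	unsigned long h = 1;
	int i;

	printf("mix_hash(0) = %lu\n", mix_hash(0));	/* prints 0 */
	for (i = 0; i < 4; i++) {
		h = mix_hash(h);
		printf("step %d: %#lx\n", i + 1, h);
	}
	return 0;
}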
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 3a93250..4d40e9b 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -87,7 +87,7 @@
.package.elements = (eles) \
}
-bool acpi_dev_present(const char *hid);
+bool acpi_dev_found(const char *hid);
#ifdef CONFIG_ACPI
diff --git a/include/acpi/acpi_drivers.h b/include/acpi/acpi_drivers.h
index 29c6912..797ae2e 100644
--- a/include/acpi/acpi_drivers.h
+++ b/include/acpi/acpi_drivers.h
@@ -78,7 +78,6 @@
/* ACPI PCI Interrupt Link (pci_link.c) */
-int acpi_irq_penalty_init(void);
int acpi_pci_link_allocate_irq(acpi_handle handle, int index, int *triggering,
int *polarity, char **name);
int acpi_pci_link_free_irq(acpi_handle handle);
diff --git a/include/acpi/acpiosxf.h b/include/acpi/acpiosxf.h
index d1e34d1..562603d 100644
--- a/include/acpi/acpiosxf.h
+++ b/include/acpi/acpiosxf.h
@@ -96,7 +96,7 @@
#ifndef ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_predefined_override
acpi_status
acpi_os_predefined_override(const struct acpi_predefined_names *init_val,
- char **new_val);
+ acpi_string *new_val);
#endif
#ifndef ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_table_override
@@ -108,7 +108,7 @@
#ifndef ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_physical_table_override
acpi_status
acpi_os_physical_table_override(struct acpi_table_header *existing_table,
- acpi_physical_address * new_address,
+ acpi_physical_address *new_address,
u32 *new_table_length);
#endif
@@ -203,7 +203,7 @@
#ifndef ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_get_physical_address
acpi_status
acpi_os_get_physical_address(void *logical_address,
- acpi_physical_address * physical_address);
+ acpi_physical_address *physical_address);
#endif
/*
@@ -379,14 +379,14 @@
acpi_os_get_table_by_name(char *signature,
u32 instance,
struct acpi_table_header **table,
- acpi_physical_address * address);
+ acpi_physical_address *address);
#endif
#ifndef ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_get_table_by_index
acpi_status
acpi_os_get_table_by_index(u32 index,
struct acpi_table_header **table,
- u32 *instance, acpi_physical_address * address);
+ u32 *instance, acpi_physical_address *address);
#endif
#ifndef ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_get_table_by_address
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 1755697..4e4c214 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -46,7 +46,7 @@
/* Current ACPICA subsystem version in YYYYMMDD format */
-#define ACPI_CA_VERSION 0x20160108
+#define ACPI_CA_VERSION 0x20160422
#include <acpi/acconfig.h>
#include <acpi/actypes.h>
@@ -192,7 +192,7 @@
/*
* Optionally support group module level code.
*/
-ACPI_INIT_GLOBAL(u8, acpi_gbl_group_module_level_code, TRUE);
+ACPI_INIT_GLOBAL(u8, acpi_gbl_group_module_level_code, FALSE);
/*
* Optionally use 32-bit FADT addresses if and when there is a conflict
@@ -484,8 +484,8 @@
ACPI_EXTERNAL_RETURN_STATUS(acpi_status __init acpi_reallocate_root_table(void))
ACPI_EXTERNAL_RETURN_STATUS(acpi_status __init
- acpi_find_root_pointer(acpi_physical_address *
- rsdp_address))
+ acpi_find_root_pointer(acpi_physical_address
+ *rsdp_address))
ACPI_EXTERNAL_RETURN_STATUS(acpi_status
acpi_get_table_header(acpi_string signature,
u32 instance,
@@ -530,7 +530,7 @@
ACPI_EXTERNAL_RETURN_STATUS(acpi_status
acpi_get_handle(acpi_handle parent,
acpi_string pathname,
- acpi_handle * ret_handle))
+ acpi_handle *ret_handle))
ACPI_EXTERNAL_RETURN_STATUS(acpi_status
acpi_attach_data(acpi_handle object,
acpi_object_handler handler,
@@ -575,15 +575,15 @@
acpi_get_next_object(acpi_object_type type,
acpi_handle parent,
acpi_handle child,
- acpi_handle * out_handle))
+ acpi_handle *out_handle))
ACPI_EXTERNAL_RETURN_STATUS(acpi_status
acpi_get_type(acpi_handle object,
- acpi_object_type * out_type))
+ acpi_object_type *out_type))
ACPI_EXTERNAL_RETURN_STATUS(acpi_status
acpi_get_parent(acpi_handle object,
- acpi_handle * out_handle))
+ acpi_handle *out_handle))
/*
* Handler interfaces
@@ -755,7 +755,7 @@
ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status
acpi_get_gpe_device(u32 gpe_index,
- acpi_handle * gpe_device))
+ acpi_handle *gpe_device))
ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status
acpi_install_gpe_block(acpi_handle gpe_device,
@@ -771,8 +771,8 @@
* Resource interfaces
*/
typedef
-acpi_status(*acpi_walk_resource_callback) (struct acpi_resource * resource,
- void *context);
+acpi_status (*acpi_walk_resource_callback) (struct acpi_resource * resource,
+ void *context);
ACPI_EXTERNAL_RETURN_STATUS(acpi_status
acpi_get_vendor_resource(acpi_handle device,
@@ -938,7 +938,8 @@
ACPI_APP_DEPENDENT_RETURN_VOID(ACPI_PRINTF_LIKE(1)
void ACPI_INTERNAL_VAR_XFACE
acpi_log_error(const char *format, ...))
- acpi_status acpi_initialize_debugger(void);
+
+acpi_status acpi_initialize_debugger(void);
void acpi_terminate_debugger(void);
diff --git a/include/acpi/acrestyp.h b/include/acpi/acrestyp.h
index cf2acb8..16c1892 100644
--- a/include/acpi/acrestyp.h
+++ b/include/acpi/acrestyp.h
@@ -417,6 +417,7 @@
u8 type; \
u8 producer_consumer; /* For values, see Producer/Consumer above */\
u8 slave_mode; \
+ u8 connection_sharing; \
u8 type_revision_id; \
u16 type_data_length; \
u16 vendor_length; \
diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h
index 0cb1a00..c19700e 100644
--- a/include/acpi/actbl.h
+++ b/include/acpi/actbl.h
@@ -223,7 +223,7 @@
/*******************************************************************************
*
* FADT - Fixed ACPI Description Table (Signature "FACP")
- * Version 4
+ * Version 6
*
******************************************************************************/
@@ -413,4 +413,6 @@
#define ACPI_FADT_V5_SIZE (u32) (ACPI_FADT_OFFSET (hypervisor_id))
#define ACPI_FADT_V6_SIZE (u32) (sizeof (struct acpi_table_fadt))
+#define ACPI_FADT_CONFORMANCE "ACPI 6.1 (FADT version 6)"
+
#endif /* __ACTBL_H__ */
diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h
index 16e0136..796d6ba 100644
--- a/include/acpi/actbl1.h
+++ b/include/acpi/actbl1.h
@@ -236,7 +236,8 @@
ACPI_EINJ_CHECK_BUSY_STATUS = 6,
ACPI_EINJ_GET_COMMAND_STATUS = 7,
ACPI_EINJ_SET_ERROR_TYPE_WITH_ADDRESS = 8,
- ACPI_EINJ_ACTION_RESERVED = 9, /* 9 and greater are reserved */
+ ACPI_EINJ_GET_EXECUTE_TIMINGS = 9,
+ ACPI_EINJ_ACTION_RESERVED = 10, /* 10 and greater are reserved */
ACPI_EINJ_TRIGGER_ERROR = 0xFF /* Except for this value */
};
@@ -348,7 +349,8 @@
ACPI_ERST_GET_ERROR_RANGE = 13,
ACPI_ERST_GET_ERROR_LENGTH = 14,
ACPI_ERST_GET_ERROR_ATTRIBUTES = 15,
- ACPI_ERST_ACTION_RESERVED = 16 /* 16 and greater are reserved */
+ ACPI_ERST_EXECUTE_TIMINGS = 16,
+ ACPI_ERST_ACTION_RESERVED = 17 /* 17 and greater are reserved */
};
/* Values for Instruction field above */
@@ -427,7 +429,8 @@
ACPI_HEST_TYPE_AER_ENDPOINT = 7,
ACPI_HEST_TYPE_AER_BRIDGE = 8,
ACPI_HEST_TYPE_GENERIC_ERROR = 9,
- ACPI_HEST_TYPE_RESERVED = 10 /* 10 and greater are reserved */
+ ACPI_HEST_TYPE_GENERIC_ERROR_V2 = 10,
+ ACPI_HEST_TYPE_RESERVED = 11 /* 11 and greater are reserved */
};
/*
@@ -506,7 +509,11 @@
ACPI_HEST_NOTIFY_NMI = 4,
ACPI_HEST_NOTIFY_CMCI = 5, /* ACPI 5.0 */
ACPI_HEST_NOTIFY_MCE = 6, /* ACPI 5.0 */
- ACPI_HEST_NOTIFY_RESERVED = 7 /* 7 and greater are reserved */
+ ACPI_HEST_NOTIFY_GPIO = 7, /* ACPI 6.0 */
+ ACPI_HEST_NOTIFY_SEA = 8, /* ACPI 6.1 */
+ ACPI_HEST_NOTIFY_SEI = 9, /* ACPI 6.1 */
+ ACPI_HEST_NOTIFY_GSIV = 10, /* ACPI 6.1 */
+ ACPI_HEST_NOTIFY_RESERVED = 11 /* 11 and greater are reserved */
};
/* Values for config_write_enable bitfield above */
@@ -603,6 +610,24 @@
u32 error_block_length;
};
+/* 10: Generic Hardware Error Source, version 2 */
+
+struct acpi_hest_generic_v2 {
+ struct acpi_hest_header header;
+ u16 related_source_id;
+ u8 reserved;
+ u8 enabled;
+ u32 records_to_preallocate;
+ u32 max_sections_per_record;
+ u32 max_raw_data_length;
+ struct acpi_generic_address error_status_address;
+ struct acpi_hest_notify notify;
+ u32 error_block_length;
+ struct acpi_generic_address read_ack_register;
+ u64 read_ack_preserve;
+ u64 read_ack_write;
+};
+
/* Generic Error Status block */
struct acpi_hest_generic_status {
@@ -634,6 +659,33 @@
u8 fru_text[20];
};
+/* Extension for revision 0x0300 */
+
+struct acpi_hest_generic_data_v300 {
+ u8 section_type[16];
+ u32 error_severity;
+ u16 revision;
+ u8 validation_bits;
+ u8 flags;
+ u32 error_data_length;
+ u8 fru_id[16];
+ u8 fru_text[20];
+ u64 time_stamp;
+};
+
+/* Values for error_severity above */
+
+#define ACPI_HEST_GEN_ERROR_RECOVERABLE 0
+#define ACPI_HEST_GEN_ERROR_FATAL 1
+#define ACPI_HEST_GEN_ERROR_CORRECTED 2
+#define ACPI_HEST_GEN_ERROR_NONE 3
+
+/* Flags for validation_bits above */
+
+#define ACPI_HEST_GEN_VALID_FRU_ID (1)
+#define ACPI_HEST_GEN_VALID_FRU_STRING (1<<1)
+#define ACPI_HEST_GEN_VALID_TIMESTAMP (1<<2)
+
/*******************************************************************************
*
* MADT - Multiple APIC Description Table
@@ -934,7 +986,7 @@
/*******************************************************************************
*
- * NFIT - NVDIMM Interface Table (ACPI 6.0)
+ * NFIT - NVDIMM Interface Table (ACPI 6.0+)
* Version 1
*
******************************************************************************/
@@ -1015,6 +1067,7 @@
#define ACPI_NFIT_MEM_NOT_ARMED (1<<3) /* 03: Memory Device is not armed */
#define ACPI_NFIT_MEM_HEALTH_OBSERVED (1<<4) /* 04: Memory Device observed SMART/health events */
#define ACPI_NFIT_MEM_HEALTH_ENABLED (1<<5) /* 05: SMART/health events enabled */
+#define ACPI_NFIT_MEM_MAP_FAILED (1<<6) /* 06: Mapping to SPA failed */
/* 2: Interleave Structure */
@@ -1046,7 +1099,10 @@
u16 subsystem_vendor_id;
u16 subsystem_device_id;
u16 subsystem_revision_id;
- u8 reserved[6]; /* Reserved, must be zero */
+ u8 valid_fields;
+ u8 manufacturing_location;
+ u16 manufacturing_date;
+ u8 reserved[2]; /* Reserved, must be zero */
u32 serial_number;
u16 code;
u16 windows;
@@ -1061,7 +1117,11 @@
/* Flags */
-#define ACPI_NFIT_CONTROL_BUFFERED (1) /* Block Data Windows implementation is buffered */
+#define ACPI_NFIT_CONTROL_BUFFERED (1) /* Block Data Windows implementation is buffered */
+
+/* valid_fields bits */
+
+#define ACPI_NFIT_CONTROL_MFG_INFO_VALID (1) /* Manufacturing fields are valid */
/* 5: NVDIMM Block Data Window Region Structure */
diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h
index a4ef625..c93dbad 100644
--- a/include/acpi/actbl2.h
+++ b/include/acpi/actbl2.h
@@ -321,7 +321,7 @@
* DBG2 - Debug Port Table 2
* Version 0 (Both main table and subtables)
*
- * Conforms to "Microsoft Debug Port Table 2 (DBG2)", May 22 2012.
+ * Conforms to "Microsoft Debug Port Table 2 (DBG2)", December 10, 2015
*
******************************************************************************/
@@ -371,6 +371,11 @@
#define ACPI_DBG2_16550_COMPATIBLE 0x0000
#define ACPI_DBG2_16550_SUBSET 0x0001
+#define ACPI_DBG2_ARM_PL011 0x0003
+#define ACPI_DBG2_ARM_SBSA_32BIT 0x000D
+#define ACPI_DBG2_ARM_SBSA_GENERIC 0x000E
+#define ACPI_DBG2_ARM_DCC 0x000F
+#define ACPI_DBG2_BCM2835 0x0010
#define ACPI_DBG2_1394_STANDARD 0x0000
@@ -399,7 +404,7 @@
* Version 1
*
* Conforms to "Intel Virtualization Technology for Directed I/O",
- * Version 2.2, Sept. 2013
+ * Version 2.3, October 2014
*
******************************************************************************/
@@ -413,6 +418,8 @@
/* Masks for Flags field above */
#define ACPI_DMAR_INTR_REMAP (1)
+#define ACPI_DMAR_X2APIC_OPT_OUT (1<<1)
+#define ACPI_DMAR_X2APIC_MODE (1<<2)
/* DMAR subtable header */
@@ -655,7 +662,7 @@
* IORT - IO Remapping Table
*
* Conforms to "IO Remapping Table System Software on ARM Platforms",
- * Document number: ARM DEN 0049A, 2015
+ * Document number: ARM DEN 0049B, October 2015
*
******************************************************************************/
@@ -685,7 +692,8 @@
ACPI_IORT_NODE_ITS_GROUP = 0x00,
ACPI_IORT_NODE_NAMED_COMPONENT = 0x01,
ACPI_IORT_NODE_PCI_ROOT_COMPLEX = 0x02,
- ACPI_IORT_NODE_SMMU = 0x03
+ ACPI_IORT_NODE_SMMU = 0x03,
+ ACPI_IORT_NODE_SMMU_V3 = 0x04
};
struct acpi_iort_id_mapping {
@@ -775,6 +783,23 @@
#define ACPI_IORT_SMMU_DVM_SUPPORTED (1)
#define ACPI_IORT_SMMU_COHERENT_WALK (1<<1)
+struct acpi_iort_smmu_v3 {
+ u64 base_address; /* SMMUv3 base address */
+ u32 flags;
+ u32 reserved;
+ u64 vatos_address;
+ u32 model; /* 0: generic SMMUv3 */
+ u32 event_gsiv;
+ u32 pri_gsiv;
+ u32 gerr_gsiv;
+ u32 sync_gsiv;
+};
+
+/* Masks for Flags field above */
+
+#define ACPI_IORT_SMMU_V3_COHACC_OVERRIDE (1)
+#define ACPI_IORT_SMMU_V3_HTTU_OVERRIDE (1<<1)
+
/*******************************************************************************
*
* IVRS - I/O Virtualization Reporting Structure
@@ -1102,10 +1127,10 @@
/*******************************************************************************
*
* SPCR - Serial Port Console Redirection table
- * Version 1
+ * Version 2
*
* Conforms to "Serial Port Console Redirection Table",
- * Version 1.00, January 11, 2002
+ * Version 1.03, August 10, 2015
*
******************************************************************************/
@@ -1137,6 +1162,8 @@
#define ACPI_SPCR_DO_NOT_DISABLE (1)
+/* Values for Interface Type: See the definition of the DBG2 table */
+
/*******************************************************************************
*
* SPMI - Server Platform Management Interface table
diff --git a/include/acpi/actbl3.h b/include/acpi/actbl3.h
index ddf5e66..ebc1f4f 100644
--- a/include/acpi/actbl3.h
+++ b/include/acpi/actbl3.h
@@ -184,7 +184,7 @@
struct acpi_table_header header; /* Common ACPI table header */
};
-/* FPDT subtable header */
+/* FPDT subtable header (Performance Record Structure) */
struct acpi_fpdt_header {
u16 type;
@@ -205,6 +205,57 @@
/* 0: Firmware Basic Boot Performance Record */
+struct acpi_fpdt_boot_pointer {
+ struct acpi_fpdt_header header;
+ u8 reserved[4];
+ u64 address;
+};
+
+/* 1: S3 Performance Table Pointer Record */
+
+struct acpi_fpdt_s3pt_pointer {
+ struct acpi_fpdt_header header;
+ u8 reserved[4];
+ u64 address;
+};
+
+/*
+ * S3PT - S3 Performance Table. This table is pointed to by the
+ * S3 Pointer Record above.
+ */
+struct acpi_table_s3pt {
+ u8 signature[4]; /* "S3PT" */
+ u32 length;
+};
+
+/*
+ * S3PT Subtables (Not part of the actual FPDT)
+ */
+
+/* Values for Type field in S3PT header */
+
+enum acpi_s3pt_type {
+ ACPI_S3PT_TYPE_RESUME = 0,
+ ACPI_S3PT_TYPE_SUSPEND = 1,
+ ACPI_FPDT_BOOT_PERFORMANCE = 2
+};
+
+struct acpi_s3pt_resume {
+ struct acpi_fpdt_header header;
+ u32 resume_count;
+ u64 full_resume;
+ u64 average_resume;
+};
+
+struct acpi_s3pt_suspend {
+ struct acpi_fpdt_header header;
+ u64 suspend_start;
+ u64 suspend_end;
+};
+
+/*
+ * FPDT Boot Performance Record (Not part of the actual FPDT)
+ */
struct acpi_fpdt_boot {
struct acpi_fpdt_header header;
u8 reserved[4];
@@ -215,52 +266,6 @@
u64 exit_services_exit;
};
-/* 1: S3 Performance Table Pointer Record */
-
-struct acpi_fpdt_s3pt_ptr {
- struct acpi_fpdt_header header;
- u8 reserved[4];
- u64 address;
-};
-
-/*
- * S3PT - S3 Performance Table. This table is pointed to by the
- * FPDT S3 Pointer Record above.
- */
-struct acpi_table_s3pt {
- u8 signature[4]; /* "S3PT" */
- u32 length;
-};
-
-/*
- * S3PT Subtables
- */
-struct acpi_s3pt_header {
- u16 type;
- u8 length;
- u8 revision;
-};
-
-/* Values for Type field above */
-
-enum acpi_s3pt_type {
- ACPI_S3PT_TYPE_RESUME = 0,
- ACPI_S3PT_TYPE_SUSPEND = 1
-};
-
-struct acpi_s3pt_resume {
- struct acpi_s3pt_header header;
- u32 resume_count;
- u64 full_resume;
- u64 average_resume;
-};
-
-struct acpi_s3pt_suspend {
- struct acpi_s3pt_header header;
- u64 suspend_start;
- u64 suspend_end;
-};
-
/*******************************************************************************
*
* GTDT - Generic Timer Description Table (ACPI 5.1)
@@ -476,7 +481,8 @@
enum acpi_pcct_type {
ACPI_PCCT_TYPE_GENERIC_SUBSPACE = 0,
ACPI_PCCT_TYPE_HW_REDUCED_SUBSPACE = 1,
- ACPI_PCCT_TYPE_RESERVED = 2 /* 2 and greater are reserved */
+ ACPI_PCCT_TYPE_HW_REDUCED_SUBSPACE_TYPE2 = 2, /* ACPI 6.1 */
+ ACPI_PCCT_TYPE_RESERVED = 3 /* 3 and greater are reserved */
};
/*
@@ -515,6 +521,26 @@
u16 min_turnaround_time;
};
+/* 2: HW-reduced Communications Subspace Type 2 (ACPI 6.1) */
+
+struct acpi_pcct_hw_reduced_type2 {
+ struct acpi_subtable_header header;
+ u32 doorbell_interrupt;
+ u8 flags;
+ u8 reserved;
+ u64 base_address;
+ u64 length;
+ struct acpi_generic_address doorbell_register;
+ u64 preserve_mask;
+ u64 write_mask;
+ u32 latency;
+ u32 max_access_rate;
+ u16 min_turnaround_time;
+ struct acpi_generic_address doorbell_ack_register;
+ u64 ack_preserve_mask;
+ u64 ack_write_mask;
+};
+
/* Values for doorbell flags above */
#define ACPI_PCCT_INTERRUPT_POLARITY (1)
diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h
index db46546..cb389ef 100644
--- a/include/acpi/actypes.h
+++ b/include/acpi/actypes.h
@@ -630,7 +630,8 @@
#define ACPI_NOTIFY_SHUTDOWN_REQUEST (u8) 0x0C
#define ACPI_NOTIFY_AFFINITY_UPDATE (u8) 0x0D
-#define ACPI_NOTIFY_MAX 0x0D
+#define ACPI_GENERIC_NOTIFY_MAX 0x0D
+#define ACPI_SPECIFIC_NOTIFY_MAX 0x84
/*
* Types associated with ACPI names and objects. The first group of
@@ -892,7 +893,7 @@
/* Sleep function dispatch */
-typedef acpi_status(*acpi_sleep_function) (u8 sleep_state);
+typedef acpi_status (*acpi_sleep_function) (u8 sleep_state);
struct acpi_sleep_functions {
acpi_sleep_function legacy_function;
@@ -994,7 +995,7 @@
* Predefined Namespace items
*/
struct acpi_predefined_names {
- char *name;
+ const char *name;
u8 type;
char *val;
};
@@ -1071,20 +1072,21 @@
typedef
void (*acpi_object_handler) (acpi_handle object, void *data);
-typedef acpi_status(*acpi_init_handler) (acpi_handle object, u32 function);
+typedef
+acpi_status (*acpi_init_handler) (acpi_handle object, u32 function);
#define ACPI_INIT_DEVICE_INI 1
typedef
-acpi_status(*acpi_exception_handler) (acpi_status aml_status,
- acpi_name name,
- u16 opcode,
- u32 aml_offset, void *context);
+acpi_status (*acpi_exception_handler) (acpi_status aml_status,
+ acpi_name name,
+ u16 opcode,
+ u32 aml_offset, void *context);
/* Table Event handler (Load, load_table, etc.) and types */
typedef
-acpi_status(*acpi_table_handler) (u32 event, void *table, void *context);
+acpi_status (*acpi_table_handler) (u32 event, void *table, void *context);
#define ACPI_TABLE_LOAD 0x0
#define ACPI_TABLE_UNLOAD 0x1
@@ -1093,12 +1095,12 @@
/* Address Spaces (For Operation Regions) */
typedef
-acpi_status(*acpi_adr_space_handler) (u32 function,
- acpi_physical_address address,
- u32 bit_width,
- u64 *value,
- void *handler_context,
- void *region_context);
+acpi_status (*acpi_adr_space_handler) (u32 function,
+ acpi_physical_address address,
+ u32 bit_width,
+ u64 *value,
+ void *handler_context,
+ void *region_context);
#define ACPI_DEFAULT_HANDLER NULL
@@ -1111,18 +1113,18 @@
};
typedef
-acpi_status(*acpi_adr_space_setup) (acpi_handle region_handle,
- u32 function,
- void *handler_context,
- void **region_context);
+acpi_status (*acpi_adr_space_setup) (acpi_handle region_handle,
+ u32 function,
+ void *handler_context,
+ void **region_context);
#define ACPI_REGION_ACTIVATE 0
#define ACPI_REGION_DEACTIVATE 1
typedef
-acpi_status(*acpi_walk_callback) (acpi_handle object,
- u32 nesting_level,
- void *context, void **return_value);
+acpi_status (*acpi_walk_callback) (acpi_handle object,
+ u32 nesting_level,
+ void *context, void **return_value);
typedef
u32 (*acpi_interface_handler) (acpi_string interface_name, u32 supported);
@@ -1227,7 +1229,7 @@
* struct acpi_memory_list is used only if the ACPICA local cache is enabled
*/
struct acpi_memory_list {
- char *list_name;
+ const char *list_name;
void *list_head;
u16 object_size;
u16 max_depth;
diff --git a/include/acpi/platform/acenv.h b/include/acpi/platform/acenv.h
index 7c0595b..86b5a84 100644
--- a/include/acpi/platform/acenv.h
+++ b/include/acpi/platform/acenv.h
@@ -66,17 +66,28 @@
*
*****************************************************************************/
+/* Common application configuration. All single threaded except for acpi_exec. */
+
+#if (defined ACPI_ASL_COMPILER) || \
+ (defined ACPI_BIN_APP) || \
+ (defined ACPI_DUMP_APP) || \
+ (defined ACPI_HELP_APP) || \
+ (defined ACPI_NAMES_APP) || \
+ (defined ACPI_SRC_APP) || \
+ (defined ACPI_XTRACT_APP) || \
+ (defined ACPI_EXAMPLE_APP)
+#define ACPI_APPLICATION
+#define ACPI_SINGLE_THREADED
+#endif
+
/* iASL configuration */
#ifdef ACPI_ASL_COMPILER
-#define ACPI_APPLICATION
#define ACPI_DEBUG_OUTPUT
#define ACPI_CONSTANT_EVAL_ONLY
#define ACPI_LARGE_NAMESPACE_NODE
#define ACPI_DATA_TABLE_DISASSEMBLY
-#define ACPI_SINGLE_THREADED
#define ACPI_32BIT_PHYSICAL_ADDRESS
-
#define ACPI_DISASSEMBLER 1
#endif
@@ -89,21 +100,6 @@
#define ACPI_DBG_TRACK_ALLOCATIONS
#endif
-/*
- * acpi_bin/acpi_dump/acpi_help/acpi_names/acpi_src/acpi_xtract/Example
- * configuration. All single threaded.
- */
-#if (defined ACPI_BIN_APP) || \
- (defined ACPI_DUMP_APP) || \
- (defined ACPI_HELP_APP) || \
- (defined ACPI_NAMES_APP) || \
- (defined ACPI_SRC_APP) || \
- (defined ACPI_XTRACT_APP) || \
- (defined ACPI_EXAMPLE_APP)
-#define ACPI_APPLICATION
-#define ACPI_SINGLE_THREADED
-#endif
-
/* acpi_help configuration. Error messages disabled. */
#ifdef ACPI_HELP_APP
@@ -138,11 +134,16 @@
#define ACPI_REDUCED_HARDWARE 1
#endif
-/* Linkable ACPICA library */
+/* Linkable ACPICA library. Two versions, one with full debug. */
#ifdef ACPI_LIBRARY
#define ACPI_USE_LOCAL_CACHE
-#define ACPI_FULL_DEBUG
+#define ACPI_DEBUGGER 1
+#define ACPI_DISASSEMBLER 1
+
+#ifdef _DEBUG
+#define ACPI_DEBUG_OUTPUT
+#endif
#endif
/* Common for all ACPICA applications */
@@ -218,6 +219,9 @@
#elif defined(__HAIKU__)
#include "achaiku.h"
+#elif defined(__QNX__)
+#include "acqnx.h"
+
#else
/* Unknown environment */
diff --git a/include/acpi/platform/acmsvcex.h b/include/acpi/platform/acmsvcex.h
deleted file mode 100644
index 28084a1..0000000
--- a/include/acpi/platform/acmsvcex.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/******************************************************************************
- *
- * Name: acmsvcex.h - Extra VC specific defines, etc.
- *
- *****************************************************************************/
-
-/*
- * Copyright (C) 2000 - 2016, Intel Corp.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions, and the following disclaimer,
- * without modification.
- * 2. Redistributions in binary form must reproduce at minimum a disclaimer
- * substantially similar to the "NO WARRANTY" disclaimer below
- * ("Disclaimer") and any redistribution must be conditioned upon
- * including a substantially similar Disclaimer requirement for further
- * binary redistribution.
- * 3. Neither the names of the above-listed copyright holders nor the names
- * of any contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * NO WARRANTY
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
- * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGES.
- */
-
-#ifndef __ACMSVCEX_H__
-#define __ACMSVCEX_H__
-
-/* Debug support. */
-
-#ifdef _DEBUG
-#define _CRTDBG_MAP_ALLOC /* Enables specific file/lineno for leaks */
-#include <crtdbg.h>
-#endif
-
-#endif /* __ACMSVCEX_H__ */
diff --git a/include/acpi/platform/acwinex.h b/include/acpi/platform/acwinex.h
deleted file mode 100644
index a00b3e4..0000000
--- a/include/acpi/platform/acwinex.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/******************************************************************************
- *
- * Name: acwinex.h - Extra OS specific defines, etc.
- *
- *****************************************************************************/
-
-/*
- * Copyright (C) 2000 - 2016, Intel Corp.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions, and the following disclaimer,
- * without modification.
- * 2. Redistributions in binary form must reproduce at minimum a disclaimer
- * substantially similar to the "NO WARRANTY" disclaimer below
- * ("Disclaimer") and any redistribution must be conditioned upon
- * including a substantially similar Disclaimer requirement for further
- * binary redistribution.
- * 3. Neither the names of the above-listed copyright holders nor the names
- * of any contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * NO WARRANTY
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
- * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGES.
- */
-
-#ifndef __ACWINEX_H__
-#define __ACWINEX_H__
-
-/* Windows uses VC */
-
-#endif /* __ACWINEX_H__ */
diff --git a/include/acpi/video.h b/include/acpi/video.h
index 5ca2f2c..70a41f7 100644
--- a/include/acpi/video.h
+++ b/include/acpi/video.h
@@ -4,6 +4,19 @@
#include <linux/errno.h> /* for ENODEV */
#include <linux/types.h> /* for bool */
+struct acpi_video_brightness_flags {
+ u8 _BCL_no_ac_battery_levels:1; /* no AC/Battery levels in _BCL */
+ u8 _BCL_reversed:1; /* _BCL package is in a reversed order */
+ u8 _BQC_use_index:1; /* _BQC returns an index value */
+};
+
+struct acpi_video_device_brightness {
+ int curr;
+ int count;
+ int *levels;
+ struct acpi_video_brightness_flags flags;
+};
+
struct acpi_device;
#define ACPI_VIDEO_CLASS "video"
@@ -37,6 +50,8 @@
* may change over time and should not be cached.
*/
extern bool acpi_video_handles_brightness_key_presses(void);
+extern int acpi_video_get_levels(struct acpi_device *device,
+ struct acpi_video_device_brightness **dev_br);
#else
static inline int acpi_video_register(void) { return 0; }
static inline void acpi_video_unregister(void) { return; }
@@ -56,6 +71,11 @@
{
return false;
}
+static inline int acpi_video_get_levels(struct acpi_device *device,
+ struct acpi_video_device_brightness **dev_br)
+{
+ return -ENODEV;
+}
#endif
#endif
diff --git a/include/asm-generic/rwsem.h b/include/asm-generic/rwsem.h
index d6d5dc9..3fc94a0 100644
--- a/include/asm-generic/rwsem.h
+++ b/include/asm-generic/rwsem.h
@@ -53,7 +53,7 @@
/*
* lock for writing
*/
-static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
+static inline void __down_write(struct rw_semaphore *sem)
{
long tmp;
@@ -63,9 +63,16 @@
rwsem_down_write_failed(sem);
}
-static inline void __down_write(struct rw_semaphore *sem)
+static inline int __down_write_killable(struct rw_semaphore *sem)
{
- __down_write_nested(sem, 0);
+ long tmp;
+
+ tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS,
+ (atomic_long_t *)&sem->count);
+ if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS))
+ if (IS_ERR(rwsem_down_write_failed_killable(sem)))
+ return -EINTR;
+ return 0;
}
static inline int __down_write_trylock(struct rw_semaphore *sem)
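
Caller-side pattern this enables, assuming the down_write_killable() wrapper
added elsewhere in this series (a sketch, not code from this diff):

static int example_modify_protected(struct rw_semaphore *sem)
{
        /* Sleeps like down_write(), but a fatal signal aborts the wait. */
        if (down_write_killable(sem))
                return -EINTR;

        /* ... modify the data protected by @sem ... */

        up_write(sem);
        return 0;
}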
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 339125b..6a67ab9 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -245,7 +245,9 @@
#define INIT_TASK_DATA(align) \
. = ALIGN(align); \
- *(.data..init_task)
+ VMLINUX_SYMBOL(__start_init_task) = .; \
+ *(.data..init_task) \
+ VMLINUX_SYMBOL(__end_init_task) = .;
/*
* Read only Data
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 06ed7e5..288fac5 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -190,14 +190,6 @@
}
#endif
-#ifdef CONFIG_ACPI_INITRD_TABLE_OVERRIDE
-void acpi_initrd_override(void *data, size_t size);
-#else
-static inline void acpi_initrd_override(void *data, size_t size)
-{
-}
-#endif
-
#define BAD_MADT_ENTRY(entry, end) ( \
(!entry) || (unsigned long)entry + sizeof(*entry) > end || \
((struct acpi_subtable_header *)entry)->length < sizeof(*entry))
@@ -216,6 +208,7 @@
int acpi_mps_check (void);
int acpi_numa_init (void);
+void early_acpi_table_init(void *data, size_t size);
int acpi_table_init (void);
int acpi_table_parse(char *id, acpi_tbl_table_handler handler);
int __init acpi_parse_entries(char *id, unsigned long table_size,
@@ -278,6 +271,7 @@
extern u32 acpi_irq_handled;
extern u32 acpi_irq_not_handled;
extern unsigned int acpi_sci_irq;
+extern bool acpi_no_s5;
#define INVALID_ACPI_IRQ ((unsigned)-1)
static inline bool acpi_sci_irq_valid(void)
{
@@ -311,7 +305,6 @@
int acpi_pci_irq_enable (struct pci_dev *dev);
void acpi_penalize_isa_irq(int irq, int active);
bool acpi_isa_irq_available(int irq);
-void acpi_penalize_sci_irq(int irq, int trigger, int polarity);
void acpi_pci_irq_disable (struct pci_dev *dev);
extern int ec_read(u8 addr, u8 *val);
@@ -359,7 +352,6 @@
extern char acpi_video_backlight_string[];
extern long acpi_is_video_device(acpi_handle handle);
extern int acpi_blacklisted(void);
-extern void acpi_dmi_osi_linux(int enable, const struct dmi_system_id *d);
extern void acpi_osi_setup(char *str);
extern bool acpi_osi_is_win8(void);
@@ -596,6 +588,7 @@
return NULL;
}
+static inline void early_acpi_table_init(void *data, size_t size) { }
static inline void acpi_early_init(void) { }
static inline void acpi_subsystem_init(void) { }
diff --git a/include/linux/apple-gmux.h b/include/linux/apple-gmux.h
index b2d32e0..714186d 100644
--- a/include/linux/apple-gmux.h
+++ b/include/linux/apple-gmux.h
@@ -35,7 +35,7 @@
*/
static inline bool apple_gmux_present(void)
{
- return acpi_dev_present(GMUX_ACPI_HID);
+ return acpi_dev_found(GMUX_ACPI_HID);
}
#else /* !CONFIG_APPLE_GMUX */
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 506c353..e451534 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -560,11 +560,11 @@
/**
* atomic_fetch_or - perform *p |= mask and return old value of *p
- * @p: pointer to atomic_t
* @mask: mask to OR on the atomic_t
+ * @p: pointer to atomic_t
*/
#ifndef atomic_fetch_or
-static inline int atomic_fetch_or(atomic_t *p, int mask)
+static inline int atomic_fetch_or(int mask, atomic_t *p)
{
int old, val = atomic_read(p);
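
The swap makes atomic_fetch_or() take its arguments in the same
(value, pointer) order as the other atomic operations. A sketch of an
updated caller, with an invented flag value:

#define EXAMPLE_FLAG_PENDING 0x1

static bool example_set_pending(atomic_t *flags)
{
        /* New order: mask first, atomic_t pointer second. */
        int old = atomic_fetch_or(EXAMPLE_FLAG_PENDING, flags);

        /* True only for the caller that actually set the bit. */
        return !(old & EXAMPLE_FLAG_PENDING);
}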
diff --git a/include/linux/backlight.h b/include/linux/backlight.h
index 1e7a69a..5f2fd61 100644
--- a/include/linux/backlight.h
+++ b/include/linux/backlight.h
@@ -141,9 +141,10 @@
struct backlight_device *bd);
extern void backlight_force_update(struct backlight_device *bd,
enum backlight_update_reason reason);
-extern bool backlight_device_registered(enum backlight_type type);
extern int backlight_register_notifier(struct notifier_block *nb);
extern int backlight_unregister_notifier(struct notifier_block *nb);
+extern struct backlight_device *backlight_device_get_by_type(enum backlight_type type);
+extern int backlight_device_set_brightness(struct backlight_device *bd, unsigned long brightness);
#define to_backlight_device(obj) container_of(obj, struct backlight_device, dev)
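
A sketch of how the two new helpers replace an open-coded lookup (the error
handling is illustrative, not from this patch):

static int example_dim_raw_backlight(unsigned long level)
{
        struct backlight_device *bd;

        bd = backlight_device_get_by_type(BACKLIGHT_RAW);
        if (!bd)
                return -ENODEV;

        /* Updates the brightness and triggers the usual update path. */
        return backlight_device_set_brightness(bd, level);
}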
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index f9b1fab..21597dc 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -59,25 +59,7 @@
* CPU notifier priorities.
*/
enum {
- /*
- * SCHED_ACTIVE marks a cpu which is coming up active during
- * CPU_ONLINE and CPU_DOWN_FAILED and must be the first
- * notifier. CPUSET_ACTIVE adjusts cpuset according to
- * cpu_active mask right after SCHED_ACTIVE. During
- * CPU_DOWN_PREPARE, SCHED_INACTIVE and CPUSET_INACTIVE are
- * ordered in the similar way.
- *
- * This ordering guarantees consistent cpu_active mask and
- * migration behavior to all cpu notifiers.
- */
- CPU_PRI_SCHED_ACTIVE = INT_MAX,
- CPU_PRI_CPUSET_ACTIVE = INT_MAX - 1,
- CPU_PRI_SCHED_INACTIVE = INT_MIN + 1,
- CPU_PRI_CPUSET_INACTIVE = INT_MIN,
-
- /* migration should happen before other stuff but after perf */
CPU_PRI_PERF = 20,
- CPU_PRI_MIGRATION = 10,
/* bring up workqueues before normal notifiers and down after */
CPU_PRI_WORKQUEUE_UP = 5,
diff --git a/include/linux/cpufreq-dt.h b/include/linux/cpufreq-dt.h
deleted file mode 100644
index 0414009..0000000
--- a/include/linux/cpufreq-dt.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright (C) 2014 Marvell
- * Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef __CPUFREQ_DT_H__
-#define __CPUFREQ_DT_H__
-
-struct cpufreq_dt_platform_data {
- /*
- * True when each CPU has its own clock to control its
- * frequency, false when all CPUs are controlled by a single
- * clock.
- */
- bool independent_clocks;
-};
-
-#endif /* __CPUFREQ_DT_H__ */
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 718e872..4e81e08 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -102,6 +102,17 @@
*/
struct rw_semaphore rwsem;
+ /*
+ * Fast switch flags:
+ * - fast_switch_possible should be set by the driver if it can
+ * guarantee that frequency can be changed on any CPU sharing the
+ * policy and that the change will then affect all of the policy CPUs.
+ * - fast_switch_enabled is to be set by governors that support fast
+ * frequency switching with the help of cpufreq_enable_fast_switch().
+ */
+ bool fast_switch_possible;
+ bool fast_switch_enabled;
+
/* Synchronization for frequency transitions */
bool transition_ongoing; /* Tracks transition status */
spinlock_t transition_lock;
@@ -156,6 +167,8 @@
int cpufreq_update_policy(unsigned int cpu);
bool have_governor_per_policy(void);
struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy);
+void cpufreq_enable_fast_switch(struct cpufreq_policy *policy);
+void cpufreq_disable_fast_switch(struct cpufreq_policy *policy);
#else
static inline unsigned int cpufreq_get(unsigned int cpu)
{
@@ -236,6 +249,8 @@
unsigned int relation); /* Deprecated */
int (*target_index)(struct cpufreq_policy *policy,
unsigned int index);
+ unsigned int (*fast_switch)(struct cpufreq_policy *policy,
+ unsigned int target_freq);
/*
* Only for drivers with target_index() and CPUFREQ_ASYNC_NOTIFICATION
* unset.
@@ -426,6 +441,20 @@
#define CPUFREQ_POLICY_POWERSAVE (1)
#define CPUFREQ_POLICY_PERFORMANCE (2)
+/*
+ * The polling frequency depends on the capability of the processor. Default
+ * polling frequency is 1000 times the transition latency of the processor. The
+ * ondemand governor will work on any processor with transition latency <= 10ms,
+ * using an appropriate sampling rate.
+ *
+ * For CPUs with transition latency > 10ms (mostly drivers with CPUFREQ_ETERNAL)
+ * the ondemand governor will not work. All times here are in us (microseconds).
+ */
+#define MIN_SAMPLING_RATE_RATIO (2)
+#define LATENCY_MULTIPLIER (1000)
+#define MIN_LATENCY_MULTIPLIER (20)
+#define TRANSITION_LATENCY_LIMIT (10 * 1000 * 1000)
+
/* Governor Events */
#define CPUFREQ_GOV_START 1
#define CPUFREQ_GOV_STOP 2
@@ -450,6 +479,8 @@
};
/* Pass a target to the cpufreq driver */
+unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy,
+ unsigned int target_freq);
int cpufreq_driver_target(struct cpufreq_policy *policy,
unsigned int target_freq,
unsigned int relation);
@@ -462,6 +493,29 @@
struct cpufreq_governor *cpufreq_default_governor(void);
struct cpufreq_governor *cpufreq_fallback_governor(void);
+/* Governor attribute set */
+struct gov_attr_set {
+ struct kobject kobj;
+ struct list_head policy_list;
+ struct mutex update_lock;
+ int usage_count;
+};
+
+/* sysfs ops for cpufreq governors */
+extern const struct sysfs_ops governor_sysfs_ops;
+
+void gov_attr_set_init(struct gov_attr_set *attr_set, struct list_head *list_node);
+void gov_attr_set_get(struct gov_attr_set *attr_set, struct list_head *list_node);
+unsigned int gov_attr_set_put(struct gov_attr_set *attr_set, struct list_head *list_node);
+
+/* Governor sysfs attribute */
+struct governor_attr {
+ struct attribute attr;
+ ssize_t (*show)(struct gov_attr_set *attr_set, char *buf);
+ ssize_t (*store)(struct gov_attr_set *attr_set, const char *buf,
+ size_t count);
+};
+
/*********************************************************************
* FREQUENCY TABLE HELPERS *
*********************************************************************/
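
A minimal sketch of the intended fast-switch flow from a governor's point of
view (function names are invented; locking and error handling are omitted):

static void example_gov_start(struct cpufreq_policy *policy)
{
        /* Opt in once, if the driver declared fast switching possible. */
        if (policy->fast_switch_possible)
                cpufreq_enable_fast_switch(policy);
}

static void example_gov_update(struct cpufreq_policy *policy,
                               unsigned int target_freq)
{
        /* Hot path, e.g. scheduler context: must not sleep, so only
         * the driver's ->fast_switch() callback is usable here. */
        if (policy->fast_switch_enabled)
                cpufreq_driver_fast_switch(policy, target_freq);
}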
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 5d68e15..386374d 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -8,6 +8,7 @@
CPUHP_BRINGUP_CPU,
CPUHP_AP_IDLE_DEAD,
CPUHP_AP_OFFLINE,
+ CPUHP_AP_SCHED_STARTING,
CPUHP_AP_NOTIFY_STARTING,
CPUHP_AP_ONLINE,
CPUHP_TEARDOWN_CPU,
@@ -16,6 +17,7 @@
CPUHP_AP_NOTIFY_ONLINE,
CPUHP_AP_ONLINE_DYN,
CPUHP_AP_ONLINE_DYN_END = CPUHP_AP_ONLINE_DYN + 30,
+ CPUHP_AP_ACTIVE,
CPUHP_ONLINE,
};
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 40cee6b..e828cf6 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -743,12 +743,10 @@
static inline void
set_cpu_online(unsigned int cpu, bool online)
{
- if (online) {
+ if (online)
cpumask_set_cpu(cpu, &__cpu_online_mask);
- cpumask_set_cpu(cpu, &__cpu_active_mask);
- } else {
+ else
cpumask_clear_cpu(cpu, &__cpu_online_mask);
- }
}
static inline void
diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index 6fa02a2..2de4e2e 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -19,6 +19,13 @@
#define DEVFREQ_NAME_LEN 16
+/* DEVFREQ notifier interface */
+#define DEVFREQ_TRANSITION_NOTIFIER (0)
+
+/* Transition notifiers of DEVFREQ_TRANSITION_NOTIFIER */
+#define DEVFREQ_PRECHANGE (0)
+#define DEVFREQ_POSTCHANGE (1)
+
struct devfreq;
/**
@@ -143,6 +150,7 @@
* @trans_table: Statistics of devfreq transitions
* @time_in_state: Statistics of devfreq states
* @last_stat_updated: The last time stat updated
+ * @transition_notifier_list: list head of DEVFREQ_TRANSITION_NOTIFIER notifiers
*
* This structure stores the devfreq information for a given device.
*
@@ -177,6 +185,13 @@
unsigned int *trans_table;
unsigned long *time_in_state;
unsigned long last_stat_updated;
+
+ struct srcu_notifier_head transition_notifier_list;
+};
+
+struct devfreq_freqs {
+ unsigned long old;
+ unsigned long new;
};
#if defined(CONFIG_PM_DEVFREQ)
@@ -207,6 +222,22 @@
struct devfreq *devfreq);
extern void devm_devfreq_unregister_opp_notifier(struct device *dev,
struct devfreq *devfreq);
+extern int devfreq_register_notifier(struct devfreq *devfreq,
+ struct notifier_block *nb,
+ unsigned int list);
+extern int devfreq_unregister_notifier(struct devfreq *devfreq,
+ struct notifier_block *nb,
+ unsigned int list);
+extern int devm_devfreq_register_notifier(struct device *dev,
+ struct devfreq *devfreq,
+ struct notifier_block *nb,
+ unsigned int list);
+extern void devm_devfreq_unregister_notifier(struct device *dev,
+ struct devfreq *devfreq,
+ struct notifier_block *nb,
+ unsigned int list);
+extern struct devfreq *devfreq_get_devfreq_by_phandle(struct device *dev,
+ int index);
/**
* devfreq_update_stats() - update the last_status pointer in struct devfreq
@@ -241,6 +272,39 @@
};
#endif
+#if IS_ENABLED(CONFIG_DEVFREQ_GOV_PASSIVE)
+/**
+ * struct devfreq_passive_data - void *data fed to struct devfreq
+ * and devfreq_add_device
+ * @parent: the devfreq instance of parent device.
+ * @get_target_freq: Optional callback that returns the desired operating
+ * frequency for the device using the passive governor.
+ * It is called whenever the parent devfreq device, which
+ * is run by a governor other than the passive one, changes
+ * its frequency. Set this callback if the devfreq device
+ * has its own method of deciding the next frequency.
+ * @this: the devfreq instance of own device.
+ * @nb: the notifier block for DEVFREQ_TRANSITION_NOTIFIER list
+ *
+ * The devfreq_passive_data must set @parent to the devfreq instance of the
+ * parent device, which is run by a governor other than the passive one.
+ * The 'this' and 'nb' fields need not be initialized; the devfreq core
+ * handles them.
+ */
+struct devfreq_passive_data {
+ /* Should set the devfreq instance of parent device */
+ struct devfreq *parent;
+
+ /* Optional callback to decide the next frequency of the passive device */
+ int (*get_target_freq)(struct devfreq *this, unsigned long *freq);
+
+ /* For passive governor's internal use. Don't need to set them */
+ struct devfreq *this;
+ struct notifier_block nb;
+};
+#endif
+
#else /* !CONFIG_PM_DEVFREQ */
static inline struct devfreq *devfreq_add_device(struct device *dev,
struct devfreq_dev_profile *profile,
@@ -307,6 +371,41 @@
{
}
+static inline int devfreq_register_notifier(struct devfreq *devfreq,
+ struct notifier_block *nb,
+ unsigned int list)
+{
+ return 0;
+}
+
+static inline int devfreq_unregister_notifier(struct devfreq *devfreq,
+ struct notifier_block *nb,
+ unsigned int list)
+{
+ return 0;
+}
+
+static inline int devm_devfreq_register_notifier(struct device *dev,
+ struct devfreq *devfreq,
+ struct notifier_block *nb,
+ unsigned int list)
+{
+ return 0;
+}
+
+static inline void devm_devfreq_unregister_notifier(struct device *dev,
+ struct devfreq *devfreq,
+ struct notifier_block *nb,
+ unsigned int list)
+{
+}
+
+static inline struct devfreq *devfreq_get_devfreq_by_phandle(struct device *dev,
+ int index)
+{
+ return ERR_PTR(-ENODEV);
+}
+
static inline int devfreq_update_stats(struct devfreq *df)
{
return -EINVAL;
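
A sketch of a DEVFREQ_TRANSITION_NOTIFIER consumer (the driver context is
assumed, not part of this diff):

static int example_devfreq_trans_cb(struct notifier_block *nb,
                                    unsigned long event, void *data)
{
        struct devfreq_freqs *freqs = data;

        if (event == DEVFREQ_POSTCHANGE)
                pr_debug("devfreq transition: %lu -> %lu Hz\n",
                         freqs->old, freqs->new);
        return NOTIFY_OK;
}

static struct notifier_block example_nb = {
        .notifier_call = example_devfreq_trans_cb,
};

/* with a valid struct devfreq *df:
 *   devfreq_register_notifier(df, &example_nb, DEVFREQ_TRANSITION_NOTIFIER);
 */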
diff --git a/include/linux/device.h b/include/linux/device.h
index 002c597..b130304 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -956,11 +956,6 @@
return !!dev->power.async_suspend;
}
-static inline void pm_suspend_ignore_children(struct device *dev, bool enable)
-{
- dev->power.ignore_children = enable;
-}
-
static inline void dev_pm_syscore_device(struct device *dev, bool val)
{
#ifdef CONFIG_PM_SLEEP
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 1626474..df7acb5 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -21,6 +21,7 @@
#include <linux/pfn.h>
#include <linux/pstore.h>
#include <linux/reboot.h>
+#include <linux/screen_info.h>
#include <asm/page.h>
@@ -124,6 +125,13 @@
} efi_capsule_header_t;
/*
+ * EFI capsule flags
+ */
+#define EFI_CAPSULE_PERSIST_ACROSS_RESET 0x00010000
+#define EFI_CAPSULE_POPULATE_SYSTEM_TABLE 0x00020000
+#define EFI_CAPSULE_INITIATE_RESET 0x00040000
+
+/*
* Allocation types for calls to boottime->allocate_pages.
*/
#define EFI_ALLOCATE_ANY_PAGES 0
@@ -282,9 +290,10 @@
efi_status_t (*handle_protocol)(efi_handle_t, efi_guid_t *, void **);
void *__reserved;
void *register_protocol_notify;
- void *locate_handle;
+ efi_status_t (*locate_handle)(int, efi_guid_t *, void *,
+ unsigned long *, efi_handle_t *);
void *locate_device_path;
- void *install_configuration_table;
+ efi_status_t (*install_configuration_table)(efi_guid_t *, void *);
void *load_image;
void *start_image;
void *exit;
@@ -623,6 +632,27 @@
EFI_GUID(0x3152bca5, 0xeade, 0x433d, \
0x86, 0x2e, 0xc0, 0x1c, 0xdc, 0x29, 0x1f, 0x44)
+#define EFI_MEMORY_ATTRIBUTES_TABLE_GUID \
+ EFI_GUID(0xdcfa911d, 0x26eb, 0x469f, \
+ 0xa2, 0x20, 0x38, 0xb7, 0xdc, 0x46, 0x12, 0x20)
+
+#define EFI_CONSOLE_OUT_DEVICE_GUID \
+ EFI_GUID(0xd3b36f2c, 0xd551, 0x11d4, \
+ 0x9a, 0x46, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d)
+
+/*
+ * This GUID is used to pass to the kernel proper the struct screen_info
+ * structure that was populated by the stub based on the GOP protocol instance
+ * associated with ConOut
+ */
+#define LINUX_EFI_ARM_SCREEN_INFO_TABLE_GUID \
+ EFI_GUID(0xe03fc20a, 0x85dc, 0x406e, \
+ 0xb9, 0xe, 0x4a, 0xb5, 0x02, 0x37, 0x1d, 0x95)
+
+#define LINUX_EFI_LOADER_ENTRY_GUID \
+ EFI_GUID(0x4a67b082, 0x0a4c, 0x41cf, \
+ 0xb6, 0xc7, 0x44, 0x0b, 0x29, 0xbb, 0x8c, 0x4f)
+
typedef struct {
efi_guid_t guid;
u64 table;
@@ -847,6 +877,14 @@
#define EFI_INVALID_TABLE_ADDR (~0UL)
+typedef struct {
+ u32 version;
+ u32 num_entries;
+ u32 desc_size;
+ u32 reserved;
+ efi_memory_desc_t entry[0];
+} efi_memory_attributes_table_t;
+
/*
* All runtime access to EFI goes through this structure:
*/
@@ -868,6 +906,7 @@
unsigned long config_table; /* config tables */
unsigned long esrt; /* ESRT table */
unsigned long properties_table; /* properties table */
+ unsigned long mem_attr_table; /* memory attributes table */
efi_get_time_t *get_time;
efi_set_time_t *set_time;
efi_get_wakeup_time_t *get_wakeup_time;
@@ -883,7 +922,7 @@
efi_get_next_high_mono_count_t *get_next_high_mono_count;
efi_reset_system_t *reset_system;
efi_set_virtual_address_map_t *set_virtual_address_map;
- struct efi_memory_map *memmap;
+ struct efi_memory_map memmap;
unsigned long flags;
} efi;
@@ -945,7 +984,6 @@
extern void efi_get_time(struct timespec *now);
extern void efi_reserve_boot_services(void);
extern int efi_get_fdt_params(struct efi_fdt_params *params);
-extern struct efi_memory_map memmap;
extern struct kobject *efi_kobj;
extern int efi_reboot_quirk_mode;
@@ -957,12 +995,34 @@
static inline void efi_fake_memmap(void) { }
#endif
+/*
+ * efi_memattr_perm_setter - arch specific callback function passed into
+ * efi_memattr_apply_permissions() that updates the
+ * mapping permissions described by the second
+ * argument in the page tables referred to by the
+ * first argument.
+ */
+typedef int (*efi_memattr_perm_setter)(struct mm_struct *, efi_memory_desc_t *);
+
+extern int efi_memattr_init(void);
+extern int efi_memattr_apply_permissions(struct mm_struct *mm,
+ efi_memattr_perm_setter fn);
+
/* Iterate through an efi_memory_map */
-#define for_each_efi_memory_desc(m, md) \
+#define for_each_efi_memory_desc_in_map(m, md) \
for ((md) = (m)->map; \
(md) <= (efi_memory_desc_t *)((m)->map_end - (m)->desc_size); \
(md) = (void *)(md) + (m)->desc_size)
+/**
+ * for_each_efi_memory_desc - iterate over descriptors in efi.memmap
+ * @md: the efi_memory_desc_t * iterator
+ *
+ * Once the loop finishes @md must not be accessed.
+ */
+#define for_each_efi_memory_desc(md) \
+ for_each_efi_memory_desc_in_map(&efi.memmap, md)
+
/*
* Format an EFI memory descriptor's type and attributes to a user-provided
* character buffer, as per snprintf(), and return the buffer.
@@ -1000,7 +1060,6 @@
* possible, remove EFI-related code altogether.
*/
#define EFI_BOOT 0 /* Were we booted from EFI? */
-#define EFI_SYSTEM_TABLES 1 /* Can we use EFI system tables? */
#define EFI_CONFIG_TABLES 2 /* Can we use EFI config tables? */
#define EFI_RUNTIME_SERVICES 3 /* Can we use runtime services? */
#define EFI_MEMMAP 4 /* Can we use EFI memory map? */
@@ -1026,8 +1085,16 @@
}
static inline void
efi_reboot(enum reboot_mode reboot_mode, const char *__unused) {}
+
+static inline bool
+efi_capsule_pending(int *reset_type)
+{
+ return false;
+}
#endif
+extern int efi_status_to_err(efi_status_t status);
+
/*
* Variable Attributes
*/
@@ -1180,6 +1247,80 @@
void *test_string;
};
+#define PIXEL_RGB_RESERVED_8BIT_PER_COLOR 0
+#define PIXEL_BGR_RESERVED_8BIT_PER_COLOR 1
+#define PIXEL_BIT_MASK 2
+#define PIXEL_BLT_ONLY 3
+#define PIXEL_FORMAT_MAX 4
+
+struct efi_pixel_bitmask {
+ u32 red_mask;
+ u32 green_mask;
+ u32 blue_mask;
+ u32 reserved_mask;
+};
+
+struct efi_graphics_output_mode_info {
+ u32 version;
+ u32 horizontal_resolution;
+ u32 vertical_resolution;
+ int pixel_format;
+ struct efi_pixel_bitmask pixel_information;
+ u32 pixels_per_scan_line;
+} __packed;
+
+struct efi_graphics_output_protocol_mode_32 {
+ u32 max_mode;
+ u32 mode;
+ u32 info;
+ u32 size_of_info;
+ u64 frame_buffer_base;
+ u32 frame_buffer_size;
+} __packed;
+
+struct efi_graphics_output_protocol_mode_64 {
+ u32 max_mode;
+ u32 mode;
+ u64 info;
+ u64 size_of_info;
+ u64 frame_buffer_base;
+ u64 frame_buffer_size;
+} __packed;
+
+struct efi_graphics_output_protocol_mode {
+ u32 max_mode;
+ u32 mode;
+ unsigned long info;
+ unsigned long size_of_info;
+ u64 frame_buffer_base;
+ unsigned long frame_buffer_size;
+} __packed;
+
+struct efi_graphics_output_protocol_32 {
+ u32 query_mode;
+ u32 set_mode;
+ u32 blt;
+ u32 mode;
+};
+
+struct efi_graphics_output_protocol_64 {
+ u64 query_mode;
+ u64 set_mode;
+ u64 blt;
+ u64 mode;
+};
+
+struct efi_graphics_output_protocol {
+ unsigned long query_mode;
+ unsigned long set_mode;
+ unsigned long blt;
+ struct efi_graphics_output_protocol_mode *mode;
+};
+
+typedef efi_status_t (*efi_graphics_output_protocol_query_mode)(
+ struct efi_graphics_output_protocol *, u32, unsigned long *,
+ struct efi_graphics_output_mode_info **);
+
extern struct list_head efivar_sysfs_list;
static inline void
@@ -1195,8 +1336,7 @@
struct kobject *efivars_kobject(void);
int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *),
- void *data, bool atomic, bool duplicates,
- struct list_head *head);
+ void *data, bool duplicates, struct list_head *head);
void efivar_entry_add(struct efivar_entry *entry, struct list_head *head);
void efivar_entry_remove(struct efivar_entry *entry);
@@ -1242,6 +1382,13 @@
#define EFIVARS_DATA_SIZE_MAX 1024
#endif /* CONFIG_EFI_VARS */
+extern bool efi_capsule_pending(int *reset_type);
+
+extern int efi_capsule_supported(efi_guid_t guid, u32 flags,
+ size_t size, int *reset);
+
+extern int efi_capsule_update(efi_capsule_header_t *capsule,
+ struct page **pages);
#ifdef CONFIG_EFI_RUNTIME_MAP
int efi_runtime_map_init(struct kobject *);
@@ -1319,5 +1466,9 @@
efi_status_t efi_parse_options(char *cmdline);
+efi_status_t efi_setup_gop(efi_system_table_t *sys_table_arg,
+ struct screen_info *si, efi_guid_t *proto,
+ unsigned long size);
+
bool efi_runtime_disabled(void);
#endif /* _LINUX_EFI_H */
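
With the map folded into efi.memmap, iteration loses its explicit map
argument; a sketch of a caller using the new macro:

static u64 example_count_runtime_pages(void)
{
        efi_memory_desc_t *md;
        u64 pages = 0;

        /* Walks efi.memmap directly; no explicit map argument any more. */
        for_each_efi_memory_desc(md) {
                if (md->attribute & EFI_MEMORY_RUNTIME)
                        pages += md->num_pages;
        }
        return pages;
}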
diff --git a/include/linux/leds.h b/include/linux/leds.h
index f203a8f..d2b1306 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -50,6 +50,7 @@
#define LED_SYSFS_DISABLE (1 << 22)
#define LED_DEV_CAP_FLASH (1 << 23)
#define LED_HW_PLUGGABLE (1 << 24)
+#define LED_PANIC_INDICATOR (1 << 25)
/* Set LED brightness level
* Must not sleep. Use brightness_set_blocking for drivers
@@ -329,6 +330,12 @@
static inline void ledtrig_ide_activity(void) {}
#endif
+#ifdef CONFIG_LEDS_TRIGGER_MTD
+extern void ledtrig_mtd_activity(void);
+#else
+static inline void ledtrig_mtd_activity(void) {}
+#endif
+
#if defined(CONFIG_LEDS_TRIGGER_CAMERA) || defined(CONFIG_LEDS_TRIGGER_CAMERA_MODULE)
extern void ledtrig_flash_ctrl(bool on);
extern void ledtrig_torch_ctrl(bool on);
@@ -358,6 +365,7 @@
unsigned gpio;
unsigned active_low : 1;
unsigned retain_state_suspended : 1;
+ unsigned panic_indicator : 1;
unsigned default_state : 2;
/* default_state should be one of LEDS_GPIO_DEFSTATE_(ON|OFF|KEEP) */
struct gpio_desc *gpiod;
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index d10ef06..eabe013 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -356,8 +356,13 @@
extern void lockdep_clear_current_reclaim_state(void);
extern void lockdep_trace_alloc(gfp_t mask);
-extern void lock_pin_lock(struct lockdep_map *lock);
-extern void lock_unpin_lock(struct lockdep_map *lock);
+struct pin_cookie { unsigned int val; };
+
+#define NIL_COOKIE (struct pin_cookie){ .val = 0U, }
+
+extern struct pin_cookie lock_pin_lock(struct lockdep_map *lock);
+extern void lock_repin_lock(struct lockdep_map *lock, struct pin_cookie);
+extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie);
# define INIT_LOCKDEP .lockdep_recursion = 0, .lockdep_reclaim_gfp = 0,
@@ -373,8 +378,9 @@
#define lockdep_recursing(tsk) ((tsk)->lockdep_recursion)
-#define lockdep_pin_lock(l) lock_pin_lock(&(l)->dep_map)
-#define lockdep_unpin_lock(l) lock_unpin_lock(&(l)->dep_map)
+#define lockdep_pin_lock(l) lock_pin_lock(&(l)->dep_map)
+#define lockdep_repin_lock(l,c) lock_repin_lock(&(l)->dep_map, (c))
+#define lockdep_unpin_lock(l,c) lock_unpin_lock(&(l)->dep_map, (c))
#else /* !CONFIG_LOCKDEP */
@@ -427,8 +433,13 @@
#define lockdep_recursing(tsk) (0)
-#define lockdep_pin_lock(l) do { (void)(l); } while (0)
-#define lockdep_unpin_lock(l) do { (void)(l); } while (0)
+struct pin_cookie { };
+
+#define NIL_COOKIE (struct pin_cookie){ }
+
+#define lockdep_pin_lock(l) ({ struct pin_cookie cookie; cookie; })
+#define lockdep_repin_lock(l, c) do { (void)(l); (void)(c); } while (0)
+#define lockdep_unpin_lock(l, c) do { (void)(l); (void)(c); } while (0)
#endif /* !LOCKDEP */
@@ -446,6 +457,18 @@
lock_acquired(&(_lock)->dep_map, _RET_IP_); \
} while (0)
+#define LOCK_CONTENDED_RETURN(_lock, try, lock) \
+({ \
+ int ____err = 0; \
+ if (!try(_lock)) { \
+ lock_contended(&(_lock)->dep_map, _RET_IP_); \
+ ____err = lock(_lock); \
+ } \
+ if (!____err) \
+ lock_acquired(&(_lock)->dep_map, _RET_IP_); \
+ ____err; \
+})
+
#else /* CONFIG_LOCK_STAT */
#define lock_contended(lockdep_map, ip) do {} while (0)
@@ -454,6 +477,9 @@
#define LOCK_CONTENDED(_lock, try, lock) \
lock(_lock)
+#define LOCK_CONTENDED_RETURN(_lock, try, lock) \
+ lock(_lock)
+
#endif /* CONFIG_LOCK_STAT */
#ifdef CONFIG_LOCKDEP
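
The cookie turns pin/unpin into a checked pairing. A sketch of the usage
pattern, mirroring how a runqueue-style raw spinlock would use it (names
are illustrative):

static void example_pinned_section(raw_spinlock_t *lock)
{
        struct pin_cookie cookie;

        raw_spin_lock(lock);
        cookie = lockdep_pin_lock(lock);  /* @lock must now stay held */

        /* ... call code that is not allowed to drop @lock ... */

        lockdep_unpin_lock(lock, cookie); /* cookie proves the pairing */
        raw_spin_unlock(lock);
}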
diff --git a/include/linux/mfd/as3722.h b/include/linux/mfd/as3722.h
index 8d43e9f..51e6f94 100644
--- a/include/linux/mfd/as3722.h
+++ b/include/linux/mfd/as3722.h
@@ -196,6 +196,7 @@
#define AS3722_LDO3_VSEL_MIN 0x01
#define AS3722_LDO3_VSEL_MAX 0x2D
#define AS3722_LDO3_NUM_VOLT 0x2D
+#define AS3722_LDO6_VSEL_BYPASS 0x3F
#define AS3722_LDO_VSEL_MASK 0x7F
#define AS3722_LDO_VSEL_MIN 0x01
#define AS3722_LDO_VSEL_MAX 0x7F
diff --git a/include/linux/mfd/samsung/core.h b/include/linux/mfd/samsung/core.h
index 6bc4bcd..5a23dd4 100644
--- a/include/linux/mfd/samsung/core.h
+++ b/include/linux/mfd/samsung/core.h
@@ -30,6 +30,9 @@
#define MIN_600_MV 600000
#define MIN_500_MV 500000
+/* Ramp delay in uV/us */
+#define RAMP_DELAY_12_MVUS 12000
+
/* Macros to represent steps for LDO/BUCK */
#define STEP_50_MV 50000
#define STEP_25_MV 25000
diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h
index 05d58ee..7a26286 100644
--- a/include/linux/mfd/tmio.h
+++ b/include/linux/mfd/tmio.h
@@ -66,8 +66,8 @@
*/
#define TMIO_MMC_SDIO_IRQ (1 << 2)
-/* Some controllers don't need to wait 10ms for clock changes */
-#define TMIO_MMC_FAST_CLK_CHG (1 << 3)
+/* Some features are only available or tested on R-Car Gen2 or later */
+#define TMIO_MMC_MIN_RCAR2 (1 << 3)
/*
* Some controllers require waiting for the SD bus to become
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8f468e0..727f7997 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -72,6 +72,10 @@
#define __pa_symbol(x) __pa(RELOC_HIDE((unsigned long)(x), 0))
#endif
+#ifndef page_to_virt
+#define page_to_virt(x) __va(PFN_PHYS(page_to_pfn(x)))
+#endif
+
/*
* To prevent common memory management code establishing
* a zero page mapping on a read fault.
@@ -957,7 +961,7 @@
static __always_inline void *lowmem_page_address(const struct page *page)
{
- return __va(PFN_PHYS(page_to_pfn(page)));
+ return page_to_virt(page);
}
#if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL)
diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h
index 7b41c6d..f7ed271 100644
--- a/include/linux/mmc/dw_mmc.h
+++ b/include/linux/mmc/dw_mmc.h
@@ -36,7 +36,6 @@
EVENT_XFER_COMPLETE,
EVENT_DATA_COMPLETE,
EVENT_DATA_ERROR,
- EVENT_XFER_ERROR
};
struct mmc_data;
@@ -55,6 +54,7 @@
/**
* struct dw_mci - MMC controller state shared between all slots
* @lock: Spinlock protecting the queue and associated data.
+ * @irq_lock: Spinlock protecting the INTMASK setting.
* @regs: Pointer to MMIO registers.
* @fifo_reg: Pointer to MMIO registers for data FIFO
* @sg: Scatterlist entry currently being processed by PIO code, if any.
@@ -65,6 +65,9 @@
* @cmd: The command currently being sent to the card, or NULL.
* @data: The data currently being transferred, or NULL if no data
* transfer is in progress.
+ * @stop_abort: The command currently prepared for stopping the transfer.
+ * @prev_blksz: The block size of the previous transfer.
+ * @timing: Record of current ios timing.
* @use_dma: Whether DMA channel is initialized or not.
* @using_dma: Whether DMA is in use for the current transfer.
* @dma_64bit_address: Whether DMA supports 64-bit address mode or not.
@@ -72,7 +75,10 @@
* @sg_cpu: Virtual address of DMA buffer.
* @dma_ops: Pointer to platform-specific DMA callbacks.
* @cmd_status: Snapshot of SR taken upon completion of the current
* command. Only valid when EVENT_CMD_COMPLETE is pending.
+ * @ring_size: Buffer size for idma descriptors.
+ * @dms: structure of slave-dma private data.
+ * @phy_regs: physical address of controller's register map
* @data_status: Snapshot of SR taken upon completion of the current
* data transfer. Only valid when EVENT_DATA_COMPLETE or
* EVENT_DATA_ERROR is pending.
@@ -80,7 +86,6 @@
* to be sent.
* @dir_status: Direction of current transfer.
* @tasklet: Tasklet running the request state machine.
- * @card_tasklet: Tasklet handling card detect.
* @pending_events: Bitmask of events flagged by the interrupt handler
* to be processed by the tasklet.
* @completed_events: Bitmask of events which the state machine has
@@ -91,6 +96,7 @@
* rate and timeout calculations.
* @current_speed: Configured rate of the controller.
* @num_slots: Number of slots available.
+ * @fifoth_val: The value of FIFOTH register.
* @verid: Denote Version ID.
* @dev: Device associated with the MMC controller.
* @pdata: Platform data associated with the MMC controller.
@@ -107,9 +113,11 @@
* @push_data: Pointer to FIFO push function.
* @pull_data: Pointer to FIFO pull function.
* @quirks: Set of quirks that apply to specific versions of the IP.
+ * @vqmmc_enabled: True when the vqmmc regulator is enabled.
* @irq_flags: The flags to be passed to request_irq.
* @irq: The irq value to be passed to request_irq.
* @sdio_id0: Number of slot0 in the SDIO interrupt registers.
+ * @cmd11_timer: Timer for SD3.0 voltage switch over scheme.
* @dto_timer: Timer for broken data transfer over scheme.
*
* Locking
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 8dd4d29..85800b4 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -93,28 +93,39 @@
void (*pre_req)(struct mmc_host *host, struct mmc_request *req,
bool is_first_req);
void (*request)(struct mmc_host *host, struct mmc_request *req);
+
/*
- * Avoid calling these three functions too often or in a "fast path",
- * since underlaying controller might implement them in an expensive
- * and/or slow way.
- *
- * Also note that these functions might sleep, so don't call them
- * in the atomic contexts!
- *
+ * Avoid calling the next three functions too often or in a "fast
+ * path", since underlaying controller might implement them in an
+ * expensive and/or slow way. Also note that these functions might
+ * sleep, so don't call them in the atomic contexts!
+ */
+
+ /*
+ * Notes to the set_ios callback:
+ * ios->clock might be 0. For some controllers, setting 0 Hz
+ * works like setting any other frequency. However, some controllers
+ * explicitly need to disable the clock. Otherwise e.g. voltage
+ * switching might fail because the SDCLK is not really quiet.
+ */
+ void (*set_ios)(struct mmc_host *host, struct mmc_ios *ios);
+
+ /*
* Return values for the get_ro callback should be:
* 0 for a read/write card
* 1 for a read-only card
* -ENOSYS when not supported (equal to NULL callback)
* or a negative errno value when something bad happened
- *
+ */
+ int (*get_ro)(struct mmc_host *host);
+
+ /*
* Return values for the get_cd callback should be:
* 0 for an absent card
* 1 for a present card
* -ENOSYS when not supported (equal to NULL callback)
* or a negative errno value when something bad happened
*/
- void (*set_ios)(struct mmc_host *host, struct mmc_ios *ios);
- int (*get_ro)(struct mmc_host *host);
int (*get_cd)(struct mmc_host *host);
void (*enable_sdio_irq)(struct mmc_host *host, int enable);
diff --git a/include/linux/mmc/sh_mobile_sdhi.h b/include/linux/mmc/sh_mobile_sdhi.h
deleted file mode 100644
index 95d6f03..0000000
--- a/include/linux/mmc/sh_mobile_sdhi.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef LINUX_MMC_SH_MOBILE_SDHI_H
-#define LINUX_MMC_SH_MOBILE_SDHI_H
-
-#include <linux/types.h>
-
-#define SH_MOBILE_SDHI_IRQ_CARD_DETECT "card_detect"
-#define SH_MOBILE_SDHI_IRQ_SDCARD "sdcard"
-#define SH_MOBILE_SDHI_IRQ_SDIO "sdio"
-
-#endif /* LINUX_MMC_SH_MOBILE_SDHI_H */
diff --git a/include/linux/mmc/tmio.h b/include/linux/mmc/tmio.h
deleted file mode 100644
index 5f5cd80..0000000
--- a/include/linux/mmc/tmio.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * include/linux/mmc/tmio.h
- *
- * Copyright (C) 2016 Sang Engineering, Wolfram Sang
- * Copyright (C) 2015-16 Renesas Electronics Corporation
- * Copyright (C) 2007 Ian Molton
- * Copyright (C) 2004 Ian Molton
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Driver for the MMC / SD / SDIO cell found in:
- *
- * TC6393XB TC6391XB TC6387XB T7L66XB ASIC3
- */
-#ifndef LINUX_MMC_TMIO_H
-#define LINUX_MMC_TMIO_H
-
-#define CTL_SD_CMD 0x00
-#define CTL_ARG_REG 0x04
-#define CTL_STOP_INTERNAL_ACTION 0x08
-#define CTL_XFER_BLK_COUNT 0xa
-#define CTL_RESPONSE 0x0c
-#define CTL_STATUS 0x1c
-#define CTL_STATUS2 0x1e
-#define CTL_IRQ_MASK 0x20
-#define CTL_SD_CARD_CLK_CTL 0x24
-#define CTL_SD_XFER_LEN 0x26
-#define CTL_SD_MEM_CARD_OPT 0x28
-#define CTL_SD_ERROR_DETAIL_STATUS 0x2c
-#define CTL_SD_DATA_PORT 0x30
-#define CTL_TRANSACTION_CTL 0x34
-#define CTL_SDIO_STATUS 0x36
-#define CTL_SDIO_IRQ_MASK 0x38
-#define CTL_DMA_ENABLE 0xd8
-#define CTL_RESET_SD 0xe0
-#define CTL_VERSION 0xe2
-#define CTL_SDIO_REGS 0x100
-#define CTL_CLK_AND_WAIT_CTL 0x138
-#define CTL_RESET_SDIO 0x1e0
-
-/* Definitions for values the CTRL_STATUS register can take. */
-#define TMIO_STAT_CMDRESPEND 0x00000001
-#define TMIO_STAT_DATAEND 0x00000004
-#define TMIO_STAT_CARD_REMOVE 0x00000008
-#define TMIO_STAT_CARD_INSERT 0x00000010
-#define TMIO_STAT_SIGSTATE 0x00000020
-#define TMIO_STAT_WRPROTECT 0x00000080
-#define TMIO_STAT_CARD_REMOVE_A 0x00000100
-#define TMIO_STAT_CARD_INSERT_A 0x00000200
-#define TMIO_STAT_SIGSTATE_A 0x00000400
-#define TMIO_STAT_CMD_IDX_ERR 0x00010000
-#define TMIO_STAT_CRCFAIL 0x00020000
-#define TMIO_STAT_STOPBIT_ERR 0x00040000
-#define TMIO_STAT_DATATIMEOUT 0x00080000
-#define TMIO_STAT_RXOVERFLOW 0x00100000
-#define TMIO_STAT_TXUNDERRUN 0x00200000
-#define TMIO_STAT_CMDTIMEOUT 0x00400000
-#define TMIO_STAT_RXRDY 0x01000000
-#define TMIO_STAT_TXRQ 0x02000000
-#define TMIO_STAT_ILL_FUNC 0x20000000
-#define TMIO_STAT_CMD_BUSY 0x40000000
-#define TMIO_STAT_ILL_ACCESS 0x80000000
-
-#define CLK_CTL_DIV_MASK 0xff
-#define CLK_CTL_SCLKEN BIT(8)
-
-#define TMIO_BBS 512 /* Boot block size */
-
-#endif /* LINUX_MMC_TMIO_H */
diff --git a/include/linux/mmu_context.h b/include/linux/mmu_context.h
index 70fffeb..a444178 100644
--- a/include/linux/mmu_context.h
+++ b/include/linux/mmu_context.h
@@ -1,9 +1,16 @@
#ifndef _LINUX_MMU_CONTEXT_H
#define _LINUX_MMU_CONTEXT_H
+#include <asm/mmu_context.h>
+
struct mm_struct;
void use_mm(struct mm_struct *mm);
void unuse_mm(struct mm_struct *mm);
+/* Architectures that care about IRQ state in switch_mm can override this. */
+#ifndef switch_mm_irqs_off
+# define switch_mm_irqs_off switch_mm
+#endif
+
#endif
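
For reference, the shape of an architecture override (a sketch of assumed
names; x86 provides a real variant elsewhere in this series):

/* in the architecture's <asm/mmu_context.h> */
extern void switch_mm_irqs_off(struct mm_struct *prev,
                               struct mm_struct *next,
                               struct task_struct *tsk);
#define switch_mm_irqs_off switch_mm_irqs_off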
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 7712721..ef9fea4 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -283,17 +283,7 @@
const u_char *buf);
int mtd_read_oob(struct mtd_info *mtd, loff_t from, struct mtd_oob_ops *ops);
-
-static inline int mtd_write_oob(struct mtd_info *mtd, loff_t to,
- struct mtd_oob_ops *ops)
-{
- ops->retlen = ops->oobretlen = 0;
- if (!mtd->_write_oob)
- return -EOPNOTSUPP;
- if (!(mtd->flags & MTD_WRITEABLE))
- return -EROFS;
- return mtd->_write_oob(mtd, to, ops);
-}
+int mtd_write_oob(struct mtd_info *mtd, loff_t to, struct mtd_oob_ops *ops);
int mtd_get_fact_prot_info(struct mtd_info *mtd, size_t len, size_t *retlen,
struct otp_info *buf);
diff --git a/include/linux/of.h b/include/linux/of.h
index 3175803..77ddace 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -685,6 +685,15 @@
}
#endif
+#ifdef CONFIG_OF_NUMA
+extern int of_numa_init(void);
+#else
+static inline int of_numa_init(void)
+{
+ return -ENOSYS;
+}
+#endif
+
static inline struct device_node *of_find_matching_node(
struct device_node *from,
const struct of_device_id *matches)
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index 4196c90..d28ac05 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -105,6 +105,8 @@
struct mutex reserve_mutex;
u64 max_period;
bool secure_access; /* 32-bit ARM only */
+#define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40
+ DECLARE_BITMAP(pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS);
struct platform_device *plat_device;
struct pmu_hw_events __percpu *hw_events;
struct notifier_block hotplug_nb;
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index f291275..9e1c3ad 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -58,7 +58,7 @@
struct perf_callchain_entry {
__u64 nr;
- __u64 ip[PERF_MAX_STACK_DEPTH];
+ __u64 ip[0]; /* /proc/sys/kernel/perf_event_max_stack */
};
struct perf_raw_record {
@@ -151,6 +151,15 @@
*/
struct task_struct *target;
+ /*
+ * The PMU stores its hardware address filter configuration
+ * here.
+ */
+ void *addr_filters;
+
+ /* Last sync'ed generation of filters */
+ unsigned long addr_filters_gen;
+
/*
* hw_perf_event::state flags; used to track the PERF_EF_* state.
*/
@@ -216,6 +225,7 @@
#define PERF_PMU_CAP_AUX_SW_DOUBLEBUF 0x08
#define PERF_PMU_CAP_EXCLUSIVE 0x10
#define PERF_PMU_CAP_ITRACE 0x20
+#define PERF_PMU_CAP_HETEROGENEOUS_CPUS 0x40
/**
* struct pmu - generic performance monitoring unit
@@ -240,6 +250,9 @@
int task_ctx_nr;
int hrtimer_interval_ms;
+ /* number of address filters this PMU can do */
+ unsigned int nr_addr_filters;
+
/*
* Fully disable/enable this PMU, can be used to protect from the PMI
* as well as for lazy/batch writing of the MSRs.
@@ -393,12 +406,71 @@
void (*free_aux) (void *aux); /* optional */
/*
+ * Validate address range filters: make sure the HW supports the
+ * requested configuration and number of filters; return 0 if the
+ * supplied filters are valid, -errno otherwise.
+ *
+ * Runs in the context of the ioctl()ing process and is not serialized
+ * with the rest of the PMU callbacks.
+ */
+ int (*addr_filters_validate) (struct list_head *filters);
+ /* optional */
+
+ /*
+ * Synchronize address range filter configuration:
+ * translate hw-agnostic filters into hardware configuration in
+ * event::hw::addr_filters.
+ *
+ * Runs as part of the filter sync sequence that is done in the ->start()
+ * callback by calling perf_event_addr_filters_sync().
+ *
+ * May (and should) traverse event::addr_filters::list, for which its
+ * caller provides necessary serialization.
+ */
+ void (*addr_filters_sync) (struct perf_event *event);
+ /* optional */
+
+ /*
* Filter events for PMU-specific reasons.
*/
int (*filter_match) (struct perf_event *event); /* optional */
};
/**
+ * struct perf_addr_filter - address range filter definition
+ * @entry: event's filter list linkage
+ * @inode: object file's inode for file-based filters
+ * @offset: filter range offset
+ * @size: filter range size
+ * @range: 1: range, 0: address
+ * @filter: 1: filter/start, 0: stop
+ *
+ * This is a hardware-agnostic filter configuration as specified by the user.
+ */
+struct perf_addr_filter {
+ struct list_head entry;
+ struct inode *inode;
+ unsigned long offset;
+ unsigned long size;
+ unsigned int range : 1,
+ filter : 1;
+};
+
+/**
+ * struct perf_addr_filters_head - container for address range filters
+ * @list: list of filters for this event
+ * @lock: spinlock that serializes accesses to the @list and event's
+ * (and its children's) filter generations.
+ *
+ * A child event will use parent's @list (and therefore @lock), so they are
+ * bundled together; see perf_event_addr_filters().
+ */
+struct perf_addr_filters_head {
+ struct list_head list;
+ raw_spinlock_t lock;
+};
+
+/**
* enum perf_event_active_state - the states of an event
*/
enum perf_event_active_state {
@@ -566,6 +638,12 @@
atomic_t event_limit;
+ /* address range filters */
+ struct perf_addr_filters_head addr_filters;
+ /* vma address array for file-based filters */
+ unsigned long *addr_filters_offs;
+ unsigned long addr_filters_gen;
+
void (*destroy)(struct perf_event *);
struct rcu_head rcu_head;
@@ -834,9 +912,25 @@
struct perf_sample_data *data,
struct pt_regs *regs);
+extern void perf_event_output_forward(struct perf_event *event,
+ struct perf_sample_data *data,
+ struct pt_regs *regs);
+extern void perf_event_output_backward(struct perf_event *event,
+ struct perf_sample_data *data,
+ struct pt_regs *regs);
extern void perf_event_output(struct perf_event *event,
- struct perf_sample_data *data,
- struct pt_regs *regs);
+ struct perf_sample_data *data,
+ struct pt_regs *regs);
+
+static inline bool
+is_default_overflow_handler(struct perf_event *event)
+{
+ if (likely(event->overflow_handler == perf_event_output_forward))
+ return true;
+ if (unlikely(event->overflow_handler == perf_event_output_backward))
+ return true;
+ return false;
+}
extern void
perf_event_header__init_id(struct perf_event_header *header,
@@ -977,9 +1071,11 @@
extern int get_callchain_buffers(void);
extern void put_callchain_buffers(void);
+extern int sysctl_perf_event_max_stack;
+
static inline int perf_callchain_store(struct perf_callchain_entry *entry, u64 ip)
{
- if (entry->nr < PERF_MAX_STACK_DEPTH) {
+ if (entry->nr < sysctl_perf_event_max_stack) {
entry->ip[entry->nr++] = ip;
return 0;
} else {
@@ -1001,6 +1097,8 @@
void __user *buffer, size_t *lenp,
loff_t *ppos);
+int perf_event_max_stack_handler(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos);
static inline bool perf_paranoid_tracepoint_raw(void)
{
@@ -1045,8 +1143,41 @@
return event->pmu->setup_aux;
}
+static inline bool is_write_backward(struct perf_event *event)
+{
+ return !!event->attr.write_backward;
+}
+
+static inline bool has_addr_filter(struct perf_event *event)
+{
+ return event->pmu->nr_addr_filters;
+}
+
+/*
+ * An inherited event uses parent's filters
+ */
+static inline struct perf_addr_filters_head *
+perf_event_addr_filters(struct perf_event *event)
+{
+ struct perf_addr_filters_head *ifh = &event->addr_filters;
+
+ if (event->parent)
+ ifh = &event->parent->addr_filters;
+
+ return ifh;
+}
+
+extern void perf_event_addr_filters_sync(struct perf_event *event);
+
extern int perf_output_begin(struct perf_output_handle *handle,
struct perf_event *event, unsigned int size);
+extern int perf_output_begin_forward(struct perf_output_handle *handle,
+ struct perf_event *event,
+ unsigned int size);
+extern int perf_output_begin_backward(struct perf_output_handle *handle,
+ struct perf_event *event,
+ unsigned int size);
+
extern void perf_output_end(struct perf_output_handle *handle);
extern unsigned int perf_output_copy(struct perf_output_handle *handle,
const void *buf, unsigned int len);
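
A sketch of a PMU's ->addr_filters_sync() walking the filter list; the
hardware hook below is hypothetical, standing in for PMU-specific
programming:

/* hypothetical stand-in for PMU-specific range programming */
static void example_hw_program_range(int idx, unsigned long offset,
                                     unsigned long size, bool range,
                                     bool start)
{
}

static void example_pmu_addr_filters_sync(struct perf_event *event)
{
        struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
        struct perf_addr_filter *filter;
        int i = 0;

        /* The filter sync sequence serializes accesses to the list. */
        list_for_each_entry(filter, &ifh->list, entry)
                example_hw_program_range(i++, filter->offset, filter->size,
                                         filter->range, filter->filter);
}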
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 6a5d654..06eb353 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -563,7 +563,6 @@
bool is_suspended:1; /* Ditto */
bool is_noirq_suspended:1;
bool is_late_suspended:1;
- bool ignore_children:1;
bool early_init:1; /* Owned by the PM core */
bool direct_complete:1; /* Owned by the PM core */
spinlock_t lock;
@@ -591,6 +590,7 @@
unsigned int deferred_resume:1;
unsigned int run_wake:1;
unsigned int runtime_auto:1;
+ bool ignore_children:1;
unsigned int no_callbacks:1;
unsigned int irq_safe:1;
unsigned int use_autosuspend:1;
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 49cd889..39285c7 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -28,14 +28,12 @@
struct dev_power_governor {
bool (*power_down_ok)(struct dev_pm_domain *domain);
- bool (*stop_ok)(struct device *dev);
+ bool (*suspend_ok)(struct device *dev);
};
struct gpd_dev_ops {
int (*start)(struct device *dev);
int (*stop)(struct device *dev);
- int (*save_state)(struct device *dev);
- int (*restore_state)(struct device *dev);
bool (*active_wakeup)(struct device *dev);
};
@@ -94,7 +92,7 @@
s64 resume_latency_ns;
s64 effective_constraint_ns;
bool constraint_changed;
- bool cached_stop_ok;
+ bool cached_suspend_ok;
};
struct pm_domain_data {
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index cccaf4a..bca2615 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -65,6 +65,10 @@
int dev_pm_opp_set_regulator(struct device *dev, const char *name);
void dev_pm_opp_put_regulator(struct device *dev);
int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq);
+int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const struct cpumask *cpumask);
+int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask);
+void dev_pm_opp_remove_table(struct device *dev);
+void dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask);
#else
static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp)
{
@@ -109,25 +113,25 @@
static inline struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev,
unsigned long freq, bool available)
{
- return ERR_PTR(-EINVAL);
+ return ERR_PTR(-ENOTSUPP);
}
static inline struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev,
unsigned long *freq)
{
- return ERR_PTR(-EINVAL);
+ return ERR_PTR(-ENOTSUPP);
}
static inline struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev,
unsigned long *freq)
{
- return ERR_PTR(-EINVAL);
+ return ERR_PTR(-ENOTSUPP);
}
static inline int dev_pm_opp_add(struct device *dev, unsigned long freq,
unsigned long u_volt)
{
- return -EINVAL;
+ return -ENOTSUPP;
}
static inline void dev_pm_opp_remove(struct device *dev, unsigned long freq)
@@ -147,73 +151,85 @@
static inline struct srcu_notifier_head *dev_pm_opp_get_notifier(
struct device *dev)
{
- return ERR_PTR(-EINVAL);
+ return ERR_PTR(-ENOTSUPP);
}
static inline int dev_pm_opp_set_supported_hw(struct device *dev,
const u32 *versions,
unsigned int count)
{
- return -EINVAL;
+ return -ENOTSUPP;
}
static inline void dev_pm_opp_put_supported_hw(struct device *dev) {}
static inline int dev_pm_opp_set_prop_name(struct device *dev, const char *name)
{
- return -EINVAL;
+ return -ENOTSUPP;
}
static inline void dev_pm_opp_put_prop_name(struct device *dev) {}
static inline int dev_pm_opp_set_regulator(struct device *dev, const char *name)
{
- return -EINVAL;
+ return -ENOTSUPP;
}
static inline void dev_pm_opp_put_regulator(struct device *dev) {}
static inline int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq)
{
+ return -ENOTSUPP;
+}
+
+static inline int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const struct cpumask *cpumask)
+{
+ return -ENOTSUPP;
+}
+
+static inline int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask)
+{
return -EINVAL;
}
+static inline void dev_pm_opp_remove_table(struct device *dev)
+{
+}
+
+static inline void dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask)
+{
+}
+
#endif /* CONFIG_PM_OPP */
#if defined(CONFIG_PM_OPP) && defined(CONFIG_OF)
int dev_pm_opp_of_add_table(struct device *dev);
void dev_pm_opp_of_remove_table(struct device *dev);
-int dev_pm_opp_of_cpumask_add_table(cpumask_var_t cpumask);
-void dev_pm_opp_of_cpumask_remove_table(cpumask_var_t cpumask);
-int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask);
-int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask);
+int dev_pm_opp_of_cpumask_add_table(const struct cpumask *cpumask);
+void dev_pm_opp_of_cpumask_remove_table(const struct cpumask *cpumask);
+int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask);
#else
static inline int dev_pm_opp_of_add_table(struct device *dev)
{
- return -EINVAL;
+ return -ENOTSUPP;
}
static inline void dev_pm_opp_of_remove_table(struct device *dev)
{
}
-static inline int dev_pm_opp_of_cpumask_add_table(cpumask_var_t cpumask)
+static inline int dev_pm_opp_of_cpumask_add_table(const struct cpumask *cpumask)
{
- return -ENOSYS;
+ return -ENOTSUPP;
}
-static inline void dev_pm_opp_of_cpumask_remove_table(cpumask_var_t cpumask)
+static inline void dev_pm_opp_of_cpumask_remove_table(const struct cpumask *cpumask)
{
}
-static inline int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask)
+static inline int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask)
{
- return -ENOSYS;
-}
-
-static inline int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask)
-{
- return -ENOSYS;
+ return -ENOTSUPP;
}
#endif
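
A sketch of the new calling convention with plain struct cpumask pointers
(driver context assumed):

static int example_init_cpu_opps(struct device *cpu_dev)
{
        cpumask_var_t shared;
        int ret;

        if (!zalloc_cpumask_var(&shared, GFP_KERNEL))
                return -ENOMEM;

        ret = dev_pm_opp_of_get_sharing_cpus(cpu_dev, shared);
        if (!ret)
                ret = dev_pm_opp_of_cpumask_add_table(shared);

        free_cpumask_var(shared);
        return ret;
}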
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 7af093d..2e14d26 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -56,6 +56,11 @@
s64 delta_ns);
extern void pm_runtime_set_memalloc_noio(struct device *dev, bool enable);
+static inline void pm_suspend_ignore_children(struct device *dev, bool enable)
+{
+ dev->power.ignore_children = enable;
+}
+
static inline bool pm_children_suspended(struct device *dev)
{
return dev->power.ignore_children
@@ -156,6 +161,7 @@
static inline void pm_runtime_allow(struct device *dev) {}
static inline void pm_runtime_forbid(struct device *dev) {}
+static inline void pm_suspend_ignore_children(struct device *dev, bool enable) {}
static inline bool pm_children_suspended(struct device *dev) { return false; }
static inline void pm_runtime_get_noresume(struct device *dev) {}
static inline void pm_runtime_put_noidle(struct device *dev) {}
diff --git a/include/linux/pnp.h b/include/linux/pnp.h
index 5df733b..2588ca6 100644
--- a/include/linux/pnp.h
+++ b/include/linux/pnp.h
@@ -337,9 +337,11 @@
#ifdef CONFIG_PNPBIOS
extern struct pnp_protocol pnpbios_protocol;
+extern bool arch_pnpbios_disabled(void);
#define pnp_device_is_pnpbios(dev) ((dev)->protocol == (&pnpbios_protocol))
#else
#define pnp_device_is_pnpbios(dev) 0
+#define arch_pnpbios_disabled() false
#endif
#ifdef CONFIG_PNPACPI
diff --git a/include/linux/proportions.h b/include/linux/proportions.h
deleted file mode 100644
index 2122133..0000000
--- a/include/linux/proportions.h
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * FLoating proportions
- *
- * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
- *
- * This file contains the public data structure and API definitions.
- */
-
-#ifndef _LINUX_PROPORTIONS_H
-#define _LINUX_PROPORTIONS_H
-
-#include <linux/percpu_counter.h>
-#include <linux/spinlock.h>
-#include <linux/mutex.h>
-#include <linux/gfp.h>
-
-struct prop_global {
- /*
- * The period over which we differentiate
- *
- * period = 2^shift
- */
- int shift;
- /*
- * The total event counter aka 'time'.
- *
- * Treated as an unsigned long; the lower 'shift - 1' bits are the
- * counter bits, the remaining upper bits the period counter.
- */
- struct percpu_counter events;
-};
-
-/*
- * global proportion descriptor
- *
- * this is needed to consistently flip prop_global structures.
- */
-struct prop_descriptor {
- int index;
- struct prop_global pg[2];
- struct mutex mutex; /* serialize the prop_global switch */
-};
-
-int prop_descriptor_init(struct prop_descriptor *pd, int shift, gfp_t gfp);
-void prop_change_shift(struct prop_descriptor *pd, int new_shift);
-
-/*
- * ----- PERCPU ------
- */
-
-struct prop_local_percpu {
- /*
- * the local events counter
- */
- struct percpu_counter events;
-
- /*
- * snapshot of the last seen global state
- */
- int shift;
- unsigned long period;
- raw_spinlock_t lock; /* protect the snapshot state */
-};
-
-int prop_local_init_percpu(struct prop_local_percpu *pl, gfp_t gfp);
-void prop_local_destroy_percpu(struct prop_local_percpu *pl);
-void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl);
-void prop_fraction_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl,
- long *numerator, long *denominator);
-
-static inline
-void prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
-{
- unsigned long flags;
-
- local_irq_save(flags);
- __prop_inc_percpu(pd, pl);
- local_irq_restore(flags);
-}
-
-/*
- * Limit the time part in order to ensure there are some bits left for the
- * cycle counter and fraction multiply.
- */
-#if BITS_PER_LONG == 32
-#define PROP_MAX_SHIFT (3*BITS_PER_LONG/4)
-#else
-#define PROP_MAX_SHIFT (BITS_PER_LONG/2)
-#endif
-
-#define PROP_FRAC_SHIFT (BITS_PER_LONG - PROP_MAX_SHIFT - 1)
-#define PROP_FRAC_BASE (1UL << PROP_FRAC_SHIFT)
-
-void __prop_inc_percpu_max(struct prop_descriptor *pd,
- struct prop_local_percpu *pl, long frac);
-
-
-/*
- * ----- SINGLE ------
- */
-
-struct prop_local_single {
- /*
- * the local events counter
- */
- unsigned long events;
-
- /*
- * snapshot of the last seen global state
- * and a lock protecting this state
- */
- unsigned long period;
- int shift;
- raw_spinlock_t lock; /* protect the snapshot state */
-};
-
-#define INIT_PROP_LOCAL_SINGLE(name) \
-{ .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \
-}
-
-int prop_local_init_single(struct prop_local_single *pl);
-void prop_local_destroy_single(struct prop_local_single *pl);
-void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl);
-void prop_fraction_single(struct prop_descriptor *pd, struct prop_local_single *pl,
- long *numerator, long *denominator);
-
-static inline
-void prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl)
-{
- unsigned long flags;
-
- local_irq_save(flags);
- __prop_inc_single(pd, pl);
- local_irq_restore(flags);
-}
-
-#endif /* _LINUX_PROPORTIONS_H */
diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index cfc3ed4..b78d27c 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -74,6 +74,24 @@
PWM_POLARITY_INVERSED,
};
+/**
+ * struct pwm_args - board-dependent PWM arguments
+ * @period: reference period
+ * @polarity: reference polarity
+ *
+ * This structure describes board-dependent arguments attached to a PWM
+ * device. These arguments are usually retrieved from the PWM lookup table or
+ * device tree.
+ *
+ * Do not confuse this with the PWM state: PWM arguments represent the initial
+ * configuration that users want to use on this PWM device rather than the
+ * current PWM hardware state.
+ */
+struct pwm_args {
+ unsigned int period;
+ enum pwm_polarity polarity;
+};
+
enum {
PWMF_REQUESTED = 1 << 0,
PWMF_ENABLED = 1 << 1,
@@ -92,6 +110,7 @@
* @period: period of the PWM signal (in nanoseconds)
* @duty_cycle: duty cycle of the PWM signal (in nanoseconds)
* @polarity: polarity of the PWM signal
+ * @args: PWM arguments
*/
struct pwm_device {
const char *label;
@@ -105,6 +124,8 @@
unsigned int period;
unsigned int duty_cycle;
enum pwm_polarity polarity;
+
+ struct pwm_args args;
};
static inline bool pwm_is_enabled(const struct pwm_device *pwm)
@@ -144,6 +165,18 @@
return pwm ? pwm->polarity : PWM_POLARITY_NORMAL;
}
+static inline void pwm_get_args(const struct pwm_device *pwm,
+ struct pwm_args *args)
+{
+ *args = pwm->args;
+}
+
+static inline void pwm_apply_args(struct pwm_device *pwm)
+{
+ pwm_set_period(pwm, pwm->args.period);
+ pwm_set_polarity(pwm, pwm->args.polarity);
+}
+
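
/*
 * A sketch of a client consuming the reference arguments (not from this
 * patch); the 50% duty cycle is an arbitrary choice.
 */
static int example_pwm_init(struct pwm_device *pwm)
{
        struct pwm_args args;

        pwm_get_args(pwm, &args);       /* board-supplied reference config */
        pwm_apply_args(pwm);            /* push period/polarity to the device */

        return pwm_config(pwm, args.period / 2, args.period);
}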
/**
* struct pwm_ops - PWM controller operations
* @request: optional hook for requesting a PWM
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 2657aff..5f1533e 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -508,14 +508,7 @@
* CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side
* critical section unless it can prove otherwise.
*/
-#ifdef CONFIG_PREEMPT_COUNT
int rcu_read_lock_sched_held(void);
-#else /* #ifdef CONFIG_PREEMPT_COUNT */
-static inline int rcu_read_lock_sched_held(void)
-{
- return 1;
-}
-#endif /* #else #ifdef CONFIG_PREEMPT_COUNT */
#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
@@ -532,18 +525,10 @@
return 1;
}
-#ifdef CONFIG_PREEMPT_COUNT
static inline int rcu_read_lock_sched_held(void)
{
- return preempt_count() != 0 || irqs_disabled();
+ return !preemptible();
}
-#else /* #ifdef CONFIG_PREEMPT_COUNT */
-static inline int rcu_read_lock_sched_held(void)
-{
- return 1;
-}
-#endif /* #else #ifdef CONFIG_PREEMPT_COUNT */
-
#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
#ifdef CONFIG_PROVE_RCU
@@ -1144,4 +1129,17 @@
#endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
+/*
+ * Dump the ftrace buffer, but only one time per callsite per boot.
+ */
+#define rcu_ftrace_dump(oops_dump_mode) \
+do { \
+ static atomic_t ___rfd_beenhere = ATOMIC_INIT(0); \
+ \
+ if (!atomic_read(&___rfd_beenhere) && \
+ !atomic_xchg(&___rfd_beenhere, 1)) \
+ ftrace_dump(oops_dump_mode); \
+} while (0)
+
+
#endif /* __LINUX_RCUPDATE_H */
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 64809ae..93aea75 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -149,6 +149,22 @@
return 0;
}
+/*
+ * Return the number of expedited grace periods completed.
+ */
+static inline unsigned long rcu_exp_batches_completed(void)
+{
+ return 0;
+}
+
+/*
+ * Return the number of expedited sched grace periods completed.
+ */
+static inline unsigned long rcu_exp_batches_completed_sched(void)
+{
+ return 0;
+}
+
static inline void rcu_force_quiescent_state(void)
{
}
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index ad1eda9..5043cb8 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -87,6 +87,8 @@
unsigned long rcu_batches_completed(void);
unsigned long rcu_batches_completed_bh(void);
unsigned long rcu_batches_completed_sched(void);
+unsigned long rcu_exp_batches_completed(void);
+unsigned long rcu_exp_batches_completed_sched(void);
void show_rcu_gp_kthreads(void);
void rcu_force_quiescent_state(void);
diff --git a/include/linux/regulator/act8865.h b/include/linux/regulator/act8865.h
index 2eb3860..113d861 100644
--- a/include/linux/regulator/act8865.h
+++ b/include/linux/regulator/act8865.h
@@ -69,11 +69,13 @@
* @id: regulator id
* @name: regulator name
* @init_data: regulator init data
+ * @of_node: device tree node (optional)
*/
struct act8865_regulator_data {
int id;
const char *name;
struct regulator_init_data *init_data;
+ struct device_node *of_node;
};
/**
diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h
index 4860350..80dc4e5 100644
--- a/include/linux/regulator/consumer.h
+++ b/include/linux/regulator/consumer.h
@@ -224,7 +224,7 @@
void regulator_bulk_free(int num_consumers,
struct regulator_bulk_data *consumers);
-int regulator_can_change_voltage(struct regulator *regulator);
+int __deprecated regulator_can_change_voltage(struct regulator *regulator);
int regulator_count_voltages(struct regulator *regulator);
int regulator_list_voltage(struct regulator *regulator, unsigned selector);
int regulator_is_supported_voltage(struct regulator *regulator,
@@ -436,7 +436,7 @@
{
}
-static inline int regulator_can_change_voltage(struct regulator *regulator)
+static inline int __deprecated regulator_can_change_voltage(struct regulator *regulator)
{
return 0;
}
diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index cd271e8..fcfa40a 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -93,6 +93,9 @@
* @get_current_limit: Get the configured limit for a current-limited regulator.
* @set_input_current_limit: Configure an input limit.
*
+ * @set_over_current_protection: Enable automatic shutdown when an over
+ *	current event is detected.
+ *
* @set_active_discharge: Set active discharge enable/disable of regulators.
*
* @set_mode: Set the configured operating mode for the regulator.
@@ -255,6 +258,8 @@
*
* @vsel_reg: Register for selector when using regulator_regmap_X_voltage_
* @vsel_mask: Mask for register bitfield used for selector
+ * @csel_reg: Register for TPS65218 LS3 current regulator
+ * @csel_mask: Mask for TPS65218 LS3 current regulator
* @apply_reg: Register for initiate voltage change on the output when
* using regulator_set_voltage_sel_regmap
* @apply_bit: Register bitfield used for initiate voltage change on the
@@ -292,7 +297,7 @@
const struct regulator_desc *,
struct regulator_config *);
int id;
- bool continuous_voltage_range;
+ unsigned int continuous_voltage_range:1;
unsigned n_voltages;
const struct regulator_ops *ops;
int irq;
diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h
index 5d627c8..ad3e5158 100644
--- a/include/linux/regulator/machine.h
+++ b/include/linux/regulator/machine.h
@@ -97,6 +97,7 @@
* @ramp_disable: Disable ramp delay when initialising or when setting voltage.
* @soft_start: Enable soft start so that voltage ramps slowly.
* @pull_down: Enable pull down when regulator is disabled.
+ * @over_current_protection: Auto disable on over current event.
*
* @input_uV: Input voltage for regulator when supplied by another regulator.
*
diff --git a/include/linux/regulator/max8973-regulator.h b/include/linux/regulator/max8973-regulator.h
index f6a8a16..2fcb998 100644
--- a/include/linux/regulator/max8973-regulator.h
+++ b/include/linux/regulator/max8973-regulator.h
@@ -54,6 +54,10 @@
* @reg_init_data: The regulator init data.
* @control_flags: Control flags which are ORed value of above flags to
* configure device.
+ * @junction_temp_warning: Junction temperature in millicelsius at which a
+ *		warning should be raised. Thermal functionality is only
+ *		supported on the MAX77621. The warning thresholds the
+ *		MAX77621 supports are 120C and 140C.
* @enable_ext_control: Enable the voltage enable/disable through external
* control signal from EN input pin. If it is false then
* voltage output will be enabled/disabled through EN bit of
@@ -67,6 +71,7 @@
struct max8973_regulator_platform_data {
struct regulator_init_data *reg_init_data;
unsigned long control_flags;
+ unsigned long junction_temp_warning;
bool enable_ext_control;
int enable_gpio;
int dvs_gpio;
diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h
index 561e861..ae0528b 100644
--- a/include/linux/rwsem-spinlock.h
+++ b/include/linux/rwsem-spinlock.h
@@ -34,7 +34,7 @@
extern void __down_read(struct rw_semaphore *sem);
extern int __down_read_trylock(struct rw_semaphore *sem);
extern void __down_write(struct rw_semaphore *sem);
-extern void __down_write_nested(struct rw_semaphore *sem, int subclass);
+extern int __must_check __down_write_killable(struct rw_semaphore *sem);
extern int __down_write_trylock(struct rw_semaphore *sem);
extern void __up_read(struct rw_semaphore *sem);
extern void __up_write(struct rw_semaphore *sem);
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 8f498cd..d1c12d1 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -14,6 +14,7 @@
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/atomic.h>
+#include <linux/err.h>
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
#include <linux/osq_lock.h>
#endif
@@ -43,6 +44,7 @@
extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
+extern struct rw_semaphore *rwsem_down_write_failed_killable(struct rw_semaphore *sem);
extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *);
extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
@@ -116,6 +118,7 @@
* lock for writing
*/
extern void down_write(struct rw_semaphore *sem);
+extern int __must_check down_write_killable(struct rw_semaphore *sem);
/*
* trylock for writing -- returns 1 if successful, 0 if contention
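
down_write_killable() sleeps like down_write() but gives up when a fatal
signal is delivered, returning -EINTR instead of 0. A minimal caller sketch
(the semaphore is a placeholder):

    static int update_state_killable(struct rw_semaphore *sem)
    {
            if (down_write_killable(sem))
                    return -EINTR;

            /* ... write-side critical section ... */

            up_write(sem);
            return 0;
    }
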
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 52c4847..31bd0d9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -40,7 +40,6 @@
#include <linux/pid.h>
#include <linux/percpu.h>
#include <linux/topology.h>
-#include <linux/proportions.h>
#include <linux/seccomp.h>
#include <linux/rcupdate.h>
#include <linux/rculist.h>
@@ -178,9 +177,11 @@
extern void calc_global_load(unsigned long ticks);
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
-extern void update_cpu_load_nohz(int active);
+extern void cpu_load_update_nohz_start(void);
+extern void cpu_load_update_nohz_stop(void);
#else
-static inline void update_cpu_load_nohz(int active) { }
+static inline void cpu_load_update_nohz_start(void) { }
+static inline void cpu_load_update_nohz_stop(void) { }
#endif
extern void dump_cpu_task(int cpu);
@@ -372,6 +373,15 @@
extern void trap_init(void);
extern void update_process_times(int user);
extern void scheduler_tick(void);
+extern int sched_cpu_starting(unsigned int cpu);
+extern int sched_cpu_activate(unsigned int cpu);
+extern int sched_cpu_deactivate(unsigned int cpu);
+
+#ifdef CONFIG_HOTPLUG_CPU
+extern int sched_cpu_dying(unsigned int cpu);
+#else
+# define sched_cpu_dying NULL
+#endif
extern void sched_show_task(struct task_struct *p);
@@ -935,9 +945,19 @@
};
/*
+ * Integer metrics need fixed point arithmetic, e.g., sched/fair
+ * has a few: load, load_avg, util_avg, freq, and capacity.
+ *
+ * We define a basic fixed point arithmetic range, and then formalize
+ * all these metrics based on that basic range.
+ */
+# define SCHED_FIXEDPOINT_SHIFT 10
+# define SCHED_FIXEDPOINT_SCALE (1L << SCHED_FIXEDPOINT_SHIFT)
+
+/*
* Increase resolution of cpu_capacity calculations
*/
-#define SCHED_CAPACITY_SHIFT 10
+#define SCHED_CAPACITY_SHIFT SCHED_FIXEDPOINT_SHIFT
#define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT)
/*
@@ -1199,18 +1219,56 @@
};
/*
- * The load_avg/util_avg accumulates an infinite geometric series.
- * 1) load_avg factors frequency scaling into the amount of time that a
- * sched_entity is runnable on a rq into its weight. For cfs_rq, it is the
- * aggregated such weights of all runnable and blocked sched_entities.
- * 2) util_avg factors frequency and cpu scaling into the amount of time
- * that a sched_entity is running on a CPU, in the range [0..SCHED_LOAD_SCALE].
- * For cfs_rq, it is the aggregated such times of all runnable and
+ * The load_avg/util_avg accumulates an infinite geometric series
+ * (see __update_load_avg() in kernel/sched/fair.c).
+ *
+ * [load_avg definition]
+ *
+ * load_avg = runnable% * scale_load_down(load)
+ *
+ * where runnable% is the time ratio that a sched_entity is runnable.
+ * For cfs_rq, it is the aggregated load_avg of all runnable and
* blocked sched_entities.
- * The 64 bit load_sum can:
- * 1) for cfs_rq, afford 4353082796 (=2^64/47742/88761) entities with
- * the highest weight (=88761) always runnable, we should not overflow
- * 2) for entity, support any load.weight always runnable
+ *
+ * load_avg may also take frequency scaling into account:
+ *
+ * load_avg = runnable% * scale_load_down(load) * freq%
+ *
+ * where freq% is the CPU frequency normalized to the highest frequency.
+ *
+ * [util_avg definition]
+ *
+ * util_avg = running% * SCHED_CAPACITY_SCALE
+ *
+ * where running% is the time ratio that a sched_entity is running on
+ * a CPU. For cfs_rq, it is the aggregated util_avg of all runnable
+ * and blocked sched_entities.
+ *
+ * util_avg may also factor frequency scaling and CPU capacity scaling:
+ *
+ * util_avg = running% * SCHED_CAPACITY_SCALE * freq% * capacity%
+ *
+ * where freq% is the same as above, and capacity% is the CPU capacity
+ * normalized to the greatest capacity (due to uarch differences, etc).
+ *
+ * N.B., the above ratios (runnable%, running%, freq%, and capacity%)
+ * themselves are in the range of [0, 1]. To do fixed point arithmetic,
+ * we therefore scale them to as large a range as necessary. This is for
+ * example reflected by util_avg's SCHED_CAPACITY_SCALE.
+ *
+ * [Overflow issue]
+ *
+ * The 64-bit load_sum can have 4353082796 (=2^64/47742/88761) entities
+ * with the highest load (=88761), always runnable on a single cfs_rq,
+ * and should not overflow as the number already hits PID_MAX_LIMIT.
+ *
+ * For all other cases (including 32-bit kernels), struct load_weight's
+ * weight will overflow first before we do, because:
+ *
+ * Max(load_avg) <= Max(load.weight)
+ *
+ * Then it is the load_weight's responsibility to consider overflow
+ * issues.
*/
struct sched_avg {
u64 last_update_time, load_sum;
@@ -1596,6 +1654,7 @@
unsigned long sas_ss_sp;
size_t sas_ss_size;
+ unsigned sas_ss_flags;
struct callback_head *task_works;
@@ -1871,6 +1930,11 @@
/* Future-safe accessor for struct task_struct's cpus_allowed. */
#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
+static inline int tsk_nr_cpus_allowed(struct task_struct *p)
+{
+ return p->nr_cpus_allowed;
+}
+
#define TNF_MIGRATED 0x01
#define TNF_NO_GROUP 0x02
#define TNF_SHARED 0x04
@@ -2303,8 +2367,6 @@
/*
* See the comment in kernel/sched/clock.c
*/
-extern u64 cpu_clock(int cpu);
-extern u64 local_clock(void);
extern u64 running_clock(void);
extern u64 sched_clock_cpu(int cpu);
@@ -2323,6 +2385,16 @@
static inline void sched_clock_idle_wakeup_event(u64 delta_ns)
{
}
+
+static inline u64 cpu_clock(int cpu)
+{
+ return sched_clock();
+}
+
+static inline u64 local_clock(void)
+{
+ return sched_clock();
+}
#else
/*
* Architectures can set this to 1 if they have specified
@@ -2337,6 +2409,26 @@
extern void sched_clock_tick(void);
extern void sched_clock_idle_sleep_event(void);
extern void sched_clock_idle_wakeup_event(u64 delta_ns);
+
+/*
+ * As outlined in clock.c, provides a fast, high resolution, nanosecond
+ * time source that is monotonic for a given cpu argument and has bounded
+ * drift between cpus.
+ *
+ * ######################### BIG FAT WARNING ##########################
+ * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can #
+ * # go backwards !! #
+ * ####################################################################
+ */
+static inline u64 cpu_clock(int cpu)
+{
+ return sched_clock_cpu(cpu);
+}
+
+static inline u64 local_clock(void)
+{
+ return sched_clock_cpu(raw_smp_processor_id());
+}
#endif
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
@@ -2575,6 +2667,18 @@
*/
static inline int on_sig_stack(unsigned long sp)
{
+ /*
+ * If the signal stack is SS_AUTODISARM then, by construction, we
+ * can't be on the signal stack unless user code deliberately set
+ * SS_AUTODISARM when we were already on it.
+ *
+ * This improves reliability: if user state gets corrupted such that
+ * the stack pointer points very close to the end of the signal stack,
+ * then this check will enable the signal to be handled anyway.
+ */
+ if (current->sas_ss_flags & SS_AUTODISARM)
+ return 0;
+
#ifdef CONFIG_STACK_GROWSUP
return sp >= current->sas_ss_sp &&
sp - current->sas_ss_sp < current->sas_ss_size;
@@ -2592,6 +2696,13 @@
return on_sig_stack(sp) ? SS_ONSTACK : 0;
}
+static inline void sas_ss_reset(struct task_struct *p)
+{
+ p->sas_ss_sp = 0;
+ p->sas_ss_size = 0;
+ p->sas_ss_flags = SS_DISABLE;
+}
+
static inline unsigned long sigsp(unsigned long sp, struct ksignal *ksig)
{
if (unlikely((ksig->ka.sa.sa_flags & SA_ONSTACK)) && ! sas_ss_flags(sp))
@@ -3240,7 +3351,10 @@
u64 time, unsigned long util, unsigned long max);
};
-void cpufreq_set_update_util_data(int cpu, struct update_util_data *data);
+void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
+ void (*func)(struct update_util_data *data, u64 time,
+ unsigned long util, unsigned long max));
+void cpufreq_remove_update_util_hook(int cpu);
#endif /* CONFIG_CPU_FREQ */
#endif
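
To make the fixed-point convention above concrete: a ratio such as runnable%
is stored as an integer scaled by SCHED_FIXEDPOINT_SCALE (1024), so every
multiplication must be followed by a shift back down. An illustrative sketch
(not kernel code) of the load_avg definition:

    #define FP_SHIFT 10
    #define FP_SCALE (1L << FP_SHIFT)     /* mirrors SCHED_FIXEDPOINT_SCALE */

    static unsigned long example_load_avg(unsigned long weight,
                                          unsigned long runnable_fp)
    {
            /* runnable_fp is runnable% scaled by FP_SCALE; 512 means 50% */
            return (weight * runnable_fp) >> FP_SHIFT;
    }

For a weight of 1024 and runnable_fp of 512, example_load_avg() returns 512,
matching load_avg = runnable% * scale_load_down(load) for a task that was
runnable half the time.
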
diff --git a/include/linux/signal.h b/include/linux/signal.h
index 92557bb..3fbe814 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -432,8 +432,10 @@
stack_t __user *__uss = uss; \
struct task_struct *t = current; \
put_user_ex((void __user *)t->sas_ss_sp, &__uss->ss_sp); \
- put_user_ex(sas_ss_flags(sp), &__uss->ss_flags); \
+ put_user_ex(t->sas_ss_flags, &__uss->ss_flags); \
put_user_ex(t->sas_ss_size, &__uss->ss_size); \
+ if (t->sas_ss_flags & SS_AUTODISARM) \
+ sas_ss_reset(t); \
} while (0);
#ifdef CONFIG_PROC_FS
diff --git a/include/trace/events/mmc.h b/include/trace/events/mmc.h
new file mode 100644
index 0000000..a72f9b9
--- /dev/null
+++ b/include/trace/events/mmc.h
@@ -0,0 +1,182 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mmc
+
+#if !defined(_TRACE_MMC_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MMC_H
+
+#include <linux/blkdev.h>
+#include <linux/mmc/core.h>
+#include <linux/mmc/host.h>
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(mmc_request_start,
+
+ TP_PROTO(struct mmc_host *host, struct mmc_request *mrq),
+
+ TP_ARGS(host, mrq),
+
+ TP_STRUCT__entry(
+ __field(u32, cmd_opcode)
+ __field(u32, cmd_arg)
+ __field(unsigned int, cmd_flags)
+ __field(unsigned int, cmd_retries)
+ __field(u32, stop_opcode)
+ __field(u32, stop_arg)
+ __field(unsigned int, stop_flags)
+ __field(unsigned int, stop_retries)
+ __field(u32, sbc_opcode)
+ __field(u32, sbc_arg)
+ __field(unsigned int, sbc_flags)
+ __field(unsigned int, sbc_retries)
+ __field(unsigned int, blocks)
+ __field(unsigned int, blksz)
+ __field(unsigned int, data_flags)
+ __field(unsigned int, can_retune)
+ __field(unsigned int, doing_retune)
+ __field(unsigned int, retune_now)
+ __field(int, need_retune)
+ __field(int, hold_retune)
+ __field(unsigned int, retune_period)
+ __field(struct mmc_request *, mrq)
+ __string(name, mmc_hostname(host))
+ ),
+
+ TP_fast_assign(
+ __entry->cmd_opcode = mrq->cmd->opcode;
+ __entry->cmd_arg = mrq->cmd->arg;
+ __entry->cmd_flags = mrq->cmd->flags;
+ __entry->cmd_retries = mrq->cmd->retries;
+ __entry->stop_opcode = mrq->stop ? mrq->stop->opcode : 0;
+ __entry->stop_arg = mrq->stop ? mrq->stop->arg : 0;
+ __entry->stop_flags = mrq->stop ? mrq->stop->flags : 0;
+ __entry->stop_retries = mrq->stop ? mrq->stop->retries : 0;
+ __entry->sbc_opcode = mrq->sbc ? mrq->sbc->opcode : 0;
+ __entry->sbc_arg = mrq->sbc ? mrq->sbc->arg : 0;
+ __entry->sbc_flags = mrq->sbc ? mrq->sbc->flags : 0;
+ __entry->sbc_retries = mrq->sbc ? mrq->sbc->retries : 0;
+ __entry->blksz = mrq->data ? mrq->data->blksz : 0;
+ __entry->blocks = mrq->data ? mrq->data->blocks : 0;
+ __entry->data_flags = mrq->data ? mrq->data->flags : 0;
+ __entry->can_retune = host->can_retune;
+ __entry->doing_retune = host->doing_retune;
+ __entry->retune_now = host->retune_now;
+ __entry->need_retune = host->need_retune;
+ __entry->hold_retune = host->hold_retune;
+ __entry->retune_period = host->retune_period;
+ __assign_str(name, mmc_hostname(host));
+ __entry->mrq = mrq;
+ ),
+
+ TP_printk("%s: start struct mmc_request[%p]: "
+ "cmd_opcode=%u cmd_arg=0x%x cmd_flags=0x%x cmd_retries=%u "
+ "stop_opcode=%u stop_arg=0x%x stop_flags=0x%x stop_retries=%u "
+		  "sbc_opcode=%u sbc_arg=0x%x sbc_flags=0x%x sbc_retries=%u "
+ "blocks=%u block_size=%u data_flags=0x%x "
+ "can_retune=%u doing_retune=%u retune_now=%u "
+ "need_retune=%d hold_retune=%d retune_period=%u",
+ __get_str(name), __entry->mrq,
+ __entry->cmd_opcode, __entry->cmd_arg,
+ __entry->cmd_flags, __entry->cmd_retries,
+ __entry->stop_opcode, __entry->stop_arg,
+ __entry->stop_flags, __entry->stop_retries,
+ __entry->sbc_opcode, __entry->sbc_arg,
+ __entry->sbc_flags, __entry->sbc_retries,
+ __entry->blocks, __entry->blksz, __entry->data_flags,
+ __entry->can_retune, __entry->doing_retune,
+ __entry->retune_now, __entry->need_retune,
+ __entry->hold_retune, __entry->retune_period)
+);
+
+TRACE_EVENT(mmc_request_done,
+
+ TP_PROTO(struct mmc_host *host, struct mmc_request *mrq),
+
+ TP_ARGS(host, mrq),
+
+ TP_STRUCT__entry(
+ __field(u32, cmd_opcode)
+ __field(int, cmd_err)
+ __array(u32, cmd_resp, 4)
+ __field(unsigned int, cmd_retries)
+ __field(u32, stop_opcode)
+ __field(int, stop_err)
+ __array(u32, stop_resp, 4)
+ __field(unsigned int, stop_retries)
+ __field(u32, sbc_opcode)
+ __field(int, sbc_err)
+ __array(u32, sbc_resp, 4)
+ __field(unsigned int, sbc_retries)
+ __field(unsigned int, bytes_xfered)
+ __field(int, data_err)
+ __field(unsigned int, can_retune)
+ __field(unsigned int, doing_retune)
+ __field(unsigned int, retune_now)
+ __field(int, need_retune)
+ __field(int, hold_retune)
+ __field(unsigned int, retune_period)
+ __field(struct mmc_request *, mrq)
+ __string(name, mmc_hostname(host))
+ ),
+
+ TP_fast_assign(
+ __entry->cmd_opcode = mrq->cmd->opcode;
+ __entry->cmd_err = mrq->cmd->error;
+ memcpy(__entry->cmd_resp, mrq->cmd->resp, 4);
+ __entry->cmd_retries = mrq->cmd->retries;
+ __entry->stop_opcode = mrq->stop ? mrq->stop->opcode : 0;
+ __entry->stop_err = mrq->stop ? mrq->stop->error : 0;
+ __entry->stop_resp[0] = mrq->stop ? mrq->stop->resp[0] : 0;
+ __entry->stop_resp[1] = mrq->stop ? mrq->stop->resp[1] : 0;
+ __entry->stop_resp[2] = mrq->stop ? mrq->stop->resp[2] : 0;
+ __entry->stop_resp[3] = mrq->stop ? mrq->stop->resp[3] : 0;
+ __entry->stop_retries = mrq->stop ? mrq->stop->retries : 0;
+ __entry->sbc_opcode = mrq->sbc ? mrq->sbc->opcode : 0;
+ __entry->sbc_err = mrq->sbc ? mrq->sbc->error : 0;
+ __entry->sbc_resp[0] = mrq->sbc ? mrq->sbc->resp[0] : 0;
+ __entry->sbc_resp[1] = mrq->sbc ? mrq->sbc->resp[1] : 0;
+ __entry->sbc_resp[2] = mrq->sbc ? mrq->sbc->resp[2] : 0;
+ __entry->sbc_resp[3] = mrq->sbc ? mrq->sbc->resp[3] : 0;
+ __entry->sbc_retries = mrq->sbc ? mrq->sbc->retries : 0;
+ __entry->bytes_xfered = mrq->data ? mrq->data->bytes_xfered : 0;
+ __entry->data_err = mrq->data ? mrq->data->error : 0;
+ __entry->can_retune = host->can_retune;
+ __entry->doing_retune = host->doing_retune;
+ __entry->retune_now = host->retune_now;
+ __entry->need_retune = host->need_retune;
+ __entry->hold_retune = host->hold_retune;
+ __entry->retune_period = host->retune_period;
+ __assign_str(name, mmc_hostname(host));
+ __entry->mrq = mrq;
+ ),
+
+ TP_printk("%s: end struct mmc_request[%p]: "
+ "cmd_opcode=%u cmd_err=%d cmd_resp=0x%x 0x%x 0x%x 0x%x "
+ "cmd_retries=%u stop_opcode=%u stop_err=%d "
+ "stop_resp=0x%x 0x%x 0x%x 0x%x stop_retries=%u "
+ "sbc_opcode=%u sbc_err=%d sbc_resp=0x%x 0x%x 0x%x 0x%x "
+ "sbc_retries=%u bytes_xfered=%u data_err=%d "
+ "can_retune=%u doing_retune=%u retune_now=%u need_retune=%d "
+ "hold_retune=%d retune_period=%u",
+ __get_str(name), __entry->mrq,
+ __entry->cmd_opcode, __entry->cmd_err,
+ __entry->cmd_resp[0], __entry->cmd_resp[1],
+ __entry->cmd_resp[2], __entry->cmd_resp[3],
+ __entry->cmd_retries,
+ __entry->stop_opcode, __entry->stop_err,
+ __entry->stop_resp[0], __entry->stop_resp[1],
+ __entry->stop_resp[2], __entry->stop_resp[3],
+ __entry->stop_retries,
+ __entry->sbc_opcode, __entry->sbc_err,
+ __entry->sbc_resp[0], __entry->sbc_resp[1],
+ __entry->sbc_resp[2], __entry->sbc_resp[3],
+ __entry->sbc_retries,
+ __entry->bytes_xfered, __entry->data_err,
+ __entry->can_retune, __entry->doing_retune,
+ __entry->retune_now, __entry->need_retune,
+ __entry->hold_retune, __entry->retune_period)
+);
+
+#endif /* _TRACE_MMC_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index ef72c4a..d3e7565 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -172,6 +172,77 @@
);
/*
+ * Tracepoint for expedited grace-period events. Takes a string identifying
+ * the RCU flavor, the expedited grace-period sequence number, and a string
+ * identifying the grace-period-related event as follows:
+ *
+ * "snap": Captured snapshot of expedited grace period sequence number.
+ * "start": Started a real expedited grace period.
+ * "end": Ended a real expedited grace period.
+ * "endwake": Woke piggybackers up.
+ * "done": Someone else did the expedited grace period for us.
+ */
+TRACE_EVENT(rcu_exp_grace_period,
+
+ TP_PROTO(const char *rcuname, unsigned long gpseq, const char *gpevent),
+
+ TP_ARGS(rcuname, gpseq, gpevent),
+
+ TP_STRUCT__entry(
+ __field(const char *, rcuname)
+ __field(unsigned long, gpseq)
+ __field(const char *, gpevent)
+ ),
+
+ TP_fast_assign(
+ __entry->rcuname = rcuname;
+ __entry->gpseq = gpseq;
+ __entry->gpevent = gpevent;
+ ),
+
+ TP_printk("%s %lu %s",
+ __entry->rcuname, __entry->gpseq, __entry->gpevent)
+);
+
+/*
+ * Tracepoint for expedited grace-period funnel-locking events. Takes a
+ * string identifying the RCU flavor, an integer identifying the rcu_node
+ * combining-tree level, another pair of integers identifying the lowest-
+ * and highest-numbered CPU associated with the current rcu_node structure,
+ * and a string identifying the grace-period-related event as follows:
+ *
+ * "nxtlvl": Advance to next level of rcu_node funnel
+ * "wait": Wait for someone else to do expedited GP
+ */
+TRACE_EVENT(rcu_exp_funnel_lock,
+
+ TP_PROTO(const char *rcuname, u8 level, int grplo, int grphi,
+ const char *gpevent),
+
+ TP_ARGS(rcuname, level, grplo, grphi, gpevent),
+
+ TP_STRUCT__entry(
+ __field(const char *, rcuname)
+ __field(u8, level)
+ __field(int, grplo)
+ __field(int, grphi)
+ __field(const char *, gpevent)
+ ),
+
+ TP_fast_assign(
+ __entry->rcuname = rcuname;
+ __entry->level = level;
+ __entry->grplo = grplo;
+ __entry->grphi = grphi;
+ __entry->gpevent = gpevent;
+ ),
+
+ TP_printk("%s %d %d %d %s",
+ __entry->rcuname, __entry->level, __entry->grplo,
+ __entry->grphi, __entry->gpevent)
+);
+
+/*
* Tracepoint for RCU no-CBs CPU callback handoffs. This event is intended
* to assist debugging of these handoffs.
*
@@ -704,11 +775,15 @@
#else /* #ifdef CONFIG_RCU_TRACE */
#define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0)
-#define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, \
- qsmask) do { } while (0)
#define trace_rcu_future_grace_period(rcuname, gpnum, completed, c, \
level, grplo, grphi, event) \
do { } while (0)
+#define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, \
+ qsmask) do { } while (0)
+#define trace_rcu_exp_grace_period(rcuname, gpseq, gpevent) \
+ do { } while (0)
+#define trace_rcu_exp_funnel_lock(rcuname, level, grplo, grphi, gpevent) \
+ do { } while (0)
#define trace_rcu_nocb_wake(rcuname, cpu, reason) do { } while (0)
#define trace_rcu_preempt_task(rcuname, pid, gpnum) do { } while (0)
#define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0)
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 1afe962..43fc8d2 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -340,7 +340,8 @@
comm_exec : 1, /* flag comm events that are due to an exec */
use_clockid : 1, /* use @clockid for time fields */
context_switch : 1, /* context switch data */
- __reserved_1 : 37;
+ write_backward : 1, /* Write ring buffer from end to beginning */
+ __reserved_1 : 36;
union {
__u32 wakeup_events; /* wakeup every n events */
@@ -401,6 +402,7 @@
#define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *)
#define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *)
#define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32)
+#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW('$', 9, __u32)
enum perf_event_ioc_flags {
PERF_IOC_FLAG_GROUP = 1U << 0,
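
The new PERF_EVENT_IOC_PAUSE_OUTPUT ioctl takes a boolean argument and, per
the handler added in kernel/events/core.c below, simply toggles the ring
buffer's paused state. A hedged userspace sketch, assuming fd is a perf event
file descriptor with an mmap'ed ring buffer:

    #include <sys/ioctl.h>
    #include <linux/perf_event.h>

    /* Stop the kernel from writing new records into the ring buffer... */
    if (ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, 1))
            perror("pause");

    /* ...inspect the buffer contents at leisure, then resume. */
    if (ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, 0))
            perror("resume");

This pairs naturally with the new write_backward mode, where the consumer
wants a stable snapshot of the most recent records.
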
diff --git a/include/uapi/linux/signal.h b/include/uapi/linux/signal.h
index e1bd50c2..cd0804b 100644
--- a/include/uapi/linux/signal.h
+++ b/include/uapi/linux/signal.h
@@ -7,4 +7,9 @@
#define SS_ONSTACK 1
#define SS_DISABLE 2
+/* bit-flags */
+#define SS_AUTODISARM (1U << 31) /* disable sas during sighandling */
+/* mask for all SS_xxx flags */
+#define SS_FLAG_BITS SS_AUTODISARM
+
#endif /* _UAPI_LINUX_SIGNAL_H */
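
With SS_AUTODISARM set, entering a SA_ONSTACK signal handler saves and
disables the alternate stack, and sigreturn() re-arms it; per the
on_sig_stack() change above, this makes it safe to switch away from the
handler, e.g. via swapcontext(). A minimal sketch, assuming stk points at
SIGSTKSZ bytes of memory (older libcs may need the flag from the uapi
header):

    #include <signal.h>

    static void arm_altstack(void *stk)
    {
            stack_t ss = {
                    .ss_sp = stk,
                    .ss_size = SIGSTKSZ,
                    .ss_flags = SS_AUTODISARM,
            };

            sigaltstack(&ss, NULL);
    }
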
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 499d9e9..f5a1954 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -66,7 +66,7 @@
/* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 ||
value_size < 8 || value_size % 8 ||
- value_size / 8 > PERF_MAX_STACK_DEPTH)
+ value_size / 8 > sysctl_perf_event_max_stack)
return ERR_PTR(-EINVAL);
/* hash table size must be power of 2 */
@@ -124,8 +124,8 @@
struct perf_callchain_entry *trace;
struct stack_map_bucket *bucket, *new_bucket, *old_bucket;
u32 max_depth = map->value_size / 8;
- /* stack_map_alloc() checks that max_depth <= PERF_MAX_STACK_DEPTH */
- u32 init_nr = PERF_MAX_STACK_DEPTH - max_depth;
+ /* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */
+ u32 init_nr = sysctl_perf_event_max_stack - max_depth;
u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
u32 hash, id, trace_nr, trace_len;
bool user = flags & BPF_F_USER_STACK;
@@ -143,7 +143,7 @@
return -EFAULT;
/* get_perf_callchain() guarantees that trace->nr >= init_nr
- * and trace-nr <= PERF_MAX_STACK_DEPTH, so trace_nr <= max_depth
+	 * and trace->nr <= sysctl_perf_event_max_stack, so trace_nr <= max_depth
*/
trace_nr = trace->nr - init_nr;
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 3e3f6e4..d948e44 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -703,21 +703,6 @@
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
int err;
- /*
- * By now we've cleared cpu_active_mask, wait for all preempt-disabled
- * and RCU users of this state to go away such that all new such users
- * will observe it.
- *
- * For CONFIG_PREEMPT we have preemptible RCU and its sync_rcu() might
- * not imply sync_sched(), so wait for both.
- *
- * Do sync before park smpboot threads to take care the rcu boost case.
- */
- if (IS_ENABLED(CONFIG_PREEMPT))
- synchronize_rcu_mult(call_rcu, call_rcu_sched);
- else
- synchronize_rcu();
-
/* Park the smpboot threads */
kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread);
smpboot_park_threads(cpu);
@@ -923,8 +908,6 @@
st->state = CPUHP_AP_ONLINE_IDLE;
- /* The cpu is marked online, set it active now */
- set_cpu_active(cpu, true);
/* Unpark the stopper thread and the hotplug thread of this cpu */
stop_machine_unpark(cpu);
kthread_unpark(st->thread);
@@ -1236,6 +1219,12 @@
.name = "ap:offline",
.cant_stop = true,
},
+ /* First state is scheduler control. Interrupts are disabled */
+ [CPUHP_AP_SCHED_STARTING] = {
+ .name = "sched:starting",
+ .startup = sched_cpu_starting,
+ .teardown = sched_cpu_dying,
+ },
/*
* Low level startup/teardown notifiers. Run with interrupts
* disabled. Will be removed once the notifiers are converted to
@@ -1274,6 +1263,15 @@
* The dynamically registered state space is here
*/
+#ifdef CONFIG_SMP
+ /* Last state is scheduler control setting the cpu active */
+ [CPUHP_AP_ACTIVE] = {
+ .name = "sched:active",
+ .startup = sched_cpu_activate,
+ .teardown = sched_cpu_deactivate,
+ },
+#endif
+
/* CPU is fully up and running. */
[CPUHP_ONLINE] = {
.name = "online",
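
The sched:starting and sched:active entries slot the scheduler into the same
hotplug state machine that driver code reaches through cpuhp_setup_state(). A
hedged sketch of a dynamically allocated state, with hypothetical callbacks:

    static int foo_cpu_online(unsigned int cpu)    /* hypothetical */
    {
            /* per-CPU setup; runs on each CPU as it comes up */
            return 0;
    }

    static int foo_cpu_offline(unsigned int cpu)   /* hypothetical */
    {
            /* per-CPU teardown; runs before the CPU goes away */
            return 0;
    }

    static int __init foo_init(void)
    {
            int ret;

            ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "foo:online",
                                    foo_cpu_online, foo_cpu_offline);
            /* for DYN states a positive return is the allocated state */
            return ret < 0 ? ret : 0;
    }
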
diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
index 343c22f..b9325e7 100644
--- a/kernel/events/callchain.c
+++ b/kernel/events/callchain.c
@@ -18,6 +18,14 @@
struct perf_callchain_entry *cpu_entries[0];
};
+int sysctl_perf_event_max_stack __read_mostly = PERF_MAX_STACK_DEPTH;
+
+static inline size_t perf_callchain_entry__sizeof(void)
+{
+ return (sizeof(struct perf_callchain_entry) +
+ sizeof(__u64) * sysctl_perf_event_max_stack);
+}
+
static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]);
static atomic_t nr_callchain_events;
static DEFINE_MUTEX(callchain_mutex);
@@ -73,7 +81,7 @@
if (!entries)
return -ENOMEM;
- size = sizeof(struct perf_callchain_entry) * PERF_NR_CONTEXTS;
+ size = perf_callchain_entry__sizeof() * PERF_NR_CONTEXTS;
for_each_possible_cpu(cpu) {
entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL,
@@ -147,7 +155,8 @@
cpu = smp_processor_id();
- return &entries->cpu_entries[cpu][*rctx];
+ return (((void *)entries->cpu_entries[cpu]) +
+ (*rctx * perf_callchain_entry__sizeof()));
}
static void
@@ -215,3 +224,25 @@
return entry;
}
+
+int perf_event_max_stack_handler(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ int new_value = sysctl_perf_event_max_stack, ret;
+ struct ctl_table new_table = *table;
+
+ new_table.data = &new_value;
+ ret = proc_dointvec_minmax(&new_table, write, buffer, lenp, ppos);
+ if (ret || !write)
+ return ret;
+
+ mutex_lock(&callchain_mutex);
+ if (atomic_read(&nr_callchain_events))
+ ret = -EBUSY;
+ else
+ sysctl_perf_event_max_stack = new_value;
+
+ mutex_unlock(&callchain_mutex);
+
+ return ret;
+}
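
Because each perf_callchain_entry is now sized at allocation time by
sysctl_perf_event_max_stack, consecutive entries can no longer be addressed
with plain array indexing; the byte arithmetic above is the standard pattern
for runtime-sized records. A reduced, self-contained sketch (names are
illustrative):

    struct entry {
            unsigned long nr;
            unsigned long ips[];                    /* runtime-sized tail */
    };

    static size_t entry_size(unsigned int max_stack)
    {
            return sizeof(struct entry) + sizeof(unsigned long) * max_stack;
    }

    static struct entry *nth_entry(void *base, unsigned int max_stack, int n)
    {
            /* base holds entries packed back-to-back */
            return (struct entry *)((char *)base + n * entry_size(max_stack));
    }
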
diff --git a/kernel/events/core.c b/kernel/events/core.c
index c0ded24..050a290 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -44,6 +44,8 @@
#include <linux/compat.h>
#include <linux/bpf.h>
#include <linux/filter.h>
+#include <linux/namei.h>
+#include <linux/parser.h>
#include "internal.h"
@@ -1927,8 +1929,13 @@
if (event->state <= PERF_EVENT_STATE_OFF)
return 0;
- event->state = PERF_EVENT_STATE_ACTIVE;
- event->oncpu = smp_processor_id();
+ WRITE_ONCE(event->oncpu, smp_processor_id());
+ /*
+ * Order event::oncpu write to happen before the ACTIVE state
+ * is visible.
+ */
+ smp_wmb();
+ WRITE_ONCE(event->state, PERF_EVENT_STATE_ACTIVE);
/*
* Unthrottle events, since we scheduled we might have missed several
@@ -2360,6 +2367,112 @@
}
EXPORT_SYMBOL_GPL(perf_event_enable);
+struct stop_event_data {
+ struct perf_event *event;
+ unsigned int restart;
+};
+
+static int __perf_event_stop(void *info)
+{
+ struct stop_event_data *sd = info;
+ struct perf_event *event = sd->event;
+
+ /* if it's already INACTIVE, do nothing */
+ if (READ_ONCE(event->state) != PERF_EVENT_STATE_ACTIVE)
+ return 0;
+
+ /* matches smp_wmb() in event_sched_in() */
+ smp_rmb();
+
+ /*
+ * There is a window with interrupts enabled before we get here,
+ * so we need to check again lest we try to stop another CPU's event.
+ */
+ if (READ_ONCE(event->oncpu) != smp_processor_id())
+ return -EAGAIN;
+
+ event->pmu->stop(event, PERF_EF_UPDATE);
+
+ /*
+ * May race with the actual stop (through perf_pmu_output_stop()),
+ * but it is only used for events with AUX ring buffer, and such
+ * events will refuse to restart because of rb::aux_mmap_count==0,
+ * see comments in perf_aux_output_begin().
+ *
+	 * Since this is happening on an event-local CPU, no trace is lost
+ * while restarting.
+ */
+ if (sd->restart)
+ event->pmu->start(event, PERF_EF_START);
+
+ return 0;
+}
+
+static int perf_event_restart(struct perf_event *event)
+{
+ struct stop_event_data sd = {
+ .event = event,
+ .restart = 1,
+ };
+ int ret = 0;
+
+ do {
+ if (READ_ONCE(event->state) != PERF_EVENT_STATE_ACTIVE)
+ return 0;
+
+ /* matches smp_wmb() in event_sched_in() */
+ smp_rmb();
+
+ /*
+ * We only want to restart ACTIVE events, so if the event goes
+ * inactive here (event->oncpu==-1), there's nothing more to do;
+ * fall through with ret==-ENXIO.
+ */
+ ret = cpu_function_call(READ_ONCE(event->oncpu),
+ __perf_event_stop, &sd);
+ } while (ret == -EAGAIN);
+
+ return ret;
+}
+
+/*
+ * In order to contain the amount of racy and tricky in the address filter
+ * configuration management, it is a two part process:
+ *
+ * (p1) when userspace mappings change as a result of (1) or (2) or (3) below,
+ * we update the addresses of corresponding vmas in
+ * event::addr_filters_offs array and bump the event::addr_filters_gen;
+ * (p2) when an event is scheduled in (pmu::add), it calls
+ * perf_event_addr_filters_sync() which calls pmu::addr_filters_sync()
+ * if the generation has changed since the previous call.
+ *
+ * If (p1) happens while the event is active, we restart it to force (p2).
+ *
+ * (1) perf_addr_filters_apply(): adjusting filters' offsets based on
+ * pre-existing mappings, called once when new filters arrive via SET_FILTER
+ * ioctl;
+ * (2) perf_addr_filters_adjust(): adjusting filters' offsets based on newly
+ * registered mapping, called for every new mmap(), with mm::mmap_sem down
+ * for reading;
+ * (3) perf_event_addr_filters_exec(): clearing filters' offsets in the process
+ * of exec.
+ */
+void perf_event_addr_filters_sync(struct perf_event *event)
+{
+ struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
+
+ if (!has_addr_filter(event))
+ return;
+
+ raw_spin_lock(&ifh->lock);
+ if (event->addr_filters_gen != event->hw.addr_filters_gen) {
+ event->pmu->addr_filters_sync(event);
+ event->hw.addr_filters_gen = event->addr_filters_gen;
+ }
+ raw_spin_unlock(&ifh->lock);
+}
+EXPORT_SYMBOL_GPL(perf_event_addr_filters_sync);
+
static int _perf_event_refresh(struct perf_event *event, int refresh)
{
/*
@@ -3209,16 +3322,6 @@
put_ctx(clone_ctx);
}
-void perf_event_exec(void)
-{
- int ctxn;
-
- rcu_read_lock();
- for_each_task_context_nr(ctxn)
- perf_event_enable_on_exec(ctxn);
- rcu_read_unlock();
-}
-
struct perf_read_data {
struct perf_event *event;
bool group;
@@ -3720,6 +3823,9 @@
return true;
}
+static void perf_addr_filters_splice(struct perf_event *event,
+ struct list_head *head);
+
static void _free_event(struct perf_event *event)
{
irq_work_sync(&event->pending);
@@ -3747,6 +3853,8 @@
}
perf_event_free_bpf_prog(event);
+ perf_addr_filters_splice(event, NULL);
+ kfree(event->addr_filters_offs);
if (event->destroy)
event->destroy(event);
@@ -4343,6 +4451,19 @@
case PERF_EVENT_IOC_SET_BPF:
return perf_event_set_bpf_prog(event, arg);
+ case PERF_EVENT_IOC_PAUSE_OUTPUT: {
+ struct ring_buffer *rb;
+
+ rcu_read_lock();
+ rb = rcu_dereference(event->rb);
+ if (!rb || !rb->nr_pages) {
+ rcu_read_unlock();
+ return -EINVAL;
+ }
+ rb_toggle_paused(rb, !!arg);
+ rcu_read_unlock();
+ return 0;
+ }
default:
return -ENOTTY;
}
@@ -4659,6 +4780,8 @@
event->pmu->event_mapped(event);
}
+static void perf_pmu_output_stop(struct perf_event *event);
+
/*
* A buffer can be mmap()ed multiple times; either directly through the same
* event, or through other events by use of perf_event_set_output().
@@ -4686,10 +4809,22 @@
*/
if (rb_has_aux(rb) && vma->vm_pgoff == rb->aux_pgoff &&
atomic_dec_and_mutex_lock(&rb->aux_mmap_count, &event->mmap_mutex)) {
+ /*
+ * Stop all AUX events that are writing to this buffer,
+ * so that we can free its AUX pages and corresponding PMU
+ * data. Note that after rb::aux_mmap_count dropped to zero,
+ * they won't start any more (see perf_aux_output_begin()).
+ */
+ perf_pmu_output_stop(event);
+
+ /* now it's safe to free the pages */
atomic_long_sub(rb->aux_nr_pages, &mmap_user->locked_vm);
vma->vm_mm->pinned_vm -= rb->aux_mmap_locked;
+ /* this has to be the last one */
rb_free_aux(rb);
+ WARN_ON_ONCE(atomic_read(&rb->aux_refcount));
+
mutex_unlock(&event->mmap_mutex);
}
@@ -5630,9 +5765,13 @@
}
}
-void perf_event_output(struct perf_event *event,
- struct perf_sample_data *data,
- struct pt_regs *regs)
+static void __always_inline
+__perf_event_output(struct perf_event *event,
+ struct perf_sample_data *data,
+ struct pt_regs *regs,
+ int (*output_begin)(struct perf_output_handle *,
+ struct perf_event *,
+ unsigned int))
{
struct perf_output_handle handle;
struct perf_event_header header;
@@ -5642,7 +5781,7 @@
perf_prepare_sample(&header, data, event, regs);
- if (perf_output_begin(&handle, event, header.size))
+ if (output_begin(&handle, event, header.size))
goto exit;
perf_output_sample(&handle, &header, data, event);
@@ -5653,6 +5792,30 @@
rcu_read_unlock();
}
+void
+perf_event_output_forward(struct perf_event *event,
+ struct perf_sample_data *data,
+ struct pt_regs *regs)
+{
+ __perf_event_output(event, data, regs, perf_output_begin_forward);
+}
+
+void
+perf_event_output_backward(struct perf_event *event,
+ struct perf_sample_data *data,
+ struct pt_regs *regs)
+{
+ __perf_event_output(event, data, regs, perf_output_begin_backward);
+}
+
+void
+perf_event_output(struct perf_event *event,
+ struct perf_sample_data *data,
+ struct pt_regs *regs)
+{
+ __perf_event_output(event, data, regs, perf_output_begin);
+}
+
/*
* read event_id
*/
@@ -5698,15 +5861,18 @@
static void
perf_event_aux_ctx(struct perf_event_context *ctx,
perf_event_aux_output_cb output,
- void *data)
+ void *data, bool all)
{
struct perf_event *event;
list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
- if (event->state < PERF_EVENT_STATE_INACTIVE)
- continue;
- if (!event_filter_match(event))
- continue;
+ if (!all) {
+ if (event->state < PERF_EVENT_STATE_INACTIVE)
+ continue;
+ if (!event_filter_match(event))
+ continue;
+ }
+
output(event, data);
}
}
@@ -5717,7 +5883,7 @@
{
rcu_read_lock();
preempt_disable();
- perf_event_aux_ctx(task_ctx, output, data);
+ perf_event_aux_ctx(task_ctx, output, data, false);
preempt_enable();
rcu_read_unlock();
}
@@ -5747,13 +5913,13 @@
cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
if (cpuctx->unique_pmu != pmu)
goto next;
- perf_event_aux_ctx(&cpuctx->ctx, output, data);
+ perf_event_aux_ctx(&cpuctx->ctx, output, data, false);
ctxn = pmu->task_ctx_nr;
if (ctxn < 0)
goto next;
ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
if (ctx)
- perf_event_aux_ctx(ctx, output, data);
+ perf_event_aux_ctx(ctx, output, data, false);
next:
put_cpu_ptr(pmu->pmu_cpu_context);
}
@@ -5761,6 +5927,134 @@
}
/*
+ * Clear all file-based filters at exec, they'll have to be
+ * re-instated when/if these objects are mmapped again.
+ */
+static void perf_event_addr_filters_exec(struct perf_event *event, void *data)
+{
+ struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
+ struct perf_addr_filter *filter;
+ unsigned int restart = 0, count = 0;
+ unsigned long flags;
+
+ if (!has_addr_filter(event))
+ return;
+
+ raw_spin_lock_irqsave(&ifh->lock, flags);
+ list_for_each_entry(filter, &ifh->list, entry) {
+ if (filter->inode) {
+ event->addr_filters_offs[count] = 0;
+ restart++;
+ }
+
+ count++;
+ }
+
+ if (restart)
+ event->addr_filters_gen++;
+ raw_spin_unlock_irqrestore(&ifh->lock, flags);
+
+ if (restart)
+ perf_event_restart(event);
+}
+
+void perf_event_exec(void)
+{
+ struct perf_event_context *ctx;
+ int ctxn;
+
+ rcu_read_lock();
+ for_each_task_context_nr(ctxn) {
+ ctx = current->perf_event_ctxp[ctxn];
+ if (!ctx)
+ continue;
+
+ perf_event_enable_on_exec(ctxn);
+
+ perf_event_aux_ctx(ctx, perf_event_addr_filters_exec, NULL,
+ true);
+ }
+ rcu_read_unlock();
+}
+
+struct remote_output {
+ struct ring_buffer *rb;
+ int err;
+};
+
+static void __perf_event_output_stop(struct perf_event *event, void *data)
+{
+ struct perf_event *parent = event->parent;
+ struct remote_output *ro = data;
+ struct ring_buffer *rb = ro->rb;
+ struct stop_event_data sd = {
+ .event = event,
+ };
+
+ if (!has_aux(event))
+ return;
+
+ if (!parent)
+ parent = event;
+
+ /*
+ * In case of inheritance, it will be the parent that links to the
+ * ring-buffer, but it will be the child that's actually using it:
+ */
+ if (rcu_dereference(parent->rb) == rb)
+ ro->err = __perf_event_stop(&sd);
+}
+
+static int __perf_pmu_output_stop(void *info)
+{
+ struct perf_event *event = info;
+ struct pmu *pmu = event->pmu;
+ struct perf_cpu_context *cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
+ struct remote_output ro = {
+ .rb = event->rb,
+ };
+
+ rcu_read_lock();
+ perf_event_aux_ctx(&cpuctx->ctx, __perf_event_output_stop, &ro, false);
+ if (cpuctx->task_ctx)
+ perf_event_aux_ctx(cpuctx->task_ctx, __perf_event_output_stop,
+ &ro, false);
+ rcu_read_unlock();
+
+ return ro.err;
+}
+
+static void perf_pmu_output_stop(struct perf_event *event)
+{
+ struct perf_event *iter;
+ int err, cpu;
+
+restart:
+ rcu_read_lock();
+ list_for_each_entry_rcu(iter, &event->rb->event_list, rb_entry) {
+ /*
+ * For per-CPU events, we need to make sure that neither they
+ * nor their children are running; for cpu==-1 events it's
+ * sufficient to stop the event itself if it's active, since
+ * it can't have children.
+ */
+ cpu = iter->cpu;
+ if (cpu == -1)
+ cpu = READ_ONCE(iter->oncpu);
+
+ if (cpu == -1)
+ continue;
+
+ err = cpu_function_call(cpu, __perf_pmu_output_stop, event);
+ if (err == -EAGAIN) {
+ rcu_read_unlock();
+ goto restart;
+ }
+ }
+ rcu_read_unlock();
+}
+
+/*
* task tracking -- fork/exit
*
* enabled by: attr.comm | attr.mmap | attr.mmap2 | attr.mmap_data | attr.task
@@ -6169,6 +6463,87 @@
kfree(buf);
}
+/*
+ * Whether this @filter depends on a dynamic object which is not loaded
+ * yet or its load addresses are not known.
+ */
+static bool perf_addr_filter_needs_mmap(struct perf_addr_filter *filter)
+{
+ return filter->filter && filter->inode;
+}
+
+/*
+ * Check whether inode and address range match filter criteria.
+ */
+static bool perf_addr_filter_match(struct perf_addr_filter *filter,
+ struct file *file, unsigned long offset,
+ unsigned long size)
+{
+ if (filter->inode != file->f_inode)
+ return false;
+
+ if (filter->offset > offset + size)
+ return false;
+
+ if (filter->offset + filter->size < offset)
+ return false;
+
+ return true;
+}
+
+static void __perf_addr_filters_adjust(struct perf_event *event, void *data)
+{
+ struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
+ struct vm_area_struct *vma = data;
+ unsigned long off = vma->vm_pgoff << PAGE_SHIFT, flags;
+ struct file *file = vma->vm_file;
+ struct perf_addr_filter *filter;
+ unsigned int restart = 0, count = 0;
+
+ if (!has_addr_filter(event))
+ return;
+
+ if (!file)
+ return;
+
+ raw_spin_lock_irqsave(&ifh->lock, flags);
+ list_for_each_entry(filter, &ifh->list, entry) {
+ if (perf_addr_filter_match(filter, file, off,
+ vma->vm_end - vma->vm_start)) {
+ event->addr_filters_offs[count] = vma->vm_start;
+ restart++;
+ }
+
+ count++;
+ }
+
+ if (restart)
+ event->addr_filters_gen++;
+ raw_spin_unlock_irqrestore(&ifh->lock, flags);
+
+ if (restart)
+ perf_event_restart(event);
+}
+
+/*
+ * Adjust all task's events' filters to the new vma
+ */
+static void perf_addr_filters_adjust(struct vm_area_struct *vma)
+{
+ struct perf_event_context *ctx;
+ int ctxn;
+
+ rcu_read_lock();
+ for_each_task_context_nr(ctxn) {
+ ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
+ if (!ctx)
+ continue;
+
+ perf_event_aux_ctx(ctx, __perf_addr_filters_adjust, vma, true);
+ }
+ rcu_read_unlock();
+}
+
void perf_event_mmap(struct vm_area_struct *vma)
{
struct perf_mmap_event mmap_event;
@@ -6200,6 +6575,7 @@
/* .flags (attr_mmap2 only) */
};
+ perf_addr_filters_adjust(vma);
perf_event_mmap_event(&mmap_event);
}
@@ -6491,10 +6867,7 @@
irq_work_queue(&event->pending);
}
- if (event->overflow_handler)
- event->overflow_handler(event, data, regs);
- else
- perf_event_output(event, data, regs);
+ event->overflow_handler(event, data, regs);
if (*perf_event_fasync(event) && event->pending_kill) {
event->pending_wakeup = 1;
@@ -7081,24 +7454,6 @@
perf_pmu_register(&perf_tracepoint, "tracepoint", PERF_TYPE_TRACEPOINT);
}
-static int perf_event_set_filter(struct perf_event *event, void __user *arg)
-{
- char *filter_str;
- int ret;
-
- if (event->attr.type != PERF_TYPE_TRACEPOINT)
- return -EINVAL;
-
- filter_str = strndup_user(arg, PAGE_SIZE);
- if (IS_ERR(filter_str))
- return PTR_ERR(filter_str);
-
- ret = ftrace_profile_set_filter(event, event->attr.config, filter_str);
-
- kfree(filter_str);
- return ret;
-}
-
static void perf_event_free_filter(struct perf_event *event)
{
ftrace_profile_free_filter(event);
@@ -7153,11 +7508,6 @@
{
}
-static int perf_event_set_filter(struct perf_event *event, void __user *arg)
-{
- return -ENOENT;
-}
-
static void perf_event_free_filter(struct perf_event *event)
{
}
@@ -7186,6 +7536,387 @@
#endif
/*
+ * Allocate a new address filter
+ */
+static struct perf_addr_filter *
+perf_addr_filter_new(struct perf_event *event, struct list_head *filters)
+{
+ int node = cpu_to_node(event->cpu == -1 ? 0 : event->cpu);
+ struct perf_addr_filter *filter;
+
+ filter = kzalloc_node(sizeof(*filter), GFP_KERNEL, node);
+ if (!filter)
+ return NULL;
+
+ INIT_LIST_HEAD(&filter->entry);
+ list_add_tail(&filter->entry, filters);
+
+ return filter;
+}
+
+static void free_filters_list(struct list_head *filters)
+{
+ struct perf_addr_filter *filter, *iter;
+
+ list_for_each_entry_safe(filter, iter, filters, entry) {
+ if (filter->inode)
+ iput(filter->inode);
+ list_del(&filter->entry);
+ kfree(filter);
+ }
+}
+
+/*
+ * Free existing address filters and optionally install new ones
+ */
+static void perf_addr_filters_splice(struct perf_event *event,
+ struct list_head *head)
+{
+ unsigned long flags;
+ LIST_HEAD(list);
+
+ if (!has_addr_filter(event))
+ return;
+
+ /* don't bother with children, they don't have their own filters */
+ if (event->parent)
+ return;
+
+ raw_spin_lock_irqsave(&event->addr_filters.lock, flags);
+
+ list_splice_init(&event->addr_filters.list, &list);
+ if (head)
+ list_splice(head, &event->addr_filters.list);
+
+ raw_spin_unlock_irqrestore(&event->addr_filters.lock, flags);
+
+ free_filters_list(&list);
+}
+
+/*
+ * Scan through mm's vmas and see if one of them matches the
+ * @filter; if so, adjust filter's address range.
+ * Called with mm::mmap_sem down for reading.
+ */
+static unsigned long perf_addr_filter_apply(struct perf_addr_filter *filter,
+ struct mm_struct *mm)
+{
+ struct vm_area_struct *vma;
+
+ for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ struct file *file = vma->vm_file;
+ unsigned long off = vma->vm_pgoff << PAGE_SHIFT;
+ unsigned long vma_size = vma->vm_end - vma->vm_start;
+
+ if (!file)
+ continue;
+
+ if (!perf_addr_filter_match(filter, file, off, vma_size))
+ continue;
+
+ return vma->vm_start;
+ }
+
+ return 0;
+}
+
+/*
+ * Update event's address range filters based on the
+ * task's existing mappings, if any.
+ */
+static void perf_event_addr_filters_apply(struct perf_event *event)
+{
+ struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
+ struct task_struct *task = READ_ONCE(event->ctx->task);
+ struct perf_addr_filter *filter;
+ struct mm_struct *mm = NULL;
+ unsigned int count = 0;
+ unsigned long flags;
+
+ /*
+ * We may observe TASK_TOMBSTONE, which means that the event tear-down
+ * will stop on the parent's child_mutex that our caller is also holding
+ */
+ if (task == TASK_TOMBSTONE)
+ return;
+
+ mm = get_task_mm(event->ctx->task);
+ if (!mm)
+ goto restart;
+
+ down_read(&mm->mmap_sem);
+
+ raw_spin_lock_irqsave(&ifh->lock, flags);
+ list_for_each_entry(filter, &ifh->list, entry) {
+ event->addr_filters_offs[count] = 0;
+
+ if (perf_addr_filter_needs_mmap(filter))
+ event->addr_filters_offs[count] =
+ perf_addr_filter_apply(filter, mm);
+
+ count++;
+ }
+
+ event->addr_filters_gen++;
+ raw_spin_unlock_irqrestore(&ifh->lock, flags);
+
+ up_read(&mm->mmap_sem);
+
+ mmput(mm);
+
+restart:
+ perf_event_restart(event);
+}
+
+/*
+ * Address range filtering: limiting the data to certain
+ * instruction address ranges. Filters are ioctl()ed to us from
+ * userspace as ascii strings.
+ *
+ * Filter string format:
+ *
+ * ACTION RANGE_SPEC
+ * where ACTION is one of the
+ * * "filter": limit the trace to this region
+ * * "start": start tracing from this address
+ * * "stop": stop tracing at this address/region;
+ * RANGE_SPEC is
+ * * for kernel addresses: <start address>[/<size>]
+ * * for object files: <start address>[/<size>]@</path/to/object/file>
+ *
+ * if <size> is not specified, the range is treated as a single address.
+ */
+enum {
+ IF_ACT_FILTER,
+ IF_ACT_START,
+ IF_ACT_STOP,
+ IF_SRC_FILE,
+ IF_SRC_KERNEL,
+ IF_SRC_FILEADDR,
+ IF_SRC_KERNELADDR,
+};
+
+enum {
+ IF_STATE_ACTION = 0,
+ IF_STATE_SOURCE,
+ IF_STATE_END,
+};
+
+static const match_table_t if_tokens = {
+ { IF_ACT_FILTER, "filter" },
+ { IF_ACT_START, "start" },
+ { IF_ACT_STOP, "stop" },
+ { IF_SRC_FILE, "%u/%u@%s" },
+ { IF_SRC_KERNEL, "%u/%u" },
+ { IF_SRC_FILEADDR, "%u@%s" },
+ { IF_SRC_KERNELADDR, "%u" },
+};
+
+/*
+ * Address filter string parser
+ */
+static int
+perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
+ struct list_head *filters)
+{
+ struct perf_addr_filter *filter = NULL;
+ char *start, *orig, *filename = NULL;
+ struct path path;
+ substring_t args[MAX_OPT_ARGS];
+ int state = IF_STATE_ACTION, token;
+ unsigned int kernel = 0;
+ int ret = -EINVAL;
+
+ orig = fstr = kstrdup(fstr, GFP_KERNEL);
+ if (!fstr)
+ return -ENOMEM;
+
+ while ((start = strsep(&fstr, " ,\n")) != NULL) {
+ ret = -EINVAL;
+
+ if (!*start)
+ continue;
+
+ /* filter definition begins */
+ if (state == IF_STATE_ACTION) {
+ filter = perf_addr_filter_new(event, filters);
+ if (!filter)
+ goto fail;
+ }
+
+ token = match_token(start, if_tokens, args);
+ switch (token) {
+ case IF_ACT_FILTER:
+ case IF_ACT_START:
+ filter->filter = 1;
+
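+			/* fall through */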
+ case IF_ACT_STOP:
+ if (state != IF_STATE_ACTION)
+ goto fail;
+
+ state = IF_STATE_SOURCE;
+ break;
+
+ case IF_SRC_KERNELADDR:
+ case IF_SRC_KERNEL:
+ kernel = 1;
+
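+			/* fall through */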
+ case IF_SRC_FILEADDR:
+ case IF_SRC_FILE:
+ if (state != IF_STATE_SOURCE)
+ goto fail;
+
+ if (token == IF_SRC_FILE || token == IF_SRC_KERNEL)
+ filter->range = 1;
+
+ *args[0].to = 0;
+ ret = kstrtoul(args[0].from, 0, &filter->offset);
+ if (ret)
+ goto fail;
+
+ if (filter->range) {
+ *args[1].to = 0;
+ ret = kstrtoul(args[1].from, 0, &filter->size);
+ if (ret)
+ goto fail;
+ }
+
+ if (token == IF_SRC_FILE) {
+ filename = match_strdup(&args[2]);
+ if (!filename) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ state = IF_STATE_END;
+ break;
+
+ default:
+ goto fail;
+ }
+
+ /*
+ * Filter definition is fully parsed, validate and install it.
+ * Make sure that it doesn't contradict itself or the event's
+ * attribute.
+ */
+ if (state == IF_STATE_END) {
+ if (kernel && event->attr.exclude_kernel)
+ goto fail;
+
+ if (!kernel) {
+ if (!filename)
+ goto fail;
+
+ /* look up the path and grab its inode */
+ ret = kern_path(filename, LOOKUP_FOLLOW, &path);
+ if (ret)
+ goto fail_free_name;
+
+ filter->inode = igrab(d_inode(path.dentry));
+ path_put(&path);
+ kfree(filename);
+ filename = NULL;
+
+ ret = -EINVAL;
+ if (!filter->inode ||
+ !S_ISREG(filter->inode->i_mode))
+ /* free_filters_list() will iput() */
+ goto fail;
+ }
+
+ /* ready to consume more filters */
+ state = IF_STATE_ACTION;
+ filter = NULL;
+ }
+ }
+
+ if (state != IF_STATE_ACTION)
+ goto fail;
+
+ kfree(orig);
+
+ return 0;
+
+fail_free_name:
+ kfree(filename);
+fail:
+ free_filters_list(filters);
+ kfree(orig);
+
+ return ret;
+}
+
+static int
+perf_event_set_addr_filter(struct perf_event *event, char *filter_str)
+{
+ LIST_HEAD(filters);
+ int ret;
+
+ /*
+ * Since this is called in perf_ioctl() path, we're already holding
+ * ctx::mutex.
+ */
+ lockdep_assert_held(&event->ctx->mutex);
+
+ if (WARN_ON_ONCE(event->parent))
+ return -EINVAL;
+
+ /*
+ * For now, we only support filtering in per-task events; doing so
+ * for CPU-wide events requires additional context switching trickery,
+ * since same object code will be mapped at different virtual
+ * addresses in different processes.
+ */
+ if (!event->ctx->task)
+ return -EOPNOTSUPP;
+
+ ret = perf_event_parse_addr_filter(event, filter_str, &filters);
+ if (ret)
+ return ret;
+
+ ret = event->pmu->addr_filters_validate(&filters);
+ if (ret) {
+ free_filters_list(&filters);
+ return ret;
+ }
+
+ /* remove existing filters, if any */
+ perf_addr_filters_splice(event, &filters);
+
+ /* install new filters */
+ perf_event_for_each_child(event, perf_event_addr_filters_apply);
+
+ return ret;
+}
+
+static int perf_event_set_filter(struct perf_event *event, void __user *arg)
+{
+ char *filter_str;
+ int ret = -EINVAL;
+
+ if ((event->attr.type != PERF_TYPE_TRACEPOINT ||
+ !IS_ENABLED(CONFIG_EVENT_TRACING)) &&
+ !has_addr_filter(event))
+ return -EINVAL;
+
+ filter_str = strndup_user(arg, PAGE_SIZE);
+ if (IS_ERR(filter_str))
+ return PTR_ERR(filter_str);
+
+ if (IS_ENABLED(CONFIG_EVENT_TRACING) &&
+ event->attr.type == PERF_TYPE_TRACEPOINT)
+ ret = ftrace_profile_set_filter(event, event->attr.config,
+ filter_str);
+ else if (has_addr_filter(event))
+ ret = perf_event_set_addr_filter(event, filter_str);
+
+ kfree(filter_str);
+ return ret;
+}
+
+/*
* hrtimer based swevent callback
*/
@@ -7542,6 +8273,20 @@
out:
mutex_unlock(&pmus_lock);
}
+
+/*
+ * Let userspace know that this PMU supports address range filtering:
+ */
+static ssize_t nr_addr_filters_show(struct device *dev,
+ struct device_attribute *attr,
+ char *page)
+{
+ struct pmu *pmu = dev_get_drvdata(dev);
+
+ return snprintf(page, PAGE_SIZE - 1, "%d\n", pmu->nr_addr_filters);
+}
+DEVICE_ATTR_RO(nr_addr_filters);
+
static struct idr pmu_idr;
static ssize_t
@@ -7643,9 +8388,19 @@
if (ret)
goto free_dev;
+ /* For PMUs with address filters, throw in an extra attribute: */
+ if (pmu->nr_addr_filters)
+ ret = device_create_file(pmu->dev, &dev_attr_nr_addr_filters);
+
+ if (ret)
+ goto del_dev;
+
out:
return ret;
+del_dev:
+ device_del(pmu->dev);
+
free_dev:
put_device(pmu->dev);
goto out;
@@ -7685,6 +8440,21 @@
}
skip_type:
+ if (pmu->task_ctx_nr == perf_hw_context) {
+ static int hw_context_taken = 0;
+
+ /*
+ * Other than systems with heterogeneous CPUs, it never makes
+ * sense for two PMUs to share perf_hw_context. PMUs which are
+ * uncore must use perf_invalid_context.
+ */
+ if (WARN_ON_ONCE(hw_context_taken &&
+ !(pmu->capabilities & PERF_PMU_CAP_HETEROGENEOUS_CPUS)))
+ pmu->task_ctx_nr = perf_invalid_context;
+
+ hw_context_taken = 1;
+ }
+
pmu->pmu_cpu_context = find_pmu_context(pmu->task_ctx_nr);
if (pmu->pmu_cpu_context)
goto got_cpu_context;
@@ -7772,6 +8542,8 @@
free_percpu(pmu->pmu_disable_count);
if (pmu->type >= PERF_TYPE_MAX)
idr_remove(&pmu_idr, pmu->type);
+ if (pmu->nr_addr_filters)
+ device_remove_file(pmu->dev, &dev_attr_nr_addr_filters);
device_del(pmu->dev);
put_device(pmu->dev);
free_pmu_context(pmu);
@@ -7965,6 +8737,7 @@
INIT_LIST_HEAD(&event->sibling_list);
INIT_LIST_HEAD(&event->rb_entry);
INIT_LIST_HEAD(&event->active_entry);
+ INIT_LIST_HEAD(&event->addr_filters.list);
INIT_HLIST_NODE(&event->hlist_entry);
@@ -7972,6 +8745,7 @@
init_irq_work(&event->pending, perf_pending_event);
mutex_init(&event->mmap_mutex);
+ raw_spin_lock_init(&event->addr_filters.lock);
atomic_long_set(&event->refcount, 1);
event->cpu = cpu;
@@ -8006,8 +8780,16 @@
context = parent_event->overflow_handler_context;
}
- event->overflow_handler = overflow_handler;
- event->overflow_handler_context = context;
+ if (overflow_handler) {
+ event->overflow_handler = overflow_handler;
+ event->overflow_handler_context = context;
+ } else if (is_write_backward(event)) {
+ event->overflow_handler = perf_event_output_backward;
+ event->overflow_handler_context = NULL;
+ } else {
+ event->overflow_handler = perf_event_output_forward;
+ event->overflow_handler_context = NULL;
+ }
perf_event__state_init(event);
@@ -8048,11 +8830,22 @@
if (err)
goto err_pmu;
+ if (has_addr_filter(event)) {
+ event->addr_filters_offs = kcalloc(pmu->nr_addr_filters,
+ sizeof(unsigned long),
+ GFP_KERNEL);
+ if (!event->addr_filters_offs) {
+ err = -ENOMEM;
+ goto err_per_task;
+ }
+
+ /* force hw sync on the address filters */
+ event->addr_filters_gen = 1;
+ }
+
if (!event->parent) {
if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
err = get_callchain_buffers();
if (err)
- goto err_per_task;
+ goto err_addr_filters;
}
}
@@ -8061,6 +8854,9 @@
return event;
+err_addr_filters:
+ kfree(event->addr_filters_offs);
+
err_per_task:
exclusive_event_destroy(event);
@@ -8240,6 +9036,13 @@
goto out;
/*
+ * Either writing ring buffer from beginning or from end.
+ * Mixing is not allowed.
+ */
+ if (is_write_backward(output_event) != is_write_backward(event))
+ goto out;
+
+ /*
* If both events generate aux data, they must be on the same PMU
*/
if (has_aux(event) && has_aux(output_event) &&
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 4199b6d..05f9f6d 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -11,13 +11,13 @@
struct ring_buffer {
atomic_t refcount;
struct rcu_head rcu_head;
- struct irq_work irq_work;
#ifdef CONFIG_PERF_USE_VMALLOC
struct work_struct work;
int page_order; /* allocation order */
#endif
int nr_pages; /* nr of data pages */
int overwrite; /* can overwrite itself */
+ int paused; /* ring-buffer writes disabled while set */
atomic_t poll; /* POLL_ for wakeups */
@@ -65,6 +65,14 @@
rb_free(rb);
}
+static inline void rb_toggle_paused(struct ring_buffer *rb, bool pause)
+{
+ if (!pause && rb->nr_pages)
+ rb->paused = 0;
+ else
+ rb->paused = 1;
+}
+
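This keeps the output hot path down to a single rb->paused test; writers arriving while paused only bump the lost counter. Assuming the PERF_EVENT_IOC_PAUSE_OUTPUT ioctl added in the same series as this helper, userspace would drive it roughly like so:

	#include <linux/perf_event.h>
	#include <sys/ioctl.h>

	/* pause != 0 stops ring-buffer writes; 0 resumes them (if nr_pages). */
	static int pause_output(int perf_fd, unsigned int pause)
	{
		return ioctl(perf_fd, PERF_EVENT_IOC_PAUSE_OUTPUT, pause);
	}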
extern struct ring_buffer *
rb_alloc(int nr_pages, long watermark, int cpu, int flags);
extern void perf_event_wakeup(struct perf_event *event);
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 7611d0f..ae9b90d 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -102,8 +102,21 @@
preempt_enable();
}
-int perf_output_begin(struct perf_output_handle *handle,
- struct perf_event *event, unsigned int size)
+static __always_inline bool
+ring_buffer_has_space(unsigned long head, unsigned long tail,
+ unsigned long data_size, unsigned int size,
+ bool backward)
+{
+ if (!backward)
+ return CIRC_SPACE(head, tail, data_size) >= size;
+ else
+ return CIRC_SPACE(tail, head, data_size) >= size;
+}
+
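The swapped arguments are the whole trick: a forward writer consumes the space ahead of head, a backward writer the space behind it. A standalone demonstration, with CIRC_SPACE() copied from <linux/circ_buf.h> (power-of-two sizes assumed):

	#include <stdio.h>

	#define CIRC_SPACE(head, tail, size) \
		(((tail) - ((head) + 1)) & ((size) - 1))

	int main(void)
	{
		unsigned long head = 10, tail = 4, size = 16;

		printf("forward:  %lu\n", CIRC_SPACE(head, tail, size));	/* 9 */
		printf("backward: %lu\n", CIRC_SPACE(tail, head, size));	/* 5 */
		return 0;
	}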
+static __always_inline int
+__perf_output_begin(struct perf_output_handle *handle,
+ struct perf_event *event, unsigned int size,
+ bool backward)
{
struct ring_buffer *rb;
unsigned long tail, offset, head;
@@ -125,8 +138,11 @@
if (unlikely(!rb))
goto out;
- if (unlikely(!rb->nr_pages))
+ if (unlikely(rb->paused)) {
+ if (rb->nr_pages)
+ local_inc(&rb->lost);
goto out;
+ }
handle->rb = rb;
handle->event = event;
@@ -143,9 +159,12 @@
do {
tail = READ_ONCE(rb->user_page->data_tail);
offset = head = local_read(&rb->head);
- if (!rb->overwrite &&
- unlikely(CIRC_SPACE(head, tail, perf_data_size(rb)) < size))
- goto fail;
+ if (!rb->overwrite) {
+ if (unlikely(!ring_buffer_has_space(head, tail,
+ perf_data_size(rb),
+ size, backward)))
+ goto fail;
+ }
/*
* The above forms a control dependency barrier separating the
@@ -159,9 +178,17 @@
* See perf_output_put_handle().
*/
- head += size;
+ if (!backward)
+ head += size;
+ else
+ head -= size;
} while (local_cmpxchg(&rb->head, offset, head) != offset);
+ if (backward) {
+ offset = head;
+ head = (u64)(-head);
+ }
+
/*
* We rely on the implied barrier() by local_cmpxchg() to ensure
* none of the data stores below can be lifted up by the compiler.
@@ -203,6 +230,26 @@
return -ENOSPC;
}
+int perf_output_begin_forward(struct perf_output_handle *handle,
+ struct perf_event *event, unsigned int size)
+{
+ return __perf_output_begin(handle, event, size, false);
+}
+
+int perf_output_begin_backward(struct perf_output_handle *handle,
+ struct perf_event *event, unsigned int size)
+{
+ return __perf_output_begin(handle, event, size, true);
+}
+
+int perf_output_begin(struct perf_output_handle *handle,
+ struct perf_event *event, unsigned int size)
+{
+ return __perf_output_begin(handle, event, size,
+ unlikely(is_write_backward(event)));
+}
+
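The direction is a property of the event itself (see the mixing check added to the set-output path further down), fixed at creation time. A sketch of the userspace setup, assuming the write_backward perf_event_attr bit introduced alongside these helpers:

	#include <linux/perf_event.h>
	#include <string.h>

	static void init_backward_attr(struct perf_event_attr *attr)
	{
		memset(attr, 0, sizeof(*attr));
		attr->size = sizeof(*attr);
		attr->type = PERF_TYPE_SOFTWARE;
		attr->config = PERF_COUNT_SW_DUMMY;
		attr->sample_type = PERF_SAMPLE_IP;
		attr->write_backward = 1;	/* records are written from the end */
	}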
unsigned int perf_output_copy(struct perf_output_handle *handle,
const void *buf, unsigned int len)
{
@@ -221,8 +268,6 @@
rcu_read_unlock();
}
-static void rb_irq_work(struct irq_work *work);
-
static void
ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
{
@@ -243,16 +288,13 @@
INIT_LIST_HEAD(&rb->event_list);
spin_lock_init(&rb->event_lock);
- init_irq_work(&rb->irq_work, rb_irq_work);
-}
-static void ring_buffer_put_async(struct ring_buffer *rb)
-{
- if (!atomic_dec_and_test(&rb->refcount))
- return;
-
- rb->rcu_head.next = (void *)rb;
- irq_work_queue(&rb->irq_work);
+ /*
+ * perf_output_begin() only checks rb->paused, therefore
+ * rb->paused must be true if we have no pages for output.
+ */
+ if (!rb->nr_pages)
+ rb->paused = 1;
}
/*
@@ -264,6 +306,10 @@
* The ordering is similar to that of perf_output_{begin,end}, with
* the exception of (B), which should be taken care of by the pmu
* driver, since ordering rules will differ depending on hardware.
+ *
+ * Call this from pmu::start(); see the comment in perf_aux_output_end()
+ * about its use in pmu callbacks. Both can also be called from the PMI
+ * handler if needed.
*/
void *perf_aux_output_begin(struct perf_output_handle *handle,
struct perf_event *event)
@@ -288,6 +334,13 @@
goto err;
/*
+ * If rb::aux_mmap_count is zero (and rb_has_aux() above went through),
+ * the aux buffer is in perf_mmap_close(), about to get freed.
+ */
+ if (!atomic_read(&rb->aux_mmap_count))
+ goto err_put;
+
+ /*
* Nesting is not supported for AUX area, make sure nested
* writers are caught early
*/
@@ -328,10 +381,11 @@
return handle->rb->aux_priv;
err_put:
+ /* can't be last */
rb_free_aux(rb);
err:
- ring_buffer_put_async(rb);
+ ring_buffer_put(rb);
handle->event = NULL;
return NULL;
@@ -342,6 +396,10 @@
* aux_head and posting a PERF_RECORD_AUX into the perf buffer. It is the
* pmu driver's responsibility to observe ordering rules of the hardware,
* so that all the data is externally visible before this is called.
+ *
+ * Note: this has to be called from pmu::stop() callback, as the assumption
+ * of the AUX buffer management code is that after pmu::stop(), the AUX
+ * transaction must be stopped and therefore drop the AUX reference count.
*/
void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
bool truncated)
@@ -389,8 +447,9 @@
handle->event = NULL;
local_set(&rb->aux_nest, 0);
+ /* can't be last */
rb_free_aux(rb);
- ring_buffer_put_async(rb);
+ ring_buffer_put(rb);
}
/*
@@ -471,6 +530,14 @@
{
int pg;
+ /*
+ * Should never happen, the last reference should be dropped from
+ * perf_mmap_close() path, which first stops aux transactions (which
+ * in turn are the atomic holders of aux_refcount) and then does the
+ * last rb_free_aux().
+ */
+ WARN_ON_ONCE(in_atomic());
+
if (rb->aux_priv) {
rb->free_aux(rb->aux_priv);
rb->free_aux = NULL;
@@ -582,18 +649,7 @@
void rb_free_aux(struct ring_buffer *rb)
{
if (atomic_dec_and_test(&rb->aux_refcount))
- irq_work_queue(&rb->irq_work);
-}
-
-static void rb_irq_work(struct irq_work *work)
-{
- struct ring_buffer *rb = container_of(work, struct ring_buffer, irq_work);
-
- if (!atomic_read(&rb->aux_refcount))
__rb_free_aux(rb);
-
- if (rb->rcu_head.next == (void *)rb)
- call_rcu(&rb->rcu_head, rb_free_rcu);
}
#ifndef CONFIG_PERF_USE_VMALLOC
diff --git a/kernel/fork.c b/kernel/fork.c
index d277e83..3e84515 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1494,7 +1494,7 @@
* sigaltstack should be cleared when sharing the same VM
*/
if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM)
- p->sas_ss_sp = p->sas_ss_size = 0;
+ sas_ss_reset(p);
/*
* Syscall tracing and stepping should be turned off in the
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 78c1c0e..81f1a71 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -45,6 +45,7 @@
#include <linux/bitops.h>
#include <linux/gfp.h>
#include <linux/kmemcheck.h>
+#include <linux/random.h>
#include <asm/sections.h>
@@ -708,7 +709,7 @@
* yet. Otherwise we look it up. We cache the result in the lock object
* itself, so actual lookup of the hash should be once per lock object.
*/
-static inline struct lock_class *
+static struct lock_class *
register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
{
struct lockdep_subclass_key *key;
@@ -3585,7 +3586,35 @@
return 0;
}
-static void __lock_pin_lock(struct lockdep_map *lock)
+static struct pin_cookie __lock_pin_lock(struct lockdep_map *lock)
+{
+ struct pin_cookie cookie = NIL_COOKIE;
+ struct task_struct *curr = current;
+ int i;
+
+ if (unlikely(!debug_locks))
+ return cookie;
+
+ for (i = 0; i < curr->lockdep_depth; i++) {
+ struct held_lock *hlock = curr->held_locks + i;
+
+ if (match_held_lock(hlock, lock)) {
+ /*
+ * Grab 16 bits of randomness; this is sufficient to not
+ * be guessable and still allows some pin nesting in
+ * our u32 pin_count.
+ */
+ cookie.val = 1 + (prandom_u32() >> 16);
+ hlock->pin_count += cookie.val;
+ return cookie;
+ }
+ }
+
+ WARN(1, "pinning an unheld lock\n");
+ return cookie;
+}
+
+static void __lock_repin_lock(struct lockdep_map *lock, struct pin_cookie cookie)
{
struct task_struct *curr = current;
int i;
@@ -3597,7 +3626,7 @@
struct held_lock *hlock = curr->held_locks + i;
if (match_held_lock(hlock, lock)) {
- hlock->pin_count++;
+ hlock->pin_count += cookie.val;
return;
}
}
@@ -3605,7 +3634,7 @@
WARN(1, "pinning an unheld lock\n");
}
-static void __lock_unpin_lock(struct lockdep_map *lock)
+static void __lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie cookie)
{
struct task_struct *curr = current;
int i;
@@ -3620,7 +3649,11 @@
if (WARN(!hlock->pin_count, "unpinning an unpinned lock\n"))
return;
- hlock->pin_count--;
+ hlock->pin_count -= cookie.val;
+
+ if (WARN((int)hlock->pin_count < 0, "pin count corrupted\n"))
+ hlock->pin_count = 0;
+
return;
}
}
@@ -3751,24 +3784,27 @@
}
EXPORT_SYMBOL_GPL(lock_is_held);
-void lock_pin_lock(struct lockdep_map *lock)
+struct pin_cookie lock_pin_lock(struct lockdep_map *lock)
{
+ struct pin_cookie cookie = NIL_COOKIE;
unsigned long flags;
if (unlikely(current->lockdep_recursion))
- return;
+ return cookie;
raw_local_irq_save(flags);
check_flags(flags);
current->lockdep_recursion = 1;
- __lock_pin_lock(lock);
+ cookie = __lock_pin_lock(lock);
current->lockdep_recursion = 0;
raw_local_irq_restore(flags);
+
+ return cookie;
}
EXPORT_SYMBOL_GPL(lock_pin_lock);
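For callers, the change turns pinning from a bare counter into a capability: whoever took the cookie is the only one who can unpin. A kernel-context sketch of the intended pattern (pinned_section() is hypothetical; scheduler code around rq->lock is the kind of real user this targets):

	#include <linux/lockdep.h>

	static void pinned_section(struct lockdep_map *map)
	{
		struct pin_cookie cookie;

		cookie = lock_pin_lock(map);	/* 16 bits of randomness recorded */
		/* ... code that must not drop the pinned lock ... */
		lock_unpin_lock(map, cookie);	/* wrong cookie => "pin count corrupted" */
	}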
-void lock_unpin_lock(struct lockdep_map *lock)
+void lock_repin_lock(struct lockdep_map *lock, struct pin_cookie cookie)
{
unsigned long flags;
@@ -3779,7 +3815,24 @@
check_flags(flags);
current->lockdep_recursion = 1;
- __lock_unpin_lock(lock);
+ __lock_repin_lock(lock, cookie);
+ current->lockdep_recursion = 0;
+ raw_local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(lock_repin_lock);
+
+void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie cookie)
+{
+ unsigned long flags;
+
+ if (unlikely(current->lockdep_recursion))
+ return;
+
+ raw_local_irq_save(flags);
+ check_flags(flags);
+
+ current->lockdep_recursion = 1;
+ __lock_unpin_lock(lock, cookie);
current->lockdep_recursion = 0;
raw_local_irq_restore(flags);
}
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index 8ef1919..f8c5af5 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -75,12 +75,7 @@
long n_lock_acquired;
};
-#if defined(MODULE)
-#define LOCKTORTURE_RUNNABLE_INIT 1
-#else
-#define LOCKTORTURE_RUNNABLE_INIT 0
-#endif
-int torture_runnable = LOCKTORTURE_RUNNABLE_INIT;
+int torture_runnable = IS_ENABLED(MODULE);
module_param(torture_runnable, int, 0444);
MODULE_PARM_DESC(torture_runnable, "Start locktorture at module init");
@@ -394,12 +389,12 @@
if (!rt_task(current)) {
/*
- * (1) Boost priority once every ~50k operations. When the
+ * Boost priority once every ~50k operations. When the
+ * task tries to take the lock, the rtmutex will account
* for the new priority, and do any corresponding pi-dance.
*/
- if (!(torture_random(trsp) %
- (cxt.nrealwriters_stress * factor))) {
+ if (trsp && !(torture_random(trsp) %
+ (cxt.nrealwriters_stress * factor))) {
policy = SCHED_FIFO;
param.sched_priority = MAX_RT_PRIO - 1;
} else /* common case, do nothing */
@@ -748,6 +743,15 @@
if (torture_cleanup_begin())
return;
+ /*
+ * Indicates early cleanup, meaning that the test has not run,
+ * such as when passing bogus args when loading the module. As
+ * such, only perform the underlying torture-specific cleanups,
+ * and avoid anything related to locktorture.
+ */
+ if (!cxt.lwsa)
+ goto end;
+
if (writer_tasks) {
for (i = 0; i < cxt.nrealwriters_stress; i++)
torture_stop_kthread(lock_torture_writer,
@@ -776,6 +780,7 @@
else
lock_torture_print_module_parms(cxt.cur_ops,
"End of test: SUCCESS");
+end:
torture_cleanup_end();
}
@@ -870,6 +875,7 @@
VERBOSE_TOROUT_STRING("cxt.lrsa: Out of memory");
firsterr = -ENOMEM;
kfree(cxt.lwsa);
+ cxt.lwsa = NULL;
goto unwind;
}
@@ -878,6 +884,7 @@
cxt.lrsa[i].n_lock_acquired = 0;
}
}
+
lock_torture_print_module_parms(cxt.cur_ops, "Start of test");
/* Prepare torture context. */
diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h
index d734b75..22e0253 100644
--- a/kernel/locking/qspinlock_stat.h
+++ b/kernel/locking/qspinlock_stat.h
@@ -191,8 +191,6 @@
for (i = 0 ; i < qstat_num; i++)
WRITE_ONCE(ptr[i], 0);
- for (i = 0 ; i < qstat_num; i++)
- WRITE_ONCE(ptr[i], 0);
}
return count;
}
@@ -214,10 +212,8 @@
struct dentry *d_qstat = debugfs_create_dir("qlockstat", NULL);
int i;
- if (!d_qstat) {
- pr_warn("Could not create 'qlockstat' debugfs directory\n");
- return 0;
- }
+ if (!d_qstat)
+ goto out;
/*
* Create the debugfs files
@@ -227,12 +223,20 @@
* performance.
*/
for (i = 0; i < qstat_num; i++)
- debugfs_create_file(qstat_names[i], 0400, d_qstat,
- (void *)(long)i, &fops_qstat);
+ if (!debugfs_create_file(qstat_names[i], 0400, d_qstat,
+ (void *)(long)i, &fops_qstat))
+ goto fail_undo;
- debugfs_create_file(qstat_names[qstat_reset_cnts], 0200, d_qstat,
- (void *)(long)qstat_reset_cnts, &fops_qstat);
+ if (!debugfs_create_file(qstat_names[qstat_reset_cnts], 0200, d_qstat,
+ (void *)(long)qstat_reset_cnts, &fops_qstat))
+ goto fail_undo;
+
return 0;
+fail_undo:
+ debugfs_remove_recursive(d_qstat);
+out:
+ pr_warn("Could not create 'qlockstat' debugfs entries\n");
+ return -ENOMEM;
}
fs_initcall(init_qspinlock_stat);
diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c
index 3a50485..1591f6b 100644
--- a/kernel/locking/rwsem-spinlock.c
+++ b/kernel/locking/rwsem-spinlock.c
@@ -191,11 +191,12 @@
/*
* get a write lock on the semaphore
*/
-void __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
+int __sched __down_write_common(struct rw_semaphore *sem, int state)
{
struct rwsem_waiter waiter;
struct task_struct *tsk;
unsigned long flags;
+ int ret = 0;
raw_spin_lock_irqsave(&sem->wait_lock, flags);
@@ -215,21 +216,33 @@
*/
if (sem->count == 0)
break;
- set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ if (signal_pending_state(state, current)) {
+ ret = -EINTR;
+ goto out;
+ }
+ set_task_state(tsk, state);
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
schedule();
raw_spin_lock_irqsave(&sem->wait_lock, flags);
}
/* got the lock */
sem->count = -1;
+out:
list_del(&waiter.list);
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
+
+ return ret;
}
void __sched __down_write(struct rw_semaphore *sem)
{
- __down_write_nested(sem, 0);
+ __down_write_common(sem, TASK_UNINTERRUPTIBLE);
+}
+
+int __sched __down_write_killable(struct rw_semaphore *sem)
+{
+ return __down_write_common(sem, TASK_KILLABLE);
}
/*
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index a4d4de0..09e30c6 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -433,12 +433,13 @@
/*
* Wait until we successfully acquire the write lock
*/
-__visible
-struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
+static inline struct rw_semaphore *
+__rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
{
long count;
bool waiting = true; /* any queued threads before us */
struct rwsem_waiter waiter;
+ struct rw_semaphore *ret = sem;
/* undo write bias from down_write operation, stop active locking */
count = rwsem_atomic_update(-RWSEM_ACTIVE_WRITE_BIAS, sem);
@@ -478,7 +479,7 @@
count = rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
/* wait until we successfully acquire the lock */
- set_current_state(TASK_UNINTERRUPTIBLE);
+ set_current_state(state);
while (true) {
if (rwsem_try_write_lock(count, sem))
break;
@@ -486,21 +487,48 @@
/* Block until there are no active lockers. */
do {
+ if (signal_pending_state(state, current))
+ goto out_nolock;
+
schedule();
- set_current_state(TASK_UNINTERRUPTIBLE);
+ set_current_state(state);
} while ((count = sem->count) & RWSEM_ACTIVE_MASK);
raw_spin_lock_irq(&sem->wait_lock);
}
__set_current_state(TASK_RUNNING);
-
list_del(&waiter.list);
raw_spin_unlock_irq(&sem->wait_lock);
- return sem;
+ return ret;
+
+out_nolock:
+ __set_current_state(TASK_RUNNING);
+ raw_spin_lock_irq(&sem->wait_lock);
+ list_del(&waiter.list);
+ if (list_empty(&sem->wait_list))
+ rwsem_atomic_update(-RWSEM_WAITING_BIAS, sem);
+ else
+ __rwsem_do_wake(sem, RWSEM_WAKE_ANY);
+ raw_spin_unlock_irq(&sem->wait_lock);
+
+ return ERR_PTR(-EINTR);
+}
+
+__visible struct rw_semaphore * __sched
+rwsem_down_write_failed(struct rw_semaphore *sem)
+{
+ return __rwsem_down_write_failed_common(sem, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(rwsem_down_write_failed);
+__visible struct rw_semaphore * __sched
+rwsem_down_write_failed_killable(struct rw_semaphore *sem)
+{
+ return __rwsem_down_write_failed_common(sem, TASK_KILLABLE);
+}
+EXPORT_SYMBOL(rwsem_down_write_failed_killable);
+
/*
* handle waking up a waiter on the semaphore
* - up_read/up_write has decremented the active part of count if we come here
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 205be0c..c817216 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -55,6 +55,25 @@
EXPORT_SYMBOL(down_write);
/*
+ * lock for writing
+ */
+int __sched down_write_killable(struct rw_semaphore *sem)
+{
+ might_sleep();
+ rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
+
+ if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock, __down_write_killable)) {
+ rwsem_release(&sem->dep_map, 1, _RET_IP_);
+ return -EINTR;
+ }
+
+ rwsem_set_owner(sem);
+ return 0;
+}
+EXPORT_SYMBOL(down_write_killable);
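A typical caller simply propagates the -EINTR; mmap_sem below is chosen only for illustration (kernel-context sketch, not standalone):

	#include <linux/mm_types.h>
	#include <linux/rwsem.h>

	static int modify_address_space(struct mm_struct *mm)
	{
		if (down_write_killable(&mm->mmap_sem))
			return -EINTR;	/* fatal signal arrived while sleeping */

		/* ... modify the address space ... */

		up_write(&mm->mmap_sem);
		return 0;
	}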
+
+/*
* trylock for writing -- returns 1 if successful, 0 if contention
*/
int down_write_trylock(struct rw_semaphore *sem)
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 12cd989..160e100 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -37,6 +37,14 @@
#define HIBERNATE_SIG "S1SUSPEND"
/*
+ * When reading an {un,}compressed image, we may restore pages in place,
+ * in which case some architectures need these pages cleaned before they
+ * can be executed. We don't know which pages these may be, so clean the lot.
+ */
+static bool clean_pages_on_read;
+static bool clean_pages_on_decompress;
+
+/*
* The swap map is a data structure used for keeping track of each page
* written to a swap partition. It consists of many swap_map_page
* structures that contain each an array of MAP_PAGE_ENTRIES swap entries.
@@ -241,6 +249,9 @@
if (bio_data_dir(bio) == WRITE)
put_page(page);
+ else if (clean_pages_on_read)
+ flush_icache_range((unsigned long)page_address(page),
+ (unsigned long)page_address(page) + PAGE_SIZE);
if (bio->bi_error && !hb->error)
hb->error = bio->bi_error;
@@ -1049,6 +1060,7 @@
hib_init_batch(&hb);
+ clean_pages_on_read = true;
printk(KERN_INFO "PM: Loading image data pages (%u pages)...\n",
nr_to_read);
m = nr_to_read / 10;
@@ -1124,6 +1136,10 @@
d->unc_len = LZO_UNC_SIZE;
d->ret = lzo1x_decompress_safe(d->cmp + LZO_HEADER, d->cmp_len,
d->unc, &d->unc_len);
+ if (clean_pages_on_decompress)
+ flush_icache_range((unsigned long)d->unc,
+ (unsigned long)d->unc + d->unc_len);
+
atomic_set(&d->stop, 1);
wake_up(&d->done);
}
@@ -1189,6 +1205,8 @@
}
memset(crc, 0, offsetof(struct crc_data, go));
+ clean_pages_on_decompress = true;
+
/*
* Start the decompression threads.
*/
diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile
index 032b2c0..18dfc48 100644
--- a/kernel/rcu/Makefile
+++ b/kernel/rcu/Makefile
@@ -5,6 +5,7 @@
obj-y += update.o sync.o
obj-$(CONFIG_SRCU) += srcu.o
obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
+obj-$(CONFIG_RCU_PERF_TEST) += rcuperf.o
obj-$(CONFIG_TREE_RCU) += tree.o
obj-$(CONFIG_PREEMPT_RCU) += tree.o
obj-$(CONFIG_TREE_RCU_TRACE) += tree_trace.o
diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c
new file mode 100644
index 0000000..3cee0d8
--- /dev/null
+++ b/kernel/rcu/rcuperf.c
@@ -0,0 +1,655 @@
+/*
+ * Read-Copy Update module-based performance-test facility
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * Copyright (C) IBM Corporation, 2015
+ *
+ * Authors: Paul E. McKenney <paulmck@us.ibm.com>
+ */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kthread.h>
+#include <linux/err.h>
+#include <linux/spinlock.h>
+#include <linux/smp.h>
+#include <linux/rcupdate.h>
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+#include <linux/atomic.h>
+#include <linux/bitops.h>
+#include <linux/completion.h>
+#include <linux/moduleparam.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/reboot.h>
+#include <linux/freezer.h>
+#include <linux/cpu.h>
+#include <linux/delay.h>
+#include <linux/stat.h>
+#include <linux/srcu.h>
+#include <linux/slab.h>
+#include <asm/byteorder.h>
+#include <linux/torture.h>
+#include <linux/vmalloc.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.vnet.ibm.com>");
+
+#define PERF_FLAG "-perf:"
+#define PERFOUT_STRING(s) \
+ pr_alert("%s" PERF_FLAG s "\n", perf_type)
+#define VERBOSE_PERFOUT_STRING(s) \
+ do { if (verbose) pr_alert("%s" PERF_FLAG " %s\n", perf_type, s); } while (0)
+#define VERBOSE_PERFOUT_ERRSTRING(s) \
+ do { if (verbose) pr_alert("%s" PERF_FLAG "!!! %s\n", perf_type, s); } while (0)
+
+torture_param(bool, gp_exp, true, "Use expedited GP wait primitives");
+torture_param(int, holdoff, 10, "Holdoff time before test start (s)");
+torture_param(int, nreaders, -1, "Number of RCU reader threads");
+torture_param(int, nwriters, -1, "Number of RCU updater threads");
+torture_param(bool, shutdown, false, "Shutdown at end of performance tests.");
+torture_param(bool, verbose, true, "Enable verbose debugging printk()s");
+
+static char *perf_type = "rcu";
+module_param(perf_type, charp, 0444);
+MODULE_PARM_DESC(perf_type, "Type of RCU to performance-test (rcu, rcu_bh, ...)");
+
+static int nrealreaders;
+static int nrealwriters;
+static struct task_struct **writer_tasks;
+static struct task_struct **reader_tasks;
+static struct task_struct *shutdown_task;
+
+static u64 **writer_durations;
+static int *writer_n_durations;
+static atomic_t n_rcu_perf_reader_started;
+static atomic_t n_rcu_perf_writer_started;
+static atomic_t n_rcu_perf_writer_finished;
+static wait_queue_head_t shutdown_wq;
+static u64 t_rcu_perf_writer_started;
+static u64 t_rcu_perf_writer_finished;
+static unsigned long b_rcu_perf_writer_started;
+static unsigned long b_rcu_perf_writer_finished;
+
+static int rcu_perf_writer_state;
+#define RTWS_INIT 0
+#define RTWS_EXP_SYNC 1
+#define RTWS_SYNC 2
+#define RTWS_IDLE 3
+#define RTWS_STOPPING 4
+
+#define MAX_MEAS 10000
+#define MIN_MEAS 100
+
+#if defined(MODULE) || defined(CONFIG_RCU_PERF_TEST_RUNNABLE)
+#define RCUPERF_RUNNABLE_INIT 1
+#else
+#define RCUPERF_RUNNABLE_INIT 0
+#endif
+static int perf_runnable = RCUPERF_RUNNABLE_INIT;
+module_param(perf_runnable, int, 0444);
+MODULE_PARM_DESC(perf_runnable, "Start rcuperf at boot");
+
+/*
+ * Operations vector for selecting different types of tests.
+ */
+
+struct rcu_perf_ops {
+ int ptype;
+ void (*init)(void);
+ void (*cleanup)(void);
+ int (*readlock)(void);
+ void (*readunlock)(int idx);
+ unsigned long (*started)(void);
+ unsigned long (*completed)(void);
+ unsigned long (*exp_completed)(void);
+ void (*sync)(void);
+ void (*exp_sync)(void);
+ const char *name;
+};
+
+static struct rcu_perf_ops *cur_ops;
+
+/*
+ * Definitions for rcu perf testing.
+ */
+
+static int rcu_perf_read_lock(void) __acquires(RCU)
+{
+ rcu_read_lock();
+ return 0;
+}
+
+static void rcu_perf_read_unlock(int idx) __releases(RCU)
+{
+ rcu_read_unlock();
+}
+
+static unsigned long __maybe_unused rcu_no_completed(void)
+{
+ return 0;
+}
+
+static void rcu_sync_perf_init(void)
+{
+}
+
+static struct rcu_perf_ops rcu_ops = {
+ .ptype = RCU_FLAVOR,
+ .init = rcu_sync_perf_init,
+ .readlock = rcu_perf_read_lock,
+ .readunlock = rcu_perf_read_unlock,
+ .started = rcu_batches_started,
+ .completed = rcu_batches_completed,
+ .exp_completed = rcu_exp_batches_completed,
+ .sync = synchronize_rcu,
+ .exp_sync = synchronize_rcu_expedited,
+ .name = "rcu"
+};
+
+/*
+ * Definitions for rcu_bh perf testing.
+ */
+
+static int rcu_bh_perf_read_lock(void) __acquires(RCU_BH)
+{
+ rcu_read_lock_bh();
+ return 0;
+}
+
+static void rcu_bh_perf_read_unlock(int idx) __releases(RCU_BH)
+{
+ rcu_read_unlock_bh();
+}
+
+static struct rcu_perf_ops rcu_bh_ops = {
+ .ptype = RCU_BH_FLAVOR,
+ .init = rcu_sync_perf_init,
+ .readlock = rcu_bh_perf_read_lock,
+ .readunlock = rcu_bh_perf_read_unlock,
+ .started = rcu_batches_started_bh,
+ .completed = rcu_batches_completed_bh,
+ .exp_completed = rcu_exp_batches_completed_sched,
+ .sync = synchronize_rcu_bh,
+ .exp_sync = synchronize_rcu_bh_expedited,
+ .name = "rcu_bh"
+};
+
+/*
+ * Definitions for srcu perf testing.
+ */
+
+DEFINE_STATIC_SRCU(srcu_ctl_perf);
+static struct srcu_struct *srcu_ctlp = &srcu_ctl_perf;
+
+static int srcu_perf_read_lock(void) __acquires(srcu_ctlp)
+{
+ return srcu_read_lock(srcu_ctlp);
+}
+
+static void srcu_perf_read_unlock(int idx) __releases(srcu_ctlp)
+{
+ srcu_read_unlock(srcu_ctlp, idx);
+}
+
+static unsigned long srcu_perf_completed(void)
+{
+ return srcu_batches_completed(srcu_ctlp);
+}
+
+static void srcu_perf_synchronize(void)
+{
+ synchronize_srcu(srcu_ctlp);
+}
+
+static void srcu_perf_synchronize_expedited(void)
+{
+ synchronize_srcu_expedited(srcu_ctlp);
+}
+
+static struct rcu_perf_ops srcu_ops = {
+ .ptype = SRCU_FLAVOR,
+ .init = rcu_sync_perf_init,
+ .readlock = srcu_perf_read_lock,
+ .readunlock = srcu_perf_read_unlock,
+ .started = NULL,
+ .completed = srcu_perf_completed,
+ .exp_completed = srcu_perf_completed,
+ .sync = srcu_perf_synchronize,
+ .exp_sync = srcu_perf_synchronize_expedited,
+ .name = "srcu"
+};
+
+/*
+ * Definitions for sched perf testing.
+ */
+
+static int sched_perf_read_lock(void)
+{
+ preempt_disable();
+ return 0;
+}
+
+static void sched_perf_read_unlock(int idx)
+{
+ preempt_enable();
+}
+
+static struct rcu_perf_ops sched_ops = {
+ .ptype = RCU_SCHED_FLAVOR,
+ .init = rcu_sync_perf_init,
+ .readlock = sched_perf_read_lock,
+ .readunlock = sched_perf_read_unlock,
+ .started = rcu_batches_started_sched,
+ .completed = rcu_batches_completed_sched,
+ .exp_completed = rcu_exp_batches_completed_sched,
+ .sync = synchronize_sched,
+ .exp_sync = synchronize_sched_expedited,
+ .name = "sched"
+};
+
+#ifdef CONFIG_TASKS_RCU
+
+/*
+ * Definitions for RCU-tasks perf testing.
+ */
+
+static int tasks_perf_read_lock(void)
+{
+ return 0;
+}
+
+static void tasks_perf_read_unlock(int idx)
+{
+}
+
+static struct rcu_perf_ops tasks_ops = {
+ .ptype = RCU_TASKS_FLAVOR,
+ .init = rcu_sync_perf_init,
+ .readlock = tasks_perf_read_lock,
+ .readunlock = tasks_perf_read_unlock,
+ .started = rcu_no_completed,
+ .completed = rcu_no_completed,
+ .sync = synchronize_rcu_tasks,
+ .exp_sync = synchronize_rcu_tasks,
+ .name = "tasks"
+};
+
+#define RCUPERF_TASKS_OPS &tasks_ops,
+
+static bool __maybe_unused torturing_tasks(void)
+{
+ return cur_ops == &tasks_ops;
+}
+
+#else /* #ifdef CONFIG_TASKS_RCU */
+
+#define RCUPERF_TASKS_OPS
+
+static bool __maybe_unused torturing_tasks(void)
+{
+ return false;
+}
+
+#endif /* #else #ifdef CONFIG_TASKS_RCU */
+
+/*
+ * If performance tests complete, wait for shutdown to commence.
+ */
+static void rcu_perf_wait_shutdown(void)
+{
+ cond_resched_rcu_qs();
+ if (atomic_read(&n_rcu_perf_writer_finished) < nrealwriters)
+ return;
+ while (!torture_must_stop())
+ schedule_timeout_uninterruptible(1);
+}
+
+/*
+ * RCU perf reader kthread. Repeatedly does empty RCU read-side
+ * critical section, minimizing update-side interference.
+ */
+static int
+rcu_perf_reader(void *arg)
+{
+ unsigned long flags;
+ int idx;
+ long me = (long)arg;
+
+ VERBOSE_PERFOUT_STRING("rcu_perf_reader task started");
+ set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids));
+ set_user_nice(current, MAX_NICE);
+ atomic_inc(&n_rcu_perf_reader_started);
+
+ do {
+ local_irq_save(flags);
+ idx = cur_ops->readlock();
+ cur_ops->readunlock(idx);
+ local_irq_restore(flags);
+ rcu_perf_wait_shutdown();
+ } while (!torture_must_stop());
+ torture_kthread_stopping("rcu_perf_reader");
+ return 0;
+}
+
+/*
+ * RCU perf writer kthread. Repeatedly does a grace period.
+ */
+static int
+rcu_perf_writer(void *arg)
+{
+ int i = 0;
+ int i_max;
+ long me = (long)arg;
+ struct sched_param sp;
+ bool started = false, done = false, alldone = false;
+ u64 t;
+ u64 *wdp;
+ u64 *wdpp = writer_durations[me];
+
+ VERBOSE_PERFOUT_STRING("rcu_perf_writer task started");
+ WARN_ON(rcu_gp_is_expedited() && !rcu_gp_is_normal() && !gp_exp);
+ WARN_ON(rcu_gp_is_normal() && gp_exp);
+ WARN_ON(!wdpp);
+ set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids));
+ sp.sched_priority = 1;
+ sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
+
+ if (holdoff)
+ schedule_timeout_uninterruptible(holdoff * HZ);
+
+ t = ktime_get_mono_fast_ns();
+ if (atomic_inc_return(&n_rcu_perf_writer_started) >= nrealwriters) {
+ t_rcu_perf_writer_started = t;
+ if (gp_exp) {
+ b_rcu_perf_writer_started =
+ cur_ops->exp_completed() / 2;
+ } else {
+ b_rcu_perf_writer_started =
+ cur_ops->completed();
+ }
+ }
+
+ do {
+ wdp = &wdpp[i];
+ *wdp = ktime_get_mono_fast_ns();
+ if (gp_exp) {
+ rcu_perf_writer_state = RTWS_EXP_SYNC;
+ cur_ops->exp_sync();
+ } else {
+ rcu_perf_writer_state = RTWS_SYNC;
+ cur_ops->sync();
+ }
+ rcu_perf_writer_state = RTWS_IDLE;
+ t = ktime_get_mono_fast_ns();
+ *wdp = t - *wdp;
+ i_max = i;
+ if (!started &&
+ atomic_read(&n_rcu_perf_writer_started) >= nrealwriters)
+ started = true;
+ if (!done && i >= MIN_MEAS) {
+ done = true;
+ sp.sched_priority = 0;
+ sched_setscheduler_nocheck(current,
+ SCHED_NORMAL, &sp);
+ pr_alert("%s" PERF_FLAG
+ "rcu_perf_writer %ld has %d measurements\n",
+ perf_type, me, MIN_MEAS);
+ if (atomic_inc_return(&n_rcu_perf_writer_finished) >=
+ nrealwriters) {
+ schedule_timeout_interruptible(10);
+ rcu_ftrace_dump(DUMP_ALL);
+ PERFOUT_STRING("Test complete");
+ t_rcu_perf_writer_finished = t;
+ if (gp_exp) {
+ b_rcu_perf_writer_finished =
+ cur_ops->exp_completed() / 2;
+ } else {
+ b_rcu_perf_writer_finished =
+ cur_ops->completed();
+ }
+ if (shutdown) {
+ smp_mb(); /* Assign before wake. */
+ wake_up(&shutdown_wq);
+ }
+ }
+ }
+ if (done && !alldone &&
+ atomic_read(&n_rcu_perf_writer_finished) >= nrealwriters)
+ alldone = true;
+ if (started && !alldone && i < MAX_MEAS - 1)
+ i++;
+ rcu_perf_wait_shutdown();
+ } while (!torture_must_stop());
+ rcu_perf_writer_state = RTWS_STOPPING;
+ writer_n_durations[me] = i_max;
+ torture_kthread_stopping("rcu_perf_writer");
+ return 0;
+}
+
+static inline void
+rcu_perf_print_module_parms(struct rcu_perf_ops *cur_ops, const char *tag)
+{
+ pr_alert("%s" PERF_FLAG
+ "--- %s: nreaders=%d nwriters=%d verbose=%d shutdown=%d\n",
+ perf_type, tag, nrealreaders, nrealwriters, verbose, shutdown);
+}
+
+static void
+rcu_perf_cleanup(void)
+{
+ int i;
+ int j;
+ int ngps = 0;
+ u64 *wdp;
+ u64 *wdpp;
+
+ if (torture_cleanup_begin())
+ return;
+
+ if (reader_tasks) {
+ for (i = 0; i < nrealreaders; i++)
+ torture_stop_kthread(rcu_perf_reader,
+ reader_tasks[i]);
+ kfree(reader_tasks);
+ }
+
+ if (writer_tasks) {
+ for (i = 0; i < nrealwriters; i++) {
+ torture_stop_kthread(rcu_perf_writer,
+ writer_tasks[i]);
+ if (!writer_n_durations)
+ continue;
+ j = writer_n_durations[i];
+ pr_alert("%s%s writer %d gps: %d\n",
+ perf_type, PERF_FLAG, i, j);
+ ngps += j;
+ }
+ pr_alert("%s%s start: %llu end: %llu duration: %llu gps: %d batches: %ld\n",
+ perf_type, PERF_FLAG,
+ t_rcu_perf_writer_started, t_rcu_perf_writer_finished,
+ t_rcu_perf_writer_finished -
+ t_rcu_perf_writer_started,
+ ngps,
+ b_rcu_perf_writer_finished -
+ b_rcu_perf_writer_started);
+ for (i = 0; i < nrealwriters; i++) {
+ if (!writer_durations)
+ break;
+ if (!writer_n_durations)
+ continue;
+ wdpp = writer_durations[i];
+ if (!wdpp)
+ continue;
+ for (j = 0; j <= writer_n_durations[i]; j++) {
+ wdp = &wdpp[j];
+ pr_alert("%s%s %4d writer-duration: %5d %llu\n",
+ perf_type, PERF_FLAG,
+ i, j, *wdp);
+ if (j % 100 == 0)
+ schedule_timeout_uninterruptible(1);
+ }
+ kfree(writer_durations[i]);
+ }
+ kfree(writer_tasks);
+ kfree(writer_durations);
+ kfree(writer_n_durations);
+ }
+
+ /* Do flavor-specific cleanup operations. */
+ if (cur_ops->cleanup != NULL)
+ cur_ops->cleanup();
+
+ torture_cleanup_end();
+}
+
+/*
+ * Return the number if non-negative. If -1, the number of CPUs.
+ * If less than -1, that much less than the number of CPUs, but
+ * at least one.
+ */
+static int compute_real(int n)
+{
+ int nr;
+
+ if (n >= 0) {
+ nr = n;
+ } else {
+ nr = num_online_cpus() + 1 + n;
+ if (nr <= 0)
+ nr = 1;
+ }
+ return nr;
+}
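A userspace mirror of the mapping makes the clamping concrete (ncpus is passed in explicitly; expected values are shown for an 8-CPU box):

	#include <stdio.h>

	static int compute_real_demo(int n, int ncpus)
	{
		int nr = (n >= 0) ? n : ncpus + 1 + n;

		return nr <= 0 ? 1 : nr;
	}

	int main(void)
	{
		printf("%d %d %d %d\n",
		       compute_real_demo(4, 8),		/* 4: taken as-is */
		       compute_real_demo(-1, 8),	/* 8: all CPUs */
		       compute_real_demo(-3, 8),	/* 6: two fewer than CPUs */
		       compute_real_demo(-20, 8));	/* 1: clamped */
		return 0;
	}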
+
+/*
+ * RCU perf shutdown kthread. Just waits to be awakened, then shuts
+ * down system.
+ */
+static int
+rcu_perf_shutdown(void *arg)
+{
+ do {
+ wait_event(shutdown_wq,
+ atomic_read(&n_rcu_perf_writer_finished) >=
+ nrealwriters);
+ } while (atomic_read(&n_rcu_perf_writer_finished) < nrealwriters);
+ smp_mb(); /* Wake before output. */
+ rcu_perf_cleanup();
+ kernel_power_off();
+ return -EINVAL;
+}
+
+static int __init
+rcu_perf_init(void)
+{
+ long i;
+ int firsterr = 0;
+ static struct rcu_perf_ops *perf_ops[] = {
+ &rcu_ops, &rcu_bh_ops, &srcu_ops, &sched_ops,
+ RCUPERF_TASKS_OPS
+ };
+
+ if (!torture_init_begin(perf_type, verbose, &perf_runnable))
+ return -EBUSY;
+
+ /* Process args and tell the world that the perf'er is on the job. */
+ for (i = 0; i < ARRAY_SIZE(perf_ops); i++) {
+ cur_ops = perf_ops[i];
+ if (strcmp(perf_type, cur_ops->name) == 0)
+ break;
+ }
+ if (i == ARRAY_SIZE(perf_ops)) {
+ pr_alert("rcu-perf: invalid perf type: \"%s\"\n",
+ perf_type);
+ pr_alert("rcu-perf types:");
+ for (i = 0; i < ARRAY_SIZE(perf_ops); i++)
+ pr_alert(" %s", perf_ops[i]->name);
+ pr_alert("\n");
+ firsterr = -EINVAL;
+ goto unwind;
+ }
+ if (cur_ops->init)
+ cur_ops->init();
+
+ nrealwriters = compute_real(nwriters);
+ nrealreaders = compute_real(nreaders);
+ atomic_set(&n_rcu_perf_reader_started, 0);
+ atomic_set(&n_rcu_perf_writer_started, 0);
+ atomic_set(&n_rcu_perf_writer_finished, 0);
+ rcu_perf_print_module_parms(cur_ops, "Start of test");
+
+ /* Start up the kthreads. */
+
+ if (shutdown) {
+ init_waitqueue_head(&shutdown_wq);
+ firsterr = torture_create_kthread(rcu_perf_shutdown, NULL,
+ shutdown_task);
+ if (firsterr)
+ goto unwind;
+ schedule_timeout_uninterruptible(1);
+ }
+ reader_tasks = kcalloc(nrealreaders, sizeof(reader_tasks[0]),
+ GFP_KERNEL);
+ if (reader_tasks == NULL) {
+ VERBOSE_PERFOUT_ERRSTRING("out of memory");
+ firsterr = -ENOMEM;
+ goto unwind;
+ }
+ for (i = 0; i < nrealreaders; i++) {
+ firsterr = torture_create_kthread(rcu_perf_reader, (void *)i,
+ reader_tasks[i]);
+ if (firsterr)
+ goto unwind;
+ }
+ while (atomic_read(&n_rcu_perf_reader_started) < nrealreaders)
+ schedule_timeout_uninterruptible(1);
+ writer_tasks = kcalloc(nrealwriters, sizeof(writer_tasks[0]),
+ GFP_KERNEL);
+ writer_durations = kcalloc(nrealwriters, sizeof(*writer_durations),
+ GFP_KERNEL);
+ writer_n_durations =
+ kcalloc(nrealwriters, sizeof(*writer_n_durations),
+ GFP_KERNEL);
+ if (!writer_tasks || !writer_durations || !writer_n_durations) {
+ VERBOSE_PERFOUT_ERRSTRING("out of memory");
+ firsterr = -ENOMEM;
+ goto unwind;
+ }
+ for (i = 0; i < nrealwriters; i++) {
+ writer_durations[i] =
+ kcalloc(MAX_MEAS, sizeof(*writer_durations[i]),
+ GFP_KERNEL);
+ if (!writer_durations[i]) {
+ firsterr = -ENOMEM;
+ goto unwind;
+ }
+ firsterr = torture_create_kthread(rcu_perf_writer, (void *)i,
+ writer_tasks[i]);
+ if (firsterr)
+ goto unwind;
+ }
+ torture_init_end();
+ return 0;
+
+unwind:
+ torture_init_end();
+ rcu_perf_cleanup();
+ return firsterr;
+}
+
+module_init(rcu_perf_init);
+module_exit(rcu_perf_cleanup);
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 250ea67..084a28a 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -130,8 +130,8 @@
static unsigned long rcu_torture_current_version;
static struct rcu_torture rcu_tortures[10 * RCU_TORTURE_PIPE_LEN];
static DEFINE_SPINLOCK(rcu_torture_lock);
-static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_count) = { 0 };
-static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_batch) = { 0 };
+static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_count);
+static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_batch);
static atomic_t rcu_torture_wcount[RCU_TORTURE_PIPE_LEN + 1];
static atomic_t n_rcu_torture_alloc;
static atomic_t n_rcu_torture_alloc_fail;
@@ -916,7 +916,7 @@
static int
rcu_torture_writer(void *arg)
{
- bool can_expedite = !rcu_gp_is_expedited();
+ bool can_expedite = !rcu_gp_is_expedited() && !rcu_gp_is_normal();
int expediting = 0;
unsigned long gp_snap;
bool gp_cond1 = gp_cond, gp_exp1 = gp_exp, gp_normal1 = gp_normal;
@@ -932,7 +932,7 @@
VERBOSE_TOROUT_STRING("rcu_torture_writer task started");
if (!can_expedite) {
pr_alert("%s" TORTURE_FLAG
- " Grace periods expedited from boot/sysfs for %s,\n",
+ " GP expediting controlled from boot/sysfs for %s,\n",
torture_type, cur_ops->name);
pr_alert("%s" TORTURE_FLAG
" Disabled dynamic grace-period expediting.\n",
@@ -1082,17 +1082,6 @@
return 0;
}
-static void rcutorture_trace_dump(void)
-{
- static atomic_t beenhere = ATOMIC_INIT(0);
-
- if (atomic_read(&beenhere))
- return;
- if (atomic_xchg(&beenhere, 1) != 0)
- return;
- ftrace_dump(DUMP_ALL);
-}
-
/*
* RCU torture reader from timer handler. Dereferences rcu_torture_current,
* incrementing the corresponding element of the pipeline array. The
@@ -1142,7 +1131,7 @@
if (pipe_count > 1) {
do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu, ts,
started, completed);
- rcutorture_trace_dump();
+ rcu_ftrace_dump(DUMP_ALL);
}
__this_cpu_inc(rcu_torture_count[pipe_count]);
completed = completed - started;
@@ -1215,7 +1204,7 @@
if (pipe_count > 1) {
do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu,
ts, started, completed);
- rcutorture_trace_dump();
+ rcu_ftrace_dump(DUMP_ALL);
}
__this_cpu_inc(rcu_torture_count[pipe_count]);
completed = completed - started;
@@ -1333,7 +1322,7 @@
rcu_torture_writer_state,
gpnum, completed, flags);
show_rcu_gp_kthreads();
- rcutorture_trace_dump();
+ rcu_ftrace_dump(DUMP_ALL);
}
rtcv_snap = rcu_torture_current_version;
}
@@ -1489,7 +1478,9 @@
* The above smp_load_acquire() ensures barrier_phase load
* is ordered before the following ->call().
*/
+ local_irq_disable(); /* Just to test no-irq call_rcu(). */
cur_ops->call(&rcu, rcu_torture_barrier_cbf);
+ local_irq_enable();
if (atomic_dec_and_test(&barrier_cbs_count))
wake_up(&barrier_wq);
} while (!torture_must_stop());
@@ -1596,7 +1587,7 @@
{
long cpu = (long)hcpu;
- switch (action) {
+ switch (action & ~CPU_TASKS_FROZEN) {
case CPU_ONLINE:
case CPU_DOWN_FAILED:
(void)rcutorture_booster_init(cpu);
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 9a535a8..c7f1bc4 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -102,6 +102,8 @@
.barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
.name = RCU_STATE_NAME(sname), \
.abbr = sabbr, \
+ .exp_mutex = __MUTEX_INITIALIZER(sname##_state.exp_mutex), \
+ .exp_wake_mutex = __MUTEX_INITIALIZER(sname##_state.exp_wake_mutex), \
}
RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched);
@@ -370,6 +372,21 @@
rcu_momentary_dyntick_idle();
local_irq_restore(flags);
}
+ if (unlikely(raw_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))) {
+ /*
+ * Yes, we just checked a per-CPU variable with preemption
+ * enabled, so we might be migrated to some other CPU at
+ * this point. That is OK because in that case, the
+ * migration will supply the needed quiescent state.
+ * We might end up needlessly disabling preemption and
+ * invoking rcu_sched_qs() on the destination CPU, but
+ * the probability and cost are both quite low, so this
+ * should not be a problem in practice.
+ */
+ preempt_disable();
+ rcu_sched_qs();
+ preempt_enable();
+ }
this_cpu_inc(rcu_qs_ctr);
barrier(); /* Avoid RCU read-side critical sections leaking up. */
}
@@ -385,9 +402,11 @@
static ulong jiffies_till_first_fqs = ULONG_MAX;
static ulong jiffies_till_next_fqs = ULONG_MAX;
+static bool rcu_kick_kthreads;
module_param(jiffies_till_first_fqs, ulong, 0644);
module_param(jiffies_till_next_fqs, ulong, 0644);
+module_param(rcu_kick_kthreads, bool, 0644);
/*
* How long the grace period must be before we start recruiting
@@ -460,6 +479,28 @@
EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
/*
+ * Return the number of RCU expedited batches completed thus far for
+ * debug & stats. Odd numbers mean that a batch is in progress, even
+ * numbers mean idle. The value returned will thus be roughly double
+ * the cumulative batches since boot.
+ */
+unsigned long rcu_exp_batches_completed(void)
+{
+ return rcu_state_p->expedited_sequence;
+}
+EXPORT_SYMBOL_GPL(rcu_exp_batches_completed);
+
+/*
+ * Return the number of RCU-sched expedited batches completed thus far
+ * for debug & stats. Similar to rcu_exp_batches_completed().
+ */
+unsigned long rcu_exp_batches_completed_sched(void)
+{
+ return rcu_sched_state.expedited_sequence;
+}
+EXPORT_SYMBOL_GPL(rcu_exp_batches_completed_sched);
+
+/*
* Force a quiescent state.
*/
void rcu_force_quiescent_state(void)
@@ -637,7 +678,7 @@
idle_task(smp_processor_id());
trace_rcu_dyntick(TPS("Error on entry: not idle task"), oldval, 0);
- ftrace_dump(DUMP_ORIG);
+ rcu_ftrace_dump(DUMP_ORIG);
WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
current->pid, current->comm,
idle->pid, idle->comm); /* must be idle task! */
@@ -799,7 +840,7 @@
trace_rcu_dyntick(TPS("Error on exit: not idle task"),
oldval, rdtp->dynticks_nesting);
- ftrace_dump(DUMP_ORIG);
+ rcu_ftrace_dump(DUMP_ORIG);
WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
current->pid, current->comm,
idle->pid, idle->comm); /* must be idle task! */
@@ -1224,8 +1265,10 @@
rsp->gp_flags,
gp_state_getname(rsp->gp_state), rsp->gp_state,
rsp->gp_kthread ? rsp->gp_kthread->state : ~0);
- if (rsp->gp_kthread)
+ if (rsp->gp_kthread) {
sched_show_task(rsp->gp_kthread);
+ wake_up_process(rsp->gp_kthread);
+ }
}
}
@@ -1249,6 +1292,25 @@
}
}
+/*
+ * If too much time has passed in the current grace period, and if
+ * so configured, go kick the relevant kthreads.
+ */
+static void rcu_stall_kick_kthreads(struct rcu_state *rsp)
+{
+ unsigned long j;
+
+ if (!rcu_kick_kthreads)
+ return;
+ j = READ_ONCE(rsp->jiffies_kick_kthreads);
+ if (time_after(jiffies, j) && rsp->gp_kthread) {
+ WARN_ONCE(1, "Kicking %s grace-period kthread\n", rsp->name);
+ rcu_ftrace_dump(DUMP_ALL);
+ wake_up_process(rsp->gp_kthread);
+ WRITE_ONCE(rsp->jiffies_kick_kthreads, j + HZ);
+ }
+}
+
static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum)
{
int cpu;
@@ -1260,6 +1322,11 @@
struct rcu_node *rnp = rcu_get_root(rsp);
long totqlen = 0;
+ /* Kick and suppress, if so configured. */
+ rcu_stall_kick_kthreads(rsp);
+ if (rcu_cpu_stall_suppress)
+ return;
+
/* Only let one CPU complain about others per time interval. */
raw_spin_lock_irqsave_rcu_node(rnp, flags);
@@ -1333,6 +1400,11 @@
struct rcu_node *rnp = rcu_get_root(rsp);
long totqlen = 0;
+ /* Kick and suppress, if so configured. */
+ rcu_stall_kick_kthreads(rsp);
+ if (rcu_cpu_stall_suppress)
+ return;
+
/*
* OK, time to rat on ourselves...
* See Documentation/RCU/stallwarn.txt for info on how to debug
@@ -1377,8 +1449,10 @@
unsigned long js;
struct rcu_node *rnp;
- if (rcu_cpu_stall_suppress || !rcu_gp_in_progress(rsp))
+ if ((rcu_cpu_stall_suppress && !rcu_kick_kthreads) ||
+ !rcu_gp_in_progress(rsp))
return;
+ rcu_stall_kick_kthreads(rsp);
j = jiffies;
/*
@@ -2117,8 +2191,11 @@
}
ret = 0;
for (;;) {
- if (!ret)
+ if (!ret) {
rsp->jiffies_force_qs = jiffies + j;
+ WRITE_ONCE(rsp->jiffies_kick_kthreads,
+ jiffies + 3 * j);
+ }
trace_rcu_grace_period(rsp->name,
READ_ONCE(rsp->gpnum),
TPS("fqswait"));
@@ -2144,6 +2221,15 @@
TPS("fqsend"));
cond_resched_rcu_qs();
WRITE_ONCE(rsp->gp_activity, jiffies);
+ ret = 0; /* Force full wait till next FQS. */
+ j = jiffies_till_next_fqs;
+ if (j > HZ) {
+ j = HZ;
+ jiffies_till_next_fqs = HZ;
+ } else if (j < 1) {
+ j = 1;
+ jiffies_till_next_fqs = 1;
+ }
} else {
/* Deal with stray signal. */
cond_resched_rcu_qs();
@@ -2152,14 +2238,12 @@
trace_rcu_grace_period(rsp->name,
READ_ONCE(rsp->gpnum),
TPS("fqswaitsig"));
- }
- j = jiffies_till_next_fqs;
- if (j > HZ) {
- j = HZ;
- jiffies_till_next_fqs = HZ;
- } else if (j < 1) {
- j = 1;
- jiffies_till_next_fqs = 1;
+ ret = 1; /* Keep old FQS timing. */
+ j = jiffies;
+ if (time_after(jiffies, rsp->jiffies_force_qs))
+ j = 1;
+ else
+ j = rsp->jiffies_force_qs - j;
}
}
@@ -3376,8 +3460,12 @@
}
static unsigned long rcu_exp_gp_seq_snap(struct rcu_state *rsp)
{
+ unsigned long s;
+
smp_mb(); /* Caller's modifications seen first by other CPUs. */
- return rcu_seq_snap(&rsp->expedited_sequence);
+ s = rcu_seq_snap(&rsp->expedited_sequence);
+ trace_rcu_exp_grace_period(rsp->name, s, TPS("snap"));
+ return s;
}
static bool rcu_exp_gp_seq_done(struct rcu_state *rsp, unsigned long s)
{
@@ -3469,7 +3557,7 @@
* for the current expedited grace period. Works only for preemptible
* RCU -- other RCU implementation use other means.
*
- * Caller must hold the root rcu_node's exp_funnel_mutex.
+ * Caller must hold the rcu_state's exp_mutex.
*/
static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
{
@@ -3485,8 +3573,8 @@
* recursively up the tree. (Calm down, calm down, we do the recursion
* iteratively!)
*
- * Caller must hold the root rcu_node's exp_funnel_mutex and the
- * specified rcu_node structure's ->lock.
+ * Caller must hold the rcu_state's exp_mutex and the specified rcu_node
+ * structure's ->lock.
*/
static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
bool wake, unsigned long flags)
@@ -3523,7 +3611,7 @@
* Report expedited quiescent state for specified node. This is a
* lock-acquisition wrapper function for __rcu_report_exp_rnp().
*
- * Caller must hold the root rcu_node's exp_funnel_mutex.
+ * Caller must hold the rcu_state's exp_mutex.
*/
static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp,
struct rcu_node *rnp, bool wake)
@@ -3536,8 +3624,8 @@
/*
* Report expedited quiescent state for multiple CPUs, all covered by the
- * specified leaf rcu_node structure. Caller must hold the root
- * rcu_node's exp_funnel_mutex.
+ * specified leaf rcu_node structure. Caller must hold the rcu_state's
+ * exp_mutex.
*/
static void rcu_report_exp_cpu_mult(struct rcu_state *rsp, struct rcu_node *rnp,
unsigned long mask, bool wake)
@@ -3555,7 +3643,6 @@
/*
* Report expedited quiescent state for specified rcu_data (CPU).
- * Caller must hold the root rcu_node's exp_funnel_mutex.
*/
static void rcu_report_exp_rdp(struct rcu_state *rsp, struct rcu_data *rdp,
bool wake)
@@ -3564,15 +3651,11 @@
}
/* Common code for synchronize_{rcu,sched}_expedited() work-done checking. */
-static bool sync_exp_work_done(struct rcu_state *rsp, struct rcu_node *rnp,
- struct rcu_data *rdp,
- atomic_long_t *stat, unsigned long s)
+static bool sync_exp_work_done(struct rcu_state *rsp, atomic_long_t *stat,
+ unsigned long s)
{
if (rcu_exp_gp_seq_done(rsp, s)) {
- if (rnp)
- mutex_unlock(&rnp->exp_funnel_mutex);
- else if (rdp)
- mutex_unlock(&rdp->exp_funnel_mutex);
+ trace_rcu_exp_grace_period(rsp->name, s, TPS("done"));
/* Ensure test happens before caller kfree(). */
smp_mb__before_atomic(); /* ^^^ */
atomic_long_inc(stat);
@@ -3582,59 +3665,65 @@
}
/*
- * Funnel-lock acquisition for expedited grace periods. Returns a
- * pointer to the root rcu_node structure, or NULL if some other
- * task did the expedited grace period for us.
+ * Funnel-lock acquisition for expedited grace periods. Returns true
+ * if some other task completed an expedited grace period that this task
+ * can piggy-back on, and with no mutex held. Otherwise, returns false
+ * with the mutex held, indicating that the caller must actually do the
+ * expedited grace period.
*/
-static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
+static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
{
struct rcu_data *rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
- struct rcu_node *rnp0;
- struct rcu_node *rnp1 = NULL;
+ struct rcu_node *rnp = rdp->mynode;
+ struct rcu_node *rnp_root = rcu_get_root(rsp);
+
+ /* Low-contention fastpath. */
+ if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s) &&
+ (rnp == rnp_root ||
+ ULONG_CMP_LT(READ_ONCE(rnp_root->exp_seq_rq), s)) &&
+ !mutex_is_locked(&rsp->exp_mutex) &&
+ mutex_trylock(&rsp->exp_mutex))
+ goto fastpath;
/*
- * First try directly acquiring the root lock in order to reduce
- * latency in the common case where expedited grace periods are
- * rare. We check mutex_is_locked() to avoid pathological levels of
- * memory contention on ->exp_funnel_mutex in the heavy-load case.
+ * Each pass through the following loop works its way up
+ * the rcu_node tree, returning if others have done the work or
+ * otherwise falls through to acquire rsp->exp_mutex. The mapping
+ * from CPU to rcu_node structure can be inexact, as it is just
+ * promoting locality and is not strictly needed for correctness.
*/
- rnp0 = rcu_get_root(rsp);
- if (!mutex_is_locked(&rnp0->exp_funnel_mutex)) {
- if (mutex_trylock(&rnp0->exp_funnel_mutex)) {
- if (sync_exp_work_done(rsp, rnp0, NULL,
- &rdp->expedited_workdone0, s))
- return NULL;
- return rnp0;
+ for (; rnp != NULL; rnp = rnp->parent) {
+ if (sync_exp_work_done(rsp, &rdp->exp_workdone1, s))
+ return true;
+
+ /* Work not done, either wait here or go up. */
+ spin_lock(&rnp->exp_lock);
+ if (ULONG_CMP_GE(rnp->exp_seq_rq, s)) {
+
+ /* Someone else doing GP, so wait for them. */
+ spin_unlock(&rnp->exp_lock);
+ trace_rcu_exp_funnel_lock(rsp->name, rnp->level,
+ rnp->grplo, rnp->grphi,
+ TPS("wait"));
+ wait_event(rnp->exp_wq[(s >> 1) & 0x3],
+ sync_exp_work_done(rsp,
+ &rdp->exp_workdone2, s));
+ return true;
}
+ rnp->exp_seq_rq = s; /* Followers can wait on us. */
+ spin_unlock(&rnp->exp_lock);
+ trace_rcu_exp_funnel_lock(rsp->name, rnp->level, rnp->grplo,
+ rnp->grphi, TPS("nxtlvl"));
}
-
- /*
- * Each pass through the following loop works its way
- * up the rcu_node tree, returning if others have done the
- * work or otherwise falls through holding the root rnp's
- * ->exp_funnel_mutex. The mapping from CPU to rcu_node structure
- * can be inexact, as it is just promoting locality and is not
- * strictly needed for correctness.
- */
- if (sync_exp_work_done(rsp, NULL, NULL, &rdp->expedited_workdone1, s))
- return NULL;
- mutex_lock(&rdp->exp_funnel_mutex);
- rnp0 = rdp->mynode;
- for (; rnp0 != NULL; rnp0 = rnp0->parent) {
- if (sync_exp_work_done(rsp, rnp1, rdp,
- &rdp->expedited_workdone2, s))
- return NULL;
- mutex_lock(&rnp0->exp_funnel_mutex);
- if (rnp1)
- mutex_unlock(&rnp1->exp_funnel_mutex);
- else
- mutex_unlock(&rdp->exp_funnel_mutex);
- rnp1 = rnp0;
+ mutex_lock(&rsp->exp_mutex);
+fastpath:
+ if (sync_exp_work_done(rsp, &rdp->exp_workdone3, s)) {
+ mutex_unlock(&rsp->exp_mutex);
+ return true;
}
- if (sync_exp_work_done(rsp, rnp1, rdp,
- &rdp->expedited_workdone3, s))
- return NULL;
- return rnp1;
+ rcu_exp_gp_seq_start(rsp);
+ trace_rcu_exp_grace_period(rsp->name, s, TPS("start"));
+ return false;
}
/* Invoked on each online non-idle CPU for expedited quiescent state. */
@@ -3649,6 +3738,11 @@
if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) ||
__this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))
return;
+ if (rcu_is_cpu_rrupt_from_idle()) {
+ rcu_report_exp_rdp(&rcu_sched_state,
+ this_cpu_ptr(&rcu_sched_data), true);
+ return;
+ }
__this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, true);
resched_cpu(smp_processor_id());
}
@@ -3773,7 +3867,7 @@
rsp->name);
ndetected = 0;
rcu_for_each_leaf_node(rsp, rnp) {
- ndetected = rcu_print_task_exp_stall(rnp);
+ ndetected += rcu_print_task_exp_stall(rnp);
mask = 1;
for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask <<= 1) {
struct rcu_data *rdp;
@@ -3783,7 +3877,7 @@
ndetected++;
rdp = per_cpu_ptr(rsp->rda, cpu);
pr_cont(" %d-%c%c%c", cpu,
- "O."[cpu_online(cpu)],
+ "O."[!!cpu_online(cpu)],
"o."[!!(rdp->grpmask & rnp->expmaskinit)],
"N."[!!(rdp->grpmask & rnp->expmaskinitnext)]);
}
@@ -3792,7 +3886,7 @@
pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n",
jiffies - jiffies_start, rsp->expedited_sequence,
rnp_root->expmask, ".T"[!!rnp_root->exp_tasks]);
- if (!ndetected) {
+ if (ndetected) {
pr_err("blocking rcu_node structures:");
rcu_for_each_node_breadth_first(rsp, rnp) {
if (rnp == rnp_root)
@@ -3818,6 +3912,41 @@
}
}
+/*
+ * Wait for the current expedited grace period to complete, and then
+ * wake up everyone who piggybacked on the just-completed expedited
+ * grace period. Also update all the ->exp_seq_rq counters as needed
+ * in order to avoid counter-wrap problems.
+ */
+static void rcu_exp_wait_wake(struct rcu_state *rsp, unsigned long s)
+{
+ struct rcu_node *rnp;
+
+ synchronize_sched_expedited_wait(rsp);
+ rcu_exp_gp_seq_end(rsp);
+ trace_rcu_exp_grace_period(rsp->name, s, TPS("end"));
+
+ /*
+ * Switch over to wakeup mode, allowing the next GP, but -only- the
+ * next GP, to proceed.
+ */
+ mutex_lock(&rsp->exp_wake_mutex);
+ mutex_unlock(&rsp->exp_mutex);
+
+ rcu_for_each_node_breadth_first(rsp, rnp) {
+ if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s)) {
+ spin_lock(&rnp->exp_lock);
+ /* Recheck, avoid hang in case someone just arrived. */
+ if (ULONG_CMP_LT(rnp->exp_seq_rq, s))
+ rnp->exp_seq_rq = s;
+ spin_unlock(&rnp->exp_lock);
+ }
+ wake_up_all(&rnp->exp_wq[(rsp->expedited_sequence >> 1) & 0x3]);
+ }
+ trace_rcu_exp_grace_period(rsp->name, s, TPS("endwake"));
+ mutex_unlock(&rsp->exp_wake_mutex);
+}
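The sequence counter advances by two per expedited grace period (odd means one is in flight), and (s >> 1) & 0x3 spreads successive snapshots over the four wait queues initialized in rcu_init_one(), so a wakeup for one grace period cannot stampede waiters queued for a later one. A standalone illustration of the indexing shared by the wait side in exp_funnel_lock() and the wake side above:

	#include <stdio.h>

	int main(void)
	{
		unsigned long s;

		for (s = 0; s <= 14; s += 2)	/* snapshots of an even (idle) counter */
			printf("snapshot %2lu -> exp_wq[%lu]\n", s, (s >> 1) & 0x3);
		return 0;
	}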
+
/**
* synchronize_sched_expedited - Brute-force RCU-sched grace period
*
@@ -3837,7 +3966,6 @@
void synchronize_sched_expedited(void)
{
unsigned long s;
- struct rcu_node *rnp;
struct rcu_state *rsp = &rcu_sched_state;
/* If only one CPU, this is automatically a grace period. */
@@ -3852,17 +3980,14 @@
/* Take a snapshot of the sequence number. */
s = rcu_exp_gp_seq_snap(rsp);
-
- rnp = exp_funnel_lock(rsp, s);
- if (rnp == NULL)
+ if (exp_funnel_lock(rsp, s))
return; /* Someone else did our work for us. */
- rcu_exp_gp_seq_start(rsp);
+ /* Initialize the rcu_node tree in preparation for the wait. */
sync_rcu_exp_select_cpus(rsp, sync_sched_exp_handler);
- synchronize_sched_expedited_wait(rsp);
- rcu_exp_gp_seq_end(rsp);
- mutex_unlock(&rnp->exp_funnel_mutex);
+ /* Wait and clean up, including waking everyone. */
+ rcu_exp_wait_wake(rsp, s);
}
EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
@@ -4162,7 +4287,6 @@
WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
rdp->cpu = cpu;
rdp->rsp = rsp;
- mutex_init(&rdp->exp_funnel_mutex);
rcu_boot_init_nocb_percpu_data(rdp);
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
@@ -4420,10 +4544,8 @@
{
static const char * const buf[] = RCU_NODE_NAME_INIT;
static const char * const fqs[] = RCU_FQS_NAME_INIT;
- static const char * const exp[] = RCU_EXP_NAME_INIT;
static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
- static struct lock_class_key rcu_exp_class[RCU_NUM_LVLS];
static u8 fl_mask = 0x1;
int levelcnt[RCU_NUM_LVLS]; /* # nodes in each level. */
@@ -4482,9 +4604,11 @@
rnp->level = i;
INIT_LIST_HEAD(&rnp->blkd_tasks);
rcu_init_one_nocb(rnp);
- mutex_init(&rnp->exp_funnel_mutex);
- lockdep_set_class_and_name(&rnp->exp_funnel_mutex,
- &rcu_exp_class[i], exp[i]);
+ init_waitqueue_head(&rnp->exp_wq[0]);
+ init_waitqueue_head(&rnp->exp_wq[1]);
+ init_waitqueue_head(&rnp->exp_wq[2]);
+ init_waitqueue_head(&rnp->exp_wq[3]);
+ spin_lock_init(&rnp->exp_lock);
}
}
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index df668c0..e3959f5 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -70,7 +70,6 @@
# define NUM_RCU_LVL_INIT { NUM_RCU_LVL_0 }
# define RCU_NODE_NAME_INIT { "rcu_node_0" }
# define RCU_FQS_NAME_INIT { "rcu_node_fqs_0" }
-# define RCU_EXP_NAME_INIT { "rcu_node_exp_0" }
#elif NR_CPUS <= RCU_FANOUT_2
# define RCU_NUM_LVLS 2
# define NUM_RCU_LVL_0 1
@@ -79,7 +78,6 @@
# define NUM_RCU_LVL_INIT { NUM_RCU_LVL_0, NUM_RCU_LVL_1 }
# define RCU_NODE_NAME_INIT { "rcu_node_0", "rcu_node_1" }
# define RCU_FQS_NAME_INIT { "rcu_node_fqs_0", "rcu_node_fqs_1" }
-# define RCU_EXP_NAME_INIT { "rcu_node_exp_0", "rcu_node_exp_1" }
#elif NR_CPUS <= RCU_FANOUT_3
# define RCU_NUM_LVLS 3
# define NUM_RCU_LVL_0 1
@@ -89,7 +87,6 @@
# define NUM_RCU_LVL_INIT { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2 }
# define RCU_NODE_NAME_INIT { "rcu_node_0", "rcu_node_1", "rcu_node_2" }
# define RCU_FQS_NAME_INIT { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2" }
-# define RCU_EXP_NAME_INIT { "rcu_node_exp_0", "rcu_node_exp_1", "rcu_node_exp_2" }
#elif NR_CPUS <= RCU_FANOUT_4
# define RCU_NUM_LVLS 4
# define NUM_RCU_LVL_0 1
@@ -100,7 +97,6 @@
# define NUM_RCU_LVL_INIT { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2, NUM_RCU_LVL_3 }
# define RCU_NODE_NAME_INIT { "rcu_node_0", "rcu_node_1", "rcu_node_2", "rcu_node_3" }
# define RCU_FQS_NAME_INIT { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2", "rcu_node_fqs_3" }
-# define RCU_EXP_NAME_INIT { "rcu_node_exp_0", "rcu_node_exp_1", "rcu_node_exp_2", "rcu_node_exp_3" }
#else
# error "CONFIG_RCU_FANOUT insufficient for NR_CPUS"
#endif /* #if (NR_CPUS) <= RCU_FANOUT_1 */
@@ -252,7 +248,9 @@
/* Counts of upcoming no-CB GP requests. */
raw_spinlock_t fqslock ____cacheline_internodealigned_in_smp;
- struct mutex exp_funnel_mutex ____cacheline_internodealigned_in_smp;
+ spinlock_t exp_lock ____cacheline_internodealigned_in_smp;
+ unsigned long exp_seq_rq;
+ wait_queue_head_t exp_wq[4];
} ____cacheline_internodealigned_in_smp;
/*
@@ -387,11 +385,9 @@
#ifdef CONFIG_RCU_FAST_NO_HZ
struct rcu_head oom_head;
#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
- struct mutex exp_funnel_mutex;
- atomic_long_t expedited_workdone0; /* # done by others #0. */
- atomic_long_t expedited_workdone1; /* # done by others #1. */
- atomic_long_t expedited_workdone2; /* # done by others #2. */
- atomic_long_t expedited_workdone3; /* # done by others #3. */
+ atomic_long_t exp_workdone1; /* # done by others #1. */
+ atomic_long_t exp_workdone2; /* # done by others #2. */
+ atomic_long_t exp_workdone3; /* # done by others #3. */
/* 7) Callback offloading. */
#ifdef CONFIG_RCU_NOCB_CPU
@@ -505,6 +501,8 @@
/* _rcu_barrier(). */
/* End of fields guarded by barrier_mutex. */
+ struct mutex exp_mutex; /* Serialize expedited GP. */
+ struct mutex exp_wake_mutex; /* Serialize wakeup. */
unsigned long expedited_sequence; /* Take a ticket. */
atomic_long_t expedited_normal; /* # fallbacks to normal. */
atomic_t expedited_need_qs; /* # CPUs left to check in. */
@@ -513,6 +511,8 @@
unsigned long jiffies_force_qs; /* Time at which to invoke */
/* force_quiescent_state(). */
+ unsigned long jiffies_kick_kthreads; /* Time at which to kick */
+ /* kthreads, if configured. */
unsigned long n_force_qs; /* Number of calls to */
/* force_quiescent_state(). */
unsigned long n_force_qs_lh; /* ~Number of calls leaving */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index efdf7b6..ff1cd4e 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -722,18 +722,22 @@
* synchronize_rcu_expedited - Brute-force RCU grace period
*
* Wait for an RCU-preempt grace period, but expedite it. The basic
- * idea is to invoke synchronize_sched_expedited() to push all the tasks to
- * the ->blkd_tasks lists and wait for this list to drain. This consumes
- * significant time on all CPUs and is unfriendly to real-time workloads,
- * so is thus not recommended for any sort of common-case code.
- * In fact, if you are using synchronize_rcu_expedited() in a loop,
- * please restructure your code to batch your updates, and then Use a
- * single synchronize_rcu() instead.
+ * idea is to IPI all non-idle non-nohz online CPUs. The IPI handler
+ * checks whether the CPU is in an RCU-preempt critical section, and
+ * if so, it sets a flag that causes the outermost rcu_read_unlock()
+ * to report the quiescent state. On the other hand, if the CPU is
+ * not in an RCU read-side critical section, the IPI handler reports
+ * the quiescent state immediately.
+ *
+ * Although this is a great improvement over previous expedited
+ * implementations, it is still unfriendly to real-time workloads, so is
+ * thus not recommended for any sort of common-case code. In fact, if
+ * you are using synchronize_rcu_expedited() in a loop, please restructure
+ * your code to batch your updates, and then use a single synchronize_rcu()
+ * instead.
*/
void synchronize_rcu_expedited(void)
{
- struct rcu_node *rnp;
- struct rcu_node *rnp_unlock;
struct rcu_state *rsp = rcu_state_p;
unsigned long s;
@@ -744,23 +748,14 @@
}
s = rcu_exp_gp_seq_snap(rsp);
-
- rnp_unlock = exp_funnel_lock(rsp, s);
- if (rnp_unlock == NULL)
+ if (exp_funnel_lock(rsp, s))
return; /* Someone else did our work for us. */
- rcu_exp_gp_seq_start(rsp);
-
/* Initialize the rcu_node tree in preparation for the wait. */
sync_rcu_exp_select_cpus(rsp, sync_rcu_exp_handler);
- /* Wait for snapshotted ->blkd_tasks lists to drain. */
- rnp = rcu_get_root(rsp);
- synchronize_sched_expedited_wait(rsp);
-
- /* Clean up and exit. */
- rcu_exp_gp_seq_end(rsp);
- mutex_unlock(&rnp_unlock->exp_funnel_mutex);
+ /* Wait for ->blkd_tasks lists to drain, then wake everyone up. */
+ rcu_exp_wait_wake(rsp, s);
}
EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c
index 1088e64..86782f9a 100644
--- a/kernel/rcu/tree_trace.c
+++ b/kernel/rcu/tree_trace.c
@@ -185,17 +185,16 @@
int cpu;
struct rcu_state *rsp = (struct rcu_state *)m->private;
struct rcu_data *rdp;
- unsigned long s0 = 0, s1 = 0, s2 = 0, s3 = 0;
+ unsigned long s1 = 0, s2 = 0, s3 = 0;
for_each_possible_cpu(cpu) {
rdp = per_cpu_ptr(rsp->rda, cpu);
- s0 += atomic_long_read(&rdp->expedited_workdone0);
- s1 += atomic_long_read(&rdp->expedited_workdone1);
- s2 += atomic_long_read(&rdp->expedited_workdone2);
- s3 += atomic_long_read(&rdp->expedited_workdone3);
+ s1 += atomic_long_read(&rdp->exp_workdone1);
+ s2 += atomic_long_read(&rdp->exp_workdone2);
+ s3 += atomic_long_read(&rdp->exp_workdone3);
}
- seq_printf(m, "s=%lu wd0=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n",
- rsp->expedited_sequence, s0, s1, s2, s3,
+ seq_printf(m, "s=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n",
+ rsp->expedited_sequence, s1, s2, s3,
atomic_long_read(&rsp->expedited_normal),
atomic_read(&rsp->expedited_need_qs),
rsp->expedited_sequence / 2);
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index ca828b4..3ccdc8e 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -67,7 +67,7 @@
module_param(rcu_normal_after_boot, int, 0);
#endif /* #ifndef CONFIG_TINY_RCU */
-#if defined(CONFIG_DEBUG_LOCK_ALLOC) && defined(CONFIG_PREEMPT_COUNT)
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
/**
* rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section?
*
@@ -111,7 +111,7 @@
return 0;
if (debug_locks)
lockdep_opinion = lock_is_held(&rcu_sched_lock_map);
- return lockdep_opinion || preempt_count() != 0 || irqs_disabled();
+ return lockdep_opinion || !preemptible();
}
EXPORT_SYMBOL(rcu_read_lock_sched_held);
#endif
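For reference, the simplification works because preemptible() already folds in both conditions the old code open-coded, while on !PREEMPT_COUNT kernels it is constant 0 (so !preemptible() conservatively reads as "can't tell"), which is why the #ifdef above could be relaxed:

	/* From <linux/preempt.h>: */
	#define preemptible()	(preempt_count() == 0 && !irqs_disabled())
	/* hence: !preemptible() == (preempt_count() != 0 || irqs_disabled()) */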
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index 414d9c1..5e59b83 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -24,3 +24,4 @@
obj-$(CONFIG_SCHED_DEBUG) += debug.o
obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o
obj-$(CONFIG_CPU_FREQ) += cpufreq.o
+obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index fedb967..e85a725 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -318,6 +318,7 @@
return clock;
}
+EXPORT_SYMBOL_GPL(sched_clock_cpu);
void sched_clock_tick(void)
{
@@ -363,39 +364,6 @@
}
EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
-/*
- * As outlined at the top, provides a fast, high resolution, nanosecond
- * time source that is monotonic per cpu argument and has bounded drift
- * between cpus.
- *
- * ######################### BIG FAT WARNING ##########################
- * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can #
- * # go backwards !! #
- * ####################################################################
- */
-u64 cpu_clock(int cpu)
-{
- if (!sched_clock_stable())
- return sched_clock_cpu(cpu);
-
- return sched_clock();
-}
-
-/*
- * Similar to cpu_clock() for the current cpu. Time will only be observed
- * to be monotonic if care is taken to only compare timestampt taken on the
- * same CPU.
- *
- * See cpu_clock().
- */
-u64 local_clock(void)
-{
- if (!sched_clock_stable())
- return sched_clock_cpu(raw_smp_processor_id());
-
- return sched_clock();
-}
-
#else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
void sched_clock_init(void)
@@ -410,22 +378,8 @@
return sched_clock();
}
-
-u64 cpu_clock(int cpu)
-{
- return sched_clock();
-}
-
-u64 local_clock(void)
-{
- return sched_clock();
-}
-
#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
-EXPORT_SYMBOL_GPL(cpu_clock);
-EXPORT_SYMBOL_GPL(local_clock);
-
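The deleted cpu_clock()/local_clock() bodies are not lost: they move to <linux/sched.h> as inlines, which is also why sched_clock_cpu() gains an export above. A sketch of the replacement, per the header in this series:

	static inline u64 cpu_clock(int cpu)
	{
		return sched_clock_cpu(cpu);
	}

	static inline u64 local_clock(void)
	{
		return sched_clock_cpu(raw_smp_processor_id());
	}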
/*
* Running clock - returns the time that has elapsed while a guest has been
* running.
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d1f7149..404c078 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -33,7 +33,7 @@
#include <linux/init.h>
#include <linux/uaccess.h>
#include <linux/highmem.h>
-#include <asm/mmu_context.h>
+#include <linux/mmu_context.h>
#include <linux/interrupt.h>
#include <linux/capability.h>
#include <linux/completion.h>
@@ -170,6 +170,71 @@
return rq;
}
+/*
+ * __task_rq_lock - lock the rq @p resides on.
+ */
+struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf)
+ __acquires(rq->lock)
+{
+ struct rq *rq;
+
+ lockdep_assert_held(&p->pi_lock);
+
+ for (;;) {
+ rq = task_rq(p);
+ raw_spin_lock(&rq->lock);
+ if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) {
+ rf->cookie = lockdep_pin_lock(&rq->lock);
+ return rq;
+ }
+ raw_spin_unlock(&rq->lock);
+
+ while (unlikely(task_on_rq_migrating(p)))
+ cpu_relax();
+ }
+}
+
+/*
+ * task_rq_lock - lock p->pi_lock and lock the rq @p resides on.
+ */
+struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf)
+ __acquires(p->pi_lock)
+ __acquires(rq->lock)
+{
+ struct rq *rq;
+
+ for (;;) {
+ raw_spin_lock_irqsave(&p->pi_lock, rf->flags);
+ rq = task_rq(p);
+ raw_spin_lock(&rq->lock);
+ /*
+ * move_queued_task() task_rq_lock()
+ *
+ * ACQUIRE (rq->lock)
+ * [S] ->on_rq = MIGRATING [L] rq = task_rq()
+ * WMB (__set_task_cpu()) ACQUIRE (rq->lock);
+ * [S] ->cpu = new_cpu [L] task_rq()
+ * [L] ->on_rq
+ * RELEASE (rq->lock)
+ *
+ * If we observe the old cpu in task_rq_lock, the acquire of
+ * the old rq->lock will fully serialize against the stores.
+ *
+ * If we observe the new cpu in task_rq_lock, the acquire will
+ * pair with the WMB to ensure we must then also see migrating.
+ */
+ if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) {
+ rf->cookie = lockdep_pin_lock(&rq->lock);
+ return rq;
+ }
+ raw_spin_unlock(&rq->lock);
+ raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags);
+
+ while (unlikely(task_on_rq_migrating(p)))
+ cpu_relax();
+ }
+}
+
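Callers now pass a struct rq_flags, which bundles the saved IRQ flags with the lockdep pin cookie. A minimal usage sketch:

	struct rq_flags rf;
	struct rq *rq;

	rq = task_rq_lock(p, &rf);	/* takes p->pi_lock and rq->lock, pins rq->lock */
	/* ... p cannot change runqueues while both locks are held ... */
	task_rq_unlock(rq, p, &rf);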
#ifdef CONFIG_SCHED_HRTICK
/*
* Use HR-timers to deliver accurate preemption points.
@@ -249,29 +314,6 @@
}
}
-static int
-hotplug_hrtick(struct notifier_block *nfb, unsigned long action, void *hcpu)
-{
- int cpu = (int)(long)hcpu;
-
- switch (action) {
- case CPU_UP_CANCELED:
- case CPU_UP_CANCELED_FROZEN:
- case CPU_DOWN_PREPARE:
- case CPU_DOWN_PREPARE_FROZEN:
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
- hrtick_clear(cpu_rq(cpu));
- return NOTIFY_OK;
- }
-
- return NOTIFY_DONE;
-}
-
-static __init void init_hrtick(void)
-{
- hotcpu_notifier(hotplug_hrtick, 0);
-}
#else
/*
* Called to set the hrtick timer state.
@@ -288,10 +330,6 @@
hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay),
HRTIMER_MODE_REL_PINNED);
}
-
-static inline void init_hrtick(void)
-{
-}
#endif /* CONFIG_SMP */
static void init_rq_hrtick(struct rq *rq)
@@ -315,10 +353,6 @@
static inline void init_rq_hrtick(struct rq *rq)
{
}
-
-static inline void init_hrtick(void)
-{
-}
#endif /* CONFIG_SCHED_HRTICK */
/*
@@ -400,7 +434,7 @@
* wakeup due to that.
*
* This cmpxchg() implies a full barrier, which pairs with the write
- * barrier implied by the wakeup in wake_up_list().
+ * barrier implied by the wakeup in wake_up_q().
*/
if (cmpxchg(&node->next, NULL, WAKE_Q_TAIL))
return;
@@ -499,7 +533,10 @@
rcu_read_lock();
for_each_domain(cpu, sd) {
for_each_cpu(i, sched_domain_span(sd)) {
- if (!idle_cpu(i) && is_housekeeping_cpu(cpu)) {
+ if (cpu == i)
+ continue;
+
+ if (!idle_cpu(i) && is_housekeeping_cpu(i)) {
cpu = i;
goto unlock;
}
@@ -1085,12 +1122,20 @@
static int __set_cpus_allowed_ptr(struct task_struct *p,
const struct cpumask *new_mask, bool check)
{
- unsigned long flags;
- struct rq *rq;
+ const struct cpumask *cpu_valid_mask = cpu_active_mask;
unsigned int dest_cpu;
+ struct rq_flags rf;
+ struct rq *rq;
int ret = 0;
- rq = task_rq_lock(p, &flags);
+ rq = task_rq_lock(p, &rf);
+
+ if (p->flags & PF_KTHREAD) {
+ /*
+ * Kernel threads are allowed on online && !active CPUs
+ */
+ cpu_valid_mask = cpu_online_mask;
+ }
/*
* Must re-check here, to close a race against __kthread_bind(),
@@ -1104,22 +1149,32 @@
if (cpumask_equal(&p->cpus_allowed, new_mask))
goto out;
- if (!cpumask_intersects(new_mask, cpu_active_mask)) {
+ if (!cpumask_intersects(new_mask, cpu_valid_mask)) {
ret = -EINVAL;
goto out;
}
do_set_cpus_allowed(p, new_mask);
+ if (p->flags & PF_KTHREAD) {
+ /*
+ * For kernel threads that do indeed end up on online &&
+ * !active we want to ensure they are strict per-cpu threads.
+ */
+ WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) &&
+ !cpumask_intersects(new_mask, cpu_active_mask) &&
+ p->nr_cpus_allowed != 1);
+ }
+
/* Can the task run on the task's current CPU? If so, we're done */
if (cpumask_test_cpu(task_cpu(p), new_mask))
goto out;
- dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
+ dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask);
if (task_running(rq, p) || p->state == TASK_WAKING) {
struct migration_arg arg = { p, dest_cpu };
/* Need help from migration thread: drop lock and wait. */
- task_rq_unlock(rq, p, &flags);
+ task_rq_unlock(rq, p, &rf);
stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
tlb_migrate_finish(p->mm);
return 0;
@@ -1128,12 +1183,12 @@
* OK, since we're going to drop the lock immediately
* afterwards anyway.
*/
- lockdep_unpin_lock(&rq->lock);
+ lockdep_unpin_lock(&rq->lock, rf.cookie);
rq = move_queued_task(rq, p, dest_cpu);
- lockdep_pin_lock(&rq->lock);
+ lockdep_repin_lock(&rq->lock, rf.cookie);
}
out:
- task_rq_unlock(rq, p, &flags);
+ task_rq_unlock(rq, p, &rf);
return ret;
}
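Illustrative only: with cpu_valid_mask in place, a strict per-cpu kthread can keep an affinity to a CPU that is online but not yet active, which ordinary tasks can no longer be given:

	/* Sketch: binding a per-cpu kthread during early CPU bring-up. */
	kthread_bind(p, cpu);		/* PF_KTHREAD, single-CPU mask */
	wake_up_process(p);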
@@ -1317,8 +1372,8 @@
*/
unsigned long wait_task_inactive(struct task_struct *p, long match_state)
{
- unsigned long flags;
int running, queued;
+ struct rq_flags rf;
unsigned long ncsw;
struct rq *rq;
@@ -1353,14 +1408,14 @@
* lock now, to be *sure*. If we're wrong, we'll
* just go back and repeat.
*/
- rq = task_rq_lock(p, &flags);
+ rq = task_rq_lock(p, &rf);
trace_sched_wait_task(p);
running = task_running(rq, p);
queued = task_on_rq_queued(p);
ncsw = 0;
if (!match_state || p->state == match_state)
ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
- task_rq_unlock(rq, p, &flags);
+ task_rq_unlock(rq, p, &rf);
/*
* If it changed from the expected state, bail out now.
@@ -1434,6 +1489,25 @@
/*
* ->cpus_allowed is protected by both rq->lock and p->pi_lock
+ *
+ * A few notes on cpu_active vs cpu_online:
+ *
+ * - cpu_active must be a subset of cpu_online
+ *
+ * - on cpu-up we allow per-cpu kthreads on the online && !active cpu,
+ * see __set_cpus_allowed_ptr(). At this point the newly online
+ * cpu isn't yet part of the sched domains, and balancing will not
+ * see it.
+ *
+ * - on cpu-down we clear cpu_active() to mask the sched domains and
+ *    prevent the load balancer from placing new tasks on the to-be-removed
+ * cpu. Existing tasks will remain running there and will be taken
+ * off.
+ *
+ * This means that fallback selection must not select !active CPUs.
+ * And can assume that any active CPU must be online. Conversely
+ * select_task_rq() below may allow selection of !active CPUs in order
+ * to satisfy the above rules.
*/
static int select_fallback_rq(int cpu, struct task_struct *p)
{
@@ -1452,8 +1526,6 @@
/* Look for allowed, online CPU in same node. */
for_each_cpu(dest_cpu, nodemask) {
- if (!cpu_online(dest_cpu))
- continue;
if (!cpu_active(dest_cpu))
continue;
if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
@@ -1464,8 +1536,6 @@
for (;;) {
/* Any allowed, online CPU? */
for_each_cpu(dest_cpu, tsk_cpus_allowed(p)) {
- if (!cpu_online(dest_cpu))
- continue;
if (!cpu_active(dest_cpu))
continue;
goto out;
@@ -1515,8 +1585,10 @@
{
lockdep_assert_held(&p->pi_lock);
- if (p->nr_cpus_allowed > 1)
+ if (tsk_nr_cpus_allowed(p) > 1)
cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags);
+ else
+ cpu = cpumask_any(tsk_cpus_allowed(p));
/*
* In order not to call set_task_cpu() on a blocking task we need
@@ -1604,8 +1676,8 @@
/*
* Mark the task runnable and perform wakeup-preemption.
*/
-static void
-ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
+static void ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags,
+ struct pin_cookie cookie)
{
check_preempt_curr(rq, p, wake_flags);
p->state = TASK_RUNNING;
@@ -1617,9 +1689,9 @@
* Our task @p is fully woken up and running; so its safe to
* drop the rq->lock, hereafter rq is only used for statistics.
*/
- lockdep_unpin_lock(&rq->lock);
+ lockdep_unpin_lock(&rq->lock, cookie);
p->sched_class->task_woken(rq, p);
- lockdep_pin_lock(&rq->lock);
+ lockdep_repin_lock(&rq->lock, cookie);
}
if (rq->idle_stamp) {
@@ -1637,17 +1709,23 @@
}
static void
-ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags)
+ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
+ struct pin_cookie cookie)
{
+ int en_flags = ENQUEUE_WAKEUP;
+
lockdep_assert_held(&rq->lock);
#ifdef CONFIG_SMP
if (p->sched_contributes_to_load)
rq->nr_uninterruptible--;
+
+ if (wake_flags & WF_MIGRATED)
+ en_flags |= ENQUEUE_MIGRATED;
#endif
- ttwu_activate(rq, p, ENQUEUE_WAKEUP | ENQUEUE_WAKING);
- ttwu_do_wakeup(rq, p, wake_flags);
+ ttwu_activate(rq, p, en_flags);
+ ttwu_do_wakeup(rq, p, wake_flags, cookie);
}
/*
@@ -1658,17 +1736,18 @@
*/
static int ttwu_remote(struct task_struct *p, int wake_flags)
{
+ struct rq_flags rf;
struct rq *rq;
int ret = 0;
- rq = __task_rq_lock(p);
+ rq = __task_rq_lock(p, &rf);
if (task_on_rq_queued(p)) {
/* check_preempt_curr() may use rq clock */
update_rq_clock(rq);
- ttwu_do_wakeup(rq, p, wake_flags);
+ ttwu_do_wakeup(rq, p, wake_flags, rf.cookie);
ret = 1;
}
- __task_rq_unlock(rq);
+ __task_rq_unlock(rq, &rf);
return ret;
}
@@ -1678,6 +1757,7 @@
{
struct rq *rq = this_rq();
struct llist_node *llist = llist_del_all(&rq->wake_list);
+ struct pin_cookie cookie;
struct task_struct *p;
unsigned long flags;
@@ -1685,15 +1765,19 @@
return;
raw_spin_lock_irqsave(&rq->lock, flags);
- lockdep_pin_lock(&rq->lock);
+ cookie = lockdep_pin_lock(&rq->lock);
while (llist) {
p = llist_entry(llist, struct task_struct, wake_entry);
llist = llist_next(llist);
- ttwu_do_activate(rq, p, 0);
+ /*
+ * See ttwu_queue(); we only call ttwu_queue_remote() when
+ * it's a cross-CPU wakeup.
+ */
+ ttwu_do_activate(rq, p, WF_MIGRATED, cookie);
}
- lockdep_unpin_lock(&rq->lock);
+ lockdep_unpin_lock(&rq->lock, cookie);
raw_spin_unlock_irqrestore(&rq->lock, flags);
}
@@ -1777,9 +1861,10 @@
}
#endif /* CONFIG_SMP */
-static void ttwu_queue(struct task_struct *p, int cpu)
+static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
{
struct rq *rq = cpu_rq(cpu);
+ struct pin_cookie cookie;
#if defined(CONFIG_SMP)
if (sched_feat(TTWU_QUEUE) && !cpus_share_cache(smp_processor_id(), cpu)) {
@@ -1790,9 +1875,9 @@
#endif
raw_spin_lock(&rq->lock);
- lockdep_pin_lock(&rq->lock);
- ttwu_do_activate(rq, p, 0);
- lockdep_unpin_lock(&rq->lock);
+ cookie = lockdep_pin_lock(&rq->lock);
+ ttwu_do_activate(rq, p, wake_flags, cookie);
+ lockdep_unpin_lock(&rq->lock, cookie);
raw_spin_unlock(&rq->lock);
}
@@ -1961,9 +2046,6 @@
p->sched_contributes_to_load = !!task_contributes_to_load(p);
p->state = TASK_WAKING;
- if (p->sched_class->task_waking)
- p->sched_class->task_waking(p);
-
cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags);
if (task_cpu(p) != cpu) {
wake_flags |= WF_MIGRATED;
@@ -1971,7 +2053,7 @@
}
#endif /* CONFIG_SMP */
- ttwu_queue(p, cpu);
+ ttwu_queue(p, cpu, wake_flags);
stat:
if (schedstat_enabled())
ttwu_stat(p, cpu, wake_flags);
@@ -1989,7 +2071,7 @@
* ensure that this_rq() is locked, @p is bound to this_rq() and not
* the current task.
*/
-static void try_to_wake_up_local(struct task_struct *p)
+static void try_to_wake_up_local(struct task_struct *p, struct pin_cookie cookie)
{
struct rq *rq = task_rq(p);
@@ -2006,11 +2088,11 @@
* disabled avoiding further scheduler activity on it and we've
* not yet picked a replacement task.
*/
- lockdep_unpin_lock(&rq->lock);
+ lockdep_unpin_lock(&rq->lock, cookie);
raw_spin_unlock(&rq->lock);
raw_spin_lock(&p->pi_lock);
raw_spin_lock(&rq->lock);
- lockdep_pin_lock(&rq->lock);
+ lockdep_repin_lock(&rq->lock, cookie);
}
if (!(p->state & TASK_NORMAL))
@@ -2021,7 +2103,7 @@
if (!task_on_rq_queued(p))
ttwu_activate(rq, p, ENQUEUE_WAKEUP);
- ttwu_do_wakeup(rq, p, 0);
+ ttwu_do_wakeup(rq, p, 0, cookie);
if (schedstat_enabled())
ttwu_stat(p, smp_processor_id(), 0);
out:
@@ -2381,7 +2463,8 @@
u64 new_bw = dl_policy(policy) ? to_ratio(period, runtime) : 0;
int cpus, err = -1;
- if (new_bw == p->dl.dl_bw)
+ /* !deadline task may carry old deadline bandwidth */
+ if (new_bw == p->dl.dl_bw && task_has_dl_policy(p))
return 0;
/*
@@ -2420,12 +2503,12 @@
*/
void wake_up_new_task(struct task_struct *p)
{
- unsigned long flags;
+ struct rq_flags rf;
struct rq *rq;
- raw_spin_lock_irqsave(&p->pi_lock, flags);
/* Initialize new task's runnable average */
init_entity_runnable_average(&p->se);
+ raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
#ifdef CONFIG_SMP
/*
* Fork balancing, do it here and not earlier because:
@@ -2434,8 +2517,10 @@
*/
set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0));
#endif
+ /* Post-initialize new task's util average once its cfs_rq is set */
+ post_init_entity_util_avg(&p->se);
- rq = __task_rq_lock(p);
+ rq = __task_rq_lock(p, &rf);
activate_task(rq, p, 0);
p->on_rq = TASK_ON_RQ_QUEUED;
trace_sched_wakeup_new(p);
@@ -2446,12 +2531,12 @@
* Nothing relies on rq->lock after this, so its fine to
* drop it.
*/
- lockdep_unpin_lock(&rq->lock);
+ lockdep_unpin_lock(&rq->lock, rf.cookie);
p->sched_class->task_woken(rq, p);
- lockdep_pin_lock(&rq->lock);
+ lockdep_repin_lock(&rq->lock, rf.cookie);
}
#endif
- task_rq_unlock(rq, p, &flags);
+ task_rq_unlock(rq, p, &rf);
}
#ifdef CONFIG_PREEMPT_NOTIFIERS
@@ -2713,7 +2798,7 @@
*/
static __always_inline struct rq *
context_switch(struct rq *rq, struct task_struct *prev,
- struct task_struct *next)
+ struct task_struct *next, struct pin_cookie cookie)
{
struct mm_struct *mm, *oldmm;
@@ -2733,7 +2818,7 @@
atomic_inc(&oldmm->mm_count);
enter_lazy_tlb(oldmm, next);
} else
- switch_mm(oldmm, mm, next);
+ switch_mm_irqs_off(oldmm, mm, next);
if (!prev->mm) {
prev->active_mm = NULL;
@@ -2745,7 +2830,7 @@
* of the scheduler it's an obvious special-case), so we
* do an early lockdep release here:
*/
- lockdep_unpin_lock(&rq->lock);
+ lockdep_unpin_lock(&rq->lock, cookie);
spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
/* Here we just switch the register state and the stack. */
@@ -2867,7 +2952,7 @@
*/
unsigned long long task_sched_runtime(struct task_struct *p)
{
- unsigned long flags;
+ struct rq_flags rf;
struct rq *rq;
u64 ns;
@@ -2887,7 +2972,7 @@
return p->se.sum_exec_runtime;
#endif
- rq = task_rq_lock(p, &flags);
+ rq = task_rq_lock(p, &rf);
/*
* Must be ->curr _and_ ->on_rq. If dequeued, we would
* project cycles that may never be accounted to this
@@ -2898,7 +2983,7 @@
p->sched_class->update_curr(rq);
}
ns = p->se.sum_exec_runtime;
- task_rq_unlock(rq, p, &flags);
+ task_rq_unlock(rq, p, &rf);
return ns;
}
@@ -2918,7 +3003,7 @@
raw_spin_lock(&rq->lock);
update_rq_clock(rq);
curr->sched_class->task_tick(rq, curr, 0);
- update_cpu_load_active(rq);
+ cpu_load_update_active(rq);
calc_global_load_tick(rq);
raw_spin_unlock(&rq->lock);
@@ -2961,6 +3046,20 @@
#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
defined(CONFIG_PREEMPT_TRACER))
+/*
+ * If the value passed in is equal to the current preempt count
+ * then we just disabled preemption. Start timing the latency.
+ */
+static inline void preempt_latency_start(int val)
+{
+ if (preempt_count() == val) {
+ unsigned long ip = get_lock_parent_ip();
+#ifdef CONFIG_DEBUG_PREEMPT
+ current->preempt_disable_ip = ip;
+#endif
+ trace_preempt_off(CALLER_ADDR0, ip);
+ }
+}
void preempt_count_add(int val)
{
@@ -2979,17 +3078,21 @@
DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >=
PREEMPT_MASK - 10);
#endif
- if (preempt_count() == val) {
- unsigned long ip = get_lock_parent_ip();
-#ifdef CONFIG_DEBUG_PREEMPT
- current->preempt_disable_ip = ip;
-#endif
- trace_preempt_off(CALLER_ADDR0, ip);
- }
+ preempt_latency_start(val);
}
EXPORT_SYMBOL(preempt_count_add);
NOKPROBE_SYMBOL(preempt_count_add);
+/*
+ * If the value passed in is equal to the current preempt count
+ * then we just enabled preemption. Stop timing the latency.
+ */
+static inline void preempt_latency_stop(int val)
+{
+ if (preempt_count() == val)
+ trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip());
+}
+
void preempt_count_sub(int val)
{
#ifdef CONFIG_DEBUG_PREEMPT
@@ -3006,13 +3109,15 @@
return;
#endif
- if (preempt_count() == val)
- trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip());
+ preempt_latency_stop(val);
__preempt_count_sub(val);
}
EXPORT_SYMBOL(preempt_count_sub);
NOKPROBE_SYMBOL(preempt_count_sub);
+#else
+static inline void preempt_latency_start(int val) { }
+static inline void preempt_latency_stop(int val) { }
#endif
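The split exists so tracing code can disable preemption without recursing; the timing still pairs up across the two halves. Sketched pairing for a plain preempt_disable()/preempt_enable() from count zero:

	preempt_count_add(1);	/* count 0 -> 1: preempt_latency_start(1) fires */
	/* ... latency being timed ... */
	preempt_count_sub(1);	/* count 1 -> 0: preempt_latency_stop(1) fires */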
/*
@@ -3065,7 +3170,7 @@
* Pick up the highest-prio task:
*/
static inline struct task_struct *
-pick_next_task(struct rq *rq, struct task_struct *prev)
+pick_next_task(struct rq *rq, struct task_struct *prev, struct pin_cookie cookie)
{
const struct sched_class *class = &fair_sched_class;
struct task_struct *p;
@@ -3076,20 +3181,20 @@
*/
if (likely(prev->sched_class == class &&
rq->nr_running == rq->cfs.h_nr_running)) {
- p = fair_sched_class.pick_next_task(rq, prev);
+ p = fair_sched_class.pick_next_task(rq, prev, cookie);
if (unlikely(p == RETRY_TASK))
goto again;
/* assumes fair_sched_class->next == idle_sched_class */
if (unlikely(!p))
- p = idle_sched_class.pick_next_task(rq, prev);
+ p = idle_sched_class.pick_next_task(rq, prev, cookie);
return p;
}
again:
for_each_class(class) {
- p = class->pick_next_task(rq, prev);
+ p = class->pick_next_task(rq, prev, cookie);
if (p) {
if (unlikely(p == RETRY_TASK))
goto again;
@@ -3143,6 +3248,7 @@
{
struct task_struct *prev, *next;
unsigned long *switch_count;
+ struct pin_cookie cookie;
struct rq *rq;
int cpu;
@@ -3176,7 +3282,7 @@
*/
smp_mb__before_spinlock();
raw_spin_lock(&rq->lock);
- lockdep_pin_lock(&rq->lock);
+ cookie = lockdep_pin_lock(&rq->lock);
rq->clock_skip_update <<= 1; /* promote REQ to ACT */
@@ -3198,7 +3304,7 @@
to_wakeup = wq_worker_sleeping(prev);
if (to_wakeup)
- try_to_wake_up_local(to_wakeup);
+ try_to_wake_up_local(to_wakeup, cookie);
}
}
switch_count = &prev->nvcsw;
@@ -3207,7 +3313,7 @@
if (task_on_rq_queued(prev))
update_rq_clock(rq);
- next = pick_next_task(rq, prev);
+ next = pick_next_task(rq, prev, cookie);
clear_tsk_need_resched(prev);
clear_preempt_need_resched();
rq->clock_skip_update = 0;
@@ -3218,9 +3324,9 @@
++*switch_count;
trace_sched_switch(preempt, prev, next);
- rq = context_switch(rq, prev, next); /* unlocks the rq */
+ rq = context_switch(rq, prev, next, cookie); /* unlocks the rq */
} else {
- lockdep_unpin_lock(&rq->lock);
+ lockdep_unpin_lock(&rq->lock, cookie);
raw_spin_unlock_irq(&rq->lock);
}
@@ -3287,8 +3393,23 @@
static void __sched notrace preempt_schedule_common(void)
{
do {
+ /*
+ * Because the function tracer can trace preempt_count_sub()
+ * and it also uses preempt_enable/disable_notrace(), if
+ * NEED_RESCHED is set, the preempt_enable_notrace() called
+ * by the function tracer will call this function again and
+ * cause infinite recursion.
+ *
+ * Preemption must be disabled here before the function
+ * tracer can trace. Break up preempt_disable() into two
+ * calls. One to disable preemption without fear of being
+ * traced. The other to still record the preemption latency,
+ * which can also be traced by the function tracer.
+ */
preempt_disable_notrace();
+ preempt_latency_start(1);
__schedule(true);
+ preempt_latency_stop(1);
preempt_enable_no_resched_notrace();
/*
@@ -3340,7 +3461,21 @@
return;
do {
+ /*
+ * Because the function tracer can trace preempt_count_sub()
+ * and it also uses preempt_enable/disable_notrace(), if
+ * NEED_RESCHED is set, the preempt_enable_notrace() called
+ * by the function tracer will call this function again and
+ * cause infinite recursion.
+ *
+ * Preemption must be disabled here before the function
+ * tracer can trace. Break up preempt_disable() into two
+ * calls. One to disable preemption without fear of being
+ * traced. The other to still record the preemption latency,
+ * which can also be traced by the function tracer.
+ */
preempt_disable_notrace();
+ preempt_latency_start(1);
/*
* Needs preempt disabled in case user_exit() is traced
* and the tracer calls preempt_enable_notrace() causing
@@ -3350,6 +3485,7 @@
__schedule(true);
exception_exit(prev_ctx);
+ preempt_latency_stop(1);
preempt_enable_no_resched_notrace();
} while (need_resched());
}
@@ -3406,12 +3542,13 @@
void rt_mutex_setprio(struct task_struct *p, int prio)
{
int oldprio, queued, running, queue_flag = DEQUEUE_SAVE | DEQUEUE_MOVE;
- struct rq *rq;
const struct sched_class *prev_class;
+ struct rq_flags rf;
+ struct rq *rq;
BUG_ON(prio > MAX_PRIO);
- rq = __task_rq_lock(p);
+ rq = __task_rq_lock(p, &rf);
/*
* Idle task boosting is a nono in general. There is one
@@ -3487,7 +3624,7 @@
check_class_changed(rq, p, prev_class, oldprio);
out_unlock:
preempt_disable(); /* avoid rq from going away on us */
- __task_rq_unlock(rq);
+ __task_rq_unlock(rq, &rf);
balance_callback(rq);
preempt_enable();
@@ -3497,7 +3634,7 @@
void set_user_nice(struct task_struct *p, long nice)
{
int old_prio, delta, queued;
- unsigned long flags;
+ struct rq_flags rf;
struct rq *rq;
if (task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE)
@@ -3506,7 +3643,7 @@
* We have to be careful, if called from sys_setpriority(),
* the task might be in the middle of scheduling on another CPU.
*/
- rq = task_rq_lock(p, &flags);
+ rq = task_rq_lock(p, &rf);
/*
* The RT priorities are set via sched_setscheduler(), but we still
* allow the 'normal' nice value to be set - but as expected
@@ -3537,7 +3674,7 @@
resched_curr(rq);
}
out_unlock:
- task_rq_unlock(rq, p, &flags);
+ task_rq_unlock(rq, p, &rf);
}
EXPORT_SYMBOL(set_user_nice);
@@ -3834,11 +3971,11 @@
MAX_RT_PRIO - 1 - attr->sched_priority;
int retval, oldprio, oldpolicy = -1, queued, running;
int new_effective_prio, policy = attr->sched_policy;
- unsigned long flags;
const struct sched_class *prev_class;
- struct rq *rq;
+ struct rq_flags rf;
int reset_on_fork;
int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE;
+ struct rq *rq;
/* may grab non-irq protected spin_locks */
BUG_ON(in_interrupt());
@@ -3933,13 +4070,13 @@
* To be able to change p->policy safely, the appropriate
* runqueue lock must be held.
*/
- rq = task_rq_lock(p, &flags);
+ rq = task_rq_lock(p, &rf);
/*
* Changing the policy of the stop threads its a very bad idea
*/
if (p == rq->stop) {
- task_rq_unlock(rq, p, &flags);
+ task_rq_unlock(rq, p, &rf);
return -EINVAL;
}
@@ -3956,7 +4093,7 @@
goto change;
p->sched_reset_on_fork = reset_on_fork;
- task_rq_unlock(rq, p, &flags);
+ task_rq_unlock(rq, p, &rf);
return 0;
}
change:
@@ -3970,7 +4107,7 @@
if (rt_bandwidth_enabled() && rt_policy(policy) &&
task_group(p)->rt_bandwidth.rt_runtime == 0 &&
!task_group_is_autogroup(task_group(p))) {
- task_rq_unlock(rq, p, &flags);
+ task_rq_unlock(rq, p, &rf);
return -EPERM;
}
#endif
@@ -3985,7 +4122,7 @@
*/
if (!cpumask_subset(span, &p->cpus_allowed) ||
rq->rd->dl_bw.bw == 0) {
- task_rq_unlock(rq, p, &flags);
+ task_rq_unlock(rq, p, &rf);
return -EPERM;
}
}
@@ -3995,7 +4132,7 @@
/* recheck policy now with rq lock held */
if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
policy = oldpolicy = -1;
- task_rq_unlock(rq, p, &flags);
+ task_rq_unlock(rq, p, &rf);
goto recheck;
}
@@ -4005,7 +4142,7 @@
* is available.
*/
if ((dl_policy(policy) || dl_task(p)) && dl_overflow(p, policy, attr)) {
- task_rq_unlock(rq, p, &flags);
+ task_rq_unlock(rq, p, &rf);
return -EBUSY;
}
@@ -4050,7 +4187,7 @@
check_class_changed(rq, p, prev_class, oldprio);
preempt_disable(); /* avoid rq from going away on us */
- task_rq_unlock(rq, p, &flags);
+ task_rq_unlock(rq, p, &rf);
if (pi)
rt_mutex_adjust_pi(p);
@@ -4903,10 +5040,10 @@
{
struct task_struct *p;
unsigned int time_slice;
- unsigned long flags;
+ struct rq_flags rf;
+ struct timespec t;
struct rq *rq;
int retval;
- struct timespec t;
if (pid < 0)
return -EINVAL;
@@ -4921,11 +5058,11 @@
if (retval)
goto out_unlock;
- rq = task_rq_lock(p, &flags);
+ rq = task_rq_lock(p, &rf);
time_slice = 0;
if (p->sched_class->get_rr_interval)
time_slice = p->sched_class->get_rr_interval(rq, p);
- task_rq_unlock(rq, p, &flags);
+ task_rq_unlock(rq, p, &rf);
rcu_read_unlock();
jiffies_to_timespec(time_slice, &t);
@@ -5001,7 +5138,8 @@
touch_all_softlockup_watchdogs();
#ifdef CONFIG_SCHED_DEBUG
- sysrq_sched_debug_show();
+ if (!state_filter)
+ sysrq_sched_debug_show();
#endif
rcu_read_unlock();
/*
@@ -5163,6 +5301,8 @@
#ifdef CONFIG_SMP
+static bool sched_smp_initialized __read_mostly;
+
#ifdef CONFIG_NUMA_BALANCING
/* Migrate current task p to target_cpu */
int migrate_task_to(struct task_struct *p, int target_cpu)
@@ -5188,11 +5328,11 @@
*/
void sched_setnuma(struct task_struct *p, int nid)
{
- struct rq *rq;
- unsigned long flags;
bool queued, running;
+ struct rq_flags rf;
+ struct rq *rq;
- rq = task_rq_lock(p, &flags);
+ rq = task_rq_lock(p, &rf);
queued = task_on_rq_queued(p);
running = task_current(rq, p);
@@ -5207,7 +5347,7 @@
p->sched_class->set_curr_task(rq);
if (queued)
enqueue_task(rq, p, ENQUEUE_RESTORE);
- task_rq_unlock(rq, p, &flags);
+ task_rq_unlock(rq, p, &rf);
}
#endif /* CONFIG_NUMA_BALANCING */
@@ -5223,7 +5363,7 @@
BUG_ON(cpu_online(smp_processor_id()));
if (mm != &init_mm) {
- switch_mm(mm, &init_mm, current);
+ switch_mm_irqs_off(mm, &init_mm, current);
finish_arch_post_lock_switch();
}
mmdrop(mm);
@@ -5271,6 +5411,7 @@
{
struct rq *rq = dead_rq;
struct task_struct *next, *stop = rq->stop;
+ struct pin_cookie cookie;
int dest_cpu;
/*
@@ -5302,8 +5443,8 @@
/*
* pick_next_task assumes pinned rq->lock.
*/
- lockdep_pin_lock(&rq->lock);
- next = pick_next_task(rq, &fake_task);
+ cookie = lockdep_pin_lock(&rq->lock);
+ next = pick_next_task(rq, &fake_task, cookie);
BUG_ON(!next);
next->sched_class->put_prev_task(rq, next);
@@ -5316,7 +5457,7 @@
* because !cpu_active at this point, which means load-balance
* will not interfere. Also, stop-machine.
*/
- lockdep_unpin_lock(&rq->lock);
+ lockdep_unpin_lock(&rq->lock, cookie);
raw_spin_unlock(&rq->lock);
raw_spin_lock(&next->pi_lock);
raw_spin_lock(&rq->lock);
@@ -5377,127 +5518,13 @@
}
}
-/*
- * migration_call - callback that gets triggered when a CPU is added.
- * Here we can start up the necessary migration thread for the new CPU.
- */
-static int
-migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
+static void set_cpu_rq_start_time(unsigned int cpu)
{
- int cpu = (long)hcpu;
- unsigned long flags;
struct rq *rq = cpu_rq(cpu);
- switch (action & ~CPU_TASKS_FROZEN) {
-
- case CPU_UP_PREPARE:
- rq->calc_load_update = calc_load_update;
- account_reset_rq(rq);
- break;
-
- case CPU_ONLINE:
- /* Update our root-domain */
- raw_spin_lock_irqsave(&rq->lock, flags);
- if (rq->rd) {
- BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
-
- set_rq_online(rq);
- }
- raw_spin_unlock_irqrestore(&rq->lock, flags);
- break;
-
-#ifdef CONFIG_HOTPLUG_CPU
- case CPU_DYING:
- sched_ttwu_pending();
- /* Update our root-domain */
- raw_spin_lock_irqsave(&rq->lock, flags);
- if (rq->rd) {
- BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
- set_rq_offline(rq);
- }
- migrate_tasks(rq);
- BUG_ON(rq->nr_running != 1); /* the migration thread */
- raw_spin_unlock_irqrestore(&rq->lock, flags);
- break;
-
- case CPU_DEAD:
- calc_load_migrate(rq);
- break;
-#endif
- }
-
- update_max_interval();
-
- return NOTIFY_OK;
-}
-
-/*
- * Register at high priority so that task migration (migrate_all_tasks)
- * happens before everything else. This has to be lower priority than
- * the notifier in the perf_event subsystem, though.
- */
-static struct notifier_block migration_notifier = {
- .notifier_call = migration_call,
- .priority = CPU_PRI_MIGRATION,
-};
-
-static void set_cpu_rq_start_time(void)
-{
- int cpu = smp_processor_id();
- struct rq *rq = cpu_rq(cpu);
rq->age_stamp = sched_clock_cpu(cpu);
}
-static int sched_cpu_active(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
-{
- int cpu = (long)hcpu;
-
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_STARTING:
- set_cpu_rq_start_time();
- return NOTIFY_OK;
-
- case CPU_DOWN_FAILED:
- set_cpu_active(cpu, true);
- return NOTIFY_OK;
-
- default:
- return NOTIFY_DONE;
- }
-}
-
-static int sched_cpu_inactive(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
-{
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_DOWN_PREPARE:
- set_cpu_active((long)hcpu, false);
- return NOTIFY_OK;
- default:
- return NOTIFY_DONE;
- }
-}
-
-static int __init migration_init(void)
-{
- void *cpu = (void *)(long)smp_processor_id();
- int err;
-
- /* Initialize migration for the boot CPU */
- err = migration_call(&migration_notifier, CPU_UP_PREPARE, cpu);
- BUG_ON(err == NOTIFY_BAD);
- migration_call(&migration_notifier, CPU_ONLINE, cpu);
- register_cpu_notifier(&migration_notifier);
-
- /* Register cpu active notifiers */
- cpu_notifier(sched_cpu_active, CPU_PRI_SCHED_ACTIVE);
- cpu_notifier(sched_cpu_inactive, CPU_PRI_SCHED_INACTIVE);
-
- return 0;
-}
-early_initcall(migration_init);
-
static cpumask_var_t sched_domains_tmpmask; /* sched_domains_mutex */
#ifdef CONFIG_SCHED_DEBUG
@@ -6645,10 +6672,10 @@
init_numa_topology_type();
}
-static void sched_domains_numa_masks_set(int cpu)
+static void sched_domains_numa_masks_set(unsigned int cpu)
{
- int i, j;
int node = cpu_to_node(cpu);
+ int i, j;
for (i = 0; i < sched_domains_numa_levels; i++) {
for (j = 0; j < nr_node_ids; j++) {
@@ -6658,51 +6685,20 @@
}
}
-static void sched_domains_numa_masks_clear(int cpu)
+static void sched_domains_numa_masks_clear(unsigned int cpu)
{
int i, j;
+
for (i = 0; i < sched_domains_numa_levels; i++) {
for (j = 0; j < nr_node_ids; j++)
cpumask_clear_cpu(cpu, sched_domains_numa_masks[i][j]);
}
}
-/*
- * Update sched_domains_numa_masks[level][node] array when new cpus
- * are onlined.
- */
-static int sched_domains_numa_masks_update(struct notifier_block *nfb,
- unsigned long action,
- void *hcpu)
-{
- int cpu = (long)hcpu;
-
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_ONLINE:
- sched_domains_numa_masks_set(cpu);
- break;
-
- case CPU_DEAD:
- sched_domains_numa_masks_clear(cpu);
- break;
-
- default:
- return NOTIFY_DONE;
- }
-
- return NOTIFY_OK;
-}
#else
-static inline void sched_init_numa(void)
-{
-}
-
-static int sched_domains_numa_masks_update(struct notifier_block *nfb,
- unsigned long action,
- void *hcpu)
-{
- return 0;
-}
+static inline void sched_init_numa(void) { }
+static void sched_domains_numa_masks_set(unsigned int cpu) { }
+static void sched_domains_numa_masks_clear(unsigned int cpu) { }
#endif /* CONFIG_NUMA */
static int __sdt_alloc(const struct cpumask *cpu_map)
@@ -7092,13 +7088,9 @@
* If we come here as part of a suspend/resume, don't touch cpusets because we
* want to restore it back to its original state upon resume anyway.
*/
-static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action,
- void *hcpu)
+static void cpuset_cpu_active(void)
{
- switch (action) {
- case CPU_ONLINE_FROZEN:
- case CPU_DOWN_FAILED_FROZEN:
-
+ if (cpuhp_tasks_frozen) {
/*
* num_cpus_frozen tracks how many CPUs are involved in suspend
* resume sequence. As long as this is not the last online
@@ -7108,35 +7100,25 @@
num_cpus_frozen--;
if (likely(num_cpus_frozen)) {
partition_sched_domains(1, NULL, NULL);
- break;
+ return;
}
-
/*
* This is the last CPU online operation. So fall through and
* restore the original sched domains by considering the
* cpuset configurations.
*/
-
- case CPU_ONLINE:
- cpuset_update_active_cpus(true);
- break;
- default:
- return NOTIFY_DONE;
}
- return NOTIFY_OK;
+ cpuset_update_active_cpus(true);
}
-static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action,
- void *hcpu)
+static int cpuset_cpu_inactive(unsigned int cpu)
{
unsigned long flags;
- long cpu = (long)hcpu;
struct dl_bw *dl_b;
bool overflow;
int cpus;
- switch (action) {
- case CPU_DOWN_PREPARE:
+ if (!cpuhp_tasks_frozen) {
rcu_read_lock_sched();
dl_b = dl_bw_of(cpu);
@@ -7148,19 +7130,120 @@
rcu_read_unlock_sched();
if (overflow)
- return notifier_from_errno(-EBUSY);
+ return -EBUSY;
cpuset_update_active_cpus(false);
- break;
- case CPU_DOWN_PREPARE_FROZEN:
+ } else {
num_cpus_frozen++;
partition_sched_domains(1, NULL, NULL);
- break;
- default:
- return NOTIFY_DONE;
}
- return NOTIFY_OK;
+ return 0;
}
+int sched_cpu_activate(unsigned int cpu)
+{
+ struct rq *rq = cpu_rq(cpu);
+ unsigned long flags;
+
+ set_cpu_active(cpu, true);
+
+ if (sched_smp_initialized) {
+ sched_domains_numa_masks_set(cpu);
+ cpuset_cpu_active();
+ }
+
+ /*
+ * Put the rq online, if not already. This happens:
+ *
+ * 1) In the early boot process, because we build the real domains
+ * after all cpus have been brought up.
+ *
+ * 2) At runtime, if cpuset_cpu_active() fails to rebuild the
+ * domains.
+ */
+ raw_spin_lock_irqsave(&rq->lock, flags);
+ if (rq->rd) {
+ BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
+ set_rq_online(rq);
+ }
+ raw_spin_unlock_irqrestore(&rq->lock, flags);
+
+ update_max_interval();
+
+ return 0;
+}
+
+int sched_cpu_deactivate(unsigned int cpu)
+{
+ int ret;
+
+ set_cpu_active(cpu, false);
+ /*
+ * We've cleared cpu_active_mask; wait for all preempt-disabled and RCU
+ * users of this state to go away such that all new such users will
+ * observe it.
+ *
+ * For CONFIG_PREEMPT we have preemptible RCU and its sync_rcu() might
+ * not imply sync_sched(), so wait for both.
+ *
+ * Do the sync before parking the smpboot threads to handle the RCU boost case.
+ */
+ if (IS_ENABLED(CONFIG_PREEMPT))
+ synchronize_rcu_mult(call_rcu, call_rcu_sched);
+ else
+ synchronize_rcu();
+
+ if (!sched_smp_initialized)
+ return 0;
+
+ ret = cpuset_cpu_inactive(cpu);
+ if (ret) {
+ set_cpu_active(cpu, true);
+ return ret;
+ }
+ sched_domains_numa_masks_clear(cpu);
+ return 0;
+}
+
+static void sched_rq_cpu_starting(unsigned int cpu)
+{
+ struct rq *rq = cpu_rq(cpu);
+
+ rq->calc_load_update = calc_load_update;
+ account_reset_rq(rq);
+ update_max_interval();
+}
+
+int sched_cpu_starting(unsigned int cpu)
+{
+ set_cpu_rq_start_time(cpu);
+ sched_rq_cpu_starting(cpu);
+ return 0;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+int sched_cpu_dying(unsigned int cpu)
+{
+ struct rq *rq = cpu_rq(cpu);
+ unsigned long flags;
+
+ /* Handle pending wakeups and then migrate everything off */
+ sched_ttwu_pending();
+ raw_spin_lock_irqsave(&rq->lock, flags);
+ if (rq->rd) {
+ BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
+ set_rq_offline(rq);
+ }
+ migrate_tasks(rq);
+ BUG_ON(rq->nr_running != 1);
+ raw_spin_unlock_irqrestore(&rq->lock, flags);
+ calc_load_migrate(rq);
+ update_max_interval();
+ nohz_balance_exit_idle(cpu);
+ hrtick_clear(rq);
+ return 0;
+}
+#endif
+
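These entry points replace the notifier blocks deleted above and are invoked by the cpuhp state machine; the scheduler's hooks are wired statically into the hotplug state table rather than registered at runtime, but for comparison a dynamic registration (sketch; state and name hypothetical) would look like:

	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "sched:demo",
				sched_cpu_activate, sched_cpu_deactivate);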
void __init sched_init_smp(void)
{
cpumask_var_t non_isolated_cpus;
@@ -7182,12 +7265,6 @@
cpumask_set_cpu(smp_processor_id(), non_isolated_cpus);
mutex_unlock(&sched_domains_mutex);
- hotcpu_notifier(sched_domains_numa_masks_update, CPU_PRI_SCHED_ACTIVE);
- hotcpu_notifier(cpuset_cpu_active, CPU_PRI_CPUSET_ACTIVE);
- hotcpu_notifier(cpuset_cpu_inactive, CPU_PRI_CPUSET_INACTIVE);
-
- init_hrtick();
-
/* Move init over to a non-isolated CPU */
if (set_cpus_allowed_ptr(current, non_isolated_cpus) < 0)
BUG();
@@ -7196,7 +7273,16 @@
init_sched_rt_class();
init_sched_dl_class();
+ sched_smp_initialized = true;
}
+
+static int __init migration_init(void)
+{
+ sched_rq_cpu_starting(smp_processor_id());
+ return 0;
+}
+early_initcall(migration_init);
+
#else
void __init sched_init_smp(void)
{
@@ -7331,8 +7417,6 @@
for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
rq->cpu_load[j] = 0;
- rq->last_load_update_tick = jiffies;
-
#ifdef CONFIG_SMP
rq->sd = NULL;
rq->rd = NULL;
@@ -7351,12 +7435,13 @@
rq_attach_root(rq, &def_root_domain);
#ifdef CONFIG_NO_HZ_COMMON
+ rq->last_load_update_tick = jiffies;
rq->nohz_flags = 0;
#endif
#ifdef CONFIG_NO_HZ_FULL
rq->last_sched_tick = 0;
#endif
-#endif
+#endif /* CONFIG_SMP */
init_rq_hrtick(rq);
atomic_set(&rq->nr_iowait, 0);
}
@@ -7394,7 +7479,7 @@
if (cpu_isolated_map == NULL)
zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
idle_thread_set_boot_cpu();
- set_cpu_rq_start_time();
+ set_cpu_rq_start_time(smp_processor_id());
#endif
init_sched_fair_class();
@@ -7639,10 +7724,10 @@
{
struct task_group *tg;
int queued, running;
- unsigned long flags;
+ struct rq_flags rf;
struct rq *rq;
- rq = task_rq_lock(tsk, &flags);
+ rq = task_rq_lock(tsk, &rf);
running = task_current(rq, tsk);
queued = task_on_rq_queued(tsk);
@@ -7674,7 +7759,7 @@
if (queued)
enqueue_task(rq, tsk, ENQUEUE_RESTORE | ENQUEUE_MOVE);
- task_rq_unlock(rq, tsk, &flags);
+ task_rq_unlock(rq, tsk, &rf);
}
#endif /* CONFIG_CGROUP_SCHED */
@@ -7894,7 +7979,7 @@
static int sched_rt_global_constraints(void)
{
unsigned long flags;
- int i, ret = 0;
+ int i;
raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
for_each_possible_cpu(i) {
@@ -7906,7 +7991,7 @@
}
raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags);
- return ret;
+ return 0;
}
#endif /* CONFIG_RT_GROUP_SCHED */
diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
index 4a81120..41f85c4 100644
--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c
@@ -25,11 +25,22 @@
CPUACCT_STAT_NSTATS,
};
+enum cpuacct_usage_index {
+ CPUACCT_USAGE_USER, /* ... user mode */
+ CPUACCT_USAGE_SYSTEM, /* ... kernel mode */
+
+ CPUACCT_USAGE_NRUSAGE,
+};
+
+struct cpuacct_usage {
+ u64 usages[CPUACCT_USAGE_NRUSAGE];
+};
+
/* track cpu usage of a group of tasks and its child groups */
struct cpuacct {
struct cgroup_subsys_state css;
/* cpuusage holds pointer to a u64-type object on every cpu */
- u64 __percpu *cpuusage;
+ struct cpuacct_usage __percpu *cpuusage;
struct kernel_cpustat __percpu *cpustat;
};
@@ -49,7 +60,7 @@
return css_ca(ca->css.parent);
}
-static DEFINE_PER_CPU(u64, root_cpuacct_cpuusage);
+static DEFINE_PER_CPU(struct cpuacct_usage, root_cpuacct_cpuusage);
static struct cpuacct root_cpuacct = {
.cpustat = &kernel_cpustat,
.cpuusage = &root_cpuacct_cpuusage,
@@ -68,7 +79,7 @@
if (!ca)
goto out;
- ca->cpuusage = alloc_percpu(u64);
+ ca->cpuusage = alloc_percpu(struct cpuacct_usage);
if (!ca->cpuusage)
goto out_free_ca;
@@ -96,20 +107,37 @@
kfree(ca);
}
-static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu)
+static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu,
+ enum cpuacct_usage_index index)
{
- u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
+ struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
u64 data;
+ /*
+ * We allow index == CPUACCT_USAGE_NRUSAGE here to read
+ * the sum of usages.
+ */
+ BUG_ON(index > CPUACCT_USAGE_NRUSAGE);
+
#ifndef CONFIG_64BIT
/*
* Take rq->lock to make 64-bit read safe on 32-bit platforms.
*/
raw_spin_lock_irq(&cpu_rq(cpu)->lock);
- data = *cpuusage;
+#endif
+
+ if (index == CPUACCT_USAGE_NRUSAGE) {
+ int i = 0;
+
+ data = 0;
+ for (i = 0; i < CPUACCT_USAGE_NRUSAGE; i++)
+ data += cpuusage->usages[i];
+ } else {
+ data = cpuusage->usages[index];
+ }
+
+#ifndef CONFIG_64BIT
raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
-#else
- data = *cpuusage;
#endif
return data;
@@ -117,69 +145,103 @@
static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
{
- u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
+ struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
+ int i;
#ifndef CONFIG_64BIT
/*
* Take rq->lock to make 64-bit write safe on 32-bit platforms.
*/
raw_spin_lock_irq(&cpu_rq(cpu)->lock);
- *cpuusage = val;
+#endif
+
+ for (i = 0; i < CPUACCT_USAGE_NRUSAGE; i++)
+ cpuusage->usages[i] = val;
+
+#ifndef CONFIG_64BIT
raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
-#else
- *cpuusage = val;
#endif
}
/* return total cpu usage (in nanoseconds) of a group */
-static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft)
+static u64 __cpuusage_read(struct cgroup_subsys_state *css,
+ enum cpuacct_usage_index index)
{
struct cpuacct *ca = css_ca(css);
u64 totalcpuusage = 0;
int i;
- for_each_present_cpu(i)
- totalcpuusage += cpuacct_cpuusage_read(ca, i);
+ for_each_possible_cpu(i)
+ totalcpuusage += cpuacct_cpuusage_read(ca, i, index);
return totalcpuusage;
}
+static u64 cpuusage_user_read(struct cgroup_subsys_state *css,
+ struct cftype *cft)
+{
+ return __cpuusage_read(css, CPUACCT_USAGE_USER);
+}
+
+static u64 cpuusage_sys_read(struct cgroup_subsys_state *css,
+ struct cftype *cft)
+{
+ return __cpuusage_read(css, CPUACCT_USAGE_SYSTEM);
+}
+
+static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+ return __cpuusage_read(css, CPUACCT_USAGE_NRUSAGE);
+}
+
static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft,
u64 val)
{
struct cpuacct *ca = css_ca(css);
- int err = 0;
- int i;
+ int cpu;
/*
* Only allow '0' here to do a reset.
*/
- if (val) {
- err = -EINVAL;
- goto out;
- }
+ if (val)
+ return -EINVAL;
- for_each_present_cpu(i)
- cpuacct_cpuusage_write(ca, i, 0);
+ for_each_possible_cpu(cpu)
+ cpuacct_cpuusage_write(ca, cpu, 0);
-out:
- return err;
+ return 0;
}
-static int cpuacct_percpu_seq_show(struct seq_file *m, void *V)
+static int __cpuacct_percpu_seq_show(struct seq_file *m,
+ enum cpuacct_usage_index index)
{
struct cpuacct *ca = css_ca(seq_css(m));
u64 percpu;
int i;
- for_each_present_cpu(i) {
- percpu = cpuacct_cpuusage_read(ca, i);
+ for_each_possible_cpu(i) {
+ percpu = cpuacct_cpuusage_read(ca, i, index);
seq_printf(m, "%llu ", (unsigned long long) percpu);
}
seq_printf(m, "\n");
return 0;
}
+static int cpuacct_percpu_user_seq_show(struct seq_file *m, void *V)
+{
+ return __cpuacct_percpu_seq_show(m, CPUACCT_USAGE_USER);
+}
+
+static int cpuacct_percpu_sys_seq_show(struct seq_file *m, void *V)
+{
+ return __cpuacct_percpu_seq_show(m, CPUACCT_USAGE_SYSTEM);
+}
+
+static int cpuacct_percpu_seq_show(struct seq_file *m, void *V)
+{
+ return __cpuacct_percpu_seq_show(m, CPUACCT_USAGE_NRUSAGE);
+}
+
static const char * const cpuacct_stat_desc[] = {
[CPUACCT_STAT_USER] = "user",
[CPUACCT_STAT_SYSTEM] = "system",
@@ -191,7 +253,7 @@
int cpu;
s64 val = 0;
- for_each_online_cpu(cpu) {
+ for_each_possible_cpu(cpu) {
struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
val += kcpustat->cpustat[CPUTIME_USER];
val += kcpustat->cpustat[CPUTIME_NICE];
@@ -200,7 +262,7 @@
seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_USER], val);
val = 0;
- for_each_online_cpu(cpu) {
+ for_each_possible_cpu(cpu) {
struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
val += kcpustat->cpustat[CPUTIME_SYSTEM];
val += kcpustat->cpustat[CPUTIME_IRQ];
@@ -220,10 +282,26 @@
.write_u64 = cpuusage_write,
},
{
+ .name = "usage_user",
+ .read_u64 = cpuusage_user_read,
+ },
+ {
+ .name = "usage_sys",
+ .read_u64 = cpuusage_sys_read,
+ },
+ {
.name = "usage_percpu",
.seq_show = cpuacct_percpu_seq_show,
},
{
+ .name = "usage_percpu_user",
+ .seq_show = cpuacct_percpu_user_seq_show,
+ },
+ {
+ .name = "usage_percpu_sys",
+ .seq_show = cpuacct_percpu_sys_seq_show,
+ },
+ {
.name = "stat",
.seq_show = cpuacct_stats_show,
},
@@ -238,10 +316,17 @@
void cpuacct_charge(struct task_struct *tsk, u64 cputime)
{
struct cpuacct *ca;
+ int index = CPUACCT_USAGE_SYSTEM;
+ struct pt_regs *regs = task_pt_regs(tsk);
+
+ if (regs && user_mode(regs))
+ index = CPUACCT_USAGE_USER;
rcu_read_lock();
+
for (ca = task_ca(tsk); ca; ca = parent_ca(ca))
- *this_cpu_ptr(ca->cpuusage) += cputime;
+ this_cpu_ptr(ca->cpuusage)->usages[index] += cputime;
+
rcu_read_unlock();
}
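The user/system split keys off the charged task's saved register state. A standalone form of the index selection used above (hypothetical helper name):

	static int cpuacct_charge_index(struct task_struct *tsk)
	{
		struct pt_regs *regs = task_pt_regs(tsk);

		/* Missing or kernel-mode regs (e.g. kthreads) count as system time. */
		return (regs && user_mode(regs)) ? CPUACCT_USAGE_USER
						 : CPUACCT_USAGE_SYSTEM;
	}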
diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
index 5a75b08..5be5882 100644
--- a/kernel/sched/cpudeadline.c
+++ b/kernel/sched/cpudeadline.c
@@ -103,10 +103,10 @@
const struct sched_dl_entity *dl_se = &p->dl;
if (later_mask &&
- cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed)) {
+ cpumask_and(later_mask, cp->free_cpus, tsk_cpus_allowed(p))) {
best_cpu = cpumask_any(later_mask);
goto out;
- } else if (cpumask_test_cpu(cpudl_maximum(cp), &p->cpus_allowed) &&
+ } else if (cpumask_test_cpu(cpudl_maximum(cp), tsk_cpus_allowed(p)) &&
dl_time_before(dl_se->deadline, cp->elements[0].dl)) {
best_cpu = cpudl_maximum(cp);
if (later_mask)
diff --git a/kernel/sched/cpufreq.c b/kernel/sched/cpufreq.c
index 928c4ba..1141954 100644
--- a/kernel/sched/cpufreq.c
+++ b/kernel/sched/cpufreq.c
@@ -14,24 +14,50 @@
DEFINE_PER_CPU(struct update_util_data *, cpufreq_update_util_data);
/**
- * cpufreq_set_update_util_data - Populate the CPU's update_util_data pointer.
+ * cpufreq_add_update_util_hook - Populate the CPU's update_util_data pointer.
* @cpu: The CPU to set the pointer for.
* @data: New pointer value.
+ * @func: Callback function to set for the CPU.
*
- * Set and publish the update_util_data pointer for the given CPU. That pointer
- * points to a struct update_util_data object containing a callback function
- * to call from cpufreq_update_util(). That function will be called from an RCU
- * read-side critical section, so it must not sleep.
+ * Set and publish the update_util_data pointer for the given CPU.
+ *
+ * The update_util_data pointer of @cpu is set to @data and the callback
+ * function pointer in the target struct update_util_data is set to @func.
+ * That function will be called by cpufreq_update_util() from RCU-sched
+ * read-side critical sections, so it must not sleep. @data will always be
+ * passed to it as the first argument which allows the function to get to the
+ * target update_util_data structure and its container.
+ *
+ * The update_util_data pointer of @cpu must be NULL when this function is
+ * called or it will WARN() and return with no effect.
+ */
+void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
+ void (*func)(struct update_util_data *data, u64 time,
+ unsigned long util, unsigned long max))
+{
+ if (WARN_ON(!data || !func))
+ return;
+
+ if (WARN_ON(per_cpu(cpufreq_update_util_data, cpu)))
+ return;
+
+ data->func = func;
+ rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), data);
+}
+EXPORT_SYMBOL_GPL(cpufreq_add_update_util_hook);
+
+/**
+ * cpufreq_remove_update_util_hook - Clear the CPU's update_util_data pointer.
+ * @cpu: The CPU to clear the pointer for.
+ *
+ * Clear the update_util_data pointer for the given CPU.
*
* Callers must use RCU-sched callbacks to free any memory that might be
 * accessed via the old update_util_data pointer, or invoke synchronize_sched()
 * right after this function, to avoid use-after-free.
*/
-void cpufreq_set_update_util_data(int cpu, struct update_util_data *data)
+void cpufreq_remove_update_util_hook(int cpu)
{
- if (WARN_ON(data && !data->func))
- return;
-
- rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), data);
+ rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), NULL);
}
-EXPORT_SYMBOL_GPL(cpufreq_set_update_util_data);
+EXPORT_SYMBOL_GPL(cpufreq_remove_update_util_hook);
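
As a usage sketch of the renamed API (my_update() and my_data are invented names, and the snippet assumes the kernel headers that define struct update_util_data): a governor now registers one callback per CPU at start and tears it down at stop:

/* Illustrative call pattern for the add/remove pair above. The callback
 * runs in an RCU-sched read-side critical section, so it must not sleep.
 */
static void my_update(struct update_util_data *data, u64 time,
		      unsigned long util, unsigned long max)
{
	/* react to the utilization update, e.g. request a frequency change */
}

static struct update_util_data my_data;

static void my_start(int cpu)
{
	cpufreq_add_update_util_hook(cpu, &my_data, my_update);
}

static void my_stop(int cpu)
{
	cpufreq_remove_update_util_hook(cpu);
	synchronize_sched();	/* no callback can still be running afterwards */
}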
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
new file mode 100644
index 0000000..154ae3a
--- /dev/null
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -0,0 +1,530 @@
+/*
+ * CPUFreq governor based on scheduler-provided CPU utilization data.
+ *
+ * Copyright (C) 2016, Intel Corporation
+ * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/cpufreq.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <trace/events/power.h>
+
+#include "sched.h"
+
+struct sugov_tunables {
+ struct gov_attr_set attr_set;
+ unsigned int rate_limit_us;
+};
+
+struct sugov_policy {
+ struct cpufreq_policy *policy;
+
+ struct sugov_tunables *tunables;
+ struct list_head tunables_hook;
+
+ raw_spinlock_t update_lock; /* For shared policies */
+ u64 last_freq_update_time;
+ s64 freq_update_delay_ns;
+ unsigned int next_freq;
+
+ /* The next fields are only needed if fast switch cannot be used. */
+ struct irq_work irq_work;
+ struct work_struct work;
+ struct mutex work_lock;
+ bool work_in_progress;
+
+ bool need_freq_update;
+};
+
+struct sugov_cpu {
+ struct update_util_data update_util;
+ struct sugov_policy *sg_policy;
+
+ /* The fields below are only needed when sharing a policy. */
+ unsigned long util;
+ unsigned long max;
+ u64 last_update;
+};
+
+static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu);
+
+/************************ Governor internals ***********************/
+
+static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
+{
+ s64 delta_ns;
+
+ if (sg_policy->work_in_progress)
+ return false;
+
+ if (unlikely(sg_policy->need_freq_update)) {
+ sg_policy->need_freq_update = false;
+ /*
+ * This happens when limits change, so forget the previous
+ * next_freq value and force an update.
+ */
+ sg_policy->next_freq = UINT_MAX;
+ return true;
+ }
+
+ delta_ns = time - sg_policy->last_freq_update_time;
+ return delta_ns >= sg_policy->freq_update_delay_ns;
+}
+
+static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
+ unsigned int next_freq)
+{
+ struct cpufreq_policy *policy = sg_policy->policy;
+
+ sg_policy->last_freq_update_time = time;
+
+ if (policy->fast_switch_enabled) {
+ if (sg_policy->next_freq == next_freq) {
+ trace_cpu_frequency(policy->cur, smp_processor_id());
+ return;
+ }
+ sg_policy->next_freq = next_freq;
+ next_freq = cpufreq_driver_fast_switch(policy, next_freq);
+ if (next_freq == CPUFREQ_ENTRY_INVALID)
+ return;
+
+ policy->cur = next_freq;
+ trace_cpu_frequency(next_freq, smp_processor_id());
+ } else if (sg_policy->next_freq != next_freq) {
+ sg_policy->next_freq = next_freq;
+ sg_policy->work_in_progress = true;
+ irq_work_queue(&sg_policy->irq_work);
+ }
+}
+
+/**
+ * get_next_freq - Compute a new frequency for a given cpufreq policy.
+ * @policy: cpufreq policy object to compute the new frequency for.
+ * @util: Current CPU utilization.
+ * @max: CPU capacity.
+ *
+ * If the utilization is frequency-invariant, choose the new frequency to be
+ * proportional to it, that is
+ *
+ * next_freq = C * max_freq * util / max
+ *
+ * Otherwise, approximate the would-be frequency-invariant utilization by
+ * util_raw * (curr_freq / max_freq) which leads to
+ *
+ * next_freq = C * curr_freq * util_raw / max
+ *
+ * Take C = 1.25 for the frequency tipping point at (util / max) = 0.8.
+ */
+static unsigned int get_next_freq(struct cpufreq_policy *policy,
+ unsigned long util, unsigned long max)
+{
+ unsigned int freq = arch_scale_freq_invariant() ?
+ policy->cpuinfo.max_freq : policy->cur;
+
+ return (freq + (freq >> 2)) * util / max;
+}
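
To make the formula concrete: at util/max = 0.8 the result is exactly max_freq (the tipping point), and below that the frequency scales linearly. A standalone arithmetic check (plain C, illustrative numbers, not kernel code):

#include <stdio.h>

int main(void)
{
	unsigned int max_freq = 2000000;	/* kHz; frequency-invariant case */
	unsigned long util = 614, max = 1024;	/* util/max ~= 0.6 */

	/* freq + (freq >> 2) == 1.25 * freq, matching get_next_freq() above */
	unsigned int next = (max_freq + (max_freq >> 2)) * util / max;

	printf("%u\n", next);	/* 1499023 kHz: ~75% of max for 60% utilization */
	return 0;
}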
+
+static void sugov_update_single(struct update_util_data *hook, u64 time,
+ unsigned long util, unsigned long max)
+{
+ struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
+ struct sugov_policy *sg_policy = sg_cpu->sg_policy;
+ struct cpufreq_policy *policy = sg_policy->policy;
+ unsigned int next_f;
+
+ if (!sugov_should_update_freq(sg_policy, time))
+ return;
+
+ next_f = util == ULONG_MAX ? policy->cpuinfo.max_freq :
+ get_next_freq(policy, util, max);
+ sugov_update_commit(sg_policy, time, next_f);
+}
+
+static unsigned int sugov_next_freq_shared(struct sugov_policy *sg_policy,
+ unsigned long util, unsigned long max)
+{
+ struct cpufreq_policy *policy = sg_policy->policy;
+ unsigned int max_f = policy->cpuinfo.max_freq;
+ u64 last_freq_update_time = sg_policy->last_freq_update_time;
+ unsigned int j;
+
+ if (util == ULONG_MAX)
+ return max_f;
+
+ for_each_cpu(j, policy->cpus) {
+ struct sugov_cpu *j_sg_cpu;
+ unsigned long j_util, j_max;
+ s64 delta_ns;
+
+ if (j == smp_processor_id())
+ continue;
+
+ j_sg_cpu = &per_cpu(sugov_cpu, j);
+ /*
+ * If the CPU utilization was last updated before the previous
+ * frequency update and the time elapsed between the last update
+ * of the CPU utilization and the last frequency update is long
+ * enough, don't take the CPU into account as it probably is
+ * idle now.
+ */
+ delta_ns = last_freq_update_time - j_sg_cpu->last_update;
+ if (delta_ns > TICK_NSEC)
+ continue;
+
+ j_util = j_sg_cpu->util;
+ if (j_util == ULONG_MAX)
+ return max_f;
+
+ j_max = j_sg_cpu->max;
+ if (j_util * max > j_max * util) {
+ util = j_util;
+ max = j_max;
+ }
+ }
+
+ return get_next_freq(policy, util, max);
+}
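
One detail worth spelling out: the comparison j_util * max > j_max * util is a cross-multiplied form of j_util / j_max > util / max, so the "fullest" CPU is found without integer division and its truncation error. A standalone check (illustrative, not kernel code):

#include <assert.h>

int main(void)
{
	/* CPU A: util 300 of capacity 1024 (~0.29 full).
	 * CPU B: util 200 of capacity 512 (~0.39 full).
	 * B is fuller despite the smaller absolute util; cross-multiplying
	 * detects this at full precision.
	 */
	unsigned long a_util = 300, a_max = 1024;
	unsigned long b_util = 200, b_max = 512;

	assert(b_util * a_max > a_util * b_max);	/* 204800 > 153600 */
	return 0;
}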
+
+static void sugov_update_shared(struct update_util_data *hook, u64 time,
+ unsigned long util, unsigned long max)
+{
+ struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
+ struct sugov_policy *sg_policy = sg_cpu->sg_policy;
+ unsigned int next_f;
+
+ raw_spin_lock(&sg_policy->update_lock);
+
+ sg_cpu->util = util;
+ sg_cpu->max = max;
+ sg_cpu->last_update = time;
+
+ if (sugov_should_update_freq(sg_policy, time)) {
+ next_f = sugov_next_freq_shared(sg_policy, util, max);
+ sugov_update_commit(sg_policy, time, next_f);
+ }
+
+ raw_spin_unlock(&sg_policy->update_lock);
+}
+
+static void sugov_work(struct work_struct *work)
+{
+ struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work);
+
+ mutex_lock(&sg_policy->work_lock);
+ __cpufreq_driver_target(sg_policy->policy, sg_policy->next_freq,
+ CPUFREQ_RELATION_L);
+ mutex_unlock(&sg_policy->work_lock);
+
+ sg_policy->work_in_progress = false;
+}
+
+static void sugov_irq_work(struct irq_work *irq_work)
+{
+ struct sugov_policy *sg_policy;
+
+ sg_policy = container_of(irq_work, struct sugov_policy, irq_work);
+ schedule_work_on(smp_processor_id(), &sg_policy->work);
+}
+
+/************************** sysfs interface ************************/
+
+static struct sugov_tunables *global_tunables;
+static DEFINE_MUTEX(global_tunables_lock);
+
+static inline struct sugov_tunables *to_sugov_tunables(struct gov_attr_set *attr_set)
+{
+ return container_of(attr_set, struct sugov_tunables, attr_set);
+}
+
+static ssize_t rate_limit_us_show(struct gov_attr_set *attr_set, char *buf)
+{
+ struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
+
+ return sprintf(buf, "%u\n", tunables->rate_limit_us);
+}
+
+static ssize_t rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf,
+ size_t count)
+{
+ struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
+ struct sugov_policy *sg_policy;
+ unsigned int rate_limit_us;
+
+ if (kstrtouint(buf, 10, &rate_limit_us))
+ return -EINVAL;
+
+ tunables->rate_limit_us = rate_limit_us;
+
+ list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook)
+ sg_policy->freq_update_delay_ns = rate_limit_us * NSEC_PER_USEC;
+
+ return count;
+}
+
+static struct governor_attr rate_limit_us = __ATTR_RW(rate_limit_us);
+
+static struct attribute *sugov_attributes[] = {
+ &rate_limit_us.attr,
+ NULL
+};
+
+static struct kobj_type sugov_tunables_ktype = {
+ .default_attrs = sugov_attributes,
+ .sysfs_ops = &governor_sysfs_ops,
+};
+
+/********************** cpufreq governor interface *********************/
+
+static struct cpufreq_governor schedutil_gov;
+
+static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy)
+{
+ struct sugov_policy *sg_policy;
+
+ sg_policy = kzalloc(sizeof(*sg_policy), GFP_KERNEL);
+ if (!sg_policy)
+ return NULL;
+
+ sg_policy->policy = policy;
+ init_irq_work(&sg_policy->irq_work, sugov_irq_work);
+ INIT_WORK(&sg_policy->work, sugov_work);
+ mutex_init(&sg_policy->work_lock);
+ raw_spin_lock_init(&sg_policy->update_lock);
+ return sg_policy;
+}
+
+static void sugov_policy_free(struct sugov_policy *sg_policy)
+{
+ mutex_destroy(&sg_policy->work_lock);
+ kfree(sg_policy);
+}
+
+static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_policy)
+{
+ struct sugov_tunables *tunables;
+
+ tunables = kzalloc(sizeof(*tunables), GFP_KERNEL);
+ if (tunables) {
+ gov_attr_set_init(&tunables->attr_set, &sg_policy->tunables_hook);
+ if (!have_governor_per_policy())
+ global_tunables = tunables;
+ }
+ return tunables;
+}
+
+static void sugov_tunables_free(struct sugov_tunables *tunables)
+{
+ if (!have_governor_per_policy())
+ global_tunables = NULL;
+
+ kfree(tunables);
+}
+
+static int sugov_init(struct cpufreq_policy *policy)
+{
+ struct sugov_policy *sg_policy;
+ struct sugov_tunables *tunables;
+ unsigned int lat;
+ int ret = 0;
+
+ /* State should be equivalent to EXIT */
+ if (policy->governor_data)
+ return -EBUSY;
+
+ sg_policy = sugov_policy_alloc(policy);
+ if (!sg_policy)
+ return -ENOMEM;
+
+ mutex_lock(&global_tunables_lock);
+
+ if (global_tunables) {
+ if (WARN_ON(have_governor_per_policy())) {
+ ret = -EINVAL;
+ goto free_sg_policy;
+ }
+ policy->governor_data = sg_policy;
+ sg_policy->tunables = global_tunables;
+
+ gov_attr_set_get(&global_tunables->attr_set, &sg_policy->tunables_hook);
+ goto out;
+ }
+
+ tunables = sugov_tunables_alloc(sg_policy);
+ if (!tunables) {
+ ret = -ENOMEM;
+ goto free_sg_policy;
+ }
+
+ tunables->rate_limit_us = LATENCY_MULTIPLIER;
+ lat = policy->cpuinfo.transition_latency / NSEC_PER_USEC;
+ if (lat)
+ tunables->rate_limit_us *= lat;
+
+ policy->governor_data = sg_policy;
+ sg_policy->tunables = tunables;
+
+ ret = kobject_init_and_add(&tunables->attr_set.kobj, &sugov_tunables_ktype,
+ get_governor_parent_kobj(policy), "%s",
+ schedutil_gov.name);
+ if (ret)
+ goto fail;
+
+ out:
+ mutex_unlock(&global_tunables_lock);
+
+ cpufreq_enable_fast_switch(policy);
+ return 0;
+
+ fail:
+ policy->governor_data = NULL;
+ sugov_tunables_free(tunables);
+
+ free_sg_policy:
+ mutex_unlock(&global_tunables_lock);
+
+ sugov_policy_free(sg_policy);
+ pr_err("cpufreq: schedutil governor initialization failed (error %d)\n", ret);
+ return ret;
+}
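
For intuition on the default rate limit: assuming LATENCY_MULTIPLIER is 1000 (its cpufreq.h value at the time, stated here as an assumption) and a driver-reported transition latency of 20,000 ns, the code above yields lat = 20 and rate_limit_us = 20,000, i.e. at most one frequency update every 20 ms. A standalone check:

#include <stdio.h>

#define LATENCY_MULTIPLIER	1000	/* assumed cpufreq.h value */
#define NSEC_PER_USEC		1000

int main(void)
{
	unsigned int transition_latency = 20000;	/* ns, driver-reported */
	unsigned int rate_limit_us = LATENCY_MULTIPLIER;
	unsigned int lat = transition_latency / NSEC_PER_USEC;

	if (lat)
		rate_limit_us *= lat;			/* mirrors sugov_init() */

	printf("rate_limit_us = %u\n", rate_limit_us);	/* 20000 us == 20 ms */
	return 0;
}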
+
+static int sugov_exit(struct cpufreq_policy *policy)
+{
+ struct sugov_policy *sg_policy = policy->governor_data;
+ struct sugov_tunables *tunables = sg_policy->tunables;
+ unsigned int count;
+
+ cpufreq_disable_fast_switch(policy);
+
+ mutex_lock(&global_tunables_lock);
+
+ count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook);
+ policy->governor_data = NULL;
+ if (!count)
+ sugov_tunables_free(tunables);
+
+ mutex_unlock(&global_tunables_lock);
+
+ sugov_policy_free(sg_policy);
+ return 0;
+}
+
+static int sugov_start(struct cpufreq_policy *policy)
+{
+ struct sugov_policy *sg_policy = policy->governor_data;
+ unsigned int cpu;
+
+ sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC;
+ sg_policy->last_freq_update_time = 0;
+ sg_policy->next_freq = UINT_MAX;
+ sg_policy->work_in_progress = false;
+ sg_policy->need_freq_update = false;
+
+ for_each_cpu(cpu, policy->cpus) {
+ struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);
+
+ sg_cpu->sg_policy = sg_policy;
+ if (policy_is_shared(policy)) {
+ sg_cpu->util = ULONG_MAX;
+ sg_cpu->max = 0;
+ sg_cpu->last_update = 0;
+ cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
+ sugov_update_shared);
+ } else {
+ cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
+ sugov_update_single);
+ }
+ }
+ return 0;
+}
+
+static int sugov_stop(struct cpufreq_policy *policy)
+{
+ struct sugov_policy *sg_policy = policy->governor_data;
+ unsigned int cpu;
+
+ for_each_cpu(cpu, policy->cpus)
+ cpufreq_remove_update_util_hook(cpu);
+
+ synchronize_sched();
+
+ irq_work_sync(&sg_policy->irq_work);
+ cancel_work_sync(&sg_policy->work);
+ return 0;
+}
+
+static int sugov_limits(struct cpufreq_policy *policy)
+{
+ struct sugov_policy *sg_policy = policy->governor_data;
+
+ if (!policy->fast_switch_enabled) {
+ mutex_lock(&sg_policy->work_lock);
+
+ if (policy->max < policy->cur)
+ __cpufreq_driver_target(policy, policy->max,
+ CPUFREQ_RELATION_H);
+ else if (policy->min > policy->cur)
+ __cpufreq_driver_target(policy, policy->min,
+ CPUFREQ_RELATION_L);
+
+ mutex_unlock(&sg_policy->work_lock);
+ }
+
+ sg_policy->need_freq_update = true;
+ return 0;
+}
+
+int sugov_governor(struct cpufreq_policy *policy, unsigned int event)
+{
+ if (event == CPUFREQ_GOV_POLICY_INIT) {
+ return sugov_init(policy);
+ } else if (policy->governor_data) {
+ switch (event) {
+ case CPUFREQ_GOV_POLICY_EXIT:
+ return sugov_exit(policy);
+ case CPUFREQ_GOV_START:
+ return sugov_start(policy);
+ case CPUFREQ_GOV_STOP:
+ return sugov_stop(policy);
+ case CPUFREQ_GOV_LIMITS:
+ return sugov_limits(policy);
+ }
+ }
+ return -EINVAL;
+}
+
+static struct cpufreq_governor schedutil_gov = {
+ .name = "schedutil",
+ .governor = sugov_governor,
+ .owner = THIS_MODULE,
+};
+
+static int __init sugov_module_init(void)
+{
+ return cpufreq_register_governor(&schedutil_gov);
+}
+
+static void __exit sugov_module_exit(void)
+{
+ cpufreq_unregister_governor(&schedutil_gov);
+}
+
+MODULE_AUTHOR("Rafael J. Wysocki <rafael.j.wysocki@intel.com>");
+MODULE_DESCRIPTION("Utilization-based CPU frequency selection");
+MODULE_LICENSE("GPL");
+
+#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
+struct cpufreq_governor *cpufreq_default_governor(void)
+{
+ return &schedutil_gov;
+}
+
+fs_initcall(sugov_module_init);
+#else
+module_init(sugov_module_init);
+#endif
+module_exit(sugov_module_exit);
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
index 981fcd7..11e9705 100644
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -103,11 +103,11 @@
if (skip)
continue;
- if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
+ if (cpumask_any_and(tsk_cpus_allowed(p), vec->mask) >= nr_cpu_ids)
continue;
if (lowest_mask) {
- cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
+ cpumask_and(lowest_mask, tsk_cpus_allowed(p), vec->mask);
/*
* We have to ensure that we have at least one bit
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 686ec8a..fcb7f02 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -134,7 +134,7 @@
{
struct task_struct *p = dl_task_of(dl_se);
- if (p->nr_cpus_allowed > 1)
+ if (tsk_nr_cpus_allowed(p) > 1)
dl_rq->dl_nr_migratory++;
update_dl_migration(dl_rq);
@@ -144,7 +144,7 @@
{
struct task_struct *p = dl_task_of(dl_se);
- if (p->nr_cpus_allowed > 1)
+ if (tsk_nr_cpus_allowed(p) > 1)
dl_rq->dl_nr_migratory--;
update_dl_migration(dl_rq);
@@ -591,10 +591,10 @@
struct sched_dl_entity,
dl_timer);
struct task_struct *p = dl_task_of(dl_se);
- unsigned long flags;
+ struct rq_flags rf;
struct rq *rq;
- rq = task_rq_lock(p, &flags);
+ rq = task_rq_lock(p, &rf);
/*
* The task might have changed its scheduling policy to something
@@ -670,14 +670,14 @@
 * Nothing relies on rq->lock after this, so it's safe to drop
* rq->lock.
*/
- lockdep_unpin_lock(&rq->lock);
+ lockdep_unpin_lock(&rq->lock, rf.cookie);
push_dl_task(rq);
- lockdep_pin_lock(&rq->lock);
+ lockdep_repin_lock(&rq->lock, rf.cookie);
}
#endif
unlock:
- task_rq_unlock(rq, p, &flags);
+ task_rq_unlock(rq, p, &rf);
/*
* This can free the task_struct, including this hrtimer, do not touch
@@ -717,10 +717,6 @@
if (!dl_task(curr) || !on_dl_rq(dl_se))
return;
- /* Kick cpufreq (see the comment in linux/cpufreq.h). */
- if (cpu_of(rq) == smp_processor_id())
- cpufreq_trigger_update(rq_clock(rq));
-
/*
* Consumed budget is computed considering the time as
* observed by schedulable tasks (excluding time spent
@@ -736,6 +732,10 @@
return;
}
+ /* kick cpufreq (see the comment in linux/cpufreq.h). */
+ if (cpu_of(rq) == smp_processor_id())
+ cpufreq_trigger_update(rq_clock(rq));
+
schedstat_set(curr->se.statistics.exec_max,
max(curr->se.statistics.exec_max, delta_exec));
@@ -966,7 +966,7 @@
enqueue_dl_entity(&p->dl, pi_se, flags);
- if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
+ if (!task_current(rq, p) && tsk_nr_cpus_allowed(p) > 1)
enqueue_pushable_dl_task(rq, p);
}
@@ -1040,9 +1040,9 @@
* try to make it stay here, it might be important.
*/
if (unlikely(dl_task(curr)) &&
- (curr->nr_cpus_allowed < 2 ||
+ (tsk_nr_cpus_allowed(curr) < 2 ||
!dl_entity_preempt(&p->dl, &curr->dl)) &&
- (p->nr_cpus_allowed > 1)) {
+ (tsk_nr_cpus_allowed(p) > 1)) {
int target = find_later_rq(p);
if (target != -1 &&
@@ -1063,7 +1063,7 @@
* Current can't be migrated, useless to reschedule,
* let's hope p can move out.
*/
- if (rq->curr->nr_cpus_allowed == 1 ||
+ if (tsk_nr_cpus_allowed(rq->curr) == 1 ||
cpudl_find(&rq->rd->cpudl, rq->curr, NULL) == -1)
return;
@@ -1071,7 +1071,7 @@
* p is migratable, so let's not schedule it and
* see if it is pushed or pulled somewhere else.
*/
- if (p->nr_cpus_allowed != 1 &&
+ if (tsk_nr_cpus_allowed(p) != 1 &&
cpudl_find(&rq->rd->cpudl, p, NULL) != -1)
return;
@@ -1125,7 +1125,8 @@
return rb_entry(left, struct sched_dl_entity, rb_node);
}
-struct task_struct *pick_next_task_dl(struct rq *rq, struct task_struct *prev)
+struct task_struct *
+pick_next_task_dl(struct rq *rq, struct task_struct *prev, struct pin_cookie cookie)
{
struct sched_dl_entity *dl_se;
struct task_struct *p;
@@ -1140,9 +1141,9 @@
* disabled avoiding further scheduler activity on it and we're
* being very careful to re-start the picking loop.
*/
- lockdep_unpin_lock(&rq->lock);
+ lockdep_unpin_lock(&rq->lock, cookie);
pull_dl_task(rq);
- lockdep_pin_lock(&rq->lock);
+ lockdep_repin_lock(&rq->lock, cookie);
/*
* pull_rt_task() can drop (and re-acquire) rq->lock; this
* means a stop task can slip in, in which case we need to
@@ -1185,7 +1186,7 @@
{
update_curr_dl(rq);
- if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1)
+ if (on_dl_rq(&p->dl) && tsk_nr_cpus_allowed(p) > 1)
enqueue_pushable_dl_task(rq, p);
}
@@ -1286,7 +1287,7 @@
if (unlikely(!later_mask))
return -1;
- if (task->nr_cpus_allowed == 1)
+ if (tsk_nr_cpus_allowed(task) == 1)
return -1;
/*
@@ -1392,7 +1393,7 @@
if (double_lock_balance(rq, later_rq)) {
if (unlikely(task_rq(task) != rq ||
!cpumask_test_cpu(later_rq->cpu,
- &task->cpus_allowed) ||
+ tsk_cpus_allowed(task)) ||
task_running(rq, task) ||
!dl_task(task) ||
!task_on_rq_queued(task))) {
@@ -1432,7 +1433,7 @@
BUG_ON(rq->cpu != task_cpu(p));
BUG_ON(task_current(rq, p));
- BUG_ON(p->nr_cpus_allowed <= 1);
+ BUG_ON(tsk_nr_cpus_allowed(p) <= 1);
BUG_ON(!task_on_rq_queued(p));
BUG_ON(!dl_task(p));
@@ -1471,7 +1472,7 @@
*/
if (dl_task(rq->curr) &&
dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) &&
- rq->curr->nr_cpus_allowed > 1) {
+ tsk_nr_cpus_allowed(rq->curr) > 1) {
resched_curr(rq);
return 0;
}
@@ -1618,9 +1619,9 @@
{
if (!task_running(rq, p) &&
!test_tsk_need_resched(rq->curr) &&
- p->nr_cpus_allowed > 1 &&
+ tsk_nr_cpus_allowed(p) > 1 &&
dl_task(rq->curr) &&
- (rq->curr->nr_cpus_allowed < 2 ||
+ (tsk_nr_cpus_allowed(rq->curr) < 2 ||
!dl_entity_preempt(&p->dl, &rq->curr->dl))) {
push_dl_tasks(rq);
}
@@ -1724,7 +1725,7 @@
if (task_on_rq_queued(p) && rq->curr != p) {
#ifdef CONFIG_SMP
- if (p->nr_cpus_allowed > 1 && rq->dl.overloaded)
+ if (tsk_nr_cpus_allowed(p) > 1 && rq->dl.overloaded)
queue_push_tasks(rq);
#else
if (dl_task(rq->curr))
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 4fbc3bd..cf905f6 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -626,15 +626,16 @@
#undef P
#undef PN
-#ifdef CONFIG_SCHEDSTATS
-#define P(n) SEQ_printf(m, " .%-30s: %d\n", #n, rq->n);
-#define P64(n) SEQ_printf(m, " .%-30s: %Ld\n", #n, rq->n);
-
#ifdef CONFIG_SMP
+#define P64(n) SEQ_printf(m, " .%-30s: %Ld\n", #n, rq->n);
P64(avg_idle);
P64(max_idle_balance_cost);
+#undef P64
#endif
+#ifdef CONFIG_SCHEDSTATS
+#define P(n) SEQ_printf(m, " .%-30s: %d\n", #n, rq->n);
+
if (schedstat_enabled()) {
P(yld_count);
P(sched_count);
@@ -644,7 +645,6 @@
}
#undef P
-#undef P64
#endif
spin_lock_irqsave(&sched_debug_lock, flags);
print_cfs_stats(m, cpu);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e7dd0ec..218f8e8 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -204,7 +204,7 @@
* OR
* (delta_exec * (weight * lw->inv_weight)) >> WMULT_SHIFT
*
- * Either weight := NICE_0_LOAD and lw \e prio_to_wmult[], in which case
+ * Either weight := NICE_0_LOAD and lw \e sched_prio_to_wmult[], in which case
* we're guaranteed shift stays positive because inv_weight is guaranteed to
* fit 32 bits, and NICE_0_LOAD gives another 10 bits; therefore shift >= 22.
*
@@ -682,17 +682,68 @@
sa->period_contrib = 1023;
sa->load_avg = scale_load_down(se->load.weight);
sa->load_sum = sa->load_avg * LOAD_AVG_MAX;
- sa->util_avg = scale_load_down(SCHED_LOAD_SCALE);
- sa->util_sum = sa->util_avg * LOAD_AVG_MAX;
+ /*
+ * At this point, util_avg won't be used in select_task_rq_fair anyway
+ */
+ sa->util_avg = 0;
+ sa->util_sum = 0;
/* when this task enqueue'ed, it will contribute to its cfs_rq's load_avg */
}
+/*
+ * With new tasks being created, their initial util_avgs are extrapolated
+ * based on the cfs_rq's current util_avg:
+ *
+ * util_avg = cfs_rq->util_avg / (cfs_rq->load_avg + 1) * se.load.weight
+ *
+ * However, in many cases, the above util_avg does not give the desired
+ * value. Moreover, the sum of the util_avgs may be divergent, such
+ * as when the series is a harmonic series.
+ *
+ * To solve this problem, we also cap the util_avg of successive tasks to
+ * only 1/2 of the remaining utilization budget:
+ *
+ * util_avg_cap = (1024 - cfs_rq->avg.util_avg) / 2^n
+ *
+ * where n denotes the nth task.
+ *
+ * For example, the simplest such series from the beginning would look like:
+ *
+ * task util_avg: 512, 256, 128, 64, 32, 16, 8, ...
+ * cfs_rq util_avg: 512, 768, 896, 960, 992, 1008, 1016, ...
+ *
+ * Finally, that extrapolated util_avg is clamped to the cap (util_avg_cap)
+ * if util_avg > util_avg_cap.
+ */
+void post_init_entity_util_avg(struct sched_entity *se)
+{
+ struct cfs_rq *cfs_rq = cfs_rq_of(se);
+ struct sched_avg *sa = &se->avg;
+ long cap = (long)(SCHED_CAPACITY_SCALE - cfs_rq->avg.util_avg) / 2;
+
+ if (cap > 0) {
+ if (cfs_rq->avg.util_avg != 0) {
+ sa->util_avg = cfs_rq->avg.util_avg * se->load.weight;
+ sa->util_avg /= (cfs_rq->avg.load_avg + 1);
+
+ if (sa->util_avg > cap)
+ sa->util_avg = cap;
+ } else {
+ sa->util_avg = cap;
+ }
+ sa->util_sum = sa->util_avg * LOAD_AVG_MAX;
+ }
+}
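
A standalone rendering of the capped series from the comment above (plain C; SCHED_CAPACITY_SCALE taken as 1024):

#include <stdio.h>

int main(void)
{
	long cfs_util = 0;	/* cfs_rq->avg.util_avg, starting from idle */
	int i;

	for (i = 1; i <= 7; i++) {
		long cap = (1024 - cfs_util) / 2;	/* half the remaining budget */

		cfs_util += cap;
		printf("task %d: util_avg=%ld cfs_rq=%ld\n", i, cap, cfs_util);
	}
	/* prints 512, 256, 128, 64, 32, 16, 8 and 512, 768, ..., 1016 */
	return 0;
}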
+
static inline unsigned long cfs_rq_runnable_load_avg(struct cfs_rq *cfs_rq);
static inline unsigned long cfs_rq_load_avg(struct cfs_rq *cfs_rq);
#else
void init_entity_runnable_average(struct sched_entity *se)
{
}
+void post_init_entity_util_avg(struct sched_entity *se)
+{
+}
#endif
/*
@@ -2437,10 +2488,12 @@
update_load_sub(&cfs_rq->load, se->load.weight);
if (!parent_entity(se))
update_load_sub(&rq_of(cfs_rq)->load, se->load.weight);
+#ifdef CONFIG_SMP
if (entity_is_task(se)) {
account_numa_dequeue(rq_of(cfs_rq), task_of(se));
list_del_init(&se->group_node);
}
+#endif
cfs_rq->nr_running--;
}
@@ -2550,6 +2603,16 @@
};
/*
+ * Precomputed \Sum y^k (1 <= k <= n, where n % 32 == 0). Values are rounded
+ * down to the nearest integer. See Documentation/scheduler/sched-avg.txt for
+ * how these were generated:
+ */
+static const u32 __accumulated_sum_N32[] = {
+ 0, 23371, 35056, 40899, 43820, 45281,
+ 46011, 46376, 46559, 46650, 46696, 46719,
+};
+
+/*
* Approximate:
* val * y^n, where y^32 ~= 0.5 (~1 scheduling period)
*/
@@ -2597,22 +2660,13 @@
else if (unlikely(n >= LOAD_AVG_MAX_N))
return LOAD_AVG_MAX;
- /* Compute \Sum k^n combining precomputed values for k^i, \Sum k^j */
- do {
- contrib /= 2; /* y^LOAD_AVG_PERIOD = 1/2 */
- contrib += runnable_avg_yN_sum[LOAD_AVG_PERIOD];
-
- n -= LOAD_AVG_PERIOD;
- } while (n > LOAD_AVG_PERIOD);
-
+ /* Since n < LOAD_AVG_MAX_N, n/LOAD_AVG_PERIOD < 11 */
+ contrib = __accumulated_sum_N32[n/LOAD_AVG_PERIOD];
+ n %= LOAD_AVG_PERIOD;
contrib = decay_load(contrib, n);
return contrib + runnable_avg_yN_sum[n];
}
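
The identity that makes the table work: with y^32 = 1/2, the running sum S(n) = \Sum_{k=1..n} y^k decomposes as S(n) = y^(n mod 32) * S(32 * (n / 32)) + S(n mod 32), so one table lookup plus one decay replaces the old loop. A floating-point sanity check of that decomposition (illustrative, not kernel code; compile with -lm):

#include <math.h>
#include <stdio.h>

static double S(int n, double y)	/* \Sum_{k=1..n} y^k, brute force */
{
	double s = 0.0;
	int k;

	for (k = 1; k <= n; k++)
		s += pow(y, k);
	return s;
}

int main(void)
{
	double y = pow(0.5, 1.0 / 32.0);	/* y^32 == 1/2 */
	int n = 100;				/* 3 full periods plus 4 */

	double direct = S(n, y);
	double split = pow(y, n % 32) * S(32 * (n / 32), y) + S(n % 32, y);

	printf("%f %f\n", direct, split);	/* equal up to rounding */
	return 0;
}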
-#if (SCHED_LOAD_SHIFT - SCHED_LOAD_RESOLUTION) != 10 || SCHED_CAPACITY_SHIFT != 10
-#error "load tracking assumes 2^10 as unit"
-#endif
-
#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT)
/*
@@ -2821,55 +2875,11 @@
static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq);
-/* Group cfs_rq's load_avg is used for task_h_load and update_cfs_share */
-static inline int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
+static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
{
- struct sched_avg *sa = &cfs_rq->avg;
- int decayed, removed = 0;
-
- if (atomic_long_read(&cfs_rq->removed_load_avg)) {
- s64 r = atomic_long_xchg(&cfs_rq->removed_load_avg, 0);
- sa->load_avg = max_t(long, sa->load_avg - r, 0);
- sa->load_sum = max_t(s64, sa->load_sum - r * LOAD_AVG_MAX, 0);
- removed = 1;
- }
-
- if (atomic_long_read(&cfs_rq->removed_util_avg)) {
- long r = atomic_long_xchg(&cfs_rq->removed_util_avg, 0);
- sa->util_avg = max_t(long, sa->util_avg - r, 0);
- sa->util_sum = max_t(s32, sa->util_sum - r * LOAD_AVG_MAX, 0);
- }
-
- decayed = __update_load_avg(now, cpu_of(rq_of(cfs_rq)), sa,
- scale_load_down(cfs_rq->load.weight), cfs_rq->curr != NULL, cfs_rq);
-
-#ifndef CONFIG_64BIT
- smp_wmb();
- cfs_rq->load_last_update_time_copy = sa->last_update_time;
-#endif
-
- return decayed || removed;
-}
-
-/* Update task and its cfs_rq load average */
-static inline void update_load_avg(struct sched_entity *se, int update_tg)
-{
- struct cfs_rq *cfs_rq = cfs_rq_of(se);
- u64 now = cfs_rq_clock_task(cfs_rq);
struct rq *rq = rq_of(cfs_rq);
int cpu = cpu_of(rq);
- /*
- * Track task load average for carrying it to new CPU after migrated, and
- * track group sched_entity load average for task_h_load calc in migration
- */
- __update_load_avg(now, cpu, &se->avg,
- se->on_rq * scale_load_down(se->load.weight),
- cfs_rq->curr == se, NULL);
-
- if (update_cfs_rq_load_avg(now, cfs_rq) && update_tg)
- update_tg_load_avg(cfs_rq, 0);
-
if (cpu == smp_processor_id() && &rq->cfs == cfs_rq) {
unsigned long max = rq->cpu_capacity_orig;
@@ -2894,6 +2904,61 @@
}
}
+/* Group cfs_rq's load_avg is used for task_h_load and update_cfs_share */
+static inline int
+update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
+{
+ struct sched_avg *sa = &cfs_rq->avg;
+ int decayed, removed_load = 0, removed_util = 0;
+
+ if (atomic_long_read(&cfs_rq->removed_load_avg)) {
+ s64 r = atomic_long_xchg(&cfs_rq->removed_load_avg, 0);
+ sa->load_avg = max_t(long, sa->load_avg - r, 0);
+ sa->load_sum = max_t(s64, sa->load_sum - r * LOAD_AVG_MAX, 0);
+ removed_load = 1;
+ }
+
+ if (atomic_long_read(&cfs_rq->removed_util_avg)) {
+ long r = atomic_long_xchg(&cfs_rq->removed_util_avg, 0);
+ sa->util_avg = max_t(long, sa->util_avg - r, 0);
+ sa->util_sum = max_t(s32, sa->util_sum - r * LOAD_AVG_MAX, 0);
+ removed_util = 1;
+ }
+
+ decayed = __update_load_avg(now, cpu_of(rq_of(cfs_rq)), sa,
+ scale_load_down(cfs_rq->load.weight), cfs_rq->curr != NULL, cfs_rq);
+
+#ifndef CONFIG_64BIT
+ smp_wmb();
+ cfs_rq->load_last_update_time_copy = sa->last_update_time;
+#endif
+
+ if (update_freq && (decayed || removed_util))
+ cfs_rq_util_change(cfs_rq);
+
+ return decayed || removed_load;
+}
+
+/* Update task and its cfs_rq load average */
+static inline void update_load_avg(struct sched_entity *se, int update_tg)
+{
+ struct cfs_rq *cfs_rq = cfs_rq_of(se);
+ u64 now = cfs_rq_clock_task(cfs_rq);
+ struct rq *rq = rq_of(cfs_rq);
+ int cpu = cpu_of(rq);
+
+ /*
+ * Track task load average for carrying it to new CPU after migrated, and
+ * track group sched_entity load average for task_h_load calc in migration
+ */
+ __update_load_avg(now, cpu, &se->avg,
+ se->on_rq * scale_load_down(se->load.weight),
+ cfs_rq->curr == se, NULL);
+
+ if (update_cfs_rq_load_avg(now, cfs_rq, true) && update_tg)
+ update_tg_load_avg(cfs_rq, 0);
+}
+
static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
if (!sched_feat(ATTACH_AGE_LOAD))
@@ -2919,6 +2984,8 @@
cfs_rq->avg.load_sum += se->avg.load_sum;
cfs_rq->avg.util_avg += se->avg.util_avg;
cfs_rq->avg.util_sum += se->avg.util_sum;
+
+ cfs_rq_util_change(cfs_rq);
}
static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -2931,6 +2998,8 @@
cfs_rq->avg.load_sum = max_t(s64, cfs_rq->avg.load_sum - se->avg.load_sum, 0);
cfs_rq->avg.util_avg = max_t(long, cfs_rq->avg.util_avg - se->avg.util_avg, 0);
cfs_rq->avg.util_sum = max_t(s32, cfs_rq->avg.util_sum - se->avg.util_sum, 0);
+
+ cfs_rq_util_change(cfs_rq);
}
/* Add the load generated by se into cfs_rq's load average */
@@ -2948,7 +3017,7 @@
cfs_rq->curr == se, NULL);
}
- decayed = update_cfs_rq_load_avg(now, cfs_rq);
+ decayed = update_cfs_rq_load_avg(now, cfs_rq, !migrated);
cfs_rq->runnable_load_avg += sa->load_avg;
cfs_rq->runnable_load_sum += sa->load_sum;
@@ -3185,20 +3254,61 @@
#endif
}
+
+/*
+ * MIGRATION
+ *
+ * dequeue
+ * update_curr()
+ * update_min_vruntime()
+ * vruntime -= min_vruntime
+ *
+ * enqueue
+ * update_curr()
+ * update_min_vruntime()
+ * vruntime += min_vruntime
+ *
+ * this way the vruntime transition between RQs is done when both
+ * min_vruntime values are up-to-date.
+ *
+ * WAKEUP (remote)
+ *
+ * ->migrate_task_rq_fair() (p->state == TASK_WAKING)
+ * vruntime -= min_vruntime
+ *
+ * enqueue
+ * update_curr()
+ * update_min_vruntime()
+ * vruntime += min_vruntime
+ *
+ * this way we can tolerate a possibly stale min_vruntime on the originating
+ * CPU, while the destination CPU's min_vruntime is up-to-date at enqueue time.
+ */
+
static void
enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
{
- /*
- * Update the normalized vruntime before updating min_vruntime
- * through calling update_curr().
- */
- if (!(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_WAKING))
- se->vruntime += cfs_rq->min_vruntime;
+ bool renorm = !(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATED);
+ bool curr = cfs_rq->curr == se;
/*
- * Update run-time statistics of the 'current'.
+ * If we're the current task, we must renormalise before calling
+ * update_curr().
*/
+ if (renorm && curr)
+ se->vruntime += cfs_rq->min_vruntime;
+
update_curr(cfs_rq);
+
+ /*
+ * Otherwise, renormalise after, such that we're placed at the current
+ * moment in time, instead of some random moment in the past. Being
+ * placed in the past could significantly boost this task, to the
+ * detriment of fairness for existing tasks.
+ */
+ if (renorm && !curr)
+ se->vruntime += cfs_rq->min_vruntime;
+
enqueue_entity_load_avg(cfs_rq, se);
account_entity_enqueue(cfs_rq, se);
update_cfs_shares(cfs_rq);
@@ -3214,7 +3324,7 @@
update_stats_enqueue(cfs_rq, se);
check_spread(cfs_rq, se);
}
- if (se != cfs_rq->curr)
+ if (!curr)
__enqueue_entity(cfs_rq, se);
se->on_rq = 1;
@@ -4422,7 +4532,7 @@
}
#ifdef CONFIG_SMP
-
+#ifdef CONFIG_NO_HZ_COMMON
/*
* per rq 'load' arrray crap; XXX kill this.
*/
@@ -4488,13 +4598,13 @@
}
return load;
}
+#endif /* CONFIG_NO_HZ_COMMON */
/**
- * __update_cpu_load - update the rq->cpu_load[] statistics
+ * __cpu_load_update - update the rq->cpu_load[] statistics
* @this_rq: The rq to update statistics for
* @this_load: The current load
* @pending_updates: The number of missed updates
- * @active: !0 for NOHZ_FULL
*
* Update rq->cpu_load[] statistics. This function is usually called every
* scheduler tick (TICK_NSEC).
@@ -4523,12 +4633,12 @@
* load[i]_n = (1 - 1/2^i)^n * load[i]_0
*
* see decay_load_misses(). For NOHZ_FULL we get to subtract and add the extra
- * term. See the @active paramter.
+ * term.
*/
-static void __update_cpu_load(struct rq *this_rq, unsigned long this_load,
- unsigned long pending_updates, int active)
+static void cpu_load_update(struct rq *this_rq, unsigned long this_load,
+ unsigned long pending_updates)
{
- unsigned long tickless_load = active ? this_rq->cpu_load[0] : 0;
+ unsigned long __maybe_unused tickless_load = this_rq->cpu_load[0];
int i, scale;
this_rq->nr_load_updates++;
@@ -4541,6 +4651,7 @@
/* scale is effectively 1 << i now, and >> i divides by scale */
old_load = this_rq->cpu_load[i];
+#ifdef CONFIG_NO_HZ_COMMON
old_load = decay_load_missed(old_load, pending_updates - 1, i);
if (tickless_load) {
old_load -= decay_load_missed(tickless_load, pending_updates - 1, i);
@@ -4551,6 +4662,7 @@
*/
old_load += tickless_load;
}
+#endif
new_load = this_load;
/*
* Round up the averaging division if load is increasing. This
@@ -4573,10 +4685,23 @@
}
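
For the missed-update decay described earlier (load[i]_n = (1 - 1/2^i)^n * load[i]_0): a standalone numeric check of what the decay amounts to for one index, illustrative only:

#include <stdio.h>

int main(void)
{
	unsigned long load = 1000;	/* load[2] before the missed ticks */
	int i = 2, missed = 3, n;

	/* Each missed tick scales index i by (1 - 1/2^i) = 3/4 here, so
	 * three missed ticks scale by (3/4)^3 = 27/64 ~= 0.42.
	 */
	for (n = 0; n < missed; n++)
		load -= load >> i;

	printf("%lu\n", load);	/* 423 with integer truncation; exact: 421.9 */
	return 0;
}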
#ifdef CONFIG_NO_HZ_COMMON
-static void __update_cpu_load_nohz(struct rq *this_rq,
- unsigned long curr_jiffies,
- unsigned long load,
- int active)
+/*
+ * There is no sane way to deal with nohz on smp when using jiffies because the
+ * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading
+ * causing off-by-one errors in observed deltas; {0,2} instead of {1,1}.
+ *
+ * Therefore we need to avoid the delta approach from the regular tick when
+ * possible since that would seriously skew the load calculation. This is why we
+ * use cpu_load_update_periodic() for CPUs out of nohz. However we'll rely on
+ * jiffies deltas for updates happening while in nohz mode (idle ticks, idle
+ * loop exit, nohz_idle_balance, nohz full exit...)
+ *
+ * This means we might still be one tick off for nohz periods.
+ */
+
+static void cpu_load_update_nohz(struct rq *this_rq,
+ unsigned long curr_jiffies,
+ unsigned long load)
{
unsigned long pending_updates;
@@ -4588,28 +4713,15 @@
* In the NOHZ_FULL case, we were non-idle, we should consider
* its weighted load.
*/
- __update_cpu_load(this_rq, load, pending_updates, active);
+ cpu_load_update(this_rq, load, pending_updates);
}
}
/*
- * There is no sane way to deal with nohz on smp when using jiffies because the
- * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading
- * causing off-by-one errors in observed deltas; {0,2} instead of {1,1}.
- *
- * Therefore we cannot use the delta approach from the regular tick since that
- * would seriously skew the load calculation. However we'll make do for those
- * updates happening while idle (nohz_idle_balance) or coming out of idle
- * (tick_nohz_idle_exit).
- *
- * This means we might still be one tick off for nohz periods.
- */
-
-/*
* Called from nohz_idle_balance() to update the load ratings before doing the
* idle balance.
*/
-static void update_cpu_load_idle(struct rq *this_rq)
+static void cpu_load_update_idle(struct rq *this_rq)
{
/*
* bail if there's load or we're actually up-to-date.
@@ -4617,38 +4729,71 @@
if (weighted_cpuload(cpu_of(this_rq)))
return;
- __update_cpu_load_nohz(this_rq, READ_ONCE(jiffies), 0, 0);
+ cpu_load_update_nohz(this_rq, READ_ONCE(jiffies), 0);
}
/*
- * Called from tick_nohz_idle_exit() -- try and fix up the ticks we missed.
+ * Record CPU load on nohz entry so we know the tickless load to account
+ * on nohz exit. cpu_load[0] then happens to be updated more frequently
+ * than the other cpu_load[idx], but that should be fine because cpu_load
+ * readers shouldn't rely on synchronized cpu_load[*] updates.
*/
-void update_cpu_load_nohz(int active)
+void cpu_load_update_nohz_start(void)
{
struct rq *this_rq = this_rq();
+
+ /*
+ * This is all lockless but should be fine. If weighted_cpuload changes
+ * concurrently we'll exit nohz. And cpu_load write can race with
+ * cpu_load_update_idle() but both updater would be writing the same.
+ */
+ this_rq->cpu_load[0] = weighted_cpuload(cpu_of(this_rq));
+}
+
+/*
+ * Account the tickless load in the end of a nohz frame.
+ */
+void cpu_load_update_nohz_stop(void)
+{
unsigned long curr_jiffies = READ_ONCE(jiffies);
- unsigned long load = active ? weighted_cpuload(cpu_of(this_rq)) : 0;
+ struct rq *this_rq = this_rq();
+ unsigned long load;
if (curr_jiffies == this_rq->last_load_update_tick)
return;
+ load = weighted_cpuload(cpu_of(this_rq));
raw_spin_lock(&this_rq->lock);
- __update_cpu_load_nohz(this_rq, curr_jiffies, load, active);
+ update_rq_clock(this_rq);
+ cpu_load_update_nohz(this_rq, curr_jiffies, load);
raw_spin_unlock(&this_rq->lock);
}
-#endif /* CONFIG_NO_HZ */
+#else /* !CONFIG_NO_HZ_COMMON */
+static inline void cpu_load_update_nohz(struct rq *this_rq,
+ unsigned long curr_jiffies,
+ unsigned long load) { }
+#endif /* CONFIG_NO_HZ_COMMON */
+
+static void cpu_load_update_periodic(struct rq *this_rq, unsigned long load)
+{
+#ifdef CONFIG_NO_HZ_COMMON
+ /* See the mess around cpu_load_update_nohz(). */
+ this_rq->last_load_update_tick = READ_ONCE(jiffies);
+#endif
+ cpu_load_update(this_rq, load, 1);
+}
/*
* Called from scheduler_tick()
*/
-void update_cpu_load_active(struct rq *this_rq)
+void cpu_load_update_active(struct rq *this_rq)
{
unsigned long load = weighted_cpuload(cpu_of(this_rq));
- /*
- * See the mess around update_cpu_load_idle() / update_cpu_load_nohz().
- */
- this_rq->last_load_update_tick = jiffies;
- __update_cpu_load(this_rq, load, 1, 1);
+
+ if (tick_nohz_tick_stopped())
+ cpu_load_update_nohz(this_rq, READ_ONCE(jiffies), load);
+ else
+ cpu_load_update_periodic(this_rq, load);
}
/*
@@ -4706,46 +4851,6 @@
return 0;
}
-static void record_wakee(struct task_struct *p)
-{
- /*
- * Rough decay (wiping) for cost saving, don't worry
- * about the boundary, really active task won't care
- * about the loss.
- */
- if (time_after(jiffies, current->wakee_flip_decay_ts + HZ)) {
- current->wakee_flips >>= 1;
- current->wakee_flip_decay_ts = jiffies;
- }
-
- if (current->last_wakee != p) {
- current->last_wakee = p;
- current->wakee_flips++;
- }
-}
-
-static void task_waking_fair(struct task_struct *p)
-{
- struct sched_entity *se = &p->se;
- struct cfs_rq *cfs_rq = cfs_rq_of(se);
- u64 min_vruntime;
-
-#ifndef CONFIG_64BIT
- u64 min_vruntime_copy;
-
- do {
- min_vruntime_copy = cfs_rq->min_vruntime_copy;
- smp_rmb();
- min_vruntime = cfs_rq->min_vruntime;
- } while (min_vruntime != min_vruntime_copy);
-#else
- min_vruntime = cfs_rq->min_vruntime;
-#endif
-
- se->vruntime -= min_vruntime;
- record_wakee(p);
-}
-
#ifdef CONFIG_FAIR_GROUP_SCHED
/*
* effective_load() calculates the load change as seen from the root_task_group
@@ -4861,17 +4966,39 @@
#endif
+static void record_wakee(struct task_struct *p)
+{
+ /*
+ * Only decay a single time; tasks that have less than one wakeup per
+ * jiffy will not have built up many flips.
+ */
+ if (time_after(jiffies, current->wakee_flip_decay_ts + HZ)) {
+ current->wakee_flips >>= 1;
+ current->wakee_flip_decay_ts = jiffies;
+ }
+
+ if (current->last_wakee != p) {
+ current->last_wakee = p;
+ current->wakee_flips++;
+ }
+}
+
/*
* Detect M:N waker/wakee relationships via a switching-frequency heuristic.
+ *
* A waker of many should wake a different task than the one last awakened
- * at a frequency roughly N times higher than one of its wakees. In order
- * to determine whether we should let the load spread vs consolodating to
- * shared cache, we look for a minimum 'flip' frequency of llc_size in one
- * partner, and a factor of lls_size higher frequency in the other. With
- * both conditions met, we can be relatively sure that the relationship is
- * non-monogamous, with partner count exceeding socket size. Waker/wakee
- * being client/server, worker/dispatcher, interrupt source or whatever is
- * irrelevant, spread criteria is apparent partner count exceeds socket size.
+ * at a frequency roughly N times higher than one of its wakees.
+ *
+ * In order to determine whether we should let the load spread vs consolidating
+ * to shared cache, we look for a minimum 'flip' frequency of llc_size in one
+ * partner, and a factor of llc_size higher frequency in the other.
+ *
+ * With both conditions met, we can be relatively sure that the relationship is
+ * non-monogamous, with partner count exceeding socket size.
+ *
+ * Waker/wakee being client/server, worker/dispatcher, interrupt source or
+ * whatever is irrelevant, spread criteria is apparent partner count exceeds
+ * socket size.
*/
static int wake_wide(struct task_struct *p)
{
@@ -5176,8 +5303,10 @@
int want_affine = 0;
int sync = wake_flags & WF_SYNC;
- if (sd_flag & SD_BALANCE_WAKE)
+ if (sd_flag & SD_BALANCE_WAKE) {
+ record_wakee(p);
want_affine = !wake_wide(p) && cpumask_test_cpu(cpu, tsk_cpus_allowed(p));
+ }
rcu_read_lock();
for_each_domain(cpu, tmp) {
@@ -5257,6 +5386,32 @@
static void migrate_task_rq_fair(struct task_struct *p)
{
/*
+ * As blocked tasks retain absolute vruntime the migration needs to
+ * deal with this by subtracting the old and adding the new
+ * min_vruntime -- the latter is done by enqueue_entity() when placing
+ * the task on the new runqueue.
+ */
+ if (p->state == TASK_WAKING) {
+ struct sched_entity *se = &p->se;
+ struct cfs_rq *cfs_rq = cfs_rq_of(se);
+ u64 min_vruntime;
+
+#ifndef CONFIG_64BIT
+ u64 min_vruntime_copy;
+
+ do {
+ min_vruntime_copy = cfs_rq->min_vruntime_copy;
+ smp_rmb();
+ min_vruntime = cfs_rq->min_vruntime;
+ } while (min_vruntime != min_vruntime_copy);
+#else
+ min_vruntime = cfs_rq->min_vruntime;
+#endif
+
+ se->vruntime -= min_vruntime;
+ }
+
+ /*
 * We are supposed to update the task to "current" time, then it's up to date
 * and ready to go to the new CPU/cfs_rq. But we have difficulty in getting
* what current time is, so simply throw away the out-of-date time. This
@@ -5439,7 +5594,7 @@
}
static struct task_struct *
-pick_next_task_fair(struct rq *rq, struct task_struct *prev)
+pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct pin_cookie cookie)
{
struct cfs_rq *cfs_rq = &rq->cfs;
struct sched_entity *se;
@@ -5552,9 +5707,9 @@
* further scheduler activity on it and we're being very careful to
* re-start the picking loop.
*/
- lockdep_unpin_lock(&rq->lock);
+ lockdep_unpin_lock(&rq->lock, cookie);
new_tasks = idle_balance(rq);
- lockdep_pin_lock(&rq->lock);
+ lockdep_repin_lock(&rq->lock, cookie);
/*
* Because idle_balance() releases (and re-acquires) rq->lock, it is
* possible for any higher priority task to appear. In that case we
@@ -5653,7 +5808,7 @@
* W_i,0 = \Sum_j w_i,j (2)
*
* Where w_i,j is the weight of the j-th runnable task on cpu i. This weight
- * is derived from the nice value as per prio_to_weight[].
+ * is derived from the nice value as per sched_prio_to_weight[].
*
* The weight average is an exponential decay average of the instantaneous
* weight:
@@ -6155,7 +6310,7 @@
if (throttled_hierarchy(cfs_rq))
continue;
- if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq))
+ if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq, true))
update_tg_load_avg(cfs_rq, 0);
}
raw_spin_unlock_irqrestore(&rq->lock, flags);
@@ -6216,7 +6371,7 @@
raw_spin_lock_irqsave(&rq->lock, flags);
update_rq_clock(rq);
- update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq);
+ update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq, true);
raw_spin_unlock_irqrestore(&rq->lock, flags);
}
@@ -6625,6 +6780,9 @@
if (!(env->sd->flags & SD_ASYM_PACKING))
return true;
+ /* No ASYM_PACKING if target cpu is already busy */
+ if (env->idle == CPU_NOT_IDLE)
+ return true;
/*
* ASYM_PACKING needs to move all the work to the lowest
* numbered CPUs in the group, therefore mark all groups
@@ -6634,7 +6792,8 @@
if (!sds->busiest)
return true;
- if (group_first_cpu(sds->busiest) > group_first_cpu(sg))
+ /* Prefer to move work away from the highest-numbered CPU */
+ if (group_first_cpu(sds->busiest) < group_first_cpu(sg))
return true;
}
@@ -6780,6 +6939,9 @@
if (!(env->sd->flags & SD_ASYM_PACKING))
return 0;
+ if (env->idle == CPU_NOT_IDLE)
+ return 0;
+
if (!sds->busiest)
return 0;
@@ -6888,9 +7050,10 @@
}
/*
- * In the presence of smp nice balancing, certain scenarios can have
- * max load less than avg load(as we skip the groups at or below
- * its cpu_capacity, while calculating max_load..)
+ * Avg load of busiest sg can be less and avg load of local sg can
+ * be greater than avg load across all sgs of sd because avg load
+ * factors in sg capacity and sgs with smaller group_type are
+ * skipped when updating the busiest sg:
*/
if (busiest->avg_load <= sds->avg_load ||
local->avg_load >= sds->avg_load) {
@@ -6903,11 +7066,12 @@
*/
if (busiest->group_type == group_overloaded &&
local->group_type == group_overloaded) {
- load_above_capacity = busiest->sum_nr_running *
- SCHED_LOAD_SCALE;
- if (load_above_capacity > busiest->group_capacity)
+ load_above_capacity = busiest->sum_nr_running * SCHED_CAPACITY_SCALE;
+ if (load_above_capacity > busiest->group_capacity) {
load_above_capacity -= busiest->group_capacity;
- else
+ load_above_capacity *= NICE_0_LOAD;
+ load_above_capacity /= busiest->group_capacity;
+ } else
load_above_capacity = ~0UL;
}
@@ -6915,9 +7079,8 @@
* We're trying to get all the cpus to the average_load, so we don't
* want to push ourselves above the average load, nor do we wish to
* reduce the max loaded cpu below the average load. At the same time,
- * we also don't want to reduce the group load below the group capacity
- * (so that we can implement power-savings policies etc). Thus we look
- * for the minimum possible imbalance.
+ * we also don't want to reduce the group load below the group
+ * capacity. Thus we look for the minimum possible imbalance.
*/
max_pull = min(busiest->avg_load - sds->avg_load, load_above_capacity);
@@ -6941,10 +7104,7 @@
/**
* find_busiest_group - Returns the busiest group within the sched_domain
- * if there is an imbalance. If there isn't an imbalance, and
- * the user has opted for power-savings, it returns a group whose
- * CPUs can be put to idle by rebalancing those tasks elsewhere, if
- * such a group exists.
+ * if there is an imbalance.
*
* Also calculates the amount of weighted load which should be moved
* to restore balance.
@@ -6952,9 +7112,6 @@
* @env: The load balancing environment.
*
* Return: - The busiest group if imbalance exists.
- * - If no imbalance and user has opted for power-savings balance,
- * return the least loaded group whose CPUs can be
- * put to idle by rebalancing its tasks onto our group.
*/
static struct sched_group *find_busiest_group(struct lb_env *env)
{
@@ -6972,8 +7129,7 @@
busiest = &sds.busiest_stat;
/* ASYM feature bypasses nice load balance check */
- if ((env->idle == CPU_IDLE || env->idle == CPU_NEWLY_IDLE) &&
- check_asym_packing(env, &sds))
+ if (check_asym_packing(env, &sds))
return sds.busiest;
/* There is no busy sibling group to pull tasks from */
@@ -7398,10 +7554,7 @@
&busiest->active_balance_work);
}
- /*
- * We've kicked active balancing, reset the failure
- * counter.
- */
+ /* We've kicked active balancing, force task migration. */
sd->nr_balance_failed = sd->cache_nice_tries+1;
}
} else
@@ -7636,10 +7789,13 @@
schedstat_inc(sd, alb_count);
p = detach_one_task(&env);
- if (p)
+ if (p) {
schedstat_inc(sd, alb_pushed);
- else
+ /* Active balancing done, reset the failure counter. */
+ sd->nr_balance_failed = 0;
+ } else {
schedstat_inc(sd, alb_failed);
+ }
}
rcu_read_unlock();
out_unlock:
@@ -7710,7 +7866,7 @@
return;
}
-static inline void nohz_balance_exit_idle(int cpu)
+void nohz_balance_exit_idle(unsigned int cpu)
{
if (unlikely(test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))) {
/*
@@ -7783,18 +7939,6 @@
atomic_inc(&nohz.nr_cpus);
set_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
}
-
-static int sched_ilb_notifier(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
-{
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_DYING:
- nohz_balance_exit_idle(smp_processor_id());
- return NOTIFY_OK;
- default:
- return NOTIFY_DONE;
- }
-}
#endif
static DEFINE_SPINLOCK(balancing);
@@ -7956,7 +8100,7 @@
if (time_after_eq(jiffies, rq->next_balance)) {
raw_spin_lock_irq(&rq->lock);
update_rq_clock(rq);
- update_cpu_load_idle(rq);
+ cpu_load_update_idle(rq);
raw_spin_unlock_irq(&rq->lock);
rebalance_domains(rq, CPU_IDLE);
}
@@ -8381,6 +8525,7 @@
init_cfs_rq(cfs_rq);
init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]);
init_entity_runnable_average(se);
+ post_init_entity_util_avg(se);
}
return 1;
@@ -8537,7 +8682,6 @@
.rq_online = rq_online_fair,
.rq_offline = rq_offline_fair,
- .task_waking = task_waking_fair,
.task_dead = task_dead_fair,
.set_cpus_allowed = set_cpus_allowed_common,
#endif
@@ -8599,7 +8743,6 @@
#ifdef CONFIG_NO_HZ_COMMON
nohz.next_balance = jiffies;
zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT);
- cpu_notifier(sched_ilb_notifier, 0);
#endif
#endif /* SMP */
diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c
index 47ce949..2ce5458 100644
--- a/kernel/sched/idle_task.c
+++ b/kernel/sched/idle_task.c
@@ -24,7 +24,7 @@
}
static struct task_struct *
-pick_next_task_idle(struct rq *rq, struct task_struct *prev)
+pick_next_task_idle(struct rq *rq, struct task_struct *prev, struct pin_cookie cookie)
{
put_prev_task(rq, prev);
diff --git a/kernel/sched/loadavg.c b/kernel/sched/loadavg.c
index ef71590..b0b93fd 100644
--- a/kernel/sched/loadavg.c
+++ b/kernel/sched/loadavg.c
@@ -99,10 +99,13 @@
static unsigned long
calc_load(unsigned long load, unsigned long exp, unsigned long active)
{
- load *= exp;
- load += active * (FIXED_1 - exp);
- load += 1UL << (FSHIFT - 1);
- return load >> FSHIFT;
+ unsigned long newload;
+
+ newload = load * exp + active * (FIXED_1 - exp);
+ if (active >= load)
+ newload += FIXED_1-1;
+
+ return newload / FIXED_1;
}
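
To see the new rounding rule in fixed-point terms (FSHIFT = 11, FIXED_1 = 2048 and EXP_1 = 1884 are the long-standing loadavg constants, restated here as assumptions), a standalone check:

#include <stdio.h>

#define FSHIFT	11
#define FIXED_1	(1UL << FSHIFT)	/* 2048 represents 1.0 */
#define EXP_1	1884		/* 1/exp(5s/1min) in fixed point */

static unsigned long calc_load(unsigned long load, unsigned long exp,
			       unsigned long active)
{
	unsigned long newload = load * exp + active * (FIXED_1 - exp);

	if (active >= load)		/* round up only while load is rising */
		newload += FIXED_1 - 1;
	return newload / FIXED_1;
}

int main(void)
{
	/* previous 1-min load 0.50, one runnable task (1.0) this sample */
	printf("%lu\n", calc_load(1024, EXP_1, 2048));	/* 1106 -> ~0.54 */
	return 0;
}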
#ifdef CONFIG_NO_HZ_COMMON
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index ec4f538d..d5690b7 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -334,7 +334,7 @@
rt_rq = &rq_of_rt_rq(rt_rq)->rt;
rt_rq->rt_nr_total++;
- if (p->nr_cpus_allowed > 1)
+ if (tsk_nr_cpus_allowed(p) > 1)
rt_rq->rt_nr_migratory++;
update_rt_migration(rt_rq);
@@ -351,7 +351,7 @@
rt_rq = &rq_of_rt_rq(rt_rq)->rt;
rt_rq->rt_nr_total--;
- if (p->nr_cpus_allowed > 1)
+ if (tsk_nr_cpus_allowed(p) > 1)
rt_rq->rt_nr_migratory--;
update_rt_migration(rt_rq);
@@ -953,14 +953,14 @@
if (curr->sched_class != &rt_sched_class)
return;
- /* Kick cpufreq (see the comment in linux/cpufreq.h). */
- if (cpu_of(rq) == smp_processor_id())
- cpufreq_trigger_update(rq_clock(rq));
-
delta_exec = rq_clock_task(rq) - curr->se.exec_start;
if (unlikely((s64)delta_exec <= 0))
return;
+ /* Kick cpufreq (see the comment in linux/cpufreq.h). */
+ if (cpu_of(rq) == smp_processor_id())
+ cpufreq_trigger_update(rq_clock(rq));
+
schedstat_set(curr->se.statistics.exec_max,
max(curr->se.statistics.exec_max, delta_exec));
@@ -1324,7 +1324,7 @@
enqueue_rt_entity(rt_se, flags);
- if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
+ if (!task_current(rq, p) && tsk_nr_cpus_allowed(p) > 1)
enqueue_pushable_task(rq, p);
}
@@ -1413,7 +1413,7 @@
* will have to sort it out.
*/
if (curr && unlikely(rt_task(curr)) &&
- (curr->nr_cpus_allowed < 2 ||
+ (tsk_nr_cpus_allowed(curr) < 2 ||
curr->prio <= p->prio)) {
int target = find_lowest_rq(p);
@@ -1437,7 +1437,7 @@
* Current can't be migrated, useless to reschedule,
* let's hope p can move out.
*/
- if (rq->curr->nr_cpus_allowed == 1 ||
+ if (tsk_nr_cpus_allowed(rq->curr) == 1 ||
!cpupri_find(&rq->rd->cpupri, rq->curr, NULL))
return;
@@ -1445,7 +1445,7 @@
* p is migratable, so let's not schedule it and
* see if it is pushed or pulled somewhere else.
*/
- if (p->nr_cpus_allowed != 1
+ if (tsk_nr_cpus_allowed(p) != 1
&& cpupri_find(&rq->rd->cpupri, p, NULL))
return;
@@ -1524,7 +1524,7 @@
}
static struct task_struct *
-pick_next_task_rt(struct rq *rq, struct task_struct *prev)
+pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct pin_cookie cookie)
{
struct task_struct *p;
struct rt_rq *rt_rq = &rq->rt;
@@ -1536,9 +1536,9 @@
* disabled avoiding further scheduler activity on it and we're
* being very careful to re-start the picking loop.
*/
- lockdep_unpin_lock(&rq->lock);
+ lockdep_unpin_lock(&rq->lock, cookie);
pull_rt_task(rq);
- lockdep_pin_lock(&rq->lock);
+ lockdep_repin_lock(&rq->lock, cookie);
/*
* pull_rt_task() can drop (and re-acquire) rq->lock; this
* means a dl or stop task can slip in, in which case we need
@@ -1579,7 +1579,7 @@
* The previous task needs to be made eligible for pushing
* if it is still active
*/
- if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1)
+ if (on_rt_rq(&p->rt) && tsk_nr_cpus_allowed(p) > 1)
enqueue_pushable_task(rq, p);
}
@@ -1629,7 +1629,7 @@
if (unlikely(!lowest_mask))
return -1;
- if (task->nr_cpus_allowed == 1)
+ if (tsk_nr_cpus_allowed(task) == 1)
return -1; /* No other targets possible */
if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
@@ -1762,7 +1762,7 @@
BUG_ON(rq->cpu != task_cpu(p));
BUG_ON(task_current(rq, p));
- BUG_ON(p->nr_cpus_allowed <= 1);
+ BUG_ON(tsk_nr_cpus_allowed(p) <= 1);
BUG_ON(!task_on_rq_queued(p));
BUG_ON(!rt_task(p));
@@ -2122,9 +2122,9 @@
{
if (!task_running(rq, p) &&
!test_tsk_need_resched(rq->curr) &&
- p->nr_cpus_allowed > 1 &&
+ tsk_nr_cpus_allowed(p) > 1 &&
(dl_task(rq->curr) || rt_task(rq->curr)) &&
- (rq->curr->nr_cpus_allowed < 2 ||
+ (tsk_nr_cpus_allowed(rq->curr) < 2 ||
rq->curr->prio <= p->prio))
push_rt_tasks(rq);
}
@@ -2197,7 +2197,7 @@
*/
if (task_on_rq_queued(p) && rq->curr != p) {
#ifdef CONFIG_SMP
- if (p->nr_cpus_allowed > 1 && rq->rt.overloaded)
+ if (tsk_nr_cpus_allowed(p) > 1 && rq->rt.overloaded)
queue_push_tasks(rq);
#else
if (p->prio < rq->curr->prio)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ec2e8d2..72f1f30 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -31,9 +31,9 @@
extern long calc_load_fold_active(struct rq *this_rq);
#ifdef CONFIG_SMP
-extern void update_cpu_load_active(struct rq *this_rq);
+extern void cpu_load_update_active(struct rq *this_rq);
#else
-static inline void update_cpu_load_active(struct rq *this_rq) { }
+static inline void cpu_load_update_active(struct rq *this_rq) { }
#endif
/*
@@ -49,25 +49,32 @@
* and does not change the user-interface for setting shares/weights.
*
* We increase resolution only if we have enough bits to allow this increased
- * resolution (i.e. BITS_PER_LONG > 32). The costs for increasing resolution
- * when BITS_PER_LONG <= 32 are pretty high and the returns do not justify the
- * increased costs.
+ * resolution (i.e. on 64-bit). The costs of increasing resolution on 32-bit
+ * are pretty high and the returns do not justify the increased costs.
+ *
+ * Really only required when CONFIG_FAIR_GROUP_SCHED is also set, but to
+ * increase coverage and consistency, always enable it on 64-bit platforms.
*/
-#if 0 /* BITS_PER_LONG > 32 -- currently broken: it increases power usage under light load */
-# define SCHED_LOAD_RESOLUTION 10
-# define scale_load(w) ((w) << SCHED_LOAD_RESOLUTION)
-# define scale_load_down(w) ((w) >> SCHED_LOAD_RESOLUTION)
+#ifdef CONFIG_64BIT
+# define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT + SCHED_FIXEDPOINT_SHIFT)
+# define scale_load(w) ((w) << SCHED_FIXEDPOINT_SHIFT)
+# define scale_load_down(w) ((w) >> SCHED_FIXEDPOINT_SHIFT)
#else
-# define SCHED_LOAD_RESOLUTION 0
+# define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT)
# define scale_load(w) (w)
# define scale_load_down(w) (w)
#endif
-#define SCHED_LOAD_SHIFT (10 + SCHED_LOAD_RESOLUTION)
-#define SCHED_LOAD_SCALE (1L << SCHED_LOAD_SHIFT)
-
-#define NICE_0_LOAD SCHED_LOAD_SCALE
-#define NICE_0_SHIFT SCHED_LOAD_SHIFT
+/*
+ * Task weight (visible to users) and its load (invisible to users) have
+ * independent resolution, but they should be well calibrated. We use
* scale_load() and scale_load_down() to convert between them. The
+ * following must be true:
+ *
+ * scale_load(sched_prio_to_weight[USER_PRIO(NICE_TO_PRIO(0))]) == NICE_0_LOAD
+ *
+ */
+#define NICE_0_LOAD (1L << NICE_0_LOAD_SHIFT)
/*
* Single value that decides SCHED_DEADLINE internal math precision.
@@ -585,11 +592,13 @@
#endif
#define CPU_LOAD_IDX_MAX 5
unsigned long cpu_load[CPU_LOAD_IDX_MAX];
- unsigned long last_load_update_tick;
#ifdef CONFIG_NO_HZ_COMMON
+#ifdef CONFIG_SMP
+ unsigned long last_load_update_tick;
+#endif /* CONFIG_SMP */
u64 nohz_stamp;
unsigned long nohz_flags;
-#endif
+#endif /* CONFIG_NO_HZ_COMMON */
#ifdef CONFIG_NO_HZ_FULL
unsigned long last_sched_tick;
#endif
@@ -854,7 +863,7 @@
struct sched_group_capacity {
atomic_t ref;
/*
- * CPU capacity of this group, SCHED_LOAD_SCALE being max capacity
+ * CPU capacity of this group, SCHED_CAPACITY_SCALE being max capacity
* for a single CPU.
*/
unsigned int capacity;
@@ -1159,7 +1168,7 @@
*
* ENQUEUE_HEAD - place at front of runqueue (tail if not specified)
* ENQUEUE_REPLENISH - CBS (replenish runtime and postpone deadline)
- * ENQUEUE_WAKING - sched_class::task_waking was called
+ * ENQUEUE_MIGRATED - the task was migrated during wakeup
*
*/
@@ -1174,9 +1183,9 @@
#define ENQUEUE_HEAD 0x08
#define ENQUEUE_REPLENISH 0x10
#ifdef CONFIG_SMP
-#define ENQUEUE_WAKING 0x20
+#define ENQUEUE_MIGRATED 0x20
#else
-#define ENQUEUE_WAKING 0x00
+#define ENQUEUE_MIGRATED 0x00
#endif
#define RETRY_TASK ((void *)-1UL)
@@ -1200,14 +1209,14 @@
* tasks.
*/
struct task_struct * (*pick_next_task) (struct rq *rq,
- struct task_struct *prev);
+ struct task_struct *prev,
+ struct pin_cookie cookie);
void (*put_prev_task) (struct rq *rq, struct task_struct *p);
#ifdef CONFIG_SMP
int (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags);
void (*migrate_task_rq)(struct task_struct *p);
- void (*task_waking) (struct task_struct *task);
void (*task_woken) (struct rq *this_rq, struct task_struct *task);
void (*set_cpus_allowed)(struct task_struct *p,
@@ -1313,6 +1322,7 @@
unsigned long to_ratio(u64 period, u64 runtime);
extern void init_entity_runnable_average(struct sched_entity *se);
+extern void post_init_entity_util_avg(struct sched_entity *se);
#ifdef CONFIG_NO_HZ_FULL
extern bool sched_can_stop_tick(struct rq *rq);
@@ -1448,86 +1458,32 @@
static inline void sched_avg_update(struct rq *rq) { }
#endif
-/*
- * __task_rq_lock - lock the rq @p resides on.
- */
-static inline struct rq *__task_rq_lock(struct task_struct *p)
- __acquires(rq->lock)
-{
- struct rq *rq;
+struct rq_flags {
+ unsigned long flags;
+ struct pin_cookie cookie;
+};
- lockdep_assert_held(&p->pi_lock);
-
- for (;;) {
- rq = task_rq(p);
- raw_spin_lock(&rq->lock);
- if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) {
- lockdep_pin_lock(&rq->lock);
- return rq;
- }
- raw_spin_unlock(&rq->lock);
-
- while (unlikely(task_on_rq_migrating(p)))
- cpu_relax();
- }
-}
-
-/*
- * task_rq_lock - lock p->pi_lock and lock the rq @p resides on.
- */
-static inline struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
+struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf)
+ __acquires(rq->lock);
+struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf)
__acquires(p->pi_lock)
- __acquires(rq->lock)
-{
- struct rq *rq;
+ __acquires(rq->lock);
- for (;;) {
- raw_spin_lock_irqsave(&p->pi_lock, *flags);
- rq = task_rq(p);
- raw_spin_lock(&rq->lock);
- /*
- * move_queued_task() task_rq_lock()
- *
- * ACQUIRE (rq->lock)
- * [S] ->on_rq = MIGRATING [L] rq = task_rq()
- * WMB (__set_task_cpu()) ACQUIRE (rq->lock);
- * [S] ->cpu = new_cpu [L] task_rq()
- * [L] ->on_rq
- * RELEASE (rq->lock)
- *
- * If we observe the old cpu in task_rq_lock, the acquire of
- * the old rq->lock will fully serialize against the stores.
- *
- * If we observe the new cpu in task_rq_lock, the acquire will
- * pair with the WMB to ensure we must then also see migrating.
- */
- if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) {
- lockdep_pin_lock(&rq->lock);
- return rq;
- }
- raw_spin_unlock(&rq->lock);
- raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
-
- while (unlikely(task_on_rq_migrating(p)))
- cpu_relax();
- }
-}
-
-static inline void __task_rq_unlock(struct rq *rq)
+static inline void __task_rq_unlock(struct rq *rq, struct rq_flags *rf)
__releases(rq->lock)
{
- lockdep_unpin_lock(&rq->lock);
+ lockdep_unpin_lock(&rq->lock, rf->cookie);
raw_spin_unlock(&rq->lock);
}
static inline void
-task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
+task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
__releases(rq->lock)
__releases(p->pi_lock)
{
- lockdep_unpin_lock(&rq->lock);
+ lockdep_unpin_lock(&rq->lock, rf->cookie);
raw_spin_unlock(&rq->lock);
- raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
+ raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags);
}
#ifdef CONFIG_SMP
@@ -1743,6 +1699,10 @@
};
#define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags)
+
+extern void nohz_balance_exit_idle(unsigned int cpu);
+#else
+static inline void nohz_balance_exit_idle(unsigned int cpu) { }
#endif
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
@@ -1842,6 +1802,14 @@
static inline void cpufreq_trigger_update(u64 time) {}
#endif /* CONFIG_CPU_FREQ */
+#ifdef arch_scale_freq_capacity
+#ifndef arch_scale_freq_invariant
+#define arch_scale_freq_invariant() (true)
+#endif
+#else /* arch_scale_freq_capacity */
+#define arch_scale_freq_invariant() (false)
+#endif
+
static inline void account_reset_rq(struct rq *rq)
{
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
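With __task_rq_lock()/task_rq_lock() moved out of line, the saved IRQ flags and the lockdep pin cookie now travel together in struct rq_flags. A sketch of the resulting caller pattern, using only the declarations above:

    struct rq_flags rf;
    struct rq *rq;

    rq = task_rq_lock(p, &rf);      /* takes p->pi_lock, then rq->lock, and pins it */
    /* ... operate on rq while it stays pinned via rf.cookie ... */
    task_rq_unlock(rq, p, &rf);     /* unpins, then releases both locks */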
diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
index cbc67da..604297a 100644
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -24,7 +24,7 @@
}
static struct task_struct *
-pick_next_task_stop(struct rq *rq, struct task_struct *prev)
+pick_next_task_stop(struct rq *rq, struct task_struct *prev, struct pin_cookie cookie)
{
struct task_struct *stop = rq->stop;
diff --git a/kernel/signal.c b/kernel/signal.c
index aa9bf00..ab122a2 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3099,12 +3099,14 @@
oss.ss_sp = (void __user *) current->sas_ss_sp;
oss.ss_size = current->sas_ss_size;
- oss.ss_flags = sas_ss_flags(sp);
+ oss.ss_flags = sas_ss_flags(sp) |
+ (current->sas_ss_flags & SS_FLAG_BITS);
if (uss) {
void __user *ss_sp;
size_t ss_size;
- int ss_flags;
+ unsigned ss_flags;
+ int ss_mode;
error = -EFAULT;
if (!access_ok(VERIFY_READ, uss, sizeof(*uss)))
@@ -3119,18 +3121,13 @@
if (on_sig_stack(sp))
goto out;
+ ss_mode = ss_flags & ~SS_FLAG_BITS;
error = -EINVAL;
- /*
- * Note - this code used to test ss_flags incorrectly:
- * old code may have been written using ss_flags==0
- * to mean ss_flags==SS_ONSTACK (as this was the only
- * way that worked) - this fix preserves that older
- * mechanism.
- */
- if (ss_flags != SS_DISABLE && ss_flags != SS_ONSTACK && ss_flags != 0)
+ if (ss_mode != SS_DISABLE && ss_mode != SS_ONSTACK &&
+ ss_mode != 0)
goto out;
- if (ss_flags == SS_DISABLE) {
+ if (ss_mode == SS_DISABLE) {
ss_size = 0;
ss_sp = NULL;
} else {
@@ -3141,6 +3138,7 @@
current->sas_ss_sp = (unsigned long) ss_sp;
current->sas_ss_size = ss_size;
+ current->sas_ss_flags = ss_flags;
}
error = 0;
@@ -3171,9 +3169,14 @@
int __save_altstack(stack_t __user *uss, unsigned long sp)
{
struct task_struct *t = current;
- return __put_user((void __user *)t->sas_ss_sp, &uss->ss_sp) |
- __put_user(sas_ss_flags(sp), &uss->ss_flags) |
+ int err = __put_user((void __user *)t->sas_ss_sp, &uss->ss_sp) |
+ __put_user(t->sas_ss_flags, &uss->ss_flags) |
__put_user(t->sas_ss_size, &uss->ss_size);
+ if (err)
+ return err;
+ if (t->sas_ss_flags & SS_AUTODISARM)
+ sas_ss_reset(t);
+ return 0;
}
#ifdef CONFIG_COMPAT
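The sigaltstack() changes above introduce SS_AUTODISARM (one of the SS_FLAG_BITS): the sas_ss_reset() in __save_altstack() disarms the alternate stack while the signal frame is set up, and the state is restored on sigreturn, so a handler that swaps contexts cannot be re-entered on a stack still marked in use. A hedged userspace sketch, assuming UAPI headers from a kernel carrying this change (the function name is illustrative):

    #include <signal.h>
    #include <stdlib.h>

    static stack_t ss;

    static int install_autodisarm_stack(void)
    {
            ss.ss_sp = malloc(SIGSTKSZ);
            if (!ss.ss_sp)
                    return -1;
            ss.ss_size = SIGSTKSZ;
            ss.ss_flags = SS_AUTODISARM;    /* disarmed on entry, re-armed on return */
            return sigaltstack(&ss, NULL);
    }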
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 725587f..c8b3186 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -130,6 +130,9 @@
#ifdef CONFIG_PRINTK
static int ten_thousand = 10000;
#endif
+#ifdef CONFIG_PERF_EVENTS
+static int six_hundred_forty_kb = 640 * 1024;
+#endif
/* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
@@ -1144,6 +1147,15 @@
.extra1 = &zero,
.extra2 = &one_hundred,
},
+ {
+ .procname = "perf_event_max_stack",
+ .data = NULL, /* filled in by handler */
+ .maxlen = sizeof(sysctl_perf_event_max_stack),
+ .mode = 0644,
+ .proc_handler = perf_event_max_stack_handler,
+ .extra1 = &zero,
+ .extra2 = &six_hundred_forty_kb,
+ },
#endif
#ifdef CONFIG_KMEMCHECK
{
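The new table entry surfaces the perf callchain depth limit as kernel.perf_event_max_stack, clamped to the 0..640*1024 range by the extra1/extra2 bounds above. A minimal userspace sketch reading it back (helper name illustrative; the default on kernels with this series is 127):

    #include <stdio.h>

    static int read_perf_event_max_stack(void)
    {
            int val = -1;
            FILE *f = fopen("/proc/sys/kernel/perf_event_max_stack", "r");

            if (f) {
                    if (fscanf(f, "%d", &val) != 1)
                            val = -1;
                    fclose(f);
            }
            return val;
    }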
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 58e3310..536ada8 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -262,7 +262,7 @@
{
int prev;
- prev = atomic_fetch_or(dep, BIT(bit));
+ prev = atomic_fetch_or(BIT(bit), dep);
if (!prev)
tick_nohz_full_kick_all();
}
@@ -292,7 +292,7 @@
ts = per_cpu_ptr(&tick_cpu_sched, cpu);
- prev = atomic_fetch_or(&ts->tick_dep_mask, BIT(bit));
+ prev = atomic_fetch_or(BIT(bit), &ts->tick_dep_mask);
if (!prev) {
preempt_disable();
/* Perf needs local kick that is NMI safe */
@@ -776,6 +776,7 @@
if (!ts->tick_stopped) {
nohz_balance_enter_idle(cpu);
calc_load_enter_idle();
+ cpu_load_update_nohz_start();
ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
ts->tick_stopped = 1;
@@ -802,11 +803,11 @@
return tick;
}
-static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now, int active)
+static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
{
/* Update jiffies first */
tick_do_update_jiffies64(now);
- update_cpu_load_nohz(active);
+ cpu_load_update_nohz_stop();
calc_load_exit_idle();
touch_softlockup_watchdog_sched();
@@ -833,7 +834,7 @@
if (can_stop_full_tick(ts))
tick_nohz_stop_sched_tick(ts, ktime_get(), cpu);
else if (ts->tick_stopped)
- tick_nohz_restart_sched_tick(ts, ktime_get(), 1);
+ tick_nohz_restart_sched_tick(ts, ktime_get());
#endif
}
@@ -1024,7 +1025,7 @@
tick_nohz_stop_idle(ts, now);
if (ts->tick_stopped) {
- tick_nohz_restart_sched_tick(ts, now, 0);
+ tick_nohz_restart_sched_tick(ts, now);
tick_nohz_account_idle_ticks(ts);
}
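The two atomic_fetch_or() hunks above are fallout from switching that helper to the (value, pointer) argument order used by the rest of the atomic_*() family; it still returns the value prior to the OR. A sketch of the corrected convention (function name illustrative; BIT() from linux/bitops.h):

    static void set_dep_bit(atomic_t *dep, int bit)
    {
            int prev = atomic_fetch_or(BIT(bit), dep); /* old value of *dep */

            if (!prev)                      /* kick on the 0 -> nonzero transition only */
                    tick_nohz_full_kick_all();
    }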
diff --git a/kernel/torture.c b/kernel/torture.c
index 44aa462..fa0bdee 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -451,6 +451,7 @@
torture_shutdown_hook();
else
VERBOSE_TOROUT_STRING("No torture_shutdown_hook(), skipping.");
+ ftrace_dump(DUMP_ALL);
kernel_power_off(); /* Shut down the system. */
return 0;
}
@@ -602,8 +603,9 @@
{
mutex_lock(&fullstop_mutex);
if (torture_type != NULL) {
- pr_alert("torture_init_begin: refusing %s init: %s running",
+ pr_alert("torture_init_begin: Refusing %s init: %s running.\n",
ttype, torture_type);
+ pr_alert("torture_init_begin: One torture test at a time!\n");
mutex_unlock(&fullstop_mutex);
return false;
}
diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c
index 81b8745..0c7dee2 100644
--- a/kernel/trace/power-traces.c
+++ b/kernel/trace/power-traces.c
@@ -15,5 +15,6 @@
EXPORT_TRACEPOINT_SYMBOL_GPL(suspend_resume);
EXPORT_TRACEPOINT_SYMBOL_GPL(cpu_idle);
+EXPORT_TRACEPOINT_SYMBOL_GPL(cpu_frequency);
EXPORT_TRACEPOINT_SYMBOL_GPL(powernv_throttle);
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 00df25f..e11108f 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -47,6 +47,9 @@
if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
return -EPERM;
+ if (!is_sampling_event(p_event))
+ return 0;
+
/*
* We don't allow user space callchains for function trace
* event, due to issues with page faults while tracing page
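The early return above exempts counting (non-sampling) events from the callchain restrictions that follow, since those restrictions only matter when samples are generated. The predicate it relies on is essentially the following, per include/linux/perf_event.h:

    static inline bool is_sampling_event(struct perf_event *event)
    {
            return event->attr.sample_period != 0;
    }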
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 1e9a607..f4b797a 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1289,6 +1289,39 @@
tristate
default n
+config RCU_PERF_TEST
+ tristate "performance tests for RCU"
+ depends on DEBUG_KERNEL
+ select TORTURE_TEST
+ select SRCU
+ select TASKS_RCU
+ default n
+ help
+ This option provides a kernel module that runs performance
+ tests on the RCU infrastructure. The kernel module may be built
+ after the fact on the running kernel to be tested, if desired.
+
+ Say Y here if you want RCU performance tests to be built into
+ the kernel.
+ Say M if you want the RCU performance tests to build as a module.
+ Say N if you are unsure.
+
+config RCU_PERF_TEST_RUNNABLE
+ bool "performance tests for RCU runnable by default"
+ depends on RCU_PERF_TEST = y
+ default n
+ help
+ This option provides a way to build the RCU performance tests
+ directly into the kernel without them starting up at boot time.
+ You can use /sys/module to manually override this setting.
+ This override is available only when the RCU performance
+ tests have been built into the kernel.
+
+ Say Y here if you want the RCU performance tests to start during
+ boot (you probably don't).
+ Say N here if you want the RCU performance tests to start only
+ after being manually enabled via /sys/module.
+
config RCU_TORTURE_TEST
tristate "torture tests for RCU"
depends on DEBUG_KERNEL
diff --git a/lib/Makefile b/lib/Makefile
index 7bd6fd4..a65e9a8 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -23,7 +23,7 @@
rbtree.o radix-tree.o dump_stack.o timerqueue.o\
idr.o int_sqrt.o extable.o \
sha1.o md5.o irq_regs.o argv_split.o \
- proportions.o flex_proportions.o ratelimit.o show_mem.o \
+ flex_proportions.o ratelimit.o show_mem.o \
is_single_threaded.o plist.o decompress.o kobject_uevent.o \
earlycpio.o seq_buf.o nmi_backtrace.o
diff --git a/lib/proportions.c b/lib/proportions.c
deleted file mode 100644
index efa54f25..0000000
--- a/lib/proportions.c
+++ /dev/null
@@ -1,407 +0,0 @@
-/*
- * Floating proportions
- *
- * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
- *
- * Description:
- *
- * The floating proportion is a time derivative with an exponentially decaying
- * history:
- *
- * p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i)
- *
- * Where j is an element from {prop_local}, x_{j} is j's number of events,
- * and i the time period over which the differential is taken. So d/dt_{-i} is
- * the differential over the i-th last period.
- *
- * The decaying history gives smooth transitions. The time differential carries
- * the notion of speed.
- *
- * The denominator is 2^(1+i) because we want the series to be normalised, ie.
- *
- * \Sum_{i=0} 1/2^(1+i) = 1
- *
- * Further more, if we measure time (t) in the same events as x; so that:
- *
- * t = \Sum_{j} x_{j}
- *
- * we get that:
- *
- * \Sum_{j} p_{j} = 1
- *
- * Writing this in an iterative fashion we get (dropping the 'd's):
- *
- * if (++x_{j}, ++t > period)
- * t /= 2;
- * for_each (j)
- * x_{j} /= 2;
- *
- * so that:
- *
- * p_{j} = x_{j} / t;
- *
- * We optimize away the '/= 2' for the global time delta by noting that:
- *
- * if (++t > period) t /= 2:
- *
- * Can be approximated by:
- *
- * period/2 + (++t % period/2)
- *
- * [ Furthermore, when we choose period to be 2^n it can be written in terms of
- * binary operations and wraparound artefacts disappear. ]
- *
- * Also note that this yields a natural counter of the elapsed periods:
- *
- * c = t / (period/2)
- *
- * [ Its monotonic increasing property can be applied to mitigate the wrap-
- * around issue. ]
- *
- * This allows us to do away with the loop over all prop_locals on each period
- * expiration. By remembering the period count under which it was last accessed
- * as c_{j}, we can obtain the number of 'missed' cycles from:
- *
- * c - c_{j}
- *
- * We can then lazily catch up to the global period count every time we are
- * going to use x_{j}, by doing:
- *
- * x_{j} /= 2^(c - c_{j}), c_{j} = c
- */
-
-#include <linux/proportions.h>
-#include <linux/rcupdate.h>
-
-int prop_descriptor_init(struct prop_descriptor *pd, int shift, gfp_t gfp)
-{
- int err;
-
- if (shift > PROP_MAX_SHIFT)
- shift = PROP_MAX_SHIFT;
-
- pd->index = 0;
- pd->pg[0].shift = shift;
- mutex_init(&pd->mutex);
- err = percpu_counter_init(&pd->pg[0].events, 0, gfp);
- if (err)
- goto out;
-
- err = percpu_counter_init(&pd->pg[1].events, 0, gfp);
- if (err)
- percpu_counter_destroy(&pd->pg[0].events);
-
-out:
- return err;
-}
-
-/*
- * We have two copies, and flip between them to make it seem like an atomic
- * update. The update is not really atomic wrt the events counter, but
- * it is internally consistent with the bit layout depending on shift.
- *
- * We copy the events count, move the bits around and flip the index.
- */
-void prop_change_shift(struct prop_descriptor *pd, int shift)
-{
- int index;
- int offset;
- u64 events;
- unsigned long flags;
-
- if (shift > PROP_MAX_SHIFT)
- shift = PROP_MAX_SHIFT;
-
- mutex_lock(&pd->mutex);
-
- index = pd->index ^ 1;
- offset = pd->pg[pd->index].shift - shift;
- if (!offset)
- goto out;
-
- pd->pg[index].shift = shift;
-
- local_irq_save(flags);
- events = percpu_counter_sum(&pd->pg[pd->index].events);
- if (offset < 0)
- events <<= -offset;
- else
- events >>= offset;
- percpu_counter_set(&pd->pg[index].events, events);
-
- /*
- * ensure the new pg is fully written before the switch
- */
- smp_wmb();
- pd->index = index;
- local_irq_restore(flags);
-
- synchronize_rcu();
-
-out:
- mutex_unlock(&pd->mutex);
-}
-
-/*
- * wrap the access to the data in an rcu_read_lock() section;
- * this is used to track the active references.
- */
-static struct prop_global *prop_get_global(struct prop_descriptor *pd)
-__acquires(RCU)
-{
- int index;
-
- rcu_read_lock();
- index = pd->index;
- /*
- * match the wmb from vcd_flip()
- */
- smp_rmb();
- return &pd->pg[index];
-}
-
-static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg)
-__releases(RCU)
-{
- rcu_read_unlock();
-}
-
-static void
-prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
-{
- int offset = *pl_shift - new_shift;
-
- if (!offset)
- return;
-
- if (offset < 0)
- *pl_period <<= -offset;
- else
- *pl_period >>= offset;
-
- *pl_shift = new_shift;
-}
-
-/*
- * PERCPU
- */
-
-#define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))
-
-int prop_local_init_percpu(struct prop_local_percpu *pl, gfp_t gfp)
-{
- raw_spin_lock_init(&pl->lock);
- pl->shift = 0;
- pl->period = 0;
- return percpu_counter_init(&pl->events, 0, gfp);
-}
-
-void prop_local_destroy_percpu(struct prop_local_percpu *pl)
-{
- percpu_counter_destroy(&pl->events);
-}
-
-/*
- * Catch up with missed period expirations.
- *
- * until (c_{j} == c)
- * x_{j} -= x_{j}/2;
- * c_{j}++;
- */
-static
-void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl)
-{
- unsigned long period = 1UL << (pg->shift - 1);
- unsigned long period_mask = ~(period - 1);
- unsigned long global_period;
- unsigned long flags;
-
- global_period = percpu_counter_read(&pg->events);
- global_period &= period_mask;
-
- /*
- * Fast path - check if the local and global period count still match
- * outside of the lock.
- */
- if (pl->period == global_period)
- return;
-
- raw_spin_lock_irqsave(&pl->lock, flags);
- prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
-
- /*
- * For each missed period, we half the local counter.
- * basically:
- * pl->events >> (global_period - pl->period);
- */
- period = (global_period - pl->period) >> (pg->shift - 1);
- if (period < BITS_PER_LONG) {
- s64 val = percpu_counter_read(&pl->events);
-
- if (val < (nr_cpu_ids * PROP_BATCH))
- val = percpu_counter_sum(&pl->events);
-
- __percpu_counter_add(&pl->events, -val + (val >> period),
- PROP_BATCH);
- } else
- percpu_counter_set(&pl->events, 0);
-
- pl->period = global_period;
- raw_spin_unlock_irqrestore(&pl->lock, flags);
-}
-
-/*
- * ++x_{j}, ++t
- */
-void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
-{
- struct prop_global *pg = prop_get_global(pd);
-
- prop_norm_percpu(pg, pl);
- __percpu_counter_add(&pl->events, 1, PROP_BATCH);
- percpu_counter_add(&pg->events, 1);
- prop_put_global(pd, pg);
-}
-
-/*
- * identical to __prop_inc_percpu, except that it limits this pl's fraction to
- * @frac/PROP_FRAC_BASE by ignoring events when this limit has been exceeded.
- */
-void __prop_inc_percpu_max(struct prop_descriptor *pd,
- struct prop_local_percpu *pl, long frac)
-{
- struct prop_global *pg = prop_get_global(pd);
-
- prop_norm_percpu(pg, pl);
-
- if (unlikely(frac != PROP_FRAC_BASE)) {
- unsigned long period_2 = 1UL << (pg->shift - 1);
- unsigned long counter_mask = period_2 - 1;
- unsigned long global_count;
- long numerator, denominator;
-
- numerator = percpu_counter_read_positive(&pl->events);
- global_count = percpu_counter_read(&pg->events);
- denominator = period_2 + (global_count & counter_mask);
-
- if (numerator > ((denominator * frac) >> PROP_FRAC_SHIFT))
- goto out_put;
- }
-
- percpu_counter_add(&pl->events, 1);
- percpu_counter_add(&pg->events, 1);
-
-out_put:
- prop_put_global(pd, pg);
-}
-
-/*
- * Obtain a fraction of this proportion
- *
- * p_{j} = x_{j} / (period/2 + t % period/2)
- */
-void prop_fraction_percpu(struct prop_descriptor *pd,
- struct prop_local_percpu *pl,
- long *numerator, long *denominator)
-{
- struct prop_global *pg = prop_get_global(pd);
- unsigned long period_2 = 1UL << (pg->shift - 1);
- unsigned long counter_mask = period_2 - 1;
- unsigned long global_count;
-
- prop_norm_percpu(pg, pl);
- *numerator = percpu_counter_read_positive(&pl->events);
-
- global_count = percpu_counter_read(&pg->events);
- *denominator = period_2 + (global_count & counter_mask);
-
- prop_put_global(pd, pg);
-}
-
-/*
- * SINGLE
- */
-
-int prop_local_init_single(struct prop_local_single *pl)
-{
- raw_spin_lock_init(&pl->lock);
- pl->shift = 0;
- pl->period = 0;
- pl->events = 0;
- return 0;
-}
-
-void prop_local_destroy_single(struct prop_local_single *pl)
-{
-}
-
-/*
- * Catch up with missed period expirations.
- */
-static
-void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl)
-{
- unsigned long period = 1UL << (pg->shift - 1);
- unsigned long period_mask = ~(period - 1);
- unsigned long global_period;
- unsigned long flags;
-
- global_period = percpu_counter_read(&pg->events);
- global_period &= period_mask;
-
- /*
- * Fast path - check if the local and global period count still match
- * outside of the lock.
- */
- if (pl->period == global_period)
- return;
-
- raw_spin_lock_irqsave(&pl->lock, flags);
- prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
- /*
- * For each missed period, we half the local counter.
- */
- period = (global_period - pl->period) >> (pg->shift - 1);
- if (likely(period < BITS_PER_LONG))
- pl->events >>= period;
- else
- pl->events = 0;
- pl->period = global_period;
- raw_spin_unlock_irqrestore(&pl->lock, flags);
-}
-
-/*
- * ++x_{j}, ++t
- */
-void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl)
-{
- struct prop_global *pg = prop_get_global(pd);
-
- prop_norm_single(pg, pl);
- pl->events++;
- percpu_counter_add(&pg->events, 1);
- prop_put_global(pd, pg);
-}
-
-/*
- * Obtain a fraction of this proportion
- *
- * p_{j} = x_{j} / (period/2 + t % period/2)
- */
-void prop_fraction_single(struct prop_descriptor *pd,
- struct prop_local_single *pl,
- long *numerator, long *denominator)
-{
- struct prop_global *pg = prop_get_global(pd);
- unsigned long period_2 = 1UL << (pg->shift - 1);
- unsigned long counter_mask = period_2 - 1;
- unsigned long global_count;
-
- prop_norm_single(pg, pl);
- *numerator = pl->events;
-
- global_count = percpu_counter_read(&pg->events);
- *denominator = period_2 + (global_count & counter_mask);
-
- prop_put_global(pd, pg);
-}
diff --git a/mm/mmu_context.c b/mm/mmu_context.c
index f802c2d..6f4d27c 100644
--- a/mm/mmu_context.c
+++ b/mm/mmu_context.c
@@ -4,9 +4,9 @@
*/
#include <linux/mm.h>
+#include <linux/sched.h>
#include <linux/mmu_context.h>
#include <linux/export.h>
-#include <linux/sched.h>
#include <asm/mmu_context.h>
diff --git a/sound/pci/hda/thinkpad_helper.c b/sound/pci/hda/thinkpad_helper.c
index 59ab6ce..f0955fd 100644
--- a/sound/pci/hda/thinkpad_helper.c
+++ b/sound/pci/hda/thinkpad_helper.c
@@ -13,7 +13,7 @@
static bool is_thinkpad(struct hda_codec *codec)
{
return (codec->core.subsystem_id >> 16 == 0x17aa) &&
- (acpi_dev_present("LEN0068") || acpi_dev_present("IBM0068"));
+ (acpi_dev_found("LEN0068") || acpi_dev_found("IBM0068"));
}
static void update_tpacpi_mute_led(void *private_data, int enabled)
diff --git a/sound/soc/intel/boards/cht_bsw_max98090_ti.c b/sound/soc/intel/boards/cht_bsw_max98090_ti.c
index e609f08..ac60b04 100644
--- a/sound/soc/intel/boards/cht_bsw_max98090_ti.c
+++ b/sound/soc/intel/boards/cht_bsw_max98090_ti.c
@@ -296,7 +296,7 @@
if (!drv)
return -ENOMEM;
- drv->ts3a227e_present = acpi_dev_present("104C227E");
+ drv->ts3a227e_present = acpi_dev_found("104C227E");
if (!drv->ts3a227e_present) {
/* no need probe TI jack detection chip */
snd_soc_card_cht.aux_dev = NULL;
diff --git a/sound/soc/intel/boards/cht_bsw_rt5645.c b/sound/soc/intel/boards/cht_bsw_rt5645.c
index 2a6f808..3f2c1ea 100644
--- a/sound/soc/intel/boards/cht_bsw_rt5645.c
+++ b/sound/soc/intel/boards/cht_bsw_rt5645.c
@@ -357,7 +357,7 @@
return -ENOMEM;
for (i = 0; i < ARRAY_SIZE(snd_soc_cards); i++) {
- if (acpi_dev_present(snd_soc_cards[i].codec_id)) {
+ if (acpi_dev_found(snd_soc_cards[i].codec_id)) {
dev_dbg(&pdev->dev,
"found codec %s\n", snd_soc_cards[i].codec_id);
card = snd_soc_cards[i].soc_card;
diff --git a/tools/Makefile b/tools/Makefile
index 60c7e6c..6bf68fe 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -137,7 +137,8 @@
$(call descend,lib/subcmd,clean)
perf_clean:
- $(call descend,$(@:_clean=),clean)
+ $(Q)mkdir -p $(PERF_O) .
+ $(Q)$(MAKE) --no-print-directory -C perf O=$(PERF_O) subdir= clean
selftests_clean:
$(call descend,testing/$(@:_clean=),clean)
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index 9f87861..57c8f98 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -49,6 +49,10 @@
libslang \
libcrypto \
libunwind \
+ libunwind-x86 \
+ libunwind-x86_64 \
+ libunwind-arm \
+ libunwind-aarch64 \
pthread-attr-setaffinity-np \
stackprotector-all \
timerfd \
@@ -69,7 +73,9 @@
libbabeltrace \
liberty \
liberty-z \
- libunwind-debug-frame
+ libunwind-debug-frame \
+ libunwind-debug-frame-arm \
+ libunwind-debug-frame-aarch64
FEATURE_TESTS ?= $(FEATURE_TESTS_BASIC)
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index 4ae94db..3d88f09 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -27,6 +27,12 @@
test-libcrypto.bin \
test-libunwind.bin \
test-libunwind-debug-frame.bin \
+ test-libunwind-x86.bin \
+ test-libunwind-x86_64.bin \
+ test-libunwind-arm.bin \
+ test-libunwind-aarch64.bin \
+ test-libunwind-debug-frame-arm.bin \
+ test-libunwind-debug-frame-aarch64.bin \
test-pthread-attr-setaffinity-np.bin \
test-stackprotector-all.bin \
test-timerfd.bin \
@@ -103,6 +109,23 @@
$(OUTPUT)test-libunwind-debug-frame.bin:
$(BUILD) -lelf
+$(OUTPUT)test-libunwind-x86.bin:
+ $(BUILD) -lelf -lunwind-x86
+
+$(OUTPUT)test-libunwind-x86_64.bin:
+ $(BUILD) -lelf -lunwind-x86_64
+
+$(OUTPUT)test-libunwind-arm.bin:
+ $(BUILD) -lelf -lunwind-arm
+
+$(OUTPUT)test-libunwind-aarch64.bin:
+ $(BUILD) -lelf -lunwind-aarch64
+
+$(OUTPUT)test-libunwind-debug-frame-arm.bin:
+ $(BUILD) -lelf -lunwind-arm
+
+$(OUTPUT)test-libunwind-debug-frame-aarch64.bin:
+ $(BUILD) -lelf -lunwind-aarch64
$(OUTPUT)test-libaudit.bin:
$(BUILD) -laudit
diff --git a/tools/build/feature/test-bpf.c b/tools/build/feature/test-bpf.c
index b389026..e04ab89 100644
--- a/tools/build/feature/test-bpf.c
+++ b/tools/build/feature/test-bpf.c
@@ -27,10 +27,9 @@
attr.log_level = 0;
attr.kern_version = 0;
- attr = attr;
/*
* Test existence of __NR_bpf and BPF_PROG_LOAD.
* This call should fail if we run the testcase.
*/
- return syscall(__NR_bpf, BPF_PROG_LOAD, attr, sizeof(attr));
+ return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
}
diff --git a/tools/build/feature/test-libunwind-aarch64.c b/tools/build/feature/test-libunwind-aarch64.c
new file mode 100644
index 0000000..fc03fb6
--- /dev/null
+++ b/tools/build/feature/test-libunwind-aarch64.c
@@ -0,0 +1,26 @@
+#include <libunwind-aarch64.h>
+#include <stdlib.h>
+
+extern int UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as,
+ unw_word_t ip,
+ unw_dyn_info_t *di,
+ unw_proc_info_t *pi,
+ int need_unwind_info, void *arg);
+
+#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table)
+
+static unw_accessors_t accessors;
+
+int main(void)
+{
+ unw_addr_space_t addr_space;
+
+ addr_space = unw_create_addr_space(&accessors, 0);
+ if (addr_space)
+ return 0;
+
+ unw_init_remote(NULL, addr_space, NULL);
+ dwarf_search_unwind_table(addr_space, 0, NULL, NULL, 0, NULL);
+
+ return 0;
+}
diff --git a/tools/build/feature/test-libunwind-arm.c b/tools/build/feature/test-libunwind-arm.c
new file mode 100644
index 0000000..632d95e
--- /dev/null
+++ b/tools/build/feature/test-libunwind-arm.c
@@ -0,0 +1,27 @@
+#include <libunwind-arm.h>
+#include <stdlib.h>
+
+extern int UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as,
+ unw_word_t ip,
+ unw_dyn_info_t *di,
+ unw_proc_info_t *pi,
+ int need_unwind_info, void *arg);
+
+
+#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table)
+
+static unw_accessors_t accessors;
+
+int main(void)
+{
+ unw_addr_space_t addr_space;
+
+ addr_space = unw_create_addr_space(&accessors, 0);
+ if (addr_space)
+ return 0;
+
+ unw_init_remote(NULL, addr_space, NULL);
+ dwarf_search_unwind_table(addr_space, 0, NULL, NULL, 0, NULL);
+
+ return 0;
+}
diff --git a/tools/build/feature/test-libunwind-debug-frame-aarch64.c b/tools/build/feature/test-libunwind-debug-frame-aarch64.c
new file mode 100644
index 0000000..2284467
--- /dev/null
+++ b/tools/build/feature/test-libunwind-debug-frame-aarch64.c
@@ -0,0 +1,16 @@
+#include <libunwind-aarch64.h>
+#include <stdlib.h>
+
+extern int
+UNW_OBJ(dwarf_find_debug_frame) (int found, unw_dyn_info_t *di_debug,
+ unw_word_t ip, unw_word_t segbase,
+ const char *obj_name, unw_word_t start,
+ unw_word_t end);
+
+#define dwarf_find_debug_frame UNW_OBJ(dwarf_find_debug_frame)
+
+int main(void)
+{
+ dwarf_find_debug_frame(0, NULL, 0, 0, NULL, 0, 0);
+ return 0;
+}
diff --git a/tools/build/feature/test-libunwind-debug-frame-arm.c b/tools/build/feature/test-libunwind-debug-frame-arm.c
new file mode 100644
index 0000000..f988596
--- /dev/null
+++ b/tools/build/feature/test-libunwind-debug-frame-arm.c
@@ -0,0 +1,16 @@
+#include <libunwind-arm.h>
+#include <stdlib.h>
+
+extern int
+UNW_OBJ(dwarf_find_debug_frame) (int found, unw_dyn_info_t *di_debug,
+ unw_word_t ip, unw_word_t segbase,
+ const char *obj_name, unw_word_t start,
+ unw_word_t end);
+
+#define dwarf_find_debug_frame UNW_OBJ(dwarf_find_debug_frame)
+
+int main(void)
+{
+ dwarf_find_debug_frame(0, NULL, 0, 0, NULL, 0, 0);
+ return 0;
+}
diff --git a/tools/build/feature/test-libunwind-x86.c b/tools/build/feature/test-libunwind-x86.c
new file mode 100644
index 0000000..3561edc
--- /dev/null
+++ b/tools/build/feature/test-libunwind-x86.c
@@ -0,0 +1,27 @@
+#include <libunwind-x86.h>
+#include <stdlib.h>
+
+extern int UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as,
+ unw_word_t ip,
+ unw_dyn_info_t *di,
+ unw_proc_info_t *pi,
+ int need_unwind_info, void *arg);
+
+
+#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table)
+
+static unw_accessors_t accessors;
+
+int main(void)
+{
+ unw_addr_space_t addr_space;
+
+ addr_space = unw_create_addr_space(&accessors, 0);
+ if (addr_space)
+ return 0;
+
+ unw_init_remote(NULL, addr_space, NULL);
+ dwarf_search_unwind_table(addr_space, 0, NULL, NULL, 0, NULL);
+
+ return 0;
+}
diff --git a/tools/build/feature/test-libunwind-x86_64.c b/tools/build/feature/test-libunwind-x86_64.c
new file mode 100644
index 0000000..5add251
--- /dev/null
+++ b/tools/build/feature/test-libunwind-x86_64.c
@@ -0,0 +1,27 @@
+#include <libunwind-x86_64.h>
+#include <stdlib.h>
+
+extern int UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as,
+ unw_word_t ip,
+ unw_dyn_info_t *di,
+ unw_proc_info_t *pi,
+ int need_unwind_info, void *arg);
+
+
+#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table)
+
+static unw_accessors_t accessors;
+
+int main(void)
+{
+ unw_addr_space_t addr_space;
+
+ addr_space = unw_create_addr_space(&accessors, 0);
+ if (addr_space)
+ return 0;
+
+ unw_init_remote(NULL, addr_space, NULL);
+ dwarf_search_unwind_table(addr_space, 0, NULL, NULL, 0, NULL);
+
+ return 0;
+}
diff --git a/tools/lguest/lguest.c b/tools/lguest/lguest.c
index 80159e6..d9836c5 100644
--- a/tools/lguest/lguest.c
+++ b/tools/lguest/lguest.c
@@ -3351,12 +3351,18 @@
/* Boot protocol version: 2.07 supports the fields for lguest. */
boot->hdr.version = 0x207;
- /* The hardware_subarch value of "1" tells the Guest it's an lguest. */
- boot->hdr.hardware_subarch = 1;
+ /* X86_SUBARCH_LGUEST tells the Guest it's an lguest. */
+ boot->hdr.hardware_subarch = X86_SUBARCH_LGUEST;
/* Tell the entry path not to try to reload segment registers. */
boot->hdr.loadflags |= KEEP_SEGMENTS;
+ /* We don't support tboot: */
+ boot->tboot_addr = 0;
+
+ /* Ensure this is 0 to prevent APM from loading: */
+ boot->apm_bios_info.version = 0;
+
/* We tell the kernel to initialize the Guest. */
tell_kernel(start);
diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c
index ef78c22..08556cf 100644
--- a/tools/lib/api/fs/fs.c
+++ b/tools/lib/api/fs/fs.c
@@ -351,6 +351,19 @@
return err;
}
+int procfs__read_str(const char *entry, char **buf, size_t *sizep)
+{
+ char path[PATH_MAX];
+ const char *procfs = procfs__mountpoint();
+
+ if (!procfs)
+ return -1;
+
+ snprintf(path, sizeof(path), "%s/%s", procfs, entry);
+
+ return filename__read_str(path, buf, sizep);
+}
+
int sysfs__read_ull(const char *entry, unsigned long long *value)
{
char path[PATH_MAX];
diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h
index 9f65980..16c9c2e 100644
--- a/tools/lib/api/fs/fs.h
+++ b/tools/lib/api/fs/fs.h
@@ -29,6 +29,8 @@
int filename__read_ull(const char *filename, unsigned long long *value);
int filename__read_str(const char *filename, char **buf, size_t *sizep);
+int procfs__read_str(const char *entry, char **buf, size_t *sizep);
+
int sysctl__read_int(const char *sysctl, int *value);
int sysfs__read_int(const char *entry, int *value);
int sysfs__read_ull(const char *entry, unsigned long long *value);
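A hedged usage sketch for the new helper: like filename__read_str() above it, procfs__read_str() allocates the buffer for the caller, who must free it, and the buffer is not guaranteed to be NUL-terminated, hence the length-bounded print. The "version" entry and function name are illustrative:

    #include <stdio.h>
    #include <stdlib.h>
    #include <api/fs/fs.h>

    static void print_kernel_version(void)
    {
            char *buf = NULL;
            size_t size = 0;

            if (procfs__read_str("version", &buf, &size) == 0) {
                    printf("%.*s", (int)size, buf); /* contents of /proc/version */
                    free(buf);
            }
    }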
diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt
index be764f9..c6c8318 100644
--- a/tools/perf/Documentation/intel-pt.txt
+++ b/tools/perf/Documentation/intel-pt.txt
@@ -672,6 +672,7 @@
d create a debug log
g synthesize a call chain (use with i or x)
l synthesize last branch entries (use with i or x)
+ s skip an initial number of events
"Instructions" events look like they were recorded by "perf record -e
instructions".
@@ -730,6 +731,12 @@
To disable trace decoding entirely, use the option --no-itrace.
+It is also possible to skip events (instructions, branches, transactions)
+generated at the beginning. This is useful for ignoring initialization code.
+
+ --itrace=i0nss1000000
+
+skips the first million instructions.
dump option
-----------
diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt
index 65453f4..e2a4c5e 100644
--- a/tools/perf/Documentation/itrace.txt
+++ b/tools/perf/Documentation/itrace.txt
@@ -7,6 +7,7 @@
d create a debug log
g synthesize a call chain (use with i or x)
l synthesize last branch entries (use with i or x)
+ s skip an initial number of events
The default is all events i.e. the same as --itrace=ibxe
@@ -24,3 +25,10 @@
Also the number of last branch entries (default 64, max. 1024) for
instructions or transactions events can be specified.
+
+ It is also possible to skip events (instructions, branches, transactions)
+ generated at the beginning. This is useful for ignoring initialization code.
+
+ --itrace=i0nss1000000
+
+ skips the first million instructions.
diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt
index e9cd39a..778f54d 100644
--- a/tools/perf/Documentation/perf-annotate.txt
+++ b/tools/perf/Documentation/perf-annotate.txt
@@ -33,7 +33,7 @@
-f::
--force::
- Don't complain, do it.
+ Don't do ownership validation.
-v::
--verbose::
diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt
index d1deb57..3e9490b 100644
--- a/tools/perf/Documentation/perf-diff.txt
+++ b/tools/perf/Documentation/perf-diff.txt
@@ -75,7 +75,7 @@
-f::
--force::
- Don't complain, do it.
+ Don't do ownership validation.
--symfs=<directory>::
Look for files with symbols relative to this directory.
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index ec723d0..a126e97 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -93,6 +93,67 @@
You should refer to the processor specific documentation for getting these
details. Some of them are referenced in the SEE ALSO section below.
+ARBITRARY PMUS
+--------------
+
+perf also supports an extended syntax for specifying raw parameters
+to PMUs. Using this typically requires looking up the specific event
+in the CPU vendor's documentation.
+
+The available PMUs and their raw parameters can be listed with
+
+ ls /sys/devices/*/format
+
+For example, the core PMU raw event "LSD.UOPS" above could
+be specified as
+
+ perf stat -e cpu/event=0xa8,umask=0x1,name=LSD.UOPS_CYCLES,cmask=1/ ...
+
+PER SOCKET PMUS
+---------------
+
+Some PMUs are not associated with a core, but with a whole CPU socket.
+Events on these PMUs generally cannot be sampled, but only counted globally
+with perf stat -a. They can be bound to one logical CPU, but will measure
+all the CPUs in the same socket.
+
+This example measures memory bandwidth every second
+on the first memory controller on socket 0 of an Intel Xeon system
+
+ perf stat -C 0 -a uncore_imc_0/cas_count_read/,uncore_imc_0/cas_count_write/ -I 1000 ...
+
+Each memory controller has its own PMU. Measuring the complete system
+bandwidth would require specifying all imc PMUs (see perf list output),
+and adding the values together.
+
+This example measures the combined core power every second
+
+ perf stat -I 1000 -e power/energy-cores/ -a
+
+ACCESS RESTRICTIONS
+-------------------
+
+For non-root users, generally only context-switched PMU events are available.
+These are normally only the events in the cpu PMU: the predefined events
+like cycles and instructions, plus some software events.
+
+Other PMUs and global measurements are normally root only.
+Some event qualifiers, such as "any", are also root only.
+
+This can be overridden by setting the kernel.perf_event_paranoid
+sysctl to -1, which allows non-root users to use these events.
+
+For accessing tracepoint events, perf needs to have read access to
+/sys/kernel/debug/tracing, even when perf_event_paranoid is in a relaxed
+setting.
+
+TRACING
+-------
+
+Some PMUs control advanced hardware tracing capabilities, such as Intel PT,
+which allow low-overhead execution tracing. These are described in a separate
+intel-pt.txt document.
+
PARAMETERIZED EVENTS
--------------------
@@ -106,6 +167,50 @@
perf stat -C 0 -e 'hv_gpci/dtbp_ptitc,phys_processor_idx=0x2/' ...
+EVENT GROUPS
+------------
+
+Perf supports time-based multiplexing of events when the number of events
+active exceeds the number of hardware performance counters. Multiplexing
+can cause measurement errors when the workload changes its execution
+profile.
+
+When metrics are computed using formulas from event counts, it is useful to
+ensure some events are always measured together as a group to minimize multiplexing
+errors. Event groups can be specified using { }.
+
+ perf stat -e '{instructions,cycles}' ...
+
+The number of available performance counters depends on the CPU. A group
+cannot contain more events than available counters.
+For example Intel Core CPUs typically have four generic performance counters
+for the core, plus three fixed counters for instructions, cycles and
+ref-cycles. Some special events have restrictions on which counter they
+can schedule, and may not support multiple instances in a single group.
+When too many events are specified in the group, none of them will
+be measured.
+
+Globally pinned events can limit the number of counters available for
+other groups. On x86 systems, the NMI watchdog pins a counter by default.
+The NMI watchdog can be disabled as root with
+
+ echo 0 > /proc/sys/kernel/nmi_watchdog
+
+Events from multiple different PMUs cannot be mixed in a group, with
+some exceptions for software events.
+
+LEADER SAMPLING
+---------------
+
+perf also supports group leader sampling using the :S specifier.
+
+ perf record -e '{cycles,instructions}:S' ...
+ perf report --group
+
+Normally all events in an event group sample, but with :S only
+the first event (the leader) samples, and it only reads the values of the
+other events in the group.
+
OPTIONS
-------
@@ -143,5 +248,5 @@
--------
linkperf:perf-stat[1], linkperf:perf-top[1],
linkperf:perf-record[1],
-http://www.intel.com/Assets/PDF/manual/253669.pdf[Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide],
+http://www.intel.com/sdm/[Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide],
http://support.amd.com/us/Processor_TechDocs/24593_APM_v2.pdf[AMD64 Architecture Programmer’s Manual Volume 2: System Programming]
diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt
index 43310d8..1d6092c 100644
--- a/tools/perf/Documentation/perf-mem.txt
+++ b/tools/perf/Documentation/perf-mem.txt
@@ -48,6 +48,14 @@
option can be passed in record mode. It will be interpreted the same way as perf
record.
+-K::
+--all-kernel::
+ Configure all used events to run in kernel space.
+
+-U::
+--all-user::
+ Configure all used events to run in user space.
+
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-report[1]
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 19aa175..8dbee83 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -347,6 +347,19 @@
--all-user::
Configure all used events to run in user space.
+--timestamp-filename::
+Append timestamp to output file name.
+
+--switch-output::
+Generate multiple perf.data files, timestamp prefixed, switching to a new one
+when receiving a SIGUSR2.
+
+A possible use case is, given an external event, to slice the perf.data file,
+which then gets processed, possibly via a perf script, to decide if that
+particular perf.data snapshot should be kept or not.
+
+Implies --timestamp-filename, --no-buildid and --no-buildid-cache.
+
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-list[1]
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 1211399..ebaf849 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -248,7 +248,7 @@
Note that when using the --itrace option the synthesized callchain size
will override this value if the synthesized callchain size is bigger.
- Default: 127
+ Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise.
-G::
--inverted::
@@ -285,7 +285,7 @@
-f::
--force::
- Don't complain, do it.
+ Don't do ownership validation.
--symfs=<directory>::
Look for files with symbols relative to this directory.
diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt
index 8ff4df9..1cc08cc 100644
--- a/tools/perf/Documentation/perf-sched.txt
+++ b/tools/perf/Documentation/perf-sched.txt
@@ -50,6 +50,22 @@
--dump-raw-trace=::
Display verbose dump of the sched data.
+OPTIONS for 'perf sched map'
+----------------------------
+
+--compact::
+ Show only CPUs with activity. This helps when visualizing systems
+ with a high core count.
+
+--cpus::
+ Show just entries with activities for the given CPUs.
+
+--color-cpus::
+ Highlight the given cpus.
+
+--color-pids::
+ Highlight the given pids.
+
SEE ALSO
--------
linkperf:perf-record[1]
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 382ddfb..a856a10 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -259,9 +259,23 @@
--full-source-path::
Show the full path for source files for srcline output.
+--max-stack::
+ Set the stack depth limit when parsing the callchain, anything
+ beyond the specified depth will be ignored. This is a trade-off
+ between information loss and faster processing especially for
+ workloads that can have a very long callchain stack.
+ Note that when using the --itrace option the synthesized callchain size
+ will override this value if the synthesized callchain size is bigger.
+
+ Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise.
+
--ns::
Use 9 decimal places when displaying time (i.e. show the nanoseconds)
+-f::
+--force::
+ Don't do ownership validation.
+
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-script-perl[1],
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 19f046f..91d638d 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -177,7 +177,7 @@
between information loss and faster processing especially for
workloads that can have a very long callchain stack.
- Default: 127
+ Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise.
--ignore-callees=<regex>::
Ignore callees of the function(s) matching the given regex.
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index 13293de..6afe201 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -117,9 +117,41 @@
--syscalls::
Trace system calls. This option is enabled by default.
+--call-graph [mode,type,min[,limit],order[,key][,branch]]::
+ Setup and enable call-graph (stack chain/backtrace) recording.
+ See `--call-graph` section in perf-record and perf-report
+ man pages for details. The ones that are most useful in 'perf trace'
+ are 'dwarf' and 'lbr', where available, try: 'perf trace --call-graph dwarf'.
+
+ Using this will, for the root user, bump the value of --mmap-pages to 4
+ times the maximum for non-root users, based on the kernel.perf_event_mlock_kb
+ sysctl. This is done only if the user doesn't specify a --mmap-pages value.
+
+--kernel-syscall-graph::
+ Show the kernel callchains on the syscall exit path.
+
--event::
Trace other events, see 'perf list' for a complete list.
+--max-stack::
+ Set the stack depth limit when parsing the callchain, anything
+ beyond the specified depth will be ignored. Note that at this point
+ this only affects the presentation; the kernel is still not limiting
+ the depth, and the overhead of collecting callchains needs to be set
+ via the knobs in --call-graph dwarf.
+
+ Implies '--call-graph dwarf' when --call-graph is not present on the
+ command line, on systems where DWARF unwinding was built in.
+
+ Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise.
+
+--min-stack::
+ Set the stack depth limit when parsing the callchain, anything
+ below the specified depth will be ignored. Disabled by default.
+
+ Implies '--call-graph dwarf' when --call-graph is not present on the
+ command line, on systems where DWARF unwinding was built in.
+
--proc-map-timeout::
When processing pre-existing threads /proc/XXX/mmap, it may take a long time,
because the file may be huge. A time out is needed in such cases.
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 000ea21..bde8cba 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -183,6 +183,11 @@
include config/Makefile
endif
+ifeq ($(config),0)
+include $(srctree)/tools/scripts/Makefile.arch
+-include arch/$(ARCH)/Makefile
+endif
+
# The FEATURE_DUMP_EXPORT holds location of the actual
# FEATURE_DUMP file to be used to bypass feature detection
# (for bpf or any other subproject)
@@ -297,8 +302,6 @@
# because maintaining the nesting to match is a pain. If
# we had "elif" things would have been much nicer...
--include arch/$(ARCH)/Makefile
-
ifneq ($(OUTPUT),)
CFLAGS += -I$(OUTPUT)
endif
@@ -390,7 +393,7 @@
__build-dir = $(subst $(OUTPUT),,$(dir $@))
build-dir = $(if $(__build-dir),$(__build-dir),.)
-prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h fixdep
+prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h fixdep archheaders
$(OUTPUT)%.o: %.c prepare FORCE
$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
@@ -430,7 +433,7 @@
LIBPERF_IN := $(OUTPUT)libperf-in.o
-$(LIBPERF_IN): fixdep FORCE
+$(LIBPERF_IN): prepare fixdep FORCE
$(Q)$(MAKE) $(build)=libperf
$(LIB_FILE): $(LIBPERF_IN)
@@ -625,7 +628,7 @@
$(call QUIET_CLEAN, config)
$(Q)$(MAKE) -C $(srctree)/tools/build/feature/ $(if $(OUTPUT),OUTPUT=$(OUTPUT)feature/,) clean >/dev/null
-clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean
+clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean
$(call QUIET_CLEAN, core-objs) $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS)
$(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
$(Q)$(RM) $(OUTPUT).config-detected
@@ -662,5 +665,5 @@
.PHONY: all install clean config-clean strip install-gtk
.PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope FORCE prepare
-.PHONY: libtraceevent_plugins
+.PHONY: libtraceevent_plugins archheaders
diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile
index 56e05f1..cc39309 100644
--- a/tools/perf/arch/powerpc/Makefile
+++ b/tools/perf/arch/powerpc/Makefile
@@ -3,4 +3,5 @@
endif
HAVE_KVM_STAT_SUPPORT := 1
+PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
PERF_HAVE_JITDUMP := 1
diff --git a/tools/perf/arch/powerpc/util/dwarf-regs.c b/tools/perf/arch/powerpc/util/dwarf-regs.c
index 733151c..41bdf95 100644
--- a/tools/perf/arch/powerpc/util/dwarf-regs.c
+++ b/tools/perf/arch/powerpc/util/dwarf-regs.c
@@ -10,19 +10,26 @@
*/
#include <stddef.h>
+#include <errno.h>
+#include <string.h>
#include <dwarf-regs.h>
-
+#include <linux/ptrace.h>
+#include <linux/kernel.h>
+#include "util.h"
struct pt_regs_dwarfnum {
const char *name;
unsigned int dwarfnum;
+ unsigned int ptregs_offset;
};
-#define STR(s) #s
-#define REG_DWARFNUM_NAME(r, num) {.name = r, .dwarfnum = num}
-#define GPR_DWARFNUM_NAME(num) \
- {.name = STR(%gpr##num), .dwarfnum = num}
-#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0}
+#define REG_DWARFNUM_NAME(r, num) \
+ {.name = STR(%)STR(r), .dwarfnum = num, \
+ .ptregs_offset = offsetof(struct pt_regs, r)}
+#define GPR_DWARFNUM_NAME(num) \
+ {.name = STR(%gpr##num), .dwarfnum = num, \
+ .ptregs_offset = offsetof(struct pt_regs, gpr[num])}
+#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0, .ptregs_offset = 0}
/*
* Reference:
@@ -61,12 +68,12 @@
GPR_DWARFNUM_NAME(29),
GPR_DWARFNUM_NAME(30),
GPR_DWARFNUM_NAME(31),
- REG_DWARFNUM_NAME("%msr", 66),
- REG_DWARFNUM_NAME("%ctr", 109),
- REG_DWARFNUM_NAME("%link", 108),
- REG_DWARFNUM_NAME("%xer", 101),
- REG_DWARFNUM_NAME("%dar", 119),
- REG_DWARFNUM_NAME("%dsisr", 118),
+ REG_DWARFNUM_NAME(msr, 66),
+ REG_DWARFNUM_NAME(ctr, 109),
+ REG_DWARFNUM_NAME(link, 108),
+ REG_DWARFNUM_NAME(xer, 101),
+ REG_DWARFNUM_NAME(dar, 119),
+ REG_DWARFNUM_NAME(dsisr, 118),
REG_DWARFNUM_END,
};
@@ -86,3 +93,12 @@
return roff->name;
return NULL;
}
+
+int regs_query_register_offset(const char *name)
+{
+ const struct pt_regs_dwarfnum *roff;
+ for (roff = regdwarfnum_table; roff->name != NULL; roff++)
+ if (!strcmp(roff->name, name))
+ return roff->ptregs_offset;
+ return -EINVAL;
+}
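The new regs_query_register_offset() gives powerpc what other architectures already provide for probe argument fetching: a translation from a DWARF register name to its offset inside struct pt_regs, or -EINVAL for an unknown name. A sketch of the intended use (register name chosen for illustration):

    int off = regs_query_register_offset("%gpr3");

    if (off < 0)
            return off;             /* -EINVAL: unknown register name */
    /* off == offsetof(struct pt_regs, gpr[3]) on powerpc */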
diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c
index bbc1a50..c6d0f91 100644
--- a/tools/perf/arch/powerpc/util/sym-handling.c
+++ b/tools/perf/arch/powerpc/util/sym-handling.c
@@ -19,12 +19,6 @@
ehdr.e_type == ET_DYN;
}
-#if defined(_CALL_ELF) && _CALL_ELF == 2
-void arch__elf_sym_adjust(GElf_Sym *sym)
-{
- sym->st_value += PPC64_LOCAL_ENTRY_OFFSET(sym->st_other);
-}
-#endif
#endif
#if !defined(_CALL_ELF) || _CALL_ELF != 2
@@ -65,18 +59,45 @@
return true;
}
+#ifdef HAVE_LIBELF_SUPPORT
+void arch__sym_update(struct symbol *s, GElf_Sym *sym)
+{
+ s->arch_sym = sym->st_other;
+}
+#endif
+
#define PPC64LE_LEP_OFFSET 8
void arch__fix_tev_from_maps(struct perf_probe_event *pev,
- struct probe_trace_event *tev, struct map *map)
+ struct probe_trace_event *tev, struct map *map,
+ struct symbol *sym)
{
+ int lep_offset;
+
/*
- * ppc64 ABIv2 local entry point is currently always 2 instructions
- * (8 bytes) after the global entry point.
+ * When probing at a function entry point, we normally always want the
+ * LEP since that catches calls to the function through both the GEP and
+ * the LEP. Hence, we would like to probe at an offset of 8 bytes if
+ * the user only specified the function entry.
+ *
+ * However, if the user specifies an offset, we fall back to using the
+ * GEP since all userspace applications (objdump/readelf) show function
+ * disassembly with offsets from the GEP.
+ *
+ * In addition, we shouldn't specify an offset for kretprobes.
*/
- if (!pev->uprobes && map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS) {
- tev->point.address += PPC64LE_LEP_OFFSET;
+ if (pev->point.offset || pev->point.retprobe || !map || !sym)
+ return;
+
+ lep_offset = PPC64_LOCAL_ENTRY_OFFSET(sym->arch_sym);
+
+ if (map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS)
tev->point.offset += PPC64LE_LEP_OFFSET;
+ else if (lep_offset) {
+ if (pev->uprobes)
+ tev->point.address += lep_offset;
+ else
+ tev->point.offset += lep_offset;
}
}
#endif
diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile
index 269af21..6c9211b 100644
--- a/tools/perf/arch/x86/Makefile
+++ b/tools/perf/arch/x86/Makefile
@@ -4,3 +4,26 @@
HAVE_KVM_STAT_SUPPORT := 1
PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
PERF_HAVE_JITDUMP := 1
+
+###
+# Syscall table generation
+#
+
+out := $(OUTPUT)arch/x86/include/generated/asm
+header := $(out)/syscalls_64.c
+sys := $(srctree)/tools/perf/arch/x86/entry/syscalls
+systbl := $(sys)/syscalltbl.sh
+
+# Create output directory if not already present
+_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
+
+$(header): $(sys)/syscall_64.tbl $(systbl)
+ @(test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \
+ (diff -B arch/x86/entry/syscalls/syscall_64.tbl ../../arch/x86/entry/syscalls/syscall_64.tbl >/dev/null) \
+ || echo "Warning: x86_64's syscall_64.tbl differs from kernel" >&2 )) || true
+ $(Q)$(SHELL) '$(systbl)' $(sys)/syscall_64.tbl 'x86_64' > $@
+
+clean::
+ $(call QUIET_CLEAN, x86) $(RM) $(header)
+
+archheaders: $(header)
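The exact contents of the generated syscalls_64.c come from syscalltbl.sh (not part of this section), but judging from how perf consumes syscall tables it is, roughly, a designated-initializer array mapping syscall numbers to names -- a hedged sketch:

    static const char *syscalltbl_x86_64[] = {
            [0] = "read",
            [1] = "write",
            [2] = "open",
            /* ... one entry per line of syscall_64.tbl below ... */
    };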
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
new file mode 100644
index 0000000..cac6d17
--- /dev/null
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -0,0 +1,376 @@
+#
+# 64-bit system call numbers and entry vectors
+#
+# The format is:
+# <number> <abi> <name> <entry point>
+#
+# The abi is "common", "64" or "x32" for this file.
+#
+0 common read sys_read
+1 common write sys_write
+2 common open sys_open
+3 common close sys_close
+4 common stat sys_newstat
+5 common fstat sys_newfstat
+6 common lstat sys_newlstat
+7 common poll sys_poll
+8 common lseek sys_lseek
+9 common mmap sys_mmap
+10 common mprotect sys_mprotect
+11 common munmap sys_munmap
+12 common brk sys_brk
+13 64 rt_sigaction sys_rt_sigaction
+14 common rt_sigprocmask sys_rt_sigprocmask
+15 64 rt_sigreturn sys_rt_sigreturn/ptregs
+16 64 ioctl sys_ioctl
+17 common pread64 sys_pread64
+18 common pwrite64 sys_pwrite64
+19 64 readv sys_readv
+20 64 writev sys_writev
+21 common access sys_access
+22 common pipe sys_pipe
+23 common select sys_select
+24 common sched_yield sys_sched_yield
+25 common mremap sys_mremap
+26 common msync sys_msync
+27 common mincore sys_mincore
+28 common madvise sys_madvise
+29 common shmget sys_shmget
+30 common shmat sys_shmat
+31 common shmctl sys_shmctl
+32 common dup sys_dup
+33 common dup2 sys_dup2
+34 common pause sys_pause
+35 common nanosleep sys_nanosleep
+36 common getitimer sys_getitimer
+37 common alarm sys_alarm
+38 common setitimer sys_setitimer
+39 common getpid sys_getpid
+40 common sendfile sys_sendfile64
+41 common socket sys_socket
+42 common connect sys_connect
+43 common accept sys_accept
+44 common sendto sys_sendto
+45 64 recvfrom sys_recvfrom
+46 64 sendmsg sys_sendmsg
+47 64 recvmsg sys_recvmsg
+48 common shutdown sys_shutdown
+49 common bind sys_bind
+50 common listen sys_listen
+51 common getsockname sys_getsockname
+52 common getpeername sys_getpeername
+53 common socketpair sys_socketpair
+54 64 setsockopt sys_setsockopt
+55 64 getsockopt sys_getsockopt
+56 common clone sys_clone/ptregs
+57 common fork sys_fork/ptregs
+58 common vfork sys_vfork/ptregs
+59 64 execve sys_execve/ptregs
+60 common exit sys_exit
+61 common wait4 sys_wait4
+62 common kill sys_kill
+63 common uname sys_newuname
+64 common semget sys_semget
+65 common semop sys_semop
+66 common semctl sys_semctl
+67 common shmdt sys_shmdt
+68 common msgget sys_msgget
+69 common msgsnd sys_msgsnd
+70 common msgrcv sys_msgrcv
+71 common msgctl sys_msgctl
+72 common fcntl sys_fcntl
+73 common flock sys_flock
+74 common fsync sys_fsync
+75 common fdatasync sys_fdatasync
+76 common truncate sys_truncate
+77 common ftruncate sys_ftruncate
+78 common getdents sys_getdents
+79 common getcwd sys_getcwd
+80 common chdir sys_chdir
+81 common fchdir sys_fchdir
+82 common rename sys_rename
+83 common mkdir sys_mkdir
+84 common rmdir sys_rmdir
+85 common creat sys_creat
+86 common link sys_link
+87 common unlink sys_unlink
+88 common symlink sys_symlink
+89 common readlink sys_readlink
+90 common chmod sys_chmod
+91 common fchmod sys_fchmod
+92 common chown sys_chown
+93 common fchown sys_fchown
+94 common lchown sys_lchown
+95 common umask sys_umask
+96 common gettimeofday sys_gettimeofday
+97 common getrlimit sys_getrlimit
+98 common getrusage sys_getrusage
+99 common sysinfo sys_sysinfo
+100 common times sys_times
+101 64 ptrace sys_ptrace
+102 common getuid sys_getuid
+103 common syslog sys_syslog
+104 common getgid sys_getgid
+105 common setuid sys_setuid
+106 common setgid sys_setgid
+107 common geteuid sys_geteuid
+108 common getegid sys_getegid
+109 common setpgid sys_setpgid
+110 common getppid sys_getppid
+111 common getpgrp sys_getpgrp
+112 common setsid sys_setsid
+113 common setreuid sys_setreuid
+114 common setregid sys_setregid
+115 common getgroups sys_getgroups
+116 common setgroups sys_setgroups
+117 common setresuid sys_setresuid
+118 common getresuid sys_getresuid
+119 common setresgid sys_setresgid
+120 common getresgid sys_getresgid
+121 common getpgid sys_getpgid
+122 common setfsuid sys_setfsuid
+123 common setfsgid sys_setfsgid
+124 common getsid sys_getsid
+125 common capget sys_capget
+126 common capset sys_capset
+127 64 rt_sigpending sys_rt_sigpending
+128 64 rt_sigtimedwait sys_rt_sigtimedwait
+129 64 rt_sigqueueinfo sys_rt_sigqueueinfo
+130 common rt_sigsuspend sys_rt_sigsuspend
+131 64 sigaltstack sys_sigaltstack
+132 common utime sys_utime
+133 common mknod sys_mknod
+134 64 uselib
+135 common personality sys_personality
+136 common ustat sys_ustat
+137 common statfs sys_statfs
+138 common fstatfs sys_fstatfs
+139 common sysfs sys_sysfs
+140 common getpriority sys_getpriority
+141 common setpriority sys_setpriority
+142 common sched_setparam sys_sched_setparam
+143 common sched_getparam sys_sched_getparam
+144 common sched_setscheduler sys_sched_setscheduler
+145 common sched_getscheduler sys_sched_getscheduler
+146 common sched_get_priority_max sys_sched_get_priority_max
+147 common sched_get_priority_min sys_sched_get_priority_min
+148 common sched_rr_get_interval sys_sched_rr_get_interval
+149 common mlock sys_mlock
+150 common munlock sys_munlock
+151 common mlockall sys_mlockall
+152 common munlockall sys_munlockall
+153 common vhangup sys_vhangup
+154 common modify_ldt sys_modify_ldt
+155 common pivot_root sys_pivot_root
+156 64 _sysctl sys_sysctl
+157 common prctl sys_prctl
+158 common arch_prctl sys_arch_prctl
+159 common adjtimex sys_adjtimex
+160 common setrlimit sys_setrlimit
+161 common chroot sys_chroot
+162 common sync sys_sync
+163 common acct sys_acct
+164 common settimeofday sys_settimeofday
+165 common mount sys_mount
+166 common umount2 sys_umount
+167 common swapon sys_swapon
+168 common swapoff sys_swapoff
+169 common reboot sys_reboot
+170 common sethostname sys_sethostname
+171 common setdomainname sys_setdomainname
+172 common iopl sys_iopl/ptregs
+173 common ioperm sys_ioperm
+174 64 create_module
+175 common init_module sys_init_module
+176 common delete_module sys_delete_module
+177 64 get_kernel_syms
+178 64 query_module
+179 common quotactl sys_quotactl
+180 64 nfsservctl
+181 common getpmsg
+182 common putpmsg
+183 common afs_syscall
+184 common tuxcall
+185 common security
+186 common gettid sys_gettid
+187 common readahead sys_readahead
+188 common setxattr sys_setxattr
+189 common lsetxattr sys_lsetxattr
+190 common fsetxattr sys_fsetxattr
+191 common getxattr sys_getxattr
+192 common lgetxattr sys_lgetxattr
+193 common fgetxattr sys_fgetxattr
+194 common listxattr sys_listxattr
+195 common llistxattr sys_llistxattr
+196 common flistxattr sys_flistxattr
+197 common removexattr sys_removexattr
+198 common lremovexattr sys_lremovexattr
+199 common fremovexattr sys_fremovexattr
+200 common tkill sys_tkill
+201 common time sys_time
+202 common futex sys_futex
+203 common sched_setaffinity sys_sched_setaffinity
+204 common sched_getaffinity sys_sched_getaffinity
+205 64 set_thread_area
+206 64 io_setup sys_io_setup
+207 common io_destroy sys_io_destroy
+208 common io_getevents sys_io_getevents
+209 64 io_submit sys_io_submit
+210 common io_cancel sys_io_cancel
+211 64 get_thread_area
+212 common lookup_dcookie sys_lookup_dcookie
+213 common epoll_create sys_epoll_create
+214 64 epoll_ctl_old
+215 64 epoll_wait_old
+216 common remap_file_pages sys_remap_file_pages
+217 common getdents64 sys_getdents64
+218 common set_tid_address sys_set_tid_address
+219 common restart_syscall sys_restart_syscall
+220 common semtimedop sys_semtimedop
+221 common fadvise64 sys_fadvise64
+222 64 timer_create sys_timer_create
+223 common timer_settime sys_timer_settime
+224 common timer_gettime sys_timer_gettime
+225 common timer_getoverrun sys_timer_getoverrun
+226 common timer_delete sys_timer_delete
+227 common clock_settime sys_clock_settime
+228 common clock_gettime sys_clock_gettime
+229 common clock_getres sys_clock_getres
+230 common clock_nanosleep sys_clock_nanosleep
+231 common exit_group sys_exit_group
+232 common epoll_wait sys_epoll_wait
+233 common epoll_ctl sys_epoll_ctl
+234 common tgkill sys_tgkill
+235 common utimes sys_utimes
+236 64 vserver
+237 common mbind sys_mbind
+238 common set_mempolicy sys_set_mempolicy
+239 common get_mempolicy sys_get_mempolicy
+240 common mq_open sys_mq_open
+241 common mq_unlink sys_mq_unlink
+242 common mq_timedsend sys_mq_timedsend
+243 common mq_timedreceive sys_mq_timedreceive
+244 64 mq_notify sys_mq_notify
+245 common mq_getsetattr sys_mq_getsetattr
+246 64 kexec_load sys_kexec_load
+247 64 waitid sys_waitid
+248 common add_key sys_add_key
+249 common request_key sys_request_key
+250 common keyctl sys_keyctl
+251 common ioprio_set sys_ioprio_set
+252 common ioprio_get sys_ioprio_get
+253 common inotify_init sys_inotify_init
+254 common inotify_add_watch sys_inotify_add_watch
+255 common inotify_rm_watch sys_inotify_rm_watch
+256 common migrate_pages sys_migrate_pages
+257 common openat sys_openat
+258 common mkdirat sys_mkdirat
+259 common mknodat sys_mknodat
+260 common fchownat sys_fchownat
+261 common futimesat sys_futimesat
+262 common newfstatat sys_newfstatat
+263 common unlinkat sys_unlinkat
+264 common renameat sys_renameat
+265 common linkat sys_linkat
+266 common symlinkat sys_symlinkat
+267 common readlinkat sys_readlinkat
+268 common fchmodat sys_fchmodat
+269 common faccessat sys_faccessat
+270 common pselect6 sys_pselect6
+271 common ppoll sys_ppoll
+272 common unshare sys_unshare
+273 64 set_robust_list sys_set_robust_list
+274 64 get_robust_list sys_get_robust_list
+275 common splice sys_splice
+276 common tee sys_tee
+277 common sync_file_range sys_sync_file_range
+278 64 vmsplice sys_vmsplice
+279 64 move_pages sys_move_pages
+280 common utimensat sys_utimensat
+281 common epoll_pwait sys_epoll_pwait
+282 common signalfd sys_signalfd
+283 common timerfd_create sys_timerfd_create
+284 common eventfd sys_eventfd
+285 common fallocate sys_fallocate
+286 common timerfd_settime sys_timerfd_settime
+287 common timerfd_gettime sys_timerfd_gettime
+288 common accept4 sys_accept4
+289 common signalfd4 sys_signalfd4
+290 common eventfd2 sys_eventfd2
+291 common epoll_create1 sys_epoll_create1
+292 common dup3 sys_dup3
+293 common pipe2 sys_pipe2
+294 common inotify_init1 sys_inotify_init1
+295 64 preadv sys_preadv
+296 64 pwritev sys_pwritev
+297 64 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo
+298 common perf_event_open sys_perf_event_open
+299 64 recvmmsg sys_recvmmsg
+300 common fanotify_init sys_fanotify_init
+301 common fanotify_mark sys_fanotify_mark
+302 common prlimit64 sys_prlimit64
+303 common name_to_handle_at sys_name_to_handle_at
+304 common open_by_handle_at sys_open_by_handle_at
+305 common clock_adjtime sys_clock_adjtime
+306 common syncfs sys_syncfs
+307 64 sendmmsg sys_sendmmsg
+308 common setns sys_setns
+309 common getcpu sys_getcpu
+310 64 process_vm_readv sys_process_vm_readv
+311 64 process_vm_writev sys_process_vm_writev
+312 common kcmp sys_kcmp
+313 common finit_module sys_finit_module
+314 common sched_setattr sys_sched_setattr
+315 common sched_getattr sys_sched_getattr
+316 common renameat2 sys_renameat2
+317 common seccomp sys_seccomp
+318 common getrandom sys_getrandom
+319 common memfd_create sys_memfd_create
+320 common kexec_file_load sys_kexec_file_load
+321 common bpf sys_bpf
+322 64 execveat sys_execveat/ptregs
+323 common userfaultfd sys_userfaultfd
+324 common membarrier sys_membarrier
+325 common mlock2 sys_mlock2
+326 common copy_file_range sys_copy_file_range
+327 64 preadv2 sys_preadv2
+328 64 pwritev2 sys_pwritev2
+
+#
+# x32-specific system call numbers start at 512 to avoid cache impact
+# for native 64-bit operation.
+#
+512 x32 rt_sigaction compat_sys_rt_sigaction
+513 x32 rt_sigreturn sys32_x32_rt_sigreturn
+514 x32 ioctl compat_sys_ioctl
+515 x32 readv compat_sys_readv
+516 x32 writev compat_sys_writev
+517 x32 recvfrom compat_sys_recvfrom
+518 x32 sendmsg compat_sys_sendmsg
+519 x32 recvmsg compat_sys_recvmsg
+520 x32 execve compat_sys_execve/ptregs
+521 x32 ptrace compat_sys_ptrace
+522 x32 rt_sigpending compat_sys_rt_sigpending
+523 x32 rt_sigtimedwait compat_sys_rt_sigtimedwait
+524 x32 rt_sigqueueinfo compat_sys_rt_sigqueueinfo
+525 x32 sigaltstack compat_sys_sigaltstack
+526 x32 timer_create compat_sys_timer_create
+527 x32 mq_notify compat_sys_mq_notify
+528 x32 kexec_load compat_sys_kexec_load
+529 x32 waitid compat_sys_waitid
+530 x32 set_robust_list compat_sys_set_robust_list
+531 x32 get_robust_list compat_sys_get_robust_list
+532 x32 vmsplice compat_sys_vmsplice
+533 x32 move_pages compat_sys_move_pages
+534 x32 preadv compat_sys_preadv64
+535 x32 pwritev compat_sys_pwritev64
+536 x32 rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo
+537 x32 recvmmsg compat_sys_recvmmsg
+538 x32 sendmmsg compat_sys_sendmmsg
+539 x32 process_vm_readv compat_sys_process_vm_readv
+540 x32 process_vm_writev compat_sys_process_vm_writev
+541 x32 setsockopt compat_sys_setsockopt
+542 x32 getsockopt compat_sys_getsockopt
+543 x32 io_setup compat_sys_io_setup
+544 x32 io_submit compat_sys_io_submit
+545 x32 execveat compat_sys_execveat/ptregs
diff --git a/tools/perf/arch/x86/entry/syscalls/syscalltbl.sh b/tools/perf/arch/x86/entry/syscalls/syscalltbl.sh
new file mode 100755
index 0000000..49a18b9
--- /dev/null
+++ b/tools/perf/arch/x86/entry/syscalls/syscalltbl.sh
@@ -0,0 +1,39 @@
+#!/bin/sh
+
+in="$1"
+arch="$2"
+
+syscall_macro() {
+ nr="$1"
+ name="$2"
+
+ echo " [$nr] = \"$name\","
+}
+
+emit() {
+ nr="$1"
+ entry="$2"
+
+ syscall_macro "$nr" "$entry"
+}
+
+echo "static const char *syscalltbl_${arch}[] = {"
+
+sorted_table=$(mktemp /tmp/syscalltbl.XXXXXX)
+grep '^[0-9]' "$in" | sort -n > "$sorted_table"
+
+max_nr=0
+while read nr abi name entry compat; do
+	if [ $nr -ge 512 ] ; then # discard compat syscalls
+ break
+ fi
+
+ emit "$nr" "$name"
+ max_nr=$nr
+done < "$sorted_table"
+
+rm -f "$sorted_table"
+
+echo "};"
+
+echo "#define SYSCALLTBL_${arch}_MAX_ID ${max_nr}"
diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c
index 9d29ee2..d4aa567 100644
--- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c
+++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c
@@ -71,7 +71,7 @@
CHECK__(parse_events(evlist, "cycles:u", NULL));
- perf_evlist__config(evlist, &opts);
+ perf_evlist__config(evlist, &opts, NULL);
evsel = perf_evlist__first(evlist);
diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c
index d66f9ad..7dc3063 100644
--- a/tools/perf/arch/x86/util/intel-bts.c
+++ b/tools/perf/arch/x86/util/intel-bts.c
@@ -438,6 +438,11 @@
if (!intel_bts_pmu)
return NULL;
+ if (setenv("JITDUMP_USE_ARCH_TIMESTAMP", "1", 1)) {
+ *err = -errno;
+ return NULL;
+ }
+
btsr = zalloc(sizeof(struct intel_bts_recording));
if (!btsr) {
*err = -ENOMEM;
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index a339517..a07b960 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -1027,6 +1027,11 @@
if (!intel_pt_pmu)
return NULL;
+ if (setenv("JITDUMP_USE_ARCH_TIMESTAMP", "1", 1)) {
+ *err = -errno;
+ return NULL;
+ }
+
ptr = zalloc(sizeof(struct intel_pt_recording));
if (!ptr) {
*err = -ENOMEM;
diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c
index fd28684..357f1b1 100644
--- a/tools/perf/arch/x86/util/tsc.c
+++ b/tools/perf/arch/x86/util/tsc.c
@@ -7,7 +7,6 @@
#include <linux/types.h>
#include "../../util/debug.h"
#include "../../util/tsc.h"
-#include "tsc.h"
int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
struct perf_tsc_conversion *tc)
@@ -46,3 +45,34 @@
return low | ((u64)high) << 32;
}
+
+int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc,
+ struct perf_tool *tool,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ union perf_event event = {
+ .time_conv = {
+ .header = {
+ .type = PERF_RECORD_TIME_CONV,
+ .size = sizeof(struct time_conv_event),
+ },
+ },
+ };
+ struct perf_tsc_conversion tc;
+ int err;
+
+ err = perf_read_tsc_conversion(pc, &tc);
+ if (err == -EOPNOTSUPP)
+ return 0;
+ if (err)
+ return err;
+
+ pr_debug2("Synthesizing TSC conversion information\n");
+
+ event.time_conv.time_mult = tc.time_mult;
+ event.time_conv.time_shift = tc.time_shift;
+ event.time_conv.time_zero = tc.time_zero;
+
+ return process(tool, &event, NULL, machine);
+}
diff --git a/tools/perf/arch/x86/util/tsc.h b/tools/perf/arch/x86/util/tsc.h
deleted file mode 100644
index 2edc4d3..0000000
--- a/tools/perf/arch/x86/util/tsc.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#ifndef TOOLS_PERF_ARCH_X86_UTIL_TSC_H__
-#define TOOLS_PERF_ARCH_X86_UTIL_TSC_H__
-
-#include <linux/types.h>
-
-struct perf_tsc_conversion {
- u16 time_shift;
- u32 time_mult;
- u64 time_zero;
-};
-
-struct perf_event_mmap_page;
-
-int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
- struct perf_tsc_conversion *tc);
-
-#endif /* TOOLS_PERF_ARCH_X86_UTIL_TSC_H__ */
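The perf_tsc_conversion fields that lived in this header (now picked up from the shared util/tsc.h) parameterize the standard conversion from raw TSC cycles to the perf clock, and they are exactly what the synthesized PERF_RECORD_TIME_CONV event carries. In sketch form, mirroring perf's tsc_to_perf_time() with the struct fields passed individually, and split in two halves so the multiplication cannot overflow 64 bits:

#include <linux/types.h>

/* time = time_zero + ((cyc * time_mult) >> time_shift) */
static inline __u64 tsc_to_perf_time(__u64 cyc, __u16 time_shift,
				     __u32 time_mult, __u64 time_zero)
{
	__u64 quot = cyc >> time_shift;
	__u64 rem  = cyc & (((__u64)1 << time_shift) - 1);

	return time_zero + quot * time_mult +
	       ((rem * time_mult) >> time_shift);
}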
diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
index 6a18ce2..6952db6 100644
--- a/tools/perf/bench/futex-lock-pi.c
+++ b/tools/perf/bench/futex-lock-pi.c
@@ -83,7 +83,7 @@
do {
int ret;
again:
- ret = futex_lock_pi(w->futex, NULL, 0, futex_flag);
+ ret = futex_lock_pi(w->futex, NULL, futex_flag);
if (ret) { /* handle lock acquisition */
if (!silent)
diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h
index d44de9f..b2e06d1 100644
--- a/tools/perf/bench/futex.h
+++ b/tools/perf/bench/futex.h
@@ -57,13 +57,11 @@
/**
* futex_lock_pi() - block on uaddr as a PI mutex
- * @detect: whether (1) or not (0) to perform deadlock detection
*/
static inline int
-futex_lock_pi(u_int32_t *uaddr, struct timespec *timeout, int detect,
- int opflags)
+futex_lock_pi(u_int32_t *uaddr, struct timespec *timeout, int opflags)
{
- return futex(uaddr, FUTEX_LOCK_PI, detect, timeout, NULL, 0, opflags);
+ return futex(uaddr, FUTEX_LOCK_PI, 0, timeout, NULL, 0, opflags);
}
/**
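For context, futex_lock_pi() and friends sit on a raw six-argument futex(2) wrapper; FUTEX_LOCK_PI ignores its value argument on current kernels, which is why the deadlock-detection parameter could be dropped and hardwired to 0. A hedged sketch of such a wrapper (the bench header's real macro likewise folds opflags into the op):

#include <time.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/syscall.h>

/* Minimal raw futex(2) wrapper: op is a FUTEX_* operation, opflags is
 * e.g. FUTEX_PRIVATE_FLAG. (Sketch, not the bench code's exact macro.)
 */
static inline int futex(u_int32_t *uaddr, int op, u_int32_t val,
			struct timespec *timeout, u_int32_t *uaddr2,
			u_int32_t val3, int opflags)
{
	return syscall(SYS_futex, uaddr, op | opflags, val, timeout,
		       uaddr2, val3);
}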
diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c
index a91aa85..2b54d0f 100644
--- a/tools/perf/bench/mem-functions.c
+++ b/tools/perf/bench/mem-functions.c
@@ -6,6 +6,7 @@
* Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
*/
+#include "debug.h"
#include "../perf.h"
#include "../util/util.h"
#include <subcmd/parse-options.h>
@@ -63,14 +64,16 @@
.config = PERF_COUNT_HW_CPU_CYCLES
};
-static void init_cycles(void)
+static int init_cycles(void)
{
cycles_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag());
- if (cycles_fd < 0 && errno == ENOSYS)
- die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
- else
- BUG_ON(cycles_fd < 0);
+ if (cycles_fd < 0 && errno == ENOSYS) {
+ pr_debug("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
+ return -1;
+ }
+
+ return cycles_fd;
}
static u64 get_cycles(void)
@@ -155,8 +158,13 @@
argc = parse_options(argc, argv, options, info->usage, 0);
- if (use_cycles)
- init_cycles();
+ if (use_cycles) {
+ i = init_cycles();
+ if (i < 0) {
+ fprintf(stderr, "Failed to open cycles counter\n");
+ return i;
+ }
+ }
size = (size_t)perf_atoll((char *)size_str);
size_total = (double)size * nr_loops;
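With init_cycles() now returning the event fd instead of dying, reading the counter is a plain read(2) of one u64 from that fd. A reduced sketch of the companion get_cycles() path, assuming the fd opened above:

#include <stdint.h>
#include <unistd.h>

/* Read the current value of an already-open counting perf event fd;
 * returns 0 on a short read (sketch; the bench code BUG_ON()s instead).
 */
static uint64_t read_cycles(int cycles_fd)
{
	uint64_t cycles;

	if (read(cycles_fd, &cycles, sizeof(cycles)) != sizeof(cycles))
		return 0;
	return cycles;
}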
diff --git a/tools/perf/builtin-config.c b/tools/perf/builtin-config.c
index c42448e..fe1b77f 100644
--- a/tools/perf/builtin-config.c
+++ b/tools/perf/builtin-config.c
@@ -12,6 +12,7 @@
#include <subcmd/parse-options.h>
#include "util/util.h"
#include "util/debug.h"
+#include "util/config.h"
static bool use_system_config, use_user_config;
@@ -32,13 +33,28 @@
OPT_END()
};
-static int show_config(const char *key, const char *value,
- void *cb __maybe_unused)
+static int show_config(struct perf_config_set *set)
{
- if (value)
- printf("%s=%s\n", key, value);
- else
- printf("%s\n", key);
+ struct perf_config_section *section;
+ struct perf_config_item *item;
+ struct list_head *sections;
+
+ if (set == NULL)
+ return -1;
+
+ sections = &set->sections;
+ if (list_empty(sections))
+ return -1;
+
+ list_for_each_entry(section, sections, node) {
+		list_for_each_entry(item, &section->items, node) {
+ char *value = item->value;
+
+ if (value)
+ printf("%s.%s=%s\n", section->name,
+ item->name, value);
+ }
+ }
return 0;
}
@@ -46,6 +62,7 @@
int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused)
{
int ret = 0;
+ struct perf_config_set *set;
char *user_config = mkpath("%s/.perfconfig", getenv("HOME"));
argc = parse_options(argc, argv, config_options, config_usage,
@@ -63,13 +80,19 @@
else if (use_user_config)
config_exclusive_filename = user_config;
+ set = perf_config_set__new();
+ if (!set) {
+ ret = -1;
+ goto out_err;
+ }
+
switch (actions) {
case ACTION_LIST:
if (argc) {
pr_err("Error: takes no arguments\n");
parse_options_usage(config_usage, config_options, "l", 1);
} else {
- ret = perf_config(show_config, NULL);
+ ret = show_config(set);
if (ret < 0) {
const char * config_filename = config_exclusive_filename;
if (!config_exclusive_filename)
@@ -83,5 +106,7 @@
usage_with_options(config_usage, config_options);
}
+ perf_config_set__delete(set);
+out_err:
return ret;
}
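The reworked show_config() walks a two-level structure: a perf_config_set owns a list of sections, and each section owns a list of key/value items. The shape of that data model, sketched with field names assumed from how the code above dereferences them:

#include <linux/list.h>

struct perf_config_item {
	char			*name;	/* key within the section */
	char			*value;
	struct list_head	node;	/* on perf_config_section.items */
};

struct perf_config_section {
	char			*name;	/* e.g. "core" in core.editor */
	struct list_head	items;
	struct list_head	node;	/* on perf_config_set.sections */
};

struct perf_config_set {
	struct list_head	sections;
};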
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 8053a8c..9ce354f 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -428,7 +428,7 @@
struct rb_root *root;
struct rb_node *next;
- if (sort__need_collapse)
+ if (hists__has(hists, need_collapse))
root = &hists->entries_collapsed;
else
root = hists->entries_in;
@@ -450,7 +450,7 @@
struct rb_root *root;
struct rb_node *next;
- if (sort__need_collapse)
+ if (hists__has(hists, need_collapse))
root = &hists->entries_collapsed;
else
root = hists->entries_in;
diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c
index bc1de9b..f9830c9 100644
--- a/tools/perf/builtin-help.c
+++ b/tools/perf/builtin-help.c
@@ -61,6 +61,7 @@
struct child_process ec_process;
const char *argv_ec[] = { "emacsclient", "--version", NULL };
int version;
+ int ret = -1;
/* emacsclient prints its version number on stderr */
memset(&ec_process, 0, sizeof(ec_process));
@@ -71,7 +72,10 @@
fprintf(stderr, "Failed to start emacsclient.\n");
return -1;
}
- strbuf_read(&buffer, ec_process.err, 20);
+ if (strbuf_read(&buffer, ec_process.err, 20) < 0) {
+ fprintf(stderr, "Failed to read emacsclient version\n");
+ goto out;
+ }
close(ec_process.err);
/*
@@ -82,8 +86,7 @@
if (prefixcmp(buffer.buf, "emacsclient")) {
fprintf(stderr, "Failed to parse emacsclient version.\n");
- strbuf_release(&buffer);
- return -1;
+ goto out;
}
version = atoi(buffer.buf + strlen("emacsclient"));
@@ -92,12 +95,11 @@
fprintf(stderr,
"emacsclient version '%d' too old (< 22).\n",
version);
- strbuf_release(&buffer);
- return -1;
- }
-
+ } else
+ ret = 0;
+out:
strbuf_release(&buffer);
- return 0;
+ return ret;
}
static void exec_woman_emacs(const char *path, const char *page)
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index d1a2d10..e5afa8f 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -748,6 +748,7 @@
.auxtrace_info = perf_event__repipe_op2_synth,
.auxtrace = perf_event__repipe_auxtrace,
.auxtrace_error = perf_event__repipe_op2_synth,
+ .time_conv = perf_event__repipe_op2_synth,
.finished_round = perf_event__repipe_oe_synth,
.build_id = perf_event__repipe_op2_synth,
.id_index = perf_event__repipe_op2_synth,
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index c9cb3be..58adfee 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -375,7 +375,7 @@
}
al.thread = machine__findnew_thread(machine, sample->pid, sample->tid);
- sample__resolve_callchain(sample, NULL, evsel, &al, 16);
+ sample__resolve_callchain(sample, &callchain_cursor, NULL, evsel, &al, 16);
callchain_cursor_commit(&callchain_cursor);
while (true) {
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index bff6664..6487c06 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -982,7 +982,7 @@
struct perf_evlist *evlist = kvm->evlist;
char sbuf[STRERR_BUFSIZE];
- perf_evlist__config(evlist, &kvm->opts);
+ perf_evlist__config(evlist, &kvm->opts, NULL);
/*
* Note: exclude_{guest,host} do not apply here.
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index 85db3be..1dc140c 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -62,19 +62,22 @@
int rec_argc, i = 0, j;
const char **rec_argv;
int ret;
+ bool all_user = false, all_kernel = false;
struct option options[] = {
OPT_CALLBACK('e', "event", &mem, "event",
"event selector. use 'perf mem record -e list' to list available events",
parse_record_events),
OPT_INCR('v', "verbose", &verbose,
"be more verbose (show counter open errors, etc)"),
+	OPT_BOOLEAN('U', "all-user", &all_user, "collect only user level data"),
+	OPT_BOOLEAN('K', "all-kernel", &all_kernel, "collect only kernel level data"),
OPT_END()
};
argc = parse_options(argc, argv, options, record_mem_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
- rec_argc = argc + 7; /* max number of arguments */
+ rec_argc = argc + 9; /* max number of arguments */
rec_argv = calloc(rec_argc + 1, sizeof(char *));
if (!rec_argv)
return -1;
@@ -103,6 +106,12 @@
rec_argv[i++] = perf_mem_events__name(j);
};
+ if (all_user)
+ rec_argv[i++] = "--all-user";
+
+ if (all_kernel)
+ rec_argv[i++] = "--all-kernel";
+
for (j = 0; j < argc; j++, i++)
rec_argv[i] = argv[j];
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 515510e..f3679c4 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -29,10 +29,12 @@
#include "util/data.h"
#include "util/perf_regs.h"
#include "util/auxtrace.h"
+#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
+#include "util/trigger.h"
#include "asm/bug.h"
#include <unistd.h>
@@ -55,6 +57,8 @@
bool no_buildid_cache;
bool no_buildid_cache_set;
bool buildid_all;
+ bool timestamp_filename;
+ bool switch_output;
unsigned long long samples;
};
@@ -124,9 +128,10 @@
static volatile int done;
static volatile int signr = -1;
static volatile int child_finished;
-static volatile int auxtrace_snapshot_enabled;
-static volatile int auxtrace_snapshot_err;
+
static volatile int auxtrace_record__snapshot_started;
+static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
+static DEFINE_TRIGGER(switch_output_trigger);
static void sig_handler(int sig)
{
@@ -244,11 +249,12 @@
{
pr_debug("Recording AUX area tracing snapshot\n");
if (record__auxtrace_read_snapshot_all(rec) < 0) {
- auxtrace_snapshot_err = -1;
+ trigger_error(&auxtrace_snapshot_trigger);
} else {
- auxtrace_snapshot_err = auxtrace_record__snapshot_finish(rec->itr);
- if (!auxtrace_snapshot_err)
- auxtrace_snapshot_enabled = 1;
+ if (auxtrace_record__snapshot_finish(rec->itr))
+ trigger_error(&auxtrace_snapshot_trigger);
+ else
+ trigger_ready(&auxtrace_snapshot_trigger);
}
}
@@ -283,7 +289,7 @@
struct record_opts *opts = &rec->opts;
int rc = 0;
- perf_evlist__config(evlist, opts);
+ perf_evlist__config(evlist, opts, &callchain_param);
evlist__for_each(evlist, pos) {
try_again:
@@ -494,6 +500,73 @@
return;
}
+static int record__synthesize_workload(struct record *rec)
+{
+ struct {
+ struct thread_map map;
+ struct thread_map_data map_data;
+ } thread_map;
+
+ thread_map.map.nr = 1;
+ thread_map.map.map[0].pid = rec->evlist->workload.pid;
+ thread_map.map.map[0].comm = NULL;
+ return perf_event__synthesize_thread_map(&rec->tool, &thread_map.map,
+ process_synthesized_event,
+ &rec->session->machines.host,
+ rec->opts.sample_address,
+ rec->opts.proc_map_timeout);
+}
+
+static int record__synthesize(struct record *rec);
+
+static int
+record__switch_output(struct record *rec, bool at_exit)
+{
+ struct perf_data_file *file = &rec->file;
+ int fd, err;
+
+ /* Same Size: "2015122520103046"*/
+ char timestamp[] = "InvalidTimestamp";
+
+ rec->samples = 0;
+ record__finish_output(rec);
+ err = fetch_current_timestamp(timestamp, sizeof(timestamp));
+ if (err) {
+ pr_err("Failed to get current timestamp\n");
+ return -EINVAL;
+ }
+
+ fd = perf_data_file__switch(file, timestamp,
+ rec->session->header.data_offset,
+ at_exit);
+ if (fd >= 0 && !at_exit) {
+ rec->bytes_written = 0;
+ rec->session->header.data_size = 0;
+ }
+
+ if (!quiet)
+ fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
+ file->path, timestamp);
+
+ /* Output tracking events */
+ if (!at_exit) {
+ record__synthesize(rec);
+
+ /*
+ * In 'perf record --switch-output' without -a,
+ * record__synthesize() in record__switch_output() won't
+ * generate tracking events because there's no thread_map
+	 * in evlist, so the newly created perf.data would lack
+	 * map and comm information.
+ * Create a fake thread_map and directly call
+ * perf_event__synthesize_thread_map() for those events.
+ */
+ if (target__none(&rec->opts.target))
+ record__synthesize_workload(rec);
+ }
+ return fd;
+}
+
static volatile int workload_exec_errno;
/*
@@ -512,6 +585,15 @@
static void snapshot_sig_handler(int sig);
+int __weak
+perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
+ struct perf_tool *tool __maybe_unused,
+ perf_event__handler_t process __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ return 0;
+}
+
static int record__synthesize(struct record *rec)
{
struct perf_session *session = rec->session;
@@ -549,6 +631,11 @@
}
}
+ err = perf_event__synth_time_conv(rec->evlist->mmap[0].base, tool,
+ process_synthesized_event, machine);
+ if (err)
+ goto out;
+
if (rec->opts.full_auxtrace) {
err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
session, process_synthesized_event);
@@ -600,10 +687,16 @@
signal(SIGCHLD, sig_handler);
signal(SIGINT, sig_handler);
signal(SIGTERM, sig_handler);
- if (rec->opts.auxtrace_snapshot_mode)
+
+ if (rec->opts.auxtrace_snapshot_mode || rec->switch_output) {
signal(SIGUSR2, snapshot_sig_handler);
- else
+ if (rec->opts.auxtrace_snapshot_mode)
+ trigger_on(&auxtrace_snapshot_trigger);
+ if (rec->switch_output)
+ trigger_on(&switch_output_trigger);
+ } else {
signal(SIGUSR2, SIG_IGN);
+ }
session = perf_session__new(file, false, tool);
if (session == NULL) {
@@ -729,27 +822,45 @@
perf_evlist__enable(rec->evlist);
}
- auxtrace_snapshot_enabled = 1;
+ trigger_ready(&auxtrace_snapshot_trigger);
+ trigger_ready(&switch_output_trigger);
for (;;) {
unsigned long long hits = rec->samples;
if (record__mmap_read_all(rec) < 0) {
- auxtrace_snapshot_enabled = 0;
+ trigger_error(&auxtrace_snapshot_trigger);
+ trigger_error(&switch_output_trigger);
err = -1;
goto out_child;
}
if (auxtrace_record__snapshot_started) {
auxtrace_record__snapshot_started = 0;
- if (!auxtrace_snapshot_err)
+ if (!trigger_is_error(&auxtrace_snapshot_trigger))
record__read_auxtrace_snapshot(rec);
- if (auxtrace_snapshot_err) {
+ if (trigger_is_error(&auxtrace_snapshot_trigger)) {
pr_err("AUX area tracing snapshot failed\n");
err = -1;
goto out_child;
}
}
+ if (trigger_is_hit(&switch_output_trigger)) {
+ trigger_ready(&switch_output_trigger);
+
+ if (!quiet)
+ fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
+ waking);
+ waking = 0;
+ fd = record__switch_output(rec, false);
+ if (fd < 0) {
+ pr_err("Failed to switch to new file\n");
+ trigger_error(&switch_output_trigger);
+ err = fd;
+ goto out_child;
+ }
+ }
+
if (hits == rec->samples) {
if (done || draining)
break;
@@ -772,12 +883,13 @@
* disable events in this case.
*/
if (done && !disabled && !target__none(&opts->target)) {
- auxtrace_snapshot_enabled = 0;
+ trigger_off(&auxtrace_snapshot_trigger);
perf_evlist__disable(rec->evlist);
disabled = true;
}
}
- auxtrace_snapshot_enabled = 0;
+ trigger_off(&auxtrace_snapshot_trigger);
+ trigger_off(&switch_output_trigger);
if (forks && workload_exec_errno) {
char msg[STRERR_BUFSIZE];
@@ -811,11 +923,22 @@
/* this will be recalculated during process_buildids() */
rec->samples = 0;
- if (!err)
- record__finish_output(rec);
+ if (!err) {
+ if (!rec->timestamp_filename) {
+ record__finish_output(rec);
+ } else {
+ fd = record__switch_output(rec, true);
+ if (fd < 0) {
+ status = fd;
+ goto out_delete_session;
+ }
+ }
+ }
if (!err && !quiet) {
char samples[128];
+ const char *postfix = rec->timestamp_filename ?
+ ".<timestamp>" : "";
if (rec->samples && !rec->opts.full_auxtrace)
scnprintf(samples, sizeof(samples),
@@ -823,9 +946,9 @@
else
samples[0] = '\0';
- fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s ]\n",
+ fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
perf_data_file__size(file) / 1024.0 / 1024.0,
- file->path, samples);
+ file->path, postfix, samples);
}
out_delete_session:
@@ -833,58 +956,61 @@
return status;
}
-static void callchain_debug(void)
+static void callchain_debug(struct callchain_param *callchain)
{
static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
- pr_debug("callchain: type %s\n", str[callchain_param.record_mode]);
+ pr_debug("callchain: type %s\n", str[callchain->record_mode]);
- if (callchain_param.record_mode == CALLCHAIN_DWARF)
+ if (callchain->record_mode == CALLCHAIN_DWARF)
pr_debug("callchain: stack dump size %d\n",
- callchain_param.dump_size);
+ callchain->dump_size);
+}
+
+int record_opts__parse_callchain(struct record_opts *record,
+ struct callchain_param *callchain,
+ const char *arg, bool unset)
+{
+ int ret;
+ callchain->enabled = !unset;
+
+ /* --no-call-graph */
+ if (unset) {
+ callchain->record_mode = CALLCHAIN_NONE;
+ pr_debug("callchain: disabled\n");
+ return 0;
+ }
+
+ ret = parse_callchain_record_opt(arg, callchain);
+ if (!ret) {
+ /* Enable data address sampling for DWARF unwind. */
+ if (callchain->record_mode == CALLCHAIN_DWARF)
+ record->sample_address = true;
+ callchain_debug(callchain);
+ }
+
+ return ret;
}
int record_parse_callchain_opt(const struct option *opt,
const char *arg,
int unset)
{
- int ret;
- struct record_opts *record = (struct record_opts *)opt->value;
-
- record->callgraph_set = true;
- callchain_param.enabled = !unset;
-
- /* --no-call-graph */
- if (unset) {
- callchain_param.record_mode = CALLCHAIN_NONE;
- pr_debug("callchain: disabled\n");
- return 0;
- }
-
- ret = parse_callchain_record_opt(arg, &callchain_param);
- if (!ret) {
- /* Enable data address sampling for DWARF unwind. */
- if (callchain_param.record_mode == CALLCHAIN_DWARF)
- record->sample_address = true;
- callchain_debug();
- }
-
- return ret;
+ return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
}
int record_callchain_opt(const struct option *opt,
const char *arg __maybe_unused,
int unset __maybe_unused)
{
- struct record_opts *record = (struct record_opts *)opt->value;
+ struct callchain_param *callchain = opt->value;
- record->callgraph_set = true;
- callchain_param.enabled = true;
+ callchain->enabled = true;
- if (callchain_param.record_mode == CALLCHAIN_NONE)
- callchain_param.record_mode = CALLCHAIN_FP;
+ if (callchain->record_mode == CALLCHAIN_NONE)
+ callchain->record_mode = CALLCHAIN_FP;
- callchain_debug();
+ callchain_debug(callchain);
return 0;
}
@@ -1122,7 +1248,7 @@
record__parse_mmap_pages),
OPT_BOOLEAN(0, "group", &record.opts.group,
"put the counters into a counter group"),
- OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
+ OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
NULL, "enables call-graph recording" ,
&record_callchain_opt),
OPT_CALLBACK(0, "call-graph", &record.opts,
@@ -1195,6 +1321,10 @@
"file", "vmlinux pathname"),
OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
"Record build-id of all DSOs regardless of hits"),
+ OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
+ "append timestamp to output filename"),
+ OPT_BOOLEAN(0, "switch-output", &record.switch_output,
+ "Switch output when receive SIGUSR2"),
OPT_END()
};
@@ -1250,6 +1380,9 @@
return -EINVAL;
}
+ if (rec->switch_output)
+ rec->timestamp_filename = true;
+
if (!rec->itr) {
rec->itr = auxtrace_record__init(rec->evlist, &err);
if (err)
@@ -1261,6 +1394,14 @@
if (err)
return err;
+ err = bpf__setup_stdout(rec->evlist);
+ if (err) {
+ bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
+ pr_err("ERROR: Setup BPF stdout failed: %s\n",
+ errbuf);
+ return err;
+ }
+
err = -ENOMEM;
symbol__init(NULL);
@@ -1275,8 +1416,36 @@
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");
- if (rec->no_buildid_cache || rec->no_buildid)
+ if (rec->no_buildid_cache || rec->no_buildid) {
disable_buildid_cache();
+ } else if (rec->switch_output) {
+ /*
+ * In 'perf record --switch-output', disable buildid
+ * generation by default to reduce data file switching
+ * overhead. Still generate buildid if they are required
+ * explicitly using
+ *
+	 *    perf record --switch-output --no-no-buildid \
+ * --no-no-buildid-cache
+ *
+ * Following code equals to:
+ *
+ * if ((rec->no_buildid || !rec->no_buildid_set) &&
+ * (rec->no_buildid_cache || !rec->no_buildid_cache_set))
+ * disable_buildid_cache();
+ */
+ bool disable = true;
+
+ if (rec->no_buildid_set && !rec->no_buildid)
+ disable = false;
+ if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
+ disable = false;
+ if (disable) {
+ rec->no_buildid = true;
+ rec->no_buildid_cache = true;
+ disable_buildid_cache();
+ }
+ }
if (rec->evlist->nr_entries == 0 &&
perf_evlist__add_default(rec->evlist) < 0) {
@@ -1335,9 +1504,13 @@
static void snapshot_sig_handler(int sig __maybe_unused)
{
- if (!auxtrace_snapshot_enabled)
- return;
- auxtrace_snapshot_enabled = 0;
- auxtrace_snapshot_err = auxtrace_record__snapshot_start(record.itr);
- auxtrace_record__snapshot_started = 1;
+ if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
+ trigger_hit(&auxtrace_snapshot_trigger);
+ auxtrace_record__snapshot_started = 1;
+ if (auxtrace_record__snapshot_start(record.itr))
+ trigger_error(&auxtrace_snapshot_trigger);
+ }
+
+ if (trigger_is_ready(&switch_output_trigger))
+ trigger_hit(&switch_output_trigger);
}
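The two DEFINE_TRIGGER()s replace the old auxtrace_snapshot_enabled/auxtrace_snapshot_err pair with a small async-signal-safe state machine: trigger_on() arms it, the SIGUSR2 handler may move READY to HIT, and the main loop moves it back to READY or into ERROR. A sketch of the idea, with state names and guards assumed rather than copied from util/trigger.h:

#include <stdbool.h>

enum trigger_state {
	TRIGGER_ERROR	= -2,
	TRIGGER_OFF	= -1,
	TRIGGER_READY	= 0,
	TRIGGER_HIT	= 1,
};

struct trigger {
	volatile enum trigger_state state;	/* touched from a signal handler */
	const char *name;
};

#define DEFINE_TRIGGER(n) \
	struct trigger n = { .state = TRIGGER_OFF, .name = #n }

static inline void trigger_on(struct trigger *t)    { t->state = TRIGGER_READY; }
static inline void trigger_off(struct trigger *t)   { t->state = TRIGGER_OFF; }
static inline void trigger_ready(struct trigger *t)
{ if (t->state != TRIGGER_OFF) t->state = TRIGGER_READY; }
static inline void trigger_hit(struct trigger *t)
{ if (t->state == TRIGGER_READY) t->state = TRIGGER_HIT; }
static inline void trigger_error(struct trigger *t)
{ if (t->state != TRIGGER_OFF) t->state = TRIGGER_ERROR; }
static inline bool trigger_is_ready(struct trigger *t) { return t->state == TRIGGER_READY; }
static inline bool trigger_is_hit(struct trigger *t)   { return t->state == TRIGGER_HIT; }
static inline bool trigger_is_error(struct trigger *t) { return t->state == TRIGGER_ERROR; }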
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 160ea23..87d40e3 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -47,7 +47,6 @@
struct perf_tool tool;
struct perf_session *session;
bool use_tui, use_gtk, use_stdio;
- bool dont_use_callchains;
bool show_full_info;
bool show_threads;
bool inverted_callchain;
@@ -235,7 +234,7 @@
sample_type |= PERF_SAMPLE_BRANCH_STACK;
if (!is_pipe && !(sample_type & PERF_SAMPLE_CALLCHAIN)) {
- if (sort__has_parent) {
+ if (perf_hpp_list.parent) {
ui__error("Selected --sort parent, but no "
"callchain data. Did you call "
"'perf record' without -g?\n");
@@ -247,7 +246,7 @@
"you call 'perf record' without -g?\n");
return -1;
}
- } else if (!rep->dont_use_callchains &&
+ } else if (!callchain_param.enabled &&
callchain_param.mode != CHAIN_NONE &&
!symbol_conf.use_callchain) {
symbol_conf.use_callchain = true;
@@ -599,13 +598,15 @@
static int
report_parse_callchain_opt(const struct option *opt, const char *arg, int unset)
{
- struct report *rep = (struct report *)opt->value;
+ struct callchain_param *callchain = opt->value;
+ callchain->enabled = !unset;
/*
* --no-call-graph
*/
if (unset) {
- rep->dont_use_callchains = true;
+ symbol_conf.use_callchain = false;
+ callchain->mode = CHAIN_NONE;
return 0;
}
@@ -690,7 +691,7 @@
.ordered_events = true,
.ordering_requires_timestamps = true,
},
- .max_stack = PERF_MAX_STACK_DEPTH,
+ .max_stack = sysctl_perf_event_max_stack,
.pretty_printing_style = "normal",
.socket_filter = -1,
};
@@ -734,7 +735,7 @@
"regex filter to identify parent, see: '--sort parent'"),
OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other,
"Only display entries with parent-match"),
- OPT_CALLBACK_DEFAULT('g', "call-graph", &report,
+ OPT_CALLBACK_DEFAULT('g', "call-graph", &callchain_param,
"print_type,threshold[,print_limit],order,sort_key[,branch],value",
report_callchain_help, &report_parse_callchain_opt,
callchain_default_opt),
@@ -743,7 +744,7 @@
OPT_INTEGER(0, "max-stack", &report.max_stack,
"Set the maximum stack depth when parsing the callchain, "
"anything beyond the specified depth will be ignored. "
- "Default: " __stringify(PERF_MAX_STACK_DEPTH)),
+ "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
OPT_BOOLEAN('G', "inverted", &report.inverted_callchain,
"alias for inverted call graph"),
OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
@@ -935,7 +936,7 @@
goto error;
}
- sort__need_collapse = true;
+ perf_hpp_list.need_collapse = true;
}
/* Force tty output for header output and per-thread stat. */
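The default max-stack now follows the kernel.perf_event_max_stack sysctl rather than the compile-time PERF_MAX_STACK_DEPTH. Reading it amounts to parsing one procfs file; a sketch:

#include <stdio.h>

/* Fetch kernel.perf_event_max_stack, falling back to a compile-time
 * default when the sysctl is absent (older kernels).
 */
static int read_perf_event_max_stack(int fallback)
{
	FILE *fp = fopen("/proc/sys/kernel/perf_event_max_stack", "r");
	int max_stack = fallback;

	if (fp) {
		if (fscanf(fp, "%d", &max_stack) != 1)
			max_stack = fallback;
		fclose(fp);
	}
	return max_stack;
}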
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 871b55ae..afa0576 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -11,6 +11,8 @@
#include "util/session.h"
#include "util/tool.h"
#include "util/cloexec.h"
+#include "util/thread_map.h"
+#include "util/color.h"
#include <subcmd/parse-options.h>
#include "util/trace-event.h"
@@ -122,6 +124,21 @@
struct machine *machine);
};
+#define COLOR_PIDS PERF_COLOR_BLUE
+#define COLOR_CPUS PERF_COLOR_BG_RED
+
+struct perf_sched_map {
+ DECLARE_BITMAP(comp_cpus_mask, MAX_CPUS);
+ int *comp_cpus;
+ bool comp;
+ struct thread_map *color_pids;
+ const char *color_pids_str;
+ struct cpu_map *color_cpus;
+ const char *color_cpus_str;
+ struct cpu_map *cpus;
+ const char *cpus_str;
+};
+
struct perf_sched {
struct perf_tool tool;
const char *sort_order;
@@ -173,6 +190,7 @@
struct list_head sort_list, cmp_pid;
bool force;
bool skip_merge;
+ struct perf_sched_map map;
};
static u64 get_nsecs(void)
@@ -1339,6 +1357,38 @@
return 0;
}
+union map_priv {
+ void *ptr;
+ bool color;
+};
+
+static bool thread__has_color(struct thread *thread)
+{
+ union map_priv priv = {
+ .ptr = thread__priv(thread),
+ };
+
+ return priv.color;
+}
+
+static struct thread*
+map__findnew_thread(struct perf_sched *sched, struct machine *machine, pid_t pid, pid_t tid)
+{
+ struct thread *thread = machine__findnew_thread(machine, pid, tid);
+ union map_priv priv = {
+ .color = false,
+ };
+
+ if (!sched->map.color_pids || !thread || thread__priv(thread))
+ return thread;
+
+ if (thread_map__has(sched->map.color_pids, tid))
+ priv.color = true;
+
+ thread__set_priv(thread, priv.ptr);
+ return thread;
+}
+
static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
struct perf_sample *sample, struct machine *machine)
{
@@ -1347,13 +1397,25 @@
int new_shortname;
u64 timestamp0, timestamp = sample->time;
s64 delta;
- int cpu, this_cpu = sample->cpu;
+ int i, this_cpu = sample->cpu;
+ int cpus_nr;
+ bool new_cpu = false;
+ const char *color = PERF_COLOR_NORMAL;
BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0);
if (this_cpu > sched->max_cpu)
sched->max_cpu = this_cpu;
+ if (sched->map.comp) {
+ cpus_nr = bitmap_weight(sched->map.comp_cpus_mask, MAX_CPUS);
+ if (!test_and_set_bit(this_cpu, sched->map.comp_cpus_mask)) {
+ sched->map.comp_cpus[cpus_nr++] = this_cpu;
+ new_cpu = true;
+ }
+ } else
+ cpus_nr = sched->max_cpu;
+
timestamp0 = sched->cpu_last_switched[this_cpu];
sched->cpu_last_switched[this_cpu] = timestamp;
if (timestamp0)
@@ -1366,7 +1428,7 @@
return -1;
}
- sched_in = machine__findnew_thread(machine, -1, next_pid);
+ sched_in = map__findnew_thread(sched, machine, -1, next_pid);
if (sched_in == NULL)
return -1;
@@ -1400,26 +1462,52 @@
new_shortname = 1;
}
- for (cpu = 0; cpu <= sched->max_cpu; cpu++) {
+ for (i = 0; i < cpus_nr; i++) {
+ int cpu = sched->map.comp ? sched->map.comp_cpus[i] : i;
+ struct thread *curr_thread = sched->curr_thread[cpu];
+ const char *pid_color = color;
+ const char *cpu_color = color;
+
+ if (curr_thread && thread__has_color(curr_thread))
+ pid_color = COLOR_PIDS;
+
+ if (sched->map.cpus && !cpu_map__has(sched->map.cpus, cpu))
+ continue;
+
+ if (sched->map.color_cpus && cpu_map__has(sched->map.color_cpus, cpu))
+ cpu_color = COLOR_CPUS;
+
if (cpu != this_cpu)
- printf(" ");
+ color_fprintf(stdout, cpu_color, " ");
else
- printf("*");
+ color_fprintf(stdout, cpu_color, "*");
if (sched->curr_thread[cpu])
- printf("%2s ", sched->curr_thread[cpu]->shortname);
+ color_fprintf(stdout, pid_color, "%2s ", sched->curr_thread[cpu]->shortname);
else
- printf(" ");
+ color_fprintf(stdout, color, " ");
}
- printf(" %12.6f secs ", (double)timestamp/1e9);
+ if (sched->map.cpus && !cpu_map__has(sched->map.cpus, this_cpu))
+ goto out;
+
+ color_fprintf(stdout, color, " %12.6f secs ", (double)timestamp/1e9);
if (new_shortname) {
- printf("%s => %s:%d\n",
+ const char *pid_color = color;
+
+ if (thread__has_color(sched_in))
+ pid_color = COLOR_PIDS;
+
+ color_fprintf(stdout, pid_color, "%s => %s:%d",
sched_in->shortname, thread__comm_str(sched_in), sched_in->tid);
- } else {
- printf("\n");
}
+ if (sched->map.comp && new_cpu)
+ color_fprintf(stdout, color, " (CPU %d)", this_cpu);
+
+out:
+ color_fprintf(stdout, color, "\n");
+
thread__put(sched_in);
return 0;
@@ -1675,9 +1763,75 @@
return 0;
}
+static int setup_map_cpus(struct perf_sched *sched)
+{
+ struct cpu_map *map;
+
+ sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF);
+
+ if (sched->map.comp) {
+ sched->map.comp_cpus = zalloc(sched->max_cpu * sizeof(int));
+ if (!sched->map.comp_cpus)
+ return -1;
+ }
+
+ if (!sched->map.cpus_str)
+ return 0;
+
+ map = cpu_map__new(sched->map.cpus_str);
+ if (!map) {
+ pr_err("failed to get cpus map from %s\n", sched->map.cpus_str);
+ return -1;
+ }
+
+ sched->map.cpus = map;
+ return 0;
+}
+
+static int setup_color_pids(struct perf_sched *sched)
+{
+ struct thread_map *map;
+
+ if (!sched->map.color_pids_str)
+ return 0;
+
+ map = thread_map__new_by_tid_str(sched->map.color_pids_str);
+ if (!map) {
+ pr_err("failed to get thread map from %s\n", sched->map.color_pids_str);
+ return -1;
+ }
+
+ sched->map.color_pids = map;
+ return 0;
+}
+
+static int setup_color_cpus(struct perf_sched *sched)
+{
+ struct cpu_map *map;
+
+ if (!sched->map.color_cpus_str)
+ return 0;
+
+ map = cpu_map__new(sched->map.color_cpus_str);
+ if (!map) {
+		pr_err("failed to get cpu map from %s\n", sched->map.color_cpus_str);
+ return -1;
+ }
+
+ sched->map.color_cpus = map;
+ return 0;
+}
+
static int perf_sched__map(struct perf_sched *sched)
{
- sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF);
+ if (setup_map_cpus(sched))
+ return -1;
+
+ if (setup_color_pids(sched))
+ return -1;
+
+ if (setup_color_cpus(sched))
+ return -1;
setup_pager();
if (perf_sched__read_events(sched))
@@ -1831,6 +1985,17 @@
"dump raw trace in ASCII"),
OPT_END()
};
+ const struct option map_options[] = {
+ OPT_BOOLEAN(0, "compact", &sched.map.comp,
+ "map output in compact mode"),
+ OPT_STRING(0, "color-pids", &sched.map.color_pids_str, "pids",
+ "highlight given pids in map"),
+ OPT_STRING(0, "color-cpus", &sched.map.color_cpus_str, "cpus",
+ "highlight given CPUs in map"),
+ OPT_STRING(0, "cpus", &sched.map.cpus_str, "cpus",
+ "display given CPUs in map"),
+ OPT_END()
+ };
const char * const latency_usage[] = {
"perf sched latency [<options>]",
NULL
@@ -1839,6 +2004,10 @@
"perf sched replay [<options>]",
NULL
};
+ const char * const map_usage[] = {
+ "perf sched map [<options>]",
+ NULL
+ };
const char *const sched_subcommands[] = { "record", "latency", "map",
"replay", "script", NULL };
const char *sched_usage[] = {
@@ -1887,6 +2056,11 @@
setup_sorting(&sched, latency_options, latency_usage);
return perf_sched__lat(&sched);
} else if (!strcmp(argv[0], "map")) {
+ if (argc) {
+ argc = parse_options(argc, argv, map_options, map_usage, 0);
+ if (argc)
+ usage_with_options(map_usage, map_options);
+ }
sched.tp_handler = &map_ops;
setup_sorting(&sched, latency_options, latency_usage);
return perf_sched__map(&sched);
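The new --compact mode draws one column per CPU actually observed, in order of first appearance, rather than one column per possible CPU. The bookkeeping in map_switch_event() boils down to a bitmap plus an append-only array; a reduced sketch using plain bit ops instead of the kernel bitmap helpers:

#include <stdbool.h>

#define MAX_CPUS 4096
#define BITS_PER_WORD (8 * sizeof(unsigned long))

static int comp_cpus[MAX_CPUS];	/* column order: first-seen first */
static unsigned long comp_cpus_mask[MAX_CPUS / BITS_PER_WORD];

/* Returns how many columns to draw; sets *new_cpu on first sight. */
static int map_cpu_columns(int this_cpu, bool *new_cpu)
{
	unsigned long bit = 1UL << (this_cpu % BITS_PER_WORD);
	unsigned long *word = &comp_cpus_mask[this_cpu / BITS_PER_WORD];
	static int cpus_nr;

	if (!(*word & bit)) {
		*word |= bit;
		comp_cpus[cpus_nr++] = this_cpu;	/* new column */
		*new_cpu = true;
	}
	return cpus_nr;
}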
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 5282669..efca816 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -22,6 +22,7 @@
#include "util/thread_map.h"
#include "util/stat.h"
#include <linux/bitmap.h>
+#include <linux/stringify.h>
#include "asm/bug.h"
#include "util/mem-events.h"
@@ -317,19 +318,19 @@
output[type].print_ip_opts = 0;
if (PRINT_FIELD(IP))
- output[type].print_ip_opts |= PRINT_IP_OPT_IP;
+ output[type].print_ip_opts |= EVSEL__PRINT_IP;
if (PRINT_FIELD(SYM))
- output[type].print_ip_opts |= PRINT_IP_OPT_SYM;
+ output[type].print_ip_opts |= EVSEL__PRINT_SYM;
if (PRINT_FIELD(DSO))
- output[type].print_ip_opts |= PRINT_IP_OPT_DSO;
+ output[type].print_ip_opts |= EVSEL__PRINT_DSO;
if (PRINT_FIELD(SYMOFFSET))
- output[type].print_ip_opts |= PRINT_IP_OPT_SYMOFFSET;
+ output[type].print_ip_opts |= EVSEL__PRINT_SYMOFFSET;
if (PRINT_FIELD(SRCLINE))
- output[type].print_ip_opts |= PRINT_IP_OPT_SRCLINE;
+ output[type].print_ip_opts |= EVSEL__PRINT_SRCLINE;
}
/*
@@ -569,18 +570,23 @@
/* print branch_from information */
if (PRINT_FIELD(IP)) {
unsigned int print_opts = output[attr->type].print_ip_opts;
+ struct callchain_cursor *cursor = NULL;
- if (symbol_conf.use_callchain && sample->callchain) {
- printf("\n");
- } else {
- printf(" ");
- if (print_opts & PRINT_IP_OPT_SRCLINE) {
+ if (symbol_conf.use_callchain && sample->callchain &&
+ thread__resolve_callchain(al->thread, &callchain_cursor, evsel,
+ sample, NULL, NULL, scripting_max_stack) == 0)
+ cursor = &callchain_cursor;
+
+ if (cursor == NULL) {
+ putchar(' ');
+ if (print_opts & EVSEL__PRINT_SRCLINE) {
print_srcline_last = true;
- print_opts &= ~PRINT_IP_OPT_SRCLINE;
+ print_opts &= ~EVSEL__PRINT_SRCLINE;
}
- }
- perf_evsel__print_ip(evsel, sample, al, print_opts,
- scripting_max_stack);
+ } else
+ putchar('\n');
+
+ sample__fprintf_sym(sample, al, 0, print_opts, cursor, stdout);
}
/* print branch_to information */
@@ -783,14 +789,15 @@
printf("%16" PRIu64, sample->weight);
if (PRINT_FIELD(IP)) {
- if (!symbol_conf.use_callchain)
- printf(" ");
- else
- printf("\n");
+ struct callchain_cursor *cursor = NULL;
- perf_evsel__print_ip(evsel, sample, al,
- output[attr->type].print_ip_opts,
- scripting_max_stack);
+ if (symbol_conf.use_callchain && sample->callchain &&
+ thread__resolve_callchain(al->thread, &callchain_cursor, evsel,
+ sample, NULL, NULL, scripting_max_stack) == 0)
+ cursor = &callchain_cursor;
+
+ putchar(cursor ? '\n' : ' ');
+ sample__fprintf_sym(sample, al, 0, output[attr->type].print_ip_opts, cursor, stdout);
}
if (PRINT_FIELD(IREGS))
@@ -1959,6 +1966,7 @@
.exit = perf_event__process_exit,
.fork = perf_event__process_fork,
.attr = process_attr,
+ .event_update = perf_event__process_event_update,
.tracing_data = perf_event__process_tracing_data,
.build_id = perf_event__process_build_id,
.id_index = perf_event__process_id_index,
@@ -2020,6 +2028,10 @@
"only consider symbols in these pids"),
OPT_STRING(0, "tid", &symbol_conf.tid_list_str, "tid[,tid...]",
"only consider symbols in these tids"),
+ OPT_UINTEGER(0, "max-stack", &scripting_max_stack,
+ "Set the maximum stack depth when parsing the callchain, "
+ "anything beyond the specified depth will be ignored. "
+ "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
OPT_BOOLEAN('I', "show-info", &show_full_info,
"display extended information from perf.data file"),
OPT_BOOLEAN('\0', "show-kernel-path", &symbol_conf.show_kernel_path,
@@ -2055,6 +2067,8 @@
NULL
};
+ scripting_max_stack = sysctl_perf_event_max_stack;
+
setup_scripting();
argc = parse_options_subcommand(argc, argv, options, script_subcommands, script_usage,
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 307e8a1..e459b68 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -298,6 +298,14 @@
return -1;
}
}
+
+ if (verbose > 1) {
+ fprintf(stat_config.output,
+ "%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
+ perf_evsel__name(counter),
+ cpu,
+ count->val, count->ena, count->run);
+ }
}
}
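Each verbose line prints the raw value plus the enabled and running times for the counter on that CPU. When the event was time-multiplexed (run < ena), the conventional estimate extrapolates the raw count, as sketched here:

#include <stdint.h>

/* Standard perf scaling for multiplexed counters: scale the raw count
 * up by the ratio of enabled time to time actually on the PMU.
 */
static uint64_t scale_count(uint64_t val, uint64_t ena, uint64_t run)
{
	if (!run || !ena)
		return 0;	/* counter never got onto the PMU */
	return (uint64_t)((double)val * ena / run);
}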
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 8332149..1793da5 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -688,7 +688,7 @@
struct hist_entry *he = iter->he;
struct perf_evsel *evsel = iter->evsel;
- if (sort__has_sym && single)
+ if (perf_hpp_list.sym && single)
perf_top__record_precise_ip(top, he, evsel->idx, al->addr);
hist__account_cycles(iter->sample->branch_stack, al, iter->sample,
@@ -886,7 +886,7 @@
struct perf_evlist *evlist = top->evlist;
struct record_opts *opts = &top->record_opts;
- perf_evlist__config(evlist, opts);
+ perf_evlist__config(evlist, opts, &callchain_param);
evlist__for_each(evlist, counter) {
try_again:
@@ -917,15 +917,15 @@
return -1;
}
-static int perf_top__setup_sample_type(struct perf_top *top __maybe_unused)
+static int callchain_param__setup_sample_type(struct callchain_param *callchain)
{
- if (!sort__has_sym) {
- if (symbol_conf.use_callchain) {
+ if (!perf_hpp_list.sym) {
+ if (callchain->enabled) {
ui__error("Selected -g but \"sym\" not present in --sort/-s.");
return -EINVAL;
}
- } else if (callchain_param.mode != CHAIN_NONE) {
- if (callchain_register_param(&callchain_param) < 0) {
+ } else if (callchain->mode != CHAIN_NONE) {
+ if (callchain_register_param(callchain) < 0) {
ui__error("Can't register callchain params.\n");
return -EINVAL;
}
@@ -952,7 +952,7 @@
goto out_delete;
}
- ret = perf_top__setup_sample_type(top);
+ ret = callchain_param__setup_sample_type(&callchain_param);
if (ret)
goto out_delete;
@@ -962,7 +962,7 @@
machine__synthesize_threads(&top->session->machines.host, &opts->target,
top->evlist->threads, false, opts->proc_map_timeout);
- if (sort__has_socket) {
+ if (perf_hpp_list.socket) {
ret = perf_env__read_cpu_topology_map(&perf_env);
if (ret < 0)
goto out_err_cpu_topo;
@@ -1045,18 +1045,17 @@
static int
parse_callchain_opt(const struct option *opt, const char *arg, int unset)
{
- struct record_opts *record = (struct record_opts *)opt->value;
+ struct callchain_param *callchain = opt->value;
- record->callgraph_set = true;
- callchain_param.enabled = !unset;
- callchain_param.record_mode = CALLCHAIN_FP;
+ callchain->enabled = !unset;
+ callchain->record_mode = CALLCHAIN_FP;
/*
* --no-call-graph
*/
if (unset) {
symbol_conf.use_callchain = false;
- callchain_param.record_mode = CALLCHAIN_NONE;
+ callchain->record_mode = CALLCHAIN_NONE;
return 0;
}
@@ -1104,7 +1103,7 @@
},
.proc_map_timeout = 500,
},
- .max_stack = PERF_MAX_STACK_DEPTH,
+ .max_stack = sysctl_perf_event_max_stack,
.sym_pcnt_filter = 5,
};
struct record_opts *opts = &top.record_opts;
@@ -1162,17 +1161,17 @@
"output field(s): overhead, period, sample plus all of sort keys"),
OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
"Show a column with the number of samples"),
- OPT_CALLBACK_NOOPT('g', NULL, &top.record_opts,
+ OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
NULL, "enables call-graph recording and display",
&callchain_opt),
- OPT_CALLBACK(0, "call-graph", &top.record_opts,
+ OPT_CALLBACK(0, "call-graph", &callchain_param,
"record_mode[,record_size],print_type,threshold[,print_limit],order,sort_key[,branch]",
top_callchain_help, &parse_callchain_opt),
OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain,
"Accumulate callchains of children and show total overhead as well"),
OPT_INTEGER(0, "max-stack", &top.max_stack,
"Set the maximum stack depth when parsing the callchain. "
- "Default: " __stringify(PERF_MAX_STACK_DEPTH)),
+ "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
"ignore callees of these functions in call graphs",
report_parse_ignore_callees_opt),
@@ -1256,7 +1255,7 @@
sort__mode = SORT_MODE__TOP;
/* display thread wants entries to be collapsed in a different tree */
- sort__need_collapse = 1;
+ perf_hpp_list.need_collapse = 1;
if (top.use_stdio)
use_browser = 0;
@@ -1312,7 +1311,7 @@
top.sym_evsel = perf_evlist__first(top.evlist);
- if (!symbol_conf.use_callchain) {
+ if (!callchain_param.enabled) {
symbol_conf.cumulate_callchain = false;
perf_hpp__cancel_cumulate();
}
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 93ac724..6e5c325 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -34,79 +34,76 @@
#include "trace-event.h"
#include "util/parse-events.h"
#include "util/bpf-loader.h"
+#include "callchain.h"
+#include "syscalltbl.h"
+#include "rb_resort.h"
-#include <libaudit.h>
+#include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
#include <stdlib.h>
-#include <sys/mman.h>
-#include <linux/futex.h>
#include <linux/err.h>
-
-/* For older distros: */
-#ifndef MAP_STACK
-# define MAP_STACK 0x20000
-#endif
-
-#ifndef MADV_HWPOISON
-# define MADV_HWPOISON 100
-
-#endif
-
-#ifndef MADV_MERGEABLE
-# define MADV_MERGEABLE 12
-#endif
-
-#ifndef MADV_UNMERGEABLE
-# define MADV_UNMERGEABLE 13
-#endif
-
-#ifndef EFD_SEMAPHORE
-# define EFD_SEMAPHORE 1
-#endif
-
-#ifndef EFD_NONBLOCK
-# define EFD_NONBLOCK 00004000
-#endif
-
-#ifndef EFD_CLOEXEC
-# define EFD_CLOEXEC 02000000
-#endif
+#include <linux/filter.h>
+#include <linux/audit.h>
+#include <sys/ptrace.h>
+#include <linux/random.h>
+#include <linux/stringify.h>
#ifndef O_CLOEXEC
# define O_CLOEXEC 02000000
#endif
-#ifndef SOCK_DCCP
-# define SOCK_DCCP 6
-#endif
-
-#ifndef SOCK_CLOEXEC
-# define SOCK_CLOEXEC 02000000
-#endif
-
-#ifndef SOCK_NONBLOCK
-# define SOCK_NONBLOCK 00004000
-#endif
-
-#ifndef MSG_CMSG_CLOEXEC
-# define MSG_CMSG_CLOEXEC 0x40000000
-#endif
-
-#ifndef PERF_FLAG_FD_NO_GROUP
-# define PERF_FLAG_FD_NO_GROUP (1UL << 0)
-#endif
-
-#ifndef PERF_FLAG_FD_OUTPUT
-# define PERF_FLAG_FD_OUTPUT (1UL << 1)
-#endif
-
-#ifndef PERF_FLAG_PID_CGROUP
-# define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */
-#endif
-
-#ifndef PERF_FLAG_FD_CLOEXEC
-# define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */
-#endif
-
+struct trace {
+ struct perf_tool tool;
+ struct syscalltbl *sctbl;
+ struct {
+ int max;
+ struct syscall *table;
+ struct {
+ struct perf_evsel *sys_enter,
+ *sys_exit;
+ } events;
+ } syscalls;
+ struct record_opts opts;
+ struct perf_evlist *evlist;
+ struct machine *host;
+ struct thread *current;
+ u64 base_time;
+ FILE *output;
+ unsigned long nr_events;
+ struct strlist *ev_qualifier;
+ struct {
+ size_t nr;
+ int *entries;
+ } ev_qualifier_ids;
+ struct intlist *tid_list;
+ struct intlist *pid_list;
+ struct {
+ size_t nr;
+ pid_t *entries;
+ } filter_pids;
+ double duration_filter;
+ double runtime_ms;
+ struct {
+ u64 vfs_getname,
+ proc_getname;
+ } stats;
+ unsigned int max_stack;
+ unsigned int min_stack;
+ bool not_ev_qualifier;
+ bool live;
+ bool full_time;
+ bool sched;
+ bool multiple_threads;
+ bool summary;
+ bool summary_only;
+ bool show_comm;
+ bool show_tool_stats;
+ bool trace_syscalls;
+ bool kernel_syscallchains;
+ bool force;
+ bool vfs_getname;
+ int trace_pgfaults;
+ int open_id;
+};
struct tp_field {
int offset;
@@ -371,221 +368,6 @@
#define SCA_INT syscall_arg__scnprintf_int
-static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
- struct syscall_arg *arg)
-{
- int printed = 0, prot = arg->val;
-
- if (prot == PROT_NONE)
- return scnprintf(bf, size, "NONE");
-#define P_MMAP_PROT(n) \
- if (prot & PROT_##n) { \
- printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
- prot &= ~PROT_##n; \
- }
-
- P_MMAP_PROT(EXEC);
- P_MMAP_PROT(READ);
- P_MMAP_PROT(WRITE);
-#ifdef PROT_SEM
- P_MMAP_PROT(SEM);
-#endif
- P_MMAP_PROT(GROWSDOWN);
- P_MMAP_PROT(GROWSUP);
-#undef P_MMAP_PROT
-
- if (prot)
- printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
-
- return printed;
-}
-
-#define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
-
-static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
- struct syscall_arg *arg)
-{
- int printed = 0, flags = arg->val;
-
-#define P_MMAP_FLAG(n) \
- if (flags & MAP_##n) { \
- printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
- flags &= ~MAP_##n; \
- }
-
- P_MMAP_FLAG(SHARED);
- P_MMAP_FLAG(PRIVATE);
-#ifdef MAP_32BIT
- P_MMAP_FLAG(32BIT);
-#endif
- P_MMAP_FLAG(ANONYMOUS);
- P_MMAP_FLAG(DENYWRITE);
- P_MMAP_FLAG(EXECUTABLE);
- P_MMAP_FLAG(FILE);
- P_MMAP_FLAG(FIXED);
- P_MMAP_FLAG(GROWSDOWN);
-#ifdef MAP_HUGETLB
- P_MMAP_FLAG(HUGETLB);
-#endif
- P_MMAP_FLAG(LOCKED);
- P_MMAP_FLAG(NONBLOCK);
- P_MMAP_FLAG(NORESERVE);
- P_MMAP_FLAG(POPULATE);
- P_MMAP_FLAG(STACK);
-#ifdef MAP_UNINITIALIZED
- P_MMAP_FLAG(UNINITIALIZED);
-#endif
-#undef P_MMAP_FLAG
-
- if (flags)
- printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
-
- return printed;
-}
-
-#define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
-
-static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
- struct syscall_arg *arg)
-{
- int printed = 0, flags = arg->val;
-
-#define P_MREMAP_FLAG(n) \
- if (flags & MREMAP_##n) { \
- printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
- flags &= ~MREMAP_##n; \
- }
-
- P_MREMAP_FLAG(MAYMOVE);
-#ifdef MREMAP_FIXED
- P_MREMAP_FLAG(FIXED);
-#endif
-#undef P_MREMAP_FLAG
-
- if (flags)
- printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
-
- return printed;
-}
-
-#define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
-
-static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
- struct syscall_arg *arg)
-{
- int behavior = arg->val;
-
- switch (behavior) {
-#define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
- P_MADV_BHV(NORMAL);
- P_MADV_BHV(RANDOM);
- P_MADV_BHV(SEQUENTIAL);
- P_MADV_BHV(WILLNEED);
- P_MADV_BHV(DONTNEED);
- P_MADV_BHV(REMOVE);
- P_MADV_BHV(DONTFORK);
- P_MADV_BHV(DOFORK);
- P_MADV_BHV(HWPOISON);
-#ifdef MADV_SOFT_OFFLINE
- P_MADV_BHV(SOFT_OFFLINE);
-#endif
- P_MADV_BHV(MERGEABLE);
- P_MADV_BHV(UNMERGEABLE);
-#ifdef MADV_HUGEPAGE
- P_MADV_BHV(HUGEPAGE);
-#endif
-#ifdef MADV_NOHUGEPAGE
- P_MADV_BHV(NOHUGEPAGE);
-#endif
-#ifdef MADV_DONTDUMP
- P_MADV_BHV(DONTDUMP);
-#endif
-#ifdef MADV_DODUMP
- P_MADV_BHV(DODUMP);
-#endif
-#undef P_MADV_PHV
- default: break;
- }
-
- return scnprintf(bf, size, "%#x", behavior);
-}
-
-#define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
-
-static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
- struct syscall_arg *arg)
-{
- int printed = 0, op = arg->val;
-
- if (op == 0)
- return scnprintf(bf, size, "NONE");
-#define P_CMD(cmd) \
- if ((op & LOCK_##cmd) == LOCK_##cmd) { \
- printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
- op &= ~LOCK_##cmd; \
- }
-
- P_CMD(SH);
- P_CMD(EX);
- P_CMD(NB);
- P_CMD(UN);
- P_CMD(MAND);
- P_CMD(RW);
- P_CMD(READ);
- P_CMD(WRITE);
-#undef P_OP
-
- if (op)
- printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
-
- return printed;
-}
-
-#define SCA_FLOCK syscall_arg__scnprintf_flock
-
-static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
-{
- enum syscall_futex_args {
- SCF_UADDR = (1 << 0),
- SCF_OP = (1 << 1),
- SCF_VAL = (1 << 2),
- SCF_TIMEOUT = (1 << 3),
- SCF_UADDR2 = (1 << 4),
- SCF_VAL3 = (1 << 5),
- };
- int op = arg->val;
- int cmd = op & FUTEX_CMD_MASK;
- size_t printed = 0;
-
- switch (cmd) {
-#define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
- P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
- P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
- P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
- P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
- P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
- P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
- P_FUTEX_OP(WAKE_OP); break;
- P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
- P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
- P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
- P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
- P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
- P_FUTEX_OP(WAIT_REQUEUE_PI); break;
- default: printed = scnprintf(bf, size, "%#x", cmd); break;
- }
-
- if (op & FUTEX_PRIVATE_FLAG)
- printed += scnprintf(bf + printed, size - printed, "|PRIV");
-
- if (op & FUTEX_CLOCK_REALTIME)
- printed += scnprintf(bf + printed, size - printed, "|CLKRT");
-
- return printed;
-}
-
-#define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
-
static const char *bpf_cmd[] = {
"MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
"MAP_GET_NEXT_KEY", "PROG_LOAD",
@@ -652,110 +434,6 @@
};
static DEFINE_STRARRAY(socket_families);
-#ifndef SOCK_TYPE_MASK
-#define SOCK_TYPE_MASK 0xf
-#endif
-
-static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
- struct syscall_arg *arg)
-{
- size_t printed;
- int type = arg->val,
- flags = type & ~SOCK_TYPE_MASK;
-
- type &= SOCK_TYPE_MASK;
- /*
- * Can't use a strarray, MIPS may override for ABI reasons.
- */
- switch (type) {
-#define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
- P_SK_TYPE(STREAM);
- P_SK_TYPE(DGRAM);
- P_SK_TYPE(RAW);
- P_SK_TYPE(RDM);
- P_SK_TYPE(SEQPACKET);
- P_SK_TYPE(DCCP);
- P_SK_TYPE(PACKET);
-#undef P_SK_TYPE
- default:
- printed = scnprintf(bf, size, "%#x", type);
- }
-
-#define P_SK_FLAG(n) \
- if (flags & SOCK_##n) { \
- printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
- flags &= ~SOCK_##n; \
- }
-
- P_SK_FLAG(CLOEXEC);
- P_SK_FLAG(NONBLOCK);
-#undef P_SK_FLAG
-
- if (flags)
- printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
-
- return printed;
-}
-
-#define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
-
-#ifndef MSG_PROBE
-#define MSG_PROBE 0x10
-#endif
-#ifndef MSG_WAITFORONE
-#define MSG_WAITFORONE 0x10000
-#endif
-#ifndef MSG_SENDPAGE_NOTLAST
-#define MSG_SENDPAGE_NOTLAST 0x20000
-#endif
-#ifndef MSG_FASTOPEN
-#define MSG_FASTOPEN 0x20000000
-#endif
-
-static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
- struct syscall_arg *arg)
-{
- int printed = 0, flags = arg->val;
-
- if (flags == 0)
- return scnprintf(bf, size, "NONE");
-#define P_MSG_FLAG(n) \
- if (flags & MSG_##n) { \
- printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
- flags &= ~MSG_##n; \
- }
-
- P_MSG_FLAG(OOB);
- P_MSG_FLAG(PEEK);
- P_MSG_FLAG(DONTROUTE);
- P_MSG_FLAG(TRYHARD);
- P_MSG_FLAG(CTRUNC);
- P_MSG_FLAG(PROBE);
- P_MSG_FLAG(TRUNC);
- P_MSG_FLAG(DONTWAIT);
- P_MSG_FLAG(EOR);
- P_MSG_FLAG(WAITALL);
- P_MSG_FLAG(FIN);
- P_MSG_FLAG(SYN);
- P_MSG_FLAG(CONFIRM);
- P_MSG_FLAG(RST);
- P_MSG_FLAG(ERRQUEUE);
- P_MSG_FLAG(NOSIGNAL);
- P_MSG_FLAG(MORE);
- P_MSG_FLAG(WAITFORONE);
- P_MSG_FLAG(SENDPAGE_NOTLAST);
- P_MSG_FLAG(FASTOPEN);
- P_MSG_FLAG(CMSG_CLOEXEC);
-#undef P_MSG_FLAG
-
- if (flags)
- printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
-
- return printed;
-}
-
-#define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
-
static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
struct syscall_arg *arg)
{
@@ -788,116 +466,6 @@
#define SCA_FILENAME syscall_arg__scnprintf_filename
-static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
- struct syscall_arg *arg)
-{
- int printed = 0, flags = arg->val;
-
- if (!(flags & O_CREAT))
- arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
-
- if (flags == 0)
- return scnprintf(bf, size, "RDONLY");
-#define P_FLAG(n) \
- if (flags & O_##n) { \
- printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
- flags &= ~O_##n; \
- }
-
- P_FLAG(APPEND);
- P_FLAG(ASYNC);
- P_FLAG(CLOEXEC);
- P_FLAG(CREAT);
- P_FLAG(DIRECT);
- P_FLAG(DIRECTORY);
- P_FLAG(EXCL);
- P_FLAG(LARGEFILE);
- P_FLAG(NOATIME);
- P_FLAG(NOCTTY);
-#ifdef O_NONBLOCK
- P_FLAG(NONBLOCK);
-#elif O_NDELAY
- P_FLAG(NDELAY);
-#endif
-#ifdef O_PATH
- P_FLAG(PATH);
-#endif
- P_FLAG(RDWR);
-#ifdef O_DSYNC
- if ((flags & O_SYNC) == O_SYNC)
- printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
- else {
- P_FLAG(DSYNC);
- }
-#else
- P_FLAG(SYNC);
-#endif
- P_FLAG(TRUNC);
- P_FLAG(WRONLY);
-#undef P_FLAG
-
- if (flags)
- printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
-
- return printed;
-}
-
-#define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
-
-static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
- struct syscall_arg *arg)
-{
- int printed = 0, flags = arg->val;
-
- if (flags == 0)
- return 0;
-
-#define P_FLAG(n) \
- if (flags & PERF_FLAG_##n) { \
- printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
- flags &= ~PERF_FLAG_##n; \
- }
-
- P_FLAG(FD_NO_GROUP);
- P_FLAG(FD_OUTPUT);
- P_FLAG(PID_CGROUP);
- P_FLAG(FD_CLOEXEC);
-#undef P_FLAG
-
- if (flags)
- printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
-
- return printed;
-}
-
-#define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
-
-static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
- struct syscall_arg *arg)
-{
- int printed = 0, flags = arg->val;
-
- if (flags == 0)
- return scnprintf(bf, size, "NONE");
-#define P_FLAG(n) \
- if (flags & EFD_##n) { \
- printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
- flags &= ~EFD_##n; \
- }
-
- P_FLAG(SEMAPHORE);
- P_FLAG(CLOEXEC);
- P_FLAG(NONBLOCK);
-#undef P_FLAG
-
- if (flags)
- printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
-
- return printed;
-}
-
-#define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
-
static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
struct syscall_arg *arg)
{
@@ -921,59 +489,6 @@
#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
-static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
-{
- int sig = arg->val;
-
- switch (sig) {
-#define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
- P_SIGNUM(HUP);
- P_SIGNUM(INT);
- P_SIGNUM(QUIT);
- P_SIGNUM(ILL);
- P_SIGNUM(TRAP);
- P_SIGNUM(ABRT);
- P_SIGNUM(BUS);
- P_SIGNUM(FPE);
- P_SIGNUM(KILL);
- P_SIGNUM(USR1);
- P_SIGNUM(SEGV);
- P_SIGNUM(USR2);
- P_SIGNUM(PIPE);
- P_SIGNUM(ALRM);
- P_SIGNUM(TERM);
- P_SIGNUM(CHLD);
- P_SIGNUM(CONT);
- P_SIGNUM(STOP);
- P_SIGNUM(TSTP);
- P_SIGNUM(TTIN);
- P_SIGNUM(TTOU);
- P_SIGNUM(URG);
- P_SIGNUM(XCPU);
- P_SIGNUM(XFSZ);
- P_SIGNUM(VTALRM);
- P_SIGNUM(PROF);
- P_SIGNUM(WINCH);
- P_SIGNUM(IO);
- P_SIGNUM(PWR);
- P_SIGNUM(SYS);
-#ifdef SIGEMT
- P_SIGNUM(EMT);
-#endif
-#ifdef SIGSTKFLT
- P_SIGNUM(STKFLT);
-#endif
-#ifdef SIGSWI
- P_SIGNUM(SWI);
-#endif
- default: break;
- }
-
- return scnprintf(bf, size, "%#x", sig);
-}
-
-#define SCA_SIGNUM syscall_arg__scnprintf_signum
-
#if defined(__i386__) || defined(__x86_64__)
/*
* FIXME: Make this available to all arches.
@@ -1001,16 +516,62 @@
static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
#endif /* defined(__i386__) || defined(__x86_64__) */
+#ifndef GRND_NONBLOCK
+#define GRND_NONBLOCK 0x0001
+#endif
+#ifndef GRND_RANDOM
+#define GRND_RANDOM 0x0002
+#endif
+
+static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
+ struct syscall_arg *arg)
+{
+ int printed = 0, flags = arg->val;
+
+#define P_FLAG(n) \
+ if (flags & GRND_##n) { \
+ printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+ flags &= ~GRND_##n; \
+ }
+
+ P_FLAG(RANDOM);
+ P_FLAG(NONBLOCK);
+#undef P_FLAG
+
+ if (flags)
+ printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+ return printed;
+}
+
+#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
+
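The getrandom beautifier above is the template for every flags formatter in this file (and for the ones now split out into trace/beauty/*.c just below): test each known bit, print its name, clear it, then dump whatever is left in hex so unknown bits are never silently swallowed. A minimal standalone sketch of the same pattern; decode_grnd_flags and main are illustrative names, not part of perf:

#include <stdio.h>

#define GRND_NONBLOCK 0x0001
#define GRND_RANDOM   0x0002

static size_t decode_grnd_flags(char *bf, size_t size, int flags)
{
        size_t printed = 0;

#define P_FLAG(n) \
        if (flags & GRND_##n) { \
                printed += snprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
                flags &= ~GRND_##n; \
        }
        P_FLAG(RANDOM);
        P_FLAG(NONBLOCK);
#undef P_FLAG
        if (flags) /* unknown residue, printed raw so nothing is lost */
                printed += snprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
        return printed;
}

int main(void)
{
        char bf[64] = "";

        decode_grnd_flags(bf, sizeof(bf), GRND_RANDOM | GRND_NONBLOCK | 0x8);
        puts(bf); /* prints: RANDOM|NONBLOCK|0x8 */
        return 0;
}

The syscall_fmts table further down then binds such a formatter to an argument slot, as in [2] = SCA_GETRANDOM_FLAGS for getrandom's third parameter.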
#define STRARRAY(arg, name, array) \
.arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
.arg_parm = { [arg] = &strarray__##array, }
+#include "trace/beauty/eventfd.c"
+#include "trace/beauty/flock.c"
+#include "trace/beauty/futex_op.c"
+#include "trace/beauty/mmap.c"
+#include "trace/beauty/mode_t.c"
+#include "trace/beauty/msg_flags.c"
+#include "trace/beauty/open_flags.c"
+#include "trace/beauty/perf_event_open.c"
+#include "trace/beauty/pid.c"
+#include "trace/beauty/sched_policy.c"
+#include "trace/beauty/seccomp.c"
+#include "trace/beauty/signum.c"
+#include "trace/beauty/socket_type.c"
+#include "trace/beauty/waitid_options.c"
+
static struct syscall_fmt {
const char *name;
const char *alias;
size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
void *arg_parm[6];
bool errmsg;
+ bool errpid;
bool timeout;
bool hexret;
} syscall_fmts[] = {
@@ -1028,6 +589,7 @@
{ .name = "chroot", .errmsg = true,
.arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
{ .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
+ { .name = "clone", .errpid = true, },
{ .name = "close", .errmsg = true,
.arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
{ .name = "connect", .errmsg = true, },
@@ -1093,6 +655,11 @@
{ .name = "getdents64", .errmsg = true,
.arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
{ .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
+ { .name = "getpid", .errpid = true, },
+ { .name = "getpgid", .errpid = true, },
+ { .name = "getppid", .errpid = true, },
+ { .name = "getrandom", .errmsg = true,
+ .arg_scnprintf = { [2] = SCA_GETRANDOM_FLAGS, /* flags */ }, },
{ .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
{ .name = "getxattr", .errmsg = true,
.arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
@@ -1186,8 +753,7 @@
[1] = SCA_FILENAME, /* filename */
[2] = SCA_OPEN_FLAGS, /* flags */ }, },
{ .name = "perf_event_open", .errmsg = true,
- .arg_scnprintf = { [1] = SCA_INT, /* pid */
- [2] = SCA_INT, /* cpu */
+ .arg_scnprintf = { [2] = SCA_INT, /* cpu */
[3] = SCA_FD, /* group_fd */
[4] = SCA_PERF_FLAGS, /* flags */ }, },
{ .name = "pipe2", .errmsg = true,
@@ -1234,6 +800,11 @@
.arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
{ .name = "rt_tgsigqueueinfo", .errmsg = true,
.arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
+ { .name = "sched_setscheduler", .errmsg = true,
+ .arg_scnprintf = { [1] = SCA_SCHED_POLICY, /* policy */ }, },
+ { .name = "seccomp", .errmsg = true,
+ .arg_scnprintf = { [0] = SCA_SECCOMP_OP, /* op */
+ [1] = SCA_SECCOMP_FLAGS, /* flags */ }, },
{ .name = "select", .errmsg = true, .timeout = true, },
{ .name = "sendmmsg", .errmsg = true,
.arg_scnprintf = { [0] = SCA_FD, /* fd */
@@ -1244,7 +815,9 @@
{ .name = "sendto", .errmsg = true,
.arg_scnprintf = { [0] = SCA_FD, /* fd */
[3] = SCA_MSG_FLAGS, /* flags */ }, },
+ { .name = "set_tid_address", .errpid = true, },
{ .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
+ { .name = "setpgid", .errmsg = true, },
{ .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
{ .name = "setxattr", .errmsg = true,
.arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
@@ -1287,6 +860,10 @@
.arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
{ .name = "vmsplice", .errmsg = true,
.arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
+ { .name = "wait4", .errpid = true,
+ .arg_scnprintf = { [2] = SCA_WAITID_OPTIONS, /* options */ }, },
+ { .name = "waitid", .errpid = true,
+ .arg_scnprintf = { [3] = SCA_WAITID_OPTIONS, /* options */ }, },
{ .name = "write", .errmsg = true,
.arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
{ .name = "writev", .errmsg = true,
@@ -1398,59 +975,6 @@
static const size_t trace__entry_str_size = 2048;
-struct trace {
- struct perf_tool tool;
- struct {
- int machine;
- int open_id;
- } audit;
- struct {
- int max;
- struct syscall *table;
- struct {
- struct perf_evsel *sys_enter,
- *sys_exit;
- } events;
- } syscalls;
- struct record_opts opts;
- struct perf_evlist *evlist;
- struct machine *host;
- struct thread *current;
- u64 base_time;
- FILE *output;
- unsigned long nr_events;
- struct strlist *ev_qualifier;
- struct {
- size_t nr;
- int *entries;
- } ev_qualifier_ids;
- struct intlist *tid_list;
- struct intlist *pid_list;
- struct {
- size_t nr;
- pid_t *entries;
- } filter_pids;
- double duration_filter;
- double runtime_ms;
- struct {
- u64 vfs_getname,
- proc_getname;
- } stats;
- bool not_ev_qualifier;
- bool live;
- bool full_time;
- bool sched;
- bool multiple_threads;
- bool summary;
- bool summary_only;
- bool show_comm;
- bool show_tool_stats;
- bool trace_syscalls;
- bool force;
- bool vfs_getname;
- int trace_pgfaults;
-};
-
static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
{
struct thread_trace *ttrace = thread__priv(thread);
@@ -1618,6 +1142,7 @@
color_fprintf(trace->output, PERF_COLOR_RED,
"LOST %" PRIu64 " events!\n", event->lost.lost);
ret = machine__process_lost_event(machine, event, sample);
+ break;
default:
ret = machine__process_event(machine, event, sample);
break;
@@ -1675,6 +1200,10 @@
sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
else if (field->flags & FIELD_IS_POINTER)
sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
+ else if (strcmp(field->type, "pid_t") == 0)
+ sc->arg_scnprintf[idx] = SCA_PID;
+ else if (strcmp(field->type, "umode_t") == 0)
+ sc->arg_scnprintf[idx] = SCA_MODE_T;
++idx;
}
@@ -1685,7 +1214,7 @@
{
char tp_name[128];
struct syscall *sc;
- const char *name = audit_syscall_to_name(id, trace->audit.machine);
+ const char *name = syscalltbl__name(trace->sctbl, id);
if (name == NULL)
return -1;
@@ -1760,7 +1289,7 @@
strlist__for_each(pos, trace->ev_qualifier) {
const char *sc = pos->s;
- int id = audit_name_to_syscall(sc, trace->audit.machine);
+ int id = syscalltbl__id(trace->sctbl, sc);
if (id < 0) {
if (err == 0) {
@@ -1846,7 +1375,12 @@
"%ld", val);
}
}
- } else {
+ } else if (IS_ERR(sc->tp_format)) {
+ /*
+ * If we managed to read the tracepoint /format file, then we
+ * may end up not having any args, like with gettid(), so only
+ * print the raw args when we didn't manage to read it.
+ */
int i = 0;
while (i < 6) {
@@ -1987,7 +1521,7 @@
goto out_put;
}
- if (!trace->summary_only)
+ if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
trace__printf_interrupted_entry(trace, sample);
ttrace->entry_time = sample->time;
@@ -1998,7 +1532,7 @@
args, trace, thread);
if (sc->is_exit) {
- if (!trace->duration_filter && !trace->summary_only) {
+ if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
fprintf(trace->output, "%-70s\n", ttrace->entry_str);
}
@@ -2018,6 +1552,29 @@
return err;
}
+static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct callchain_cursor *cursor)
+{
+ struct addr_location al;
+
+ if (machine__resolve(trace->host, &al, sample) < 0 ||
+ thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack))
+ return -1;
+
+ return 0;
+}
+
+static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
+{
+ /* TODO: user-configurable print_opts */
+ const unsigned int print_opts = EVSEL__PRINT_SYM |
+ EVSEL__PRINT_DSO |
+ EVSEL__PRINT_UNKNOWN_AS_ADDR;
+
+ return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
+}
+
static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
union perf_event *event __maybe_unused,
struct perf_sample *sample)
@@ -2025,7 +1582,7 @@
long ret;
u64 duration = 0;
struct thread *thread;
- int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
+ int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
struct syscall *sc = trace__syscall_info(trace, evsel, id);
struct thread_trace *ttrace;
@@ -2042,7 +1599,7 @@
ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
- if (id == trace->audit.open_id && ret >= 0 && ttrace->filename.pending_open) {
+ if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
trace__set_fd_pathname(thread, ret, ttrace->filename.name);
ttrace->filename.pending_open = false;
++trace->stats.vfs_getname;
@@ -2057,6 +1614,15 @@
} else if (trace->duration_filter)
goto out;
+ if (sample->callchain) {
+ callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
+ if (callchain_ret == 0) {
+ if (callchain_cursor.nr < trace->min_stack)
+ goto out;
+ callchain_ret = 1;
+ }
+ }
+
if (trace->summary_only)
goto out;
@@ -2073,7 +1639,7 @@
if (sc->fmt == NULL) {
signed_print:
fprintf(trace->output, ") = %ld", ret);
- } else if (ret < 0 && sc->fmt->errmsg) {
+ } else if (ret < 0 && (sc->fmt->errmsg || sc->fmt->errpid)) {
char bf[STRERR_BUFSIZE];
const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
*e = audit_errno_to_name(-ret);
@@ -2083,10 +1649,24 @@
fprintf(trace->output, ") = 0 Timeout");
else if (sc->fmt->hexret)
fprintf(trace->output, ") = %#lx", ret);
- else
+ else if (sc->fmt->errpid) {
+ struct thread *child = machine__find_thread(trace->host, ret, ret);
+
+ if (child != NULL) {
+ fprintf(trace->output, ") = %ld", ret);
+ if (child->comm_set)
+ fprintf(trace->output, " (%s)", thread__comm_str(child));
+ thread__put(child);
+ }
+ } else
goto signed_print;
fputc('\n', trace->output);
+
+ if (callchain_ret > 0)
+ trace__fprintf_callchain(trace, sample);
+ else if (callchain_ret < 0)
+ pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
out:
ttrace->entry_pending = false;
err = 0;
@@ -2217,6 +1797,17 @@
union perf_event *event __maybe_unused,
struct perf_sample *sample)
{
+ int callchain_ret = 0;
+
+ if (sample->callchain) {
+ callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
+ if (callchain_ret == 0) {
+ if (callchain_cursor.nr < trace->min_stack)
+ goto out;
+ callchain_ret = 1;
+ }
+ }
+
trace__printf_interrupted_entry(trace, sample);
trace__fprintf_tstamp(trace, sample->time, trace->output);
@@ -2234,6 +1825,12 @@
}
fprintf(trace->output, ")\n");
+
+ if (callchain_ret > 0)
+ trace__fprintf_callchain(trace, sample);
+ else if (callchain_ret < 0)
+ pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
+out:
return 0;
}
@@ -2264,8 +1861,19 @@
char map_type = 'd';
struct thread_trace *ttrace;
int err = -1;
+ int callchain_ret = 0;
thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
+
+ if (sample->callchain) {
+ callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
+ if (callchain_ret == 0) {
+ if (callchain_cursor.nr < trace->min_stack)
+ goto out_put;
+ callchain_ret = 1;
+ }
+ }
+
ttrace = thread__trace(thread, trace->output);
if (ttrace == NULL)
goto out_put;
@@ -2307,6 +1915,11 @@
print_location(trace->output, sample, &al, true, false);
fprintf(trace->output, " (%c%c)\n", map_type, al.level);
+
+ if (callchain_ret > 0)
+ trace__fprintf_callchain(trace, sample);
+ else if (callchain_ret < 0)
+ pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
out:
err = 0;
out_put:
@@ -2326,6 +1939,23 @@
return false;
}
+static void trace__set_base_time(struct trace *trace,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample)
+{
+ /*
+ * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
+ * and don't use sample->time unconditionally, we may end up having
+ * some other event in the future without PERF_SAMPLE_TIME for good
+ * reason, i.e. we may not be interested in its timestamps, just in
+ * it taking place, picking some piece of information when it
+ * appears in our event stream (vfs_getname comes to mind).
+ */
+ if (trace->base_time == 0 && !trace->full_time &&
+ (evsel->attr.sample_type & PERF_SAMPLE_TIME))
+ trace->base_time = sample->time;
+}
+
static int trace__process_sample(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
@@ -2340,8 +1970,7 @@
if (skip_sample(trace, sample))
return 0;
- if (!trace->full_time && trace->base_time == 0)
- trace->base_time = sample->time;
+ trace__set_base_time(trace, evsel, sample);
if (handler) {
++trace->nr_events;
@@ -2450,8 +2079,7 @@
return true;
}
-static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
- u64 config)
+static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
{
struct perf_evsel *evsel;
struct perf_event_attr attr = {
@@ -2465,13 +2093,10 @@
event_attr_init(&attr);
evsel = perf_evsel__new(&attr);
- if (!evsel)
- return -ENOMEM;
+ if (evsel)
+ evsel->handler = trace__pgfault;
- evsel->handler = trace__pgfault;
- perf_evlist__add(evlist, evsel);
-
- return 0;
+ return evsel;
}
static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
@@ -2479,9 +2104,6 @@
const u32 type = event->header.type;
struct perf_evsel *evsel;
- if (!trace->full_time && trace->base_time == 0)
- trace->base_time = sample->time;
-
if (type != PERF_RECORD_SAMPLE) {
trace__process_event(trace, trace->host, event, sample);
return;
@@ -2493,6 +2115,8 @@
return;
}
+ trace__set_base_time(trace, evsel, sample);
+
if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
sample->raw_data == NULL) {
fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
@@ -2527,6 +2151,15 @@
perf_evlist__add(evlist, sys_enter);
perf_evlist__add(evlist, sys_exit);
+ if (callchain_param.enabled && !trace->kernel_syscallchains) {
+ /*
+ * We're interested only in the user space callchain
+ * leading to the syscall, allow overriding that for
+ * debugging reasons using --kernel-syscall-graph
+ */
+ sys_exit->attr.exclude_callchain_kernel = 1;
+ }
+
trace->syscalls.events.sys_enter = sys_enter;
trace->syscalls.events.sys_exit = sys_exit;
@@ -2565,7 +2198,7 @@
static int trace__run(struct trace *trace, int argc, const char **argv)
{
struct perf_evlist *evlist = trace->evlist;
- struct perf_evsel *evsel;
+ struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
int err = -1, i;
unsigned long before;
const bool forks = argc > 0;
@@ -2579,14 +2212,19 @@
if (trace->trace_syscalls)
trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
- if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
- perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
- goto out_error_mem;
+ if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
+ pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
+ if (pgfault_maj == NULL)
+ goto out_error_mem;
+ perf_evlist__add(evlist, pgfault_maj);
}
- if ((trace->trace_pgfaults & TRACE_PFMIN) &&
- perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
- goto out_error_mem;
+ if ((trace->trace_pgfaults & TRACE_PFMIN)) {
+ pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
+ if (pgfault_min == NULL)
+ goto out_error_mem;
+ perf_evlist__add(evlist, pgfault_min);
+ }
if (trace->sched &&
perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
@@ -2605,7 +2243,45 @@
goto out_delete_evlist;
}
- perf_evlist__config(evlist, &trace->opts);
+ perf_evlist__config(evlist, &trace->opts, NULL);
+
+ if (callchain_param.enabled) {
+ bool use_identifier = false;
+
+ if (trace->syscalls.events.sys_exit) {
+ perf_evsel__config_callchain(trace->syscalls.events.sys_exit,
+ &trace->opts, &callchain_param);
+ use_identifier = true;
+ }
+
+ if (pgfault_maj) {
+ perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
+ use_identifier = true;
+ }
+
+ if (pgfault_min) {
+ perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
+ use_identifier = true;
+ }
+
+ if (use_identifier) {
+ /*
+ * Now we have evsels with different sample_ids, use
+ * PERF_SAMPLE_IDENTIFIER to map from sample to evsel
+ * from a fixed position in each ring buffer record.
+ *
+ * As of the changeset introducing this comment, this
+ * isn't strictly needed, as the fields that can come before
+ * PERF_SAMPLE_ID are all used, but we'll probably disable
+ * some of those for things like copying the payload of
+ * pointer syscall arguments, and for vfs_getname we don't
+ * need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this
+ * here as a reminder that we need to use PERF_SAMPLE_IDENTIFIER.
+ */
+ perf_evlist__set_sample_bit(evlist, IDENTIFIER);
+ perf_evlist__reset_sample_bit(evlist, ID);
+ }
+ }
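The comment above deserves unpacking: with plain PERF_SAMPLE_ID, the id's offset inside a PERF_RECORD_SAMPLE depends on which of the preceding sample_type bits (IP, TID, TIME, ADDR) each evsel enabled, so it only works for demuxing when every evsel shares one layout. PERF_SAMPLE_IDENTIFIER pins the id to a fixed position instead: the first u64 after the header in samples, the last u64 in non-sample records carrying sample_id_all data. A sketch of what that buys a reader (the header mirrors struct perf_event_header from linux/perf_event.h):

#include <stdint.h>

struct perf_event_header {
        uint32_t type;
        uint16_t misc;
        uint16_t size;
};

/* With PERF_SAMPLE_IDENTIFIER set, the sample id of a PERF_RECORD_SAMPLE
 * sits right after the header, regardless of which other sample_type
 * bits the originating evsel enabled. */
uint64_t sample_identifier(const struct perf_event_header *hdr)
{
        return *(const uint64_t *)(hdr + 1);
}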
signal(SIGCHLD, sig_handler);
signal(SIGINT, sig_handler);
@@ -2883,15 +2559,29 @@
return printed;
}
+DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
+ struct stats *stats;
+ double msecs;
+ int syscall;
+)
+{
+ struct int_node *source = rb_entry(nd, struct int_node, rb_node);
+ struct stats *stats = source->priv;
+
+ entry->syscall = source->i;
+ entry->stats = stats;
+ entry->msecs = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
+}
+
static size_t thread__dump_stats(struct thread_trace *ttrace,
struct trace *trace, FILE *fp)
{
- struct stats *stats;
size_t printed = 0;
struct syscall *sc;
- struct int_node *inode = intlist__first(ttrace->syscall_stats);
+ struct rb_node *nd;
+ DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
- if (inode == NULL)
+ if (syscall_stats == NULL)
return 0;
printed += fprintf(fp, "\n");
@@ -2900,9 +2590,8 @@
printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
- /* each int_node is a syscall */
- while (inode) {
- stats = inode->priv;
+ resort_rb__for_each(nd, syscall_stats) {
+ struct stats *stats = syscall_stats_entry->stats;
if (stats) {
double min = (double)(stats->min) / NSEC_PER_MSEC;
double max = (double)(stats->max) / NSEC_PER_MSEC;
@@ -2913,34 +2602,23 @@
pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
avg /= NSEC_PER_MSEC;
- sc = &trace->syscalls.table[inode->i];
+ sc = &trace->syscalls.table[syscall_stats_entry->syscall];
printed += fprintf(fp, " %-15s", sc->name);
printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
- n, avg * n, min, avg);
+ n, syscall_stats_entry->msecs, min, avg);
printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
}
-
- inode = intlist__next(inode);
}
+ resort_rb__delete(syscall_stats);
printed += fprintf(fp, "\n\n");
return printed;
}
-/* struct used to pass data to per-thread function */
-struct summary_data {
- FILE *fp;
- struct trace *trace;
- size_t printed;
-};
-
-static int trace__fprintf_one_thread(struct thread *thread, void *priv)
+static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
{
- struct summary_data *data = priv;
- FILE *fp = data->fp;
- size_t printed = data->printed;
- struct trace *trace = data->trace;
+ size_t printed = 0;
struct thread_trace *ttrace = thread__priv(thread);
double ratio;
@@ -2956,25 +2634,45 @@
printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
if (ttrace->pfmin)
printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
- printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
+ if (trace->sched)
+ printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
+ else if (fputc('\n', fp) != EOF)
+ ++printed;
+
printed += thread__dump_stats(ttrace, trace, fp);
- data->printed += printed;
+ return printed;
+}
- return 0;
+static unsigned long thread__nr_events(struct thread_trace *ttrace)
+{
+ return ttrace ? ttrace->nr_events : 0;
+}
+
+DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
+ struct thread *thread;
+)
+{
+ entry->thread = rb_entry(nd, struct thread, rb_node);
}
static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
{
- struct summary_data data = {
- .fp = fp,
- .trace = trace
- };
- data.printed = trace__fprintf_threads_header(fp);
+ DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host);
+ size_t printed = trace__fprintf_threads_header(fp);
+ struct rb_node *nd;
- machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
+ if (threads == NULL) {
+ fprintf(fp, "%s", "Error sorting output by nr_events!\n");
+ return 0;
+ }
- return data.printed;
+ resort_rb__for_each(nd, threads)
+ printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
+
+ resort_rb__delete(threads);
+
+ return printed;
}
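DEFINE_RESORT_RB (from the rb_resort.h include added at the top of the file) generates helpers that snapshot an existing rb-tree into a second tree ordered by a different key: the DECLARE_RESORT_RB_* macros build the snapshot, resort_rb__for_each walks it, and resort_rb__delete frees it, leaving the source tree untouched. A rough standalone analogue of the idea, trading the second tree for an array plus qsort (all names illustrative):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct syscall_stat { int id; double msecs; };

static int by_msecs_desc(const void *pa, const void *pb)
{
        const struct syscall_stat *a = pa, *b = pb;

        return (a->msecs < b->msecs) - (a->msecs > b->msecs);
}

int main(void)
{
        struct syscall_stat stats[] = { { 0, 1.5 }, { 1, 7.2 }, { 2, 0.3 } };
        size_t i, n = sizeof(stats) / sizeof(stats[0]);
        struct syscall_stat *sorted = malloc(n * sizeof(*sorted));

        if (sorted == NULL)
                return 1;
        memcpy(sorted, stats, n * sizeof(*sorted)); /* snapshot, source intact */
        qsort(sorted, n, sizeof(*sorted), by_msecs_desc);
        for (i = 0; i < n; i++)
                printf("syscall %d: %.1f msec\n", sorted[i].id, sorted[i].msecs);
        free(sorted);
        return 0;
}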
static int trace__set_duration(const struct option *opt, const char *str,
@@ -3070,10 +2768,6 @@
NULL
};
struct trace trace = {
- .audit = {
- .machine = audit_detect_machine(),
- .open_id = audit_name_to_syscall("open", trace.audit.machine),
- },
.syscalls = {
.max = -1,
},
@@ -3091,6 +2785,8 @@
.output = stderr,
.show_comm = true,
.trace_syscalls = true,
+ .kernel_syscallchains = false,
+ .max_stack = UINT_MAX,
};
const char *output_name = NULL;
const char *ev_qualifier_str = NULL;
@@ -3136,10 +2832,24 @@
"Trace pagefaults", parse_pagefaults, "maj"),
OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
+ OPT_CALLBACK(0, "call-graph", &trace.opts,
+ "record_mode[,record_size]", record_callchain_help,
+ &record_parse_callchain_opt),
+ OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
+ "Show the kernel callchains on the syscall exit path"),
+ OPT_UINTEGER(0, "min-stack", &trace.min_stack,
+ "Set the minimum stack depth when parsing the callchain, "
+ "anything below the specified depth will be ignored."),
+ OPT_UINTEGER(0, "max-stack", &trace.max_stack,
+ "Set the maximum stack depth when parsing the callchain, "
+ "anything beyond the specified depth will be ignored. "
+ "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
"per thread proc mmap processing timeout in ms"),
OPT_END()
};
+ bool __maybe_unused max_stack_user_set = true;
+ bool mmap_pages_user_set = true;
const char * const trace_subcommands[] = { "record", NULL };
int err;
char bf[BUFSIZ];
@@ -3148,8 +2858,9 @@
signal(SIGFPE, sighandler_dump_stack);
trace.evlist = perf_evlist__new();
+ trace.sctbl = syscalltbl__new();
- if (trace.evlist == NULL) {
+ if (trace.evlist == NULL || trace.sctbl == NULL) {
pr_err("Not enough memory to run!\n");
err = -ENOMEM;
goto out;
@@ -3158,11 +2869,40 @@
argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
+ err = bpf__setup_stdout(trace.evlist);
+ if (err) {
+ bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
+ pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
+ goto out;
+ }
+
+ err = -1;
+
if (trace.trace_pgfaults) {
trace.opts.sample_address = true;
trace.opts.sample_time = true;
}
+ if (trace.opts.mmap_pages == UINT_MAX)
+ mmap_pages_user_set = false;
+
+ if (trace.max_stack == UINT_MAX) {
+ trace.max_stack = sysctl_perf_event_max_stack;
+ max_stack_user_set = false;
+ }
+
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
+ if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled)
+ record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
+#endif
+
+ if (callchain_param.enabled) {
+ if (!mmap_pages_user_set && geteuid() == 0)
+ trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
+
+ symbol_conf.use_callchain = true;
+ }
+
if (trace.evlist->nr_entries > 0)
evlist__set_evsel_handler(trace.evlist, trace__event_handler);
@@ -3179,6 +2919,11 @@
return -1;
}
+ if (!trace.trace_syscalls && ev_qualifier_str) {
+ pr_err("The -e option can't be used with --no-syscalls.\n");
+ goto out;
+ }
+
if (output_name != NULL) {
err = trace__open_output(&trace, output_name);
if (err < 0) {
@@ -3187,6 +2932,8 @@
}
}
+ trace.open_id = syscalltbl__id(trace.sctbl, "open");
+
if (ev_qualifier_str != NULL) {
const char *s = ev_qualifier_str;
struct strlist_config slist_config = {
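With the above in place, perf trace gains callchains end to end. Assuming a binary built with dwarf unwind support, an illustrative invocation would be something like:

$ perf trace --call-graph dwarf --min-stack 8 -e nanosleep usleep 1

i.e. record dwarf callchains on the sys_exit path, skip events whose resolved stack is shallower than 8 frames, and cap callchain parsing at kernel.perf_event_max_stack unless --max-stack says otherwise.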
diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile
index 6f8f643..1e46277 100644
--- a/tools/perf/config/Makefile
+++ b/tools/perf/config/Makefile
@@ -27,7 +27,7 @@
ifeq ($(ARCH),x86)
$(call detected,CONFIG_X86)
ifeq (${IS_64_BIT}, 1)
- CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT
+ CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT -DHAVE_SYSCALL_TABLE -I$(OUTPUT)arch/x86/include/generated
ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S
LIBUNWIND_LIBS = -lunwind -lunwind-x86_64
$(call detected,CONFIG_X86_64)
@@ -295,9 +295,6 @@
CFLAGS += -DHAVE_ELF_GETPHDRNUM_SUPPORT
endif
- # include ARCH specific config
- -include $(src-perf)/arch/$(ARCH)/Makefile
-
ifndef NO_DWARF
ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined)
msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled);
diff --git a/tools/perf/jvmti/jvmti_agent.c b/tools/perf/jvmti/jvmti_agent.c
index 6461e02..3573f31 100644
--- a/tools/perf/jvmti/jvmti_agent.c
+++ b/tools/perf/jvmti/jvmti_agent.c
@@ -92,6 +92,22 @@
return ret;
}
+static int use_arch_timestamp;
+
+static inline uint64_t
+get_arch_timestamp(void)
+{
+#if defined(__i386__) || defined(__x86_64__)
+ unsigned int low, high;
+
+ asm volatile("rdtsc" : "=a" (low), "=d" (high));
+
+ return low | ((uint64_t)high) << 32;
+#else
+ return 0;
+#endif
+}
+
#define NSEC_PER_SEC 1000000000
static int perf_clk_id = CLOCK_MONOTONIC;
@@ -107,6 +123,9 @@
struct timespec ts;
int ret;
+ if (use_arch_timestamp)
+ return get_arch_timestamp();
+
ret = clock_gettime(perf_clk_id, &ts);
if (ret)
return 0;
@@ -203,6 +222,17 @@
munmap(marker_addr, pgsz);
}
+static void
+init_arch_timestamp(void)
+{
+ char *str = getenv("JITDUMP_USE_ARCH_TIMESTAMP");
+
+ if (!str || !*str || !strcmp(str, "0"))
+ return;
+
+ use_arch_timestamp = 1;
+}
+
void *jvmti_open(void)
{
int pad_cnt;
@@ -211,11 +241,17 @@
int fd;
FILE *fp;
+ init_arch_timestamp();
+
/*
* check if clockid is supported
*/
- if (!perf_get_timestamp())
- warnx("jvmti: kernel does not support %d clock id", perf_clk_id);
+ if (!perf_get_timestamp()) {
+ if (use_arch_timestamp)
+ warnx("jvmti: arch timestamp not supported");
+ else
+ warnx("jvmti: kernel does not support %d clock id", perf_clk_id);
+ }
memset(&header, 0, sizeof(header));
@@ -263,6 +299,9 @@
header.timestamp = perf_get_timestamp();
+ if (use_arch_timestamp)
+ header.flags |= JITDUMP_FLAGS_ARCH_TIMESTAMP;
+
if (!fwrite(&header, sizeof(header), 1, fp)) {
warn("jvmti: cannot write dumpfile header");
goto error;
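The jvmti agent changes prefer a raw TSC when JITDUMP_USE_ARCH_TIMESTAMP is set to anything but "0", and flag the dump header with JITDUMP_FLAGS_ARCH_TIMESTAMP so consumers know which clock was used. The low | high << 32 composition is the standard rdtsc idiom; a self-contained sketch of the same pattern (x86-only, and raw cycle counts are only comparable within a single machine/session):

#include <stdint.h>
#include <stdio.h>

static inline uint64_t rdtsc(void)
{
#if defined(__i386__) || defined(__x86_64__)
        unsigned int low, high;

        asm volatile("rdtsc" : "=a" (low), "=d" (high));
        return low | ((uint64_t)high) << 32;
#else
        return 0;       /* no TSC on this architecture */
#endif
}

int main(void)
{
        uint64_t t0 = rdtsc(), t1 = rdtsc();

        printf("delta: %llu cycles\n", (unsigned long long)(t1 - t0));
        return 0;
}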
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index aaee0a7..7970008 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -17,6 +17,7 @@
#include <subcmd/parse-options.h>
#include "util/bpf-loader.h"
#include "util/debug.h"
+#include <api/fs/fs.h>
#include <api/fs/tracing_path.h>
#include <pthread.h>
#include <stdlib.h>
@@ -308,9 +309,11 @@
if (*argcp > 1) {
struct strbuf buf;
- strbuf_init(&buf, PATH_MAX);
- strbuf_addstr(&buf, alias_string);
- sq_quote_argv(&buf, (*argv) + 1, PATH_MAX);
+ if (strbuf_init(&buf, PATH_MAX) < 0 ||
+ strbuf_addstr(&buf, alias_string) < 0 ||
+ sq_quote_argv(&buf, (*argv) + 1,
+ PATH_MAX) < 0)
+ die("Failed to allocate memory.");
free(alias_string);
alias_string = buf.buf;
}
@@ -533,6 +536,7 @@
{
const char *cmd;
char sbuf[STRERR_BUFSIZE];
+ int value;
/* libsubcmd init */
exec_cmd_init("perf", PREFIX, PERF_EXEC_PATH, EXEC_PATH_ENVIRONMENT);
@@ -542,6 +546,9 @@
page_size = sysconf(_SC_PAGE_SIZE);
cacheline_size = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
+ if (sysctl__read_int("kernel/perf_event_max_stack", &value) == 0)
+ sysctl_perf_event_max_stack = value;
+
cmd = extract_argv0_path(argv[0]);
if (!cmd)
cmd = "perf-help";
@@ -549,6 +556,7 @@
srandom(time(NULL));
perf_config(perf_default_config, NULL);
+ set_buildid_dir(NULL);
/* get debugfs/tracefs mount point from /proc/mounts */
tracing_path_mount();
@@ -572,7 +580,6 @@
}
if (!prefixcmp(cmd, "trace")) {
#ifdef HAVE_LIBAUDIT_SUPPORT
- set_buildid_dir(NULL);
setup_path();
argv[0] = "trace";
return cmd_trace(argc, argv, NULL);
@@ -587,7 +594,6 @@
argc--;
handle_options(&argv, &argc, NULL);
commit_pager_choice();
- set_buildid_dir(NULL);
if (argc > 0) {
if (!prefixcmp(argv[0], "--"))
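The new startup read of kernel.perf_event_max_stack goes through the sysctl__read_int helper from tools/lib/api; outside the perf tree the equivalent is just parsing the procfs file. A rough sketch (illustrative names, minimal error handling):

#include <stdio.h>

static int read_perf_event_max_stack(int *value)
{
        FILE *fp = fopen("/proc/sys/kernel/perf_event_max_stack", "r");
        int ret = -1;

        if (fp == NULL)
                return -1;
        if (fscanf(fp, "%d", value) == 1)
                ret = 0;
        fclose(fp);
        return ret;
}

int main(void)
{
        int max_stack;

        if (read_perf_event_max_stack(&max_stack) == 0)
                printf("kernel.perf_event_max_stack = %d\n", max_stack);
        return 0;
}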
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 5381a01..cd8f1b1 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -52,7 +52,6 @@
bool sample_weight;
bool sample_time;
bool sample_time_set;
- bool callgraph_set;
bool period;
bool running_time;
bool full_auxtrace;
diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py
index 1b02cdc..7656ff8 100644
--- a/tools/perf/scripts/python/export-to-postgresql.py
+++ b/tools/perf/scripts/python/export-to-postgresql.py
@@ -34,10 +34,9 @@
#
# ubuntu:
#
-# $ sudo apt-get install postgresql
+# $ sudo apt-get install postgresql python-pyside.qtsql libqt4-sql-psql
# $ sudo su - postgres
-# $ createuser <your user id here>
-# Shall the new role be a superuser? (y/n) y
+# $ createuser -s <your user id here>
#
# An example of using this script with Intel PT:
#
@@ -224,11 +223,14 @@
perf_db_export_mode = True
perf_db_export_calls = False
+perf_db_export_callchains = False
+
def usage():
- print >> sys.stderr, "Usage is: export-to-postgresql.py <database name> [<columns>] [<calls>]"
+ print >> sys.stderr, "Usage is: export-to-postgresql.py <database name> [<columns>] [<calls>] [<callchains>]"
print >> sys.stderr, "where: columns 'all' or 'branches'"
- print >> sys.stderr, " calls 'calls' => create calls table"
+ print >> sys.stderr, " calls 'calls' => create calls and call_paths tables"
+ print >> sys.stderr, " callchains 'callchains' => create call_paths table"
raise Exception("Too few arguments")
if (len(sys.argv) < 2):
@@ -246,9 +248,11 @@
branches = (columns == "branches")
-if (len(sys.argv) >= 4):
- if (sys.argv[3] == "calls"):
+for i in range(3,len(sys.argv)):
+ if (sys.argv[i] == "calls"):
perf_db_export_calls = True
+ elif (sys.argv[i] == "callchains"):
+ perf_db_export_callchains = True
else:
usage()
@@ -359,14 +363,16 @@
'transaction bigint,'
'data_src bigint,'
'branch_type integer,'
- 'in_tx boolean)')
+ 'in_tx boolean,'
+ 'call_path_id bigint)')
-if perf_db_export_calls:
+if perf_db_export_calls or perf_db_export_callchains:
do_query(query, 'CREATE TABLE call_paths ('
'id bigint NOT NULL,'
'parent_id bigint,'
'symbol_id bigint,'
'ip bigint)')
+if perf_db_export_calls:
do_query(query, 'CREATE TABLE calls ('
'id bigint NOT NULL,'
'thread_id bigint,'
@@ -428,7 +434,7 @@
'(SELECT tid FROM threads WHERE id = thread_id) AS tid'
' FROM comm_threads')
-if perf_db_export_calls:
+if perf_db_export_calls or perf_db_export_callchains:
do_query(query, 'CREATE VIEW call_paths_view AS '
'SELECT '
'c.id,'
@@ -444,6 +450,7 @@
'(SELECT dso_id FROM symbols WHERE id = p.symbol_id) AS parent_dso_id,'
'(SELECT dso FROM symbols_view WHERE id = p.symbol_id) AS parent_dso_short_name'
' FROM call_paths c INNER JOIN call_paths p ON p.id = c.parent_id')
+if perf_db_export_calls:
do_query(query, 'CREATE VIEW calls_view AS '
'SELECT '
'calls.id,'
@@ -541,8 +548,9 @@
symbol_file = open_output_file("symbol_table.bin")
branch_type_file = open_output_file("branch_type_table.bin")
sample_file = open_output_file("sample_table.bin")
-if perf_db_export_calls:
+if perf_db_export_calls or perf_db_export_callchains:
call_path_file = open_output_file("call_path_table.bin")
+if perf_db_export_calls:
call_file = open_output_file("call_table.bin")
def trace_begin():
@@ -554,8 +562,8 @@
comm_table(0, "unknown")
dso_table(0, 0, "unknown", "unknown", "")
symbol_table(0, 0, 0, 0, 0, "unknown")
- sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
- if perf_db_export_calls:
+ sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
+ if perf_db_export_calls or perf_db_export_callchains:
call_path_table(0, 0, 0, 0)
unhandled_count = 0
@@ -571,8 +579,9 @@
copy_output_file(symbol_file, "symbols")
copy_output_file(branch_type_file, "branch_types")
copy_output_file(sample_file, "samples")
- if perf_db_export_calls:
+ if perf_db_export_calls or perf_db_export_callchains:
copy_output_file(call_path_file, "call_paths")
+ if perf_db_export_calls:
copy_output_file(call_file, "calls")
print datetime.datetime.today(), "Removing intermediate files..."
@@ -585,8 +594,9 @@
remove_output_file(symbol_file)
remove_output_file(branch_type_file)
remove_output_file(sample_file)
- if perf_db_export_calls:
+ if perf_db_export_calls or perf_db_export_callchains:
remove_output_file(call_path_file)
+ if perf_db_export_calls:
remove_output_file(call_file)
os.rmdir(output_dir_name)
print datetime.datetime.today(), "Adding primary keys"
@@ -599,8 +609,9 @@
do_query(query, 'ALTER TABLE symbols ADD PRIMARY KEY (id)')
do_query(query, 'ALTER TABLE branch_types ADD PRIMARY KEY (id)')
do_query(query, 'ALTER TABLE samples ADD PRIMARY KEY (id)')
- if perf_db_export_calls:
+ if perf_db_export_calls or perf_db_export_callchains:
do_query(query, 'ALTER TABLE call_paths ADD PRIMARY KEY (id)')
+ if perf_db_export_calls:
do_query(query, 'ALTER TABLE calls ADD PRIMARY KEY (id)')
print datetime.datetime.today(), "Adding foreign keys"
@@ -623,10 +634,11 @@
'ADD CONSTRAINT symbolfk FOREIGN KEY (symbol_id) REFERENCES symbols (id),'
'ADD CONSTRAINT todsofk FOREIGN KEY (to_dso_id) REFERENCES dsos (id),'
'ADD CONSTRAINT tosymbolfk FOREIGN KEY (to_symbol_id) REFERENCES symbols (id)')
- if perf_db_export_calls:
+ if perf_db_export_calls or perf_db_export_callchains:
do_query(query, 'ALTER TABLE call_paths '
'ADD CONSTRAINT parentfk FOREIGN KEY (parent_id) REFERENCES call_paths (id),'
'ADD CONSTRAINT symbolfk FOREIGN KEY (symbol_id) REFERENCES symbols (id)')
+ if perf_db_export_calls:
do_query(query, 'ALTER TABLE calls '
'ADD CONSTRAINT threadfk FOREIGN KEY (thread_id) REFERENCES threads (id),'
'ADD CONSTRAINT commfk FOREIGN KEY (comm_id) REFERENCES comms (id),'
@@ -694,11 +706,11 @@
value = struct.pack(fmt, 2, 4, branch_type, n, name)
branch_type_file.write(value)
-def sample_table(sample_id, evsel_id, machine_id, thread_id, comm_id, dso_id, symbol_id, sym_offset, ip, time, cpu, to_dso_id, to_symbol_id, to_sym_offset, to_ip, period, weight, transaction, data_src, branch_type, in_tx, *x):
+def sample_table(sample_id, evsel_id, machine_id, thread_id, comm_id, dso_id, symbol_id, sym_offset, ip, time, cpu, to_dso_id, to_symbol_id, to_sym_offset, to_ip, period, weight, transaction, data_src, branch_type, in_tx, call_path_id, *x):
if branches:
- value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiiiB", 17, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 4, branch_type, 1, in_tx)
+ value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiiiBiq", 18, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 4, branch_type, 1, in_tx, 8, call_path_id)
else:
- value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiqiqiqiqiiiB", 21, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 8, period, 8, weight, 8, transaction, 8, data_src, 4, branch_type, 1, in_tx)
+ value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiqiqiqiqiiiBiq", 22, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 8, period, 8, weight, 8, transaction, 8, data_src, 4, branch_type, 1, in_tx, 8, call_path_id)
sample_file.write(value)
def call_path_table(cp_id, parent_id, symbol_id, ip, *x):
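For context on the pack strings above: they follow PostgreSQL's binary COPY tuple layout, which is what lets these .bin files be bulk-loaded. Each tuple starts with a big-endian int16 field count ('!h', hence the bumps from 17 to 18 and from 21 to 22), and each field is an int32 byte length followed by the value, so appending call_path_id means both growing the count and adding the trailing '8, call_path_id' pair encoded as 'iq' (8-byte length, 8-byte integer).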
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index 1ba628e..66a2898 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -37,6 +37,8 @@
perf-y += cpumap.o
perf-y += stat.o
perf-y += event_update.o
+perf-y += event-times.o
+perf-y += backward-ring-buffer.o
$(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build
$(call rule_mkdir)
diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c
new file mode 100644
index 0000000..d9ba991
--- /dev/null
+++ b/tools/perf/tests/backward-ring-buffer.c
@@ -0,0 +1,151 @@
+/*
+ * Test the backward bit in the event attribute: read the ring buffer
+ * from end to beginning
+ */
+
+#include <perf.h>
+#include <evlist.h>
+#include <sys/prctl.h>
+#include "tests.h"
+#include "debug.h"
+
+#define NR_ITERS 111
+
+static void testcase(void)
+{
+ int i;
+
+ for (i = 0; i < NR_ITERS; i++) {
+ char proc_name[10];
+
+ snprintf(proc_name, sizeof(proc_name), "p:%d\n", i);
+ prctl(PR_SET_NAME, proc_name);
+ }
+}
+
+static int count_samples(struct perf_evlist *evlist, int *sample_count,
+ int *comm_count)
+{
+ int i;
+
+ for (i = 0; i < evlist->nr_mmaps; i++) {
+ union perf_event *event;
+
+ perf_evlist__mmap_read_catchup(evlist, i);
+ while ((event = perf_evlist__mmap_read_backward(evlist, i)) != NULL) {
+ const u32 type = event->header.type;
+
+ switch (type) {
+ case PERF_RECORD_SAMPLE:
+ (*sample_count)++;
+ break;
+ case PERF_RECORD_COMM:
+ (*comm_count)++;
+ break;
+ default:
+ pr_err("Unexpected record of type %d\n", type);
+ return TEST_FAIL;
+ }
+ }
+ }
+ return TEST_OK;
+}
+
+static int do_test(struct perf_evlist *evlist, int mmap_pages,
+ int *sample_count, int *comm_count)
+{
+ int err;
+ char sbuf[STRERR_BUFSIZE];
+
+ err = perf_evlist__mmap(evlist, mmap_pages, true);
+ if (err < 0) {
+ pr_debug("perf_evlist__mmap: %s\n",
+ strerror_r(errno, sbuf, sizeof(sbuf)));
+ return TEST_FAIL;
+ }
+
+ perf_evlist__enable(evlist);
+ testcase();
+ perf_evlist__disable(evlist);
+
+ err = count_samples(evlist, sample_count, comm_count);
+ perf_evlist__munmap(evlist);
+ return err;
+}
+
+
+int test__backward_ring_buffer(int subtest __maybe_unused)
+{
+ int ret = TEST_SKIP, err, sample_count = 0, comm_count = 0;
+ char pid[16], sbuf[STRERR_BUFSIZE];
+ struct perf_evlist *evlist;
+ struct perf_evsel *evsel __maybe_unused;
+ struct parse_events_error parse_error;
+ struct record_opts opts = {
+ .target = {
+ .uid = UINT_MAX,
+ .uses_mmap = true,
+ },
+ .freq = 0,
+ .mmap_pages = 256,
+ .default_interval = 1,
+ };
+
+ snprintf(pid, sizeof(pid), "%d", getpid());
+ pid[sizeof(pid) - 1] = '\0';
+ opts.target.tid = opts.target.pid = pid;
+
+ evlist = perf_evlist__new();
+ if (!evlist) {
+ pr_debug("No ehough memory to create evlist\n");
+ return TEST_FAIL;
+ }
+
+ err = perf_evlist__create_maps(evlist, &opts.target);
+ if (err < 0) {
+ pr_debug("Not enough memory to create thread/cpu maps\n");
+ goto out_delete_evlist;
+ }
+
+ bzero(&parse_error, sizeof(parse_error));
+ err = parse_events(evlist, "syscalls:sys_enter_prctl", &parse_error);
+ if (err) {
+ pr_debug("Failed to parse tracepoint event, try use root\n");
+ ret = TEST_SKIP;
+ goto out_delete_evlist;
+ }
+
+ perf_evlist__config(evlist, &opts, NULL);
+
+ /* Set backward bit, ring buffer should be writing from end */
+ evlist__for_each(evlist, evsel)
+ evsel->attr.write_backward = 1;
+
+ err = perf_evlist__open(evlist);
+ if (err < 0) {
+ pr_debug("perf_evlist__open: %s\n",
+ strerror_r(errno, sbuf, sizeof(sbuf)));
+ goto out_delete_evlist;
+ }
+
+ ret = TEST_FAIL;
+ err = do_test(evlist, opts.mmap_pages, &sample_count,
+ &comm_count);
+ if (err != TEST_OK)
+ goto out_delete_evlist;
+
+ if ((sample_count != NR_ITERS) || (comm_count != NR_ITERS)) {
+ pr_err("Unexpected counter: sample_count=%d, comm_count=%d\n",
+ sample_count, comm_count);
+ goto out_delete_evlist;
+ }
+
+ err = do_test(evlist, 1, &sample_count, &comm_count);
+ if (err != TEST_OK)
+ goto out_delete_evlist;
+
+ ret = TEST_OK;
+out_delete_evlist:
+ perf_evlist__delete(evlist);
+ return ret;
+}
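In miniature, the behaviour the test exercises: with attr.write_backward set, the kernel lays records down from the end of the ring toward the beginning, so a reader that starts at the write head and walks toward the end sees newest-first and can stop cleanly when it reaches data it has already consumed. A toy illustration of that ordering (not the kernel's actual record layout):

#include <stdio.h>

#define NR_SLOTS 8

int main(void)
{
        int ring[NR_SLOTS];
        int head = NR_SLOTS;    /* write position moves toward index 0 */
        int i;

        for (i = 0; i < 5; i++) /* writer: newest record ends up lowest */
                ring[--head] = i;

        for (i = head; i < NR_SLOTS; i++)       /* reader: newest first */
                printf("record %d\n", ring[i]);
        return 0;
}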
diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c
index 199501c..f31eed3 100644
--- a/tools/perf/tests/bpf.c
+++ b/tools/perf/tests/bpf.c
@@ -138,7 +138,7 @@
perf_evlist__splice_list_tail(evlist, &parse_evlist.list);
evlist->nr_groups = parse_evlist.nr_groups;
- perf_evlist__config(evlist, &opts);
+ perf_evlist__config(evlist, &opts, NULL);
err = perf_evlist__open(evlist);
if (err < 0) {
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index f2b1dca..0e95c20 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -204,6 +204,14 @@
.func = test__event_update,
},
{
+ .desc = "Test events times",
+ .func = test__event_times,
+ },
+ {
+ .desc = "Test backward reading from ring buffer",
+ .func = test__backward_ring_buffer,
+ },
+ {
.func = NULL,
},
};
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index abd3f0e..68a69a1 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -532,7 +532,7 @@
goto out_put;
}
- perf_evlist__config(evlist, &opts);
+ perf_evlist__config(evlist, &opts, NULL);
evsel = perf_evlist__first(evlist);
diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c
index dc673ff..8cf0d9e 100644
--- a/tools/perf/tests/dso-data.c
+++ b/tools/perf/tests/dso-data.c
@@ -202,7 +202,7 @@
{
int i;
- dsos = malloc(sizeof(dsos) * cnt);
+ dsos = malloc(sizeof(*dsos) * cnt);
TEST_ASSERT_VAL("failed to alloc dsos array", dsos);
for (i = 0; i < cnt; i++) {
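The one-liner above fixes a classic allocation-size slip: sizeof(dsos) is the size of the pointer variable, sizeof(*dsos) the size of one array element. With a pointer-to-pointer the two happen to coincide on LP64, which is why this compiled and ran, but only the latter states the intent and survives a change of element type. In miniature:

#include <stdlib.h>

struct dso { int dummy; };

int main(void)
{
        size_t cnt = 4;
        struct dso **dsos;

        /* sizeof(dsos): size of the pointer variable itself.
         * sizeof(*dsos): size of one element (struct dso *) -- the
         * robust idiom, still correct if dsos' type ever changes. */
        dsos = malloc(sizeof(*dsos) * cnt);
        free(dsos);
        return 0;
}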
diff --git a/tools/perf/tests/event-times.c b/tools/perf/tests/event-times.c
new file mode 100644
index 0000000..95fb744
--- /dev/null
+++ b/tools/perf/tests/event-times.c
@@ -0,0 +1,236 @@
+#include <linux/compiler.h>
+#include <string.h>
+#include "tests.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "util.h"
+#include "debug.h"
+#include "thread_map.h"
+#include "target.h"
+
+static int attach__enable_on_exec(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__last(evlist);
+ struct target target = {
+ .uid = UINT_MAX,
+ };
+ const char *argv[] = { "true", NULL, };
+ char sbuf[STRERR_BUFSIZE];
+ int err;
+
+ pr_debug("attaching to spawned child, enable on exec\n");
+
+ err = perf_evlist__create_maps(evlist, &target);
+ if (err < 0) {
+ pr_debug("Not enough memory to create thread/cpu maps\n");
+ return err;
+ }
+
+ err = perf_evlist__prepare_workload(evlist, &target, argv, false, NULL);
+ if (err < 0) {
+ pr_debug("Couldn't run the workload!\n");
+ return err;
+ }
+
+ evsel->attr.enable_on_exec = 1;
+
+ err = perf_evlist__open(evlist);
+ if (err < 0) {
+ pr_debug("perf_evlist__open: %s\n",
+ strerror_r(errno, sbuf, sizeof(sbuf)));
+ return err;
+ }
+
+ return perf_evlist__start_workload(evlist) == 1 ? TEST_OK : TEST_FAIL;
+}
+
+static int detach__enable_on_exec(struct perf_evlist *evlist)
+{
+ waitpid(evlist->workload.pid, NULL, 0);
+ return 0;
+}
+
+static int attach__current_disabled(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__last(evlist);
+ struct thread_map *threads;
+ int err;
+
+ pr_debug("attaching to current thread as disabled\n");
+
+ threads = thread_map__new(-1, getpid(), UINT_MAX);
+ if (threads == NULL) {
+ pr_debug("thread_map__new\n");
+ return -1;
+ }
+
+ evsel->attr.disabled = 1;
+
+ err = perf_evsel__open_per_thread(evsel, threads);
+ if (err) {
+ pr_debug("Failed to open event cpu-clock:u\n");
+ return err;
+ }
+
+ thread_map__put(threads);
+ return perf_evsel__enable(evsel) == 0 ? TEST_OK : TEST_FAIL;
+}
+
+static int attach__current_enabled(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__last(evlist);
+ struct thread_map *threads;
+ int err;
+
+ pr_debug("attaching to current thread as enabled\n");
+
+ threads = thread_map__new(-1, getpid(), UINT_MAX);
+ if (threads == NULL) {
+ pr_debug("failed to call thread_map__new\n");
+ return -1;
+ }
+
+ err = perf_evsel__open_per_thread(evsel, threads);
+
+ thread_map__put(threads);
+ return err == 0 ? TEST_OK : TEST_FAIL;
+}
+
+static int detach__disable(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__last(evlist);
+
+ return perf_evsel__enable(evsel);
+}
+
+static int attach__cpu_disabled(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__last(evlist);
+ struct cpu_map *cpus;
+ int err;
+
+ pr_debug("attaching to CPU 0 as enabled\n");
+
+ cpus = cpu_map__new("0");
+ if (cpus == NULL) {
+ pr_debug("failed to call cpu_map__new\n");
+ return -1;
+ }
+
+ evsel->attr.disabled = 1;
+
+ err = perf_evsel__open_per_cpu(evsel, cpus);
+ if (err) {
+ if (err == -EACCES)
+ return TEST_SKIP;
+
+ pr_debug("Failed to open event cpu-clock:u\n");
+ return err;
+ }
+
+ cpu_map__put(cpus);
+ return perf_evsel__enable(evsel);
+}
+
+static int attach__cpu_enabled(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__last(evlist);
+ struct cpu_map *cpus;
+ int err;
+
+ pr_debug("attaching to CPU 0 as enabled\n");
+
+ cpus = cpu_map__new("0");
+ if (cpus == NULL) {
+ pr_debug("failed to call cpu_map__new\n");
+ return -1;
+ }
+
+ err = perf_evsel__open_per_cpu(evsel, cpus);
+ if (err == -EACCES)
+ return TEST_SKIP;
+
+ cpu_map__put(cpus);
+ return err ? TEST_FAIL : TEST_OK;
+}
+
+static int test_times(int (attach)(struct perf_evlist *),
+ int (detach)(struct perf_evlist *))
+{
+ struct perf_counts_values count;
+ struct perf_evlist *evlist = NULL;
+ struct perf_evsel *evsel;
+ int err = -1, i;
+
+ evlist = perf_evlist__new();
+ if (!evlist) {
+ pr_debug("failed to create event list\n");
+ goto out_err;
+ }
+
+ err = parse_events(evlist, "cpu-clock:u", NULL);
+ if (err) {
+ pr_debug("failed to parse event cpu-clock:u\n");
+ goto out_err;
+ }
+
+ evsel = perf_evlist__last(evlist);
+ evsel->attr.read_format |=
+ PERF_FORMAT_TOTAL_TIME_ENABLED |
+ PERF_FORMAT_TOTAL_TIME_RUNNING;
+
+ err = attach(evlist);
+ if (err == TEST_SKIP) {
+ pr_debug(" SKIP : not enough rights\n");
+ return err;
+ }
+
+ TEST_ASSERT_VAL("failed to attach", !err);
+
+ for (i = 0; i < 100000000; i++) { }
+
+ TEST_ASSERT_VAL("failed to detach", !detach(evlist));
+
+ perf_evsel__read(evsel, 0, 0, &count);
+
+ err = !(count.ena == count.run);
+
+ pr_debug(" %s: ena %" PRIu64", run %" PRIu64"\n",
+ !err ? "OK " : "FAILED",
+ count.ena, count.run);
+
+out_err:
+ if (evlist)
+ perf_evlist__delete(evlist);
+ return !err ? TEST_OK : TEST_FAIL;
+}
+
+/*
+ * This test creates the software event 'cpu-clock',
+ * attaches it in several ways (explained below)
+ * and checks that the enabled and running times
+ * match.
+ */
+int test__event_times(int subtest __maybe_unused)
+{
+ int err, ret = 0;
+
+#define _T(attach, detach) \
+ err = test_times(attach, detach); \
+ if (err && (ret == TEST_OK || ret == TEST_SKIP)) \
+ ret = err;
+
+ /* attach on newly spawned process after exec */
+ _T(attach__enable_on_exec, detach__enable_on_exec)
+ /* attach on current process as enabled */
+ _T(attach__current_enabled, detach__disable)
+ /* attach on current process as disabled */
+ _T(attach__current_disabled, detach__disable)
+ /* attach on cpu as disabled */
+ _T(attach__cpu_disabled, detach__disable)
+ /* attach on cpu as enabled */
+ _T(attach__cpu_enabled, detach__disable)
+
+#undef _T
+ return ret;
+}
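
For context on what the new test asserts: with PERF_FORMAT_TOTAL_TIME_ENABLED and PERF_FORMAT_TOTAL_TIME_RUNNING set (and no PERF_FORMAT_GROUP), a read() on the perf file descriptor returns the count followed by the two times, and ena == run means the event never lost PMU time to multiplexing. A minimal sketch of a consumer, assuming a descriptor perf_fd obtained from perf_event_open() elsewhere:

#include <stdint.h>
#include <unistd.h>

/* Read layout for PERF_FORMAT_TOTAL_TIME_ENABLED |
 * PERF_FORMAT_TOTAL_TIME_RUNNING, without PERF_FORMAT_GROUP. */
struct read_values {
	uint64_t value;		/* event count */
	uint64_t time_enabled;	/* ns the event was enabled */
	uint64_t time_running;	/* ns the event was on the PMU */
};

static int counter_was_never_multiplexed(int perf_fd)
{
	struct read_values v;

	if (read(perf_fd, &v, sizeof(v)) != sizeof(v))
		return -1;
	return v.time_enabled == v.time_running;
}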
diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c
index 012eab5..63ecf21 100644
--- a/tools/perf/tests/event_update.c
+++ b/tools/perf/tests/event_update.c
@@ -30,7 +30,7 @@
TEST_ASSERT_VAL("wrong id", ev->id == 123);
TEST_ASSERT_VAL("wrong id", ev->type == PERF_EVENT_UPDATE__SCALE);
- TEST_ASSERT_VAL("wrong scale", ev_data->scale = 0.123);
+ TEST_ASSERT_VAL("wrong scale", ev_data->scale == 0.123);
return 0;
}
diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c
index f55f4bd..6b21746 100644
--- a/tools/perf/tests/hists_common.c
+++ b/tools/perf/tests/hists_common.c
@@ -161,7 +161,7 @@
struct rb_root *root;
struct rb_node *node;
- if (sort__need_collapse)
+ if (hists__has(hists, need_collapse))
root = &hists->entries_collapsed;
else
root = hists->entries_in;
diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c
index ed5aa9e..a9e3db3 100644
--- a/tools/perf/tests/hists_cumulate.c
+++ b/tools/perf/tests/hists_cumulate.c
@@ -101,7 +101,7 @@
if (machine__resolve(machine, &al, &sample) < 0)
goto out;
- if (hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH,
+ if (hist_entry_iter__add(&iter, &al, sysctl_perf_event_max_stack,
NULL) < 0) {
addr_location__put(&al);
goto out;
@@ -126,7 +126,7 @@
struct rb_root *root_out;
struct rb_node *node;
- if (sort__need_collapse)
+ if (hists__has(hists, need_collapse))
root_in = &hists->entries_collapsed;
else
root_in = hists->entries_in;
diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c
index b825d24..e846f8c 100644
--- a/tools/perf/tests/hists_filter.c
+++ b/tools/perf/tests/hists_filter.c
@@ -81,7 +81,7 @@
al.socket = fake_samples[i].socket;
if (hist_entry_iter__add(&iter, &al,
- PERF_MAX_STACK_DEPTH, NULL) < 0) {
+ sysctl_perf_event_max_stack, NULL) < 0) {
addr_location__put(&al);
goto out;
}
diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c
index 358324e..acf5a13 100644
--- a/tools/perf/tests/hists_link.c
+++ b/tools/perf/tests/hists_link.c
@@ -145,7 +145,7 @@
/*
* Only entries from fake_common_samples should have a pair.
*/
- if (sort__need_collapse)
+ if (hists__has(hists, need_collapse))
root = &hists->entries_collapsed;
else
root = hists->entries_in;
@@ -197,7 +197,7 @@
* and some entries will have no pair. However every entry
* in other hists should have (dummy) pair.
*/
- if (sort__need_collapse)
+ if (hists__has(hists, need_collapse))
root = &hists->entries_collapsed;
else
root = hists->entries_in;
diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c
index d3556fb..63c5efa 100644
--- a/tools/perf/tests/hists_output.c
+++ b/tools/perf/tests/hists_output.c
@@ -67,7 +67,7 @@
if (machine__resolve(machine, &al, &sample) < 0)
goto out;
- if (hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH,
+ if (hist_entry_iter__add(&iter, &al, sysctl_perf_event_max_stack,
NULL) < 0) {
addr_location__put(&al);
goto out;
@@ -92,7 +92,7 @@
struct rb_root *root_out;
struct rb_node *node;
- if (sort__need_collapse)
+ if (hists__has(hists, need_collapse))
root_in = &hists->entries_collapsed;
else
root_in = hists->entries_in;
diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c
index ddb78fa..614e45a 100644
--- a/tools/perf/tests/keep-tracking.c
+++ b/tools/perf/tests/keep-tracking.c
@@ -80,7 +80,7 @@
CHECK__(parse_events(evlist, "dummy:u", NULL));
CHECK__(parse_events(evlist, "cycles:u", NULL));
- perf_evlist__config(evlist, &opts);
+ perf_evlist__config(evlist, &opts, NULL);
evsel = perf_evlist__first(evlist);
diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c
index eb99a105..4344fe4 100644
--- a/tools/perf/tests/openat-syscall-tp-fields.c
+++ b/tools/perf/tests/openat-syscall-tp-fields.c
@@ -44,7 +44,7 @@
goto out_delete_evlist;
}
- perf_evsel__config(evsel, &opts);
+ perf_evsel__config(evsel, &opts, NULL);
thread_map__set_pid(evlist->threads, 0, getpid());
diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
index 1cc78ce..b836ee6a 100644
--- a/tools/perf/tests/perf-record.c
+++ b/tools/perf/tests/perf-record.c
@@ -99,7 +99,7 @@
perf_evsel__set_sample_bit(evsel, CPU);
perf_evsel__set_sample_bit(evsel, TID);
perf_evsel__set_sample_bit(evsel, TIME);
- perf_evlist__config(evlist, &opts);
+ perf_evlist__config(evlist, &opts, NULL);
err = sched__get_first_possible_cpu(evlist->workload.pid, &cpu_mask);
if (err < 0) {
diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c
index ebd8016..39a689b 100644
--- a/tools/perf/tests/switch-tracking.c
+++ b/tools/perf/tests/switch-tracking.c
@@ -417,7 +417,7 @@
perf_evsel__set_sample_bit(tracking_evsel, TIME);
/* Config events */
- perf_evlist__config(evlist, &opts);
+ perf_evlist__config(evlist, &opts, NULL);
/* Check moved event is still at the front */
if (cycles_evsel != perf_evlist__first(evlist)) {
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 82b2b5e..c57e72c 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -85,6 +85,8 @@
int test__synthesize_stat(int subtest);
int test__synthesize_stat_round(int subtest);
int test__event_update(int subtest);
+int test__event_times(int subtest);
+int test__backward_ring_buffer(int subtest);
#if defined(__arm__) || defined(__aarch64__)
#ifdef HAVE_DWARF_UNWIND_SUPPORT
diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c
index 630b0b4..e63abab 100644
--- a/tools/perf/tests/vmlinux-kallsyms.c
+++ b/tools/perf/tests/vmlinux-kallsyms.c
@@ -54,8 +54,14 @@
* Step 3:
*
* Load and split /proc/kallsyms into multiple maps, one per module.
+ * Do not use kcore, as this test was designed before kcore support
+ * and has parts that only make sense if using the non-kcore code.
+ * XXX: extend it to stress the kcore code as well, hint: the list
+ * of modules extracted from /proc/kcore, in its current form, can't
+ * be compared against the list of modules found in the "vmlinux"
+ * code and with the one obtained from /proc/modules in the "kallsyms" code.
*/
- if (machine__load_kallsyms(&kallsyms, "/proc/kallsyms", type, NULL) <= 0) {
+ if (__machine__load_kallsyms(&kallsyms, "/proc/kallsyms", type, true, NULL) <= 0) {
pr_debug("dso__load_kallsyms ");
goto out;
}
@@ -157,6 +163,9 @@
pr_debug("%#" PRIx64 ": diff name v: %s k: %s\n",
mem_start, sym->name, pair->name);
+ } else {
+ pr_debug("%#" PRIx64 ": diff name v: %s k: %s\n",
+ mem_start, sym->name, first_pair->name);
}
}
} else
diff --git a/tools/perf/trace/beauty/eventfd.c b/tools/perf/trace/beauty/eventfd.c
new file mode 100644
index 0000000..d64f4a9
--- /dev/null
+++ b/tools/perf/trace/beauty/eventfd.c
@@ -0,0 +1,38 @@
+#include <sys/eventfd.h>
+
+#ifndef EFD_SEMAPHORE
+#define EFD_SEMAPHORE 1
+#endif
+
+#ifndef EFD_NONBLOCK
+#define EFD_NONBLOCK 00004000
+#endif
+
+#ifndef EFD_CLOEXEC
+#define EFD_CLOEXEC 02000000
+#endif
+
+static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size, struct syscall_arg *arg)
+{
+ int printed = 0, flags = arg->val;
+
+ if (flags == 0)
+ return scnprintf(bf, size, "NONE");
+#define P_FLAG(n) \
+ if (flags & EFD_##n) { \
+ printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+ flags &= ~EFD_##n; \
+ }
+
+ P_FLAG(SEMAPHORE);
+ P_FLAG(CLOEXEC);
+ P_FLAG(NONBLOCK);
+#undef P_FLAG
+
+ if (flags)
+ printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+ return printed;
+}
+
+#define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
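
The P_FLAG idiom above recurs through all of the new trace/beauty helpers: print each known bit by name, clear it as it is printed, then dump whatever is left in hex so unknown flags are never silently swallowed. A standalone sketch of the same pattern outside perf, using snprintf in place of scnprintf:

#include <stdio.h>
#include <sys/eventfd.h>

int main(void)
{
	char buf[64];
	int printed = 0, flags = EFD_CLOEXEC | EFD_NONBLOCK | 0x40;

#define P_FLAG(n) \
	if (flags & EFD_##n) { \
		printed += snprintf(buf + printed, sizeof(buf) - printed, \
				    "%s%s", printed ? "|" : "", #n); \
		flags &= ~EFD_##n; \
	}
	P_FLAG(SEMAPHORE);
	P_FLAG(CLOEXEC);
	P_FLAG(NONBLOCK);
#undef P_FLAG

	if (flags)	/* leftover unknown bits, printed in hex */
		printed += snprintf(buf + printed, sizeof(buf) - printed,
				    "%s%#x", printed ? "|" : "", flags);

	puts(buf);	/* prints: CLOEXEC|NONBLOCK|0x40 */
	return 0;
}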
diff --git a/tools/perf/trace/beauty/flock.c b/tools/perf/trace/beauty/flock.c
new file mode 100644
index 0000000..021bb48
--- /dev/null
+++ b/tools/perf/trace/beauty/flock.c
@@ -0,0 +1,31 @@
+
+static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
+ struct syscall_arg *arg)
+{
+ int printed = 0, op = arg->val;
+
+ if (op == 0)
+ return scnprintf(bf, size, "NONE");
+#define P_CMD(cmd) \
+ if ((op & LOCK_##cmd) == LOCK_##cmd) { \
+ printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
+ op &= ~LOCK_##cmd; \
+ }
+
+ P_CMD(SH);
+ P_CMD(EX);
+ P_CMD(NB);
+ P_CMD(UN);
+ P_CMD(MAND);
+ P_CMD(RW);
+ P_CMD(READ);
+ P_CMD(WRITE);
+#undef P_OP
+
+ if (op)
+ printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
+
+ return printed;
+}
+
+#define SCA_FLOCK syscall_arg__scnprintf_flock
diff --git a/tools/perf/trace/beauty/futex_op.c b/tools/perf/trace/beauty/futex_op.c
new file mode 100644
index 0000000..e247621
--- /dev/null
+++ b/tools/perf/trace/beauty/futex_op.c
@@ -0,0 +1,44 @@
+#include <linux/futex.h>
+
+static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
+{
+ enum syscall_futex_args {
+ SCF_UADDR = (1 << 0),
+ SCF_OP = (1 << 1),
+ SCF_VAL = (1 << 2),
+ SCF_TIMEOUT = (1 << 3),
+ SCF_UADDR2 = (1 << 4),
+ SCF_VAL3 = (1 << 5),
+ };
+ int op = arg->val;
+ int cmd = op & FUTEX_CMD_MASK;
+ size_t printed = 0;
+
+ switch (cmd) {
+#define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
+ P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
+ P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
+ P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
+ P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
+ P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
+ P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
+ P_FUTEX_OP(WAKE_OP); break;
+ P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
+ P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
+ P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
+ P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
+ P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
+ P_FUTEX_OP(WAIT_REQUEUE_PI); break;
+ default: printed = scnprintf(bf, size, "%#x", cmd); break;
+ }
+
+ if (op & FUTEX_PRIVATE_FLAG)
+ printed += scnprintf(bf + printed, size - printed, "|PRIV");
+
+ if (op & FUTEX_CLOCK_REALTIME)
+ printed += scnprintf(bf + printed, size - printed, "|CLKRT");
+
+ return printed;
+}
+
+#define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
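
A note on the masking above: the futex op argument packs one command in the low bits plus two modifier flags, which is why the beautifier isolates cmd with FUTEX_CMD_MASK first and appends |PRIV and |CLKRT afterwards. A tiny decoder showing the same split (Linux-only; needs <linux/futex.h>):

#include <linux/futex.h>
#include <stdio.h>

int main(void)
{
	/* FUTEX_WAIT_PRIVATE == FUTEX_WAIT | FUTEX_PRIVATE_FLAG */
	int op = FUTEX_WAIT_PRIVATE;

	printf("cmd=%d priv=%d clkrt=%d\n",
	       op & FUTEX_CMD_MASK,
	       !!(op & FUTEX_PRIVATE_FLAG),
	       !!(op & FUTEX_CLOCK_REALTIME));
	return 0;
}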
diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c
new file mode 100644
index 0000000..3444a4d
--- /dev/null
+++ b/tools/perf/trace/beauty/mmap.c
@@ -0,0 +1,158 @@
+#include <sys/mman.h>
+
+static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
+ struct syscall_arg *arg)
+{
+ int printed = 0, prot = arg->val;
+
+ if (prot == PROT_NONE)
+ return scnprintf(bf, size, "NONE");
+#define P_MMAP_PROT(n) \
+ if (prot & PROT_##n) { \
+ printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+ prot &= ~PROT_##n; \
+ }
+
+ P_MMAP_PROT(EXEC);
+ P_MMAP_PROT(READ);
+ P_MMAP_PROT(WRITE);
+#ifdef PROT_SEM
+ P_MMAP_PROT(SEM);
+#endif
+ P_MMAP_PROT(GROWSDOWN);
+ P_MMAP_PROT(GROWSUP);
+#undef P_MMAP_PROT
+
+ if (prot)
+ printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
+
+ return printed;
+}
+
+#define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
+
+#ifndef MAP_STACK
+# define MAP_STACK 0x20000
+#endif
+
+static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
+ struct syscall_arg *arg)
+{
+ int printed = 0, flags = arg->val;
+
+#define P_MMAP_FLAG(n) \
+ if (flags & MAP_##n) { \
+ printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+ flags &= ~MAP_##n; \
+ }
+
+ P_MMAP_FLAG(SHARED);
+ P_MMAP_FLAG(PRIVATE);
+#ifdef MAP_32BIT
+ P_MMAP_FLAG(32BIT);
+#endif
+ P_MMAP_FLAG(ANONYMOUS);
+ P_MMAP_FLAG(DENYWRITE);
+ P_MMAP_FLAG(EXECUTABLE);
+ P_MMAP_FLAG(FILE);
+ P_MMAP_FLAG(FIXED);
+ P_MMAP_FLAG(GROWSDOWN);
+#ifdef MAP_HUGETLB
+ P_MMAP_FLAG(HUGETLB);
+#endif
+ P_MMAP_FLAG(LOCKED);
+ P_MMAP_FLAG(NONBLOCK);
+ P_MMAP_FLAG(NORESERVE);
+ P_MMAP_FLAG(POPULATE);
+ P_MMAP_FLAG(STACK);
+#ifdef MAP_UNINITIALIZED
+ P_MMAP_FLAG(UNINITIALIZED);
+#endif
+#undef P_MMAP_FLAG
+
+ if (flags)
+ printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+ return printed;
+}
+
+#define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
+
+static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
+ struct syscall_arg *arg)
+{
+ int printed = 0, flags = arg->val;
+
+#define P_MREMAP_FLAG(n) \
+ if (flags & MREMAP_##n) { \
+ printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+ flags &= ~MREMAP_##n; \
+ }
+
+ P_MREMAP_FLAG(MAYMOVE);
+#ifdef MREMAP_FIXED
+ P_MREMAP_FLAG(FIXED);
+#endif
+#undef P_MREMAP_FLAG
+
+ if (flags)
+ printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+ return printed;
+}
+
+#define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
+
+#ifndef MADV_HWPOISON
+#define MADV_HWPOISON 100
+#endif
+
+#ifndef MADV_MERGEABLE
+#define MADV_MERGEABLE 12
+#endif
+
+#ifndef MADV_UNMERGEABLE
+#define MADV_UNMERGEABLE 13
+#endif
+
+static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
+ struct syscall_arg *arg)
+{
+ int behavior = arg->val;
+
+ switch (behavior) {
+#define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
+ P_MADV_BHV(NORMAL);
+ P_MADV_BHV(RANDOM);
+ P_MADV_BHV(SEQUENTIAL);
+ P_MADV_BHV(WILLNEED);
+ P_MADV_BHV(DONTNEED);
+ P_MADV_BHV(REMOVE);
+ P_MADV_BHV(DONTFORK);
+ P_MADV_BHV(DOFORK);
+ P_MADV_BHV(HWPOISON);
+#ifdef MADV_SOFT_OFFLINE
+ P_MADV_BHV(SOFT_OFFLINE);
+#endif
+ P_MADV_BHV(MERGEABLE);
+ P_MADV_BHV(UNMERGEABLE);
+#ifdef MADV_HUGEPAGE
+ P_MADV_BHV(HUGEPAGE);
+#endif
+#ifdef MADV_NOHUGEPAGE
+ P_MADV_BHV(NOHUGEPAGE);
+#endif
+#ifdef MADV_DONTDUMP
+ P_MADV_BHV(DONTDUMP);
+#endif
+#ifdef MADV_DODUMP
+ P_MADV_BHV(DODUMP);
+#endif
+#undef P_MADV_BHV
+ default: break;
+ }
+
+ return scnprintf(bf, size, "%#x", behavior);
+}
+
+#define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
diff --git a/tools/perf/trace/beauty/mode_t.c b/tools/perf/trace/beauty/mode_t.c
new file mode 100644
index 0000000..930d8fe
--- /dev/null
+++ b/tools/perf/trace/beauty/mode_t.c
@@ -0,0 +1,68 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+/* From include/linux/stat.h */
+#ifndef S_IRWXUGO
+#define S_IRWXUGO (S_IRWXU|S_IRWXG|S_IRWXO)
+#endif
+#ifndef S_IALLUGO
+#define S_IALLUGO (S_ISUID|S_ISGID|S_ISVTX|S_IRWXUGO)
+#endif
+#ifndef S_IRUGO
+#define S_IRUGO (S_IRUSR|S_IRGRP|S_IROTH)
+#endif
+#ifndef S_IWUGO
+#define S_IWUGO (S_IWUSR|S_IWGRP|S_IWOTH)
+#endif
+#ifndef S_IXUGO
+#define S_IXUGO (S_IXUSR|S_IXGRP|S_IXOTH)
+#endif
+
+static size_t syscall_arg__scnprintf_mode_t(char *bf, size_t size, struct syscall_arg *arg)
+{
+ int printed = 0, mode = arg->val;
+
+#define P_MODE(n) \
+ if ((mode & S_##n) == S_##n) { \
+ printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+ mode &= ~S_##n; \
+ }
+
+ P_MODE(IALLUGO);
+ P_MODE(IRWXUGO);
+ P_MODE(IRUGO);
+ P_MODE(IWUGO);
+ P_MODE(IXUGO);
+ P_MODE(IFMT);
+ P_MODE(IFSOCK);
+ P_MODE(IFLNK);
+ P_MODE(IFREG);
+ P_MODE(IFBLK);
+ P_MODE(IFDIR);
+ P_MODE(IFCHR);
+ P_MODE(IFIFO);
+ P_MODE(ISUID);
+ P_MODE(ISGID);
+ P_MODE(ISVTX);
+ P_MODE(IRWXU);
+ P_MODE(IRUSR);
+ P_MODE(IWUSR);
+ P_MODE(IXUSR);
+ P_MODE(IRWXG);
+ P_MODE(IRGRP);
+ P_MODE(IWGRP);
+ P_MODE(IXGRP);
+ P_MODE(IRWXO);
+ P_MODE(IROTH);
+ P_MODE(IWOTH);
+ P_MODE(IXOTH);
+#undef P_MODE
+
+ if (mode)
+ printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", mode);
+
+ return printed;
+}
+
+#define SCA_MODE_T syscall_arg__scnprintf_mode_t
diff --git a/tools/perf/trace/beauty/msg_flags.c b/tools/perf/trace/beauty/msg_flags.c
new file mode 100644
index 0000000..07fa8a0
--- /dev/null
+++ b/tools/perf/trace/beauty/msg_flags.c
@@ -0,0 +1,62 @@
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#ifndef MSG_PROBE
+#define MSG_PROBE 0x10
+#endif
+#ifndef MSG_WAITFORONE
+#define MSG_WAITFORONE 0x10000
+#endif
+#ifndef MSG_SENDPAGE_NOTLAST
+#define MSG_SENDPAGE_NOTLAST 0x20000
+#endif
+#ifndef MSG_FASTOPEN
+#define MSG_FASTOPEN 0x20000000
+#endif
+#ifndef MSG_CMSG_CLOEXEC
+# define MSG_CMSG_CLOEXEC 0x40000000
+#endif
+
+static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
+ struct syscall_arg *arg)
+{
+ int printed = 0, flags = arg->val;
+
+ if (flags == 0)
+ return scnprintf(bf, size, "NONE");
+#define P_MSG_FLAG(n) \
+ if (flags & MSG_##n) { \
+ printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+ flags &= ~MSG_##n; \
+ }
+
+ P_MSG_FLAG(OOB);
+ P_MSG_FLAG(PEEK);
+ P_MSG_FLAG(DONTROUTE);
+ P_MSG_FLAG(TRYHARD);
+ P_MSG_FLAG(CTRUNC);
+ P_MSG_FLAG(PROBE);
+ P_MSG_FLAG(TRUNC);
+ P_MSG_FLAG(DONTWAIT);
+ P_MSG_FLAG(EOR);
+ P_MSG_FLAG(WAITALL);
+ P_MSG_FLAG(FIN);
+ P_MSG_FLAG(SYN);
+ P_MSG_FLAG(CONFIRM);
+ P_MSG_FLAG(RST);
+ P_MSG_FLAG(ERRQUEUE);
+ P_MSG_FLAG(NOSIGNAL);
+ P_MSG_FLAG(MORE);
+ P_MSG_FLAG(WAITFORONE);
+ P_MSG_FLAG(SENDPAGE_NOTLAST);
+ P_MSG_FLAG(FASTOPEN);
+ P_MSG_FLAG(CMSG_CLOEXEC);
+#undef P_MSG_FLAG
+
+ if (flags)
+ printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+ return printed;
+}
+
+#define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
diff --git a/tools/perf/trace/beauty/open_flags.c b/tools/perf/trace/beauty/open_flags.c
new file mode 100644
index 0000000..0f3679e
--- /dev/null
+++ b/tools/perf/trace/beauty/open_flags.c
@@ -0,0 +1,56 @@
+
+static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
+ struct syscall_arg *arg)
+{
+ int printed = 0, flags = arg->val;
+
+ if (!(flags & O_CREAT))
+ arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
+
+ if (flags == 0)
+ return scnprintf(bf, size, "RDONLY");
+#define P_FLAG(n) \
+ if (flags & O_##n) { \
+ printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+ flags &= ~O_##n; \
+ }
+
+ P_FLAG(APPEND);
+ P_FLAG(ASYNC);
+ P_FLAG(CLOEXEC);
+ P_FLAG(CREAT);
+ P_FLAG(DIRECT);
+ P_FLAG(DIRECTORY);
+ P_FLAG(EXCL);
+ P_FLAG(LARGEFILE);
+ P_FLAG(NOATIME);
+ P_FLAG(NOCTTY);
+#ifdef O_NONBLOCK
+ P_FLAG(NONBLOCK);
+#elif defined(O_NDELAY)
+ P_FLAG(NDELAY);
+#endif
+#ifdef O_PATH
+ P_FLAG(PATH);
+#endif
+ P_FLAG(RDWR);
+#ifdef O_DSYNC
+ if ((flags & O_SYNC) == O_SYNC)
+ printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
+ else {
+ P_FLAG(DSYNC);
+ }
+#else
+ P_FLAG(SYNC);
+#endif
+ P_FLAG(TRUNC);
+ P_FLAG(WRONLY);
+#undef P_FLAG
+
+ if (flags)
+ printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+ return printed;
+}
+
+#define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
diff --git a/tools/perf/trace/beauty/perf_event_open.c b/tools/perf/trace/beauty/perf_event_open.c
new file mode 100644
index 0000000..311f09d
--- /dev/null
+++ b/tools/perf/trace/beauty/perf_event_open.c
@@ -0,0 +1,43 @@
+#ifndef PERF_FLAG_FD_NO_GROUP
+# define PERF_FLAG_FD_NO_GROUP (1UL << 0)
+#endif
+
+#ifndef PERF_FLAG_FD_OUTPUT
+# define PERF_FLAG_FD_OUTPUT (1UL << 1)
+#endif
+
+#ifndef PERF_FLAG_PID_CGROUP
+# define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */
+#endif
+
+#ifndef PERF_FLAG_FD_CLOEXEC
+# define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */
+#endif
+
+static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
+ struct syscall_arg *arg)
+{
+ int printed = 0, flags = arg->val;
+
+ if (flags == 0)
+ return 0;
+
+#define P_FLAG(n) \
+ if (flags & PERF_FLAG_##n) { \
+ printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+ flags &= ~PERF_FLAG_##n; \
+ }
+
+ P_FLAG(FD_NO_GROUP);
+ P_FLAG(FD_OUTPUT);
+ P_FLAG(PID_CGROUP);
+ P_FLAG(FD_CLOEXEC);
+#undef P_FLAG
+
+ if (flags)
+ printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+ return printed;
+}
+
+#define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
diff --git a/tools/perf/trace/beauty/pid.c b/tools/perf/trace/beauty/pid.c
new file mode 100644
index 0000000..07486ea
--- /dev/null
+++ b/tools/perf/trace/beauty/pid.c
@@ -0,0 +1,21 @@
+static size_t syscall_arg__scnprintf_pid(char *bf, size_t size, struct syscall_arg *arg)
+{
+ int pid = arg->val;
+ struct trace *trace = arg->trace;
+ size_t printed = scnprintf(bf, size, "%d", pid);
+ struct thread *thread = machine__findnew_thread(trace->host, pid, pid);
+
+ if (thread != NULL) {
+ if (!thread->comm_set)
+ thread__set_comm_from_proc(thread);
+
+ if (thread->comm_set)
+ printed += scnprintf(bf + printed, size - printed,
+ " (%s)", thread__comm_str(thread));
+ thread__put(thread);
+ }
+
+ return printed;
+}
+
+#define SCA_PID syscall_arg__scnprintf_pid
diff --git a/tools/perf/trace/beauty/sched_policy.c b/tools/perf/trace/beauty/sched_policy.c
new file mode 100644
index 0000000..c205bc6
--- /dev/null
+++ b/tools/perf/trace/beauty/sched_policy.c
@@ -0,0 +1,44 @@
+#include <sched.h>
+
+/*
+ * Not defined anywhere else, probably; defined here so that any
+ * future flag bits ORed into the policy argument are still caught
+ */
+#define SCHED_POLICY_MASK 0xff
+
+#ifndef SCHED_DEADLINE
+#define SCHED_DEADLINE 6
+#endif
+
+static size_t syscall_arg__scnprintf_sched_policy(char *bf, size_t size,
+ struct syscall_arg *arg)
+{
+ const char *policies[] = {
+ "NORMAL", "FIFO", "RR", "BATCH", "ISO", "IDLE", "DEADLINE",
+ };
+ size_t printed;
+ int policy = arg->val,
+ flags = policy & ~SCHED_POLICY_MASK;
+
+ policy &= SCHED_POLICY_MASK;
+ if (policy <= SCHED_DEADLINE)
+ printed = scnprintf(bf, size, "%s", policies[policy]);
+ else
+ printed = scnprintf(bf, size, "%#x", policy);
+
+#define P_POLICY_FLAG(n) \
+ if (flags & SCHED_##n) { \
+ printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
+ flags &= ~SCHED_##n; \
+ }
+
+ P_POLICY_FLAG(RESET_ON_FORK);
+#undef P_POLICY_FLAG
+
+ if (flags)
+ printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
+
+ return printed;
+}
+
+#define SCA_SCHED_POLICY syscall_arg__scnprintf_sched_policy
diff --git a/tools/perf/trace/beauty/seccomp.c b/tools/perf/trace/beauty/seccomp.c
new file mode 100644
index 0000000..213c5a7
--- /dev/null
+++ b/tools/perf/trace/beauty/seccomp.c
@@ -0,0 +1,52 @@
+#include <linux/seccomp.h>
+
+#ifndef SECCOMP_SET_MODE_STRICT
+#define SECCOMP_SET_MODE_STRICT 0
+#endif
+#ifndef SECCOMP_SET_MODE_FILTER
+#define SECCOMP_SET_MODE_FILTER 1
+#endif
+
+static size_t syscall_arg__scnprintf_seccomp_op(char *bf, size_t size, struct syscall_arg *arg)
+{
+ int op = arg->val;
+ size_t printed = 0;
+
+ switch (op) {
+#define P_SECCOMP_SET_MODE_OP(n) case SECCOMP_SET_MODE_##n: printed = scnprintf(bf, size, #n); break
+ P_SECCOMP_SET_MODE_OP(STRICT);
+ P_SECCOMP_SET_MODE_OP(FILTER);
+#undef P_SECCOMP_SET_MODE_OP
+ default: printed = scnprintf(bf, size, "%#x", op); break;
+ }
+
+ return printed;
+}
+
+#define SCA_SECCOMP_OP syscall_arg__scnprintf_seccomp_op
+
+#ifndef SECCOMP_FILTER_FLAG_TSYNC
+#define SECCOMP_FILTER_FLAG_TSYNC 1
+#endif
+
+static size_t syscall_arg__scnprintf_seccomp_flags(char *bf, size_t size,
+ struct syscall_arg *arg)
+{
+ int printed = 0, flags = arg->val;
+
+#define P_FLAG(n) \
+ if (flags & SECCOMP_FILTER_FLAG_##n) { \
+ printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+ flags &= ~SECCOMP_FILTER_FLAG_##n; \
+ }
+
+ P_FLAG(TSYNC);
+#undef P_FLAG
+
+ if (flags)
+ printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+ return printed;
+}
+
+#define SCA_SECCOMP_FLAGS syscall_arg__scnprintf_seccomp_flags
diff --git a/tools/perf/trace/beauty/signum.c b/tools/perf/trace/beauty/signum.c
new file mode 100644
index 0000000..d3b0b1f
--- /dev/null
+++ b/tools/perf/trace/beauty/signum.c
@@ -0,0 +1,53 @@
+
+static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
+{
+ int sig = arg->val;
+
+ switch (sig) {
+#define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
+ P_SIGNUM(HUP);
+ P_SIGNUM(INT);
+ P_SIGNUM(QUIT);
+ P_SIGNUM(ILL);
+ P_SIGNUM(TRAP);
+ P_SIGNUM(ABRT);
+ P_SIGNUM(BUS);
+ P_SIGNUM(FPE);
+ P_SIGNUM(KILL);
+ P_SIGNUM(USR1);
+ P_SIGNUM(SEGV);
+ P_SIGNUM(USR2);
+ P_SIGNUM(PIPE);
+ P_SIGNUM(ALRM);
+ P_SIGNUM(TERM);
+ P_SIGNUM(CHLD);
+ P_SIGNUM(CONT);
+ P_SIGNUM(STOP);
+ P_SIGNUM(TSTP);
+ P_SIGNUM(TTIN);
+ P_SIGNUM(TTOU);
+ P_SIGNUM(URG);
+ P_SIGNUM(XCPU);
+ P_SIGNUM(XFSZ);
+ P_SIGNUM(VTALRM);
+ P_SIGNUM(PROF);
+ P_SIGNUM(WINCH);
+ P_SIGNUM(IO);
+ P_SIGNUM(PWR);
+ P_SIGNUM(SYS);
+#ifdef SIGEMT
+ P_SIGNUM(EMT);
+#endif
+#ifdef SIGSTKFLT
+ P_SIGNUM(STKFLT);
+#endif
+#ifdef SIGSWI
+ P_SIGNUM(SWI);
+#endif
+ default: break;
+ }
+
+ return scnprintf(bf, size, "%#x", sig);
+}
+
+#define SCA_SIGNUM syscall_arg__scnprintf_signum
diff --git a/tools/perf/trace/beauty/socket_type.c b/tools/perf/trace/beauty/socket_type.c
new file mode 100644
index 0000000..0a5ce81
--- /dev/null
+++ b/tools/perf/trace/beauty/socket_type.c
@@ -0,0 +1,60 @@
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#ifndef SOCK_DCCP
+# define SOCK_DCCP 6
+#endif
+
+#ifndef SOCK_CLOEXEC
+# define SOCK_CLOEXEC 02000000
+#endif
+
+#ifndef SOCK_NONBLOCK
+# define SOCK_NONBLOCK 00004000
+#endif
+
+#ifndef SOCK_TYPE_MASK
+#define SOCK_TYPE_MASK 0xf
+#endif
+
+static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size, struct syscall_arg *arg)
+{
+ size_t printed;
+ int type = arg->val,
+ flags = type & ~SOCK_TYPE_MASK;
+
+ type &= SOCK_TYPE_MASK;
+ /*
+ * Can't use a strarray, MIPS may override for ABI reasons.
+ */
+ switch (type) {
+#define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
+ P_SK_TYPE(STREAM);
+ P_SK_TYPE(DGRAM);
+ P_SK_TYPE(RAW);
+ P_SK_TYPE(RDM);
+ P_SK_TYPE(SEQPACKET);
+ P_SK_TYPE(DCCP);
+ P_SK_TYPE(PACKET);
+#undef P_SK_TYPE
+ default:
+ printed = scnprintf(bf, size, "%#x", type);
+ }
+
+#define P_SK_FLAG(n) \
+ if (flags & SOCK_##n) { \
+ printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
+ flags &= ~SOCK_##n; \
+ }
+
+ P_SK_FLAG(CLOEXEC);
+ P_SK_FLAG(NONBLOCK);
+#undef P_SK_FLAG
+
+ if (flags)
+ printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
+
+ return printed;
+}
+
+#define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
diff --git a/tools/perf/trace/beauty/waitid_options.c b/tools/perf/trace/beauty/waitid_options.c
new file mode 100644
index 0000000..7942724
--- /dev/null
+++ b/tools/perf/trace/beauty/waitid_options.c
@@ -0,0 +1,26 @@
+#include <sys/types.h>
+#include <sys/wait.h>
+
+static size_t syscall_arg__scnprintf_waitid_options(char *bf, size_t size,
+ struct syscall_arg *arg)
+{
+ int printed = 0, options = arg->val;
+
+#define P_OPTION(n) \
+ if (options & W##n) { \
+ printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+ options &= ~W##n; \
+ }
+
+ P_OPTION(NOHANG);
+ P_OPTION(UNTRACED);
+ P_OPTION(CONTINUED);
+#undef P_OPTION
+
+ if (options)
+ printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", options);
+
+ return printed;
+}
+
+#define SCA_WAITID_OPTIONS syscall_arg__scnprintf_waitid_options
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 2a83414..538bae8 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -1607,9 +1607,8 @@
ret = fmt->header(fmt, &dummy_hpp, hists_to_evsel(hists));
dummy_hpp.buf[ret] = '\0';
- rtrim(dummy_hpp.buf);
- start = ltrim(dummy_hpp.buf);
+ start = trim(dummy_hpp.buf);
ret = strlen(start);
if (start != dummy_hpp.buf)
@@ -1897,11 +1896,10 @@
bool first = true;
int ret;
- if (symbol_conf.use_callchain)
+ if (symbol_conf.use_callchain) {
folded_sign = hist_entry__folded(he);
-
- if (symbol_conf.use_callchain)
printed += fprintf(fp, "%c ", folded_sign);
+ }
hists__for_each_format(browser->hists, fmt) {
if (perf_hpp__should_skip(fmt, he->hists))
@@ -2137,7 +2135,7 @@
printed += snprintf(bf + printed, size - printed,
", UID: %s", hists->uid_filter_str);
if (thread) {
- if (sort__has_thread) {
+ if (hists__has(hists, thread)) {
printed += scnprintf(bf + printed, size - printed,
", Thread: %s(%d)",
(thread->comm_set ? thread__comm_str(thread) : ""),
@@ -2322,7 +2320,8 @@
{
struct thread *thread = act->thread;
- if ((!sort__has_thread && !sort__has_comm) || thread == NULL)
+ if ((!hists__has(browser->hists, thread) &&
+ !hists__has(browser->hists, comm)) || thread == NULL)
return 0;
if (browser->hists->thread_filter) {
@@ -2331,7 +2330,7 @@
thread__zput(browser->hists->thread_filter);
ui_helpline__pop();
} else {
- if (sort__has_thread) {
+ if (hists__has(browser->hists, thread)) {
ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s(%d) thread\"",
thread->comm_set ? thread__comm_str(thread) : "",
thread->tid);
@@ -2356,10 +2355,11 @@
{
int ret;
- if ((!sort__has_thread && !sort__has_comm) || thread == NULL)
+ if ((!hists__has(browser->hists, thread) &&
+ !hists__has(browser->hists, comm)) || thread == NULL)
return 0;
- if (sort__has_thread) {
+ if (hists__has(browser->hists, thread)) {
ret = asprintf(optstr, "Zoom %s %s(%d) thread",
browser->hists->thread_filter ? "out of" : "into",
thread->comm_set ? thread__comm_str(thread) : "",
@@ -2382,7 +2382,7 @@
{
struct map *map = act->ms.map;
- if (!sort__has_dso || map == NULL)
+ if (!hists__has(browser->hists, dso) || map == NULL)
return 0;
if (browser->hists->dso_filter) {
@@ -2409,7 +2409,7 @@
add_dso_opt(struct hist_browser *browser, struct popup_action *act,
char **optstr, struct map *map)
{
- if (!sort__has_dso || map == NULL)
+ if (!hists__has(browser->hists, dso) || map == NULL)
return 0;
if (asprintf(optstr, "Zoom %s %s DSO",
@@ -2431,10 +2431,10 @@
}
static int
-add_map_opt(struct hist_browser *browser __maybe_unused,
+add_map_opt(struct hist_browser *browser,
struct popup_action *act, char **optstr, struct map *map)
{
- if (!sort__has_dso || map == NULL)
+ if (!hists__has(browser->hists, dso) || map == NULL)
return 0;
if (asprintf(optstr, "Browse map details") < 0)
@@ -2536,7 +2536,7 @@
static int
do_zoom_socket(struct hist_browser *browser, struct popup_action *act)
{
- if (!sort__has_socket || act->socket < 0)
+ if (!hists__has(browser->hists, socket) || act->socket < 0)
return 0;
if (browser->hists->socket_filter > -1) {
@@ -2558,7 +2558,7 @@
add_socket_opt(struct hist_browser *browser, struct popup_action *act,
char **optstr, int socket_id)
{
- if (!sort__has_socket || socket_id < 0)
+ if (!hists__has(browser->hists, socket) || socket_id < 0)
return 0;
if (asprintf(optstr, "Zoom %s Processor Socket %d",
@@ -2749,7 +2749,7 @@
*/
goto out_free_stack;
case 'a':
- if (!sort__has_sym) {
+ if (!hists__has(hists, sym)) {
ui_browser__warning(&browser->b, delay_secs * 2,
"Annotation is only available for symbolic views, "
"include \"sym*\" in --sort to use it.");
@@ -2912,7 +2912,7 @@
continue;
}
- if (!sort__has_sym || browser->selection == NULL)
+ if (!hists__has(hists, sym) || browser->selection == NULL)
goto skip_annotation;
if (sort__mode == SORT_MODE__BRANCH) {
@@ -2956,7 +2956,7 @@
goto skip_scripting;
if (browser->he_selection) {
- if (sort__has_thread && thread) {
+ if (hists__has(hists, thread) && thread) {
nr_options += add_script_opt(browser,
&actions[nr_options],
&options[nr_options],
@@ -2971,7 +2971,7 @@
*
* See hist_browser__show_entry.
*/
- if (sort__has_sym && browser->selection->sym) {
+ if (hists__has(hists, sym) && browser->selection->sym) {
nr_options += add_script_opt(browser,
&actions[nr_options],
&options[nr_options],
diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c
index 2aa45b6..932adfa 100644
--- a/tools/perf/ui/gtk/hists.c
+++ b/tools/perf/ui/gtk/hists.c
@@ -379,7 +379,7 @@
gtk_tree_store_set(store, &iter, col_idx++, s, -1);
}
- if (symbol_conf.use_callchain && sort__has_sym) {
+ if (symbol_conf.use_callchain && hists__has(hists, sym)) {
if (callchain_param.mode == CHAIN_GRAPH_REL)
total = symbol_conf.cumulate_callchain ?
h->stat_acc->period : h->stat.period;
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index 3baeaa6..af07ffb 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -635,7 +635,7 @@
ret += fmt->width(fmt, &dummy_hpp, hists_to_evsel(hists));
}
- if (verbose && sort__has_sym) /* Addr + origin */
+ if (verbose && hists__has(hists, sym)) /* Addr + origin */
ret += 3 + BITS_PER_LONG / 4;
return ret;
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index 7aff5ac..560eb47 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -569,9 +569,8 @@
first_col = false;
fmt->header(fmt, hpp, hists_to_evsel(hists));
- rtrim(hpp->buf);
- header_width += fprintf(fp, "%s", ltrim(hpp->buf));
+ header_width += fprintf(fp, "%s", trim(hpp->buf));
}
}
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index da48fd8..8c6c8a0 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -8,6 +8,7 @@
libperf-y += event.o
libperf-y += evlist.o
libperf-y += evsel.o
+libperf-y += evsel_fprintf.o
libperf-y += find_bit.o
libperf-y += kallsyms.o
libperf-y += levenshtein.o
@@ -26,9 +27,9 @@
libperf-y += strfilter.o
libperf-y += top.o
libperf-y += usage.o
-libperf-y += wrapper.o
libperf-y += dso.o
libperf-y += symbol.o
+libperf-y += symbol_fprintf.o
libperf-y += color.o
libperf-y += header.o
libperf-y += callchain.o
@@ -38,6 +39,7 @@
libperf-y += map.o
libperf-y += pstack.o
libperf-y += session.o
+libperf-$(CONFIG_AUDIT) += syscalltbl.o
libperf-y += ordered-events.o
libperf-y += comm.o
libperf-y += thread.o
@@ -69,9 +71,9 @@
libperf-y += record.o
libperf-y += srcline.o
libperf-y += data.o
-libperf-$(CONFIG_X86) += tsc.o
-libperf-$(CONFIG_AUXTRACE) += tsc.o
+libperf-y += tsc.o
libperf-y += cloexec.o
+libperf-y += call-path.o
libperf-y += thread-stack.o
libperf-$(CONFIG_AUXTRACE) += auxtrace.o
libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index b795b69..4db73d5 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -1138,7 +1138,7 @@
if (dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS &&
!dso__is_kcore(dso)) {
- char bf[BUILD_ID_SIZE * 2 + 16] = " with build id ";
+ char bf[SBUILD_ID_SIZE + 15] = " with build id ";
char *build_id_msg = NULL;
if (dso->annotate_warned)
@@ -1665,5 +1665,5 @@
bool ui__has_annotation(void)
{
- return use_browser == 1 && sort__has_sym;
+ return use_browser == 1 && perf_hpp_list.sym;
}
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index ec164fe..c916901 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -940,6 +940,7 @@
synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ;
synth_opts->last_branch_sz = PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ;
+ synth_opts->initial_skip = 0;
}
/*
@@ -1064,6 +1065,12 @@
synth_opts->last_branch_sz = val;
}
break;
+ case 's':
+ synth_opts->initial_skip = strtoul(p, &endptr, 10);
+ if (p == endptr)
+ goto out_err;
+ p = endptr;
+ break;
case ' ':
case ',':
break;
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index 57ff31e..767989e 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -68,6 +68,7 @@
* @last_branch_sz: branch context size
* @period: 'instructions' events period
* @period_type: 'instructions' events period type
+ * @initial_skip: skip N events at the beginning.
*/
struct itrace_synth_opts {
bool set;
@@ -86,6 +87,7 @@
unsigned int last_branch_sz;
unsigned long long period;
enum itrace_period_type period_type;
+ unsigned long initial_skip;
};
/**
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index 0967ce6..493307d 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -842,6 +842,58 @@
return op;
}
+static struct bpf_map_op *
+bpf_map_op__clone(struct bpf_map_op *op)
+{
+ struct bpf_map_op *newop;
+
+ newop = memdup(op, sizeof(*op));
+ if (!newop) {
+ pr_debug("Failed to alloc bpf_map_op\n");
+ return NULL;
+ }
+
+ INIT_LIST_HEAD(&newop->list);
+ if (op->key_type == BPF_MAP_KEY_RANGES) {
+ size_t memsz = op->k.array.nr_ranges *
+ sizeof(op->k.array.ranges[0]);
+
+ newop->k.array.ranges = memdup(op->k.array.ranges, memsz);
+ if (!newop->k.array.ranges) {
+ pr_debug("Failed to alloc indices for map\n");
+ free(newop);
+ return NULL;
+ }
+ }
+
+ return newop;
+}
+
+static struct bpf_map_priv *
+bpf_map_priv__clone(struct bpf_map_priv *priv)
+{
+ struct bpf_map_priv *newpriv;
+ struct bpf_map_op *pos, *newop;
+
+ newpriv = zalloc(sizeof(*newpriv));
+ if (!newpriv) {
+ pr_debug("No enough memory to alloc map private\n");
+ return NULL;
+ }
+ INIT_LIST_HEAD(&newpriv->ops_list);
+
+ list_for_each_entry(pos, &priv->ops_list, list) {
+ newop = bpf_map_op__clone(pos);
+ if (!newop) {
+ bpf_map_priv__purge(newpriv);
+ return NULL;
+ }
+ list_add_tail(&newop->list, &newpriv->ops_list);
+ }
+
+ return newpriv;
+}
+
static int
bpf_map__add_op(struct bpf_map *map, struct bpf_map_op *op)
{
@@ -1417,6 +1469,89 @@
return 0;
}
+#define bpf__for_each_map(pos, obj, objtmp) \
+ bpf_object__for_each_safe(obj, objtmp) \
+ bpf_map__for_each(pos, obj)
+
+#define bpf__for_each_stdout_map(pos, obj, objtmp) \
+ bpf__for_each_map(pos, obj, objtmp) \
+ if (bpf_map__get_name(pos) && \
+ (strcmp("__bpf_stdout__", \
+ bpf_map__get_name(pos)) == 0))
+
+int bpf__setup_stdout(struct perf_evlist *evlist)
+{
+ struct bpf_map_priv *tmpl_priv = NULL;
+ struct bpf_object *obj, *tmp;
+ struct perf_evsel *evsel = NULL;
+ struct bpf_map *map;
+ int err;
+ bool need_init = false;
+
+ bpf__for_each_stdout_map(map, obj, tmp) {
+ struct bpf_map_priv *priv;
+
+ err = bpf_map__get_private(map, (void **)&priv);
+ if (err)
+ return -BPF_LOADER_ERRNO__INTERNAL;
+
+ /*
+ * No need to check map type: type should have been
+ * verified by kernel.
+ */
+ if (!priv)
+ need_init = true;
+ if (!tmpl_priv && priv)
+ tmpl_priv = priv;
+ }
+
+ if (!need_init)
+ return 0;
+
+ if (!tmpl_priv) {
+ err = parse_events(evlist, "bpf-output/no-inherit=1,name=__bpf_stdout__/",
+ NULL);
+ if (err) {
+ pr_debug("ERROR: failed to create bpf-output event\n");
+ return -err;
+ }
+
+ evsel = perf_evlist__last(evlist);
+ }
+
+ bpf__for_each_stdout_map(map, obj, tmp) {
+ struct bpf_map_priv *priv;
+
+ err = bpf_map__get_private(map, (void **)&priv);
+ if (err)
+ return -BPF_LOADER_ERRNO__INTERNAL;
+ if (priv)
+ continue;
+
+ if (tmpl_priv) {
+ priv = bpf_map_priv__clone(tmpl_priv);
+ if (!priv)
+ return -ENOMEM;
+
+ err = bpf_map__set_private(map, priv, bpf_map_priv__clear);
+ if (err) {
+ bpf_map_priv__clear(map, priv);
+ return err;
+ }
+ } else if (evsel) {
+ struct bpf_map_op *op;
+
+ op = bpf_map__add_newop(map, NULL);
+ if (IS_ERR(op))
+ return PTR_ERR(op);
+ op->op_type = BPF_MAP_OP_SET_EVSEL;
+ op->v.evsel = evsel;
+ }
+ }
+
+ return 0;
+}
+
#define ERRNO_OFFSET(e) ((e) - __BPF_LOADER_ERRNO__START)
#define ERRCODE_OFFSET(c) ERRNO_OFFSET(BPF_LOADER_ERRNO__##c)
#define NR_ERRNO (__BPF_LOADER_ERRNO__END - __BPF_LOADER_ERRNO__START)
@@ -1590,3 +1725,11 @@
bpf__strerror_end(buf, size);
return 0;
}
+
+int bpf__strerror_setup_stdout(struct perf_evlist *evlist __maybe_unused,
+ int err, char *buf, size_t size)
+{
+ bpf__strerror_head(err, buf, size);
+ bpf__strerror_end(buf, size);
+ return 0;
+}
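
bpf_map_op__clone() above follows the usual deep-copy discipline for a struct with owned pointer members: memdup() the fixed-size part, then re-duplicate each pointed-to buffer, freeing the shallow copy if that fails. A generic sketch of the pattern with illustrative names (struct ranges and memdup_sketch are not from the patch):

#include <stdlib.h>
#include <string.h>

struct ranges {
	size_t nr;
	int *vals;
};

static void *memdup_sketch(const void *src, size_t sz)
{
	void *p = malloc(sz);

	return p ? memcpy(p, src, sz) : NULL;
}

static struct ranges *ranges__clone(const struct ranges *r)
{
	struct ranges *newr = memdup_sketch(r, sizeof(*r));

	if (!newr)
		return NULL;
	newr->vals = memdup_sketch(r->vals, r->nr * sizeof(r->vals[0]));
	if (!newr->vals) {
		free(newr);		/* don't leak the shallow copy */
		return NULL;
	}
	return newr;
}

int main(void)
{
	int vals[] = { 1, 2, 3 };
	struct ranges r = { .nr = 3, .vals = vals };
	struct ranges *copy = ranges__clone(&r);

	if (copy) {
		free(copy->vals);
		free(copy);
	}
	return 0;
}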
diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h
index be43119..941e172 100644
--- a/tools/perf/util/bpf-loader.h
+++ b/tools/perf/util/bpf-loader.h
@@ -79,6 +79,11 @@
size_t size);
int bpf__apply_obj_config(void);
int bpf__strerror_apply_obj_config(int err, char *buf, size_t size);
+
+int bpf__setup_stdout(struct perf_evlist *evlist);
+int bpf__strerror_setup_stdout(struct perf_evlist *evlist, int err,
+ char *buf, size_t size);
+
#else
static inline struct bpf_object *
bpf__prepare_load(const char *filename __maybe_unused,
@@ -125,6 +130,12 @@
}
static inline int
+bpf__setup_stdout(struct perf_evlist *evlist __maybe_unused)
+{
+ return 0;
+}
+
+static inline int
__bpf_strerror(char *buf, size_t size)
{
if (!size)
@@ -177,5 +188,13 @@
{
return __bpf_strerror(buf, size);
}
+
+static inline int
+bpf__strerror_setup_stdout(struct perf_evlist *evlist __maybe_unused,
+ int err __maybe_unused, char *buf,
+ size_t size)
+{
+ return __bpf_strerror(buf, size);
+}
#endif
#endif
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 0573c2e..bff425e 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -261,14 +261,14 @@
if (dso__is_vdso(pos)) {
name = pos->short_name;
- name_len = pos->short_name_len + 1;
+ name_len = pos->short_name_len;
} else if (dso__is_kcore(pos)) {
machine__mmap_name(machine, nm, sizeof(nm));
name = nm;
- name_len = strlen(nm) + 1;
+ name_len = strlen(nm);
} else {
name = pos->long_name;
- name_len = pos->long_name_len + 1;
+ name_len = pos->long_name_len;
}
in_kernel = pos->kernel ||
@@ -365,39 +365,17 @@
int build_id_cache__list_build_ids(const char *pathname,
struct strlist **result)
{
- struct strlist *list;
char *dir_name;
- DIR *dir;
- struct dirent *d;
int ret = 0;
- list = strlist__new(NULL, NULL);
dir_name = build_id_cache__dirname_from_path(pathname, false, false);
- if (!list || !dir_name) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!dir_name)
+ return -ENOMEM;
- /* List up all dirents */
- dir = opendir(dir_name);
- if (!dir) {
+ *result = lsdir(dir_name, lsdir_no_dot_filter);
+ if (!*result)
ret = -errno;
- goto out;
- }
-
- while ((d = readdir(dir)) != NULL) {
- if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, ".."))
- continue;
- strlist__add(list, d->d_name);
- }
- closedir(dir);
-
-out:
free(dir_name);
- if (ret)
- strlist__delete(list);
- else
- *result = list;
return ret;
}
diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h
index 1f5a93c..0d814bb 100644
--- a/tools/perf/util/cache.h
+++ b/tools/perf/util/cache.h
@@ -40,25 +40,6 @@
#define alloc_nr(x) (((x)+16)*3/2)
-/*
- * Realloc the buffer pointed at by variable 'x' so that it can hold
- * at least 'nr' entries; the number of entries currently allocated
- * is 'alloc', using the standard growing factor alloc_nr() macro.
- *
- * DO NOT USE any expression with side-effect for 'x' or 'alloc'.
- */
-#define ALLOC_GROW(x, nr, alloc) \
- do { \
- if ((nr) > alloc) { \
- if (alloc_nr(alloc) < (nr)) \
- alloc = (nr); \
- else \
- alloc = alloc_nr(alloc); \
- x = xrealloc((x), alloc * sizeof(*(x))); \
- } \
- } while(0)
-
-
static inline int is_absolute_path(const char *path)
{
return path[0] == '/';
diff --git a/tools/perf/util/call-path.c b/tools/perf/util/call-path.c
new file mode 100644
index 0000000..904a170
--- /dev/null
+++ b/tools/perf/util/call-path.c
@@ -0,0 +1,122 @@
+/*
+ * call-path.c: Manipulate a tree data structure containing function call paths
+ * Copyright (c) 2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <linux/rbtree.h>
+#include <linux/list.h>
+
+#include "util.h"
+#include "call-path.h"
+
+static void call_path__init(struct call_path *cp, struct call_path *parent,
+ struct symbol *sym, u64 ip, bool in_kernel)
+{
+ cp->parent = parent;
+ cp->sym = sym;
+ cp->ip = sym ? 0 : ip;
+ cp->db_id = 0;
+ cp->in_kernel = in_kernel;
+ RB_CLEAR_NODE(&cp->rb_node);
+ cp->children = RB_ROOT;
+}
+
+struct call_path_root *call_path_root__new(void)
+{
+ struct call_path_root *cpr;
+
+ cpr = zalloc(sizeof(struct call_path_root));
+ if (!cpr)
+ return NULL;
+ call_path__init(&cpr->call_path, NULL, NULL, 0, false);
+ INIT_LIST_HEAD(&cpr->blocks);
+ return cpr;
+}
+
+void call_path_root__free(struct call_path_root *cpr)
+{
+ struct call_path_block *pos, *n;
+
+ list_for_each_entry_safe(pos, n, &cpr->blocks, node) {
+ list_del(&pos->node);
+ free(pos);
+ }
+ free(cpr);
+}
+
+static struct call_path *call_path__new(struct call_path_root *cpr,
+ struct call_path *parent,
+ struct symbol *sym, u64 ip,
+ bool in_kernel)
+{
+ struct call_path_block *cpb;
+ struct call_path *cp;
+ size_t n;
+
+ if (cpr->next < cpr->sz) {
+ cpb = list_last_entry(&cpr->blocks, struct call_path_block,
+ node);
+ } else {
+ cpb = zalloc(sizeof(struct call_path_block));
+ if (!cpb)
+ return NULL;
+ list_add_tail(&cpb->node, &cpr->blocks);
+ cpr->sz += CALL_PATH_BLOCK_SIZE;
+ }
+
+ n = cpr->next++ & CALL_PATH_BLOCK_MASK;
+ cp = &cpb->cp[n];
+
+ call_path__init(cp, parent, sym, ip, in_kernel);
+
+ return cp;
+}
+
+struct call_path *call_path__findnew(struct call_path_root *cpr,
+ struct call_path *parent,
+ struct symbol *sym, u64 ip, u64 ks)
+{
+ struct rb_node **p;
+ struct rb_node *node_parent = NULL;
+ struct call_path *cp;
+ bool in_kernel = ip >= ks;
+
+ if (sym)
+ ip = 0;
+
+ if (!parent)
+ return call_path__new(cpr, parent, sym, ip, in_kernel);
+
+ p = &parent->children.rb_node;
+ while (*p != NULL) {
+ node_parent = *p;
+ cp = rb_entry(node_parent, struct call_path, rb_node);
+
+ if (cp->sym == sym && cp->ip == ip)
+ return cp;
+
+ if (sym < cp->sym || (sym == cp->sym && ip < cp->ip))
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+
+ cp = call_path__new(cpr, parent, sym, ip, in_kernel);
+ if (!cp)
+ return NULL;
+
+ rb_link_node(&cp->rb_node, node_parent, p);
+ rb_insert_color(&cp->rb_node, &parent->children);
+
+ return cp;
+}
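
A usage sketch for the interning API above, assuming the declarations from call-path.h; syma, symb and kernel_start are stand-ins for symbols resolved elsewhere and for the machine's kernel start address:

#include <assert.h>
#include "call-path.h"

static void build_two_level_path(struct symbol *syma, struct symbol *symb,
				 u64 kernel_start)
{
	struct call_path_root *cpr = call_path_root__new();
	struct call_path *a, *b;

	if (!cpr)
		return;

	/* children hang off the root's embedded call_path */
	a = call_path__findnew(cpr, &cpr->call_path, syma, 0, kernel_start);
	b = call_path__findnew(cpr, a, symb, 0, kernel_start);

	/* paths are interned: repeating a lookup yields the same node */
	assert(call_path__findnew(cpr, a, symb, 0, kernel_start) == b);

	call_path_root__free(cpr);
}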
diff --git a/tools/perf/util/call-path.h b/tools/perf/util/call-path.h
new file mode 100644
index 0000000..477f6d0
--- /dev/null
+++ b/tools/perf/util/call-path.h
@@ -0,0 +1,77 @@
+/*
+ * call-path.h: Manipulate a tree data structure containing function call paths
+ * Copyright (c) 2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef __PERF_CALL_PATH_H
+#define __PERF_CALL_PATH_H
+
+#include <sys/types.h>
+
+#include <linux/types.h>
+#include <linux/rbtree.h>
+
+/**
+ * struct call_path - node in list of calls leading to a function call.
+ * @parent: call path to the parent function call
+ * @sym: symbol of function called
+ * @ip: only if sym is null, the ip of the function
+ * @db_id: id used for db-export
+ * @in_kernel: whether the function is in the kernel
+ * @rb_node: node in parent's tree of called functions
+ * @children: tree of call paths of functions called
+ *
+ * In combination with the call_return structure, the call_path structure
+ * defines a context-sensitive call-graph.
+ */
+struct call_path {
+ struct call_path *parent;
+ struct symbol *sym;
+ u64 ip;
+ u64 db_id;
+ bool in_kernel;
+ struct rb_node rb_node;
+ struct rb_root children;
+};
+
+#define CALL_PATH_BLOCK_SHIFT 8
+#define CALL_PATH_BLOCK_SIZE (1 << CALL_PATH_BLOCK_SHIFT)
+#define CALL_PATH_BLOCK_MASK (CALL_PATH_BLOCK_SIZE - 1)
+
+struct call_path_block {
+ struct call_path cp[CALL_PATH_BLOCK_SIZE];
+ struct list_head node;
+};
+
+/**
+ * struct call_path_root - root of all call paths.
+ * @call_path: root call path
+ * @blocks: list of blocks to store call paths
+ * @next: next free space
+ * @sz: number of spaces
+ */
+struct call_path_root {
+ struct call_path call_path;
+ struct list_head blocks;
+ size_t next;
+ size_t sz;
+};
+
+struct call_path_root *call_path_root__new(void);
+void call_path_root__free(struct call_path_root *cpr);
+
+struct call_path *call_path__findnew(struct call_path_root *cpr,
+ struct call_path *parent,
+ struct symbol *sym, u64 ip, u64 ks);
+
+#endif
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 24b4bd0..07fd30b 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -109,6 +109,7 @@
bool record_opt_set = false;
bool try_stack_size = false;
+ callchain_param.enabled = true;
symbol_conf.use_callchain = true;
if (!arg)
@@ -117,6 +118,7 @@
while ((tok = strtok((char *)arg, ",")) != NULL) {
if (!strncmp(tok, "none", strlen(tok))) {
callchain_param.mode = CHAIN_NONE;
+ callchain_param.enabled = false;
symbol_conf.use_callchain = false;
return 0;
}
@@ -788,7 +790,8 @@
return 0;
}
-int sample__resolve_callchain(struct perf_sample *sample, struct symbol **parent,
+int sample__resolve_callchain(struct perf_sample *sample,
+ struct callchain_cursor *cursor, struct symbol **parent,
struct perf_evsel *evsel, struct addr_location *al,
int max_stack)
{
@@ -796,8 +799,8 @@
return 0;
if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain ||
- sort__has_parent) {
- return thread__resolve_callchain(al->thread, evsel, sample,
+ perf_hpp_list.parent) {
+ return thread__resolve_callchain(al->thread, cursor, evsel, sample,
parent, al, max_stack);
}
return 0;
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index d2a9e69..65e2a4f 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -212,7 +212,14 @@
int record_parse_callchain_opt(const struct option *opt, const char *arg, int unset);
int record_callchain_opt(const struct option *opt, const char *arg, int unset);
-int sample__resolve_callchain(struct perf_sample *sample, struct symbol **parent,
+struct record_opts;
+
+int record_opts__parse_callchain(struct record_opts *record,
+ struct callchain_param *callchain,
+ const char *arg, bool unset);
+
+int sample__resolve_callchain(struct perf_sample *sample,
+ struct callchain_cursor *cursor, struct symbol **parent,
struct perf_evsel *evsel, struct addr_location *al,
int max_stack);
int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *sample);
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index 4e72763..dad7d82 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -13,6 +13,7 @@
#include <subcmd/exec-cmd.h>
#include "util/hist.h" /* perf_hist_config */
#include "util/llvm-utils.h" /* perf_llvm_config */
+#include "config.h"
#define MAXNAME (256)
@@ -377,6 +378,21 @@
return value;
}
+static int perf_buildid_config(const char *var, const char *value)
+{
+ /* same dir for all commands */
+ if (!strcmp(var, "buildid.dir")) {
+ const char *dir = perf_config_dirname(var, value);
+
+ if (!dir)
+ return -1;
+ strncpy(buildid_dir, dir, MAXPATHLEN-1);
+ buildid_dir[MAXPATHLEN-1] = '\0';
+ }
+
+ return 0;
+}
+
static int perf_default_core_config(const char *var __maybe_unused,
const char *value __maybe_unused)
{
@@ -412,6 +428,9 @@
if (!prefixcmp(var, "llvm."))
return perf_llvm_config(var, value);
+ if (!prefixcmp(var, "buildid."))
+ return perf_buildid_config(var, value);
+
/* Add other config variables here. */
return 0;
}
@@ -506,6 +525,178 @@
return ret;
}
+static struct perf_config_section *find_section(struct list_head *sections,
+ const char *section_name)
+{
+ struct perf_config_section *section;
+
+ list_for_each_entry(section, sections, node)
+ if (!strcmp(section->name, section_name))
+ return section;
+
+ return NULL;
+}
+
+static struct perf_config_item *find_config_item(const char *name,
+ struct perf_config_section *section)
+{
+ struct perf_config_item *item;
+
+ list_for_each_entry(item, &section->items, node)
+ if (!strcmp(item->name, name))
+ return item;
+
+ return NULL;
+}
+
+static struct perf_config_section *add_section(struct list_head *sections,
+ const char *section_name)
+{
+ struct perf_config_section *section = zalloc(sizeof(*section));
+
+ if (!section)
+ return NULL;
+
+ INIT_LIST_HEAD(&section->items);
+ section->name = strdup(section_name);
+ if (!section->name) {
+ pr_debug("%s: strdup failed\n", __func__);
+ free(section);
+ return NULL;
+ }
+
+ list_add_tail(&section->node, sections);
+ return section;
+}
+
+static struct perf_config_item *add_config_item(struct perf_config_section *section,
+ const char *name)
+{
+ struct perf_config_item *item = zalloc(sizeof(*item));
+
+ if (!item)
+ return NULL;
+
+ item->name = strdup(name);
+ if (!item->name) {
+ pr_debug("%s: strdup failed\n", __func__);
+ free(item);
+ return NULL;
+ }
+
+ list_add_tail(&item->node, &section->items);
+ return item;
+}
+
+static int set_value(struct perf_config_item *item, const char *value)
+{
+ char *val = strdup(value);
+
+ if (!val)
+ return -1;
+
+ zfree(&item->value);
+ item->value = val;
+ return 0;
+}
+
+static int collect_config(const char *var, const char *value,
+ void *perf_config_set)
+{
+ int ret = -1;
+ char *ptr, *key;
+ char *section_name, *name;
+ struct perf_config_section *section = NULL;
+ struct perf_config_item *item = NULL;
+ struct perf_config_set *set = perf_config_set;
+ struct list_head *sections = &set->sections;
+
+ key = ptr = strdup(var);
+ if (!key) {
+ pr_debug("%s: strdup failed\n", __func__);
+ return -1;
+ }
+
+ section_name = strsep(&ptr, ".");
+ name = ptr;
+ if (name == NULL || value == NULL)
+ goto out_free;
+
+ section = find_section(sections, section_name);
+ if (!section) {
+ section = add_section(sections, section_name);
+ if (!section)
+ goto out_free;
+ }
+
+ item = find_config_item(name, section);
+ if (!item) {
+ item = add_config_item(section, name);
+ if (!item)
+ goto out_free;
+ }
+
+ ret = set_value(item, value);
+
+out_free:
+ free(key);
+ return ret;
+}
+
+struct perf_config_set *perf_config_set__new(void)
+{
+ struct perf_config_set *set = zalloc(sizeof(*set));
+
+ if (set) {
+ INIT_LIST_HEAD(&set->sections);
+ perf_config(collect_config, set);
+ }
+
+ return set;
+}
+
+static void perf_config_item__delete(struct perf_config_item *item)
+{
+ zfree(&item->name);
+ zfree(&item->value);
+ free(item);
+}
+
+static void perf_config_section__purge(struct perf_config_section *section)
+{
+ struct perf_config_item *item, *tmp;
+
+ list_for_each_entry_safe(item, tmp, &section->items, node) {
+ list_del_init(&item->node);
+ perf_config_item__delete(item);
+ }
+}
+
+static void perf_config_section__delete(struct perf_config_section *section)
+{
+ perf_config_section__purge(section);
+ zfree(&section->name);
+ free(section);
+}
+
+static void perf_config_set__purge(struct perf_config_set *set)
+{
+ struct perf_config_section *section, *tmp;
+
+ list_for_each_entry_safe(section, tmp, &set->sections, node) {
+ list_del_init(&section->node);
+ perf_config_section__delete(section);
+ }
+}
+
+void perf_config_set__delete(struct perf_config_set *set)
+{
+ perf_config_set__purge(set);
+ free(set);
+}
+
/*
* Call this to report error for your variable that should not
* get a boolean value (i.e. "[my] var" means "true").
@@ -515,49 +706,18 @@
return error("Missing value for '%s'", var);
}
-struct buildid_dir_config {
- char *dir;
-};
-
-static int buildid_dir_command_config(const char *var, const char *value,
- void *data)
-{
- struct buildid_dir_config *c = data;
- const char *v;
-
- /* same dir for all commands */
- if (!strcmp(var, "buildid.dir")) {
- v = perf_config_dirname(var, value);
- if (!v)
- return -1;
- strncpy(c->dir, v, MAXPATHLEN-1);
- c->dir[MAXPATHLEN-1] = '\0';
- }
- return 0;
-}
-
-static void check_buildid_dir_config(void)
-{
- struct buildid_dir_config c;
- c.dir = buildid_dir;
- perf_config(buildid_dir_command_config, &c);
-}
-
void set_buildid_dir(const char *dir)
{
if (dir)
scnprintf(buildid_dir, MAXPATHLEN-1, "%s", dir);
- /* try config file */
- if (buildid_dir[0] == '\0')
- check_buildid_dir_config();
-
/* default to $HOME/.debug */
if (buildid_dir[0] == '\0') {
- char *v = getenv("HOME");
- if (v) {
+ char *home = getenv("HOME");
+
+ if (home) {
snprintf(buildid_dir, MAXPATHLEN-1, "%s/%s",
- v, DEBUG_CACHE_DIR);
+ home, DEBUG_CACHE_DIR);
} else {
strncpy(buildid_dir, DEBUG_CACHE_DIR, MAXPATHLEN-1);
}
diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h
new file mode 100644
index 0000000..22ec626
--- /dev/null
+++ b/tools/perf/util/config.h
@@ -0,0 +1,26 @@
+#ifndef __PERF_CONFIG_H
+#define __PERF_CONFIG_H
+
+#include <stdbool.h>
+#include <linux/list.h>
+
+struct perf_config_item {
+ char *name;
+ char *value;
+ struct list_head node;
+};
+
+struct perf_config_section {
+ char *name;
+ struct list_head items;
+ struct list_head node;
+};
+
+struct perf_config_set {
+ struct list_head sections;
+};
+
+struct perf_config_set *perf_config_set__new(void);
+void perf_config_set__delete(struct perf_config_set *set);
+
+#endif /* __PERF_CONFIG_H */
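For orientation, here is a minimal sketch of how a consumer could walk the set built by perf_config_set__new(); the config_set__dump() helper is hypothetical and not part of this patch, only the structures and list helpers above are assumed:

	/* Hypothetical: print every section.item = value pair in the set. */
	static void config_set__dump(struct perf_config_set *set, FILE *fp)
	{
		struct perf_config_section *section;
		struct perf_config_item *item;

		list_for_each_entry(section, &set->sections, node)
			list_for_each_entry(item, &section->items, node)
				fprintf(fp, "%s.%s = %s\n", section->name,
					item->name, item->value ?: "(unset)");
	}

	struct perf_config_set *set = perf_config_set__new();

	if (set) {
		config_set__dump(set, stdout);
		perf_config_set__delete(set);
	}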
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 9bcf2be..02d8016 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -587,3 +587,15 @@
closedir(dir1);
return 0;
}
+
+bool cpu_map__has(struct cpu_map *cpus, int cpu)
+{
+ int i;
+
+ for (i = 0; i < cpus->nr; ++i) {
+ if (cpus->map[i] == cpu)
+ return true;
+ }
+
+ return false;
+}
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index 81a2562..1a0a350 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -66,4 +66,6 @@
int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res,
int (*f)(struct cpu_map *map, int cpu, void *data),
void *data);
+
+bool cpu_map__has(struct cpu_map *cpus, int cpu);
#endif /* __PERF_CPUMAP_H */
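A quick usage sketch of the new membership test; cpu_map__new() is the existing cpu-list parser, and the list string here is made up:

	struct cpu_map *cpus = cpu_map__new("0-3,8");	/* parse a CPU list */

	if (cpus && cpu_map__has(cpus, 8))
		printf("CPU 8 is in the map\n");

	cpu_map__put(cpus);	/* drop the reference */

Note that the lookup is a linear scan, which is fine for the small maps perf deals with.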
diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c
index 1921942..be835161 100644
--- a/tools/perf/util/data.c
+++ b/tools/perf/util/data.c
@@ -136,3 +136,44 @@
{
return writen(file->fd, buf, size);
}
+
+int perf_data_file__switch(struct perf_data_file *file,
+ const char *postfix,
+ size_t pos, bool at_exit)
+{
+ char *new_filepath;
+ int ret;
+
+ if (check_pipe(file))
+ return -EINVAL;
+ if (perf_data_file__is_read(file))
+ return -EINVAL;
+
+ if (asprintf(&new_filepath, "%s.%s", file->path, postfix) < 0)
+ return -ENOMEM;
+
+ /*
+ * Only fire a warning, don't return an error, and continue
+ * filling the original file.
+ */
+ if (rename(file->path, new_filepath))
+ pr_warning("Failed to rename %s to %s\n", file->path, new_filepath);
+
+ if (!at_exit) {
+ close(file->fd);
+ ret = perf_data_file__open(file);
+ if (ret < 0)
+ goto out;
+
+ if (lseek(file->fd, pos, SEEK_SET) == (off_t)-1) {
+ ret = -errno;
+ pr_debug("Failed to lseek to %zu: %s",
+ pos, strerror(errno));
+ goto out;
+ }
+ }
+ ret = file->fd;
+out:
+ free(new_filepath);
+ return ret;
+}
diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h
index 2b15d0c..ae510ce 100644
--- a/tools/perf/util/data.h
+++ b/tools/perf/util/data.h
@@ -46,5 +46,14 @@
void perf_data_file__close(struct perf_data_file *file);
ssize_t perf_data_file__write(struct perf_data_file *file,
void *buf, size_t size);
-
+/*
+ * If at_exit is set, only rename the current perf.data to
+ * perf.data.<postfix> and continue writing to the original file.
+ * Set at_exit when flushing the last output.
+ *
+ * The return value is the fd of the new output.
+ */
+int perf_data_file__switch(struct perf_data_file *file,
+ const char *postfix,
+ size_t pos, bool at_exit);
#endif /* __PERF_DATA_H */
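A sketch of the intended call pattern, assuming a record-style caller; 'timestamp', 'header_size' and 'file' are illustrative names, not symbols from this patch:

	/* Rename perf.data to perf.data.<timestamp>, then keep recording
	 * into a fresh perf.data whose header region (header_size bytes)
	 * is skipped on reopen. */
	char postfix[32];
	int fd;

	scnprintf(postfix, sizeof(postfix), "%" PRIu64, timestamp);
	fd = perf_data_file__switch(&file, postfix, header_size, false);
	if (fd < 0)
		pr_err("failed to switch output file\n");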
diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c
index 049438d..8d96c80 100644
--- a/tools/perf/util/db-export.c
+++ b/tools/perf/util/db-export.c
@@ -23,6 +23,8 @@
#include "event.h"
#include "util.h"
#include "thread-stack.h"
+#include "callchain.h"
+#include "call-path.h"
#include "db-export.h"
struct deferred_export {
@@ -258,8 +260,7 @@
if (!al->sym) {
al->sym = symbol__new(al->addr, 0, 0, "unknown");
if (al->sym)
- symbols__insert(&dso->symbols[al->map->type],
- al->sym);
+ dso__insert_symbol(dso, al->map->type, al->sym);
}
if (al->sym) {
@@ -276,6 +277,80 @@
return 0;
}
+static struct call_path *call_path_from_sample(struct db_export *dbe,
+ struct machine *machine,
+ struct thread *thread,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel)
+{
+ u64 kernel_start = machine__kernel_start(machine);
+ struct call_path *current = &dbe->cpr->call_path;
+ enum chain_order saved_order = callchain_param.order;
+ int err;
+
+ if (!symbol_conf.use_callchain || !sample->callchain)
+ return NULL;
+
+ /*
+ * Since the call path tree must be built starting with the root, we
+ * must use ORDER_CALLER for call chain resolution, in order to process
+ * the callchain starting with the root node and ending with the leaf.
+ */
+ callchain_param.order = ORDER_CALLER;
+ err = thread__resolve_callchain(thread, &callchain_cursor, evsel,
+ sample, NULL, NULL,
+ sysctl_perf_event_max_stack);
+ if (err) {
+ callchain_param.order = saved_order;
+ return NULL;
+ }
+ callchain_cursor_commit(&callchain_cursor);
+
+ while (1) {
+ struct callchain_cursor_node *node;
+ struct addr_location al;
+ u64 dso_db_id = 0, sym_db_id = 0, offset = 0;
+
+ memset(&al, 0, sizeof(al));
+
+ node = callchain_cursor_current(&callchain_cursor);
+ if (!node)
+ break;
+ /*
+ * Handle export of symbol and dso for this node by
+ * constructing an addr_location struct and then passing it to
+ * db_ids_from_al() to perform the export.
+ */
+ al.sym = node->sym;
+ al.map = node->map;
+ al.machine = machine;
+ al.addr = node->ip;
+
+ if (al.map && !al.sym)
+ al.sym = dso__find_symbol(al.map->dso, MAP__FUNCTION,
+ al.addr);
+
+ db_ids_from_al(dbe, &al, &dso_db_id, &sym_db_id, &offset);
+
+ /* add node to the call path tree if it doesn't exist */
+ current = call_path__findnew(dbe->cpr, current,
+ al.sym, node->ip,
+ kernel_start);
+
+ callchain_cursor_advance(&callchain_cursor);
+ }
+
+ /* Reset the callchain order to its prior value. */
+ callchain_param.order = saved_order;
+
+ if (current == &dbe->cpr->call_path) {
+ /* Bail because the callchain was empty. */
+ return NULL;
+ }
+
+ return current;
+}
+
int db_export__branch_type(struct db_export *dbe, u32 branch_type,
const char *name)
{
@@ -329,6 +404,16 @@
if (err)
goto out_put;
+ if (dbe->cpr) {
+ struct call_path *cp = call_path_from_sample(dbe, al->machine,
+ thread, sample,
+ evsel);
+ if (cp) {
+ db_export__call_path(dbe, cp);
+ es.call_path_id = cp->db_id;
+ }
+ }
+
if ((evsel->attr.sample_type & PERF_SAMPLE_ADDR) &&
sample_addr_correlates_sym(&evsel->attr)) {
struct addr_location addr_al;
diff --git a/tools/perf/util/db-export.h b/tools/perf/util/db-export.h
index 25e22fd..67bc6b8 100644
--- a/tools/perf/util/db-export.h
+++ b/tools/perf/util/db-export.h
@@ -27,6 +27,7 @@
struct perf_sample;
struct addr_location;
struct call_return_processor;
+struct call_path_root;
struct call_path;
struct call_return;
@@ -43,6 +44,7 @@
u64 addr_dso_db_id;
u64 addr_sym_db_id;
u64 addr_offset; /* addr offset from symbol start */
+ u64 call_path_id;
};
struct db_export {
@@ -64,6 +66,7 @@
int (*export_call_return)(struct db_export *dbe,
struct call_return *cr);
struct call_return_processor *crp;
+ struct call_path_root *cpr;
u64 evsel_last_db_id;
u64 machine_last_db_id;
u64 thread_last_db_id;
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 8e639543..3357479 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -38,7 +38,7 @@
enum dso_binary_type type,
char *root_dir, char *filename, size_t size)
{
- char build_id_hex[BUILD_ID_SIZE * 2 + 1];
+ char build_id_hex[SBUILD_ID_SIZE];
int ret = 0;
size_t len;
@@ -1301,7 +1301,7 @@
size_t dso__fprintf_buildid(struct dso *dso, FILE *fp)
{
- char sbuild_id[BUILD_ID_SIZE * 2 + 1];
+ char sbuild_id[SBUILD_ID_SIZE];
build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id);
return fprintf(fp, "%s", sbuild_id);
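SBUILD_ID_SIZE replaces the open-coded BUILD_ID_SIZE * 2 + 1 pattern: a build-id is BUILD_ID_SIZE raw bytes, printed as twice as many hex digits plus a NUL terminator. In sketch form:

	/* 20 raw build-id bytes -> 40 hex characters + '\0' */
	char sbuild_id[SBUILD_ID_SIZE];		/* == BUILD_ID_SIZE * 2 + 1 */

	build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id);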
diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
index aea189b..a347b19 100644
--- a/tools/perf/util/dwarf-aux.c
+++ b/tools/perf/util/dwarf-aux.c
@@ -915,8 +915,7 @@
tmp = "*";
else if (tag == DW_TAG_subroutine_type) {
/* Function pointer */
- strbuf_add(buf, "(function_type)", 15);
- return 0;
+ return strbuf_add(buf, "(function_type)", 15);
} else {
if (!dwarf_diename(&type))
return -ENOENT;
@@ -927,14 +926,10 @@
else if (tag == DW_TAG_enumeration_type)
tmp = "enum ";
/* Write a base name */
- strbuf_addf(buf, "%s%s", tmp, dwarf_diename(&type));
- return 0;
+ return strbuf_addf(buf, "%s%s", tmp, dwarf_diename(&type));
}
ret = die_get_typename(&type, buf);
- if (ret == 0)
- strbuf_addstr(buf, tmp);
-
- return ret;
+ return ret ? ret : strbuf_addstr(buf, tmp);
}
/**
@@ -951,12 +946,10 @@
ret = die_get_typename(vr_die, buf);
if (ret < 0) {
pr_debug("Failed to get type, make it unknown.\n");
- strbuf_add(buf, " (unknown_type)", 14);
+ ret = strbuf_add(buf, " (unknown_type)", 15);
}
- strbuf_addf(buf, "\t%s", dwarf_diename(vr_die));
-
- return 0;
+ return ret < 0 ? ret : strbuf_addf(buf, "\t%s", dwarf_diename(vr_die));
}
#ifdef HAVE_DWARF_GETLOCATIONS
@@ -999,22 +992,24 @@
}
while ((offset = dwarf_ranges(&scopes[1], offset, &base,
- &start, &end)) > 0) {
+ &start, &end)) > 0) {
start -= entry;
end -= entry;
if (first) {
- strbuf_addf(buf, "@<%s+[%" PRIu64 "-%" PRIu64,
- name, start, end);
+ ret = strbuf_addf(buf, "@<%s+[%" PRIu64 "-%" PRIu64,
+ name, start, end);
first = false;
} else {
- strbuf_addf(buf, ",%" PRIu64 "-%" PRIu64,
- start, end);
+ ret = strbuf_addf(buf, ",%" PRIu64 "-%" PRIu64,
+ start, end);
}
+ if (ret < 0)
+ goto out;
}
if (!first)
- strbuf_add(buf, "]>", 2);
+ ret = strbuf_add(buf, "]>", 2);
out:
free(scopes);
@@ -1054,31 +1049,32 @@
if (dwarf_attr(vr_die, DW_AT_location, &attr) == NULL)
return -EINVAL;
- while ((offset = dwarf_getlocations(
- &attr, offset, &base,
- &start, &end, &op, &nops)) > 0) {
+ while ((offset = dwarf_getlocations(&attr, offset, &base,
+ &start, &end, &op, &nops)) > 0) {
if (start == 0) {
/* Single Location Descriptions */
ret = die_get_var_innermost_scope(sp_die, vr_die, buf);
- return ret;
+ goto out;
}
/* Location Lists */
start -= entry;
end -= entry;
if (first) {
- strbuf_addf(buf, "@<%s+[%" PRIu64 "-%" PRIu64,
- name, start, end);
+ ret = strbuf_addf(buf, "@<%s+[%" PRIu64 "-%" PRIu64,
+ name, start, end);
first = false;
} else {
- strbuf_addf(buf, ",%" PRIu64 "-%" PRIu64,
- start, end);
+ ret = strbuf_addf(buf, ",%" PRIu64 "-%" PRIu64,
+ start, end);
}
+ if (ret < 0)
+ goto out;
}
if (!first)
- strbuf_add(buf, "]>", 2);
-
+ ret = strbuf_add(buf, "]>", 2);
+out:
return ret;
}
#else
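The common thread in the dwarf-aux.c changes is that strbuf_add()/strbuf_addf()/strbuf_addstr() return values are no longer discarded; every call site now propagates the first failure. The idiom, with made-up arguments:

	int ret = strbuf_addf(buf, "%s%s", prefix, name);

	if (ret < 0)
		return ret;	/* e.g. -ENOMEM from the strbuf is now visible */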
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index edcf4ed..f6fcc68 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -45,6 +45,7 @@
[PERF_RECORD_STAT] = "STAT",
[PERF_RECORD_STAT_ROUND] = "STAT_ROUND",
[PERF_RECORD_EVENT_UPDATE] = "EVENT_UPDATE",
+ [PERF_RECORD_TIME_CONV] = "TIME_CONV",
};
const char *perf_event__name(unsigned int id)
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 6bb1c92..8d363d5 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -233,6 +233,7 @@
PERF_RECORD_STAT = 76,
PERF_RECORD_STAT_ROUND = 77,
PERF_RECORD_EVENT_UPDATE = 78,
+ PERF_RECORD_TIME_CONV = 79,
PERF_RECORD_HEADER_MAX
};
@@ -469,6 +470,13 @@
u64 time;
};
+struct time_conv_event {
+ struct perf_event_header header;
+ u64 time_shift;
+ u64 time_mult;
+ u64 time_zero;
+};
+
union perf_event {
struct perf_event_header header;
struct mmap_event mmap;
@@ -497,6 +505,7 @@
struct stat_config_event stat_config;
struct stat_event stat;
struct stat_round_event stat_round;
+ struct time_conv_event time_conv;
};
void perf_event__print_totals(void);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 86a0383..c4bfe11 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -679,53 +679,52 @@
return NULL;
}
-union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
+/* When check_messup is true, 'end' must point to a good entry */
+static union perf_event *
+perf_mmap__read(struct perf_mmap *md, bool check_messup, u64 start,
+ u64 end, u64 *prev)
{
- struct perf_mmap *md = &evlist->mmap[idx];
- u64 head;
- u64 old = md->prev;
unsigned char *data = md->base + page_size;
union perf_event *event = NULL;
+ int diff = end - start;
- /*
- * Check if event was unmapped due to a POLLHUP/POLLERR.
- */
- if (!atomic_read(&md->refcnt))
- return NULL;
-
- head = perf_mmap__read_head(md);
- if (evlist->overwrite) {
+ if (check_messup) {
/*
* If we're further behind than half the buffer, there's a chance
* the writer will bite our tail and mess up the samples under us.
*
- * If we somehow ended up ahead of the head, we got messed up.
+ * If we somehow ended up ahead of the 'end', we got messed up.
*
- * In either case, truncate and restart at head.
+ * In either case, truncate and restart at 'end'.
*/
- int diff = head - old;
if (diff > md->mask / 2 || diff < 0) {
fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");
/*
- * head points to a known good entry, start there.
+ * 'end' points to a known good entry, start there.
*/
- old = head;
+ start = end;
+ diff = 0;
}
}
- if (old != head) {
+ if (diff >= (int)sizeof(event->header)) {
size_t size;
- event = (union perf_event *)&data[old & md->mask];
+ event = (union perf_event *)&data[start & md->mask];
size = event->header.size;
+ if (size < sizeof(event->header) || diff < (int)size) {
+ event = NULL;
+ goto broken_event;
+ }
+
/*
* Event straddles the mmap boundary -- header should always
* be inside due to u64 alignment of output.
*/
- if ((old & md->mask) + size != ((old + size) & md->mask)) {
- unsigned int offset = old;
+ if ((start & md->mask) + size != ((start + size) & md->mask)) {
+ unsigned int offset = start;
unsigned int len = min(sizeof(*event), size), cpy;
void *dst = md->event_copy;
@@ -740,14 +739,83 @@
event = (union perf_event *) md->event_copy;
}
- old += size;
+ start += size;
}
- md->prev = old;
+broken_event:
+ if (prev)
+ *prev = start;
return event;
}
+union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
+{
+ struct perf_mmap *md = &evlist->mmap[idx];
+ u64 head;
+ u64 old = md->prev;
+
+ /*
+ * Check if event was unmapped due to a POLLHUP/POLLERR.
+ */
+ if (!atomic_read(&md->refcnt))
+ return NULL;
+
+ head = perf_mmap__read_head(md);
+
+ return perf_mmap__read(md, evlist->overwrite, old, head, &md->prev);
+}
+
+union perf_event *
+perf_evlist__mmap_read_backward(struct perf_evlist *evlist, int idx)
+{
+ struct perf_mmap *md = &evlist->mmap[idx];
+ u64 head, end;
+ u64 start = md->prev;
+
+ /*
+ * Check if event was unmapped due to a POLLHUP/POLLERR.
+ */
+ if (!atomic_read(&md->refcnt))
+ return NULL;
+
+ head = perf_mmap__read_head(md);
+ if (!head)
+ return NULL;
+
+ /*
+ * The 'head' pointer starts at 0 and the kernel subtracts
+ * sizeof(record) from it on every write, so 'head' is in fact
+ * negative. The 'end' pointer is made manually by adding the
+ * size of the ring buffer to 'head', meaning the valid data to
+ * read is the whole ring buffer. If 'end' is positive, the ring
+ * buffer has not fully filled, so 'end' must be adjusted to 0.
+ *
+ * However, since both 'head' and 'end' are unsigned, we can't
+ * simply compare 'end' against 0. Instead we compare '-head'
+ * with the size of the ring buffer, where '-head' is the number
+ * of bytes the kernel has written to the ring buffer.
+ */
+ if (-head < (u64)(md->mask + 1))
+ end = 0;
+ else
+ end = head + md->mask + 1;
+
+ return perf_mmap__read(md, false, start, end, &md->prev);
+}
+
+void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx)
+{
+ struct perf_mmap *md = &evlist->mmap[idx];
+ u64 head;
+
+ if (!atomic_read(&md->refcnt))
+ return;
+
+ head = perf_mmap__read_head(md);
+ md->prev = head;
+}
+
static bool perf_mmap__empty(struct perf_mmap *md)
{
return perf_mmap__read_head(md) == md->prev && !md->auxtrace_mmap.base;
@@ -986,26 +1054,34 @@
return -1;
}
+unsigned long perf_event_mlock_kb_in_pages(void)
+{
+ unsigned long pages;
+ int max;
+
+ if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) {
+ /*
+ * Pick a once upon a time good value, i.e. things look
+ * strange since we can't read a sysctl value, but let's not
+ * die yet...
+ */
+ max = 512;
+ } else {
+ max -= (page_size / 1024);
+ }
+
+ pages = (max * 1024) / page_size;
+ if (!is_power_of_2(pages))
+ pages = rounddown_pow_of_two(pages);
+
+ return pages;
+}
+
static size_t perf_evlist__mmap_size(unsigned long pages)
{
- if (pages == UINT_MAX) {
- int max;
-
- if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) {
- /*
- * Pick a once upon a time good value, i.e. things look
- * strange since we can't read a sysctl value, but lets not
- * die yet...
- */
- max = 512;
- } else {
- max -= (page_size / 1024);
- }
-
- pages = (max * 1024) / page_size;
- if (!is_power_of_2(pages))
- pages = rounddown_pow_of_two(pages);
- } else if (!is_power_of_2(pages))
+ if (pages == UINT_MAX)
+ pages = perf_event_mlock_kb_in_pages();
+ else if (!is_power_of_2(pages))
return 0;
return (pages + 1) * page_size;
@@ -1192,6 +1268,24 @@
perf_evlist__propagate_maps(evlist);
}
+void __perf_evlist__set_sample_bit(struct perf_evlist *evlist,
+ enum perf_event_sample_format bit)
+{
+ struct perf_evsel *evsel;
+
+ evlist__for_each(evlist, evsel)
+ __perf_evsel__set_sample_bit(evsel, bit);
+}
+
+void __perf_evlist__reset_sample_bit(struct perf_evlist *evlist,
+ enum perf_event_sample_format bit)
+{
+ struct perf_evsel *evsel;
+
+ evlist__for_each(evlist, evsel)
+ __perf_evsel__reset_sample_bit(evsel, bit);
+}
+
int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel)
{
struct perf_evsel *evsel;
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index a0d1522..85d1b598 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -87,6 +87,17 @@
int perf_evlist__add_newtp(struct perf_evlist *evlist,
const char *sys, const char *name, void *handler);
+void __perf_evlist__set_sample_bit(struct perf_evlist *evlist,
+ enum perf_event_sample_format bit);
+void __perf_evlist__reset_sample_bit(struct perf_evlist *evlist,
+ enum perf_event_sample_format bit);
+
+#define perf_evlist__set_sample_bit(evlist, bit) \
+ __perf_evlist__set_sample_bit(evlist, PERF_SAMPLE_##bit)
+
+#define perf_evlist__reset_sample_bit(evlist, bit) \
+ __perf_evlist__reset_sample_bit(evlist, PERF_SAMPLE_##bit)
+
int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter);
int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid);
int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids);
@@ -118,16 +129,23 @@
union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx);
+union perf_event *perf_evlist__mmap_read_backward(struct perf_evlist *evlist,
+ int idx);
+void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx);
+
void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx);
int perf_evlist__open(struct perf_evlist *evlist);
void perf_evlist__close(struct perf_evlist *evlist);
+struct callchain_param;
+
void perf_evlist__set_id_pos(struct perf_evlist *evlist);
bool perf_can_sample_identifier(void);
bool perf_can_record_switch_events(void);
bool perf_can_record_cpu_wide(void);
-void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts);
+void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts,
+ struct callchain_param *callchain);
int record_opts__config(struct record_opts *opts);
int perf_evlist__prepare_workload(struct perf_evlist *evlist,
@@ -144,6 +162,8 @@
const char *str,
int unset);
+unsigned long perf_event_mlock_kb_in_pages(void);
+
int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
bool overwrite, unsigned int auxtrace_pages,
bool auxtrace_overwrite);
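The evlist-wide wrappers simply apply the per-evsel sample-bit helpers to every event, and the macros paste the PERF_SAMPLE_ prefix so callers name only the bit. For illustration:

	/* Request timestamps and CPU ids on all events in the list... */
	perf_evlist__set_sample_bit(evlist, TIME);	/* PERF_SAMPLE_TIME */
	perf_evlist__set_sample_bit(evlist, CPU);	/* PERF_SAMPLE_CPU */

	/* ...and drop the branch stack from all of them. */
	perf_evlist__reset_sample_bit(evlist, BRANCH_STACK);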
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 645dc18..964c7c3 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -226,7 +226,8 @@
perf_evsel__init(evsel, attr, idx);
if (perf_evsel__is_bpf_output(evsel)) {
- evsel->attr.sample_type |= PERF_SAMPLE_RAW;
+ evsel->attr.sample_type |= (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
+ PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD),
evsel->attr.sample_period = 1;
}
@@ -561,10 +562,9 @@
return ret;
}
-static void
-perf_evsel__config_callgraph(struct perf_evsel *evsel,
- struct record_opts *opts,
- struct callchain_param *param)
+void perf_evsel__config_callchain(struct perf_evsel *evsel,
+ struct record_opts *opts,
+ struct callchain_param *param)
{
bool function = perf_evsel__is_function_event(evsel);
struct perf_event_attr *attr = &evsel->attr;
@@ -704,7 +704,7 @@
/* set perf-event callgraph */
if (param.enabled)
- perf_evsel__config_callgraph(evsel, opts, &param);
+ perf_evsel__config_callchain(evsel, opts, &param);
}
}
@@ -736,7 +736,8 @@
* enable/disable events specifically, as there's no
* initial traced exec call.
*/
-void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts)
+void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
+ struct callchain_param *callchain)
{
struct perf_evsel *leader = evsel->leader;
struct perf_event_attr *attr = &evsel->attr;
@@ -811,8 +812,8 @@
if (perf_evsel__is_function_event(evsel))
evsel->attr.exclude_callchain_user = 1;
- if (callchain_param.enabled && !evsel->no_aux_samples)
- perf_evsel__config_callgraph(evsel, opts, &callchain_param);
+ if (callchain && callchain->enabled && !evsel->no_aux_samples)
+ perf_evsel__config_callchain(evsel, opts, callchain);
if (opts->sample_intr_regs) {
attr->sample_regs_intr = opts->sample_intr_regs;
@@ -1230,6 +1231,21 @@
__p_bits(buf, size, value, bits);
}
+static void __p_branch_sample_type(char *buf, size_t size, u64 value)
+{
+#define bit_name(n) { PERF_SAMPLE_BRANCH_##n, #n }
+ struct bit_names bits[] = {
+ bit_name(USER), bit_name(KERNEL), bit_name(HV), bit_name(ANY),
+ bit_name(ANY_CALL), bit_name(ANY_RETURN), bit_name(IND_CALL),
+ bit_name(ABORT_TX), bit_name(IN_TX), bit_name(NO_TX),
+ bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP),
+ bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES),
+ { .name = NULL, }
+ };
+#undef bit_name
+ __p_bits(buf, size, value, bits);
+}
+
static void __p_read_format(char *buf, size_t size, u64 value)
{
#define bit_name(n) { PERF_FORMAT_##n, #n }
@@ -1248,6 +1264,7 @@
#define p_unsigned(val) snprintf(buf, BUF_SIZE, "%"PRIu64, (uint64_t)(val))
#define p_signed(val) snprintf(buf, BUF_SIZE, "%"PRId64, (int64_t)(val))
#define p_sample_type(val) __p_sample_type(buf, BUF_SIZE, val)
+#define p_branch_sample_type(val) __p_branch_sample_type(buf, BUF_SIZE, val)
#define p_read_format(val) __p_read_format(buf, BUF_SIZE, val)
#define PRINT_ATTRn(_n, _f, _p) \
@@ -1299,12 +1316,13 @@
PRINT_ATTRf(comm_exec, p_unsigned);
PRINT_ATTRf(use_clockid, p_unsigned);
PRINT_ATTRf(context_switch, p_unsigned);
+ PRINT_ATTRf(write_backward, p_unsigned);
PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned);
PRINT_ATTRf(bp_type, p_unsigned);
PRINT_ATTRn("{ bp_addr, config1 }", bp_addr, p_hex);
PRINT_ATTRn("{ bp_len, config2 }", bp_len, p_hex);
- PRINT_ATTRf(branch_sample_type, p_unsigned);
+ PRINT_ATTRf(branch_sample_type, p_branch_sample_type);
PRINT_ATTRf(sample_regs_user, p_hex);
PRINT_ATTRf(sample_stack_user, p_unsigned);
PRINT_ATTRf(clockid, p_signed);
@@ -2253,95 +2271,6 @@
return 0;
}
-static int comma_fprintf(FILE *fp, bool *first, const char *fmt, ...)
-{
- va_list args;
- int ret = 0;
-
- if (!*first) {
- ret += fprintf(fp, ",");
- } else {
- ret += fprintf(fp, ":");
- *first = false;
- }
-
- va_start(args, fmt);
- ret += vfprintf(fp, fmt, args);
- va_end(args);
- return ret;
-}
-
-static int __print_attr__fprintf(FILE *fp, const char *name, const char *val, void *priv)
-{
- return comma_fprintf(fp, (bool *)priv, " %s: %s", name, val);
-}
-
-int perf_evsel__fprintf(struct perf_evsel *evsel,
- struct perf_attr_details *details, FILE *fp)
-{
- bool first = true;
- int printed = 0;
-
- if (details->event_group) {
- struct perf_evsel *pos;
-
- if (!perf_evsel__is_group_leader(evsel))
- return 0;
-
- if (evsel->nr_members > 1)
- printed += fprintf(fp, "%s{", evsel->group_name ?: "");
-
- printed += fprintf(fp, "%s", perf_evsel__name(evsel));
- for_each_group_member(pos, evsel)
- printed += fprintf(fp, ",%s", perf_evsel__name(pos));
-
- if (evsel->nr_members > 1)
- printed += fprintf(fp, "}");
- goto out;
- }
-
- printed += fprintf(fp, "%s", perf_evsel__name(evsel));
-
- if (details->verbose) {
- printed += perf_event_attr__fprintf(fp, &evsel->attr,
- __print_attr__fprintf, &first);
- } else if (details->freq) {
- const char *term = "sample_freq";
-
- if (!evsel->attr.freq)
- term = "sample_period";
-
- printed += comma_fprintf(fp, &first, " %s=%" PRIu64,
- term, (u64)evsel->attr.sample_freq);
- }
-
- if (details->trace_fields) {
- struct format_field *field;
-
- if (evsel->attr.type != PERF_TYPE_TRACEPOINT) {
- printed += comma_fprintf(fp, &first, " (not a tracepoint)");
- goto out;
- }
-
- field = evsel->tp_format->format.fields;
- if (field == NULL) {
- printed += comma_fprintf(fp, &first, " (no trace field)");
- goto out;
- }
-
- printed += comma_fprintf(fp, &first, " trace_fields: %s", field->name);
-
- field = field->next;
- while (field) {
- printed += comma_fprintf(fp, &first, "%s", field->name);
- field = field->next;
- }
- }
-out:
- fputc('\n', fp);
- return ++printed;
-}
-
bool perf_evsel__fallback(struct perf_evsel *evsel, int err,
char *msg, size_t msgsize)
{
@@ -2416,10 +2345,18 @@
"Probably the maximum number of open file descriptors has been reached.\n"
"Hint: Try again after reducing the number of events.\n"
"Hint: Try increasing the limit with 'ulimit -n <limit>'");
+ case ENOMEM:
+ if ((evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0 &&
+ access("/proc/sys/kernel/perf_event_max_stack", F_OK) == 0)
+ return scnprintf(msg, size,
+ "Not enough memory to setup event with callchain.\n"
+ "Hint: Try tweaking /proc/sys/kernel/perf_event_max_stack\n"
+ "Hint: Current value: %d", sysctl_perf_event_max_stack);
+ break;
case ENODEV:
if (target->cpu_list)
return scnprintf(msg, size, "%s",
- "No such device - did you specify an out-of-range profile CPU?\n");
+ "No such device - did you specify an out-of-range profile CPU?");
break;
case EOPNOTSUPP:
if (evsel->attr.precise_ip)
@@ -2451,7 +2388,7 @@
return scnprintf(msg, size,
"The sys_perf_event_open() syscall returned with %d (%s) for event (%s).\n"
"/bin/dmesg may provide additional information.\n"
- "No CONFIG_PERF_EVENTS=y kernel support configured?\n",
+ "No CONFIG_PERF_EVENTS=y kernel support configured?",
err, strerror_r(err, sbuf, sizeof(sbuf)),
perf_evsel__name(evsel));
}
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 501ea6e..8a644fe 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -178,8 +178,14 @@
void perf_evsel__exit(struct perf_evsel *evsel);
void perf_evsel__delete(struct perf_evsel *evsel);
+struct callchain_param;
+
void perf_evsel__config(struct perf_evsel *evsel,
- struct record_opts *opts);
+ struct record_opts *opts,
+ struct callchain_param *callchain);
+void perf_evsel__config_callchain(struct perf_evsel *evsel,
+ struct record_opts *opts,
+ struct callchain_param *callchain);
int __perf_evsel__sample_size(u64 sample_type);
void perf_evsel__calc_id_pos(struct perf_evsel *evsel);
@@ -381,6 +387,24 @@
int perf_evsel__fprintf(struct perf_evsel *evsel,
struct perf_attr_details *details, FILE *fp);
+#define EVSEL__PRINT_IP (1<<0)
+#define EVSEL__PRINT_SYM (1<<1)
+#define EVSEL__PRINT_DSO (1<<2)
+#define EVSEL__PRINT_SYMOFFSET (1<<3)
+#define EVSEL__PRINT_ONELINE (1<<4)
+#define EVSEL__PRINT_SRCLINE (1<<5)
+#define EVSEL__PRINT_UNKNOWN_AS_ADDR (1<<6)
+
+struct callchain_cursor;
+
+int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
+ unsigned int print_opts,
+ struct callchain_cursor *cursor, FILE *fp);
+
+int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al,
+ int left_alignment, unsigned int print_opts,
+ struct callchain_cursor *cursor, FILE *fp);
+
bool perf_evsel__fallback(struct perf_evsel *evsel, int err,
char *msg, size_t msgsize);
int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target,
@@ -396,7 +420,7 @@
(_evsel) && (_evsel)->leader == (_leader); \
(_evsel) = list_entry((_evsel)->node.next, struct perf_evsel, node))
-static inline bool has_branch_callstack(struct perf_evsel *evsel)
+static inline bool perf_evsel__has_branch_callstack(const struct perf_evsel *evsel)
{
return evsel->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK;
}
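The EVSEL__PRINT_* bits are OR-ed into the print_opts mask consumed by the two fprintf helpers declared above. A sketch, with an arbitrary alignment width:

	unsigned int print_opts = EVSEL__PRINT_IP | EVSEL__PRINT_SYM |
				  EVSEL__PRINT_DSO | EVSEL__PRINT_SRCLINE;

	/* Prints the resolved callchain when cursor != NULL, otherwise a
	 * single line for the sample's own ip/symbol. */
	sample__fprintf_sym(sample, al, 8, print_opts, cursor, stdout);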
diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c
new file mode 100644
index 0000000..3674e77
--- /dev/null
+++ b/tools/perf/util/evsel_fprintf.c
@@ -0,0 +1,212 @@
+#include <stdio.h>
+#include <stdbool.h>
+#include <traceevent/event-parse.h>
+#include "evsel.h"
+#include "callchain.h"
+#include "map.h"
+#include "symbol.h"
+
+static int comma_fprintf(FILE *fp, bool *first, const char *fmt, ...)
+{
+ va_list args;
+ int ret = 0;
+
+ if (!*first) {
+ ret += fprintf(fp, ",");
+ } else {
+ ret += fprintf(fp, ":");
+ *first = false;
+ }
+
+ va_start(args, fmt);
+ ret += vfprintf(fp, fmt, args);
+ va_end(args);
+ return ret;
+}
+
+static int __print_attr__fprintf(FILE *fp, const char *name, const char *val, void *priv)
+{
+ return comma_fprintf(fp, (bool *)priv, " %s: %s", name, val);
+}
+
+int perf_evsel__fprintf(struct perf_evsel *evsel,
+ struct perf_attr_details *details, FILE *fp)
+{
+ bool first = true;
+ int printed = 0;
+
+ if (details->event_group) {
+ struct perf_evsel *pos;
+
+ if (!perf_evsel__is_group_leader(evsel))
+ return 0;
+
+ if (evsel->nr_members > 1)
+ printed += fprintf(fp, "%s{", evsel->group_name ?: "");
+
+ printed += fprintf(fp, "%s", perf_evsel__name(evsel));
+ for_each_group_member(pos, evsel)
+ printed += fprintf(fp, ",%s", perf_evsel__name(pos));
+
+ if (evsel->nr_members > 1)
+ printed += fprintf(fp, "}");
+ goto out;
+ }
+
+ printed += fprintf(fp, "%s", perf_evsel__name(evsel));
+
+ if (details->verbose) {
+ printed += perf_event_attr__fprintf(fp, &evsel->attr,
+ __print_attr__fprintf, &first);
+ } else if (details->freq) {
+ const char *term = "sample_freq";
+
+ if (!evsel->attr.freq)
+ term = "sample_period";
+
+ printed += comma_fprintf(fp, &first, " %s=%" PRIu64,
+ term, (u64)evsel->attr.sample_freq);
+ }
+
+ if (details->trace_fields) {
+ struct format_field *field;
+
+ if (evsel->attr.type != PERF_TYPE_TRACEPOINT) {
+ printed += comma_fprintf(fp, &first, " (not a tracepoint)");
+ goto out;
+ }
+
+ field = evsel->tp_format->format.fields;
+ if (field == NULL) {
+ printed += comma_fprintf(fp, &first, " (no trace field)");
+ goto out;
+ }
+
+ printed += comma_fprintf(fp, &first, " trace_fields: %s", field->name);
+
+ field = field->next;
+ while (field) {
+ printed += comma_fprintf(fp, &first, "%s", field->name);
+ field = field->next;
+ }
+ }
+out:
+ fputc('\n', fp);
+ return ++printed;
+}
+
+int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
+ unsigned int print_opts, struct callchain_cursor *cursor,
+ FILE *fp)
+{
+ int printed = 0;
+ struct callchain_cursor_node *node;
+ int print_ip = print_opts & EVSEL__PRINT_IP;
+ int print_sym = print_opts & EVSEL__PRINT_SYM;
+ int print_dso = print_opts & EVSEL__PRINT_DSO;
+ int print_symoffset = print_opts & EVSEL__PRINT_SYMOFFSET;
+ int print_oneline = print_opts & EVSEL__PRINT_ONELINE;
+ int print_srcline = print_opts & EVSEL__PRINT_SRCLINE;
+ int print_unknown_as_addr = print_opts & EVSEL__PRINT_UNKNOWN_AS_ADDR;
+ char s = print_oneline ? ' ' : '\t';
+
+ if (sample->callchain) {
+ struct addr_location node_al;
+
+ callchain_cursor_commit(cursor);
+
+ while (1) {
+ u64 addr = 0;
+
+ node = callchain_cursor_current(cursor);
+ if (!node)
+ break;
+
+ if (node->sym && node->sym->ignore)
+ goto next;
+
+ printed += fprintf(fp, "%-*.*s", left_alignment, left_alignment, " ");
+
+ if (print_ip)
+ printed += fprintf(fp, "%c%16" PRIx64, s, node->ip);
+
+ if (node->map)
+ addr = node->map->map_ip(node->map, node->ip);
+
+ if (print_sym) {
+ printed += fprintf(fp, " ");
+ node_al.addr = addr;
+ node_al.map = node->map;
+
+ if (print_symoffset) {
+ printed += __symbol__fprintf_symname_offs(node->sym, &node_al,
+ print_unknown_as_addr, fp);
+ } else {
+ printed += __symbol__fprintf_symname(node->sym, &node_al,
+ print_unknown_as_addr, fp);
+ }
+ }
+
+ if (print_dso) {
+ printed += fprintf(fp, " (");
+ printed += map__fprintf_dsoname(node->map, fp);
+ printed += fprintf(fp, ")");
+ }
+
+ if (print_srcline)
+ printed += map__fprintf_srcline(node->map, addr, "\n ", fp);
+
+ if (!print_oneline)
+ printed += fprintf(fp, "\n");
+next:
+ callchain_cursor_advance(cursor);
+ }
+ }
+
+ return printed;
+}
+
+int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al,
+ int left_alignment, unsigned int print_opts,
+ struct callchain_cursor *cursor, FILE *fp)
+{
+ int printed = 0;
+ int print_ip = print_opts & EVSEL__PRINT_IP;
+ int print_sym = print_opts & EVSEL__PRINT_SYM;
+ int print_dso = print_opts & EVSEL__PRINT_DSO;
+ int print_symoffset = print_opts & EVSEL__PRINT_SYMOFFSET;
+ int print_srcline = print_opts & EVSEL__PRINT_SRCLINE;
+ int print_unknown_as_addr = print_opts & EVSEL__PRINT_UNKNOWN_AS_ADDR;
+
+ if (cursor != NULL) {
+ printed += sample__fprintf_callchain(sample, left_alignment,
+ print_opts, cursor, fp);
+ } else if (!(al->sym && al->sym->ignore)) {
+ printed += fprintf(fp, "%-*.*s", left_alignment, left_alignment, " ");
+
+ if (print_ip)
+ printed += fprintf(fp, "%16" PRIx64, sample->ip);
+
+ if (print_sym) {
+ printed += fprintf(fp, " ");
+ if (print_symoffset) {
+ printed += __symbol__fprintf_symname_offs(al->sym, al,
+ print_unknown_as_addr, fp);
+ } else {
+ printed += __symbol__fprintf_symname(al->sym, al,
+ print_unknown_as_addr, fp);
+ }
+ }
+
+ if (print_dso) {
+ printed += fprintf(fp, " (");
+ printed += map__fprintf_dsoname(al->map, fp);
+ printed += fprintf(fp, ")");
+ }
+
+ if (print_srcline)
+ printed += map__fprintf_srcline(al->map, al->addr, "\n ", fp);
+ }
+
+ return printed;
+}
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 90680ec..08852dd 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1474,7 +1474,7 @@
dso = machine__findnew_dso(machine, filename);
if (dso != NULL) {
- char sbuild_id[BUILD_ID_SIZE * 2 + 1];
+ char sbuild_id[SBUILD_ID_SIZE];
dso__set_build_id(dso, &bev->build_id);
@@ -1819,7 +1819,8 @@
ph->env.nr_sibling_cores = nr;
size += sizeof(u32);
- strbuf_init(&sb, 128);
+ if (strbuf_init(&sb, 128) < 0)
+ goto free_cpu;
for (i = 0; i < nr; i++) {
str = do_read_string(fd, ph);
@@ -1827,7 +1828,8 @@
goto error;
/* include a NULL character at the end */
- strbuf_add(&sb, str, strlen(str) + 1);
+ if (strbuf_add(&sb, str, strlen(str) + 1) < 0)
+ goto error;
size += string_size(str);
free(str);
}
@@ -1849,7 +1851,8 @@
goto error;
/* include a NULL character at the end */
- strbuf_add(&sb, str, strlen(str) + 1);
+ if (strbuf_add(&sb, str, strlen(str) + 1) < 0)
+ goto error;
size += string_size(str);
free(str);
}
@@ -1912,13 +1915,14 @@
/* nr nodes */
ret = readn(fd, &nr, sizeof(nr));
if (ret != sizeof(nr))
- goto error;
+ return -1;
if (ph->needs_swap)
nr = bswap_32(nr);
ph->env.nr_numa_nodes = nr;
- strbuf_init(&sb, 256);
+ if (strbuf_init(&sb, 256) < 0)
+ return -1;
for (i = 0; i < nr; i++) {
/* node number */
@@ -1940,15 +1944,17 @@
mem_free = bswap_64(mem_free);
}
- strbuf_addf(&sb, "%u:%"PRIu64":%"PRIu64":",
- node, mem_total, mem_free);
+ if (strbuf_addf(&sb, "%u:%"PRIu64":%"PRIu64":",
+ node, mem_total, mem_free) < 0)
+ goto error;
str = do_read_string(fd, ph);
if (!str)
goto error;
/* include a NULL character at the end */
- strbuf_add(&sb, str, strlen(str) + 1);
+ if (strbuf_add(&sb, str, strlen(str) + 1) < 0)
+ goto error;
free(str);
}
ph->env.numa_nodes = strbuf_detach(&sb, NULL);
@@ -1982,7 +1988,8 @@
}
ph->env.nr_pmu_mappings = pmu_num;
- strbuf_init(&sb, 128);
+ if (strbuf_init(&sb, 128) < 0)
+ return -1;
while (pmu_num) {
if (readn(fd, &type, sizeof(type)) != sizeof(type))
@@ -1994,9 +2001,11 @@
if (!name)
goto error;
- strbuf_addf(&sb, "%u:%s", type, name);
+ if (strbuf_addf(&sb, "%u:%s", type, name) < 0)
+ goto error;
/* include a NULL character at the end */
- strbuf_add(&sb, "", 1);
+ if (strbuf_add(&sb, "", 1) < 0)
+ goto error;
if (!strcmp(name, "msr"))
ph->env.msr_pmu_type = type;
diff --git a/tools/perf/util/help-unknown-cmd.c b/tools/perf/util/help-unknown-cmd.c
index 43a98a4..d62ccae 100644
--- a/tools/perf/util/help-unknown-cmd.c
+++ b/tools/perf/util/help-unknown-cmd.c
@@ -27,16 +27,27 @@
return l1 != l2 ? l1 - l2 : strcmp(s1, s2);
}
-static void add_cmd_list(struct cmdnames *cmds, struct cmdnames *old)
+static int add_cmd_list(struct cmdnames *cmds, struct cmdnames *old)
{
- unsigned int i;
+ unsigned int i, nr = cmds->cnt + old->cnt;
+ void *tmp;
- ALLOC_GROW(cmds->names, cmds->cnt + old->cnt, cmds->alloc);
-
+ if (nr > cmds->alloc) {
+ /* Choose bigger one to alloc */
+ if (alloc_nr(cmds->alloc) < nr)
+ cmds->alloc = nr;
+ else
+ cmds->alloc = alloc_nr(cmds->alloc);
+ tmp = realloc(cmds->names, cmds->alloc * sizeof(*cmds->names));
+ if (!tmp)
+ return -1;
+ cmds->names = tmp;
+ }
for (i = 0; i < old->cnt; i++)
cmds->names[cmds->cnt++] = old->names[i];
zfree(&old->names);
old->cnt = 0;
+ return 0;
}
const char *help_unknown_cmd(const char *cmd)
@@ -52,8 +63,11 @@
load_command_list("perf-", &main_cmds, &other_cmds);
- add_cmd_list(&main_cmds, &aliases);
- add_cmd_list(&main_cmds, &other_cmds);
+ if (add_cmd_list(&main_cmds, &aliases) < 0 ||
+ add_cmd_list(&main_cmds, &other_cmds) < 0) {
+ fprintf(stderr, "ERROR: Failed to allocate command list for unknown command.\n");
+ goto end;
+ }
qsort(main_cmds.names, main_cmds.cnt,
sizeof(main_cmds.names), cmdname_compare);
uniq(&main_cmds);
@@ -99,6 +113,6 @@
for (i = 0; i < n; i++)
fprintf(stderr, "\t%s\n", main_cmds.names[i]->name);
}
-
+end:
exit(1);
}
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 31c4641..cfab531 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -295,7 +295,7 @@
root_in = &he->parent_he->hroot_in;
root_out = &he->parent_he->hroot_out;
} else {
- if (sort__need_collapse)
+ if (hists__has(hists, need_collapse))
root_in = &hists->entries_collapsed;
else
root_in = hists->entries_in;
@@ -953,7 +953,7 @@
{
int err, err2;
- err = sample__resolve_callchain(iter->sample, &iter->parent,
+ err = sample__resolve_callchain(iter->sample, &callchain_cursor, &iter->parent,
iter->evsel, al, max_stack_depth);
if (err)
return err;
@@ -1295,8 +1295,9 @@
return ret;
}
-int hists__collapse_insert_entry(struct hists *hists, struct rb_root *root,
- struct hist_entry *he)
+static int hists__collapse_insert_entry(struct hists *hists,
+ struct rb_root *root,
+ struct hist_entry *he)
{
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
@@ -1372,7 +1373,7 @@
struct hist_entry *n;
int ret;
- if (!sort__need_collapse)
+ if (!hists__has(hists, need_collapse))
return 0;
hists->nr_entries = 0;
@@ -1631,7 +1632,7 @@
return;
}
- if (sort__need_collapse)
+ if (hists__has(hists, need_collapse))
root = &hists->entries_collapsed;
else
root = hists->entries_in;
@@ -2035,7 +2036,7 @@
struct hist_entry *he;
int64_t cmp;
- if (sort__need_collapse)
+ if (hists__has(hists, need_collapse))
root = &hists->entries_collapsed;
else
root = hists->entries_in;
@@ -2061,6 +2062,8 @@
if (he) {
memset(&he->stat, 0, sizeof(he->stat));
he->hists = hists;
+ if (symbol_conf.cumulate_callchain)
+ memset(he->stat_acc, 0, sizeof(he->stat));
rb_link_node(&he->rb_node_in, parent, p);
rb_insert_color(&he->rb_node_in, root);
hists__inc_stats(hists, he);
@@ -2075,7 +2078,7 @@
{
struct rb_node *n;
- if (sort__need_collapse)
+ if (hists__has(hists, need_collapse))
n = hists->entries_collapsed.rb_node;
else
n = hists->entries_in->rb_node;
@@ -2104,7 +2107,7 @@
struct rb_node *nd;
struct hist_entry *pos, *pair;
- if (sort__need_collapse)
+ if (hists__has(leader, need_collapse))
root = &leader->entries_collapsed;
else
root = leader->entries_in;
@@ -2129,7 +2132,7 @@
struct rb_node *nd;
struct hist_entry *pos, *pair;
- if (sort__need_collapse)
+ if (hists__has(other, need_collapse))
root = &other->entries_collapsed;
else
root = other->entries_in;
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index bec0cd6..0f84bfb 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -82,6 +82,8 @@
int nr_hpp_node;
};
+#define hists__has(__h, __f) (__h)->hpp_list->__f
+
struct hist_entry_iter;
struct hist_iter_ops {
@@ -199,8 +201,6 @@
int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list);
struct rb_root *hists__get_rotate_entries_in(struct hists *hists);
-int hists__collapse_insert_entry(struct hists *hists,
- struct rb_root *root, struct hist_entry *he);
struct perf_hpp {
char *buf;
@@ -240,6 +240,14 @@
struct perf_hpp_list {
struct list_head fields;
struct list_head sorts;
+
+ int need_collapse;
+ int parent;
+ int sym;
+ int dso;
+ int socket;
+ int thread;
+ int comm;
};
extern struct perf_hpp_list perf_hpp_list;
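With the per-hists flags replacing globals such as sort__need_collapse, hists__has() resolves a flag through the hists' own hpp_list, so different hists can in principle carry different sort keys. The conversion pattern used throughout hist.c:

	/* Set once during sort-order parsing (sketch): */
	perf_hpp_list.need_collapse = 1;

	/* Queried where the global used to be read: */
	if (hists__has(hists, need_collapse))
		root = &hists->entries_collapsed;
	else
		root = hists->entries_in;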
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
index abf1366..9df9960 100644
--- a/tools/perf/util/intel-bts.c
+++ b/tools/perf/util/intel-bts.c
@@ -66,6 +66,7 @@
u64 branches_id;
size_t branches_event_size;
bool synth_needs_swap;
+ unsigned long num_events;
};
struct intel_bts_queue {
@@ -275,6 +276,10 @@
union perf_event event;
struct perf_sample sample = { .ip = 0, };
+ if (bts->synth_opts.initial_skip &&
+ bts->num_events++ < bts->synth_opts.initial_skip)
+ return 0;
+
event.sample.header.type = PERF_RECORD_SAMPLE;
event.sample.header.misc = PERF_RECORD_MISC_USER;
event.sample.header.size = sizeof(struct perf_event_header);
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index 9409d01..9c8f15d 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -356,7 +356,7 @@
int intel_pt__strerror(int code, char *buf, size_t buflen)
{
- if (code < 1 || code > INTEL_PT_ERR_MAX)
+ if (code < 1 || code >= INTEL_PT_ERR_MAX)
code = INTEL_PT_ERR_UNK;
strlcpy(buf, intel_pt_err_msgs[code], buflen);
return 0;
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 6175784..1371969 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -100,6 +100,8 @@
u64 cyc_bit;
u64 noretcomp_bit;
unsigned max_non_turbo_ratio;
+
+ unsigned long num_events;
};
enum switch_state {
@@ -972,6 +974,10 @@
if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
return 0;
+ if (pt->synth_opts.initial_skip &&
+ pt->num_events++ < pt->synth_opts.initial_skip)
+ return 0;
+
event->sample.header.type = PERF_RECORD_SAMPLE;
event->sample.header.misc = PERF_RECORD_MISC_USER;
event->sample.header.size = sizeof(struct perf_event_header);
@@ -1029,6 +1035,10 @@
union perf_event *event = ptq->event_buf;
struct perf_sample sample = { .ip = 0, };
+ if (pt->synth_opts.initial_skip &&
+ pt->num_events++ < pt->synth_opts.initial_skip)
+ return 0;
+
event->sample.header.type = PERF_RECORD_SAMPLE;
event->sample.header.misc = PERF_RECORD_MISC_USER;
event->sample.header.size = sizeof(struct perf_event_header);
@@ -1087,6 +1097,10 @@
union perf_event *event = ptq->event_buf;
struct perf_sample sample = { .ip = 0, };
+ if (pt->synth_opts.initial_skip &&
+ pt->num_events++ < pt->synth_opts.initial_skip)
+ return 0;
+
event->sample.header.type = PERF_RECORD_SAMPLE;
event->sample.header.misc = PERF_RECORD_MISC_USER;
event->sample.header.size = sizeof(struct perf_event_header);
@@ -1199,14 +1213,18 @@
ptq->have_sample = false;
if (pt->sample_instructions &&
- (state->type & INTEL_PT_INSTRUCTION)) {
+ (state->type & INTEL_PT_INSTRUCTION) &&
+ (!pt->synth_opts.initial_skip ||
+ pt->num_events++ >= pt->synth_opts.initial_skip)) {
err = intel_pt_synth_instruction_sample(ptq);
if (err)
return err;
}
if (pt->sample_transactions &&
- (state->type & INTEL_PT_TRANSACTION)) {
+ (state->type & INTEL_PT_TRANSACTION) &&
+ (!pt->synth_opts.initial_skip ||
+ pt->num_events++ >= pt->synth_opts.initial_skip)) {
err = intel_pt_synth_transaction_sample(ptq);
if (err)
return err;
diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c
index ad0c0bb..86afe96 100644
--- a/tools/perf/util/jitdump.c
+++ b/tools/perf/util/jitdump.c
@@ -17,6 +17,7 @@
#include "strlist.h"
#include <elf.h>
+#include "tsc.h"
#include "session.h"
#include "jit.h"
#include "jitdump.h"
@@ -33,6 +34,7 @@
size_t bufsize;
FILE *in;
bool needs_bswap; /* handles cross-endianess */
+ bool use_arch_timestamp;
void *debug_data;
size_t nr_debug_entries;
uint32_t code_load_count;
@@ -158,13 +160,16 @@
header.flags = bswap_64(header.flags);
}
+ jd->use_arch_timestamp = header.flags & JITDUMP_FLAGS_ARCH_TIMESTAMP;
+
if (verbose > 2)
- pr_debug("version=%u\nhdr.size=%u\nts=0x%llx\npid=%d\nelf_mach=%d\n",
+ pr_debug("version=%u\nhdr.size=%u\nts=0x%llx\npid=%d\nelf_mach=%d\nuse_arch_timestamp=%d\n",
header.version,
header.total_size,
(unsigned long long)header.timestamp,
header.pid,
- header.elf_mach);
+ header.elf_mach,
+ jd->use_arch_timestamp);
if (header.flags & JITDUMP_FLAGS_RESERVED) {
pr_err("jitdump file contains invalid or unsupported flags 0x%llx\n",
@@ -172,10 +177,15 @@
goto error;
}
+ if (jd->use_arch_timestamp && !jd->session->time_conv.time_mult) {
+ pr_err("jitdump file uses arch timestamps but there is no timestamp conversion\n");
+ goto error;
+ }
+
/*
* validate event is using the correct clockid
*/
- if (jit_validate_events(jd->session)) {
+ if (!jd->use_arch_timestamp && jit_validate_events(jd->session)) {
pr_err("error, jitted code must be sampled with perf record -k 1\n");
goto error;
}
@@ -329,6 +339,23 @@
return 0;
}
+static uint64_t convert_timestamp(struct jit_buf_desc *jd, uint64_t timestamp)
+{
+ struct perf_tsc_conversion tc;
+
+ if (!jd->use_arch_timestamp)
+ return timestamp;
+
+ tc.time_shift = jd->session->time_conv.time_shift;
+ tc.time_mult = jd->session->time_conv.time_mult;
+ tc.time_zero = jd->session->time_conv.time_zero;
+
+ if (!tc.time_mult)
+ return 0;
+
+ return tsc_to_perf_time(timestamp, &tc);
+}
+
static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr)
{
struct perf_sample sample;
@@ -385,7 +412,7 @@
return -1;
}
if (stat(filename, &st))
- memset(&st, 0, sizeof(stat));
+ memset(&st, 0, sizeof(st));
event->mmap2.header.type = PERF_RECORD_MMAP2;
event->mmap2.header.misc = PERF_RECORD_MISC_USER;
@@ -410,7 +437,7 @@
id->tid = tid;
}
if (jd->sample_type & PERF_SAMPLE_TIME)
- id->time = jr->load.p.timestamp;
+ id->time = convert_timestamp(jd, jr->load.p.timestamp);
/*
* create pseudo sample to induce dso hit increment
@@ -473,7 +500,7 @@
size++; /* for \0 */
if (stat(filename, &st))
- memset(&st, 0, sizeof(stat));
+ memset(&st, 0, sizeof(st));
size = PERF_ALIGN(size, sizeof(u64));
@@ -499,7 +526,7 @@
id->tid = tid;
}
if (jd->sample_type & PERF_SAMPLE_TIME)
- id->time = jr->load.p.timestamp;
+ id->time = convert_timestamp(jd, jr->load.p.timestamp);
/*
* create pseudo sample to induce dso hit increment
diff --git a/tools/perf/util/jitdump.h b/tools/perf/util/jitdump.h
index b66c1f5..bcacd20 100644
--- a/tools/perf/util/jitdump.h
+++ b/tools/perf/util/jitdump.h
@@ -23,9 +23,12 @@
#define JITHEADER_VERSION 1
enum jitdump_flags_bits {
+ JITDUMP_FLAGS_ARCH_TIMESTAMP_BIT,
JITDUMP_FLAGS_MAX_BIT,
};
+#define JITDUMP_FLAGS_ARCH_TIMESTAMP (1ULL << JITDUMP_FLAGS_ARCH_TIMESTAMP_BIT)
+
#define JITDUMP_FLAGS_RESERVED (JITDUMP_FLAGS_MAX_BIT < 64 ? \
(~((1ULL << JITDUMP_FLAGS_MAX_BIT) - 1)) : 0)
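When JITDUMP_FLAGS_ARCH_TIMESTAMP is set, jitdump records carry raw TSC values, and convert_timestamp() above maps them onto perf time using the shift/mult/zero triple from the session's time_conv data. The conversion done by tsc_to_perf_time() is, in essence (see tools/perf/util/tsc.c):

	/* Split the cycle count so the multiplication cannot overflow. */
	u64 quot = cyc >> tc->time_shift;
	u64 rem  = cyc & (((u64)1 << tc->time_shift) - 1);

	return tc->time_zero + quot * tc->time_mult +
	       ((rem * tc->time_mult) >> tc->time_shift);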
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 80b9b6a..639a290 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -32,6 +32,7 @@
machine->threads = RB_ROOT;
pthread_rwlock_init(&machine->threads_lock, NULL);
+ machine->nr_threads = 0;
INIT_LIST_HEAD(&machine->dead_threads);
machine->last_match = NULL;
@@ -430,6 +431,7 @@
*/
thread__get(th);
machine->last_match = th;
+ ++machine->nr_threads;
}
return th;
@@ -681,11 +683,13 @@
size_t machine__fprintf(struct machine *machine, FILE *fp)
{
- size_t ret = 0;
+ size_t ret;
struct rb_node *nd;
pthread_rwlock_rdlock(&machine->threads_lock);
+ ret = fprintf(fp, "Threads: %u\n", machine->nr_threads);
+
for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) {
struct thread *pos = rb_entry(nd, struct thread, rb_node);
@@ -908,11 +912,11 @@
return machine__create_kernel_maps(machine);
}
-int machine__load_kallsyms(struct machine *machine, const char *filename,
- enum map_type type, symbol_filter_t filter)
+int __machine__load_kallsyms(struct machine *machine, const char *filename,
+ enum map_type type, bool no_kcore, symbol_filter_t filter)
{
struct map *map = machine__kernel_map(machine);
- int ret = dso__load_kallsyms(map->dso, filename, map, filter);
+ int ret = __dso__load_kallsyms(map->dso, filename, map, no_kcore, filter);
if (ret > 0) {
dso__set_loaded(map->dso, type);
@@ -927,6 +931,12 @@
return ret;
}
+int machine__load_kallsyms(struct machine *machine, const char *filename,
+ enum map_type type, symbol_filter_t filter)
+{
+ return __machine__load_kallsyms(machine, filename, type, false, filter);
+}
+
int machine__load_vmlinux_path(struct machine *machine, enum map_type type,
symbol_filter_t filter)
{
@@ -1413,6 +1423,7 @@
pthread_rwlock_wrlock(&machine->threads_lock);
rb_erase_init(&th->rb_node, &machine->threads);
RB_CLEAR_NODE(&th->rb_node);
+ --machine->nr_threads;
/*
* Move it first to the dead_threads list, then drop the reference,
* if this is the last reference, then the thread__delete destructor
@@ -1599,6 +1610,7 @@
}
static int add_callchain_ip(struct thread *thread,
+ struct callchain_cursor *cursor,
struct symbol **parent,
struct addr_location *root_al,
u8 *cpumode,
@@ -1630,7 +1642,7 @@
* It seems the callchain is corrupted.
* Discard all.
*/
- callchain_cursor_reset(&callchain_cursor);
+ callchain_cursor_reset(cursor);
return 1;
}
return 0;
@@ -1640,7 +1652,7 @@
}
if (al.sym != NULL) {
- if (sort__has_parent && !*parent &&
+ if (perf_hpp_list.parent && !*parent &&
symbol__match_regex(al.sym, &parent_regex))
*parent = al.sym;
else if (have_ignore_callees && root_al &&
@@ -1648,13 +1660,13 @@
/* Treat this symbol as the root,
forgetting its callees. */
*root_al = al;
- callchain_cursor_reset(&callchain_cursor);
+ callchain_cursor_reset(cursor);
}
}
if (symbol_conf.hide_unresolved && al.sym == NULL)
return 0;
- return callchain_cursor_append(&callchain_cursor, al.addr, al.map, al.sym);
+ return callchain_cursor_append(cursor, al.addr, al.map, al.sym);
}
struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
@@ -1724,6 +1736,7 @@
* negative error code on other errors.
*/
static int resolve_lbr_callchain_sample(struct thread *thread,
+ struct callchain_cursor *cursor,
struct perf_sample *sample,
struct symbol **parent,
struct addr_location *root_al,
@@ -1756,7 +1769,7 @@
*/
int mix_chain_nr = i + 1 + lbr_nr + 1;
- if (mix_chain_nr > PERF_MAX_STACK_DEPTH + PERF_MAX_BRANCH_DEPTH) {
+ if (mix_chain_nr > (int)sysctl_perf_event_max_stack + PERF_MAX_BRANCH_DEPTH) {
pr_warning("corrupted callchain. skipping...\n");
return 0;
}
@@ -1778,7 +1791,7 @@
ip = lbr_stack->entries[0].to;
}
- err = add_callchain_ip(thread, parent, root_al, &cpumode, ip);
+ err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip);
if (err)
return (err < 0) ? err : 0;
}
@@ -1789,6 +1802,7 @@
}
static int thread__resolve_callchain_sample(struct thread *thread,
+ struct callchain_cursor *cursor,
struct perf_evsel *evsel,
struct perf_sample *sample,
struct symbol **parent,
@@ -1803,10 +1817,8 @@
int skip_idx = -1;
int first_call = 0;
- callchain_cursor_reset(&callchain_cursor);
-
- if (has_branch_callstack(evsel)) {
- err = resolve_lbr_callchain_sample(thread, sample, parent,
+ if (perf_evsel__has_branch_callstack(evsel)) {
+ err = resolve_lbr_callchain_sample(thread, cursor, sample, parent,
root_al, max_stack);
if (err)
return (err < 0) ? err : 0;
@@ -1816,7 +1828,7 @@
* Based on DWARF debug information, some architectures skip
* a callchain entry saved by the kernel.
*/
- if (chain->nr < PERF_MAX_STACK_DEPTH)
+ if (chain->nr < sysctl_perf_event_max_stack)
skip_idx = arch_skip_callchain_idx(thread, chain);
/*
@@ -1863,10 +1875,10 @@
nr = remove_loops(be, nr);
for (i = 0; i < nr; i++) {
- err = add_callchain_ip(thread, parent, root_al,
+ err = add_callchain_ip(thread, cursor, parent, root_al,
NULL, be[i].to);
if (!err)
- err = add_callchain_ip(thread, parent, root_al,
+ err = add_callchain_ip(thread, cursor, parent, root_al,
NULL, be[i].from);
if (err == -EINVAL)
break;
@@ -1877,7 +1889,7 @@
}
check_calls:
- if (chain->nr > PERF_MAX_STACK_DEPTH && (int)chain->nr > max_stack) {
+ if (chain->nr > sysctl_perf_event_max_stack && (int)chain->nr > max_stack) {
pr_warning("corrupted callchain. skipping...\n");
return 0;
}
@@ -1896,7 +1908,7 @@
#endif
ip = chain->ips[j];
- err = add_callchain_ip(thread, parent, root_al, &cpumode, ip);
+ err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip);
if (err)
return (err < 0) ? err : 0;
@@ -1915,19 +1927,12 @@
entry->map, entry->sym);
}
-int thread__resolve_callchain(struct thread *thread,
- struct perf_evsel *evsel,
- struct perf_sample *sample,
- struct symbol **parent,
- struct addr_location *root_al,
- int max_stack)
+static int thread__resolve_callchain_unwind(struct thread *thread,
+ struct callchain_cursor *cursor,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ int max_stack)
{
- int ret = thread__resolve_callchain_sample(thread, evsel,
- sample, parent,
- root_al, max_stack);
- if (ret)
- return ret;
-
/* Can we do dwarf post unwind? */
if (!((evsel->attr.sample_type & PERF_SAMPLE_REGS_USER) &&
(evsel->attr.sample_type & PERF_SAMPLE_STACK_USER)))
@@ -1938,9 +1943,45 @@
(!sample->user_stack.size))
return 0;
- return unwind__get_entries(unwind_entry, &callchain_cursor,
+ return unwind__get_entries(unwind_entry, cursor,
thread, sample, max_stack);
+}
+int thread__resolve_callchain(struct thread *thread,
+ struct callchain_cursor *cursor,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct symbol **parent,
+ struct addr_location *root_al,
+ int max_stack)
+{
+ int ret = 0;
+
+ callchain_cursor_reset(cursor);
+
+ if (callchain_param.order == ORDER_CALLEE) {
+ ret = thread__resolve_callchain_sample(thread, cursor,
+ evsel, sample,
+ parent, root_al,
+ max_stack);
+ if (ret)
+ return ret;
+ ret = thread__resolve_callchain_unwind(thread, cursor,
+ evsel, sample,
+ max_stack);
+ } else {
+ ret = thread__resolve_callchain_unwind(thread, cursor,
+ evsel, sample,
+ max_stack);
+ if (ret)
+ return ret;
+ ret = thread__resolve_callchain_sample(thread, cursor,
+ evsel, sample,
+ parent, root_al,
+ max_stack);
+ }
+
+ return ret;
}
int machine__for_each_thread(struct machine *machine,
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 8499db2..83f4679 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -31,6 +31,7 @@
char *root_dir;
struct rb_root threads;
pthread_rwlock_t threads_lock;
+ unsigned int nr_threads;
struct list_head dead_threads;
struct thread *last_match;
struct vdso_info *vdso_info;
@@ -141,7 +142,11 @@
struct addr_location *al);
struct mem_info *sample__resolve_mem(struct perf_sample *sample,
struct addr_location *al);
+
+struct callchain_cursor;
+
int thread__resolve_callchain(struct thread *thread,
+ struct callchain_cursor *cursor,
struct perf_evsel *evsel,
struct perf_sample *sample,
struct symbol **parent,
@@ -211,6 +216,8 @@
struct map *machine__findnew_module_map(struct machine *machine, u64 start,
const char *filename);
+int __machine__load_kallsyms(struct machine *machine, const char *filename,
+ enum map_type type, bool no_kcore, symbol_filter_t filter);
int machine__load_kallsyms(struct machine *machine, const char *filename,
enum map_type type, symbol_filter_t filter);
int machine__load_vmlinux_path(struct machine *machine, enum map_type type,
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 171b6d1..b19bcd3 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -289,7 +289,7 @@
nr = dso__load(map->dso, map, filter);
if (nr < 0) {
if (map->dso->has_build_id) {
- char sbuild_id[BUILD_ID_SIZE * 2 + 1];
+ char sbuild_id[SBUILD_ID_SIZE];
build_id__sprintf(map->dso->build_id,
sizeof(map->dso->build_id),
@@ -431,6 +431,13 @@
if (map->dso->rel)
return rip - map->pgoff;
+ /*
+ * kernel modules also have DSO_TYPE_USER in dso->kernel,
+ * but all kernel modules are ET_REL, so won't get here.
+ */
+ if (map->dso->kernel == DSO_TYPE_USER)
+ return rip + map->dso->text_offset;
+
return map->unmap_ip(map, rip) - map->reloc;
}
@@ -454,6 +461,13 @@
if (map->dso->rel)
return map->unmap_ip(map, ip + map->pgoff);
+ /*
+ * kernel modules also have DSO_TYPE_USER in dso->kernel,
+ * but all kernel modules are ET_REL, so won't get here.
+ */
+ if (map->dso->kernel == DSO_TYPE_USER)
+ return map->unmap_ip(map, ip - map->dso->text_offset);
+
return ip + map->reloc;
}
diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c
index b1b9e23..fe84df1 100644
--- a/tools/perf/util/ordered-events.c
+++ b/tools/perf/util/ordered-events.c
@@ -308,3 +308,12 @@
free(event);
}
}
+
+void ordered_events__reinit(struct ordered_events *oe)
+{
+ ordered_events__deliver_t old_deliver = oe->deliver;
+
+ ordered_events__free(oe);
+ memset(oe, '\0', sizeof(*oe));
+ ordered_events__init(oe, old_deliver);
+}
diff --git a/tools/perf/util/ordered-events.h b/tools/perf/util/ordered-events.h
index f403991..e11468a 100644
--- a/tools/perf/util/ordered-events.h
+++ b/tools/perf/util/ordered-events.h
@@ -49,6 +49,7 @@
int ordered_events__flush(struct ordered_events *oe, enum oe_flush how);
void ordered_events__init(struct ordered_events *oe, ordered_events__deliver_t deliver);
void ordered_events__free(struct ordered_events *oe);
+void ordered_events__reinit(struct ordered_events *oe);
static inline
void ordered_events__set_alloc_size(struct ordered_events *oe, u64 size)
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index adef23b..ddb0261 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -602,14 +602,13 @@
static __u64 pmu_format_max_value(const unsigned long *format)
{
- int w;
+ __u64 w = 0;
+ int fbit;
- w = bitmap_weight(format, PERF_PMU_FORMAT_BITS);
- if (!w)
- return 0;
- if (w < 64)
- return (1ULL << w) - 1;
- return -1;
+ for_each_set_bit(fbit, format, PERF_PMU_FORMAT_BITS)
+ w |= (1ULL << fbit);
+
+ return w;
}
/*
@@ -644,20 +643,20 @@
static char *pmu_formats_string(struct list_head *formats)
{
struct perf_pmu_format *format;
- char *str;
- struct strbuf buf;
+ char *str = NULL;
+ struct strbuf buf = STRBUF_INIT;
unsigned i = 0;
if (!formats)
return NULL;
- strbuf_init(&buf, 0);
/* sysfs exported terms */
list_for_each_entry(format, formats, list)
- strbuf_addf(&buf, i++ ? ",%s" : "%s",
- format->name);
+ if (strbuf_addf(&buf, i++ ? ",%s" : "%s", format->name) < 0)
+ goto error;
str = strbuf_detach(&buf, NULL);
+error:
strbuf_release(&buf);
return str;
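The rewritten pmu_format_max_value() ORs the individual format bits together instead of deriving a contiguous mask from the bit count, so sparse formats no longer get an under-sized maximum. A stand-alone sketch of the difference, using a hypothetical sparse format occupying bits 0-1 and 4-7:

    #include <stdio.h>

    /* Hypothetical sparse PMU format, e.g. "config:0-1,4-7" in sysfs. */
    #define FORMAT_BITS 0xF3UL /* bits 0,1,4,5,6,7 */

    int main(void)
    {
        unsigned long format = FORMAT_BITS;
        unsigned long long old_max, new_max = 0;
        int w = __builtin_popcountl(format); /* what bitmap_weight() reported */
        unsigned int bit;

        /* old behaviour: a contiguous mask of the w lowest bits */
        old_max = (w < 64) ? (1ULL << w) - 1 : (unsigned long long)-1;

        /* new behaviour: OR in exactly the set format bits */
        for (bit = 0; bit < 8 * sizeof(format); bit++)
            if (format & (1UL << bit))
                new_max |= 1ULL << bit;

        printf("old: 0x%llx\n", old_max); /* 0x3f - cannot reach bits 4-7 */
        printf("new: 0x%llx\n", new_max); /* 0xf3 - matches the format */
        return 0;
    }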
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 8319fbb..74401a2 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -265,6 +265,65 @@
return true;
}
+/*
+ * NOTE:
+ * The '.gnu.linkonce.this_module' section of a kernel module ELF maps
+ * directly to 'struct module' from linux/module.h. This section contains
+ * the actual module name, which the kernel will use after loading it.
+ * But we cannot use 'struct module' here since linux/module.h is not
+ * exposed to user-space. The offset of 'name' has remained the same for
+ * a long time, so it is hardcoded here.
+ */
+#ifdef __LP64__
+#define MOD_NAME_OFFSET 24
+#else
+#define MOD_NAME_OFFSET 12
+#endif
+
+/*
+ * @module can be a module name or a module file path. In case of a path,
+ * inspect the ELF and find out the actual module name.
+ * The caller has to free mod_name after using it.
+ */
+static char *find_module_name(const char *module)
+{
+ int fd;
+ Elf *elf;
+ GElf_Ehdr ehdr;
+ GElf_Shdr shdr;
+ Elf_Data *data;
+ Elf_Scn *sec;
+ char *mod_name = NULL;
+
+ fd = open(module, O_RDONLY);
+ if (fd < 0)
+ return NULL;
+
+ elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+ if (elf == NULL)
+ goto elf_err;
+
+ if (gelf_getehdr(elf, &ehdr) == NULL)
+ goto ret_err;
+
+ sec = elf_section_by_name(elf, &ehdr, &shdr,
+ ".gnu.linkonce.this_module", NULL);
+ if (!sec)
+ goto ret_err;
+
+ data = elf_getdata(sec, NULL);
+ if (!data || !data->d_buf)
+ goto ret_err;
+
+ mod_name = strdup((char *)data->d_buf + MOD_NAME_OFFSET);
+
+ret_err:
+ elf_end(elf);
+elf_err:
+ close(fd);
+ return mod_name;
+}
+
#ifdef HAVE_DWARF_SUPPORT
static int kernel_get_module_dso(const char *module, struct dso **pdso)
@@ -486,8 +545,10 @@
return -errno;
elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
- if (elf == NULL)
- return -EINVAL;
+ if (elf == NULL) {
+ ret = -EINVAL;
+ goto out_close;
+ }
if (gelf_getehdr(elf, &ehdr) == NULL)
goto out;
@@ -499,6 +560,9 @@
ret = 0;
out:
elf_end(elf);
+out_close:
+ close(fd);
+
return ret;
}
@@ -583,32 +647,23 @@
int ntevs, const char *module)
{
int i, ret = 0;
- char *tmp;
+ char *mod_name = NULL;
if (!module)
return 0;
- tmp = strrchr(module, '/');
- if (tmp) {
- /* This is a module path -- get the module name */
- module = strdup(tmp + 1);
- if (!module)
- return -ENOMEM;
- tmp = strchr(module, '.');
- if (tmp)
- *tmp = '\0';
- tmp = (char *)module; /* For free() */
- }
+ mod_name = find_module_name(module);
for (i = 0; i < ntevs; i++) {
- tevs[i].point.module = strdup(module);
+ tevs[i].point.module =
+ strdup(mod_name ? mod_name : module);
if (!tevs[i].point.module) {
ret = -ENOMEM;
break;
}
}
- free(tmp);
+ free(mod_name);
return ret;
}
@@ -1618,69 +1673,65 @@
}
/* Compose only probe arg */
-int synthesize_perf_probe_arg(struct perf_probe_arg *pa, char *buf, size_t len)
+char *synthesize_perf_probe_arg(struct perf_probe_arg *pa)
{
struct perf_probe_arg_field *field = pa->field;
- int ret;
- char *tmp = buf;
+ struct strbuf buf;
+ char *ret = NULL;
+ int err;
+
+ if (strbuf_init(&buf, 64) < 0)
+ return NULL;
if (pa->name && pa->var)
- ret = e_snprintf(tmp, len, "%s=%s", pa->name, pa->var);
+ err = strbuf_addf(&buf, "%s=%s", pa->name, pa->var);
else
- ret = e_snprintf(tmp, len, "%s", pa->name ? pa->name : pa->var);
- if (ret <= 0)
- goto error;
- tmp += ret;
- len -= ret;
+ err = strbuf_addstr(&buf, pa->name ?: pa->var);
+ if (err)
+ goto out;
while (field) {
if (field->name[0] == '[')
- ret = e_snprintf(tmp, len, "%s", field->name);
+ err = strbuf_addstr(&buf, field->name);
else
- ret = e_snprintf(tmp, len, "%s%s",
- field->ref ? "->" : ".", field->name);
- if (ret <= 0)
- goto error;
- tmp += ret;
- len -= ret;
+ err = strbuf_addf(&buf, "%s%s", field->ref ? "->" : ".",
+ field->name);
field = field->next;
+ if (err)
+ goto out;
}
- if (pa->type) {
- ret = e_snprintf(tmp, len, ":%s", pa->type);
- if (ret <= 0)
- goto error;
- tmp += ret;
- len -= ret;
- }
+ if (pa->type)
+ if (strbuf_addf(&buf, ":%s", pa->type) < 0)
+ goto out;
- return tmp - buf;
-error:
- pr_debug("Failed to synthesize perf probe argument: %d\n", ret);
+ ret = strbuf_detach(&buf, NULL);
+out:
+ strbuf_release(&buf);
return ret;
}
/* Compose only probe point (not argument) */
static char *synthesize_perf_probe_point(struct perf_probe_point *pp)
{
- char *buf, *tmp;
- char offs[32] = "", line[32] = "", file[32] = "";
- int ret, len;
+ struct strbuf buf;
+ char *tmp, *ret = NULL;
+ int len, err = 0;
- buf = zalloc(MAX_CMDLEN);
- if (buf == NULL) {
- ret = -ENOMEM;
- goto error;
- }
- if (pp->offset) {
- ret = e_snprintf(offs, 32, "+%lu", pp->offset);
- if (ret <= 0)
- goto error;
- }
- if (pp->line) {
- ret = e_snprintf(line, 32, ":%d", pp->line);
- if (ret <= 0)
- goto error;
+ if (strbuf_init(&buf, 64) < 0)
+ return NULL;
+
+ if (pp->function) {
+ if (strbuf_addstr(&buf, pp->function) < 0)
+ goto out;
+ if (pp->offset)
+ err = strbuf_addf(&buf, "+%lu", pp->offset);
+ else if (pp->line)
+ err = strbuf_addf(&buf, ":%d", pp->line);
+ else if (pp->retprobe)
+ err = strbuf_addstr(&buf, "%return");
+ if (err)
+ goto out;
}
if (pp->file) {
tmp = pp->file;
@@ -1689,25 +1740,15 @@
tmp = strchr(pp->file + len - 30, '/');
tmp = tmp ? tmp + 1 : pp->file + len - 30;
}
- ret = e_snprintf(file, 32, "@%s", tmp);
- if (ret <= 0)
- goto error;
+ err = strbuf_addf(&buf, "@%s", tmp);
+ if (!err && !pp->function && pp->line)
+ err = strbuf_addf(&buf, ":%d", pp->line);
}
-
- if (pp->function)
- ret = e_snprintf(buf, MAX_CMDLEN, "%s%s%s%s%s", pp->function,
- offs, pp->retprobe ? "%return" : "", line,
- file);
- else
- ret = e_snprintf(buf, MAX_CMDLEN, "%s%s", file, line);
- if (ret <= 0)
- goto error;
-
- return buf;
-error:
- pr_debug("Failed to synthesize perf probe point: %d\n", ret);
- free(buf);
- return NULL;
+ if (!err)
+ ret = strbuf_detach(&buf, NULL);
+out:
+ strbuf_release(&buf);
+ return ret;
}
#if 0
@@ -1736,45 +1777,32 @@
#endif
static int __synthesize_probe_trace_arg_ref(struct probe_trace_arg_ref *ref,
- char **buf, size_t *buflen,
- int depth)
+ struct strbuf *buf, int depth)
{
- int ret;
+ int err;
if (ref->next) {
depth = __synthesize_probe_trace_arg_ref(ref->next, buf,
- buflen, depth + 1);
+ depth + 1);
if (depth < 0)
- goto out;
+ return depth;
}
-
- ret = e_snprintf(*buf, *buflen, "%+ld(", ref->offset);
- if (ret < 0)
- depth = ret;
- else {
- *buf += ret;
- *buflen -= ret;
- }
-out:
- return depth;
-
+ err = strbuf_addf(buf, "%+ld(", ref->offset);
+ return (err < 0) ? err : depth;
}
static int synthesize_probe_trace_arg(struct probe_trace_arg *arg,
- char *buf, size_t buflen)
+ struct strbuf *buf)
{
struct probe_trace_arg_ref *ref = arg->ref;
- int ret, depth = 0;
- char *tmp = buf;
+ int depth = 0, err;
/* Argument name or separator */
if (arg->name)
- ret = e_snprintf(buf, buflen, " %s=", arg->name);
+ err = strbuf_addf(buf, " %s=", arg->name);
else
- ret = e_snprintf(buf, buflen, " ");
- if (ret < 0)
- return ret;
- buf += ret;
- buflen -= ret;
+ err = strbuf_addch(buf, ' ');
+ if (err)
+ return err;
/* Special case: @XXX */
if (arg->value[0] == '@' && arg->ref)
@@ -1782,59 +1810,44 @@
/* Dereferencing arguments */
if (ref) {
- depth = __synthesize_probe_trace_arg_ref(ref, &buf,
- &buflen, 1);
+ depth = __synthesize_probe_trace_arg_ref(ref, buf, 1);
if (depth < 0)
return depth;
}
/* Print argument value */
if (arg->value[0] == '@' && arg->ref)
- ret = e_snprintf(buf, buflen, "%s%+ld", arg->value,
- arg->ref->offset);
+ err = strbuf_addf(buf, "%s%+ld", arg->value, arg->ref->offset);
else
- ret = e_snprintf(buf, buflen, "%s", arg->value);
- if (ret < 0)
- return ret;
- buf += ret;
- buflen -= ret;
+ err = strbuf_addstr(buf, arg->value);
/* Closing */
- while (depth--) {
- ret = e_snprintf(buf, buflen, ")");
- if (ret < 0)
- return ret;
- buf += ret;
- buflen -= ret;
- }
- /* Print argument type */
- if (arg->type) {
- ret = e_snprintf(buf, buflen, ":%s", arg->type);
- if (ret <= 0)
- return ret;
- buf += ret;
- }
+ while (!err && depth--)
+ err = strbuf_addch(buf, ')');
- return buf - tmp;
+ /* Print argument type */
+ if (!err && arg->type)
+ err = strbuf_addf(buf, ":%s", arg->type);
+
+ return err;
}
char *synthesize_probe_trace_command(struct probe_trace_event *tev)
{
struct probe_trace_point *tp = &tev->point;
- char *buf;
- int i, len, ret;
-
- buf = zalloc(MAX_CMDLEN);
- if (buf == NULL)
- return NULL;
-
- len = e_snprintf(buf, MAX_CMDLEN, "%c:%s/%s ", tp->retprobe ? 'r' : 'p',
- tev->group, tev->event);
- if (len <= 0)
- goto error;
+ struct strbuf buf;
+ char *ret = NULL;
+ int i, err;
/* Uprobes must have tp->module */
if (tev->uprobes && !tp->module)
+ return NULL;
+
+ if (strbuf_init(&buf, 32) < 0)
+ return NULL;
+
+ if (strbuf_addf(&buf, "%c:%s/%s ", tp->retprobe ? 'r' : 'p',
+ tev->group, tev->event) < 0)
goto error;
/*
* If tp->address == 0, then this point must be a
@@ -1849,34 +1862,25 @@
/* Use the tp->address for uprobes */
if (tev->uprobes)
- ret = e_snprintf(buf + len, MAX_CMDLEN - len, "%s:0x%lx",
- tp->module, tp->address);
+ err = strbuf_addf(&buf, "%s:0x%lx", tp->module, tp->address);
else if (!strncmp(tp->symbol, "0x", 2))
/* Absolute address. See try_to_find_absolute_address() */
- ret = e_snprintf(buf + len, MAX_CMDLEN - len, "%s%s0x%lx",
- tp->module ?: "", tp->module ? ":" : "",
- tp->address);
+ err = strbuf_addf(&buf, "%s%s0x%lx", tp->module ?: "",
+ tp->module ? ":" : "", tp->address);
else
- ret = e_snprintf(buf + len, MAX_CMDLEN - len, "%s%s%s+%lu",
- tp->module ?: "", tp->module ? ":" : "",
- tp->symbol, tp->offset);
-
- if (ret <= 0)
+ err = strbuf_addf(&buf, "%s%s%s+%lu", tp->module ?: "",
+ tp->module ? ":" : "", tp->symbol, tp->offset);
+ if (err)
goto error;
- len += ret;
- for (i = 0; i < tev->nargs; i++) {
- ret = synthesize_probe_trace_arg(&tev->args[i], buf + len,
- MAX_CMDLEN - len);
- if (ret <= 0)
+ for (i = 0; i < tev->nargs; i++)
+ if (synthesize_probe_trace_arg(&tev->args[i], &buf) < 0)
goto error;
- len += ret;
- }
- return buf;
+ ret = strbuf_detach(&buf, NULL);
error:
- free(buf);
- return NULL;
+ strbuf_release(&buf);
+ return ret;
}
static int find_perf_probe_point_from_map(struct probe_trace_point *tp,
@@ -1958,7 +1962,7 @@
static int convert_to_perf_probe_event(struct probe_trace_event *tev,
struct perf_probe_event *pev, bool is_kprobe)
{
- char buf[64] = "";
+ struct strbuf buf = STRBUF_INIT;
int i, ret;
/* Convert event/group name */
@@ -1981,14 +1985,15 @@
if (tev->args[i].name)
pev->args[i].name = strdup(tev->args[i].name);
else {
- ret = synthesize_probe_trace_arg(&tev->args[i],
- buf, 64);
- pev->args[i].name = strdup(buf);
+ if ((ret = strbuf_init(&buf, 32)) < 0)
+ goto error;
+ ret = synthesize_probe_trace_arg(&tev->args[i], &buf);
+ pev->args[i].name = strbuf_detach(&buf, NULL);
}
if (pev->args[i].name == NULL && ret >= 0)
ret = -ENOMEM;
}
-
+error:
if (ret < 0)
clear_perf_probe_event(pev);
@@ -2162,35 +2167,38 @@
struct strbuf *result)
{
int i, ret;
- char buf[128];
- char *place;
+ char *buf;
+
+ if (asprintf(&buf, "%s:%s", group, event) < 0)
+ return -errno;
+ ret = strbuf_addf(result, " %-20s (on ", buf);
+ free(buf);
+ if (ret)
+ return ret;
/* Synthesize only event probe point */
- place = synthesize_perf_probe_point(&pev->point);
- if (!place)
- return -EINVAL;
+ buf = synthesize_perf_probe_point(&pev->point);
+ if (!buf)
+ return -ENOMEM;
+ ret = strbuf_addstr(result, buf);
+ free(buf);
- ret = e_snprintf(buf, 128, "%s:%s", group, event);
- if (ret < 0)
- goto out;
+ if (!ret && module)
+ ret = strbuf_addf(result, " in %s", module);
- strbuf_addf(result, " %-20s (on %s", buf, place);
- if (module)
- strbuf_addf(result, " in %s", module);
-
- if (pev->nargs > 0) {
- strbuf_add(result, " with", 5);
- for (i = 0; i < pev->nargs; i++) {
- ret = synthesize_perf_probe_arg(&pev->args[i],
- buf, 128);
- if (ret < 0)
- goto out;
- strbuf_addf(result, " %s", buf);
+ if (!ret && pev->nargs > 0) {
+ ret = strbuf_add(result, " with", 5);
+ for (i = 0; !ret && i < pev->nargs; i++) {
+ buf = synthesize_perf_probe_arg(&pev->args[i]);
+ if (!buf)
+ return -ENOMEM;
+ ret = strbuf_addf(result, " %s", buf);
+ free(buf);
}
}
- strbuf_addch(result, ')');
-out:
- free(place);
+ if (!ret)
+ ret = strbuf_addch(result, ')');
+
return ret;
}
@@ -2498,7 +2506,8 @@
void __weak arch__fix_tev_from_maps(struct perf_probe_event *pev __maybe_unused,
struct probe_trace_event *tev __maybe_unused,
- struct map *map __maybe_unused) { }
+ struct map *map __maybe_unused,
+ struct symbol *sym __maybe_unused) { }
/*
* Find probe function addresses from map.
@@ -2516,6 +2525,7 @@
struct probe_trace_point *tp;
int num_matched_functions;
int ret, i, j, skipped = 0;
+ char *mod_name;
map = get_target_map(pev->target, pev->uprobes);
if (!map) {
@@ -2600,9 +2610,19 @@
tp->realname = strdup_or_goto(sym->name, nomem_out);
tp->retprobe = pp->retprobe;
- if (pev->target)
- tev->point.module = strdup_or_goto(pev->target,
- nomem_out);
+ if (pev->target) {
+ if (pev->uprobes) {
+ tev->point.module = strdup_or_goto(pev->target,
+ nomem_out);
+ } else {
+ mod_name = find_module_name(pev->target);
+ tev->point.module =
+ strdup(mod_name ? mod_name : pev->target);
+ free(mod_name);
+ if (!tev->point.module)
+ goto nomem_out;
+ }
+ }
tev->uprobes = pev->uprobes;
tev->nargs = pev->nargs;
if (tev->nargs) {
@@ -2624,7 +2644,7 @@
strdup_or_goto(pev->args[i].type,
nomem_out);
}
- arch__fix_tev_from_maps(pev, tev, map);
+ arch__fix_tev_from_maps(pev, tev, map, sym);
}
if (ret == skipped) {
ret = -ENOENT;
@@ -2743,9 +2763,13 @@
{
int ret;
- if (pev->uprobes && !pev->group) {
- /* Replace group name if not given */
- ret = convert_exec_to_group(pev->target, &pev->group);
+ if (!pev->group) {
+ /* Set group name if not given */
+ if (!pev->uprobes) {
+ pev->group = strdup(PERFPROBE_GROUP);
+ ret = pev->group ? 0 : -ENOMEM;
+ } else
+ ret = convert_exec_to_group(pev->target, &pev->group);
if (ret != 0) {
pr_warning("Failed to make a group name.\n");
return ret;
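The 24/12 values of MOD_NAME_OFFSET follow from the layout at the top of 'struct module': an enum, then a struct list_head (two pointers), then the name array. A user-space mock of that layout (a sketch only, not the real kernel definition) reproduces the numbers:

    #include <stddef.h>
    #include <stdio.h>

    struct list_head_mock { void *next, *prev; };

    struct module_mock {
        int state;                             /* enum module_state */
        struct list_head_mock list;
        char name[64 - sizeof(unsigned long)]; /* MODULE_NAME_LEN */
    };

    int main(void)
    {
        /* Prints 24 on LP64 targets, 12 on ILP32 targets. */
        printf("name offset: %zu\n", offsetof(struct module_mock, name));
        return 0;
    }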
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index e54e7b0..5a27eb4 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -120,7 +120,7 @@
/* Events to command string */
char *synthesize_perf_probe_command(struct perf_probe_event *pev);
char *synthesize_probe_trace_command(struct probe_trace_event *tev);
-int synthesize_perf_probe_arg(struct perf_probe_arg *pa, char *buf, size_t len);
+char *synthesize_perf_probe_arg(struct perf_probe_arg *pa);
/* Check the perf_probe_event needs debuginfo */
bool perf_probe_event_need_dwarf(struct perf_probe_event *pev);
@@ -154,7 +154,8 @@
int show_available_funcs(const char *module, struct strfilter *filter, bool user);
bool arch__prefers_symtab(void);
void arch__fix_tev_from_maps(struct perf_probe_event *pev,
- struct probe_trace_event *tev, struct map *map);
+ struct probe_trace_event *tev, struct map *map,
+ struct symbol *sym);
/* If there is no space to write, returns -E2BIG. */
int e_snprintf(char *str, size_t size, const char *format, ...)
diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c
index e3b3b92..3fe6214 100644
--- a/tools/perf/util/probe-file.c
+++ b/tools/perf/util/probe-file.c
@@ -220,8 +220,7 @@
pr_debug("Writing event: %s\n", buf);
if (!probe_event_dry_run) {
- ret = write(fd, buf, strlen(buf));
- if (ret <= 0) {
+ if (write(fd, buf, strlen(buf)) < (int)strlen(buf)) {
ret = -errno;
pr_warning("Failed to write event: %s\n",
strerror_r(errno, sbuf, sizeof(sbuf)));
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index b3bd0fb..1259839 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -553,7 +553,7 @@
static int find_variable(Dwarf_Die *sc_die, struct probe_finder *pf)
{
Dwarf_Die vr_die;
- char buf[32], *ptr;
+ char *buf, *ptr;
int ret = 0;
/* Copy raw parameters */
@@ -563,13 +563,13 @@
if (pf->pvar->name)
pf->tvar->name = strdup(pf->pvar->name);
else {
- ret = synthesize_perf_probe_arg(pf->pvar, buf, 32);
- if (ret < 0)
- return ret;
+ buf = synthesize_perf_probe_arg(pf->pvar);
+ if (!buf)
+ return -ENOMEM;
ptr = strchr(buf, ':'); /* Change type separator to _ */
if (ptr)
*ptr = '_';
- pf->tvar->name = strdup(buf);
+ pf->tvar->name = buf;
}
if (pf->tvar->name == NULL)
return -ENOMEM;
@@ -1294,6 +1294,7 @@
{
struct available_var_finder *af = data;
struct variable_list *vl;
+ struct strbuf buf = STRBUF_INIT;
int tag, ret;
vl = &af->vls[af->nvls - 1];
@@ -1307,25 +1308,26 @@
if (ret == 0 || ret == -ERANGE) {
int ret2;
bool externs = !af->child;
- struct strbuf buf;
- strbuf_init(&buf, 64);
+ if (strbuf_init(&buf, 64) < 0)
+ goto error;
if (probe_conf.show_location_range) {
- if (!externs) {
- if (ret)
- strbuf_add(&buf, "[INV]\t", 6);
- else
- strbuf_add(&buf, "[VAL]\t", 6);
- } else
- strbuf_add(&buf, "[EXT]\t", 6);
+ if (!externs)
+ ret2 = strbuf_add(&buf,
+ ret ? "[INV]\t" : "[VAL]\t", 6);
+ else
+ ret2 = strbuf_add(&buf, "[EXT]\t", 6);
+ if (ret2)
+ goto error;
}
ret2 = die_get_varname(die_mem, &buf);
if (!ret2 && probe_conf.show_location_range &&
!externs) {
- strbuf_addch(&buf, '\t');
+ if (strbuf_addch(&buf, '\t') < 0)
+ goto error;
ret2 = die_get_var_range(&af->pf.sp_die,
die_mem, &buf);
}
@@ -1343,6 +1345,10 @@
return DIE_FIND_CB_CONTINUE;
else
return DIE_FIND_CB_SIBLING;
+error:
+ strbuf_release(&buf);
+ pr_debug("Error in strbuf\n");
+ return DIE_FIND_CB_END;
}
/* Add a found vars into available variables list */
diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources
index 8162ba0..36c6862 100644
--- a/tools/perf/util/python-ext-sources
+++ b/tools/perf/util/python-ext-sources
@@ -23,3 +23,4 @@
util/trace-event.c
../lib/rbtree.c
util/string.c
+util/symbol_fprintf.c
diff --git a/tools/perf/util/quote.c b/tools/perf/util/quote.c
index 01f0324..c6d4ee2 100644
--- a/tools/perf/util/quote.c
+++ b/tools/perf/util/quote.c
@@ -17,38 +17,42 @@
return (c == '\'' || c == '!');
}
-static void sq_quote_buf(struct strbuf *dst, const char *src)
+static int sq_quote_buf(struct strbuf *dst, const char *src)
{
char *to_free = NULL;
+ int ret;
if (dst->buf == src)
to_free = strbuf_detach(dst, NULL);
- strbuf_addch(dst, '\'');
- while (*src) {
+ ret = strbuf_addch(dst, '\'');
+ while (!ret && *src) {
size_t len = strcspn(src, "'!");
- strbuf_add(dst, src, len);
+ ret = strbuf_add(dst, src, len);
src += len;
- while (need_bs_quote(*src)) {
- strbuf_addstr(dst, "'\\");
- strbuf_addch(dst, *src++);
- strbuf_addch(dst, '\'');
- }
+ while (!ret && need_bs_quote(*src))
+ ret = strbuf_addf(dst, "'\\%c\'", *src++);
}
- strbuf_addch(dst, '\'');
+ if (!ret)
+ ret = strbuf_addch(dst, '\'');
free(to_free);
+
+ return ret;
}
-void sq_quote_argv(struct strbuf *dst, const char** argv, size_t maxlen)
+int sq_quote_argv(struct strbuf *dst, const char** argv, size_t maxlen)
{
- int i;
+ int i, ret;
/* Copy into destination buffer. */
- strbuf_grow(dst, 255);
- for (i = 0; argv[i]; ++i) {
- strbuf_addch(dst, ' ');
- sq_quote_buf(dst, argv[i]);
+ ret = strbuf_grow(dst, 255);
+ for (i = 0; !ret && argv[i]; ++i) {
+ ret = strbuf_addch(dst, ' ');
+ if (ret)
+ break;
+ ret = sq_quote_buf(dst, argv[i]);
if (maxlen && dst->len > maxlen)
die("Too many or long arguments");
}
+ return ret;
}
diff --git a/tools/perf/util/quote.h b/tools/perf/util/quote.h
index 3340c9c..e1ec191 100644
--- a/tools/perf/util/quote.h
+++ b/tools/perf/util/quote.h
@@ -24,6 +24,6 @@
* sq_quote() in a real application.
*/
-void sq_quote_argv(struct strbuf *, const char **argv, size_t maxlen);
+int sq_quote_argv(struct strbuf *, const char **argv, size_t maxlen);
#endif /* __PERF_QUOTE_H */
diff --git a/tools/perf/util/rb_resort.h b/tools/perf/util/rb_resort.h
new file mode 100644
index 0000000..abc76e3
--- /dev/null
+++ b/tools/perf/util/rb_resort.h
@@ -0,0 +1,149 @@
+#ifndef _PERF_RESORT_RB_H_
+#define _PERF_RESORT_RB_H_
+/*
+ * Template for creating a class to resort an existing rb_tree according to
+ * a new sort criterion that must be present in the entries of the source
+ * rb_tree.
+ *
+ * (c) 2016 Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Quick example, resorting threads by their shortname:
+ *
+ * First define the prefix (threads) to be used for the functions and data
+ * structures created, and provide an expression for the sorting, then the
+ * fields to be present in each of the entries in the new, sorted, rb_tree.
+ *
+ * The body of the init function should collect the fields, maybe
+ * pre-calculating them from multiple entries in the original 'entry' from
+ * the rb_tree used as a source for the entries to be sorted:
+
+DEFINE_RESORT_RB(threads, strcmp(a->thread->shortname,
+ b->thread->shortname) < 0,
+ struct thread *thread;
+)
+{
+ entry->thread = rb_entry(nd, struct thread, rb_node);
+}
+
+ * After this it is just a matter of instantiating it and iterating it,
+ * for a few data structures with existing rb_trees, such as 'struct machine',
+ * helpers are available to get the rb_root and the nr_entries:
+
+ DECLARE_RESORT_RB_MACHINE_THREADS(threads, machine_ptr);
+
+ * This will instantiate the new rb_tree and a cursor for it, that can be used as:
+
+ struct rb_node *nd;
+
+ resort_rb__for_each(nd, threads) {
+	struct thread *t = threads_entry->thread;
+ printf("%s: %d\n", t->shortname, t->tid);
+ }
+
+ * Then delete it:
+
+ resort_rb__delete(threads);
+
+ * The name of the data structures and functions will have a _sorted suffix
+ * right before the method names, i.e. will look like:
+ *
+ * struct threads_sorted_entry {}
+ * threads_sorted__insert()
+ */
+
+#define DEFINE_RESORT_RB(__name, __comp, ...) \
+struct __name##_sorted_entry { \
+ struct rb_node rb_node; \
+ __VA_ARGS__ \
+}; \
+static void __name##_sorted__init_entry(struct rb_node *nd, \
+ struct __name##_sorted_entry *entry); \
+ \
+static int __name##_sorted__cmp(struct rb_node *nda, struct rb_node *ndb) \
+{ \
+ struct __name##_sorted_entry *a, *b; \
+ a = rb_entry(nda, struct __name##_sorted_entry, rb_node); \
+ b = rb_entry(ndb, struct __name##_sorted_entry, rb_node); \
+ return __comp; \
+} \
+ \
+struct __name##_sorted { \
+ struct rb_root entries; \
+ struct __name##_sorted_entry nd[0]; \
+}; \
+ \
+static void __name##_sorted__insert(struct __name##_sorted *sorted, \
+ struct rb_node *sorted_nd) \
+{ \
+ struct rb_node **p = &sorted->entries.rb_node, *parent = NULL; \
+ while (*p != NULL) { \
+ parent = *p; \
+ if (__name##_sorted__cmp(sorted_nd, parent)) \
+ p = &(*p)->rb_left; \
+ else \
+ p = &(*p)->rb_right; \
+ } \
+ rb_link_node(sorted_nd, parent, p); \
+ rb_insert_color(sorted_nd, &sorted->entries); \
+} \
+ \
+static void __name##_sorted__sort(struct __name##_sorted *sorted, \
+ struct rb_root *entries) \
+{ \
+ struct rb_node *nd; \
+ unsigned int i = 0; \
+ for (nd = rb_first(entries); nd; nd = rb_next(nd)) { \
+ struct __name##_sorted_entry *snd = &sorted->nd[i++]; \
+ __name##_sorted__init_entry(nd, snd); \
+ __name##_sorted__insert(sorted, &snd->rb_node); \
+ } \
+} \
+ \
+static struct __name##_sorted *__name##_sorted__new(struct rb_root *entries, \
+ int nr_entries) \
+{ \
+ struct __name##_sorted *sorted; \
+ sorted = malloc(sizeof(*sorted) + sizeof(sorted->nd[0]) * nr_entries); \
+ if (sorted) { \
+ sorted->entries = RB_ROOT; \
+ __name##_sorted__sort(sorted, entries); \
+ } \
+ return sorted; \
+} \
+ \
+static void __name##_sorted__delete(struct __name##_sorted *sorted) \
+{ \
+ free(sorted); \
+} \
+ \
+static void __name##_sorted__init_entry(struct rb_node *nd, \
+ struct __name##_sorted_entry *entry)
+
+#define DECLARE_RESORT_RB(__name) \
+struct __name##_sorted_entry *__name##_entry; \
+struct __name##_sorted *__name = __name##_sorted__new
+
+#define resort_rb__for_each(__nd, __name) \
+ for (__nd = rb_first(&__name->entries); \
+ __name##_entry = rb_entry(__nd, struct __name##_sorted_entry, \
+ rb_node), __nd; \
+ __nd = rb_next(__nd))
+
+#define resort_rb__delete(__name) \
+ __name##_sorted__delete(__name), __name = NULL
+
+/*
+ * Helpers for other classes that contains both an rbtree and the
+ * number of entries in it:
+ */
+
+/* For 'struct intlist' */
+#define DECLARE_RESORT_RB_INTLIST(__name, __ilist) \
+ DECLARE_RESORT_RB(__name)(&__ilist->rblist.entries, \
+ __ilist->rblist.nr_entries)
+
+/* For 'struct machine->threads' */
+#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine) \
+ DECLARE_RESORT_RB(__name)(&__machine->threads, __machine->nr_threads)
+
+#endif /* _PERF_RESORT_RB_H_ */
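Putting the header's pieces together, a caller in a builtin could look like the sketch below; it assumes the usual perf in-tree includes and a populated 'struct machine', and checks for a failed allocation in the _sorted__new() call hidden behind the DECLARE macro:

    #include <stdio.h>
    #include "util/thread.h"
    #include "util/machine.h"
    #include "util/rb_resort.h"

    DEFINE_RESORT_RB(threads, strcmp(a->thread->shortname,
                                     b->thread->shortname) < 0,
        struct thread *thread;
    )
    {
        entry->thread = rb_entry(nd, struct thread, rb_node);
    }

    static void print_threads_by_shortname(struct machine *machine)
    {
        struct rb_node *nd;

        DECLARE_RESORT_RB_MACHINE_THREADS(threads, machine);

        if (threads == NULL)
            return;

        resort_rb__for_each(nd, threads) {
            struct thread *t = threads_entry->thread;
            printf("%s: %d\n", t->shortname, t->tid);
        }

        resort_rb__delete(threads);
    }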
diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
index 0467367..481792c 100644
--- a/tools/perf/util/record.c
+++ b/tools/perf/util/record.c
@@ -129,7 +129,8 @@
return true;
}
-void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts)
+void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts,
+ struct callchain_param *callchain)
{
struct perf_evsel *evsel;
bool use_sample_identifier = false;
@@ -148,7 +149,7 @@
use_comm_exec = perf_can_comm_exec();
evlist__for_each(evlist, evsel) {
- perf_evsel__config(evsel, opts);
+ perf_evsel__config(evsel, opts, callchain);
if (evsel->tracking && use_comm_exec)
evsel->attr.comm_exec = 1;
}
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
index b3aabc0..62c7f69 100644
--- a/tools/perf/util/scripting-engines/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -31,6 +31,8 @@
#include <perl.h>
#include "../../perf.h"
+#include "../callchain.h"
+#include "../machine.h"
#include "../thread.h"
#include "../event.h"
#include "../trace-event.h"
@@ -248,10 +250,90 @@
define_event_symbols(event, ev_name, args->next);
}
+static SV *perl_process_callchain(struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct addr_location *al)
+{
+ AV *list;
+
+ list = newAV();
+ if (!list)
+ goto exit;
+
+ if (!symbol_conf.use_callchain || !sample->callchain)
+ goto exit;
+
+ if (thread__resolve_callchain(al->thread, &callchain_cursor, evsel,
+ sample, NULL, NULL,
+ sysctl_perf_event_max_stack) != 0) {
+ pr_err("Failed to resolve callchain. Skipping\n");
+ goto exit;
+ }
+ callchain_cursor_commit(&callchain_cursor);
+
+
+ while (1) {
+ HV *elem;
+ struct callchain_cursor_node *node;
+ node = callchain_cursor_current(&callchain_cursor);
+ if (!node)
+ break;
+
+ elem = newHV();
+ if (!elem)
+ goto exit;
+
+ if (!hv_stores(elem, "ip", newSVuv(node->ip))) {
+ hv_undef(elem);
+ goto exit;
+ }
+
+ if (node->sym) {
+ HV *sym = newHV();
+ if (!sym) {
+ hv_undef(elem);
+ goto exit;
+ }
+ if (!hv_stores(sym, "start", newSVuv(node->sym->start)) ||
+ !hv_stores(sym, "end", newSVuv(node->sym->end)) ||
+ !hv_stores(sym, "binding", newSVuv(node->sym->binding)) ||
+ !hv_stores(sym, "name", newSVpvn(node->sym->name,
+ node->sym->namelen)) ||
+ !hv_stores(elem, "sym", newRV_noinc((SV*)sym))) {
+ hv_undef(sym);
+ hv_undef(elem);
+ goto exit;
+ }
+ }
+
+ if (node->map) {
+ struct map *map = node->map;
+ const char *dsoname = "[unknown]";
+ if (map && map->dso && (map->dso->name || map->dso->long_name)) {
+ if (symbol_conf.show_kernel_path && map->dso->long_name)
+ dsoname = map->dso->long_name;
+ else if (map->dso->name)
+ dsoname = map->dso->name;
+ }
+ if (!hv_stores(elem, "dso", newSVpv(dsoname,0))) {
+ hv_undef(elem);
+ goto exit;
+ }
+ }
+
+ callchain_cursor_advance(&callchain_cursor);
+ av_push(list, newRV_noinc((SV*)elem));
+ }
+
+exit:
+ return newRV_noinc((SV*)list);
+}
+
static void perl_process_tracepoint(struct perf_sample *sample,
struct perf_evsel *evsel,
- struct thread *thread)
+ struct addr_location *al)
{
+ struct thread *thread = al->thread;
struct event_format *event = evsel->tp_format;
struct format_field *field;
static char handler[256];
@@ -295,6 +377,7 @@
XPUSHs(sv_2mortal(newSVuv(ns)));
XPUSHs(sv_2mortal(newSViv(pid)));
XPUSHs(sv_2mortal(newSVpv(comm, 0)));
+ XPUSHs(sv_2mortal(perl_process_callchain(sample, evsel, al)));
/* common fields other than pid can be accessed via xsub fns */
@@ -329,6 +412,7 @@
XPUSHs(sv_2mortal(newSVuv(nsecs)));
XPUSHs(sv_2mortal(newSViv(pid)));
XPUSHs(sv_2mortal(newSVpv(comm, 0)));
+ XPUSHs(sv_2mortal(perl_process_callchain(sample, evsel, al)));
call_pv("main::trace_unhandled", G_SCALAR);
}
SPAGAIN;
@@ -366,7 +450,7 @@
struct perf_evsel *evsel,
struct addr_location *al)
{
- perl_process_tracepoint(sample, evsel, al->thread);
+ perl_process_tracepoint(sample, evsel, al);
perl_process_event_generic(event, sample, evsel);
}
@@ -490,7 +574,27 @@
fprintf(ofp, "use Perf::Trace::Util;\n\n");
fprintf(ofp, "sub trace_begin\n{\n\t# optional\n}\n\n");
- fprintf(ofp, "sub trace_end\n{\n\t# optional\n}\n\n");
+ fprintf(ofp, "sub trace_end\n{\n\t# optional\n}\n");
+
+
+ fprintf(ofp, "\n\
+sub print_backtrace\n\
+{\n\
+ my $callchain = shift;\n\
+ for my $node (@$callchain)\n\
+ {\n\
+ if(exists $node->{sym})\n\
+ {\n\
+ printf( \"\\t[\\%%x] \\%%s\\n\", $node->{ip}, $node->{sym}{name});\n\
+ }\n\
+ else\n\
+ {\n\
+ printf( \"\\t[\\%%x]\\n\", $node{ip});\n\
+ }\n\
+ }\n\
+}\n\n\
+");
+
while ((event = trace_find_next_event(pevent, event))) {
fprintf(ofp, "sub %s::%s\n{\n", event->system, event->name);
@@ -502,7 +606,8 @@
fprintf(ofp, "$common_secs, ");
fprintf(ofp, "$common_nsecs,\n");
fprintf(ofp, "\t $common_pid, ");
- fprintf(ofp, "$common_comm,\n\t ");
+ fprintf(ofp, "$common_comm, ");
+ fprintf(ofp, "$common_callchain,\n\t ");
not_first = 0;
count = 0;
@@ -519,7 +624,7 @@
fprintf(ofp, "\tprint_header($event_name, $common_cpu, "
"$common_secs, $common_nsecs,\n\t "
- "$common_pid, $common_comm);\n\n");
+ "$common_pid, $common_comm, $common_callchain);\n\n");
fprintf(ofp, "\tprintf(\"");
@@ -581,17 +686,22 @@
fprintf(ofp, "$%s", f->name);
}
- fprintf(ofp, ");\n");
+ fprintf(ofp, ");\n\n");
+
+ fprintf(ofp, "\tprint_backtrace($common_callchain);\n");
+
fprintf(ofp, "}\n\n");
}
fprintf(ofp, "sub trace_unhandled\n{\n\tmy ($event_name, $context, "
"$common_cpu, $common_secs, $common_nsecs,\n\t "
- "$common_pid, $common_comm) = @_;\n\n");
+ "$common_pid, $common_comm, $common_callchain) = @_;\n\n");
fprintf(ofp, "\tprint_header($event_name, $common_cpu, "
"$common_secs, $common_nsecs,\n\t $common_pid, "
- "$common_comm);\n}\n\n");
+ "$common_comm, $common_callchain);\n");
+ fprintf(ofp, "\tprint_backtrace($common_callchain);\n");
+ fprintf(ofp, "}\n\n");
fprintf(ofp, "sub print_header\n{\n"
"\tmy ($event_name, $cpu, $secs, $nsecs, $pid, $comm) = @_;\n\n"
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index fbd0524..ff13470 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -41,6 +41,7 @@
#include "../thread-stack.h"
#include "../trace-event.h"
#include "../machine.h"
+#include "../call-path.h"
#include "thread_map.h"
#include "cpumap.h"
#include "stat.h"
@@ -323,7 +324,7 @@
if (!symbol_conf.use_callchain || !sample->callchain)
goto exit;
- if (thread__resolve_callchain(al->thread, evsel,
+ if (thread__resolve_callchain(al->thread, &callchain_cursor, evsel,
sample, NULL, NULL,
scripting_max_stack) != 0) {
pr_err("Failed to resolve callchain. Skipping\n");
@@ -407,8 +408,11 @@
if (!t)
Py_FatalError("couldn't create Python tuple");
- if (!event)
- die("ug! no event found for type %d", (int)evsel->attr.config);
+ if (!event) {
+ snprintf(handler_name, sizeof(handler_name),
+ "ug! no event found for type %" PRIu64, (u64)evsel->attr.config);
+ Py_FatalError(handler_name);
+ }
pid = raw_field_value(event, "common_pid", data);
@@ -614,7 +618,7 @@
struct machine *machine)
{
struct tables *tables = container_of(dbe, struct tables, dbe);
- char sbuild_id[BUILD_ID_SIZE * 2 + 1];
+ char sbuild_id[SBUILD_ID_SIZE];
PyObject *t;
build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id);
@@ -681,7 +685,7 @@
struct tables *tables = container_of(dbe, struct tables, dbe);
PyObject *t;
- t = tuple_new(21);
+ t = tuple_new(22);
tuple_set_u64(t, 0, es->db_id);
tuple_set_u64(t, 1, es->evsel->db_id);
@@ -704,6 +708,7 @@
tuple_set_u64(t, 18, es->sample->data_src);
tuple_set_s32(t, 19, es->sample->flags & PERF_BRANCH_MASK);
tuple_set_s32(t, 20, !!(es->sample->flags & PERF_IP_FLAG_IN_TX));
+ tuple_set_u64(t, 21, es->call_path_id);
call_object(tables->sample_handler, t, "sample_table");
@@ -998,8 +1003,10 @@
{
const char *perf_db_export_mode = "perf_db_export_mode";
const char *perf_db_export_calls = "perf_db_export_calls";
- PyObject *db_export_mode, *db_export_calls;
+ const char *perf_db_export_callchains = "perf_db_export_callchains";
+ PyObject *db_export_mode, *db_export_calls, *db_export_callchains;
bool export_calls = false;
+ bool export_callchains = false;
int ret;
memset(tables, 0, sizeof(struct tables));
@@ -1016,6 +1023,7 @@
if (!ret)
return;
+ /* handle export calls */
tables->dbe.crp = NULL;
db_export_calls = PyDict_GetItemString(main_dict, perf_db_export_calls);
if (db_export_calls) {
@@ -1033,6 +1041,33 @@
Py_FatalError("failed to create calls processor");
}
+ /* handle export callchains */
+ tables->dbe.cpr = NULL;
+ db_export_callchains = PyDict_GetItemString(main_dict,
+ perf_db_export_callchains);
+ if (db_export_callchains) {
+ ret = PyObject_IsTrue(db_export_callchains);
+ if (ret == -1)
+ handler_call_die(perf_db_export_callchains);
+ export_callchains = !!ret;
+ }
+
+ if (export_callchains) {
+ /*
+ * Attempt to use the call path root from the call return
+ * processor, if the call return processor is in use. Otherwise,
+ * we allocate a new call path root. This prevents exporting
+ * duplicate call path ids when both are in use simultaneously.
+ */
+ if (tables->dbe.crp)
+ tables->dbe.cpr = tables->dbe.crp->cpr;
+ else
+ tables->dbe.cpr = call_path_root__new();
+
+ if (!tables->dbe.cpr)
+ Py_FatalError("failed to create call path root");
+ }
+
tables->db_export_mode = true;
/*
* Reserve per symbol space for symbol->db_id via symbol__priv()
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 4abd85c..2335b28 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -409,6 +409,8 @@
tool->stat = process_stat_stub;
if (tool->stat_round == NULL)
tool->stat_round = process_stat_round_stub;
+ if (tool->time_conv == NULL)
+ tool->time_conv = process_event_op2_stub;
}
static void swap_sample_id_all(union perf_event *event, void *data)
@@ -794,6 +796,7 @@
[PERF_RECORD_STAT] = perf_event__stat_swap,
[PERF_RECORD_STAT_ROUND] = perf_event__stat_round_swap,
[PERF_RECORD_EVENT_UPDATE] = perf_event__event_update_swap,
+ [PERF_RECORD_TIME_CONV] = perf_event__all64_swap,
[PERF_RECORD_HEADER_MAX] = NULL,
};
@@ -904,7 +907,7 @@
unsigned int i;
struct ip_callchain *callchain = sample->callchain;
- if (has_branch_callstack(evsel))
+ if (perf_evsel__has_branch_callstack(evsel))
callchain__lbr_callstack_printf(sample);
printf("... FP chain: nr:%" PRIu64 "\n", callchain->nr);
@@ -1078,7 +1081,7 @@
if (sample_type & PERF_SAMPLE_CALLCHAIN)
callchain__printf(evsel, sample);
- if ((sample_type & PERF_SAMPLE_BRANCH_STACK) && !has_branch_callstack(evsel))
+ if ((sample_type & PERF_SAMPLE_BRANCH_STACK) && !perf_evsel__has_branch_callstack(evsel))
branch_stack__printf(sample);
if (sample_type & PERF_SAMPLE_REGS_USER)
@@ -1341,6 +1344,9 @@
return tool->stat(tool, event, session);
case PERF_RECORD_STAT_ROUND:
return tool->stat_round(tool, event, session);
+ case PERF_RECORD_TIME_CONV:
+ session->time_conv = event->time_conv;
+ return tool->time_conv(tool, event, session);
default:
return -EINVAL;
}
@@ -1830,7 +1836,11 @@
out_err:
ui_progress__finish();
perf_session__warn_about_errors(session);
- ordered_events__free(&session->ordered_events);
+ /*
+ * We may be switching perf.data output; make ordered_events
+ * reusable.
+ */
+ ordered_events__reinit(&session->ordered_events);
auxtrace__free_events(session);
session->one_mmap = false;
return err;
@@ -1947,105 +1957,6 @@
return NULL;
}
-void perf_evsel__print_ip(struct perf_evsel *evsel, struct perf_sample *sample,
- struct addr_location *al,
- unsigned int print_opts, unsigned int stack_depth)
-{
- struct callchain_cursor_node *node;
- int print_ip = print_opts & PRINT_IP_OPT_IP;
- int print_sym = print_opts & PRINT_IP_OPT_SYM;
- int print_dso = print_opts & PRINT_IP_OPT_DSO;
- int print_symoffset = print_opts & PRINT_IP_OPT_SYMOFFSET;
- int print_oneline = print_opts & PRINT_IP_OPT_ONELINE;
- int print_srcline = print_opts & PRINT_IP_OPT_SRCLINE;
- char s = print_oneline ? ' ' : '\t';
-
- if (symbol_conf.use_callchain && sample->callchain) {
- struct addr_location node_al;
-
- if (thread__resolve_callchain(al->thread, evsel,
- sample, NULL, NULL,
- stack_depth) != 0) {
- if (verbose)
- error("Failed to resolve callchain. Skipping\n");
- return;
- }
- callchain_cursor_commit(&callchain_cursor);
-
- if (print_symoffset)
- node_al = *al;
-
- while (stack_depth) {
- u64 addr = 0;
-
- node = callchain_cursor_current(&callchain_cursor);
- if (!node)
- break;
-
- if (node->sym && node->sym->ignore)
- goto next;
-
- if (print_ip)
- printf("%c%16" PRIx64, s, node->ip);
-
- if (node->map)
- addr = node->map->map_ip(node->map, node->ip);
-
- if (print_sym) {
- printf(" ");
- if (print_symoffset) {
- node_al.addr = addr;
- node_al.map = node->map;
- symbol__fprintf_symname_offs(node->sym, &node_al, stdout);
- } else
- symbol__fprintf_symname(node->sym, stdout);
- }
-
- if (print_dso) {
- printf(" (");
- map__fprintf_dsoname(node->map, stdout);
- printf(")");
- }
-
- if (print_srcline)
- map__fprintf_srcline(node->map, addr, "\n ",
- stdout);
-
- if (!print_oneline)
- printf("\n");
-
- stack_depth--;
-next:
- callchain_cursor_advance(&callchain_cursor);
- }
-
- } else {
- if (al->sym && al->sym->ignore)
- return;
-
- if (print_ip)
- printf("%16" PRIx64, sample->ip);
-
- if (print_sym) {
- printf(" ");
- if (print_symoffset)
- symbol__fprintf_symname_offs(al->sym, al,
- stdout);
- else
- symbol__fprintf_symname(al->sym, stdout);
- }
-
- if (print_dso) {
- printf(" (");
- map__fprintf_dsoname(al->map, stdout);
- printf(")");
- }
-
- if (print_srcline)
- map__fprintf_srcline(al->map, al->addr, "\n ", stdout);
- }
-}
-
int perf_session__cpu_bitmap(struct perf_session *session,
const char *cpu_list, unsigned long *cpu_bitmap)
{
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 5f792e3..4bd7585 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -26,6 +26,7 @@
struct itrace_synth_opts *itrace_synth_opts;
struct list_head auxtrace_index;
struct trace_event tevent;
+ struct time_conv_event time_conv;
bool repipe;
bool one_mmap;
void *one_mmap_addr;
@@ -35,13 +36,6 @@
struct perf_tool *tool;
};
-#define PRINT_IP_OPT_IP (1<<0)
-#define PRINT_IP_OPT_SYM (1<<1)
-#define PRINT_IP_OPT_DSO (1<<2)
-#define PRINT_IP_OPT_SYMOFFSET (1<<3)
-#define PRINT_IP_OPT_ONELINE (1<<4)
-#define PRINT_IP_OPT_SRCLINE (1<<5)
-
struct perf_tool;
struct perf_session *perf_session__new(struct perf_data_file *file,
@@ -103,10 +97,6 @@
struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
unsigned int type);
-void perf_evsel__print_ip(struct perf_evsel *evsel, struct perf_sample *sample,
- struct addr_location *al,
- unsigned int print_opts, unsigned int stack_depth);
-
int perf_session__cpu_bitmap(struct perf_session *session,
const char *cpu_list, unsigned long *cpu_bitmap);
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index f5ba111..20e69ed 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -21,13 +21,6 @@
const char *field_order;
regex_t ignore_callees_regex;
int have_ignore_callees = 0;
-int sort__need_collapse = 0;
-int sort__has_parent = 0;
-int sort__has_sym = 0;
-int sort__has_dso = 0;
-int sort__has_socket = 0;
-int sort__has_thread = 0;
-int sort__has_comm = 0;
enum sort_mode sort__mode = SORT_MODE__NORMAL;
/*
@@ -244,7 +237,7 @@
* comparing symbol address alone is not enough since it's a
* relative address within a dso.
*/
- if (!sort__has_dso) {
+ if (!hists__has(left->hists, dso) || hists__has(right->hists, dso)) {
ret = sort__dso_cmp(left, right);
if (ret != 0)
return ret;
@@ -2163,7 +2156,7 @@
return -1;
if (sd->entry->se_collapse)
- sort__need_collapse = 1;
+ list->need_collapse = 1;
sd->taken = 1;
@@ -2245,9 +2238,9 @@
pr_err("Invalid regex: %s\n%s", parent_pattern, err);
return -EINVAL;
}
- sort__has_parent = 1;
+ list->parent = 1;
} else if (sd->entry == &sort_sym) {
- sort__has_sym = 1;
+ list->sym = 1;
/*
* perf diff displays the performance difference amongst
* two or more perf.data files. Those files could come
@@ -2258,13 +2251,13 @@
sd->entry->se_collapse = sort__sym_sort;
} else if (sd->entry == &sort_dso) {
- sort__has_dso = 1;
+ list->dso = 1;
} else if (sd->entry == &sort_socket) {
- sort__has_socket = 1;
+ list->socket = 1;
} else if (sd->entry == &sort_thread) {
- sort__has_thread = 1;
+ list->thread = 1;
} else if (sd->entry == &sort_comm) {
- sort__has_comm = 1;
+ list->comm = 1;
}
return __sort_dimension__add(sd, list, level);
@@ -2289,7 +2282,7 @@
return -EINVAL;
if (sd->entry == &sort_sym_from || sd->entry == &sort_sym_to)
- sort__has_sym = 1;
+ list->sym = 1;
__sort_dimension__add(sd, list, level);
return 0;
@@ -2305,7 +2298,7 @@
return -EINVAL;
if (sd->entry == &sort_mem_daddr_sym)
- sort__has_sym = 1;
+ list->sym = 1;
__sort_dimension__add(sd, list, level);
return 0;
@@ -2749,10 +2742,10 @@
void reset_output_field(void)
{
- sort__need_collapse = 0;
- sort__has_parent = 0;
- sort__has_sym = 0;
- sort__has_dso = 0;
+ perf_hpp_list.need_collapse = 0;
+ perf_hpp_list.parent = 0;
+ perf_hpp_list.sym = 0;
+ perf_hpp_list.dso = 0;
field_order = NULL;
sort_order = NULL;
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 3f4e359..42927f4 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -31,13 +31,6 @@
extern const char default_sort_order[];
extern regex_t ignore_callees_regex;
extern int have_ignore_callees;
-extern int sort__need_collapse;
-extern int sort__has_dso;
-extern int sort__has_parent;
-extern int sort__has_sym;
-extern int sort__has_socket;
-extern int sort__has_thread;
-extern int sort__has_comm;
extern enum sort_mode sort__mode;
extern struct sort_entry sort_comm;
extern struct sort_entry sort_dso;
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 4d9b481..ffa1d06 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -307,6 +307,7 @@
struct perf_counts_values *aggr = &counter->counts->aggr;
struct perf_stat_evsel *ps = counter->priv;
u64 *count = counter->counts->aggr.values;
+ u64 val;
int i, ret;
aggr->val = aggr->ena = aggr->run = 0;
@@ -346,7 +347,8 @@
/*
* Save the full runtime - to allow normalization during printout:
*/
- perf_stat__update_shadow_stats(counter, count, 0);
+ val = counter->scale * *count;
+ perf_stat__update_shadow_stats(counter, &val, 0);
return 0;
}
diff --git a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c
index 8fb7329..f95f682 100644
--- a/tools/perf/util/strbuf.c
+++ b/tools/perf/util/strbuf.c
@@ -1,3 +1,4 @@
+#include "debug.h"
#include "cache.h"
#include <linux/kernel.h>
@@ -17,12 +18,13 @@
*/
char strbuf_slopbuf[1];
-void strbuf_init(struct strbuf *sb, ssize_t hint)
+int strbuf_init(struct strbuf *sb, ssize_t hint)
{
sb->alloc = sb->len = 0;
sb->buf = strbuf_slopbuf;
if (hint)
- strbuf_grow(sb, hint);
+ return strbuf_grow(sb, hint);
+ return 0;
}
void strbuf_release(struct strbuf *sb)
@@ -42,67 +44,104 @@
return res;
}
-void strbuf_grow(struct strbuf *sb, size_t extra)
+int strbuf_grow(struct strbuf *sb, size_t extra)
{
- if (sb->len + extra + 1 <= sb->len)
- die("you want to use way too much memory");
- if (!sb->alloc)
- sb->buf = NULL;
- ALLOC_GROW(sb->buf, sb->len + extra + 1, sb->alloc);
+ char *buf;
+ size_t nr = sb->len + extra + 1;
+
+ if (nr < sb->alloc)
+ return 0;
+
+ if (nr <= sb->len)
+ return -E2BIG;
+
+ if (alloc_nr(sb->alloc) > nr)
+ nr = alloc_nr(sb->alloc);
+
+ /*
+ * Note that sb->buf == strbuf_slopbuf if sb->alloc == 0, and it is
+ * a static variable. Thus we have to avoid passing it to realloc.
+ */
+ buf = realloc(sb->alloc ? sb->buf : NULL, nr * sizeof(*buf));
+ if (!buf)
+ return -ENOMEM;
+
+ sb->buf = buf;
+ sb->alloc = nr;
+ return 0;
}
-void strbuf_addch(struct strbuf *sb, int c)
+int strbuf_addch(struct strbuf *sb, int c)
{
- strbuf_grow(sb, 1);
+ int ret = strbuf_grow(sb, 1);
+ if (ret)
+ return ret;
+
sb->buf[sb->len++] = c;
sb->buf[sb->len] = '\0';
+ return 0;
}
-void strbuf_add(struct strbuf *sb, const void *data, size_t len)
+int strbuf_add(struct strbuf *sb, const void *data, size_t len)
{
- strbuf_grow(sb, len);
+ int ret = strbuf_grow(sb, len);
+ if (ret)
+ return ret;
+
memcpy(sb->buf + sb->len, data, len);
- strbuf_setlen(sb, sb->len + len);
+ return strbuf_setlen(sb, sb->len + len);
}
-static void strbuf_addv(struct strbuf *sb, const char *fmt, va_list ap)
+static int strbuf_addv(struct strbuf *sb, const char *fmt, va_list ap)
{
- int len;
+ int len, ret;
va_list ap_saved;
- if (!strbuf_avail(sb))
- strbuf_grow(sb, 64);
+ if (!strbuf_avail(sb)) {
+ ret = strbuf_grow(sb, 64);
+ if (ret)
+ return ret;
+ }
va_copy(ap_saved, ap);
len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap);
if (len < 0)
- die("your vsnprintf is broken");
+ return len;
if (len > strbuf_avail(sb)) {
- strbuf_grow(sb, len);
+ ret = strbuf_grow(sb, len);
+ if (ret)
+ return ret;
len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap_saved);
va_end(ap_saved);
if (len > strbuf_avail(sb)) {
- die("this should not happen, your vsnprintf is broken");
+ pr_debug("this should not happen, your vsnprintf is broken");
+ return -EINVAL;
}
}
- strbuf_setlen(sb, sb->len + len);
+ return strbuf_setlen(sb, sb->len + len);
}
-void strbuf_addf(struct strbuf *sb, const char *fmt, ...)
+int strbuf_addf(struct strbuf *sb, const char *fmt, ...)
{
va_list ap;
+ int ret;
va_start(ap, fmt);
- strbuf_addv(sb, fmt, ap);
+ ret = strbuf_addv(sb, fmt, ap);
va_end(ap);
+ return ret;
}
ssize_t strbuf_read(struct strbuf *sb, int fd, ssize_t hint)
{
size_t oldlen = sb->len;
size_t oldalloc = sb->alloc;
+ int ret;
- strbuf_grow(sb, hint ? hint : 8192);
+ ret = strbuf_grow(sb, hint ? hint : 8192);
+ if (ret)
+ return ret;
+
for (;;) {
ssize_t cnt;
@@ -112,12 +151,14 @@
strbuf_release(sb);
else
strbuf_setlen(sb, oldlen);
- return -1;
+ return cnt;
}
if (!cnt)
break;
sb->len += cnt;
- strbuf_grow(sb, 8192);
+ ret = strbuf_grow(sb, 8192);
+ if (ret)
+ return ret;
}
sb->buf[sb->len] = '\0';
diff --git a/tools/perf/util/strbuf.h b/tools/perf/util/strbuf.h
index ab9be0fb..54b4092 100644
--- a/tools/perf/util/strbuf.h
+++ b/tools/perf/util/strbuf.h
@@ -51,7 +51,7 @@
#define STRBUF_INIT { 0, 0, strbuf_slopbuf }
/*----- strbuf life cycle -----*/
-void strbuf_init(struct strbuf *buf, ssize_t hint);
+int strbuf_init(struct strbuf *buf, ssize_t hint);
void strbuf_release(struct strbuf *buf);
char *strbuf_detach(struct strbuf *buf, size_t *);
@@ -60,26 +60,31 @@
return sb->alloc ? sb->alloc - sb->len - 1 : 0;
}
-void strbuf_grow(struct strbuf *buf, size_t);
+int strbuf_grow(struct strbuf *buf, size_t);
-static inline void strbuf_setlen(struct strbuf *sb, size_t len) {
- if (!sb->alloc)
- strbuf_grow(sb, 0);
+static inline int strbuf_setlen(struct strbuf *sb, size_t len) {
+ int ret;
+ if (!sb->alloc) {
+ ret = strbuf_grow(sb, 0);
+ if (ret)
+ return ret;
+ }
assert(len < sb->alloc);
sb->len = len;
sb->buf[len] = '\0';
+ return 0;
}
/*----- add data in your buffer -----*/
-void strbuf_addch(struct strbuf *sb, int c);
+int strbuf_addch(struct strbuf *sb, int c);
-void strbuf_add(struct strbuf *buf, const void *, size_t);
-static inline void strbuf_addstr(struct strbuf *sb, const char *s) {
- strbuf_add(sb, s, strlen(s));
+int strbuf_add(struct strbuf *buf, const void *, size_t);
+static inline int strbuf_addstr(struct strbuf *sb, const char *s) {
+ return strbuf_add(sb, s, strlen(s));
}
__attribute__((format(printf,2,3)))
-void strbuf_addf(struct strbuf *sb, const char *fmt, ...);
+int strbuf_addf(struct strbuf *sb, const char *fmt, ...);
/* XXX: if read fails, any partial read is undone */
ssize_t strbuf_read(struct strbuf *, int fd, ssize_t hint);
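With every strbuf primitive now returning an error code instead of dying, callers follow the same check-and-release pattern that the probe-event and quote conversions above use. A minimal sketch of the pattern (the helper name and include path are illustrative):

    #include <stdio.h>
    #include <stdlib.h>
    #include "util/strbuf.h"

    static char *make_event_string(const char *group, const char *event)
    {
        struct strbuf buf = STRBUF_INIT;
        char *ret = NULL;

        if (strbuf_addf(&buf, "%s:%s", group, event) < 0)
            goto out;
        if (strbuf_addch(&buf, '\n') < 0)
            goto out;

        ret = strbuf_detach(&buf, NULL); /* caller frees */
    out:
        strbuf_release(&buf);            /* safe no-op after a detach */
        return ret;
    }

    int main(void)
    {
        char *s = make_event_string("probe", "vfs_getname");
        if (s) {
            fputs(s, stdout);
            free(s);
        }
        return 0;
    }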
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index bc229a7..87a297d 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -709,17 +709,10 @@
if (ss->opdshdr.sh_type != SHT_PROGBITS)
ss->opdsec = NULL;
- if (dso->kernel == DSO_TYPE_USER) {
- GElf_Shdr shdr;
- ss->adjust_symbols = (ehdr.e_type == ET_EXEC ||
- ehdr.e_type == ET_REL ||
- dso__is_vdso(dso) ||
- elf_section_by_name(elf, &ehdr, &shdr,
- ".gnu.prelink_undo",
- NULL) != NULL);
- } else {
+ if (dso->kernel == DSO_TYPE_USER)
+ ss->adjust_symbols = true;
+ else
ss->adjust_symbols = elf__needs_adjust_symbols(ehdr);
- }
ss->name = strdup(name);
if (!ss->name) {
@@ -777,7 +770,8 @@
return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle;
}
-void __weak arch__elf_sym_adjust(GElf_Sym *sym __maybe_unused) { }
+void __weak arch__sym_update(struct symbol *s __maybe_unused,
+ GElf_Sym *sym __maybe_unused) { }
int dso__load_sym(struct dso *dso, struct map *map,
struct symsrc *syms_ss, struct symsrc *runtime_ss,
@@ -954,8 +948,6 @@
(sym.st_value & 1))
--sym.st_value;
- arch__elf_sym_adjust(&sym);
-
if (dso->kernel || kmodule) {
char dso_name[PATH_MAX];
@@ -1089,6 +1081,8 @@
if (!f)
goto out_elf_end;
+ arch__sym_update(f, &sym);
+
if (filter && filter(curr_map, f))
symbol__delete(f);
else {
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index e7588dc..7fb3330 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -255,40 +255,6 @@
free(((void *)sym) - symbol_conf.priv_size);
}
-size_t symbol__fprintf(struct symbol *sym, FILE *fp)
-{
- return fprintf(fp, " %" PRIx64 "-%" PRIx64 " %c %s\n",
- sym->start, sym->end,
- sym->binding == STB_GLOBAL ? 'g' :
- sym->binding == STB_LOCAL ? 'l' : 'w',
- sym->name);
-}
-
-size_t symbol__fprintf_symname_offs(const struct symbol *sym,
- const struct addr_location *al, FILE *fp)
-{
- unsigned long offset;
- size_t length;
-
- if (sym && sym->name) {
- length = fprintf(fp, "%s", sym->name);
- if (al) {
- if (al->addr < sym->end)
- offset = al->addr - sym->start;
- else
- offset = al->addr - al->map->start - sym->start;
- length += fprintf(fp, "+0x%lx", offset);
- }
- return length;
- } else
- return fprintf(fp, "[unknown]");
-}
-
-size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp)
-{
- return symbol__fprintf_symname_offs(sym, NULL, fp);
-}
-
void symbols__delete(struct rb_root *symbols)
{
struct symbol *pos;
@@ -335,7 +301,7 @@
if (ip < s->start)
n = n->rb_left;
- else if (ip >= s->end)
+ else if (ip > s->end || (ip == s->end && ip != s->start))
n = n->rb_right;
else
return s;
@@ -364,11 +330,6 @@
return NULL;
}
-struct symbol_name_rb_node {
- struct rb_node rb_node;
- struct symbol sym;
-};
-
static void symbols__insert_by_name(struct rb_root *symbols, struct symbol *sym)
{
struct rb_node **p = &symbols->rb_node;
@@ -452,6 +413,18 @@
}
}
+void dso__insert_symbol(struct dso *dso, enum map_type type, struct symbol *sym)
+{
+ symbols__insert(&dso->symbols[type], sym);
+
+ /* update the symbol cache if necessary */
+ if (dso->last_find_result[type].addr >= sym->start &&
+ (dso->last_find_result[type].addr < sym->end ||
+ sym->start == sym->end)) {
+ dso->last_find_result[type].symbol = sym;
+ }
+}
+
struct symbol *dso__find_symbol(struct dso *dso,
enum map_type type, u64 addr)
{
@@ -497,21 +470,6 @@
&dso->symbols[type]);
}
-size_t dso__fprintf_symbols_by_name(struct dso *dso,
- enum map_type type, FILE *fp)
-{
- size_t ret = 0;
- struct rb_node *nd;
- struct symbol_name_rb_node *pos;
-
- for (nd = rb_first(&dso->symbol_names[type]); nd; nd = rb_next(nd)) {
- pos = rb_entry(nd, struct symbol_name_rb_node, rb_node);
- fprintf(fp, "%s\n", pos->sym.name);
- }
-
- return ret;
-}
-
int modules__parse(const char *filename, void *arg,
int (*process_module)(void *arg, const char *name,
u64 start))
@@ -1262,8 +1220,8 @@
return 0;
}
-int dso__load_kallsyms(struct dso *dso, const char *filename,
- struct map *map, symbol_filter_t filter)
+int __dso__load_kallsyms(struct dso *dso, const char *filename,
+ struct map *map, bool no_kcore, symbol_filter_t filter)
{
u64 delta = 0;
@@ -1284,12 +1242,18 @@
else
dso->symtab_type = DSO_BINARY_TYPE__KALLSYMS;
- if (!dso__load_kcore(dso, map, filename))
+ if (!no_kcore && !dso__load_kcore(dso, map, filename))
return dso__split_kallsyms_for_kcore(dso, map, filter);
else
return dso__split_kallsyms(dso, map, delta, filter);
}
+int dso__load_kallsyms(struct dso *dso, const char *filename,
+ struct map *map, symbol_filter_t filter)
+{
+ return __dso__load_kallsyms(dso, filename, map, false, filter);
+}
+
static int dso__load_perf_map(struct dso *dso, struct map *map,
symbol_filter_t filter)
{
@@ -1644,25 +1608,27 @@
return err;
}
+static bool visible_dir_filter(const char *name, struct dirent *d)
+{
+ if (d->d_type != DT_DIR)
+ return false;
+ return lsdir_no_dot_filter(name, d);
+}
+
static int find_matching_kcore(struct map *map, char *dir, size_t dir_sz)
{
char kallsyms_filename[PATH_MAX];
- struct dirent *dent;
int ret = -1;
- DIR *d;
+ struct strlist *dirs;
+ struct str_node *nd;
- d = opendir(dir);
- if (!d)
+ dirs = lsdir(dir, visible_dir_filter);
+ if (!dirs)
return -1;
- while (1) {
- dent = readdir(d);
- if (!dent)
- break;
- if (dent->d_type != DT_DIR)
- continue;
+ strlist__for_each(nd, dirs) {
scnprintf(kallsyms_filename, sizeof(kallsyms_filename),
- "%s/%s/kallsyms", dir, dent->d_name);
+ "%s/%s/kallsyms", dir, nd->s);
if (!validate_kcore_addresses(kallsyms_filename, map)) {
strlcpy(dir, kallsyms_filename, dir_sz);
ret = 0;
@@ -1670,7 +1636,7 @@
}
}
- closedir(d);
+ strlist__delete(dirs);
return ret;
}
@@ -1678,7 +1644,7 @@
static char *dso__find_kallsyms(struct dso *dso, struct map *map)
{
u8 host_build_id[BUILD_ID_SIZE];
- char sbuild_id[BUILD_ID_SIZE * 2 + 1];
+ char sbuild_id[SBUILD_ID_SIZE];
bool is_host = false;
char path[PATH_MAX];
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index c8b7544..2b5e4ed 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -55,6 +55,7 @@
u16 namelen;
u8 binding;
bool ignore;
+ u8 arch_sym;
char name[0];
};
@@ -140,6 +141,11 @@
extern struct symbol_conf symbol_conf;
+struct symbol_name_rb_node {
+ struct rb_node rb_node;
+ struct symbol sym;
+};
+
static inline int __symbol__join_symfs(char *bf, size_t size, const char *path)
{
return path__join(bf, size, symbol_conf.symfs, path);
@@ -235,9 +241,14 @@
symbol_filter_t filter);
int dso__load_vmlinux_path(struct dso *dso, struct map *map,
symbol_filter_t filter);
+int __dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map,
+ bool no_kcore, symbol_filter_t filter);
int dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map,
symbol_filter_t filter);
+void dso__insert_symbol(struct dso *dso, enum map_type type,
+ struct symbol *sym);
+
struct symbol *dso__find_symbol(struct dso *dso, enum map_type type,
u64 addr);
struct symbol *dso__find_symbol_by_name(struct dso *dso, enum map_type type,
@@ -262,8 +273,14 @@
void symbol__exit(void);
void symbol__elf_init(void);
struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name);
+size_t __symbol__fprintf_symname_offs(const struct symbol *sym,
+ const struct addr_location *al,
+ bool unknown_as_addr, FILE *fp);
size_t symbol__fprintf_symname_offs(const struct symbol *sym,
const struct addr_location *al, FILE *fp);
+size_t __symbol__fprintf_symname(const struct symbol *sym,
+ const struct addr_location *al,
+ bool unknown_as_addr, FILE *fp);
size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp);
size_t symbol__fprintf(struct symbol *sym, FILE *fp);
bool symbol_type__is_a(char symbol_type, enum map_type map_type);
@@ -310,7 +327,7 @@
#ifdef HAVE_LIBELF_SUPPORT
bool elf__needs_adjust_symbols(GElf_Ehdr ehdr);
-void arch__elf_sym_adjust(GElf_Sym *sym);
+void arch__sym_update(struct symbol *s, GElf_Sym *sym);
#endif
#define SYMBOL_A 0
diff --git a/tools/perf/util/symbol_fprintf.c b/tools/perf/util/symbol_fprintf.c
new file mode 100644
index 0000000..a680bda
--- /dev/null
+++ b/tools/perf/util/symbol_fprintf.c
@@ -0,0 +1,71 @@
+#include <elf.h>
+#include <inttypes.h>
+#include <stdio.h>
+
+#include "symbol.h"
+
+size_t symbol__fprintf(struct symbol *sym, FILE *fp)
+{
+ return fprintf(fp, " %" PRIx64 "-%" PRIx64 " %c %s\n",
+ sym->start, sym->end,
+ sym->binding == STB_GLOBAL ? 'g' :
+ sym->binding == STB_LOCAL ? 'l' : 'w',
+ sym->name);
+}
+
+size_t __symbol__fprintf_symname_offs(const struct symbol *sym,
+ const struct addr_location *al,
+ bool unknown_as_addr, FILE *fp)
+{
+ unsigned long offset;
+ size_t length;
+
+ if (sym && sym->name) {
+ length = fprintf(fp, "%s", sym->name);
+ if (al) {
+ if (al->addr < sym->end)
+ offset = al->addr - sym->start;
+ else
+ offset = al->addr - al->map->start - sym->start;
+ length += fprintf(fp, "+0x%lx", offset);
+ }
+ return length;
+ } else if (al && unknown_as_addr)
+ return fprintf(fp, "[%#" PRIx64 "]", al->addr);
+ else
+ return fprintf(fp, "[unknown]");
+}
+
+size_t symbol__fprintf_symname_offs(const struct symbol *sym,
+ const struct addr_location *al,
+ FILE *fp)
+{
+ return __symbol__fprintf_symname_offs(sym, al, false, fp);
+}
+
+size_t __symbol__fprintf_symname(const struct symbol *sym,
+ const struct addr_location *al,
+ bool unknown_as_addr, FILE *fp)
+{
+ return __symbol__fprintf_symname_offs(sym, al, unknown_as_addr, fp);
+}
+
+size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp)
+{
+ return __symbol__fprintf_symname_offs(sym, NULL, false, fp);
+}
+
+size_t dso__fprintf_symbols_by_name(struct dso *dso,
+ enum map_type type, FILE *fp)
+{
+ size_t ret = 0;
+ struct rb_node *nd;
+ struct symbol_name_rb_node *pos;
+
+ for (nd = rb_first(&dso->symbol_names[type]); nd; nd = rb_next(nd)) {
+ pos = rb_entry(nd, struct symbol_name_rb_node, rb_node);
+ ret += fprintf(fp, "%s\n", pos->sym.name);
+ }
+
+ return ret;
+}
diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c
new file mode 100644
index 0000000..bbb4c19
--- /dev/null
+++ b/tools/perf/util/syscalltbl.c
@@ -0,0 +1,134 @@
+/*
+ * System call table mapper
+ *
+ * (C) 2016 Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#include "syscalltbl.h"
+#include <stdlib.h>
+
+#ifdef HAVE_SYSCALL_TABLE
+#include <linux/compiler.h>
+#include <string.h>
+#include "util.h"
+
+#if defined(__x86_64__)
+#include <asm/syscalls_64.c>
+const int syscalltbl_native_max_id = SYSCALLTBL_x86_64_MAX_ID;
+static const char **syscalltbl_native = syscalltbl_x86_64;
+#endif
+
+struct syscall {
+ int id;
+ const char *name;
+};
+
+static int syscallcmpname(const void *vkey, const void *ventry)
+{
+ const char *key = vkey;
+ const struct syscall *entry = ventry;
+
+ return strcmp(key, entry->name);
+}
+
+static int syscallcmp(const void *va, const void *vb)
+{
+ const struct syscall *a = va, *b = vb;
+
+ return strcmp(a->name, b->name);
+}
+
+static int syscalltbl__init_native(struct syscalltbl *tbl)
+{
+ int nr_entries = 0, i, j;
+ struct syscall *entries;
+
+ for (i = 0; i <= syscalltbl_native_max_id; ++i)
+ if (syscalltbl_native[i])
+ ++nr_entries;
+
+ entries = tbl->syscalls.entries = malloc(sizeof(struct syscall) * nr_entries);
+ if (tbl->syscalls.entries == NULL)
+ return -1;
+
+ for (i = 0, j = 0; i <= syscalltbl_native_max_id; ++i) {
+ if (syscalltbl_native[i]) {
+ entries[j].name = syscalltbl_native[i];
+ entries[j].id = i;
+ ++j;
+ }
+ }
+
+ qsort(tbl->syscalls.entries, nr_entries, sizeof(struct syscall), syscallcmp);
+ tbl->syscalls.nr_entries = nr_entries;
+ return 0;
+}
+
+struct syscalltbl *syscalltbl__new(void)
+{
+ struct syscalltbl *tbl = malloc(sizeof(*tbl));
+ if (tbl) {
+ if (syscalltbl__init_native(tbl)) {
+ free(tbl);
+ return NULL;
+ }
+ }
+ return tbl;
+}
+
+void syscalltbl__delete(struct syscalltbl *tbl)
+{
+ zfree(&tbl->syscalls.entries);
+ free(tbl);
+}
+
+const char *syscalltbl__name(const struct syscalltbl *tbl __maybe_unused, int id)
+{
+ return id <= syscalltbl_native_max_id ? syscalltbl_native[id] : NULL;
+}
+
+int syscalltbl__id(struct syscalltbl *tbl, const char *name)
+{
+ struct syscall *sc = bsearch(name, tbl->syscalls.entries,
+ tbl->syscalls.nr_entries, sizeof(*sc),
+ syscallcmpname);
+
+ return sc ? sc->id : -1;
+}
+
+#else /* HAVE_SYSCALL_TABLE */
+
+#include <libaudit.h>
+
+struct syscalltbl *syscalltbl__new(void)
+{
+ struct syscalltbl *tbl = malloc(sizeof(*tbl));
+ if (tbl)
+ tbl->audit_machine = audit_detect_machine();
+ return tbl;
+}
+
+void syscalltbl__delete(struct syscalltbl *tbl)
+{
+ free(tbl);
+}
+
+const char *syscalltbl__name(const struct syscalltbl *tbl, int id)
+{
+ return audit_syscall_to_name(id, tbl->audit_machine);
+}
+
+int syscalltbl__id(struct syscalltbl *tbl, const char *name)
+{
+ return audit_name_to_syscall(name, tbl->audit_machine);
+}
+#endif /* HAVE_SYSCALL_TABLE */
diff --git a/tools/perf/util/syscalltbl.h b/tools/perf/util/syscalltbl.h
new file mode 100644
index 0000000..e295151
--- /dev/null
+++ b/tools/perf/util/syscalltbl.h
@@ -0,0 +1,20 @@
+#ifndef __PERF_SYSCALLTBL_H
+#define __PERF_SYSCALLTBL_H
+
+struct syscalltbl {
+ union {
+ int audit_machine;
+ struct {
+ int nr_entries;
+ void *entries;
+ } syscalls;
+ };
+};
+
+struct syscalltbl *syscalltbl__new(void);
+void syscalltbl__delete(struct syscalltbl *tbl);
+
+const char *syscalltbl__name(const struct syscalltbl *tbl, int id);
+int syscalltbl__id(struct syscalltbl *tbl, const char *name);
+
+#endif /* __PERF_SYSCALLTBL_H */
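[Editor's note: both backings expose the same small API, so callers are oblivious to whether the generated table or libaudit sits underneath. A minimal usage sketch, error handling trimmed:]

	#include <stdio.h>
	#include "syscalltbl.h"

	int main(void)
	{
		struct syscalltbl *tbl = syscalltbl__new();
		int id;

		if (tbl == NULL)
			return 1;

		id = syscalltbl__id(tbl, "open");	/* name -> id, -1 if unknown */
		if (id >= 0)
			printf("open = %d -> %s\n", id, syscalltbl__name(tbl, id));

		syscalltbl__delete(tbl);
		return 0;
	}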
diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c
index 679688e..825086a 100644
--- a/tools/perf/util/thread-stack.c
+++ b/tools/perf/util/thread-stack.c
@@ -22,44 +22,9 @@
#include "debug.h"
#include "symbol.h"
#include "comm.h"
+#include "call-path.h"
#include "thread-stack.h"
-#define CALL_PATH_BLOCK_SHIFT 8
-#define CALL_PATH_BLOCK_SIZE (1 << CALL_PATH_BLOCK_SHIFT)
-#define CALL_PATH_BLOCK_MASK (CALL_PATH_BLOCK_SIZE - 1)
-
-struct call_path_block {
- struct call_path cp[CALL_PATH_BLOCK_SIZE];
- struct list_head node;
-};
-
-/**
- * struct call_path_root - root of all call paths.
- * @call_path: root call path
- * @blocks: list of blocks to store call paths
- * @next: next free space
- * @sz: number of spaces
- */
-struct call_path_root {
- struct call_path call_path;
- struct list_head blocks;
- size_t next;
- size_t sz;
-};
-
-/**
- * struct call_return_processor - provides a call-back to consume call-return
- * information.
- * @cpr: call path root
- * @process: call-back that accepts call/return information
- * @data: anonymous data for call-back
- */
-struct call_return_processor {
- struct call_path_root *cpr;
- int (*process)(struct call_return *cr, void *data);
- void *data;
-};
-
#define STACK_GROWTH 2048
/**
@@ -335,108 +300,6 @@
chain->ips[i] = thread->ts->stack[thread->ts->cnt - i].ret_addr;
}
-static void call_path__init(struct call_path *cp, struct call_path *parent,
- struct symbol *sym, u64 ip, bool in_kernel)
-{
- cp->parent = parent;
- cp->sym = sym;
- cp->ip = sym ? 0 : ip;
- cp->db_id = 0;
- cp->in_kernel = in_kernel;
- RB_CLEAR_NODE(&cp->rb_node);
- cp->children = RB_ROOT;
-}
-
-static struct call_path_root *call_path_root__new(void)
-{
- struct call_path_root *cpr;
-
- cpr = zalloc(sizeof(struct call_path_root));
- if (!cpr)
- return NULL;
- call_path__init(&cpr->call_path, NULL, NULL, 0, false);
- INIT_LIST_HEAD(&cpr->blocks);
- return cpr;
-}
-
-static void call_path_root__free(struct call_path_root *cpr)
-{
- struct call_path_block *pos, *n;
-
- list_for_each_entry_safe(pos, n, &cpr->blocks, node) {
- list_del(&pos->node);
- free(pos);
- }
- free(cpr);
-}
-
-static struct call_path *call_path__new(struct call_path_root *cpr,
- struct call_path *parent,
- struct symbol *sym, u64 ip,
- bool in_kernel)
-{
- struct call_path_block *cpb;
- struct call_path *cp;
- size_t n;
-
- if (cpr->next < cpr->sz) {
- cpb = list_last_entry(&cpr->blocks, struct call_path_block,
- node);
- } else {
- cpb = zalloc(sizeof(struct call_path_block));
- if (!cpb)
- return NULL;
- list_add_tail(&cpb->node, &cpr->blocks);
- cpr->sz += CALL_PATH_BLOCK_SIZE;
- }
-
- n = cpr->next++ & CALL_PATH_BLOCK_MASK;
- cp = &cpb->cp[n];
-
- call_path__init(cp, parent, sym, ip, in_kernel);
-
- return cp;
-}
-
-static struct call_path *call_path__findnew(struct call_path_root *cpr,
- struct call_path *parent,
- struct symbol *sym, u64 ip, u64 ks)
-{
- struct rb_node **p;
- struct rb_node *node_parent = NULL;
- struct call_path *cp;
- bool in_kernel = ip >= ks;
-
- if (sym)
- ip = 0;
-
- if (!parent)
- return call_path__new(cpr, parent, sym, ip, in_kernel);
-
- p = &parent->children.rb_node;
- while (*p != NULL) {
- node_parent = *p;
- cp = rb_entry(node_parent, struct call_path, rb_node);
-
- if (cp->sym == sym && cp->ip == ip)
- return cp;
-
- if (sym < cp->sym || (sym == cp->sym && ip < cp->ip))
- p = &(*p)->rb_left;
- else
- p = &(*p)->rb_right;
- }
-
- cp = call_path__new(cpr, parent, sym, ip, in_kernel);
- if (!cp)
- return NULL;
-
- rb_link_node(&cp->rb_node, node_parent, p);
- rb_insert_color(&cp->rb_node, &parent->children);
-
- return cp;
-}
-
struct call_return_processor *
call_return_processor__new(int (*process)(struct call_return *cr, void *data),
void *data)
diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h
index e1528f1..ad44c79 100644
--- a/tools/perf/util/thread-stack.h
+++ b/tools/perf/util/thread-stack.h
@@ -19,17 +19,16 @@
#include <sys/types.h>
#include <linux/types.h>
-#include <linux/rbtree.h>
struct thread;
struct comm;
struct ip_callchain;
struct symbol;
struct dso;
-struct call_return_processor;
struct comm;
struct perf_sample;
struct addr_location;
+struct call_path;
/*
* Call/Return flags.
@@ -69,26 +68,16 @@
};
/**
- * struct call_path - node in list of calls leading to a function call.
- * @parent: call path to the parent function call
- * @sym: symbol of function called
- * @ip: only if sym is null, the ip of the function
- * @db_id: id used for db-export
- * @in_kernel: whether function is a in the kernel
- * @rb_node: node in parent's tree of called functions
- * @children: tree of call paths of functions called
- *
- * In combination with the call_return structure, the call_path structure
- * defines a context-sensitve call-graph.
+ * struct call_return_processor - provides a call-back to consume call-return
+ * information.
+ * @cpr: call path root
+ * @process: call-back that accepts call/return information
+ * @data: anonymous data for call-back
*/
-struct call_path {
- struct call_path *parent;
- struct symbol *sym;
- u64 ip;
- u64 db_id;
- bool in_kernel;
- struct rb_node rb_node;
- struct rb_root children;
+struct call_return_processor {
+ struct call_path_root *cpr;
+ int (*process)(struct call_return *cr, void *data);
+ void *data;
};
int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index dfd00c6..45fcb71 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -10,6 +10,8 @@
#include "comm.h"
#include "unwind.h"
+#include <api/fs/fs.h>
+
int thread__init_map_groups(struct thread *thread, struct machine *machine)
{
struct thread *leader;
@@ -153,6 +155,23 @@
return 0;
}
+int thread__set_comm_from_proc(struct thread *thread)
+{
+ char path[64];
+ char *comm = NULL;
+ size_t sz;
+ int err = -1;
+
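+ /* skip the read if snprintf() would have truncated the /proc path */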
+ if (!(snprintf(path, sizeof(path), "%d/task/%d/comm",
+ thread->pid_, thread->tid) >= (int)sizeof(path)) &&
+ procfs__read_str(path, &comm, &sz) == 0) {
+ comm[sz - 1] = '\0';
+ err = thread__set_comm(thread, comm, 0);
+ }
+
+ return err;
+}
+
const char *thread__comm_str(const struct thread *thread)
{
const struct comm *comm = thread__comm(thread);
@@ -233,7 +252,7 @@
struct addr_location *al)
{
size_t i;
- const u8 const cpumodes[] = {
+ const u8 cpumodes[] = {
PERF_RECORD_MISC_USER,
PERF_RECORD_MISC_KERNEL,
PERF_RECORD_MISC_GUEST_USER,
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index a0ac031..45fba13 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -9,6 +9,9 @@
#include "symbol.h"
#include <strlist.h>
#include <intlist.h>
+#ifdef HAVE_LIBUNWIND_SUPPORT
+#include <libunwind.h>
+#endif
struct thread_stack;
@@ -32,6 +35,9 @@
void *priv;
struct thread_stack *ts;
+#ifdef HAVE_LIBUNWIND_SUPPORT
+ unw_addr_space_t addr_space;
+#endif
};
struct machine;
@@ -65,6 +71,8 @@
return __thread__set_comm(thread, comm, timestamp, false);
}
+int thread__set_comm_from_proc(struct thread *thread);
+
int thread__comm_len(struct thread *thread);
struct comm *thread__comm(const struct thread *thread);
struct comm *thread__exec_comm(const struct thread *thread);
diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c
index 267112b..5654fe1 100644
--- a/tools/perf/util/thread_map.c
+++ b/tools/perf/util/thread_map.c
@@ -260,7 +260,7 @@
return threads;
}
-static struct thread_map *thread_map__new_by_tid_str(const char *tid_str)
+struct thread_map *thread_map__new_by_tid_str(const char *tid_str)
{
struct thread_map *threads = NULL, *nt;
int ntasks = 0;
@@ -436,3 +436,15 @@
return threads;
}
+
+bool thread_map__has(struct thread_map *threads, pid_t pid)
+{
+ int i;
+
+ for (i = 0; i < threads->nr; ++i) {
+ if (threads->map[i].pid == pid)
+ return true;
+ }
+
+ return false;
+}
diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h
index 85e4c7c..bd3b971 100644
--- a/tools/perf/util/thread_map.h
+++ b/tools/perf/util/thread_map.h
@@ -31,6 +31,8 @@
struct thread_map *thread_map__new_str(const char *pid,
const char *tid, uid_t uid);
+struct thread_map *thread_map__new_by_tid_str(const char *tid_str);
+
size_t thread_map__fprintf(struct thread_map *threads, FILE *fp);
static inline int thread_map__nr(struct thread_map *threads)
@@ -55,4 +57,5 @@
}
void thread_map__read_comms(struct thread_map *threads);
+bool thread_map__has(struct thread_map *threads, pid_t pid);
#endif /* __PERF_THREAD_MAP_H */
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index 55de4cf..ac2590a 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -57,6 +57,7 @@
id_index,
auxtrace_info,
auxtrace_error,
+ time_conv,
thread_map,
cpu_map,
stat_config,
diff --git a/tools/perf/util/trigger.h b/tools/perf/util/trigger.h
new file mode 100644
index 0000000..e97d701
--- /dev/null
+++ b/tools/perf/util/trigger.h
@@ -0,0 +1,94 @@
+#ifndef __TRIGGER_H_
+#define __TRIGGER_H_ 1
+
+#include "util/debug.h"
+#include "asm/bug.h"
+
+/*
+ * Use trigger to model operations which need to be executed when
+ * an event (a signal, for example) is observed.
+ *
+ * States and transitions:
+ *
+ *
+ * OFF--(on)--> READY --(hit)--> HIT
+ *                ^               |
+ *                |            (ready)
+ *                |               |
+ *                 \_____________/
+ *
+ * is_hit and is_ready are two key functions to query the state of
+ * a trigger. is_hit means the event has already happened; is_ready
+ * means the trigger is waiting for the event.
+ */
+
+struct trigger {
+ volatile enum {
+ TRIGGER_ERROR = -2,
+ TRIGGER_OFF = -1,
+ TRIGGER_READY = 0,
+ TRIGGER_HIT = 1,
+ } state;
+ const char *name;
+};
+
+#define TRIGGER_WARN_ONCE(t, exp) \
+ WARN_ONCE(t->state != exp, "trigger '%s' state transition error: %d in %s()\n", \
+ t->name, t->state, __func__)
+
+static inline bool trigger_is_available(struct trigger *t)
+{
+ return t->state >= 0;
+}
+
+static inline bool trigger_is_error(struct trigger *t)
+{
+ return t->state <= TRIGGER_ERROR;
+}
+
+static inline void trigger_on(struct trigger *t)
+{
+ TRIGGER_WARN_ONCE(t, TRIGGER_OFF);
+ t->state = TRIGGER_READY;
+}
+
+static inline void trigger_ready(struct trigger *t)
+{
+ if (!trigger_is_available(t))
+ return;
+ t->state = TRIGGER_READY;
+}
+
+static inline void trigger_hit(struct trigger *t)
+{
+ if (!trigger_is_available(t))
+ return;
+ TRIGGER_WARN_ONCE(t, TRIGGER_READY);
+ t->state = TRIGGER_HIT;
+}
+
+static inline void trigger_off(struct trigger *t)
+{
+ if (!trigger_is_available(t))
+ return;
+ t->state = TRIGGER_OFF;
+}
+
+static inline void trigger_error(struct trigger *t)
+{
+ t->state = TRIGGER_ERROR;
+}
+
+static inline bool trigger_is_ready(struct trigger *t)
+{
+ return t->state == TRIGGER_READY;
+}
+
+static inline bool trigger_is_hit(struct trigger *t)
+{
+ return t->state == TRIGGER_HIT;
+}
+
+#define DEFINE_TRIGGER(n) \
+struct trigger n = {.state = TRIGGER_OFF, .name = #n}
+#endif
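[Editor's note: a usage sketch of the state machine above — the names take_snapshot and done are illustrative, not taken from a particular caller. The main loop arms the trigger, an async event hits it, and the loop re-arms after consuming it:]

	static DEFINE_TRIGGER(snapshot_trigger);

	static void sig_handler(int sig __maybe_unused)
	{
		trigger_hit(&snapshot_trigger);		/* READY -> HIT */
	}

	static void record_loop(volatile int *done)
	{
		trigger_on(&snapshot_trigger);		/* OFF -> READY */
		while (!*done) {
			if (trigger_is_hit(&snapshot_trigger)) {
				take_snapshot();	/* hypothetical consumer */
				trigger_ready(&snapshot_trigger); /* HIT -> READY */
			}
		}
		trigger_off(&snapshot_trigger);
	}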
diff --git a/tools/perf/util/tsc.h b/tools/perf/util/tsc.h
index a8b78f1..d5b11e2 100644
--- a/tools/perf/util/tsc.h
+++ b/tools/perf/util/tsc.h
@@ -3,10 +3,29 @@
#include <linux/types.h>
-#include "../arch/x86/util/tsc.h"
+#include "event.h"
+
+struct perf_tsc_conversion {
+ u16 time_shift;
+ u32 time_mult;
+ u64 time_zero;
+};
+struct perf_event_mmap_page;
+
+int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
+ struct perf_tsc_conversion *tc);
u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc);
u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc);
u64 rdtsc(void);
+struct perf_event_mmap_page;
+struct perf_tool;
+struct machine;
+
+int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc,
+ struct perf_tool *tool,
+ perf_event__handler_t process,
+ struct machine *machine);
+
#endif
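[Editor's note: the three fields follow the perf_event_mmap_page time-conversion scheme, so tsc_to_perf_time() amounts to the fixed-point computation below — a sketch mirroring the documented mmap-page algorithm, with the multiply split to avoid 64-bit overflow:]

	u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc)
	{
		u64 quot = cyc >> tc->time_shift;
		u64 rem  = cyc & (((u64)1 << tc->time_shift) - 1);

		/* time = time_zero + (cyc * time_mult) >> time_shift */
		return tc->time_zero + quot * tc->time_mult +
		       ((rem * tc->time_mult) >> tc->time_shift);
	}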
diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c
index ee7e372..63687d3 100644
--- a/tools/perf/util/unwind-libunwind.c
+++ b/tools/perf/util/unwind-libunwind.c
@@ -32,6 +32,7 @@
#include "symbol.h"
#include "util.h"
#include "debug.h"
+#include "asm/bug.h"
extern int
UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as,
@@ -580,43 +581,33 @@
int unwind__prepare_access(struct thread *thread)
{
- unw_addr_space_t addr_space;
-
if (callchain_param.record_mode != CALLCHAIN_DWARF)
return 0;
- addr_space = unw_create_addr_space(&accessors, 0);
- if (!addr_space) {
+ thread->addr_space = unw_create_addr_space(&accessors, 0);
+ if (!thread->addr_space) {
pr_err("unwind: Can't create unwind address space.\n");
return -ENOMEM;
}
- unw_set_caching_policy(addr_space, UNW_CACHE_GLOBAL);
- thread__set_priv(thread, addr_space);
-
+ unw_set_caching_policy(thread->addr_space, UNW_CACHE_GLOBAL);
return 0;
}
void unwind__flush_access(struct thread *thread)
{
- unw_addr_space_t addr_space;
-
if (callchain_param.record_mode != CALLCHAIN_DWARF)
return;
- addr_space = thread__priv(thread);
- unw_flush_cache(addr_space, 0, 0);
+ unw_flush_cache(thread->addr_space, 0, 0);
}
void unwind__finish_access(struct thread *thread)
{
- unw_addr_space_t addr_space;
-
if (callchain_param.record_mode != CALLCHAIN_DWARF)
return;
- addr_space = thread__priv(thread);
- unw_destroy_addr_space(addr_space);
+ unw_destroy_addr_space(thread->addr_space);
}
static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb,
@@ -639,7 +630,9 @@
* unwind itself.
*/
if (max_stack - 1 > 0) {
- addr_space = thread__priv(ui->thread);
+ WARN_ONCE(!ui->thread, "WARNING: ui->thread is NULL");
+ addr_space = ui->thread->addr_space;
+
if (addr_space == NULL)
return -1;
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index b7766c5..eab077a 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -33,6 +33,8 @@
unsigned int page_size;
int cacheline_size;
+unsigned int sysctl_perf_event_max_stack = PERF_MAX_STACK_DEPTH;
+
bool test_attr__enabled;
bool perf_host = true;
@@ -117,6 +119,40 @@
return rmdir(path);
}
+/* A filter which removes dot files */
+bool lsdir_no_dot_filter(const char *name __maybe_unused, struct dirent *d)
+{
+ return d->d_name[0] != '.';
+}
+
+/* lsdir reads a directory and stores its entries in a strlist */
+struct strlist *lsdir(const char *name,
+ bool (*filter)(const char *, struct dirent *))
+{
+ struct strlist *list = NULL;
+ DIR *dir;
+ struct dirent *d;
+
+ dir = opendir(name);
+ if (!dir)
+ return NULL;
+
+ list = strlist__new(NULL, NULL);
+ if (!list) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ while ((d = readdir(dir)) != NULL) {
+ if (!filter || filter(name, d))
+ strlist__add(list, d->d_name);
+ }
+
+out:
+ closedir(dir);
+ return list;
+}
+
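[Editor's note: callers pair lsdir() with a filter and walk the result with the strlist iterator, as find_matching_kcore() above does. A sketch printing one entry per line:]

	static void list_visible(const char *path)
	{
		struct strlist *entries = lsdir(path, lsdir_no_dot_filter);
		struct str_node *nd;

		if (entries == NULL)
			return;
		strlist__for_each(nd, entries)
			printf("%s\n", nd->s);
		strlist__delete(entries);
	}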
static int slow_copyfile(const char *from, const char *to)
{
int err = -1;
@@ -471,7 +507,6 @@
"needed for --call-graph fp\n");
break;
-#ifdef HAVE_DWARF_UNWIND_SUPPORT
/* Dwarf style */
} else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
const unsigned long default_stack_dump_size = 8192;
@@ -487,7 +522,6 @@
ret = get_stack_size(tok, &size);
param->dump_size = size;
}
-#endif /* HAVE_DWARF_UNWIND_SUPPORT */
} else if (!strncmp(name, "lbr", sizeof("lbr"))) {
if (!strtok_r(NULL, ",", &saveptr)) {
param->record_mode = CALLCHAIN_LBR;
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 8298d60..7651633 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -79,6 +79,7 @@
#include <termios.h>
#include <linux/bitops.h>
#include <termios.h>
+#include "strlist.h"
extern const char *graph_line;
extern const char *graph_dotted_line;
@@ -159,12 +160,6 @@
}
#endif
-/*
- * Wrappers:
- */
-void *xrealloc(void *ptr, size_t size) __attribute__((weak));
-
-
static inline void *zalloc(size_t size)
{
return calloc(1, size);
@@ -222,6 +217,8 @@
int mkdir_p(char *path, mode_t mode);
int rm_rf(char *path);
+struct strlist *lsdir(const char *name, bool (*filter)(const char *, struct dirent *));
+bool lsdir_no_dot_filter(const char *name, struct dirent *d);
int copyfile(const char *from, const char *to);
int copyfile_mode(const char *from, const char *to, mode_t mode);
int copyfile_offset(int fromfd, loff_t from_ofs, int tofd, loff_t to_ofs, u64 size);
@@ -254,11 +251,17 @@
char *ltrim(char *s);
char *rtrim(char *s);
+static inline char *trim(char *s)
+{
+ return ltrim(rtrim(s));
+}
+
void dump_stack(void);
void sighandler_dump_stack(int sig);
extern unsigned int page_size;
extern int cacheline_size;
+extern unsigned int sysctl_perf_event_max_stack;
struct parse_tag {
char tag;
diff --git a/tools/perf/util/wrapper.c b/tools/perf/util/wrapper.c
deleted file mode 100644
index 5f1a07c..0000000
--- a/tools/perf/util/wrapper.c
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Various trivial helper wrappers around standard functions
- */
-#include "cache.h"
-
-/*
- * There's no pack memory to release - but stay close to the Git
- * version so wrap this away:
- */
-static inline void release_pack_memory(size_t size __maybe_unused,
- int flag __maybe_unused)
-{
-}
-
-void *xrealloc(void *ptr, size_t size)
-{
- void *ret = realloc(ptr, size);
- if (!ret && !size)
- ret = realloc(ptr, 1);
- if (!ret) {
- release_pack_memory(size, -1);
- ret = realloc(ptr, size);
- if (!ret && !size)
- ret = realloc(ptr, 1);
- if (!ret)
- die("Out of memory, realloc failed");
- }
- return ret;
-}
diff --git a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
index d0e6b85..546cf4a 100644
--- a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
+++ b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
@@ -91,7 +91,7 @@
char *signature,
u32 instance,
struct acpi_table_header **table,
- acpi_physical_address * address);
+ acpi_physical_address *address);
static acpi_status osl_list_bios_tables(void);
@@ -99,7 +99,7 @@
osl_get_bios_table(char *signature,
u32 instance,
struct acpi_table_header **table,
- acpi_physical_address * address);
+ acpi_physical_address *address);
static acpi_status osl_get_last_status(acpi_status default_status);
@@ -187,7 +187,7 @@
acpi_status
acpi_os_get_table_by_address(acpi_physical_address address,
- struct acpi_table_header ** table)
+ struct acpi_table_header **table)
{
u32 table_length;
struct acpi_table_header *mapped_table;
@@ -252,8 +252,8 @@
acpi_status
acpi_os_get_table_by_name(char *signature,
u32 instance,
- struct acpi_table_header ** table,
- acpi_physical_address * address)
+ struct acpi_table_header **table,
+ acpi_physical_address *address)
{
acpi_status status;
@@ -380,8 +380,8 @@
acpi_status
acpi_os_get_table_by_index(u32 index,
- struct acpi_table_header ** table,
- u32 *instance, acpi_physical_address * address)
+ struct acpi_table_header **table,
+ u32 *instance, acpi_physical_address *address)
{
struct osl_table_info *info;
acpi_status status;
@@ -447,7 +447,7 @@
}
}
- return ((acpi_physical_address) (address));
+ return ((acpi_physical_address)(address));
}
/******************************************************************************
@@ -751,10 +751,10 @@
for (i = 0; i < number_of_tables; ++i, table_data += item_size) {
if (osl_can_use_xsdt()) {
table_address =
- (acpi_physical_address) (*ACPI_CAST64(table_data));
+ (acpi_physical_address)(*ACPI_CAST64(table_data));
} else {
table_address =
- (acpi_physical_address) (*ACPI_CAST32(table_data));
+ (acpi_physical_address)(*ACPI_CAST32(table_data));
}
/* Skip NULL entries in RSDT/XSDT */
@@ -800,7 +800,7 @@
osl_get_bios_table(char *signature,
u32 instance,
struct acpi_table_header **table,
- acpi_physical_address * address)
+ acpi_physical_address *address)
{
struct acpi_table_header *local_table = NULL;
struct acpi_table_header *mapped_table = NULL;
@@ -833,38 +833,37 @@
if ((gbl_fadt->header.length >= MIN_FADT_FOR_XDSDT) &&
gbl_fadt->Xdsdt) {
table_address =
- (acpi_physical_address) gbl_fadt->Xdsdt;
+ (acpi_physical_address)gbl_fadt->Xdsdt;
} else
if ((gbl_fadt->header.length >= MIN_FADT_FOR_DSDT)
&& gbl_fadt->dsdt) {
table_address =
- (acpi_physical_address) gbl_fadt->dsdt;
+ (acpi_physical_address)gbl_fadt->dsdt;
}
} else if (ACPI_COMPARE_NAME(signature, ACPI_SIG_FACS)) {
if ((gbl_fadt->header.length >= MIN_FADT_FOR_XFACS) &&
gbl_fadt->Xfacs) {
table_address =
- (acpi_physical_address) gbl_fadt->Xfacs;
+ (acpi_physical_address)gbl_fadt->Xfacs;
} else
if ((gbl_fadt->header.length >= MIN_FADT_FOR_FACS)
&& gbl_fadt->facs) {
table_address =
- (acpi_physical_address) gbl_fadt->facs;
+ (acpi_physical_address)gbl_fadt->facs;
}
} else if (ACPI_COMPARE_NAME(signature, ACPI_SIG_XSDT)) {
if (!gbl_revision) {
return (AE_BAD_SIGNATURE);
}
table_address =
- (acpi_physical_address) gbl_rsdp.
+ (acpi_physical_address)gbl_rsdp.
xsdt_physical_address;
} else if (ACPI_COMPARE_NAME(signature, ACPI_SIG_RSDT)) {
table_address =
- (acpi_physical_address) gbl_rsdp.
+ (acpi_physical_address)gbl_rsdp.
rsdt_physical_address;
} else {
- table_address =
- (acpi_physical_address) gbl_rsdp_address;
+ table_address = (acpi_physical_address)gbl_rsdp_address;
signature = ACPI_SIG_RSDP;
}
@@ -904,12 +903,12 @@
for (i = 0; i < number_of_tables; ++i, table_data += item_size) {
if (osl_can_use_xsdt()) {
table_address =
- (acpi_physical_address) (*ACPI_CAST64
- (table_data));
+ (acpi_physical_address)(*ACPI_CAST64
+ (table_data));
} else {
table_address =
- (acpi_physical_address) (*ACPI_CAST32
- (table_data));
+ (acpi_physical_address)(*ACPI_CAST32
+ (table_data));
}
/* Skip NULL entries in RSDT/XSDT */
@@ -1301,7 +1300,7 @@
char *signature,
u32 instance,
struct acpi_table_header **table,
- acpi_physical_address * address)
+ acpi_physical_address *address)
{
void *table_dir;
u32 current_instance = 0;
diff --git a/tools/power/acpi/os_specific/service_layers/osunixmap.c b/tools/power/acpi/os_specific/service_layers/osunixmap.c
index 3818fd0..cbfbce1 100644
--- a/tools/power/acpi/os_specific/service_layers/osunixmap.c
+++ b/tools/power/acpi/os_specific/service_layers/osunixmap.c
@@ -54,7 +54,7 @@
#ifndef O_BINARY
#define O_BINARY 0
#endif
-#if defined(_dragon_fly) || defined(_free_BSD)
+#if defined(_dragon_fly) || defined(_free_BSD) || defined(_QNX)
#define MMAP_FLAGS MAP_SHARED
#else
#define MMAP_FLAGS MAP_PRIVATE
diff --git a/tools/power/acpi/os_specific/service_layers/osunixxf.c b/tools/power/acpi/os_specific/service_layers/osunixxf.c
index 08cb8b2..88aa66e 100644
--- a/tools/power/acpi/os_specific/service_layers/osunixxf.c
+++ b/tools/power/acpi/os_specific/service_layers/osunixxf.c
@@ -246,8 +246,8 @@
*****************************************************************************/
acpi_status
-acpi_os_predefined_override(const struct acpi_predefined_names * init_val,
- acpi_string * new_val)
+acpi_os_predefined_override(const struct acpi_predefined_names *init_val,
+ acpi_string *new_val)
{
if (!init_val || !new_val) {
@@ -274,8 +274,8 @@
*****************************************************************************/
acpi_status
-acpi_os_table_override(struct acpi_table_header * existing_table,
- struct acpi_table_header ** new_table)
+acpi_os_table_override(struct acpi_table_header *existing_table,
+ struct acpi_table_header **new_table)
{
if (!existing_table || !new_table) {
@@ -311,8 +311,8 @@
*****************************************************************************/
acpi_status
-acpi_os_physical_table_override(struct acpi_table_header * existing_table,
- acpi_physical_address * new_address,
+acpi_os_physical_table_override(struct acpi_table_header *existing_table,
+ acpi_physical_address *new_address,
u32 *new_table_length)
{
@@ -506,7 +506,7 @@
void *acpi_os_map_memory(acpi_physical_address where, acpi_size length)
{
- return (ACPI_TO_POINTER((acpi_size) where));
+ return (ACPI_TO_POINTER((acpi_size)where));
}
/******************************************************************************
@@ -603,9 +603,9 @@
acpi_status
acpi_os_create_semaphore(u32 max_units,
- u32 initial_units, acpi_handle * out_handle)
+ u32 initial_units, acpi_handle *out_handle)
{
- *out_handle = (acpi_handle) 1;
+ *out_handle = (acpi_handle)1;
return (AE_OK);
}
@@ -640,7 +640,7 @@
acpi_status
acpi_os_create_semaphore(u32 max_units,
- u32 initial_units, acpi_handle * out_handle)
+ u32 initial_units, acpi_handle *out_handle)
{
sem_t *sem;
@@ -672,7 +672,7 @@
}
#endif
- *out_handle = (acpi_handle) sem;
+ *out_handle = (acpi_handle)sem;
return (AE_OK);
}
@@ -1035,7 +1035,7 @@
*****************************************************************************/
acpi_status
-acpi_os_write_pci_configuration(struct acpi_pci_id * pci_id,
+acpi_os_write_pci_configuration(struct acpi_pci_id *pci_id,
u32 pci_register, u64 value, u32 width)
{
diff --git a/tools/power/acpi/tools/acpidbg/acpidbg.c b/tools/power/acpi/tools/acpidbg/acpidbg.c
index d070fcc..a88ac45 100644
--- a/tools/power/acpi/tools/acpidbg/acpidbg.c
+++ b/tools/power/acpi/tools/acpidbg/acpidbg.c
@@ -375,7 +375,7 @@
int main(int argc, char **argv)
{
- int fd = 0;
+ int fd = -1;
int ch;
int len;
int ret = EXIT_SUCCESS;
@@ -430,7 +430,7 @@
acpi_aml_loop(fd);
exit:
- if (fd < 0)
+ if (fd >= 0)
close(fd);
if (acpi_aml_batch_cmd)
free(acpi_aml_batch_cmd);
diff --git a/tools/power/acpi/tools/acpidump/Makefile b/tools/power/acpi/tools/acpidump/Makefile
index 8d76157..2942cdc 100644
--- a/tools/power/acpi/tools/acpidump/Makefile
+++ b/tools/power/acpi/tools/acpidump/Makefile
@@ -31,6 +31,7 @@
osunixxf.o\
tbprint.o\
tbxfroot.o\
+ utascii.o\
utbuffer.o\
utdebug.o\
utexcep.o\
diff --git a/tools/power/acpi/tools/acpidump/apdump.c b/tools/power/acpi/tools/acpidump/apdump.c
index da44458..fb8f1d9 100644
--- a/tools/power/acpi/tools/acpidump/apdump.c
+++ b/tools/power/acpi/tools/acpidump/apdump.c
@@ -68,7 +68,7 @@
/* Make sure signature is all ASCII and a valid ACPI name */
- if (!acpi_ut_valid_acpi_name(table->signature)) {
+ if (!acpi_ut_valid_nameseg(table->signature)) {
acpi_log_error("Table signature (0x%8.8X) is invalid\n",
*(u32 *)table->signature);
return (FALSE);
@@ -286,14 +286,15 @@
/* Convert argument to an integer physical address */
- status = acpi_ut_strtoul64(ascii_address, 0, &long_address);
+ status = acpi_ut_strtoul64(ascii_address, ACPI_ANY_BASE,
+ ACPI_MAX64_BYTE_WIDTH, &long_address);
if (ACPI_FAILURE(status)) {
acpi_log_error("%s: Could not convert to a physical address\n",
ascii_address);
return (-1);
}
- address = (acpi_physical_address) long_address;
+ address = (acpi_physical_address)long_address;
status = acpi_os_get_table_by_address(address, &table);
if (ACPI_FAILURE(status)) {
acpi_log_error("Could not get table at 0x%8.8X%8.8X, %s\n",
@@ -406,6 +407,12 @@
return (-1);
}
+ if (!acpi_ut_valid_nameseg(table->signature)) {
+ acpi_log_error
+ ("No valid ACPI signature was found in input file %s\n",
+ pathname);
+ }
+
/* File must be at least as long as the table length */
if (table->length > file_size) {
diff --git a/tools/power/acpi/tools/acpidump/apmain.c b/tools/power/acpi/tools/acpidump/apmain.c
index c3c0915..7692e6b 100644
--- a/tools/power/acpi/tools/acpidump/apmain.c
+++ b/tools/power/acpi/tools/acpidump/apmain.c
@@ -209,7 +209,8 @@
case 'r': /* Dump tables from specified RSDP */
status =
- acpi_ut_strtoul64(acpi_gbl_optarg, 0,
+ acpi_ut_strtoul64(acpi_gbl_optarg, ACPI_ANY_BASE,
+ ACPI_MAX64_BYTE_WIDTH,
&gbl_rsdp_base);
if (ACPI_FAILURE(status)) {
acpi_log_error
diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile
index 0adaf0c..8358863 100644
--- a/tools/power/cpupower/Makefile
+++ b/tools/power/cpupower/Makefile
@@ -63,7 +63,7 @@
# and _should_ modify the PACKAGE_BUGREPORT definition
VERSION= $(shell ./utils/version-gen.sh)
-LIB_MAJ= 0.0.0
+LIB_MAJ= 0.0.1
LIB_MIN= 0
PACKAGE = cpupower
@@ -129,7 +129,7 @@
CFLAGS += -DVERSION=\"$(VERSION)\" -DPACKAGE=\"$(PACKAGE)\" \
-DPACKAGE_BUGREPORT=\"$(PACKAGE_BUGREPORT)\" -D_GNU_SOURCE
-UTIL_OBJS = utils/helpers/amd.o utils/helpers/topology.o utils/helpers/msr.o \
+UTIL_OBJS = utils/helpers/amd.o utils/helpers/msr.o \
utils/helpers/sysfs.o utils/helpers/misc.o utils/helpers/cpuid.o \
utils/helpers/pci.o utils/helpers/bitmask.o \
utils/idle_monitor/nhm_idle.o utils/idle_monitor/snb_idle.o \
@@ -148,9 +148,9 @@
utils/helpers/bitmask.h \
utils/idle_monitor/idle_monitors.h utils/idle_monitor/idle_monitors.def
-LIB_HEADERS = lib/cpufreq.h lib/sysfs.h
-LIB_SRC = lib/cpufreq.c lib/sysfs.c
-LIB_OBJS = lib/cpufreq.o lib/sysfs.o
+LIB_HEADERS = lib/cpufreq.h lib/cpupower.h lib/cpuidle.h
+LIB_SRC = lib/cpufreq.c lib/cpupower.c lib/cpuidle.c
+LIB_OBJS = lib/cpufreq.o lib/cpupower.o lib/cpuidle.o
LIB_OBJS := $(addprefix $(OUTPUT),$(LIB_OBJS))
CFLAGS += -pipe
@@ -280,6 +280,7 @@
$(CP) $(OUTPUT)libcpupower.so* $(DESTDIR)${libdir}/
$(INSTALL) -d $(DESTDIR)${includedir}
$(INSTALL_DATA) lib/cpufreq.h $(DESTDIR)${includedir}/cpufreq.h
+ $(INSTALL_DATA) lib/cpuidle.h $(DESTDIR)${includedir}/cpuidle.h
install-tools:
$(INSTALL) -d $(DESTDIR)${bindir}
@@ -315,6 +316,7 @@
uninstall:
- rm -f $(DESTDIR)${libdir}/libcpupower.*
- rm -f $(DESTDIR)${includedir}/cpufreq.h
+ - rm -f $(DESTDIR)${includedir}/cpuidle.h
- rm -f $(DESTDIR)${bindir}/utils/cpupower
- rm -f $(DESTDIR)${mandir}/man1/cpupower.1
- rm -f $(DESTDIR)${mandir}/man1/cpupower-frequency-set.1
diff --git a/tools/power/cpupower/bench/Makefile b/tools/power/cpupower/bench/Makefile
index d0f879b..3e59f1a 100644
--- a/tools/power/cpupower/bench/Makefile
+++ b/tools/power/cpupower/bench/Makefile
@@ -22,7 +22,7 @@
$(OUTPUT)cpufreq-bench: $(OBJS)
$(ECHO) " CC " $@
- $(QUIET) $(CC) -o $@ $(CFLAGS) $(OBJS) $(LIBS)
+ $(QUIET) $(CC) -o $@ $(CFLAGS) $(LDFLAGS) $(OBJS) $(LIBS)
all: $(OUTPUT)cpufreq-bench
diff --git a/tools/power/cpupower/bench/README-BENCH b/tools/power/cpupower/bench/README-BENCH
index 8093ec7..97727ae 100644
--- a/tools/power/cpupower/bench/README-BENCH
+++ b/tools/power/cpupower/bench/README-BENCH
@@ -113,7 +113,7 @@
-c, --cpu=<unsigned int> CPU Number to use, starting at 0
-p, --prio=<priority> scheduler priority, HIGH, LOW or DEFAULT
-g, --governor=<governor> cpufreq governor to test
--n, --cycles=<int> load/sleep cycles to get an avarage value to compare
+-n, --cycles=<int> load/sleep cycles to get an average value to compare
-r, --rounds<int> load/sleep rounds
-f, --file=<configfile> config file to use
-o, --output=<dir> output dir, must exist
diff --git a/tools/power/cpupower/bench/benchmark.c b/tools/power/cpupower/bench/benchmark.c
index 81b1c48..429d51a 100644
--- a/tools/power/cpupower/bench/benchmark.c
+++ b/tools/power/cpupower/bench/benchmark.c
@@ -130,7 +130,7 @@
_round, load_time, sleep_time);
if (config->verbose)
- printf("avarage: %lius, rps:%li\n",
+ printf("average: %lius, rps:%li\n",
load_time / calculations,
1000000 * calculations / load_time);
@@ -177,7 +177,7 @@
progress_time += sleep_time + load_time;
- /* compare the avarage sleep/load cycles */
+ /* compare the average sleep/load cycles */
fprintf(config->output, "%li ",
powersave_time / config->cycles);
fprintf(config->output, "%.3f\n",
diff --git a/tools/power/cpupower/bench/parse.c b/tools/power/cpupower/bench/parse.c
index f503fb5..9b65f05 100644
--- a/tools/power/cpupower/bench/parse.c
+++ b/tools/power/cpupower/bench/parse.c
@@ -65,7 +65,7 @@
{
FILE *output = NULL;
int len;
- char *filename;
+ char *filename, *filename_tmp;
struct utsname sysdata;
DIR *dir;
@@ -81,16 +81,22 @@
len = strlen(dirname) + 30;
filename = malloc(sizeof(char) * len);
+ if (!filename) {
+ perror("malloc");
+ goto out_dir;
+ }
if (uname(&sysdata) == 0) {
len += strlen(sysdata.nodename) + strlen(sysdata.release);
- filename = realloc(filename, sizeof(char) * len);
+ filename_tmp = realloc(filename, sizeof(*filename) * len);
- if (filename == NULL) {
+ if (filename_tmp == NULL) {
+ free(filename);
perror("realloc");
- return NULL;
+ goto out_dir;
}
+ filename = filename_tmp;
snprintf(filename, len - 1, "%s/benchmark_%s_%s_%li.log",
dirname, sysdata.nodename, sysdata.release, time(NULL));
} else {
@@ -104,12 +110,16 @@
if (output == NULL) {
perror("fopen");
fprintf(stderr, "error: unable to open logfile\n");
+ goto out;
}
fprintf(stdout, "Logfile: %s\n", filename);
- free(filename);
fprintf(output, "#round load sleep performance powersave percentage\n");
+out:
+ free(filename);
+out_dir:
+ closedir(dir);
return output;
}
diff --git a/tools/power/cpupower/bench/system.c b/tools/power/cpupower/bench/system.c
index f01e3f4..c25a74a 100644
--- a/tools/power/cpupower/bench/system.c
+++ b/tools/power/cpupower/bench/system.c
@@ -26,6 +26,7 @@
#include <sched.h>
#include <cpufreq.h>
+#include <cpupower.h>
#include "config.h"
#include "system.h"
@@ -60,7 +61,7 @@
dprintf("set %s as cpufreq governor\n", governor);
- if (cpufreq_cpu_exists(cpu) != 0) {
+ if (cpupower_is_cpu_online(cpu) != 0) {
- perror("cpufreq_cpu_exists");
+ perror("cpupower_is_cpu_online");
fprintf(stderr, "error: cpu %u does not exist\n", cpu);
return -1;
diff --git a/tools/power/cpupower/lib/cpufreq.c b/tools/power/cpupower/lib/cpufreq.c
index d961101..1b993fe 100644
--- a/tools/power/cpupower/lib/cpufreq.c
+++ b/tools/power/cpupower/lib/cpufreq.c
@@ -9,28 +9,190 @@
#include <errno.h>
#include <stdlib.h>
#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
#include "cpufreq.h"
-#include "sysfs.h"
+#include "cpupower_intern.h"
-int cpufreq_cpu_exists(unsigned int cpu)
+/* CPUFREQ sysfs access **************************************************/
+
+/* helper function to read file from /sys into given buffer */
+/* fname is a relative path under "cpuX/cpufreq" dir */
+static unsigned int sysfs_cpufreq_read_file(unsigned int cpu, const char *fname,
+ char *buf, size_t buflen)
{
- return sysfs_cpu_exists(cpu);
+ char path[SYSFS_PATH_MAX];
+
+ snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpufreq/%s",
+ cpu, fname);
+ return sysfs_read_file(path, buf, buflen);
}
+/* helper function to write a new value to a /sys file */
+/* fname is a relative path under "cpuX/cpufreq" dir */
+static unsigned int sysfs_cpufreq_write_file(unsigned int cpu,
+ const char *fname,
+ const char *value, size_t len)
+{
+ char path[SYSFS_PATH_MAX];
+ int fd;
+ ssize_t numwrite;
+
+ snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpufreq/%s",
+ cpu, fname);
+
+ fd = open(path, O_WRONLY);
+ if (fd == -1)
+ return 0;
+
+ numwrite = write(fd, value, len);
+ if (numwrite < 1) {
+ close(fd);
+ return 0;
+ }
+
+ close(fd);
+
+ return (unsigned int) numwrite;
+}
+
+/* read access to files which contain one numeric value */
+
+enum cpufreq_value {
+ CPUINFO_CUR_FREQ,
+ CPUINFO_MIN_FREQ,
+ CPUINFO_MAX_FREQ,
+ CPUINFO_LATENCY,
+ SCALING_CUR_FREQ,
+ SCALING_MIN_FREQ,
+ SCALING_MAX_FREQ,
+ STATS_NUM_TRANSITIONS,
+ MAX_CPUFREQ_VALUE_READ_FILES
+};
+
+static const char *cpufreq_value_files[MAX_CPUFREQ_VALUE_READ_FILES] = {
+ [CPUINFO_CUR_FREQ] = "cpuinfo_cur_freq",
+ [CPUINFO_MIN_FREQ] = "cpuinfo_min_freq",
+ [CPUINFO_MAX_FREQ] = "cpuinfo_max_freq",
+ [CPUINFO_LATENCY] = "cpuinfo_transition_latency",
+ [SCALING_CUR_FREQ] = "scaling_cur_freq",
+ [SCALING_MIN_FREQ] = "scaling_min_freq",
+ [SCALING_MAX_FREQ] = "scaling_max_freq",
+ [STATS_NUM_TRANSITIONS] = "stats/total_trans"
+};
+
+
+static unsigned long sysfs_cpufreq_get_one_value(unsigned int cpu,
+ enum cpufreq_value which)
+{
+ unsigned long value;
+ unsigned int len;
+ char linebuf[MAX_LINE_LEN];
+ char *endp;
+
+ if (which >= MAX_CPUFREQ_VALUE_READ_FILES)
+ return 0;
+
+ len = sysfs_cpufreq_read_file(cpu, cpufreq_value_files[which],
+ linebuf, sizeof(linebuf));
+
+ if (len == 0)
+ return 0;
+
+ value = strtoul(linebuf, &endp, 0);
+
+ if (endp == linebuf || errno == ERANGE)
+ return 0;
+
+ return value;
+}
+
+/* read access to files which contain one string */
+
+enum cpufreq_string {
+ SCALING_DRIVER,
+ SCALING_GOVERNOR,
+ MAX_CPUFREQ_STRING_FILES
+};
+
+static const char *cpufreq_string_files[MAX_CPUFREQ_STRING_FILES] = {
+ [SCALING_DRIVER] = "scaling_driver",
+ [SCALING_GOVERNOR] = "scaling_governor",
+};
+
+
+static char *sysfs_cpufreq_get_one_string(unsigned int cpu,
+ enum cpufreq_string which)
+{
+ char linebuf[MAX_LINE_LEN];
+ char *result;
+ unsigned int len;
+
+ if (which >= MAX_CPUFREQ_STRING_FILES)
+ return NULL;
+
+ len = sysfs_cpufreq_read_file(cpu, cpufreq_string_files[which],
+ linebuf, sizeof(linebuf));
+ if (len == 0)
+ return NULL;
+
+ result = strdup(linebuf);
+ if (result == NULL)
+ return NULL;
+
+ if (result[strlen(result) - 1] == '\n')
+ result[strlen(result) - 1] = '\0';
+
+ return result;
+}
+
+/* write access */
+
+enum cpufreq_write {
+ WRITE_SCALING_MIN_FREQ,
+ WRITE_SCALING_MAX_FREQ,
+ WRITE_SCALING_GOVERNOR,
+ WRITE_SCALING_SET_SPEED,
+ MAX_CPUFREQ_WRITE_FILES
+};
+
+static const char *cpufreq_write_files[MAX_CPUFREQ_WRITE_FILES] = {
+ [WRITE_SCALING_MIN_FREQ] = "scaling_min_freq",
+ [WRITE_SCALING_MAX_FREQ] = "scaling_max_freq",
+ [WRITE_SCALING_GOVERNOR] = "scaling_governor",
+ [WRITE_SCALING_SET_SPEED] = "scaling_setspeed",
+};
+
+static int sysfs_cpufreq_write_one_value(unsigned int cpu,
+ enum cpufreq_write which,
+ const char *new_value, size_t len)
+{
+ if (which >= MAX_CPUFREQ_WRITE_FILES)
+ return 0;
+
+ if (sysfs_cpufreq_write_file(cpu, cpufreq_write_files[which],
+ new_value, len) != len)
+ return -ENODEV;
+
+ return 0;
+}
+
unsigned long cpufreq_get_freq_kernel(unsigned int cpu)
{
- return sysfs_get_freq_kernel(cpu);
+ return sysfs_cpufreq_get_one_value(cpu, SCALING_CUR_FREQ);
}
unsigned long cpufreq_get_freq_hardware(unsigned int cpu)
{
- return sysfs_get_freq_hardware(cpu);
+ return sysfs_cpufreq_get_one_value(cpu, CPUINFO_CUR_FREQ);
}
unsigned long cpufreq_get_transition_latency(unsigned int cpu)
{
- return sysfs_get_freq_transition_latency(cpu);
+ return sysfs_cpufreq_get_one_value(cpu, CPUINFO_LATENCY);
}
int cpufreq_get_hardware_limits(unsigned int cpu,
@@ -39,12 +201,21 @@
{
if ((!min) || (!max))
return -EINVAL;
- return sysfs_get_freq_hardware_limits(cpu, min, max);
+
+ *min = sysfs_cpufreq_get_one_value(cpu, CPUINFO_MIN_FREQ);
+ if (!*min)
+ return -ENODEV;
+
+ *max = sysfs_cpufreq_get_one_value(cpu, CPUINFO_MAX_FREQ);
+ if (!*max)
+ return -ENODEV;
+
+ return 0;
}
char *cpufreq_get_driver(unsigned int cpu)
{
- return sysfs_get_freq_driver(cpu);
+ return sysfs_cpufreq_get_one_string(cpu, SCALING_DRIVER);
}
void cpufreq_put_driver(char *ptr)
@@ -56,7 +227,26 @@
struct cpufreq_policy *cpufreq_get_policy(unsigned int cpu)
{
- return sysfs_get_freq_policy(cpu);
+ struct cpufreq_policy *policy;
+
+ policy = malloc(sizeof(struct cpufreq_policy));
+ if (!policy)
+ return NULL;
+
+ policy->governor = sysfs_cpufreq_get_one_string(cpu, SCALING_GOVERNOR);
+ if (!policy->governor) {
+ free(policy);
+ return NULL;
+ }
+ policy->min = sysfs_cpufreq_get_one_value(cpu, SCALING_MIN_FREQ);
+ policy->max = sysfs_cpufreq_get_one_value(cpu, SCALING_MAX_FREQ);
+ if ((!policy->min) || (!policy->max)) {
+ free(policy->governor);
+ free(policy);
+ return NULL;
+ }
+
+ return policy;
}
void cpufreq_put_policy(struct cpufreq_policy *policy)
@@ -72,7 +262,57 @@
struct cpufreq_available_governors *cpufreq_get_available_governors(unsigned
int cpu)
{
- return sysfs_get_freq_available_governors(cpu);
+ struct cpufreq_available_governors *first = NULL;
+ struct cpufreq_available_governors *current = NULL;
+ char linebuf[MAX_LINE_LEN];
+ unsigned int pos, i;
+ unsigned int len;
+
+ len = sysfs_cpufreq_read_file(cpu, "scaling_available_governors",
+ linebuf, sizeof(linebuf));
+ if (len == 0)
+ return NULL;
+
+ pos = 0;
+ for (i = 0; i < len; i++) {
+ if (linebuf[i] == ' ' || linebuf[i] == '\n') {
+ if (i - pos < 2)
+ continue;
+ if (current) {
+ current->next = malloc(sizeof(*current));
+ if (!current->next)
+ goto error_out;
+ current = current->next;
+ } else {
+ first = malloc(sizeof(*first));
+ if (!first)
+ goto error_out;
+ current = first;
+ }
+ current->first = first;
+ current->next = NULL;
+
+ current->governor = malloc(i - pos + 1);
+ if (!current->governor)
+ goto error_out;
+
+ memcpy(current->governor, linebuf + pos, i - pos);
+ current->governor[i - pos] = '\0';
+ pos = i + 1;
+ }
+ }
+
+ return first;
+
+ error_out:
+ while (first) {
+ current = first->next;
+ if (first->governor)
+ free(first->governor);
+ free(first);
+ first = current;
+ }
+ return NULL;
}
void cpufreq_put_available_governors(struct cpufreq_available_governors *any)
@@ -96,7 +336,57 @@
struct cpufreq_available_frequencies
*cpufreq_get_available_frequencies(unsigned int cpu)
{
- return sysfs_get_available_frequencies(cpu);
+ struct cpufreq_available_frequencies *first = NULL;
+ struct cpufreq_available_frequencies *current = NULL;
+ char one_value[SYSFS_PATH_MAX];
+ char linebuf[MAX_LINE_LEN];
+ unsigned int pos, i;
+ unsigned int len;
+
+ len = sysfs_cpufreq_read_file(cpu, "scaling_available_frequencies",
+ linebuf, sizeof(linebuf));
+ if (len == 0)
+ return NULL;
+
+ pos = 0;
+ for (i = 0; i < len; i++) {
+ if (linebuf[i] == ' ' || linebuf[i] == '\n') {
+ if (i - pos < 2)
+ continue;
+ if (i - pos >= SYSFS_PATH_MAX)
+ goto error_out;
+ if (current) {
+ current->next = malloc(sizeof(*current));
+ if (!current->next)
+ goto error_out;
+ current = current->next;
+ } else {
+ first = malloc(sizeof(*first));
+ if (!first)
+ goto error_out;
+ current = first;
+ }
+ current->first = first;
+ current->next = NULL;
+
+ memcpy(one_value, linebuf + pos, i - pos);
+ one_value[i - pos] = '\0';
+ if (sscanf(one_value, "%lu", &current->frequency) != 1)
+ goto error_out;
+
+ pos = i + 1;
+ }
+ }
+
+ return first;
+
+ error_out:
+ while (first) {
+ current = first->next;
+ free(first);
+ first = current;
+ }
+ return NULL;
}
void cpufreq_put_available_frequencies(struct cpufreq_available_frequencies
@@ -114,10 +404,65 @@
}
}
+static struct cpufreq_affected_cpus *sysfs_get_cpu_list(unsigned int cpu,
+ const char *file)
+{
+ struct cpufreq_affected_cpus *first = NULL;
+ struct cpufreq_affected_cpus *current = NULL;
+ char one_value[SYSFS_PATH_MAX];
+ char linebuf[MAX_LINE_LEN];
+ unsigned int pos, i;
+ unsigned int len;
+
+ len = sysfs_cpufreq_read_file(cpu, file, linebuf, sizeof(linebuf));
+ if (len == 0)
+ return NULL;
+
+ pos = 0;
+ for (i = 0; i < len; i++) {
+ if (i == len || linebuf[i] == ' ' || linebuf[i] == '\n') {
+ if (i - pos < 1)
+ continue;
+ if (i - pos >= SYSFS_PATH_MAX)
+ goto error_out;
+ if (current) {
+ current->next = malloc(sizeof(*current));
+ if (!current->next)
+ goto error_out;
+ current = current->next;
+ } else {
+ first = malloc(sizeof(*first));
+ if (!first)
+ goto error_out;
+ current = first;
+ }
+ current->first = first;
+ current->next = NULL;
+
+ memcpy(one_value, linebuf + pos, i - pos);
+ one_value[i - pos] = '\0';
+
+ if (sscanf(one_value, "%u", &current->cpu) != 1)
+ goto error_out;
+
+ pos = i + 1;
+ }
+ }
+
+ return first;
+
+ error_out:
+ while (first) {
+ current = first->next;
+ free(first);
+ first = current;
+ }
+ return NULL;
+}
struct cpufreq_affected_cpus *cpufreq_get_affected_cpus(unsigned int cpu)
{
- return sysfs_get_freq_affected_cpus(cpu);
+ return sysfs_get_cpu_list(cpu, "affected_cpus");
}
void cpufreq_put_affected_cpus(struct cpufreq_affected_cpus *any)
@@ -138,7 +483,7 @@
struct cpufreq_affected_cpus *cpufreq_get_related_cpus(unsigned int cpu)
{
- return sysfs_get_freq_related_cpus(cpu);
+ return sysfs_get_cpu_list(cpu, "related_cpus");
}
void cpufreq_put_related_cpus(struct cpufreq_affected_cpus *any)
@@ -146,45 +491,208 @@
cpufreq_put_affected_cpus(any);
}
+static int verify_gov(char *new_gov, char *passed_gov)
+{
+ unsigned int i, j = 0;
+
+ if (!passed_gov || (strlen(passed_gov) > 19))
+ return -EINVAL;
+
+ strncpy(new_gov, passed_gov, 20);
+ for (i = 0; i < 20; i++) {
+ if (j) {
+ new_gov[i] = '\0';
+ continue;
+ }
+ if ((new_gov[i] >= 'a') && (new_gov[i] <= 'z'))
+ continue;
+
+ if ((new_gov[i] >= 'A') && (new_gov[i] <= 'Z'))
+ continue;
+
+ if (new_gov[i] == '-')
+ continue;
+
+ if (new_gov[i] == '_')
+ continue;
+
+ if (new_gov[i] == '\0') {
+ j = 1;
+ continue;
+ }
+ return -EINVAL;
+ }
+ new_gov[19] = '\0';
+ return 0;
+}
int cpufreq_set_policy(unsigned int cpu, struct cpufreq_policy *policy)
{
+ char min[SYSFS_PATH_MAX];
+ char max[SYSFS_PATH_MAX];
+ char gov[SYSFS_PATH_MAX];
+ int ret;
+ unsigned long old_min;
+ int write_max_first;
+
if (!policy || !(policy->governor))
return -EINVAL;
- return sysfs_set_freq_policy(cpu, policy);
+ if (policy->max < policy->min)
+ return -EINVAL;
+
+ if (verify_gov(gov, policy->governor))
+ return -EINVAL;
+
+ snprintf(min, SYSFS_PATH_MAX, "%lu", policy->min);
+ snprintf(max, SYSFS_PATH_MAX, "%lu", policy->max);
+
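+ /*
+ * The kernel rejects scaling_max_freq < scaling_min_freq at any
+ * instant, so if the new max falls below the current min, shrink
+ * min first; otherwise write max first.
+ */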
+ old_min = sysfs_cpufreq_get_one_value(cpu, SCALING_MIN_FREQ);
+ write_max_first = (old_min && (policy->max < old_min) ? 0 : 1);
+
+ if (write_max_first) {
+ ret = sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_MAX_FREQ,
+ max, strlen(max));
+ if (ret)
+ return ret;
+ }
+
+ ret = sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_MIN_FREQ, min,
+ strlen(min));
+ if (ret)
+ return ret;
+
+ if (!write_max_first) {
+ ret = sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_MAX_FREQ,
+ max, strlen(max));
+ if (ret)
+ return ret;
+ }
+
+ return sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_GOVERNOR,
+ gov, strlen(gov));
}
int cpufreq_modify_policy_min(unsigned int cpu, unsigned long min_freq)
{
- return sysfs_modify_freq_policy_min(cpu, min_freq);
+ char value[SYSFS_PATH_MAX];
+
+ snprintf(value, SYSFS_PATH_MAX, "%lu", min_freq);
+
+ return sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_MIN_FREQ,
+ value, strlen(value));
}
int cpufreq_modify_policy_max(unsigned int cpu, unsigned long max_freq)
{
- return sysfs_modify_freq_policy_max(cpu, max_freq);
-}
+ char value[SYSFS_PATH_MAX];
+ snprintf(value, SYSFS_PATH_MAX, "%lu", max_freq);
+
+ return sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_MAX_FREQ,
+ value, strlen(value));
+}
int cpufreq_modify_policy_governor(unsigned int cpu, char *governor)
{
+ char new_gov[SYSFS_PATH_MAX];
+
if ((!governor) || (strlen(governor) > 19))
return -EINVAL;
- return sysfs_modify_freq_policy_governor(cpu, governor);
+ if (verify_gov(new_gov, governor))
+ return -EINVAL;
+
+ return sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_GOVERNOR,
+ new_gov, strlen(new_gov));
}
int cpufreq_set_frequency(unsigned int cpu, unsigned long target_frequency)
{
- return sysfs_set_frequency(cpu, target_frequency);
+ struct cpufreq_policy *pol = cpufreq_get_policy(cpu);
+ char userspace_gov[] = "userspace";
+ char freq[SYSFS_PATH_MAX];
+ int ret;
+
+ if (!pol)
+ return -ENODEV;
+
+ if (strncmp(pol->governor, userspace_gov, 9) != 0) {
+ ret = cpufreq_modify_policy_governor(cpu, userspace_gov);
+ if (ret) {
+ cpufreq_put_policy(pol);
+ return ret;
+ }
+ }
+
+ cpufreq_put_policy(pol);
+
+ snprintf(freq, SYSFS_PATH_MAX, "%lu", target_frequency);
+
+ return sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_SET_SPEED,
+ freq, strlen(freq));
}
struct cpufreq_stats *cpufreq_get_stats(unsigned int cpu,
unsigned long long *total_time)
{
- return sysfs_get_freq_stats(cpu, total_time);
+ struct cpufreq_stats *first = NULL;
+ struct cpufreq_stats *current = NULL;
+ char one_value[SYSFS_PATH_MAX];
+ char linebuf[MAX_LINE_LEN];
+ unsigned int pos, i;
+ unsigned int len;
+
+ len = sysfs_cpufreq_read_file(cpu, "stats/time_in_state",
+ linebuf, sizeof(linebuf));
+ if (len == 0)
+ return NULL;
+
+ *total_time = 0;
+ pos = 0;
+ for (i = 0; i < len; i++) {
+ if (i == strlen(linebuf) || linebuf[i] == '\n') {
+ if (i - pos < 2)
+ continue;
+ if ((i - pos) >= SYSFS_PATH_MAX)
+ goto error_out;
+ if (current) {
+ current->next = malloc(sizeof(*current));
+ if (!current->next)
+ goto error_out;
+ current = current->next;
+ } else {
+ first = malloc(sizeof(*first));
+ if (!first)
+ goto error_out;
+ current = first;
+ }
+ current->first = first;
+ current->next = NULL;
+
+ memcpy(one_value, linebuf + pos, i - pos);
+ one_value[i - pos] = '\0';
+ if (sscanf(one_value, "%lu %llu",
+ &current->frequency,
+ &current->time_in_state) != 2)
+ goto error_out;
+
+ *total_time = *total_time + current->time_in_state;
+ pos = i + 1;
+ }
+ }
+
+ return first;
+
+ error_out:
+ while (first) {
+ current = first->next;
+ free(first);
+ first = current;
+ }
+ return NULL;
}
void cpufreq_put_stats(struct cpufreq_stats *any)
@@ -204,5 +712,5 @@
unsigned long cpufreq_get_transitions(unsigned int cpu)
{
- return sysfs_get_freq_transitions(cpu);
+ return sysfs_cpufreq_get_one_value(cpu, STATS_NUM_TRANSITIONS);
}
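For orientation, a minimal sketch (not part of the patch) of a client driving the reworked policy and stats API above; the frequency limits and the "ondemand" governor are assumptions and must match what the target hardware and kernel actually expose:

/*
 * Sketch only: set a policy on CPU 0, then print how much time was
 * spent at each frequency. All numeric values are hypothetical.
 */
#include <stdio.h>
#include <stdlib.h>
#include "cpufreq.h"

int main(void)
{
	struct cpufreq_policy pol = {
		.min = 800000,		/* kHz, assumed within hardware limits */
		.max = 2400000,		/* kHz, assumed within hardware limits */
		.governor = "ondemand",	/* assumed to be compiled in */
	};
	struct cpufreq_stats *stats, *s;
	unsigned long long total;

	if (cpufreq_set_policy(0, &pol) != 0) {
		fprintf(stderr, "cpufreq_set_policy failed (root required?)\n");
		return EXIT_FAILURE;
	}

	stats = cpufreq_get_stats(0, &total);
	for (s = stats; s; s = s->next)
		printf("%lu kHz: %llu\n", s->frequency, s->time_in_state);
	if (stats)
		cpufreq_put_stats(stats);
	return EXIT_SUCCESS;
}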
diff --git a/tools/power/cpupower/lib/cpufreq.h b/tools/power/cpupower/lib/cpufreq.h
index 3aae8e7..3b005c3 100644
--- a/tools/power/cpupower/lib/cpufreq.h
+++ b/tools/power/cpupower/lib/cpufreq.h
@@ -17,8 +17,8 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
-#ifndef _CPUFREQ_H
-#define _CPUFREQ_H 1
+#ifndef __CPUPOWER_CPUFREQ_H__
+#define __CPUPOWER_CPUFREQ_H__
struct cpufreq_policy {
unsigned long min;
@@ -58,13 +58,6 @@
extern "C" {
#endif
-/*
- * returns 0 if the specified CPU is present (it doesn't say
- * whether it is online!), and an error value if not.
- */
-
-extern int cpufreq_cpu_exists(unsigned int cpu);
-
/* determine current CPU frequency
* - _kernel variant means kernel's opinion of CPU frequency
* - _hardware variant means actual hardware CPU frequency,
@@ -73,9 +66,9 @@
* returns 0 on failure, else frequency in kHz.
*/
-extern unsigned long cpufreq_get_freq_kernel(unsigned int cpu);
+unsigned long cpufreq_get_freq_kernel(unsigned int cpu);
-extern unsigned long cpufreq_get_freq_hardware(unsigned int cpu);
+unsigned long cpufreq_get_freq_hardware(unsigned int cpu);
#define cpufreq_get(cpu) cpufreq_get_freq_kernel(cpu)
@@ -84,7 +77,7 @@
*
* returns 0 on failure, else transition latency in 10^(-9) s = nanoseconds
*/
-extern unsigned long cpufreq_get_transition_latency(unsigned int cpu);
+unsigned long cpufreq_get_transition_latency(unsigned int cpu);
/* determine hardware CPU frequency limits
@@ -93,7 +86,7 @@
* considerations by cpufreq policy notifiers in the kernel.
*/
-extern int cpufreq_get_hardware_limits(unsigned int cpu,
+int cpufreq_get_hardware_limits(unsigned int cpu,
unsigned long *min,
unsigned long *max);
@@ -104,9 +97,9 @@
* to avoid memory leakage, please.
*/
-extern char *cpufreq_get_driver(unsigned int cpu);
+char *cpufreq_get_driver(unsigned int cpu);
-extern void cpufreq_put_driver(char *ptr);
+void cpufreq_put_driver(char *ptr);
/* determine CPUfreq policy currently used
@@ -116,9 +109,9 @@
*/
-extern struct cpufreq_policy *cpufreq_get_policy(unsigned int cpu);
+struct cpufreq_policy *cpufreq_get_policy(unsigned int cpu);
-extern void cpufreq_put_policy(struct cpufreq_policy *policy);
+void cpufreq_put_policy(struct cpufreq_policy *policy);
/* determine CPUfreq governors currently available
@@ -129,10 +122,10 @@
*/
-extern struct cpufreq_available_governors
+struct cpufreq_available_governors
*cpufreq_get_available_governors(unsigned int cpu);
-extern void cpufreq_put_available_governors(
+void cpufreq_put_available_governors(
struct cpufreq_available_governors *first);
@@ -143,10 +136,10 @@
* cpufreq_put_available_frequencies after use.
*/
-extern struct cpufreq_available_frequencies
+struct cpufreq_available_frequencies
*cpufreq_get_available_frequencies(unsigned int cpu);
-extern void cpufreq_put_available_frequencies(
+void cpufreq_put_available_frequencies(
struct cpufreq_available_frequencies *first);
@@ -156,10 +149,10 @@
* to avoid memory leakage, please.
*/
-extern struct cpufreq_affected_cpus *cpufreq_get_affected_cpus(unsigned
+struct cpufreq_affected_cpus *cpufreq_get_affected_cpus(unsigned
int cpu);
-extern void cpufreq_put_affected_cpus(struct cpufreq_affected_cpus *first);
+void cpufreq_put_affected_cpus(struct cpufreq_affected_cpus *first);
/* determine related CPUs
@@ -168,10 +161,10 @@
* to avoid memory leakage, please.
*/
-extern struct cpufreq_affected_cpus *cpufreq_get_related_cpus(unsigned
+struct cpufreq_affected_cpus *cpufreq_get_related_cpus(unsigned
int cpu);
-extern void cpufreq_put_related_cpus(struct cpufreq_affected_cpus *first);
+void cpufreq_put_related_cpus(struct cpufreq_affected_cpus *first);
/* determine stats for cpufreq subsystem
@@ -179,12 +172,12 @@
* This is not available in all kernel versions or configurations.
*/
-extern struct cpufreq_stats *cpufreq_get_stats(unsigned int cpu,
+struct cpufreq_stats *cpufreq_get_stats(unsigned int cpu,
unsigned long long *total_time);
-extern void cpufreq_put_stats(struct cpufreq_stats *stats);
+void cpufreq_put_stats(struct cpufreq_stats *stats);
-extern unsigned long cpufreq_get_transitions(unsigned int cpu);
+unsigned long cpufreq_get_transitions(unsigned int cpu);
/* set new cpufreq policy
@@ -193,7 +186,7 @@
* but results may differ depending e.g. on governors being available.
*/
-extern int cpufreq_set_policy(unsigned int cpu, struct cpufreq_policy *policy);
+int cpufreq_set_policy(unsigned int cpu, struct cpufreq_policy *policy);
/* modify a policy by only changing min/max freq or governor
@@ -201,9 +194,9 @@
* Does not check whether result is what was intended.
*/
-extern int cpufreq_modify_policy_min(unsigned int cpu, unsigned long min_freq);
-extern int cpufreq_modify_policy_max(unsigned int cpu, unsigned long max_freq);
-extern int cpufreq_modify_policy_governor(unsigned int cpu, char *governor);
+int cpufreq_modify_policy_min(unsigned int cpu, unsigned long min_freq);
+int cpufreq_modify_policy_max(unsigned int cpu, unsigned long max_freq);
+int cpufreq_modify_policy_governor(unsigned int cpu, char *governor);
/* set a specific frequency
@@ -213,7 +206,7 @@
* occurs. Also does not work on ->range() cpufreq drivers.
*/
-extern int cpufreq_set_frequency(unsigned int cpu,
+int cpufreq_set_frequency(unsigned int cpu,
unsigned long target_frequency);
#ifdef __cplusplus
diff --git a/tools/power/cpupower/lib/cpuidle.c b/tools/power/cpupower/lib/cpuidle.c
new file mode 100644
index 0000000..9bd4c76
--- /dev/null
+++ b/tools/power/cpupower/lib/cpuidle.c
@@ -0,0 +1,380 @@
+/*
+ * (C) 2004-2009 Dominik Brodowski <linux@dominikbrodowski.de>
+ * (C) 2011 Thomas Renninger <trenn@novell.com> Novell Inc.
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ */
+
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "cpuidle.h"
+#include "cpupower_intern.h"
+
+/*
+ * helper function to check whether a file under "../cpuX/cpuidle/stateX/" dir
+ * exists.
+ * For example, the ability to disable C-states was only introduced in later
+ * kernel versions, so this function can be used to explicitly check for that
+ * feature.
+ *
+ * returns 1 if the file exists, 0 otherwise.
+ */
+static
+unsigned int cpuidle_state_file_exists(unsigned int cpu,
+ unsigned int idlestate,
+ const char *fname)
+{
+ char path[SYSFS_PATH_MAX];
+ struct stat statbuf;
+
+ snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpuidle/state%u/%s",
+ cpu, idlestate, fname);
+ if (stat(path, &statbuf) != 0)
+ return 0;
+ return 1;
+}
+
+/*
+ * helper function to read file from /sys into given buffer
+ * fname is a relative path under "cpuX/cpuidle/stateX/" dir
+ * C-states are counted starting with 0; C0 is not counted as a C-state.
+ * This means that if you want C1 info, pass 0 as the idlestate param.
+ */
+static
+unsigned int cpuidle_state_read_file(unsigned int cpu,
+ unsigned int idlestate,
+ const char *fname, char *buf,
+ size_t buflen)
+{
+ char path[SYSFS_PATH_MAX];
+ int fd;
+ ssize_t numread;
+
+ snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpuidle/state%u/%s",
+ cpu, idlestate, fname);
+
+ fd = open(path, O_RDONLY);
+ if (fd == -1)
+ return 0;
+
+ numread = read(fd, buf, buflen - 1);
+ if (numread < 1) {
+ close(fd);
+ return 0;
+ }
+
+ buf[numread] = '\0';
+ close(fd);
+
+ return (unsigned int) numread;
+}
+
+/*
+ * helper function to write a new value to a /sys file
+ * fname is a relative path under "../cpuX/cpuidle/stateY/" dir
+ *
+ * Returns the number of bytes written or 0 on error
+ */
+static
+unsigned int cpuidle_state_write_file(unsigned int cpu,
+ unsigned int idlestate,
+ const char *fname,
+ const char *value, size_t len)
+{
+ char path[SYSFS_PATH_MAX];
+ int fd;
+ ssize_t numwrite;
+
+ snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpuidle/state%u/%s",
+ cpu, idlestate, fname);
+
+ fd = open(path, O_WRONLY);
+ if (fd == -1)
+ return 0;
+
+ numwrite = write(fd, value, len);
+ if (numwrite < 1) {
+ close(fd);
+ return 0;
+ }
+
+ close(fd);
+
+ return (unsigned int) numwrite;
+}
+
+/* read access to files which contain one numeric value */
+
+enum idlestate_value {
+ IDLESTATE_USAGE,
+ IDLESTATE_POWER,
+ IDLESTATE_LATENCY,
+ IDLESTATE_TIME,
+ IDLESTATE_DISABLE,
+ MAX_IDLESTATE_VALUE_FILES
+};
+
+static const char *idlestate_value_files[MAX_IDLESTATE_VALUE_FILES] = {
+ [IDLESTATE_USAGE] = "usage",
+ [IDLESTATE_POWER] = "power",
+ [IDLESTATE_LATENCY] = "latency",
+ [IDLESTATE_TIME] = "time",
+ [IDLESTATE_DISABLE] = "disable",
+};
+
+static
+unsigned long long cpuidle_state_get_one_value(unsigned int cpu,
+ unsigned int idlestate,
+ enum idlestate_value which)
+{
+ unsigned long long value;
+ unsigned int len;
+ char linebuf[MAX_LINE_LEN];
+ char *endp;
+
+ if (which >= MAX_IDLESTATE_VALUE_FILES)
+ return 0;
+
+ len = cpuidle_state_read_file(cpu, idlestate,
+ idlestate_value_files[which],
+ linebuf, sizeof(linebuf));
+ if (len == 0)
+ return 0;
+
+ value = strtoull(linebuf, &endp, 0);
+
+ if (endp == linebuf || errno == ERANGE)
+ return 0;
+
+ return value;
+}
+
+/* read access to files which contain one string */
+
+enum idlestate_string {
+ IDLESTATE_DESC,
+ IDLESTATE_NAME,
+ MAX_IDLESTATE_STRING_FILES
+};
+
+static const char *idlestate_string_files[MAX_IDLESTATE_STRING_FILES] = {
+ [IDLESTATE_DESC] = "desc",
+ [IDLESTATE_NAME] = "name",
+};
+
+static char *cpuidle_state_get_one_string(unsigned int cpu,
+ unsigned int idlestate,
+ enum idlestate_string which)
+{
+ char linebuf[MAX_LINE_LEN];
+ char *result;
+ unsigned int len;
+
+ if (which >= MAX_IDLESTATE_STRING_FILES)
+ return NULL;
+
+ len = cpuidle_state_read_file(cpu, idlestate,
+ idlestate_string_files[which],
+ linebuf, sizeof(linebuf));
+ if (len == 0)
+ return NULL;
+
+ result = strdup(linebuf);
+ if (result == NULL)
+ return NULL;
+
+ if (result[strlen(result) - 1] == '\n')
+ result[strlen(result) - 1] = '\0';
+
+ return result;
+}
+
+/*
+ * Returns:
+ * 1 if disabled
+ * 0 if enabled
+ * -1 if idlestate is not available
+ * -2 if disabling is not supported by the kernel
+ */
+int cpuidle_is_state_disabled(unsigned int cpu,
+ unsigned int idlestate)
+{
+ if (cpuidle_state_count(cpu) <= idlestate)
+ return -1;
+
+ if (!cpuidle_state_file_exists(cpu, idlestate,
+ idlestate_value_files[IDLESTATE_DISABLE]))
+ return -2;
+ return cpuidle_state_get_one_value(cpu, idlestate, IDLESTATE_DISABLE);
+}
+
+/*
+ * Pass 1 as last argument to disable or 0 to enable the state
+ * Returns:
+ * 0 on success
+ * negative values on error, for example:
+ * -1 if idlestate is not available
+ * -2 if disabling is not supported by the kernel
+ * -3 if there is no write access to disable/enable C-states
+ */
+int cpuidle_state_disable(unsigned int cpu,
+ unsigned int idlestate,
+ unsigned int disable)
+{
+ char value[SYSFS_PATH_MAX];
+ int bytes_written;
+
+ if (cpuidle_state_count(cpu) <= idlestate)
+ return -1;
+
+ if (!cpuidle_state_file_exists(cpu, idlestate,
+ idlestate_value_files[IDLESTATE_DISABLE]))
+ return -2;
+
+ snprintf(value, SYSFS_PATH_MAX, "%u", disable);
+
+ bytes_written = cpuidle_state_write_file(cpu, idlestate, "disable",
+ value, strlen(value));
+ if (bytes_written)
+ return 0;
+ return -3;
+}
+
+unsigned long cpuidle_state_latency(unsigned int cpu,
+ unsigned int idlestate)
+{
+ return cpuidle_state_get_one_value(cpu, idlestate, IDLESTATE_LATENCY);
+}
+
+unsigned long cpuidle_state_usage(unsigned int cpu,
+ unsigned int idlestate)
+{
+ return cpuidle_state_get_one_value(cpu, idlestate, IDLESTATE_USAGE);
+}
+
+unsigned long long cpuidle_state_time(unsigned int cpu,
+ unsigned int idlestate)
+{
+ return cpuidle_state_get_one_value(cpu, idlestate, IDLESTATE_TIME);
+}
+
+char *cpuidle_state_name(unsigned int cpu, unsigned int idlestate)
+{
+ return cpuidle_state_get_one_string(cpu, idlestate, IDLESTATE_NAME);
+}
+
+char *cpuidle_state_desc(unsigned int cpu, unsigned int idlestate)
+{
+ return cpuidle_state_get_one_string(cpu, idlestate, IDLESTATE_DESC);
+}
+
+/*
+ * Returns the number of supported C-states of CPU core cpu.
+ * Zero if cpuidle is not available or does not export any C-states.
+ */
+unsigned int cpuidle_state_count(unsigned int cpu)
+{
+ char file[SYSFS_PATH_MAX];
+ struct stat statbuf;
+ int idlestates = 1;
+
+ snprintf(file, SYSFS_PATH_MAX, PATH_TO_CPU "cpuidle");
+ if (stat(file, &statbuf) != 0 || !S_ISDIR(statbuf.st_mode))
+ return 0;
+
+ snprintf(file, SYSFS_PATH_MAX, PATH_TO_CPU "cpu%u/cpuidle/state0", cpu);
+ if (stat(file, &statbuf) != 0 || !S_ISDIR(statbuf.st_mode))
+ return 0;
+
+ while (stat(file, &statbuf) == 0 && S_ISDIR(statbuf.st_mode)) {
+ snprintf(file, SYSFS_PATH_MAX, PATH_TO_CPU
+ "cpu%u/cpuidle/state%d", cpu, idlestates);
+ idlestates++;
+ }
+ idlestates--;
+ return idlestates;
+}
+
+/* CPUidle general /sys/devices/system/cpu/cpuidle/ sysfs access ********/
+
+/*
+ * helper function to read file from /sys into given buffer
+ * fname is a relative path under "cpu/cpuidle/" dir
+ */
+static unsigned int sysfs_cpuidle_read_file(const char *fname, char *buf,
+ size_t buflen)
+{
+ char path[SYSFS_PATH_MAX];
+
+ snprintf(path, sizeof(path), PATH_TO_CPU "cpuidle/%s", fname);
+
+ return sysfs_read_file(path, buf, buflen);
+}
+
+/* read access to files which contain one string */
+
+enum cpuidle_string {
+ CPUIDLE_GOVERNOR,
+ CPUIDLE_GOVERNOR_RO,
+ CPUIDLE_DRIVER,
+ MAX_CPUIDLE_STRING_FILES
+};
+
+static const char *cpuidle_string_files[MAX_CPUIDLE_STRING_FILES] = {
+ [CPUIDLE_GOVERNOR] = "current_governor",
+ [CPUIDLE_GOVERNOR_RO] = "current_governor_ro",
+ [CPUIDLE_DRIVER] = "current_driver",
+};
+
+static char *sysfs_cpuidle_get_one_string(enum cpuidle_string which)
+{
+ char linebuf[MAX_LINE_LEN];
+ char *result;
+ unsigned int len;
+
+ if (which >= MAX_CPUIDLE_STRING_FILES)
+ return NULL;
+
+ len = sysfs_cpuidle_read_file(cpuidle_string_files[which],
+ linebuf, sizeof(linebuf));
+ if (len == 0)
+ return NULL;
+
+ result = strdup(linebuf);
+ if (result == NULL)
+ return NULL;
+
+ if (result[strlen(result) - 1] == '\n')
+ result[strlen(result) - 1] = '\0';
+
+ return result;
+}
+
+char *cpuidle_get_governor(void)
+{
+ char *tmp = sysfs_cpuidle_get_one_string(CPUIDLE_GOVERNOR_RO);
+ if (!tmp)
+ return sysfs_cpuidle_get_one_string(CPUIDLE_GOVERNOR);
+ else
+ return tmp;
+}
+
+char *cpuidle_get_driver(void)
+{
+ return sysfs_cpuidle_get_one_string(CPUIDLE_DRIVER);
+}
+/* CPUidle idlestate specific /sys/devices/system/cpu/cpuX/cpuidle/ access */
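Again as a sketch (not part of the patch), the new helpers compose like this when enumerating the idle states of CPU 0; string results are strdup()ed by the library and must be freed by the caller:

/*
 * Sketch only: list the idle states of CPU 0. Treats the -1/-2
 * "unknown" returns of cpuidle_is_state_disabled() as enabled.
 */
#include <stdio.h>
#include <stdlib.h>
#include "cpuidle.h"

int main(void)
{
	unsigned int state, states = cpuidle_state_count(0);

	for (state = 0; state < states; state++) {
		char *name = cpuidle_state_name(0, state);
		int disabled = cpuidle_is_state_disabled(0, state) == 1;

		printf("state%u: %s latency=%luus usage=%lu%s\n", state,
		       name ? name : "?",
		       cpuidle_state_latency(0, state),
		       cpuidle_state_usage(0, state),
		       disabled ? " (disabled)" : "");
		free(name);
	}
	return 0;
}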
diff --git a/tools/power/cpupower/lib/cpuidle.h b/tools/power/cpupower/lib/cpuidle.h
new file mode 100644
index 0000000..04eb3cf
--- /dev/null
+++ b/tools/power/cpupower/lib/cpuidle.h
@@ -0,0 +1,23 @@
+#ifndef __CPUPOWER_CPUIDLE_H__
+#define __CPUPOWER_CPUIDLE_H__
+
+int cpuidle_is_state_disabled(unsigned int cpu,
+ unsigned int idlestate);
+int cpuidle_state_disable(unsigned int cpu, unsigned int idlestate,
+ unsigned int disable);
+unsigned long cpuidle_state_latency(unsigned int cpu,
+ unsigned int idlestate);
+unsigned long cpuidle_state_usage(unsigned int cpu,
+ unsigned int idlestate);
+unsigned long long cpuidle_state_time(unsigned int cpu,
+ unsigned int idlestate);
+char *cpuidle_state_name(unsigned int cpu,
+ unsigned int idlestate);
+char *cpuidle_state_desc(unsigned int cpu,
+ unsigned int idlestate);
+unsigned int cpuidle_state_count(unsigned int cpu);
+
+char *cpuidle_get_governor(void);
+char *cpuidle_get_driver(void);
+
+#endif /* __CPUPOWER_CPUIDLE_H__ */
diff --git a/tools/power/cpupower/lib/cpupower.c b/tools/power/cpupower/lib/cpupower.c
new file mode 100644
index 0000000..9c395ec9
--- /dev/null
+++ b/tools/power/cpupower/lib/cpupower.c
@@ -0,0 +1,192 @@
+/*
+ * (C) 2004-2009 Dominik Brodowski <linux@dominikbrodowski.de>
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+
+#include "cpupower.h"
+#include "cpupower_intern.h"
+
+unsigned int sysfs_read_file(const char *path, char *buf, size_t buflen)
+{
+ int fd;
+ ssize_t numread;
+
+ fd = open(path, O_RDONLY);
+ if (fd == -1)
+ return 0;
+
+ numread = read(fd, buf, buflen - 1);
+ if (numread < 1) {
+ close(fd);
+ return 0;
+ }
+
+ buf[numread] = '\0';
+ close(fd);
+
+ return (unsigned int) numread;
+}
+
+/*
+ * Detect whether a CPU is online
+ *
+ * Returns:
+ * 1 -> if CPU is online
+ * 0 -> if CPU is offline
+ * negative errno values in error case
+ */
+int cpupower_is_cpu_online(unsigned int cpu)
+{
+ char path[SYSFS_PATH_MAX];
+ int fd;
+ ssize_t numread;
+ unsigned long long value;
+ char linebuf[MAX_LINE_LEN];
+ char *endp;
+ struct stat statbuf;
+
+ snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u", cpu);
+
+ if (stat(path, &statbuf) != 0)
+ return 0;
+
+ /*
+ * kernel without CONFIG_HOTPLUG_CPU
+ * -> cpuX directory exists, but not cpuX/online file
+ */
+ snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/online", cpu);
+ if (stat(path, &statbuf) != 0)
+ return 1;
+
+ fd = open(path, O_RDONLY);
+ if (fd == -1)
+ return -errno;
+
+ numread = read(fd, linebuf, MAX_LINE_LEN - 1);
+ if (numread < 1) {
+ close(fd);
+ return -EIO;
+ }
+ linebuf[numread] = '\0';
+ close(fd);
+
+ value = strtoull(linebuf, &endp, 0);
+ if (value > 1)
+ return -EINVAL;
+
+ return value;
+}
+
+/* returns -1 on failure, 0 on success */
+static int sysfs_topology_read_file(unsigned int cpu, const char *fname, int *result)
+{
+ char linebuf[MAX_LINE_LEN];
+ char *endp;
+ char path[SYSFS_PATH_MAX];
+
+ snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/topology/%s",
+ cpu, fname);
+ if (sysfs_read_file(path, linebuf, MAX_LINE_LEN) == 0)
+ return -1;
+ *result = strtol(linebuf, &endp, 0);
+ if (endp == linebuf || errno == ERANGE)
+ return -1;
+ return 0;
+}
+
+static int __compare(const void *t1, const void *t2)
+{
+ struct cpuid_core_info *top1 = (struct cpuid_core_info *)t1;
+ struct cpuid_core_info *top2 = (struct cpuid_core_info *)t2;
+ if (top1->pkg < top2->pkg)
+ return -1;
+ else if (top1->pkg > top2->pkg)
+ return 1;
+ else if (top1->core < top2->core)
+ return -1;
+ else if (top1->core > top2->core)
+ return 1;
+ else if (top1->cpu < top2->cpu)
+ return -1;
+ else if (top1->cpu > top2->cpu)
+ return 1;
+ else
+ return 0;
+}
+
+/*
+ * Returns the number of CPUs, negative on error; cpu_top must be
+ * passed to cpu_topology_release to free resources.
+ *
+ * Array is sorted by ->pkg, ->core, then ->cpu
+ */
+int get_cpu_topology(struct cpupower_topology *cpu_top)
+{
+ int cpu, last_pkg, cpus = sysconf(_SC_NPROCESSORS_CONF);
+
+ cpu_top->core_info = malloc(sizeof(struct cpuid_core_info) * cpus);
+ if (cpu_top->core_info == NULL)
+ return -ENOMEM;
+ cpu_top->pkgs = cpu_top->cores = 0;
+ for (cpu = 0; cpu < cpus; cpu++) {
+ cpu_top->core_info[cpu].cpu = cpu;
+ cpu_top->core_info[cpu].is_online = cpupower_is_cpu_online(cpu);
+ if (sysfs_topology_read_file(
+ cpu,
+ "physical_package_id",
+ &(cpu_top->core_info[cpu].pkg)) < 0) {
+ cpu_top->core_info[cpu].pkg = -1;
+ cpu_top->core_info[cpu].core = -1;
+ continue;
+ }
+ if (sysfs_topology_read_file(
+ cpu,
+ "core_id",
+ &(cpu_top->core_info[cpu].core)) < 0) {
+ cpu_top->core_info[cpu].pkg = -1;
+ cpu_top->core_info[cpu].core = -1;
+ continue;
+ }
+ }
+
+ qsort(cpu_top->core_info, cpus, sizeof(struct cpuid_core_info),
+ __compare);
+
+ /* Count the number of distinct pkgs values. This works
+ because the primary sort of the core_info struct was just
+ done by pkg value. */
+ last_pkg = cpu_top->core_info[0].pkg;
+ for(cpu = 1; cpu < cpus; cpu++) {
+ if (cpu_top->core_info[cpu].pkg != last_pkg &&
+ cpu_top->core_info[cpu].pkg != -1) {
+
+ last_pkg = cpu_top->core_info[cpu].pkg;
+ cpu_top->pkgs++;
+ }
+ }
+ if (!(cpu_top->core_info[0].pkg == -1))
+ cpu_top->pkgs++;
+
+ /* Intel's core numbering is not necessarily consecutive; there may
+ * be a core_id of 3 but none of 2. Assume there is always a core 0.
+ * Get the number of cores by counting duplicates in a package:
+ for (cpu = 0; cpu_top->core_info[cpu].pkg == 0 && cpu < cpus; cpu++) {
+ if (cpu_top->core_info[cpu].core == 0)
+ cpu_top->cores++;
+ */
+ return cpus;
+}
+
+void cpu_topology_release(struct cpupower_topology cpu_top)
+{
+ free(cpu_top.core_info);
+}
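A sketch (not part of the patch) of consuming the topology array; note that CPUs whose sysfs topology could not be parsed carry pkg == core == -1 and, given __compare() above, sort to the front of the array:

/* Sketch only: dump the sorted per-CPU topology table. */
#include <stdio.h>
#include "cpupower.h"

int main(void)
{
	struct cpupower_topology top;
	int i, cpus = get_cpu_topology(&top);

	if (cpus < 0)
		return 1;	/* -ENOMEM is the only error case today */

	for (i = 0; i < cpus; i++)
		printf("cpu%d: pkg=%d core=%d online=%u\n",
		       top.core_info[i].cpu, top.core_info[i].pkg,
		       top.core_info[i].core,
		       (unsigned int)top.core_info[i].is_online);

	cpu_topology_release(top);
	return 0;
}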
diff --git a/tools/power/cpupower/lib/cpupower.h b/tools/power/cpupower/lib/cpupower.h
new file mode 100644
index 0000000..fa031fc
--- /dev/null
+++ b/tools/power/cpupower/lib/cpupower.h
@@ -0,0 +1,35 @@
+#ifndef __CPUPOWER_CPUPOWER_H__
+#define __CPUPOWER_CPUPOWER_H__
+
+struct cpupower_topology {
+ /* Number of CPU cores, packages and threads per core in the system */
+ unsigned int cores;
+ unsigned int pkgs;
+ unsigned int threads; /* per core */
+
+ /* Array gets allocated with cores entries, holding per-core info */
+ struct cpuid_core_info *core_info;
+};
+
+struct cpuid_core_info {
+ int pkg;
+ int core;
+ int cpu;
+
+ /* flags */
+ unsigned int is_online:1;
+};
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int get_cpu_topology(struct cpupower_topology *cpu_top);
+void cpu_topology_release(struct cpupower_topology cpu_top);
+int cpupower_is_cpu_online(unsigned int cpu);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/tools/power/cpupower/lib/cpupower_intern.h b/tools/power/cpupower/lib/cpupower_intern.h
new file mode 100644
index 0000000..f8ec400
--- /dev/null
+++ b/tools/power/cpupower/lib/cpupower_intern.h
@@ -0,0 +1,5 @@
+#define PATH_TO_CPU "/sys/devices/system/cpu/"
+#define MAX_LINE_LEN 4096
+#define SYSFS_PATH_MAX 255
+
+unsigned int sysfs_read_file(const char *path, char *buf, size_t buflen);
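The helper is internal to the library, but its contract is easy to demonstrate with a sketch (not part of the patch; the path is only an example — any readable sysfs attribute works). It reads at most buflen - 1 bytes, NUL-terminates the buffer, and returns 0 on any error:

/* Sketch only: read one sysfs attribute via the shared helper. */
#include <stdio.h>
#include "cpupower_intern.h"

int main(void)
{
	char buf[MAX_LINE_LEN];

	if (sysfs_read_file(PATH_TO_CPU "cpu0/cpufreq/scaling_governor",
			    buf, sizeof(buf)) == 0) {
		fprintf(stderr, "read failed or file empty\n");
		return 1;
	}
	printf("governor: %s", buf);	/* trailing newline comes from sysfs */
	return 0;
}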
diff --git a/tools/power/cpupower/lib/sysfs.c b/tools/power/cpupower/lib/sysfs.c
deleted file mode 100644
index 870713a..0000000
--- a/tools/power/cpupower/lib/sysfs.c
+++ /dev/null
@@ -1,672 +0,0 @@
-/*
- * (C) 2004-2009 Dominik Brodowski <linux@dominikbrodowski.de>
- *
- * Licensed under the terms of the GNU GPL License version 2.
- */
-
-#include <stdio.h>
-#include <errno.h>
-#include <stdlib.h>
-#include <string.h>
-#include <limits.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
-
-#include "cpufreq.h"
-
-#define PATH_TO_CPU "/sys/devices/system/cpu/"
-#define MAX_LINE_LEN 4096
-#define SYSFS_PATH_MAX 255
-
-
-static unsigned int sysfs_read_file(const char *path, char *buf, size_t buflen)
-{
- int fd;
- ssize_t numread;
-
- fd = open(path, O_RDONLY);
- if (fd == -1)
- return 0;
-
- numread = read(fd, buf, buflen - 1);
- if (numread < 1) {
- close(fd);
- return 0;
- }
-
- buf[numread] = '\0';
- close(fd);
-
- return (unsigned int) numread;
-}
-
-
-/* CPUFREQ sysfs access **************************************************/
-
-/* helper function to read file from /sys into given buffer */
-/* fname is a relative path under "cpuX/cpufreq" dir */
-static unsigned int sysfs_cpufreq_read_file(unsigned int cpu, const char *fname,
- char *buf, size_t buflen)
-{
- char path[SYSFS_PATH_MAX];
-
- snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpufreq/%s",
- cpu, fname);
- return sysfs_read_file(path, buf, buflen);
-}
-
-/* helper function to write a new value to a /sys file */
-/* fname is a relative path under "cpuX/cpufreq" dir */
-static unsigned int sysfs_cpufreq_write_file(unsigned int cpu,
- const char *fname,
- const char *value, size_t len)
-{
- char path[SYSFS_PATH_MAX];
- int fd;
- ssize_t numwrite;
-
- snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpufreq/%s",
- cpu, fname);
-
- fd = open(path, O_WRONLY);
- if (fd == -1)
- return 0;
-
- numwrite = write(fd, value, len);
- if (numwrite < 1) {
- close(fd);
- return 0;
- }
-
- close(fd);
-
- return (unsigned int) numwrite;
-}
-
-/* read access to files which contain one numeric value */
-
-enum cpufreq_value {
- CPUINFO_CUR_FREQ,
- CPUINFO_MIN_FREQ,
- CPUINFO_MAX_FREQ,
- CPUINFO_LATENCY,
- SCALING_CUR_FREQ,
- SCALING_MIN_FREQ,
- SCALING_MAX_FREQ,
- STATS_NUM_TRANSITIONS,
- MAX_CPUFREQ_VALUE_READ_FILES
-};
-
-static const char *cpufreq_value_files[MAX_CPUFREQ_VALUE_READ_FILES] = {
- [CPUINFO_CUR_FREQ] = "cpuinfo_cur_freq",
- [CPUINFO_MIN_FREQ] = "cpuinfo_min_freq",
- [CPUINFO_MAX_FREQ] = "cpuinfo_max_freq",
- [CPUINFO_LATENCY] = "cpuinfo_transition_latency",
- [SCALING_CUR_FREQ] = "scaling_cur_freq",
- [SCALING_MIN_FREQ] = "scaling_min_freq",
- [SCALING_MAX_FREQ] = "scaling_max_freq",
- [STATS_NUM_TRANSITIONS] = "stats/total_trans"
-};
-
-
-static unsigned long sysfs_cpufreq_get_one_value(unsigned int cpu,
- enum cpufreq_value which)
-{
- unsigned long value;
- unsigned int len;
- char linebuf[MAX_LINE_LEN];
- char *endp;
-
- if (which >= MAX_CPUFREQ_VALUE_READ_FILES)
- return 0;
-
- len = sysfs_cpufreq_read_file(cpu, cpufreq_value_files[which],
- linebuf, sizeof(linebuf));
-
- if (len == 0)
- return 0;
-
- value = strtoul(linebuf, &endp, 0);
-
- if (endp == linebuf || errno == ERANGE)
- return 0;
-
- return value;
-}
-
-/* read access to files which contain one string */
-
-enum cpufreq_string {
- SCALING_DRIVER,
- SCALING_GOVERNOR,
- MAX_CPUFREQ_STRING_FILES
-};
-
-static const char *cpufreq_string_files[MAX_CPUFREQ_STRING_FILES] = {
- [SCALING_DRIVER] = "scaling_driver",
- [SCALING_GOVERNOR] = "scaling_governor",
-};
-
-
-static char *sysfs_cpufreq_get_one_string(unsigned int cpu,
- enum cpufreq_string which)
-{
- char linebuf[MAX_LINE_LEN];
- char *result;
- unsigned int len;
-
- if (which >= MAX_CPUFREQ_STRING_FILES)
- return NULL;
-
- len = sysfs_cpufreq_read_file(cpu, cpufreq_string_files[which],
- linebuf, sizeof(linebuf));
- if (len == 0)
- return NULL;
-
- result = strdup(linebuf);
- if (result == NULL)
- return NULL;
-
- if (result[strlen(result) - 1] == '\n')
- result[strlen(result) - 1] = '\0';
-
- return result;
-}
-
-/* write access */
-
-enum cpufreq_write {
- WRITE_SCALING_MIN_FREQ,
- WRITE_SCALING_MAX_FREQ,
- WRITE_SCALING_GOVERNOR,
- WRITE_SCALING_SET_SPEED,
- MAX_CPUFREQ_WRITE_FILES
-};
-
-static const char *cpufreq_write_files[MAX_CPUFREQ_WRITE_FILES] = {
- [WRITE_SCALING_MIN_FREQ] = "scaling_min_freq",
- [WRITE_SCALING_MAX_FREQ] = "scaling_max_freq",
- [WRITE_SCALING_GOVERNOR] = "scaling_governor",
- [WRITE_SCALING_SET_SPEED] = "scaling_setspeed",
-};
-
-static int sysfs_cpufreq_write_one_value(unsigned int cpu,
- enum cpufreq_write which,
- const char *new_value, size_t len)
-{
- if (which >= MAX_CPUFREQ_WRITE_FILES)
- return 0;
-
- if (sysfs_cpufreq_write_file(cpu, cpufreq_write_files[which],
- new_value, len) != len)
- return -ENODEV;
-
- return 0;
-};
-
-unsigned long sysfs_get_freq_kernel(unsigned int cpu)
-{
- return sysfs_cpufreq_get_one_value(cpu, SCALING_CUR_FREQ);
-}
-
-unsigned long sysfs_get_freq_hardware(unsigned int cpu)
-{
- return sysfs_cpufreq_get_one_value(cpu, CPUINFO_CUR_FREQ);
-}
-
-unsigned long sysfs_get_freq_transition_latency(unsigned int cpu)
-{
- return sysfs_cpufreq_get_one_value(cpu, CPUINFO_LATENCY);
-}
-
-int sysfs_get_freq_hardware_limits(unsigned int cpu,
- unsigned long *min,
- unsigned long *max)
-{
- if ((!min) || (!max))
- return -EINVAL;
-
- *min = sysfs_cpufreq_get_one_value(cpu, CPUINFO_MIN_FREQ);
- if (!*min)
- return -ENODEV;
-
- *max = sysfs_cpufreq_get_one_value(cpu, CPUINFO_MAX_FREQ);
- if (!*max)
- return -ENODEV;
-
- return 0;
-}
-
-char *sysfs_get_freq_driver(unsigned int cpu)
-{
- return sysfs_cpufreq_get_one_string(cpu, SCALING_DRIVER);
-}
-
-struct cpufreq_policy *sysfs_get_freq_policy(unsigned int cpu)
-{
- struct cpufreq_policy *policy;
-
- policy = malloc(sizeof(struct cpufreq_policy));
- if (!policy)
- return NULL;
-
- policy->governor = sysfs_cpufreq_get_one_string(cpu, SCALING_GOVERNOR);
- if (!policy->governor) {
- free(policy);
- return NULL;
- }
- policy->min = sysfs_cpufreq_get_one_value(cpu, SCALING_MIN_FREQ);
- policy->max = sysfs_cpufreq_get_one_value(cpu, SCALING_MAX_FREQ);
- if ((!policy->min) || (!policy->max)) {
- free(policy->governor);
- free(policy);
- return NULL;
- }
-
- return policy;
-}
-
-struct cpufreq_available_governors *
-sysfs_get_freq_available_governors(unsigned int cpu) {
- struct cpufreq_available_governors *first = NULL;
- struct cpufreq_available_governors *current = NULL;
- char linebuf[MAX_LINE_LEN];
- unsigned int pos, i;
- unsigned int len;
-
- len = sysfs_cpufreq_read_file(cpu, "scaling_available_governors",
- linebuf, sizeof(linebuf));
- if (len == 0)
- return NULL;
-
- pos = 0;
- for (i = 0; i < len; i++) {
- if (linebuf[i] == ' ' || linebuf[i] == '\n') {
- if (i - pos < 2)
- continue;
- if (current) {
- current->next = malloc(sizeof(*current));
- if (!current->next)
- goto error_out;
- current = current->next;
- } else {
- first = malloc(sizeof(*first));
- if (!first)
- goto error_out;
- current = first;
- }
- current->first = first;
- current->next = NULL;
-
- current->governor = malloc(i - pos + 1);
- if (!current->governor)
- goto error_out;
-
- memcpy(current->governor, linebuf + pos, i - pos);
- current->governor[i - pos] = '\0';
- pos = i + 1;
- }
- }
-
- return first;
-
- error_out:
- while (first) {
- current = first->next;
- if (first->governor)
- free(first->governor);
- free(first);
- first = current;
- }
- return NULL;
-}
-
-
-struct cpufreq_available_frequencies *
-sysfs_get_available_frequencies(unsigned int cpu) {
- struct cpufreq_available_frequencies *first = NULL;
- struct cpufreq_available_frequencies *current = NULL;
- char one_value[SYSFS_PATH_MAX];
- char linebuf[MAX_LINE_LEN];
- unsigned int pos, i;
- unsigned int len;
-
- len = sysfs_cpufreq_read_file(cpu, "scaling_available_frequencies",
- linebuf, sizeof(linebuf));
- if (len == 0)
- return NULL;
-
- pos = 0;
- for (i = 0; i < len; i++) {
- if (linebuf[i] == ' ' || linebuf[i] == '\n') {
- if (i - pos < 2)
- continue;
- if (i - pos >= SYSFS_PATH_MAX)
- goto error_out;
- if (current) {
- current->next = malloc(sizeof(*current));
- if (!current->next)
- goto error_out;
- current = current->next;
- } else {
- first = malloc(sizeof(*first));
- if (!first)
- goto error_out;
- current = first;
- }
- current->first = first;
- current->next = NULL;
-
- memcpy(one_value, linebuf + pos, i - pos);
- one_value[i - pos] = '\0';
- if (sscanf(one_value, "%lu", ¤t->frequency) != 1)
- goto error_out;
-
- pos = i + 1;
- }
- }
-
- return first;
-
- error_out:
- while (first) {
- current = first->next;
- free(first);
- first = current;
- }
- return NULL;
-}
-
-static struct cpufreq_affected_cpus *sysfs_get_cpu_list(unsigned int cpu,
- const char *file)
-{
- struct cpufreq_affected_cpus *first = NULL;
- struct cpufreq_affected_cpus *current = NULL;
- char one_value[SYSFS_PATH_MAX];
- char linebuf[MAX_LINE_LEN];
- unsigned int pos, i;
- unsigned int len;
-
- len = sysfs_cpufreq_read_file(cpu, file, linebuf, sizeof(linebuf));
- if (len == 0)
- return NULL;
-
- pos = 0;
- for (i = 0; i < len; i++) {
- if (i == len || linebuf[i] == ' ' || linebuf[i] == '\n') {
- if (i - pos < 1)
- continue;
- if (i - pos >= SYSFS_PATH_MAX)
- goto error_out;
- if (current) {
- current->next = malloc(sizeof(*current));
- if (!current->next)
- goto error_out;
- current = current->next;
- } else {
- first = malloc(sizeof(*first));
- if (!first)
- goto error_out;
- current = first;
- }
- current->first = first;
- current->next = NULL;
-
- memcpy(one_value, linebuf + pos, i - pos);
- one_value[i - pos] = '\0';
-
- if (sscanf(one_value, "%u", ¤t->cpu) != 1)
- goto error_out;
-
- pos = i + 1;
- }
- }
-
- return first;
-
- error_out:
- while (first) {
- current = first->next;
- free(first);
- first = current;
- }
- return NULL;
-}
-
-struct cpufreq_affected_cpus *sysfs_get_freq_affected_cpus(unsigned int cpu)
-{
- return sysfs_get_cpu_list(cpu, "affected_cpus");
-}
-
-struct cpufreq_affected_cpus *sysfs_get_freq_related_cpus(unsigned int cpu)
-{
- return sysfs_get_cpu_list(cpu, "related_cpus");
-}
-
-struct cpufreq_stats *sysfs_get_freq_stats(unsigned int cpu,
- unsigned long long *total_time) {
- struct cpufreq_stats *first = NULL;
- struct cpufreq_stats *current = NULL;
- char one_value[SYSFS_PATH_MAX];
- char linebuf[MAX_LINE_LEN];
- unsigned int pos, i;
- unsigned int len;
-
- len = sysfs_cpufreq_read_file(cpu, "stats/time_in_state",
- linebuf, sizeof(linebuf));
- if (len == 0)
- return NULL;
-
- *total_time = 0;
- pos = 0;
- for (i = 0; i < len; i++) {
- if (i == strlen(linebuf) || linebuf[i] == '\n') {
- if (i - pos < 2)
- continue;
- if ((i - pos) >= SYSFS_PATH_MAX)
- goto error_out;
- if (current) {
- current->next = malloc(sizeof(*current));
- if (!current->next)
- goto error_out;
- current = current->next;
- } else {
- first = malloc(sizeof(*first));
- if (!first)
- goto error_out;
- current = first;
- }
- current->first = first;
- current->next = NULL;
-
- memcpy(one_value, linebuf + pos, i - pos);
- one_value[i - pos] = '\0';
- if (sscanf(one_value, "%lu %llu",
- &current->frequency,
- &current->time_in_state) != 2)
- goto error_out;
-
- *total_time = *total_time + current->time_in_state;
- pos = i + 1;
- }
- }
-
- return first;
-
- error_out:
- while (first) {
- current = first->next;
- free(first);
- first = current;
- }
- return NULL;
-}
-
-unsigned long sysfs_get_freq_transitions(unsigned int cpu)
-{
- return sysfs_cpufreq_get_one_value(cpu, STATS_NUM_TRANSITIONS);
-}
-
-static int verify_gov(char *new_gov, char *passed_gov)
-{
- unsigned int i, j = 0;
-
- if (!passed_gov || (strlen(passed_gov) > 19))
- return -EINVAL;
-
- strncpy(new_gov, passed_gov, 20);
- for (i = 0; i < 20; i++) {
- if (j) {
- new_gov[i] = '\0';
- continue;
- }
- if ((new_gov[i] >= 'a') && (new_gov[i] <= 'z'))
- continue;
-
- if ((new_gov[i] >= 'A') && (new_gov[i] <= 'Z'))
- continue;
-
- if (new_gov[i] == '-')
- continue;
-
- if (new_gov[i] == '_')
- continue;
-
- if (new_gov[i] == '\0') {
- j = 1;
- continue;
- }
- return -EINVAL;
- }
- new_gov[19] = '\0';
- return 0;
-}
-
-int sysfs_modify_freq_policy_governor(unsigned int cpu, char *governor)
-{
- char new_gov[SYSFS_PATH_MAX];
-
- if (!governor)
- return -EINVAL;
-
- if (verify_gov(new_gov, governor))
- return -EINVAL;
-
- return sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_GOVERNOR,
- new_gov, strlen(new_gov));
-};
-
-int sysfs_modify_freq_policy_max(unsigned int cpu, unsigned long max_freq)
-{
- char value[SYSFS_PATH_MAX];
-
- snprintf(value, SYSFS_PATH_MAX, "%lu", max_freq);
-
- return sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_MAX_FREQ,
- value, strlen(value));
-};
-
-
-int sysfs_modify_freq_policy_min(unsigned int cpu, unsigned long min_freq)
-{
- char value[SYSFS_PATH_MAX];
-
- snprintf(value, SYSFS_PATH_MAX, "%lu", min_freq);
-
- return sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_MIN_FREQ,
- value, strlen(value));
-};
-
-
-int sysfs_set_freq_policy(unsigned int cpu, struct cpufreq_policy *policy)
-{
- char min[SYSFS_PATH_MAX];
- char max[SYSFS_PATH_MAX];
- char gov[SYSFS_PATH_MAX];
- int ret;
- unsigned long old_min;
- int write_max_first;
-
- if (!policy || !(policy->governor))
- return -EINVAL;
-
- if (policy->max < policy->min)
- return -EINVAL;
-
- if (verify_gov(gov, policy->governor))
- return -EINVAL;
-
- snprintf(min, SYSFS_PATH_MAX, "%lu", policy->min);
- snprintf(max, SYSFS_PATH_MAX, "%lu", policy->max);
-
- old_min = sysfs_cpufreq_get_one_value(cpu, SCALING_MIN_FREQ);
- write_max_first = (old_min && (policy->max < old_min) ? 0 : 1);
-
- if (write_max_first) {
- ret = sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_MAX_FREQ,
- max, strlen(max));
- if (ret)
- return ret;
- }
-
- ret = sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_MIN_FREQ, min,
- strlen(min));
- if (ret)
- return ret;
-
- if (!write_max_first) {
- ret = sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_MAX_FREQ,
- max, strlen(max));
- if (ret)
- return ret;
- }
-
- return sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_GOVERNOR,
- gov, strlen(gov));
-}
-
-int sysfs_set_frequency(unsigned int cpu, unsigned long target_frequency)
-{
- struct cpufreq_policy *pol = sysfs_get_freq_policy(cpu);
- char userspace_gov[] = "userspace";
- char freq[SYSFS_PATH_MAX];
- int ret;
-
- if (!pol)
- return -ENODEV;
-
- if (strncmp(pol->governor, userspace_gov, 9) != 0) {
- ret = sysfs_modify_freq_policy_governor(cpu, userspace_gov);
- if (ret) {
- cpufreq_put_policy(pol);
- return ret;
- }
- }
-
- cpufreq_put_policy(pol);
-
- snprintf(freq, SYSFS_PATH_MAX, "%lu", target_frequency);
-
- return sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_SET_SPEED,
- freq, strlen(freq));
-}
-
-/* CPUFREQ sysfs access **************************************************/
-
-/* General sysfs access **************************************************/
-int sysfs_cpu_exists(unsigned int cpu)
-{
- char file[SYSFS_PATH_MAX];
- struct stat statbuf;
-
- snprintf(file, SYSFS_PATH_MAX, PATH_TO_CPU "cpu%u/", cpu);
-
- if (stat(file, &statbuf) != 0)
- return -ENOSYS;
-
- return S_ISDIR(statbuf.st_mode) ? 0 : -ENOSYS;
-}
-
-/* General sysfs access **************************************************/
diff --git a/tools/power/cpupower/lib/sysfs.h b/tools/power/cpupower/lib/sysfs.h
deleted file mode 100644
index c76a5e0..0000000
--- a/tools/power/cpupower/lib/sysfs.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/* General */
-extern unsigned int sysfs_cpu_exists(unsigned int cpu);
-
-/* CPUfreq */
-extern unsigned long sysfs_get_freq_kernel(unsigned int cpu);
-extern unsigned long sysfs_get_freq_hardware(unsigned int cpu);
-extern unsigned long sysfs_get_freq_transition_latency(unsigned int cpu);
-extern int sysfs_get_freq_hardware_limits(unsigned int cpu,
- unsigned long *min, unsigned long *max);
-extern char *sysfs_get_freq_driver(unsigned int cpu);
-extern struct cpufreq_policy *sysfs_get_freq_policy(unsigned int cpu);
-extern struct cpufreq_available_governors *sysfs_get_freq_available_governors(
- unsigned int cpu);
-extern struct cpufreq_available_frequencies *sysfs_get_available_frequencies(
- unsigned int cpu);
-extern struct cpufreq_affected_cpus *sysfs_get_freq_affected_cpus(
- unsigned int cpu);
-extern struct cpufreq_affected_cpus *sysfs_get_freq_related_cpus(
- unsigned int cpu);
-extern struct cpufreq_stats *sysfs_get_freq_stats(unsigned int cpu,
- unsigned long long *total_time);
-extern unsigned long sysfs_get_freq_transitions(unsigned int cpu);
-extern int sysfs_set_freq_policy(unsigned int cpu,
- struct cpufreq_policy *policy);
-extern int sysfs_modify_freq_policy_min(unsigned int cpu,
- unsigned long min_freq);
-extern int sysfs_modify_freq_policy_max(unsigned int cpu,
- unsigned long max_freq);
-extern int sysfs_modify_freq_policy_governor(unsigned int cpu, char *governor);
-extern int sysfs_set_frequency(unsigned int cpu,
- unsigned long target_frequency);
diff --git a/tools/power/cpupower/man/cpupower-frequency-info.1 b/tools/power/cpupower/man/cpupower-frequency-info.1
index 9c85a38..6aa8d23 100644
--- a/tools/power/cpupower/man/cpupower-frequency-info.1
+++ b/tools/power/cpupower/man/cpupower-frequency-info.1
@@ -1,7 +1,7 @@
.TH "CPUPOWER\-FREQUENCY\-INFO" "1" "0.1" "" "cpupower Manual"
.SH "NAME"
.LP
-cpupower frequency\-info \- Utility to retrieve cpufreq kernel information
+cpupower\-frequency\-info \- Utility to retrieve cpufreq kernel information
.SH "SYNTAX"
.LP
cpupower [ \-c cpulist ] frequency\-info [\fIoptions\fP]
diff --git a/tools/power/cpupower/man/cpupower-frequency-set.1 b/tools/power/cpupower/man/cpupower-frequency-set.1
index 3eacc8d..b505702 100644
--- a/tools/power/cpupower/man/cpupower-frequency-set.1
+++ b/tools/power/cpupower/man/cpupower-frequency-set.1
@@ -1,7 +1,7 @@
.TH "CPUPOWER\-FREQUENCY\-SET" "1" "0.1" "" "cpupower Manual"
.SH "NAME"
.LP
-cpupower frequency\-set \- A small tool which allows to modify cpufreq settings.
+cpupower\-frequency\-set \- A small tool which allows to modify cpufreq settings.
.SH "SYNTAX"
.LP
cpupower [ \-c cpu ] frequency\-set [\fIoptions\fP]
diff --git a/tools/power/cpupower/man/cpupower-idle-info.1 b/tools/power/cpupower/man/cpupower-idle-info.1
index 7b3646a..80a1311 100644
--- a/tools/power/cpupower/man/cpupower-idle-info.1
+++ b/tools/power/cpupower/man/cpupower-idle-info.1
@@ -1,7 +1,7 @@
.TH "CPUPOWER-IDLE-INFO" "1" "0.1" "" "cpupower Manual"
.SH "NAME"
.LP
-cpupower idle\-info \- Utility to retrieve cpu idle kernel information
+cpupower\-idle\-info \- Utility to retrieve cpu idle kernel information
.SH "SYNTAX"
.LP
cpupower [ \-c cpulist ] idle\-info [\fIoptions\fP]
diff --git a/tools/power/cpupower/man/cpupower-idle-set.1 b/tools/power/cpupower/man/cpupower-idle-set.1
index 580c4e3..21916cf 100644
--- a/tools/power/cpupower/man/cpupower-idle-set.1
+++ b/tools/power/cpupower/man/cpupower-idle-set.1
@@ -1,7 +1,7 @@
.TH "CPUPOWER-IDLE-SET" "1" "0.1" "" "cpupower Manual"
.SH "NAME"
.LP
-cpupower idle\-set \- Utility to set cpu idle state specific kernel options
+cpupower\-idle\-set \- Utility to set cpu idle state specific kernel options
.SH "SYNTAX"
.LP
cpupower [ \-c cpulist ] idle\-set [\fIoptions\fP]
diff --git a/tools/power/cpupower/utils/cpufreq-set.c b/tools/power/cpupower/utils/cpufreq-set.c
index 0fbd1a2..b4bf769 100644
--- a/tools/power/cpupower/utils/cpufreq-set.c
+++ b/tools/power/cpupower/utils/cpufreq-set.c
@@ -16,8 +16,8 @@
#include <getopt.h>
#include "cpufreq.h"
+#include "cpuidle.h"
#include "helpers/helpers.h"
-#include "helpers/sysfs.h"
#define NORM_FREQ_LEN 32
@@ -296,7 +296,7 @@
struct cpufreq_affected_cpus *cpus;
if (!bitmask_isbitset(cpus_chosen, cpu) ||
- cpufreq_cpu_exists(cpu))
+ cpupower_is_cpu_online(cpu) != 1)
continue;
cpus = cpufreq_get_related_cpus(cpu);
@@ -316,10 +316,10 @@
cpu <= bitmask_last(cpus_chosen); cpu++) {
if (!bitmask_isbitset(cpus_chosen, cpu) ||
- cpufreq_cpu_exists(cpu))
+ cpupower_is_cpu_online(cpu) != 1)
continue;
- if (sysfs_is_cpu_online(cpu) != 1)
+ if (cpupower_is_cpu_online(cpu) != 1)
continue;
printf(_("Setting cpu: %d\n"), cpu);
diff --git a/tools/power/cpupower/utils/cpuidle-info.c b/tools/power/cpupower/utils/cpuidle-info.c
index 8bf8ab5..b59c85d 100644
--- a/tools/power/cpupower/utils/cpuidle-info.c
+++ b/tools/power/cpupower/utils/cpuidle-info.c
@@ -13,8 +13,10 @@
#include <string.h>
#include <getopt.h>
-#include "helpers/helpers.h"
+#include <cpuidle.h>
+
#include "helpers/sysfs.h"
+#include "helpers/helpers.h"
#include "helpers/bitmask.h"
#define LINE_LEN 10
@@ -24,7 +26,7 @@
unsigned int idlestates, idlestate;
char *tmp;
- idlestates = sysfs_get_idlestate_count(cpu);
+ idlestates = cpuidle_state_count(cpu);
if (idlestates == 0) {
printf(_("CPU %u: No idle states\n"), cpu);
return;
@@ -33,7 +35,7 @@
printf(_("Number of idle states: %d\n"), idlestates);
printf(_("Available idle states:"));
for (idlestate = 0; idlestate < idlestates; idlestate++) {
- tmp = sysfs_get_idlestate_name(cpu, idlestate);
+ tmp = cpuidle_state_name(cpu, idlestate);
if (!tmp)
continue;
printf(" %s", tmp);
@@ -45,28 +47,28 @@
return;
for (idlestate = 0; idlestate < idlestates; idlestate++) {
- int disabled = sysfs_is_idlestate_disabled(cpu, idlestate);
+ int disabled = cpuidle_is_state_disabled(cpu, idlestate);
/* Disabled interface not supported on older kernels */
if (disabled < 0)
disabled = 0;
- tmp = sysfs_get_idlestate_name(cpu, idlestate);
+ tmp = cpuidle_state_name(cpu, idlestate);
if (!tmp)
continue;
printf("%s%s:\n", tmp, (disabled) ? " (DISABLED) " : "");
free(tmp);
- tmp = sysfs_get_idlestate_desc(cpu, idlestate);
+ tmp = cpuidle_state_desc(cpu, idlestate);
if (!tmp)
continue;
printf(_("Flags/Description: %s\n"), tmp);
free(tmp);
printf(_("Latency: %lu\n"),
- sysfs_get_idlestate_latency(cpu, idlestate));
+ cpuidle_state_latency(cpu, idlestate));
printf(_("Usage: %lu\n"),
- sysfs_get_idlestate_usage(cpu, idlestate));
+ cpuidle_state_usage(cpu, idlestate));
printf(_("Duration: %llu\n"),
- sysfs_get_idlestate_time(cpu, idlestate));
+ cpuidle_state_time(cpu, idlestate));
}
}
@@ -74,7 +76,7 @@
{
char *tmp;
- tmp = sysfs_get_cpuidle_driver();
+ tmp = cpuidle_get_driver();
if (!tmp) {
printf(_("Could not determine cpuidle driver\n"));
return;
@@ -83,7 +85,7 @@
printf(_("CPUidle driver: %s\n"), tmp);
free(tmp);
- tmp = sysfs_get_cpuidle_governor();
+ tmp = cpuidle_get_governor();
if (!tmp) {
printf(_("Could not determine cpuidle governor\n"));
return;
@@ -98,7 +100,7 @@
long max_allowed_cstate = 2000000000;
unsigned int cstate, cstates;
- cstates = sysfs_get_idlestate_count(cpu);
+ cstates = cpuidle_state_count(cpu);
if (cstates == 0) {
printf(_("CPU %u: No C-states info\n"), cpu);
return;
@@ -113,11 +115,11 @@
"type[C%d] "), cstate, cstate);
printf(_("promotion[--] demotion[--] "));
printf(_("latency[%03lu] "),
- sysfs_get_idlestate_latency(cpu, cstate));
+ cpuidle_state_latency(cpu, cstate));
printf(_("usage[%08lu] "),
- sysfs_get_idlestate_usage(cpu, cstate));
+ cpuidle_state_usage(cpu, cstate));
printf(_("duration[%020Lu] \n"),
- sysfs_get_idlestate_time(cpu, cstate));
+ cpuidle_state_time(cpu, cstate));
}
}
diff --git a/tools/power/cpupower/utils/cpuidle-set.c b/tools/power/cpupower/utils/cpuidle-set.c
index d6b6ae4..691c24d 100644
--- a/tools/power/cpupower/utils/cpuidle-set.c
+++ b/tools/power/cpupower/utils/cpuidle-set.c
@@ -5,12 +5,12 @@
#include <limits.h>
#include <string.h>
#include <ctype.h>
-
#include <getopt.h>
-#include "cpufreq.h"
+#include <cpufreq.h>
+#include <cpuidle.h>
+
#include "helpers/helpers.h"
-#include "helpers/sysfs.h"
static struct option info_opts[] = {
{"disable", required_argument, NULL, 'd'},
@@ -104,16 +104,16 @@
if (!bitmask_isbitset(cpus_chosen, cpu))
continue;
- if (sysfs_is_cpu_online(cpu) != 1)
+ if (cpupower_is_cpu_online(cpu) != 1)
continue;
- idlestates = sysfs_get_idlestate_count(cpu);
+ idlestates = cpuidle_state_count(cpu);
if (idlestates <= 0)
continue;
switch (param) {
case 'd':
- ret = sysfs_idlestate_disable(cpu, idlestate, 1);
+ ret = cpuidle_state_disable(cpu, idlestate, 1);
if (ret == 0)
printf(_("Idlestate %u disabled on CPU %u\n"), idlestate, cpu);
else if (ret == -1)
@@ -126,7 +126,7 @@
idlestate, cpu);
break;
case 'e':
- ret = sysfs_idlestate_disable(cpu, idlestate, 0);
+ ret = cpuidle_state_disable(cpu, idlestate, 0);
if (ret == 0)
printf(_("Idlestate %u enabled on CPU %u\n"), idlestate, cpu);
else if (ret == -1)
@@ -140,13 +140,13 @@
break;
case 'D':
for (idlestate = 0; idlestate < idlestates; idlestate++) {
- disabled = sysfs_is_idlestate_disabled
+ disabled = cpuidle_is_state_disabled
(cpu, idlestate);
- state_latency = sysfs_get_idlestate_latency
+ state_latency = cpuidle_state_latency
(cpu, idlestate);
if (disabled == 1) {
if (latency > state_latency){
- ret = sysfs_idlestate_disable
+ ret = cpuidle_state_disable
(cpu, idlestate, 0);
if (ret == 0)
printf(_("Idlestate %u enabled on CPU %u\n"), idlestate, cpu);
@@ -154,7 +154,7 @@
continue;
}
if (latency <= state_latency){
- ret = sysfs_idlestate_disable
+ ret = cpuidle_state_disable
(cpu, idlestate, 1);
if (ret == 0)
printf(_("Idlestate %u disabled on CPU %u\n"), idlestate, cpu);
@@ -163,10 +163,10 @@
break;
case 'E':
for (idlestate = 0; idlestate < idlestates; idlestate++) {
- disabled = sysfs_is_idlestate_disabled
+ disabled = cpuidle_is_state_disabled
(cpu, idlestate);
if (disabled == 1) {
- ret = sysfs_idlestate_disable
+ ret = cpuidle_state_disable
(cpu, idlestate, 0);
if (ret == 0)
printf(_("Idlestate %u enabled on CPU %u\n"), idlestate, cpu);
diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h
index aa9e954..afb66f8 100644
--- a/tools/power/cpupower/utils/helpers/helpers.h
+++ b/tools/power/cpupower/utils/helpers/helpers.h
@@ -14,6 +14,7 @@
#include <locale.h>
#include "helpers/bitmask.h"
+#include <cpupower.h>
/* Internationalization ****************************/
#ifdef NLS
@@ -92,31 +93,6 @@
extern struct cpupower_cpu_info cpupower_cpu_info;
/* cpuid and cpuinfo helpers **************************/
-struct cpuid_core_info {
- int pkg;
- int core;
- int cpu;
-
- /* flags */
- unsigned int is_online:1;
-};
-
-/* CPU topology/hierarchy parsing ******************/
-struct cpupower_topology {
- /* Amount of CPU cores, packages and threads per core in the system */
- unsigned int cores;
- unsigned int pkgs;
- unsigned int threads; /* per core */
-
- /* Array gets mallocated with cores entries, holding per core info */
- struct cpuid_core_info *core_info;
-};
-
-extern int get_cpu_topology(struct cpupower_topology *cpu_top);
-extern void cpu_topology_release(struct cpupower_topology cpu_top);
-
-/* CPU topology/hierarchy parsing ******************/
-
/* X86 ONLY ****************************************/
#if defined(__i386__) || defined(__x86_64__)
diff --git a/tools/power/cpupower/utils/helpers/topology.c b/tools/power/cpupower/utils/helpers/topology.c
index 5f9c908..a1a6c60 100644
--- a/tools/power/cpupower/utils/helpers/topology.c
+++ b/tools/power/cpupower/utils/helpers/topology.c
@@ -16,110 +16,7 @@
#include <errno.h>
#include <fcntl.h>
-#include <helpers/helpers.h>
-#include <helpers/sysfs.h>
+#include <cpuidle.h>
-/* returns -1 on failure, 0 on success */
-static int sysfs_topology_read_file(unsigned int cpu, const char *fname, int *result)
-{
- char linebuf[MAX_LINE_LEN];
- char *endp;
- char path[SYSFS_PATH_MAX];
+/* CPU topology/hierarchy parsing ******************/
- snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/topology/%s",
- cpu, fname);
- if (sysfs_read_file(path, linebuf, MAX_LINE_LEN) == 0)
- return -1;
- *result = strtol(linebuf, &endp, 0);
- if (endp == linebuf || errno == ERANGE)
- return -1;
- return 0;
-}
-
-static int __compare(const void *t1, const void *t2)
-{
- struct cpuid_core_info *top1 = (struct cpuid_core_info *)t1;
- struct cpuid_core_info *top2 = (struct cpuid_core_info *)t2;
- if (top1->pkg < top2->pkg)
- return -1;
- else if (top1->pkg > top2->pkg)
- return 1;
- else if (top1->core < top2->core)
- return -1;
- else if (top1->core > top2->core)
- return 1;
- else if (top1->cpu < top2->cpu)
- return -1;
- else if (top1->cpu > top2->cpu)
- return 1;
- else
- return 0;
-}
-
-/*
- * Returns amount of cpus, negative on error, cpu_top must be
- * passed to cpu_topology_release to free resources
- *
- * Array is sorted after ->pkg, ->core, then ->cpu
- */
-int get_cpu_topology(struct cpupower_topology *cpu_top)
-{
- int cpu, last_pkg, cpus = sysconf(_SC_NPROCESSORS_CONF);
-
- cpu_top->core_info = malloc(sizeof(struct cpuid_core_info) * cpus);
- if (cpu_top->core_info == NULL)
- return -ENOMEM;
- cpu_top->pkgs = cpu_top->cores = 0;
- for (cpu = 0; cpu < cpus; cpu++) {
- cpu_top->core_info[cpu].cpu = cpu;
- cpu_top->core_info[cpu].is_online = sysfs_is_cpu_online(cpu);
- if(sysfs_topology_read_file(
- cpu,
- "physical_package_id",
- &(cpu_top->core_info[cpu].pkg)) < 0) {
- cpu_top->core_info[cpu].pkg = -1;
- cpu_top->core_info[cpu].core = -1;
- continue;
- }
- if(sysfs_topology_read_file(
- cpu,
- "core_id",
- &(cpu_top->core_info[cpu].core)) < 0) {
- cpu_top->core_info[cpu].pkg = -1;
- cpu_top->core_info[cpu].core = -1;
- continue;
- }
- }
-
- qsort(cpu_top->core_info, cpus, sizeof(struct cpuid_core_info),
- __compare);
-
- /* Count the number of distinct pkgs values. This works
- because the primary sort of the core_info struct was just
- done by pkg value. */
- last_pkg = cpu_top->core_info[0].pkg;
- for(cpu = 1; cpu < cpus; cpu++) {
- if (cpu_top->core_info[cpu].pkg != last_pkg &&
- cpu_top->core_info[cpu].pkg != -1) {
-
- last_pkg = cpu_top->core_info[cpu].pkg;
- cpu_top->pkgs++;
- }
- }
- if (!(cpu_top->core_info[0].pkg == -1))
- cpu_top->pkgs++;
-
- /* Intel's cores count is not consecutively numbered, there may
- * be a core_id of 3, but none of 2. Assume there always is 0
- * Get amount of cores by counting duplicates in a package
- for (cpu = 0; cpu_top->core_info[cpu].pkg = 0 && cpu < cpus; cpu++) {
- if (cpu_top->core_info[cpu].core == 0)
- cpu_top->cores++;
- */
- return cpus;
-}
-
-void cpu_topology_release(struct cpupower_topology cpu_top)
-{
- free(cpu_top.core_info);
-}
diff --git a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
index bcd22a1..1b5da00 100644
--- a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
+++ b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
@@ -10,8 +10,8 @@
#include <stdint.h>
#include <string.h>
#include <limits.h>
+#include <cpuidle.h>
-#include "helpers/sysfs.h"
#include "helpers/helpers.h"
#include "idle_monitor/cpupower-monitor.h"
@@ -51,7 +51,7 @@
for (state = 0; state < cpuidle_sysfs_monitor.hw_states_num;
state++) {
previous_count[cpu][state] =
- sysfs_get_idlestate_time(cpu, state);
+ cpuidle_state_time(cpu, state);
dprint("CPU %d - State: %d - Val: %llu\n",
cpu, state, previous_count[cpu][state]);
}
@@ -70,7 +70,7 @@
for (state = 0; state < cpuidle_sysfs_monitor.hw_states_num;
state++) {
current_count[cpu][state] =
- sysfs_get_idlestate_time(cpu, state);
+ cpuidle_state_time(cpu, state);
dprint("CPU %d - State: %d - Val: %llu\n",
cpu, state, previous_count[cpu][state]);
}
@@ -132,13 +132,13 @@
char *tmp;
/* Assume idle state count is the same for all CPUs */
- cpuidle_sysfs_monitor.hw_states_num = sysfs_get_idlestate_count(0);
+ cpuidle_sysfs_monitor.hw_states_num = cpuidle_state_count(0);
if (cpuidle_sysfs_monitor.hw_states_num <= 0)
return NULL;
for (num = 0; num < cpuidle_sysfs_monitor.hw_states_num; num++) {
- tmp = sysfs_get_idlestate_name(0, num);
+ tmp = cpuidle_state_name(0, num);
if (tmp == NULL)
continue;
@@ -146,7 +146,7 @@
strncpy(cpuidle_cstates[num].name, tmp, CSTATE_NAME_LEN - 1);
free(tmp);
- tmp = sysfs_get_idlestate_desc(0, num);
+ tmp = cpuidle_state_desc(0, num);
if (tmp == NULL)
continue;
strncpy(cpuidle_cstates[num].desc, tmp, CSTATE_DESC_LEN - 1);
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index b04afc3..ff9e5f2 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -19,6 +19,7 @@
TARGETS += pstore
TARGETS += ptrace
TARGETS += seccomp
+TARGETS += sigaltstack
TARGETS += size
TARGETS += static_keys
TARGETS += sysctl
diff --git a/tools/testing/selftests/rcutorture/bin/jitter.sh b/tools/testing/selftests/rcutorture/bin/jitter.sh
new file mode 100755
index 0000000..3633828
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/jitter.sh
@@ -0,0 +1,90 @@
+#!/bin/bash
+#
+# Alternate sleeping and spinning on randomly selected CPUs. The purpose
+# of this script is to inflict random OS jitter on a concurrently running
+# test.
+#
+# Usage: jitter.sh me duration [ sleepmax [ spinmax ] ]
+#
+# me: Random-number-generator seed salt.
+# duration: Time to run in seconds.
+# sleepmax: Maximum microseconds to sleep, defaults to one second.
+# spinmax: Maximum microseconds to spin, defaults to one millisecond.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2016
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+me=$(($1 * 1000))
+duration=$2
+sleepmax=${3-1000000}
+spinmax=${4-1000}
+
+n=1
+
+starttime=`awk 'BEGIN { print systime(); }' < /dev/null`
+
+while :
+do
+ # Check for done.
+ t=`awk -v s=$starttime 'BEGIN { print systime() - s; }' < /dev/null`
+ if test "$t" -gt "$duration"
+ then
+ exit 0;
+ fi
+
+ # Set affinity to randomly selected CPU
+ cpus=`ls /sys/devices/system/cpu/*/online |
+ sed -e 's,/[^/]*$,,' -e 's/^[^0-9]*//' |
+ grep -v '^0*$'`
+ cpumask=`awk -v cpus="$cpus" -v me=$me -v n=$n 'BEGIN {
+ srand(n + me + systime());
+ ncpus = split(cpus, ca);
+ curcpu = ca[int(rand() * ncpus + 1)];
+ mask = lshift(1, curcpu);
+ if (mask + 0 <= 0)
+ mask = 1;
+ printf("%#x\n", mask);
+ }' < /dev/null`
+ n=$(($n+1))
+ if ! taskset -p $cpumask $$ > /dev/null 2>&1
+ then
+ echo taskset failure: '"taskset -p ' $cpumask $$ '"'
+ exit 1
+ fi
+
+ # Sleep a random duration
+ sleeptime=`awk -v me=$me -v n=$n -v sleepmax=$sleepmax 'BEGIN {
+ srand(n + me + systime());
+ printf("%06d", int(rand() * sleepmax));
+ }' < /dev/null`
+ n=$(($n+1))
+ sleep .$sleeptime
+
+ # Spin a random duration
+ limit=`awk -v me=$me -v n=$n -v spinmax=$spinmax 'BEGIN {
+ srand(n + me + systime());
+ printf("%06d", int(rand() * spinmax));
+ }' < /dev/null`
+ n=$(($n+1))
+	for i in `seq 1 $limit`
+ do
+ echo > /dev/null
+ done
+done
+
+exit 1
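As a usage sketch (the arguments are invented for illustration, not taken from this patch), the following would start one jitter instance with seed salt 3 for a 120-second run, capping sleeps at 0.5 s and spins at 2 ms:

$ tools/testing/selftests/rcutorture/bin/jitter.sh 3 120 500000 2000 &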
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh
new file mode 100755
index 0000000..f79b0e9
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh
@@ -0,0 +1,121 @@
+#!/bin/bash
+#
+# Analyze a given results directory for rcuperf performance measurements,
+# looking for ftrace data. Exits with 0 if data was found, analyzed, and
+# printed. Intended to be invoked from kvm-recheck-rcuperf.sh after
+# argument checking.
+#
+# Usage: kvm-recheck-rcuperf-ftrace.sh resdir
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2016
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+i="$1"
+. tools/testing/selftests/rcutorture/bin/functions.sh
+
+if test "`grep -c 'rcu_exp_grace_period.*start' < $i/console.log`" -lt 100
+then
+ exit 10
+fi
+
+sed -e 's/^\[[^]]*]//' < $i/console.log |
+grep 'us : rcu_exp_grace_period' |
+sed -e 's/us : / : /' |
+tr -d '\015' |
+awk '
+$8 == "start" {
+ if (starttask != "")
+ nlost++;
+ starttask = $1;
+ starttime = $3;
+ startseq = $7;
+}
+
+$8 == "end" {
+ if (starttask == $1 && startseq == $7) {
+ curgpdur = $3 - starttime;
+ gptimes[++n] = curgpdur;
+ gptaskcnt[starttask]++;
+ sum += curgpdur;
+ if (curgpdur > 1000)
+ print "Long GP " starttime "us to " $3 "us (" curgpdur "us)";
+ starttask = "";
+ } else {
+ # Lost a message or some such, reset.
+ starttask = "";
+ nlost++;
+ }
+}
+
+$8 == "done" {
+ piggybackcnt[$1]++;
+}
+
+END {
+ newNR = asort(gptimes);
+ if (newNR <= 0) {
+ print "No ftrace records found???"
+ exit 10;
+ }
+ pct50 = int(newNR * 50 / 100);
+ if (pct50 < 1)
+ pct50 = 1;
+ pct90 = int(newNR * 90 / 100);
+ if (pct90 < 1)
+ pct90 = 1;
+ pct99 = int(newNR * 99 / 100);
+ if (pct99 < 1)
+ pct99 = 1;
+ div = 10 ** int(log(gptimes[pct90]) / log(10) + .5) / 100;
+ print "Histogram bucket size: " div;
+ last = gptimes[1] - 10;
+ count = 0;
+ for (i = 1; i <= newNR; i++) {
+ current = div * int(gptimes[i] / div);
+ if (last == current) {
+ count++;
+ } else {
+ if (count > 0)
+ print last, count;
+ count = 1;
+ last = current;
+ }
+ }
+ if (count > 0)
+ print last, count;
+ print "Distribution of grace periods across tasks:";
+ for (i in gptaskcnt) {
+ print "\t" i, gptaskcnt[i];
+ nbatches += gptaskcnt[i];
+ }
+ ngps = nbatches;
+ print "Distribution of piggybacking across tasks:";
+ for (i in piggybackcnt) {
+ print "\t" i, piggybackcnt[i];
+ ngps += piggybackcnt[i];
+ }
+ print "Average grace-period duration: " sum / newNR " microseconds";
+ print "Minimum grace-period duration: " gptimes[1];
+ print "50th percentile grace-period duration: " gptimes[pct50];
+ print "90th percentile grace-period duration: " gptimes[pct90];
+ print "99th percentile grace-period duration: " gptimes[pct99];
+ print "Maximum grace-period duration: " gptimes[newNR];
+ print "Grace periods: " ngps + 0 " Batches: " nbatches + 0 " Ratio: " ngps / nbatches " Lost: " nlost + 0;
+ print "Computed from ftrace data.";
+}'
+exit 0
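The histogram bucket size computed above rounds the 90th-percentile grace-period duration to the nearest power of ten, then divides by 100. A standalone check of that arithmetic, using 840 microseconds as an arbitrary example value:

$ awk 'BEGIN { p90 = 840; print 10 ** int(log(p90) / log(10) + .5) / 100 }'
10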
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh
new file mode 100755
index 0000000..8f3121a
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh
@@ -0,0 +1,96 @@
+#!/bin/bash
+#
+# Analyze a given results directory for rcuperf performance measurements.
+#
+# Usage: kvm-recheck-rcuperf.sh resdir
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2016
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+i="$1"
+if test -d $i
+then
+ :
+else
+ echo Unreadable results directory: $i
+ exit 1
+fi
+PATH=`pwd`/tools/testing/selftests/rcutorture/bin:$PATH; export PATH
+. tools/testing/selftests/rcutorture/bin/functions.sh
+
+if kvm-recheck-rcuperf-ftrace.sh $i
+then
+ # ftrace data was successfully analyzed, call it good!
+ exit 0
+fi
+
+configfile=`echo $i | sed -e 's/^.*\///'`
+
+sed -e 's/^\[[^]]*]//' < $i/console.log |
+awk '
+/-perf: .* gps: .* batches:/ {
+ ngps = $9;
+ nbatches = $11;
+}
+
+/-perf: .*writer-duration/ {
+ gptimes[++n] = $5 / 1000.;
+ sum += $5 / 1000.;
+}
+
+END {
+ newNR = asort(gptimes);
+ if (newNR <= 0) {
+ print "No rcuperf records found???"
+ exit;
+ }
+ pct50 = int(newNR * 50 / 100);
+ if (pct50 < 1)
+ pct50 = 1;
+ pct90 = int(newNR * 90 / 100);
+ if (pct90 < 1)
+ pct90 = 1;
+ pct99 = int(newNR * 99 / 100);
+ if (pct99 < 1)
+ pct99 = 1;
+ div = 10 ** int(log(gptimes[pct90]) / log(10) + .5) / 100;
+ print "Histogram bucket size: " div;
+ last = gptimes[1] - 10;
+ count = 0;
+ for (i = 1; i <= newNR; i++) {
+ current = div * int(gptimes[i] / div);
+ if (last == current) {
+ count++;
+ } else {
+ if (count > 0)
+ print last, count;
+ count = 1;
+ last = current;
+ }
+ }
+ if (count > 0)
+ print last, count;
+ print "Average grace-period duration: " sum / newNR " microseconds";
+ print "Minimum grace-period duration: " gptimes[1];
+ print "50th percentile grace-period duration: " gptimes[pct50];
+ print "90th percentile grace-period duration: " gptimes[pct90];
+ print "99th percentile grace-period duration: " gptimes[pct99];
+ print "Maximum grace-period duration: " gptimes[newNR];
+ print "Grace periods: " ngps + 0 " Batches: " nbatches + 0 " Ratio: " ngps / nbatches;
+ print "Computed from rcuperf printk output.";
+}'
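For illustration, the script can also be pointed at a single per-run results directory by hand; the directory name below is invented, and the command must be run from the top of the kernel source tree so that the PATH and functions.sh references above resolve:

$ tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh res/2016.04.25-10.38.01/TREE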
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
index d86bdd6..f659346 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
@@ -48,7 +48,10 @@
cat $i/Make.oldconfig.err
fi
parse-build.sh $i/Make.out $configfile
- parse-torture.sh $i/console.log $configfile
+ if test "$TORTURE_SUITE" != rcuperf
+ then
+ parse-torture.sh $i/console.log $configfile
+ fi
parse-console.sh $i/console.log $configfile
if test -r $i/Warnings
then
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index 0f80eef..4109f30 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -6,7 +6,7 @@
# Execute this in the source tree. Do not run it as a background task
# because qemu does not seem to like that much.
#
-# Usage: kvm-test-1-run.sh config builddir resdir minutes qemu-args boot_args
+# Usage: kvm-test-1-run.sh config builddir resdir seconds qemu-args boot_args
#
# qemu-args defaults to "-enable-kvm -soundhw pcspk -nographic", along with
# arguments specifying the number of CPUs and other
@@ -91,25 +91,33 @@
# CONFIG_PCMCIA=n
# CONFIG_CARDBUS=n
# CONFIG_YENTA=n
-if kvm-build.sh $config_template $builddir $T
+base_resdir=`echo $resdir | sed -e 's/\.[0-9]\+$//'`
+if test "$base_resdir" != "$resdir" -a -f $base_resdir/bzImage -a -f $base_resdir/vmlinux
then
+ # Rerunning previous test, so use that test's kernel.
+ QEMU="`identify_qemu $base_resdir/vmlinux`"
+ KERNEL=$base_resdir/bzImage
+ ln -s $base_resdir/Make*.out $resdir # for kvm-recheck.sh
+ ln -s $base_resdir/.config $resdir # for kvm-recheck.sh
+elif kvm-build.sh $config_template $builddir $T
+then
+ # Had to build a kernel for this test.
QEMU="`identify_qemu $builddir/vmlinux`"
BOOT_IMAGE="`identify_boot_image $QEMU`"
cp $builddir/Make*.out $resdir
+ cp $builddir/vmlinux $resdir
cp $builddir/.config $resdir
if test -n "$BOOT_IMAGE"
then
cp $builddir/$BOOT_IMAGE $resdir
+ KERNEL=$resdir/bzImage
else
echo No identifiable boot image, not running KVM, see $resdir.
echo Do the torture scripts know about your architecture?
fi
parse-build.sh $resdir/Make.out $title
- if test -f $builddir.wait
- then
- mv $builddir.wait $builddir.ready
- fi
else
+ # Build failed.
cp $builddir/Make*.out $resdir
cp $builddir/.config $resdir || :
echo Build failed, not running KVM, see $resdir.
@@ -119,12 +127,15 @@
fi
exit 1
fi
+if test -f $builddir.wait
+then
+ mv $builddir.wait $builddir.ready
+fi
while test -f $builddir.ready
do
sleep 1
done
-minutes=$4
-seconds=$(($minutes * 60))
+seconds=$4
qemu_args=$5
boot_args=$6
@@ -167,15 +178,26 @@
exit 0
fi
echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log
-echo $QEMU $qemu_args -m 512 -kernel $resdir/bzImage -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd
-( $QEMU $qemu_args -m 512 -kernel $resdir/bzImage -append "$qemu_append $boot_args"; echo $? > $resdir/qemu-retval ) &
-qemu_pid=$!
+echo $QEMU $qemu_args -m 512 -kernel $KERNEL -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd
+( $QEMU $qemu_args -m 512 -kernel $KERNEL -append "$qemu_append $boot_args"& echo $! > $resdir/qemu_pid; wait `cat $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) &
commandcompleted=0
-echo Monitoring qemu job at pid $qemu_pid
+sleep 10 # Give qemu's pid a chance to reach the file
+if test -s "$resdir/qemu_pid"
+then
+ qemu_pid=`cat "$resdir/qemu_pid"`
+ echo Monitoring qemu job at pid $qemu_pid
+else
+ qemu_pid=""
+	echo Monitoring qemu job at as-yet-unknown pid
+fi
while :
do
+ if test -z "$qemu_pid" -a -s "$resdir/qemu_pid"
+ then
+ qemu_pid=`cat "$resdir/qemu_pid"`
+ fi
kruntime=`awk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null`
- if kill -0 $qemu_pid > /dev/null 2>&1
+ if test -z "$qemu_pid" || kill -0 "$qemu_pid" > /dev/null 2>&1
then
if test $kruntime -ge $seconds
then
@@ -195,12 +217,16 @@
ps -fp $killpid >> $resdir/Warnings 2>&1
fi
else
- echo ' ---' `date`: Kernel done
+ echo ' ---' `date`: "Kernel done"
fi
break
fi
done
-if test $commandcompleted -eq 0
+if test -z "$qemu_pid" -a -s "$resdir/qemu_pid"
+then
+ qemu_pid=`cat "$resdir/qemu_pid"`
+fi
+if test $commandcompleted -eq 0 -a -n "$qemu_pid"
then
echo Grace period for qemu job at pid $qemu_pid
while :
@@ -220,6 +246,9 @@
fi
sleep 1
done
+elif test -z "$qemu_pid"
+then
+ echo Unknown PID, cannot kill qemu command
fi
parse-torture.sh $resdir/console.log $title
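The qemu change above stops trusting $! and instead has the subshell write qemu's PID to a file, which the monitoring loop polls. A minimal sketch of that pattern, with cmd, pidfile, and retval as placeholder names:

( cmd & echo $! > pidfile; wait `cat pidfile`; echo $? > retval ) &
sleep 10	# give the subshell time to write the PID file
test -s pidfile && pid=`cat pidfile`
kill -0 "$pid" 2> /dev/null && echo cmd still running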
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 4a43176..0d59814 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -34,7 +34,7 @@
trap 'rm -rf $T' 0
mkdir $T
-dur=30
+dur=$((30*60))
dryrun=""
KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
PATH=${KVM}/bin:$PATH; export PATH
@@ -48,6 +48,7 @@
configs=""
cpus=0
ds=`date +%Y.%m.%d-%H:%M:%S`
+jitter=0
. functions.sh
@@ -63,6 +64,7 @@
echo " --dryrun sched|script"
echo " --duration minutes"
echo " --interactive"
+ echo " --jitter N [ maxsleep (us) [ maxspin (us) ] ]"
echo " --kmake-arg kernel-make-arguments"
echo " --mac nn:nn:nn:nn:nn:nn"
echo " --no-initrd"
@@ -116,12 +118,17 @@
;;
--duration)
checkarg --duration "(minutes)" $# "$2" '^[0-9]*$' '^error'
- dur=$2
+ dur=$(($2*60))
shift
;;
--interactive)
TORTURE_QEMU_INTERACTIVE=1; export TORTURE_QEMU_INTERACTIVE
;;
+ --jitter)
+ checkarg --jitter "(# threads [ sleep [ spin ] ])" $# "$2" '^-\{,1\}[0-9]\+\( \+[0-9]\+\)\{,2\} *$' '^error$'
+ jitter="$2"
+ shift
+ ;;
--kmake-arg)
checkarg --kmake-arg "(kernel make arguments)" $# "$2" '.*' '^error$'
TORTURE_KMAKE_ARG="$2"
@@ -156,7 +163,7 @@
shift
;;
--torture)
- checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\)$' '^--'
+ checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuperf\)$' '^--'
TORTURE_SUITE=$2
shift
;;
@@ -299,6 +306,7 @@
-v CONFIGDIR="$CONFIGFRAG/" \
-v KVM="$KVM" \
-v ncpus=$cpus \
+ -v jitter="$jitter" \
-v rd=$resdir/$ds/ \
-v dur=$dur \
-v TORTURE_QEMU_ARG="$TORTURE_QEMU_ARG" \
@@ -359,6 +367,16 @@
print "\techo ----", cfr[j], cpusr[j] ovf ": Starting kernel. `date` >> " rd "/log";
print "fi"
}
+ njitter = 0;
+ split(jitter, ja);
+ if (ja[1] == -1 && ncpus == 0)
+ njitter = 1;
+ else if (ja[1] == -1)
+ njitter = ncpus;
+ else
+ njitter = ja[1];
+ for (j = 0; j < njitter; j++)
+ print "jitter.sh " j " " dur " " ja[2] " " ja[3] "&"
print "wait"
print "if test -z \"$TORTURE_BUILDONLY\""
print "then"
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
index 39a2c6d..17cbe09 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
@@ -14,7 +14,7 @@
CONFIG_SUSPEND=n
CONFIG_HIBERNATION=n
CONFIG_RCU_FANOUT=4
-CONFIG_RCU_FANOUT_LEAF=4
+CONFIG_RCU_FANOUT_LEAF=3
CONFIG_RCU_NOCB_CPU=n
CONFIG_DEBUG_LOCK_ALLOC=n
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot
index 0fc8a34..e34c334 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot
@@ -1 +1 @@
-rcutorture.torture_type=rcu_bh
+rcutorture.torture_type=rcu_bh rcutree.rcu_fanout_leaf=4
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/CFLIST b/tools/testing/selftests/rcutorture/configs/rcuperf/CFLIST
new file mode 100644
index 0000000..c9f56cf
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcuperf/CFLIST
@@ -0,0 +1 @@
+TREE
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon b/tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon
new file mode 100644
index 0000000..a09816b
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon
@@ -0,0 +1,2 @@
+CONFIG_RCU_PERF_TEST=y
+CONFIG_PRINTK_TIME=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE
new file mode 100644
index 0000000..a312f67
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE
@@ -0,0 +1,19 @@
+CONFIG_SMP=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+#CHECK#CONFIG_PREEMPT_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_FAST_NO_HZ=n
+CONFIG_HOTPLUG_CPU=n
+CONFIG_SUSPEND=n
+CONFIG_HIBERNATION=n
+CONFIG_RCU_NOCB_CPU=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_PROVE_LOCKING=n
+CONFIG_RCU_BOOST=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
+CONFIG_RCU_TRACE=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54 b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54
new file mode 100644
index 0000000..985fb17
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54
@@ -0,0 +1,22 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=54
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+#CHECK#CONFIG_PREEMPT_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_FAST_NO_HZ=n
+CONFIG_HOTPLUG_CPU=n
+CONFIG_SUSPEND=n
+CONFIG_HIBERNATION=n
+CONFIG_RCU_FANOUT=3
+CONFIG_RCU_FANOUT_LEAF=2
+CONFIG_RCU_NOCB_CPU=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_PROVE_LOCKING=n
+CONFIG_RCU_BOOST=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
+CONFIG_RCU_TRACE=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh
new file mode 100644
index 0000000..34f2a1b
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+#
+# Torture-suite-dependent shell functions for the rest of the scripts.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2015
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+# rcuperf_param_nreaders bootparam-string
+#
+# Adds nreaders rcuperf module parameter if not already specified.
+rcuperf_param_nreaders () {
+ if ! echo "$1" | grep -q "rcuperf.nreaders"
+ then
+ echo rcuperf.nreaders=-1
+ fi
+}
+
+# rcuperf_param_nwriters bootparam-string
+#
+# Adds nwriters rcuperf module parameter if not already specified.
+rcuperf_param_nwriters () {
+ if ! echo "$1" | grep -q "rcuperf.nwriters"
+ then
+ echo rcuperf.nwriters=-1
+ fi
+}
+
+# per_version_boot_params bootparam-string config-file seconds
+#
+# Adds per-version torture-module parameters to kernels supporting them.
+per_version_boot_params () {
+ echo $1 `rcuperf_param_nreaders "$1"` \
+ `rcuperf_param_nwriters "$1"` \
+ rcuperf.perf_runnable=1 \
+ rcuperf.shutdown=1 \
+ rcuperf.verbose=1
+}
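For a feel of the result, calling the last helper with an empty boot-parameter string should emit all five defaults (output reconstructed by reading the functions above, not captured from an actual run):

$ per_version_boot_params "" TREE 600
rcuperf.nreaders=-1 rcuperf.nwriters=-1 rcuperf.perf_runnable=1 rcuperf.shutdown=1 rcuperf.verbose=1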
diff --git a/tools/testing/selftests/sigaltstack/Makefile b/tools/testing/selftests/sigaltstack/Makefile
new file mode 100644
index 0000000..56af56e
--- /dev/null
+++ b/tools/testing/selftests/sigaltstack/Makefile
@@ -0,0 +1,8 @@
+CFLAGS = -Wall
+BINARIES = sas
+all: $(BINARIES)
+
+include ../lib.mk
+
+clean:
+ rm -rf $(BINARIES)
diff --git a/tools/testing/selftests/sigaltstack/sas.c b/tools/testing/selftests/sigaltstack/sas.c
new file mode 100644
index 0000000..1bb0125
--- /dev/null
+++ b/tools/testing/selftests/sigaltstack/sas.c
@@ -0,0 +1,176 @@
+/*
+ * Stas Sergeev <stsp@users.sourceforge.net>
+ *
+ * test sigaltstack(SS_ONSTACK | SS_AUTODISARM)
+ * If that succeeds, then swapcontext() can be used inside a signal handler safely.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <ucontext.h>
+#include <alloca.h>
+#include <string.h>
+#include <assert.h>
+#include <errno.h>
+
+#ifndef SS_AUTODISARM
+#define SS_AUTODISARM (1U << 31)
+#endif
+
+static void *sstack, *ustack;
+static ucontext_t uc, sc;
+static const char *msg = "[OK]\tStack preserved";
+static const char *msg2 = "[FAIL]\tStack corrupted";
+struct stk_data {
+ char msg[128];
+ int flag;
+};
+
+void my_usr1(int sig, siginfo_t *si, void *u)
+{
+ char *aa;
+ int err;
+ stack_t stk;
+ struct stk_data *p;
+
+ register unsigned long sp asm("sp");
+
+ if (sp < (unsigned long)sstack ||
+ sp >= (unsigned long)sstack + SIGSTKSZ) {
+ printf("[FAIL]\tSP is not on sigaltstack\n");
+ exit(EXIT_FAILURE);
+ }
+	/* Put some data on the stack; the other signal handler will try to overwrite it. */
+ aa = alloca(1024);
+ assert(aa);
+ p = (struct stk_data *)(aa + 512);
+ strcpy(p->msg, msg);
+ p->flag = 1;
+ printf("[RUN]\tsignal USR1\n");
+ err = sigaltstack(NULL, &stk);
+ if (err) {
+ perror("[FAIL]\tsigaltstack()");
+ exit(EXIT_FAILURE);
+ }
+ if (stk.ss_flags != SS_DISABLE)
+ printf("[FAIL]\tss_flags=%i, should be SS_DISABLE\n",
+ stk.ss_flags);
+ else
+ printf("[OK]\tsigaltstack is disabled in sighandler\n");
+ swapcontext(&sc, &uc);
+ printf("%s\n", p->msg);
+ if (!p->flag) {
+ printf("[RUN]\tAborting\n");
+ exit(EXIT_FAILURE);
+ }
+}
+
+void my_usr2(int sig, siginfo_t *si, void *u)
+{
+ char *aa;
+ struct stk_data *p;
+
+ printf("[RUN]\tsignal USR2\n");
+ aa = alloca(1024);
+	/* don't run valgrind on this */
+	/* try to find the data stored by the previous signal handler */
+ p = memmem(aa, 1024, msg, strlen(msg));
+ if (p) {
+ printf("[FAIL]\tsigaltstack re-used\n");
+ /* corrupt the data */
+ strcpy(p->msg, msg2);
+		/* tell the other signal handler that its data is corrupted */
+ p->flag = 0;
+ }
+}
+
+static void switch_fn(void)
+{
+ printf("[RUN]\tswitched to user ctx\n");
+ raise(SIGUSR2);
+ setcontext(&sc);
+}
+
+int main(void)
+{
+ struct sigaction act;
+ stack_t stk;
+ int err;
+
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = SA_ONSTACK | SA_SIGINFO;
+ act.sa_sigaction = my_usr1;
+ sigaction(SIGUSR1, &act, NULL);
+ act.sa_sigaction = my_usr2;
+ sigaction(SIGUSR2, &act, NULL);
+ sstack = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
+ if (sstack == MAP_FAILED) {
+ perror("mmap()");
+ return EXIT_FAILURE;
+ }
+
+ err = sigaltstack(NULL, &stk);
+ if (err) {
+ perror("[FAIL]\tsigaltstack()");
+ exit(EXIT_FAILURE);
+ }
+ if (stk.ss_flags == SS_DISABLE) {
+ printf("[OK]\tInitial sigaltstack state was SS_DISABLE\n");
+ } else {
+ printf("[FAIL]\tInitial sigaltstack state was %i; should have been SS_DISABLE\n", stk.ss_flags);
+ return EXIT_FAILURE;
+ }
+
+ stk.ss_sp = sstack;
+ stk.ss_size = SIGSTKSZ;
+ stk.ss_flags = SS_ONSTACK | SS_AUTODISARM;
+ err = sigaltstack(&stk, NULL);
+ if (err) {
+ if (errno == EINVAL) {
+ printf("[NOTE]\tThe running kernel doesn't support SS_AUTODISARM\n");
+ /*
+ * If test cases for the !SS_AUTODISARM variant were
+ * added, we could still run them. We don't have any
+ * test cases like that yet, so just exit and report
+ * success.
+ */
+ return 0;
+ } else {
+ perror("[FAIL]\tsigaltstack(SS_ONSTACK | SS_AUTODISARM)");
+ return EXIT_FAILURE;
+ }
+ }
+
+ ustack = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
+ if (ustack == MAP_FAILED) {
+ perror("mmap()");
+ return EXIT_FAILURE;
+ }
+ getcontext(&uc);
+ uc.uc_link = NULL;
+ uc.uc_stack.ss_sp = ustack;
+ uc.uc_stack.ss_size = SIGSTKSZ;
+ makecontext(&uc, switch_fn, 0);
+ raise(SIGUSR1);
+
+ err = sigaltstack(NULL, &stk);
+ if (err) {
+ perror("[FAIL]\tsigaltstack()");
+ exit(EXIT_FAILURE);
+ }
+ if (stk.ss_flags != SS_AUTODISARM) {
+ printf("[FAIL]\tss_flags=%i, should be SS_AUTODISARM\n",
+ stk.ss_flags);
+ exit(EXIT_FAILURE);
+ }
+ printf("[OK]\tsigaltstack is still SS_AUTODISARM after signal\n");
+
+ printf("[OK]\tTest passed\n");
+ return 0;
+}
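A plausible way to build and run this test by hand (compiler flags are a guess; the selftest Makefile above only sets -Wall):

$ gcc -Wall -o sas tools/testing/selftests/sigaltstack/sas.c
$ ./sas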
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index b47ebd1..c73425de 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -9,6 +9,7 @@
TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
test_FCMOV test_FCOMI test_FISTTP \
vdso_restorer
+TARGETS_C_64BIT_ONLY := fsgsbase
TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY)
TARGETS_C_64BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_64BIT_ONLY)
diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c
new file mode 100644
index 0000000..5b2b4b3
--- /dev/null
+++ b/tools/testing/selftests/x86/fsgsbase.c
@@ -0,0 +1,398 @@
+/*
+ * fsgsbase.c, an fsgsbase test
+ * Copyright (c) 2014-2016 Andy Lutomirski
+ * GPL v2
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <err.h>
+#include <sys/user.h>
+#include <asm/prctl.h>
+#include <sys/prctl.h>
+#include <signal.h>
+#include <limits.h>
+#include <sys/ucontext.h>
+#include <sched.h>
+#include <linux/futex.h>
+#include <pthread.h>
+#include <asm/ldt.h>
+#include <sys/mman.h>
+
+#ifndef __x86_64__
+# error This test is 64-bit only
+#endif
+
+static volatile sig_atomic_t want_segv;
+static volatile unsigned long segv_addr;
+
+static int nerrs;
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static void clearhandler(int sig)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_handler = SIG_DFL;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static void sigsegv(int sig, siginfo_t *si, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+ if (!want_segv) {
+ clearhandler(SIGSEGV);
+ return; /* Crash cleanly. */
+ }
+
+ want_segv = false;
+ segv_addr = (unsigned long)si->si_addr;
+
+ ctx->uc_mcontext.gregs[REG_RIP] += 4; /* Skip the faulting mov */
+
+}
+
+enum which_base { FS, GS };
+
+static unsigned long read_base(enum which_base which)
+{
+ unsigned long offset;
+ /*
+ * Unless we have FSGSBASE, there's no direct way to do this from
+ * user mode. We can get at it indirectly using signals, though.
+ */
+
+ want_segv = true;
+
+ offset = 0;
+ if (which == FS) {
+ /* Use a constant-length instruction here. */
+ asm volatile ("mov %%fs:(%%rcx), %%rax" : : "c" (offset) : "rax");
+ } else {
+ asm volatile ("mov %%gs:(%%rcx), %%rax" : : "c" (offset) : "rax");
+ }
+ if (!want_segv)
+ return segv_addr + offset;
+
+ /*
+ * If that didn't segfault, try the other end of the address space.
+ * Unless we get really unlucky and run into the vsyscall page, this
+ * is guaranteed to segfault.
+ */
+
+ offset = (ULONG_MAX >> 1) + 1;
+ if (which == FS) {
+ asm volatile ("mov %%fs:(%%rcx), %%rax"
+ : : "c" (offset) : "rax");
+ } else {
+ asm volatile ("mov %%gs:(%%rcx), %%rax"
+ : : "c" (offset) : "rax");
+ }
+ if (!want_segv)
+ return segv_addr + offset;
+
+ abort();
+}
+
+static void check_gs_value(unsigned long value)
+{
+ unsigned long base;
+ unsigned short sel;
+
+ printf("[RUN]\tARCH_SET_GS to 0x%lx\n", value);
+ if (syscall(SYS_arch_prctl, ARCH_SET_GS, value) != 0)
+ err(1, "ARCH_SET_GS");
+
+ asm volatile ("mov %%gs, %0" : "=rm" (sel));
+ base = read_base(GS);
+ if (base == value) {
+ printf("[OK]\tGSBASE was set as expected (selector 0x%hx)\n",
+ sel);
+ } else {
+ nerrs++;
+ printf("[FAIL]\tGSBASE was not as expected: got 0x%lx (selector 0x%hx)\n",
+ base, sel);
+ }
+
+ if (syscall(SYS_arch_prctl, ARCH_GET_GS, &base) != 0)
+ err(1, "ARCH_GET_GS");
+ if (base == value) {
+ printf("[OK]\tARCH_GET_GS worked as expected (selector 0x%hx)\n",
+ sel);
+ } else {
+ nerrs++;
+ printf("[FAIL]\tARCH_GET_GS was not as expected: got 0x%lx (selector 0x%hx)\n",
+ base, sel);
+ }
+}
+
+static void mov_0_gs(unsigned long initial_base, bool schedule)
+{
+ unsigned long base, arch_base;
+
+	printf("[RUN]\tARCH_SET_GS to 0x%lx then mov 0 to %%gs%s\n", initial_base, schedule ? " and schedule" : "");
+ if (syscall(SYS_arch_prctl, ARCH_SET_GS, initial_base) != 0)
+ err(1, "ARCH_SET_GS");
+
+ if (schedule)
+ usleep(10);
+
+ asm volatile ("mov %0, %%gs" : : "rm" (0));
+ base = read_base(GS);
+ if (syscall(SYS_arch_prctl, ARCH_GET_GS, &arch_base) != 0)
+ err(1, "ARCH_GET_GS");
+ if (base == arch_base) {
+ printf("[OK]\tGSBASE is 0x%lx\n", base);
+ } else {
+ nerrs++;
+ printf("[FAIL]\tGSBASE changed to 0x%lx but kernel reports 0x%lx\n", base, arch_base);
+ }
+}
+
+static volatile unsigned long remote_base;
+static volatile bool remote_hard_zero;
+static volatile unsigned int ftx;
+
+/*
+ * ARCH_SET_FS/GS(0) may or may not program a selector of zero. HARD_ZERO
+ * means to force the selector to zero to improve test coverage.
+ */
+#define HARD_ZERO 0xa1fa5f343cb85fa4
+
+static void do_remote_base()
+{
+ unsigned long to_set = remote_base;
+ bool hard_zero = false;
+ if (to_set == HARD_ZERO) {
+ to_set = 0;
+ hard_zero = true;
+ }
+
+ if (syscall(SYS_arch_prctl, ARCH_SET_GS, to_set) != 0)
+ err(1, "ARCH_SET_GS");
+
+ if (hard_zero)
+ asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
+
+ unsigned short sel;
+ asm volatile ("mov %%gs, %0" : "=rm" (sel));
+ printf("\tother thread: ARCH_SET_GS(0x%lx)%s -- sel is 0x%hx\n",
+ to_set, hard_zero ? " and clear gs" : "", sel);
+}
+
+void do_unexpected_base(void)
+{
+ /*
+ * The goal here is to try to arrange for GS == 0, GSBASE !=
+	 * 0, and for the kernel to think that GSBASE == 0.
+ *
+ * To make the test as reliable as possible, this uses
+	 * explicit descriptors.  (This is not the only way.  This
+ * could use ARCH_SET_GS with a low, nonzero base, but the
+ * relevant side effect of ARCH_SET_GS could change.)
+ */
+
+ /* Step 1: tell the kernel that we have GSBASE == 0. */
+ if (syscall(SYS_arch_prctl, ARCH_SET_GS, 0) != 0)
+ err(1, "ARCH_SET_GS");
+
+ /* Step 2: change GSBASE without telling the kernel. */
+ struct user_desc desc = {
+ .entry_number = 0,
+ .base_addr = 0xBAADF00D,
+ .limit = 0xfffff,
+ .seg_32bit = 1,
+ .contents = 0, /* Data, grow-up */
+ .read_exec_only = 0,
+ .limit_in_pages = 1,
+ .seg_not_present = 0,
+ .useable = 0
+ };
+ if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) == 0) {
+ printf("\tother thread: using LDT slot 0\n");
+ asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0x7));
+ } else {
+ /* No modify_ldt for us (configured out, perhaps) */
+
+ struct user_desc *low_desc = mmap(
+ NULL, sizeof(desc),
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);
+ memcpy(low_desc, &desc, sizeof(desc));
+
+ low_desc->entry_number = -1;
+
+ /* 32-bit set_thread_area */
+ long ret;
+ asm volatile ("int $0x80"
+ : "=a" (ret) : "a" (243), "b" (low_desc)
+ : "flags");
+ memcpy(&desc, low_desc, sizeof(desc));
+ munmap(low_desc, sizeof(desc));
+
+ if (ret != 0) {
+ printf("[NOTE]\tcould not create a segment -- test won't do anything\n");
+ return;
+ }
+ printf("\tother thread: using GDT slot %d\n", desc.entry_number);
+ asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)((desc.entry_number << 3) | 0x3)));
+ }
+
+ /*
+ * Step 3: set the selector back to zero. On AMD chips, this will
+ * preserve GSBASE.
+ */
+
+ asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
+}
+
+static void *threadproc(void *ctx)
+{
+ while (1) {
+ while (ftx == 0)
+ syscall(SYS_futex, &ftx, FUTEX_WAIT, 0, NULL, NULL, 0);
+ if (ftx == 3)
+ return NULL;
+
+ if (ftx == 1)
+ do_remote_base();
+ else if (ftx == 2)
+ do_unexpected_base();
+ else
+ errx(1, "helper thread got bad command");
+
+ ftx = 0;
+ syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
+ }
+}
+
+static void set_gs_and_switch_to(unsigned long local, unsigned long remote)
+{
+ unsigned long base;
+
+ bool hard_zero = false;
+ if (local == HARD_ZERO) {
+ hard_zero = true;
+ local = 0;
+ }
+
+ printf("[RUN]\tARCH_SET_GS(0x%lx)%s, then schedule to 0x%lx\n",
+ local, hard_zero ? " and clear gs" : "", remote);
+ if (syscall(SYS_arch_prctl, ARCH_SET_GS, local) != 0)
+ err(1, "ARCH_SET_GS");
+ if (hard_zero)
+ asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
+
+ if (read_base(GS) != local) {
+ nerrs++;
+ printf("[FAIL]\tGSBASE wasn't set as expected\n");
+ }
+
+ remote_base = remote;
+ ftx = 1;
+ syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
+ while (ftx != 0)
+ syscall(SYS_futex, &ftx, FUTEX_WAIT, 1, NULL, NULL, 0);
+
+ base = read_base(GS);
+ if (base == local) {
+ printf("[OK]\tGSBASE remained 0x%lx\n", local);
+ } else {
+ nerrs++;
+ printf("[FAIL]\tGSBASE changed to 0x%lx\n", base);
+ }
+}
+
+static void test_unexpected_base(void)
+{
+ unsigned long base;
+
+ printf("[RUN]\tARCH_SET_GS(0), clear gs, then manipulate GSBASE in a different thread\n");
+ if (syscall(SYS_arch_prctl, ARCH_SET_GS, 0) != 0)
+ err(1, "ARCH_SET_GS");
+ asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
+
+ ftx = 2;
+ syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
+ while (ftx != 0)
+ syscall(SYS_futex, &ftx, FUTEX_WAIT, 1, NULL, NULL, 0);
+
+ base = read_base(GS);
+ if (base == 0) {
+ printf("[OK]\tGSBASE remained 0\n");
+ } else {
+ nerrs++;
+ printf("[FAIL]\tGSBASE changed to 0x%lx\n", base);
+ }
+}
+
+int main()
+{
+ pthread_t thread;
+
+ sethandler(SIGSEGV, sigsegv, 0);
+
+ check_gs_value(0);
+ check_gs_value(1);
+ check_gs_value(0x200000000);
+ check_gs_value(0);
+ check_gs_value(0x200000000);
+ check_gs_value(1);
+
+ for (int sched = 0; sched < 2; sched++) {
+ mov_0_gs(0, !!sched);
+ mov_0_gs(1, !!sched);
+ mov_0_gs(0x200000000, !!sched);
+ }
+
+ /* Set up for multithreading. */
+
+ cpu_set_t cpuset;
+ CPU_ZERO(&cpuset);
+ CPU_SET(0, &cpuset);
+ if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
+ err(1, "sched_setaffinity to CPU 0"); /* should never fail */
+
+ if (pthread_create(&thread, 0, threadproc, 0) != 0)
+ err(1, "pthread_create");
+
+ static unsigned long bases_with_hard_zero[] = {
+ 0, HARD_ZERO, 1, 0x200000000,
+ };
+
+ for (int local = 0; local < 4; local++) {
+ for (int remote = 0; remote < 4; remote++) {
+ set_gs_and_switch_to(bases_with_hard_zero[local],
+ bases_with_hard_zero[remote]);
+ }
+ }
+
+ test_unexpected_base();
+
+ ftx = 3; /* Kill the thread. */
+ syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
+
+ if (pthread_join(thread, NULL) != 0)
+ err(1, "pthread_join");
+
+ return nerrs == 0 ? 0 : 1;
+}
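Likewise, a hand build of this test needs pthreads and a 64-bit compiler (flags are illustrative):

$ gcc -Wall -O2 -o fsgsbase tools/testing/selftests/x86/fsgsbase.c -lpthread
$ ./fsgsbase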
diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c
index 31a3035..4af4707 100644
--- a/tools/testing/selftests/x86/ldt_gdt.c
+++ b/tools/testing/selftests/x86/ldt_gdt.c
@@ -21,6 +21,9 @@
#include <pthread.h>
#include <sched.h>
#include <linux/futex.h>
+#include <sys/mman.h>
+#include <asm/prctl.h>
+#include <sys/prctl.h>
#define AR_ACCESSED (1<<8)
@@ -44,6 +47,12 @@
static int nerrs;
+/* Points to an array of 1024 ints, each holding its own index. */
+static const unsigned int *counter_page;
+static struct user_desc *low_user_desc;
+static struct user_desc *low_user_desc_clear; /* Use to delete GDT entry */
+static int gdt_entry_num;
+
static void check_invalid_segment(uint16_t index, int ldt)
{
uint32_t has_limit = 0, has_ar = 0, limit, ar;
@@ -561,16 +570,257 @@
}
}
+static void setup_counter_page(void)
+{
+ unsigned int *page = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_32BIT, -1, 0);
+ if (page == MAP_FAILED)
+ err(1, "mmap");
+
+ for (int i = 0; i < 1024; i++)
+ page[i] = i;
+ counter_page = page;
+}
+
+static int invoke_set_thread_area(void)
+{
+ int ret;
+ asm volatile ("int $0x80"
+ : "=a" (ret), "+m" (low_user_desc) :
+ "a" (243), "b" (low_user_desc)
+ : "flags");
+ return ret;
+}
+
+static void setup_low_user_desc(void)
+{
+ low_user_desc = mmap(NULL, 2 * sizeof(struct user_desc),
+ PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_32BIT, -1, 0);
+ if (low_user_desc == MAP_FAILED)
+ err(1, "mmap");
+
+ low_user_desc->entry_number = -1;
+ low_user_desc->base_addr = (unsigned long)&counter_page[1];
+ low_user_desc->limit = 0xfffff;
+ low_user_desc->seg_32bit = 1;
+ low_user_desc->contents = 0; /* Data, grow-up*/
+ low_user_desc->read_exec_only = 0;
+ low_user_desc->limit_in_pages = 1;
+ low_user_desc->seg_not_present = 0;
+ low_user_desc->useable = 0;
+
+ if (invoke_set_thread_area() == 0) {
+ gdt_entry_num = low_user_desc->entry_number;
+ printf("[NOTE]\tset_thread_area is available; will use GDT index %d\n", gdt_entry_num);
+ } else {
+ printf("[NOTE]\tset_thread_area is unavailable\n");
+ }
+
+ low_user_desc_clear = low_user_desc + 1;
+ low_user_desc_clear->entry_number = gdt_entry_num;
+ low_user_desc_clear->read_exec_only = 1;
+ low_user_desc_clear->seg_not_present = 1;
+}
+
+static void test_gdt_invalidation(void)
+{
+ if (!gdt_entry_num)
+ return; /* 64-bit only system -- we can't use set_thread_area */
+
+ unsigned short prev_sel;
+ unsigned short sel;
+ unsigned int eax;
+ const char *result;
+#ifdef __x86_64__
+ unsigned long saved_base;
+ unsigned long new_base;
+#endif
+
+ /* Test DS */
+ invoke_set_thread_area();
+ eax = 243;
+ sel = (gdt_entry_num << 3) | 3;
+ asm volatile ("movw %%ds, %[prev_sel]\n\t"
+ "movw %[sel], %%ds\n\t"
+#ifdef __i386__
+ "pushl %%ebx\n\t"
+#endif
+ "movl %[arg1], %%ebx\n\t"
+ "int $0x80\n\t" /* Should invalidate ds */
+#ifdef __i386__
+ "popl %%ebx\n\t"
+#endif
+ "movw %%ds, %[sel]\n\t"
+ "movw %[prev_sel], %%ds"
+ : [prev_sel] "=&r" (prev_sel), [sel] "+r" (sel),
+ "+a" (eax)
+ : "m" (low_user_desc_clear),
+ [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
+ : "flags");
+
+ if (sel != 0) {
+ result = "FAIL";
+ nerrs++;
+ } else {
+ result = "OK";
+ }
+ printf("[%s]\tInvalidate DS with set_thread_area: new DS = 0x%hx\n",
+ result, sel);
+
+ /* Test ES */
+ invoke_set_thread_area();
+ eax = 243;
+ sel = (gdt_entry_num << 3) | 3;
+ asm volatile ("movw %%es, %[prev_sel]\n\t"
+ "movw %[sel], %%es\n\t"
+#ifdef __i386__
+ "pushl %%ebx\n\t"
+#endif
+ "movl %[arg1], %%ebx\n\t"
+ "int $0x80\n\t" /* Should invalidate es */
+#ifdef __i386__
+ "popl %%ebx\n\t"
+#endif
+ "movw %%es, %[sel]\n\t"
+ "movw %[prev_sel], %%es"
+ : [prev_sel] "=&r" (prev_sel), [sel] "+r" (sel),
+ "+a" (eax)
+ : "m" (low_user_desc_clear),
+ [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
+ : "flags");
+
+ if (sel != 0) {
+ result = "FAIL";
+ nerrs++;
+ } else {
+ result = "OK";
+ }
+ printf("[%s]\tInvalidate ES with set_thread_area: new ES = 0x%hx\n",
+ result, sel);
+
+ /* Test FS */
+ invoke_set_thread_area();
+ eax = 243;
+ sel = (gdt_entry_num << 3) | 3;
+#ifdef __x86_64__
+ syscall(SYS_arch_prctl, ARCH_GET_FS, &saved_base);
+#endif
+ asm volatile ("movw %%fs, %[prev_sel]\n\t"
+ "movw %[sel], %%fs\n\t"
+#ifdef __i386__
+ "pushl %%ebx\n\t"
+#endif
+ "movl %[arg1], %%ebx\n\t"
+ "int $0x80\n\t" /* Should invalidate fs */
+#ifdef __i386__
+ "popl %%ebx\n\t"
+#endif
+ "movw %%fs, %[sel]\n\t"
+ : [prev_sel] "=&r" (prev_sel), [sel] "+r" (sel),
+ "+a" (eax)
+ : "m" (low_user_desc_clear),
+ [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
+ : "flags");
+
+#ifdef __x86_64__
+ syscall(SYS_arch_prctl, ARCH_GET_FS, &new_base);
+#endif
+
+ /* Restore FS/BASE for glibc */
+ asm volatile ("movw %[prev_sel], %%fs" : : [prev_sel] "rm" (prev_sel));
+#ifdef __x86_64__
+ if (saved_base)
+ syscall(SYS_arch_prctl, ARCH_SET_FS, saved_base);
+#endif
+
+ if (sel != 0) {
+ result = "FAIL";
+ nerrs++;
+ } else {
+ result = "OK";
+ }
+ printf("[%s]\tInvalidate FS with set_thread_area: new FS = 0x%hx\n",
+ result, sel);
+
+#ifdef __x86_64__
+ if (sel == 0 && new_base != 0) {
+ nerrs++;
+ printf("[FAIL]\tNew FSBASE was 0x%lx\n", new_base);
+ } else {
+ printf("[OK]\tNew FSBASE was zero\n");
+ }
+#endif
+
+ /* Test GS */
+ invoke_set_thread_area();
+ eax = 243;
+ sel = (gdt_entry_num << 3) | 3;
+#ifdef __x86_64__
+ syscall(SYS_arch_prctl, ARCH_GET_GS, &saved_base);
+#endif
+ asm volatile ("movw %%gs, %[prev_sel]\n\t"
+ "movw %[sel], %%gs\n\t"
+#ifdef __i386__
+ "pushl %%ebx\n\t"
+#endif
+ "movl %[arg1], %%ebx\n\t"
+ "int $0x80\n\t" /* Should invalidate gs */
+#ifdef __i386__
+ "popl %%ebx\n\t"
+#endif
+ "movw %%gs, %[sel]\n\t"
+ : [prev_sel] "=&r" (prev_sel), [sel] "+r" (sel),
+ "+a" (eax)
+ : "m" (low_user_desc_clear),
+ [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
+ : "flags");
+
+#ifdef __x86_64__
+ syscall(SYS_arch_prctl, ARCH_GET_GS, &new_base);
+#endif
+
+ /* Restore GS/BASE for glibc */
+ asm volatile ("movw %[prev_sel], %%gs" : : [prev_sel] "rm" (prev_sel));
+#ifdef __x86_64__
+ if (saved_base)
+ syscall(SYS_arch_prctl, ARCH_SET_GS, saved_base);
+#endif
+
+ if (sel != 0) {
+ result = "FAIL";
+ nerrs++;
+ } else {
+ result = "OK";
+ }
+ printf("[%s]\tInvalidate GS with set_thread_area: new GS = 0x%hx\n",
+ result, sel);
+
+#ifdef __x86_64__
+ if (sel == 0 && new_base != 0) {
+ nerrs++;
+ printf("[FAIL]\tNew GSBASE was 0x%lx\n", new_base);
+ } else {
+ printf("[OK]\tNew GSBASE was zero\n");
+ }
+#endif
+}
+
int main(int argc, char **argv)
{
if (argc == 1 && !strcmp(argv[0], "ldt_gdt_test_exec"))
return finish_exec_test();
+ setup_counter_page();
+ setup_low_user_desc();
+
do_simple_tests();
do_multicpu_tests();
do_exec_test();
+ test_gdt_invalidation();
+
return nerrs ? 1 : 0;
}
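One note on the raw int $0x80 sequences in both x86 tests: eax = 243 is the 32-bit set_thread_area system-call number. It can be confirmed against the 32-bit syscall table (path as in contemporary kernel trees; output spacing approximate):

$ grep -w set_thread_area arch/x86/entry/syscalls/syscall_32.tbl
243	i386	set_thread_area		sys_set_thread_area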