arm_compute v18.01 Change-Id: I9bfa178c2e38bfd5fc812e62aab6760d87748e05

commit: f45d5a9be1bf4d315a227b80617582b8eb4214d2 [log] [tgz]
author: Anthony Barbier <anthony.barbier@arm.com> Wed Jan 24 16:23:15 2018 +0000
committer: Anthony Barbier <anthony.barbier@arm.com> Wed Jan 24 16:23:15 2018 +0000
tree: 29f24fc5f51448e831080d76eef3ac75d43c1934
parent: 6943bb00e79fe2ea4c127dc04b3440c5b0b29ce0 [diff] [blame]
diff --git a/documentation/architecture.xhtml b/documentation/architecture.xhtml
index da16fa3..75b6d03 100644
--- a/documentation/architecture.xhtml
+++ b/documentation/architecture.xhtml

@@ -40,7 +40,7 @@
  <tr style="height: 56px;">
   <td style="padding-left: 0.5em;">
    <div id="projectname">Compute Library
-   &#160;<span id="projectnumber">17.12</span>
+   &#160;<span id="projectnumber">18.01</span>
    </div>
   </td>
  </tr>
@@ -199,10 +199,10 @@
 <h2><a class="anchor" id="S4_4_2_events_sync"></a>
 OpenCL events and synchronization</h2>
 <p>In order to block until all the jobs in the <a class="el" href="classarm__compute_1_1_c_l_scheduler.xhtml" title="Provides global access to a CL context and command queue. ">CLScheduler</a>'s command queue are done executing the user can call <a class="el" href="classarm__compute_1_1_c_l_scheduler.xhtml#ad55f80ed3cd8b6c4f247763b747016af">CLScheduler::sync()</a> or create a sync event using <a class="el" href="classarm__compute_1_1_c_l_scheduler.xhtml#a6096d26e8e29e1a7f01b0f083fb7e33f">CLScheduler::enqueue_sync_event()</a></p>
-<p>For example: </p><div class="fragment"><div class="line">    PPMLoader     ppm;</div><div class="line">    <a class="code" href="namespacearm__compute.xhtml#aae712f442eae2d56d17529ca6cb9cb77">CLImage</a>       src, tmp_scale_median, tmp_median_gauss, <a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#adbf67dcee294e673cf796f1ed8aeb6a4">dst</a>;</div><div class="line">    constexpr <span class="keywordtype">int</span> scale_factor = 2;</div><div class="line"></div><div class="line">    <a class="code" href="classarm__compute_1_1_c_l_scheduler.xhtml#a60f9a6836b628a7171914c4afe43b4a7">CLScheduler::get</a>().<a class="code" href="classarm__compute_1_1_c_l_scheduler.xhtml#a46ecf9ef0fe80ba2ed35acfc29856b7d">default_init</a>();</div><div class="line"></div><div class="line">    <span class="keywordflow">if</span>(argc &lt; 2)</div><div class="line">    {</div><div class="line">        <span class="comment">// Print help</span></div><div class="line">        std::cout &lt;&lt; <span class="stringliteral">&quot;Usage: ./build/cl_events [input_image.ppm]\n\n&quot;</span>;</div><div class="line">        std::cout &lt;&lt; <span class="stringliteral">&quot;No input_image provided, creating a dummy 640x480 image\n&quot;</span>;</div><div class="line">        <span class="comment">// Create an empty grayscale 640x480 image</span></div><div class="line">        src.allocator()-&gt;init(TensorInfo(640, 480, <a class="code" href="namespacearm__compute.xhtml#ab4e88c89b3b7ea1735996cc4def22d58a6669348b484e3008dca2bfa8e85e40b5">Format::U8</a>));</div><div class="line">    }</div><div class="line">    <span class="keywordflow">else</span></div><div class="line">    {</div><div class="line">        ppm.open(argv[1]);</div><div class="line">        ppm.init_image(src, <a class="code" href="namespacearm__compute.xhtml#ab4e88c89b3b7ea1735996cc4def22d58a6669348b484e3008dca2bfa8e85e40b5">Format::U8</a>);</div><div class="line">    }</div><div class="line"></div><div 
class="line">    <span class="comment">// Declare and configure the functions to create the following pipeline: scale -&gt; median -&gt; gauss</span></div><div class="line">    CLScale       <a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#acec6d8ad52a28972fa74e071c1a63b6a">scale</a>;</div><div class="line">    CLMedian3x3   median;</div><div class="line">    CLGaussian5x5 gauss;</div><div class="line"></div><div class="line">    TensorInfo dst_info(src.info()-&gt;dimension(0) / scale_factor, src.info()-&gt;dimension(1) / scale_factor, <a class="code" href="namespacearm__compute.xhtml#ab4e88c89b3b7ea1735996cc4def22d58a6669348b484e3008dca2bfa8e85e40b5">Format::U8</a>);</div><div class="line"></div><div class="line">    <span class="comment">// Configure the temporary and destination images</span></div><div class="line">    dst.allocator()-&gt;init(dst_info);</div><div class="line">    tmp_scale_median.allocator()-&gt;init(dst_info);</div><div class="line">    tmp_median_gauss.allocator()-&gt;init(dst_info);</div><div class="line"></div><div class="line">    <span class="comment">//Configure the functions:</span></div><div class="line">    scale.configure(&amp;src, &amp;tmp_scale_median, <a class="code" href="namespacearm__compute.xhtml#a966a9c417ce5e94dca08d9b5e745c0c9a7f5ccbc3d30c2cd3fd04d567946cbde2">InterpolationPolicy::NEAREST_NEIGHBOR</a>, <a class="code" href="namespacearm__compute.xhtml#a15a05537a472ee742404821851529327a4ef59320fbe90fe47d40f1f71e4c5daa">BorderMode::REPLICATE</a>);</div><div class="line">    median.configure(&amp;tmp_scale_median, &amp;tmp_median_gauss, <a class="code" href="namespacearm__compute.xhtml#a15a05537a472ee742404821851529327a4ef59320fbe90fe47d40f1f71e4c5daa">BorderMode::REPLICATE</a>);</div><div class="line">    gauss.configure(&amp;tmp_median_gauss, &amp;dst, <a class="code" 
href="namespacearm__compute.xhtml#a15a05537a472ee742404821851529327a4ef59320fbe90fe47d40f1f71e4c5daa">BorderMode::REPLICATE</a>);</div><div class="line"></div><div class="line">    <span class="comment">// Allocate all the images</span></div><div class="line">    src.allocator()-&gt;allocate();</div><div class="line">    dst.allocator()-&gt;allocate();</div><div class="line">    tmp_scale_median.allocator()-&gt;allocate();</div><div class="line">    tmp_median_gauss.allocator()-&gt;allocate();</div><div class="line">    <span class="comment">// Fill the input image with the content of the PPM image if a filename was provided:</span></div><div class="line">    <span class="keywordflow">if</span>(ppm.is_open())</div><div class="line">    {</div><div class="line">        ppm.fill_image(src);</div><div class="line">    }</div><div class="line"></div><div class="line">    <span class="comment">// Enqueue and flush the scale OpenCL kernel:</span></div><div class="line">    scale.run();</div><div class="line">    <span class="comment">// Create a synchronisation event between scale and median:</span></div><div class="line">    cl::Event scale_event = <a class="code" href="classarm__compute_1_1_c_l_scheduler.xhtml#a60f9a6836b628a7171914c4afe43b4a7">CLScheduler::get</a>().<a class="code" href="classarm__compute_1_1_c_l_scheduler.xhtml#a6096d26e8e29e1a7f01b0f083fb7e33f">enqueue_sync_event</a>();</div><div class="line">    <span class="comment">// Enqueue and flush the median OpenCL kernel:</span></div><div class="line">    median.run();</div><div class="line">    <span class="comment">// Enqueue and flush the Gaussian OpenCL kernel:</span></div><div class="line">    gauss.run();</div><div class="line"></div><div class="line">    <span class="comment">//Make sure all the OpenCL jobs are done executing:</span></div><div class="line">    scale_event.wait();        <span class="comment">// Block until Scale is done executing (Median3x3 and Gaussian5x5 might still be 
running)</span></div><div class="line">    <a class="code" href="classarm__compute_1_1_c_l_scheduler.xhtml#a60f9a6836b628a7171914c4afe43b4a7">CLScheduler::get</a>().<a class="code" href="classarm__compute_1_1_c_l_scheduler.xhtml#ad55f80ed3cd8b6c4f247763b747016af">sync</a>(); <span class="comment">// Block until Gaussian5x5 is done executing</span></div><div class="line"></div><div class="line">    <span class="comment">// Save the result to file:</span></div><div class="line">    <span class="keywordflow">if</span>(ppm.is_open())</div><div class="line">    {</div><div class="line">        <span class="keyword">const</span> std::string output_filename = std::string(argv[1]) + <span class="stringliteral">&quot;_out.ppm&quot;</span>;</div><div class="line">        <a class="code" href="namespacearm__compute_1_1utils.xhtml#a301d0b7bfd70f73fc1924f4281938d08">save_to_ppm</a>(dst, output_filename); <span class="comment">// save_to_ppm maps and unmaps the image to store as PPM</span></div><div class="line">    }</div></div><!-- fragment --> <h2><a class="anchor" id="S4_4_2_cl_neon"></a>
+<p>For example: </p><div class="fragment"><div class="line">        PPMLoader     ppm;</div><div class="line">        constexpr <span class="keywordtype">int</span> scale_factor = 2;</div><div class="line"></div><div class="line">        <a class="code" href="classarm__compute_1_1_c_l_scheduler.xhtml#a60f9a6836b628a7171914c4afe43b4a7">CLScheduler::get</a>().<a class="code" href="classarm__compute_1_1_c_l_scheduler.xhtml#a46ecf9ef0fe80ba2ed35acfc29856b7d">default_init</a>();</div><div class="line"></div><div class="line">        <span class="keywordflow">if</span>(argc &lt; 2)</div><div class="line">        {</div><div class="line">            <span class="comment">// Print help</span></div><div class="line">            std::cout &lt;&lt; <span class="stringliteral">&quot;Usage: ./build/cl_events [input_image.ppm]\n\n&quot;</span>;</div><div class="line">            std::cout &lt;&lt; <span class="stringliteral">&quot;No input_image provided, creating a dummy 640x480 image\n&quot;</span>;</div><div class="line">            <span class="comment">// Create an empty grayscale 640x480 image</span></div><div class="line">            <a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#a6743f0a130e8311e6f5b1a23df102472">src</a>.allocator()-&gt;init(TensorInfo(640, 480, <a class="code" href="namespacearm__compute.xhtml#ab4e88c89b3b7ea1735996cc4def22d58a6669348b484e3008dca2bfa8e85e40b5">Format::U8</a>));</div><div class="line">        }</div><div class="line">        <span class="keywordflow">else</span></div><div class="line">        {</div><div class="line">            ppm.open(argv[1]);</div><div class="line">            ppm.init_image(<a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#a6743f0a130e8311e6f5b1a23df102472">src</a>, <a class="code" href="namespacearm__compute.xhtml#ab4e88c89b3b7ea1735996cc4def22d58a6669348b484e3008dca2bfa8e85e40b5">Format::U8</a>);</div><div class="line">        }</div><div class="line"></div><div 
class="line">        TensorInfo dst_info(<a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#a6743f0a130e8311e6f5b1a23df102472">src</a>.info()-&gt;dimension(0) / scale_factor, <a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#a6743f0a130e8311e6f5b1a23df102472">src</a>.info()-&gt;dimension(1) / scale_factor, <a class="code" href="namespacearm__compute.xhtml#ab4e88c89b3b7ea1735996cc4def22d58a6669348b484e3008dca2bfa8e85e40b5">Format::U8</a>);</div><div class="line"></div><div class="line">        <span class="comment">// Configure the temporary and destination images</span></div><div class="line">        <a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#adbf67dcee294e673cf796f1ed8aeb6a4">dst</a>.<a class="code" href="classarm__compute_1_1_c_l_tensor.xhtml#ad6dc6b773780dd6b1ad17fc82368d9f3">allocator</a>()-&gt;<a class="code" href="classarm__compute_1_1_i_tensor_allocator.xhtml#aa72161e0e3c0f6b2da20f835de6af680">init</a>(dst_info);</div><div class="line">        tmp_scale_median.allocator()-&gt;init(dst_info);</div><div class="line">        tmp_median_gauss.allocator()-&gt;init(dst_info);</div><div class="line"></div><div class="line">        <span class="comment">//Configure the functions:</span></div><div class="line">        <a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#acec6d8ad52a28972fa74e071c1a63b6a">scale</a>.configure(&amp;<a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#a6743f0a130e8311e6f5b1a23df102472">src</a>, &amp;tmp_scale_median, <a class="code" href="namespacearm__compute.xhtml#a966a9c417ce5e94dca08d9b5e745c0c9a7f5ccbc3d30c2cd3fd04d567946cbde2">InterpolationPolicy::NEAREST_NEIGHBOR</a>, <a class="code" href="namespacearm__compute.xhtml#a15a05537a472ee742404821851529327a4ef59320fbe90fe47d40f1f71e4c5daa">BorderMode::REPLICATE</a>);</div><div class="line">        median.configure(&amp;tmp_scale_median, &amp;tmp_median_gauss, <a class="code" 
href="namespacearm__compute.xhtml#a15a05537a472ee742404821851529327a4ef59320fbe90fe47d40f1f71e4c5daa">BorderMode::REPLICATE</a>);</div><div class="line">        gauss.configure(&amp;tmp_median_gauss, &amp;<a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#adbf67dcee294e673cf796f1ed8aeb6a4">dst</a>, <a class="code" href="namespacearm__compute.xhtml#a15a05537a472ee742404821851529327a4ef59320fbe90fe47d40f1f71e4c5daa">BorderMode::REPLICATE</a>);</div><div class="line"></div><div class="line">        <span class="comment">// Allocate all the images</span></div><div class="line">        <a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#a6743f0a130e8311e6f5b1a23df102472">src</a>.allocator()-&gt;allocate();</div><div class="line">        <a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#adbf67dcee294e673cf796f1ed8aeb6a4">dst</a>.<a class="code" href="classarm__compute_1_1_c_l_tensor.xhtml#ad6dc6b773780dd6b1ad17fc82368d9f3">allocator</a>()-&gt;<a class="code" href="classarm__compute_1_1_i_tensor_allocator.xhtml#aa8a4946cd749d482dd996874d295af85">allocate</a>();</div><div class="line">        tmp_scale_median.allocator()-&gt;allocate();</div><div class="line">        tmp_median_gauss.allocator()-&gt;allocate();</div><div class="line"></div><div class="line">        <span class="comment">// Fill the input image with the content of the PPM image if a filename was provided:</span></div><div class="line">        <span class="keywordflow">if</span>(ppm.is_open())</div><div class="line">        {</div><div class="line">            ppm.fill_image(<a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#a6743f0a130e8311e6f5b1a23df102472">src</a>);</div><div class="line">            output_filename = std::string(argv[1]) + <span class="stringliteral">&quot;_out.ppm&quot;</span>;</div><div class="line">        }</div></div><!-- fragment --> <h2><a class="anchor" id="S4_4_2_cl_neon"></a>
 OpenCL / NEON interoperability</h2>
 <p>You can mix OpenCL and NEON kernels and functions. However it is the user's responsibility to handle the mapping/unmapping of OpenCL objects, for example:</p>
-<div class="fragment"><div class="line">    PPMLoader ppm;</div><div class="line">    <a class="code" href="namespacearm__compute.xhtml#aae712f442eae2d56d17529ca6cb9cb77">CLImage</a>   src, scale_median, median_gauss, <a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#adbf67dcee294e673cf796f1ed8aeb6a4">dst</a>;</div><div class="line"></div><div class="line">    <a class="code" href="classarm__compute_1_1_c_l_scheduler.xhtml#a60f9a6836b628a7171914c4afe43b4a7">CLScheduler::get</a>().<a class="code" href="classarm__compute_1_1_c_l_scheduler.xhtml#a46ecf9ef0fe80ba2ed35acfc29856b7d">default_init</a>();</div><div class="line"></div><div class="line">    <span class="keywordflow">if</span>(argc &lt; 2)</div><div class="line">    {</div><div class="line">        <span class="comment">// Print help</span></div><div class="line">        std::cout &lt;&lt; <span class="stringliteral">&quot;Usage: ./build/cl_convolution [input_image.ppm]\n\n&quot;</span>;</div><div class="line">        std::cout &lt;&lt; <span class="stringliteral">&quot;No input_image provided, creating a dummy 640x480 image\n&quot;</span>;</div><div class="line">        <span class="comment">// Create an empty grayscale 640x480 image</span></div><div class="line">        src.allocator()-&gt;init(TensorInfo(640, 480, <a class="code" href="namespacearm__compute.xhtml#ab4e88c89b3b7ea1735996cc4def22d58a6669348b484e3008dca2bfa8e85e40b5">Format::U8</a>));</div><div class="line">    }</div><div class="line">    <span class="keywordflow">else</span></div><div class="line">    {</div><div class="line">        ppm.open(argv[1]);</div><div class="line">        ppm.init_image(src, <a class="code" href="namespacearm__compute.xhtml#ab4e88c89b3b7ea1735996cc4def22d58a6669348b484e3008dca2bfa8e85e40b5">Format::U8</a>);</div><div class="line">    }</div><div class="line"></div><div class="line">    TensorInfo scale_median_info(TensorInfo(src.info()-&gt;dimension(0) / 2, src.info()-&gt;dimension(1) / 2, <a 
class="code" href="namespacearm__compute.xhtml#ab4e88c89b3b7ea1735996cc4def22d58a6669348b484e3008dca2bfa8e85e40b5">Format::U8</a>));</div><div class="line"></div><div class="line">    <span class="comment">// Configure the temporary and destination images</span></div><div class="line">    scale_median.allocator()-&gt;init(scale_median_info);</div><div class="line">    median_gauss.allocator()-&gt;init(scale_median_info);</div><div class="line">    dst.allocator()-&gt;init(scale_median_info);</div><div class="line"></div><div class="line">    <span class="comment">// Declare and configure the functions to create the following pipeline: scale -&gt; median -&gt; gauss</span></div><div class="line">    CLScale       <a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#acec6d8ad52a28972fa74e071c1a63b6a">scale</a>;</div><div class="line">    NEMedian3x3   median;</div><div class="line">    CLGaussian5x5 gauss;</div><div class="line"></div><div class="line">    scale.configure(&amp;src, &amp;scale_median, <a class="code" href="namespacearm__compute.xhtml#a966a9c417ce5e94dca08d9b5e745c0c9a7f5ccbc3d30c2cd3fd04d567946cbde2">InterpolationPolicy::NEAREST_NEIGHBOR</a>, <a class="code" href="namespacearm__compute.xhtml#a15a05537a472ee742404821851529327a4ef59320fbe90fe47d40f1f71e4c5daa">BorderMode::REPLICATE</a>);</div><div class="line">    median.configure(&amp;scale_median, &amp;median_gauss, <a class="code" href="namespacearm__compute.xhtml#a15a05537a472ee742404821851529327a4ef59320fbe90fe47d40f1f71e4c5daa">BorderMode::REPLICATE</a>);</div><div class="line">    gauss.configure(&amp;median_gauss, &amp;dst, <a class="code" href="namespacearm__compute.xhtml#a15a05537a472ee742404821851529327a4ef59320fbe90fe47d40f1f71e4c5daa">BorderMode::REPLICATE</a>);</div><div class="line"></div><div class="line">    <span class="comment">// Allocate all the images</span></div><div class="line">    src.allocator()-&gt;allocate();</div><div class="line">    
scale_median.allocator()-&gt;allocate();</div><div class="line">    median_gauss.allocator()-&gt;allocate();</div><div class="line">    dst.allocator()-&gt;allocate();</div><div class="line"></div><div class="line">    <span class="comment">// Fill the input image with the content of the PPM image if a filename was provided:</span></div><div class="line">    <span class="keywordflow">if</span>(ppm.is_open())</div><div class="line">    {</div><div class="line">        ppm.fill_image(src);</div><div class="line">    }</div><div class="line"></div><div class="line">    <span class="comment">// Enqueue and flush the OpenCL kernel:</span></div><div class="line">    scale.run();</div><div class="line"></div><div class="line">    <span class="comment">// Do a blocking map of the input and output buffers of the NEON function:</span></div><div class="line">    scale_median.map();</div><div class="line">    median_gauss.map();</div><div class="line"></div><div class="line">    <span class="comment">// Run the NEON function:</span></div><div class="line">    median.run();</div><div class="line"></div><div class="line">    <span class="comment">// Unmap the output buffer before it&#39;s used again by OpenCL:</span></div><div class="line">    scale_median.unmap();</div><div class="line">    median_gauss.unmap();</div><div class="line"></div><div class="line">    <span class="comment">// Run the final OpenCL function:</span></div><div class="line">    gauss.run();</div><div class="line"></div><div class="line">    <span class="comment">// Make sure all the OpenCL jobs are done executing:</span></div><div class="line">    <a class="code" href="classarm__compute_1_1_c_l_scheduler.xhtml#a60f9a6836b628a7171914c4afe43b4a7">CLScheduler::get</a>().<a class="code" href="classarm__compute_1_1_c_l_scheduler.xhtml#ad55f80ed3cd8b6c4f247763b747016af">sync</a>();</div><div class="line"></div><div class="line">    <span class="comment">// Save the result to file:</span></div><div class="line"> 
   <span class="keywordflow">if</span>(ppm.is_open())</div><div class="line">    {</div><div class="line">        <span class="keyword">const</span> std::string output_filename = std::string(argv[1]) + <span class="stringliteral">&quot;_out.ppm&quot;</span>;</div><div class="line">        <a class="code" href="namespacearm__compute_1_1utils.xhtml#a301d0b7bfd70f73fc1924f4281938d08">save_to_ppm</a>(dst, output_filename); <span class="comment">// save_to_ppm maps and unmaps the image to store as PPM</span></div><div class="line">    }</div></div><!-- fragment --> <dl class="section see"><dt>See also</dt><dd><a class="el" href="neoncl__scale__median__gaussian_8cpp.xhtml#a4003cb8b626a6604e2f51b8e17f8bb3d" title="Example demonstrating how to use both CL and NEON functions in the same pipeline. ...">main_neoncl_scale_median_gaussian</a></dd></dl>
+<div class="fragment"><div class="line">        PPMLoader ppm;</div><div class="line"></div><div class="line">        <a class="code" href="classarm__compute_1_1_c_l_scheduler.xhtml#a60f9a6836b628a7171914c4afe43b4a7">CLScheduler::get</a>().<a class="code" href="classarm__compute_1_1_c_l_scheduler.xhtml#a46ecf9ef0fe80ba2ed35acfc29856b7d">default_init</a>();</div><div class="line"></div><div class="line">        <span class="keywordflow">if</span>(argc &lt; 2)</div><div class="line">        {</div><div class="line">            <span class="comment">// Print help</span></div><div class="line">            std::cout &lt;&lt; <span class="stringliteral">&quot;Usage: ./build/cl_convolution [input_image.ppm]\n\n&quot;</span>;</div><div class="line">            std::cout &lt;&lt; <span class="stringliteral">&quot;No input_image provided, creating a dummy 640x480 image\n&quot;</span>;</div><div class="line">            <span class="comment">// Create an empty grayscale 640x480 image</span></div><div class="line">            <a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#a6743f0a130e8311e6f5b1a23df102472">src</a>.allocator()-&gt;init(TensorInfo(640, 480, <a class="code" href="namespacearm__compute.xhtml#ab4e88c89b3b7ea1735996cc4def22d58a6669348b484e3008dca2bfa8e85e40b5">Format::U8</a>));</div><div class="line">        }</div><div class="line">        <span class="keywordflow">else</span></div><div class="line">        {</div><div class="line">            ppm.open(argv[1]);</div><div class="line">            ppm.init_image(<a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#a6743f0a130e8311e6f5b1a23df102472">src</a>, <a class="code" href="namespacearm__compute.xhtml#ab4e88c89b3b7ea1735996cc4def22d58a6669348b484e3008dca2bfa8e85e40b5">Format::U8</a>);</div><div class="line">        }</div><div class="line"></div><div class="line">        TensorInfo scale_median_info(TensorInfo(<a class="code" 
href="namespacearm__compute_1_1test_1_1validation.xhtml#a6743f0a130e8311e6f5b1a23df102472">src</a>.info()-&gt;dimension(0) / 2, <a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#a6743f0a130e8311e6f5b1a23df102472">src</a>.info()-&gt;dimension(1) / 2, <a class="code" href="namespacearm__compute.xhtml#ab4e88c89b3b7ea1735996cc4def22d58a6669348b484e3008dca2bfa8e85e40b5">Format::U8</a>));</div><div class="line"></div><div class="line">        <span class="comment">// Configure the temporary and destination images</span></div><div class="line">        scale_median.allocator()-&gt;init(scale_median_info);</div><div class="line">        median_gauss.allocator()-&gt;init(scale_median_info);</div><div class="line">        <a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#adbf67dcee294e673cf796f1ed8aeb6a4">dst</a>.<a class="code" href="classarm__compute_1_1_c_l_tensor.xhtml#ad6dc6b773780dd6b1ad17fc82368d9f3">allocator</a>()-&gt;<a class="code" href="classarm__compute_1_1_i_tensor_allocator.xhtml#aa72161e0e3c0f6b2da20f835de6af680">init</a>(scale_median_info);</div><div class="line"></div><div class="line">        <a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#acec6d8ad52a28972fa74e071c1a63b6a">scale</a>.configure(&amp;<a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#a6743f0a130e8311e6f5b1a23df102472">src</a>, &amp;scale_median, <a class="code" href="namespacearm__compute.xhtml#a966a9c417ce5e94dca08d9b5e745c0c9a7f5ccbc3d30c2cd3fd04d567946cbde2">InterpolationPolicy::NEAREST_NEIGHBOR</a>, <a class="code" href="namespacearm__compute.xhtml#a15a05537a472ee742404821851529327a4ef59320fbe90fe47d40f1f71e4c5daa">BorderMode::REPLICATE</a>);</div><div class="line">        median.configure(&amp;scale_median, &amp;median_gauss, <a class="code" href="namespacearm__compute.xhtml#a15a05537a472ee742404821851529327a4ef59320fbe90fe47d40f1f71e4c5daa">BorderMode::REPLICATE</a>);</div><div class="line">        
gauss.configure(&amp;median_gauss, &amp;<a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#adbf67dcee294e673cf796f1ed8aeb6a4">dst</a>, <a class="code" href="namespacearm__compute.xhtml#a15a05537a472ee742404821851529327a4ef59320fbe90fe47d40f1f71e4c5daa">BorderMode::REPLICATE</a>);</div><div class="line"></div><div class="line">        <span class="comment">// Allocate all the images</span></div><div class="line">        <a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#a6743f0a130e8311e6f5b1a23df102472">src</a>.allocator()-&gt;allocate();</div><div class="line">        scale_median.allocator()-&gt;allocate();</div><div class="line">        median_gauss.allocator()-&gt;allocate();</div><div class="line">        <a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#adbf67dcee294e673cf796f1ed8aeb6a4">dst</a>.<a class="code" href="classarm__compute_1_1_c_l_tensor.xhtml#ad6dc6b773780dd6b1ad17fc82368d9f3">allocator</a>()-&gt;<a class="code" href="classarm__compute_1_1_i_tensor_allocator.xhtml#aa8a4946cd749d482dd996874d295af85">allocate</a>();</div><div class="line"></div><div class="line">        <span class="comment">// Fill the input image with the content of the PPM image if a filename was provided:</span></div><div class="line">        <span class="keywordflow">if</span>(ppm.is_open())</div><div class="line">        {</div><div class="line">            ppm.fill_image(<a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#a6743f0a130e8311e6f5b1a23df102472">src</a>);</div><div class="line">            <span class="keyword">const</span> std::string output_filename = std::string(argv[1]) + <span class="stringliteral">&quot;_out.ppm&quot;</span>;</div><div class="line">        }</div></div><!-- fragment --> <dl class="section see"><dt>See also</dt><dd>main_neoncl_scale_median_gaussian</dd></dl>
 <h1><a class="anchor" id="S4_5_algorithms"></a>
 Algorithms</h1>
 <p>All computer vision algorithms in this library have been implemented following the <a href="https://www.khronos.org/registry/vx/specs/1.1/html/">OpenVX 1.1 specifications</a>. Please refer to the Khronos documentation for more information.</p>
@@ -226,11 +226,11 @@
 <ul>
 <li>Accurate padding:</li>
 </ul>
-<div class="fragment"><div class="line">    PPMLoader ppm;</div><div class="line">    <a class="code" href="struct_image.xhtml">Image</a>     src, tmp, <a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#adbf67dcee294e673cf796f1ed8aeb6a4">dst</a>;</div><div class="line"></div><div class="line">    <span class="keywordflow">if</span>(argc &lt; 2)</div><div class="line">    {</div><div class="line">        <span class="comment">// Print help</span></div><div class="line">        std::cout &lt;&lt; <span class="stringliteral">&quot;Usage: ./build/neon_convolution [input_image.ppm]\n\n&quot;</span>;</div><div class="line">        std::cout &lt;&lt; <span class="stringliteral">&quot;No input_image provided, creating a dummy 640x480 image\n&quot;</span>;</div><div class="line">        <span class="comment">// Initialize just the dimensions and format of your buffers:</span></div><div class="line">        src.allocator()-&gt;init(TensorInfo(640, 480, <a class="code" href="namespacearm__compute.xhtml#ab4e88c89b3b7ea1735996cc4def22d58a6669348b484e3008dca2bfa8e85e40b5">Format::U8</a>));</div><div class="line">    }</div><div class="line">    <span class="keywordflow">else</span></div><div class="line">    {</div><div class="line">        ppm.open(argv[1]);</div><div class="line">        <span class="comment">// Initialize just the dimensions and format of your buffers:</span></div><div class="line">        ppm.init_image(src, <a class="code" href="namespacearm__compute.xhtml#ab4e88c89b3b7ea1735996cc4def22d58a6669348b484e3008dca2bfa8e85e40b5">Format::U8</a>);</div><div class="line">    }</div><div class="line"></div><div class="line">    <span class="comment">// Initialize just the dimensions and format of the temporary and destination images:</span></div><div class="line">    tmp.allocator()-&gt;init(*src.info());</div><div class="line">    dst.allocator()-&gt;init(*src.info());</div><div class="line"></div><div class="line">    NEConvolution3x3 
conv3x3;</div><div class="line">    <a class="code" href="namespacearm__compute.xhtml#adbc7771d367ba8f51da1450d3602e5c0">NEConvolution5x5</a> conv5x5;</div><div class="line"></div><div class="line">    <span class="comment">// Apply a Gaussian 3x3 filter to the source image followed by a Gaussian 5x5:</span></div><div class="line">    <span class="comment">// The function will automatically update the padding information inside input and output to match its requirements</span></div><div class="line">    conv3x3.configure(&amp;src, &amp;tmp, <a class="code" href="cl__convolution_8cpp.xhtml#a741ba5321da40184f8653e0a50ace070">gaussian3x3</a>, 0 <span class="comment">/* Let arm_compute calculate the scale */</span>, <a class="code" href="namespacearm__compute.xhtml#a15a05537a472ee742404821851529327a0db45d2a4141101bdfe48e3314cfbca3">BorderMode::UNDEFINED</a>);</div><div class="line">    conv5x5.configure(&amp;tmp, &amp;dst, <a class="code" href="cl__convolution_8cpp.xhtml#a565013cf7e49a591bacd548571951f94">gaussian5x5</a>, 0 <span class="comment">/* Let arm_compute calculate the scale */</span>, <a class="code" href="namespacearm__compute.xhtml#a15a05537a472ee742404821851529327a0db45d2a4141101bdfe48e3314cfbca3">BorderMode::UNDEFINED</a>);</div><div class="line"></div><div class="line">    <span class="comment">// Now that the padding requirements are known we can allocate the images:</span></div><div class="line">    src.allocator()-&gt;allocate();</div><div class="line">    tmp.allocator()-&gt;allocate();</div><div class="line">    dst.allocator()-&gt;allocate();</div><div class="line"></div><div class="line">    <span class="comment">// Fill the input image with the content of the PPM image if a filename was provided:</span></div><div class="line">    <span class="keywordflow">if</span>(ppm.is_open())</div><div class="line">    {</div><div class="line">        ppm.fill_image(src);</div><div class="line">    }</div><div class="line"></div><div class="line">    <span 
class="comment">//Execute the functions:</span></div><div class="line">    conv3x3.run();</div><div class="line">    conv5x5.run();</div><div class="line"></div><div class="line">    <span class="comment">// Save the result to file:</span></div><div class="line">    <span class="keywordflow">if</span>(ppm.is_open())</div><div class="line">    {</div><div class="line">        <span class="keyword">const</span> std::string output_filename = std::string(argv[1]) + <span class="stringliteral">&quot;_out.ppm&quot;</span>;</div><div class="line">        <a class="code" href="namespacearm__compute_1_1utils.xhtml#a301d0b7bfd70f73fc1924f4281938d08">save_to_ppm</a>(dst, output_filename);</div><div class="line">    }</div></div><!-- fragment --> <dl class="section note"><dt>Note</dt><dd>It's important to call allocate <b>after</b> the function is configured: if the image / tensor is already allocated then the function will shrink its execution window instead of increasing the padding. (See below for more details).</dd></dl>
+<div class="fragment"><div class="line">        PPMLoader ppm;</div><div class="line"></div><div class="line">        <span class="keywordflow">if</span>(argc &lt; 2)</div><div class="line">        {</div><div class="line">            <span class="comment">// Print help</span></div><div class="line">            std::cout &lt;&lt; <span class="stringliteral">&quot;Usage: ./build/neon_convolution [input_image.ppm]\n\n&quot;</span>;</div><div class="line">            std::cout &lt;&lt; <span class="stringliteral">&quot;No input_image provided, creating a dummy 640x480 image\n&quot;</span>;</div><div class="line">            <span class="comment">// Initialize just the dimensions and format of your buffers:</span></div><div class="line">            <a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#a6743f0a130e8311e6f5b1a23df102472">src</a>.allocator()-&gt;init(TensorInfo(640, 480, <a class="code" href="namespacearm__compute.xhtml#ab4e88c89b3b7ea1735996cc4def22d58a6669348b484e3008dca2bfa8e85e40b5">Format::U8</a>));</div><div class="line">        }</div><div class="line">        <span class="keywordflow">else</span></div><div class="line">        {</div><div class="line">            ppm.open(argv[1]);</div><div class="line">            <span class="comment">// Initialize just the dimensions and format of your buffers:</span></div><div class="line">            ppm.init_image(<a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#a6743f0a130e8311e6f5b1a23df102472">src</a>, <a class="code" href="namespacearm__compute.xhtml#ab4e88c89b3b7ea1735996cc4def22d58a6669348b484e3008dca2bfa8e85e40b5">Format::U8</a>);</div><div class="line">        }</div><div class="line"></div><div class="line">        <span class="comment">// Initialize just the dimensions and format of the temporary and destination images:</span></div><div class="line">        tmp.allocator()-&gt;init(*<a class="code" 
href="namespacearm__compute_1_1test_1_1validation.xhtml#a6743f0a130e8311e6f5b1a23df102472">src</a>.info());</div><div class="line">        <a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#adbf67dcee294e673cf796f1ed8aeb6a4">dst</a>.<a class="code" href="classarm__compute_1_1_c_l_tensor.xhtml#ad6dc6b773780dd6b1ad17fc82368d9f3">allocator</a>()-&gt;<a class="code" href="classarm__compute_1_1_i_tensor_allocator.xhtml#aa72161e0e3c0f6b2da20f835de6af680">init</a>(*<a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#a6743f0a130e8311e6f5b1a23df102472">src</a>.info());</div><div class="line"></div><div class="line">        <span class="comment">// Apply a Gaussian 3x3 filter to the source image followed by a Gaussian 5x5:</span></div><div class="line">        <span class="comment">// The function will automatically update the padding information inside input and output to match its requirements</span></div><div class="line">        conv3x3.configure(&amp;<a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#a6743f0a130e8311e6f5b1a23df102472">src</a>, &amp;tmp, <a class="code" href="cl__convolution_8cpp.xhtml#a741ba5321da40184f8653e0a50ace070">gaussian3x3</a>, 0 <span class="comment">/* Let arm_compute calculate the scale */</span>, <a class="code" href="namespacearm__compute.xhtml#a15a05537a472ee742404821851529327a0db45d2a4141101bdfe48e3314cfbca3">BorderMode::UNDEFINED</a>);</div><div class="line">        conv5x5.configure(&amp;tmp, &amp;<a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#adbf67dcee294e673cf796f1ed8aeb6a4">dst</a>, <a class="code" href="cl__convolution_8cpp.xhtml#a565013cf7e49a591bacd548571951f94">gaussian5x5</a>, 0 <span class="comment">/* Let arm_compute calculate the scale */</span>, <a class="code" href="namespacearm__compute.xhtml#a15a05537a472ee742404821851529327a0db45d2a4141101bdfe48e3314cfbca3">BorderMode::UNDEFINED</a>);</div><div class="line"></div><div class="line">    
    <span class="comment">// Now that the padding requirements are known we can allocate the images:</span></div><div class="line">        <a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#a6743f0a130e8311e6f5b1a23df102472">src</a>.allocator()-&gt;allocate();</div><div class="line">        tmp.allocator()-&gt;allocate();</div><div class="line">        <a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#adbf67dcee294e673cf796f1ed8aeb6a4">dst</a>.<a class="code" href="classarm__compute_1_1_c_l_tensor.xhtml#ad6dc6b773780dd6b1ad17fc82368d9f3">allocator</a>()-&gt;<a class="code" href="classarm__compute_1_1_i_tensor_allocator.xhtml#aa8a4946cd749d482dd996874d295af85">allocate</a>();</div><div class="line"></div><div class="line">        <span class="comment">// Fill the input image with the content of the PPM image if a filename was provided:</span></div><div class="line">        <span class="keywordflow">if</span>(ppm.is_open())</div><div class="line">        {</div><div class="line">            ppm.fill_image(<a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#a6743f0a130e8311e6f5b1a23df102472">src</a>);</div><div class="line">            output_filename = std::string(argv[1]) + <span class="stringliteral">&quot;_out.ppm&quot;</span>;</div><div class="line">        }</div></div><!-- fragment --> <dl class="section note"><dt>Note</dt><dd>It's important to call allocate <b>after</b> the function is configured: if the image / tensor is already allocated then the function will shrink its execution window instead of increasing the padding. (See below for more details).</dd></dl>
 <ul>
 <li>Manual padding / no padding / auto padding: You can allocate your images / tensors up front (before configuring your functions). In that case the function will use whatever padding is available and will shrink its execution window if there isn't enough padding available (which translates into a smaller valid region for the output). See also <a class="el" href="architecture.xhtml#valid_region">Valid regions</a>. If you don't want to manually set the padding but still want to allocate your objects upfront then you can use auto_padding. It guarantees that the allocation will have enough padding to run any of the provided functions.</li>
 </ul>
-<div class="fragment"><div class="line"><a class="code" href="struct_image.xhtml">Image</a>     src, <a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#adbf67dcee294e673cf796f1ed8aeb6a4">dst</a>;</div><div class="line"></div><div class="line"><span class="comment">// Use auto padding for the input:</span></div><div class="line">src.info()-&gt;init_auto_padding(TensorShape(640u,480u), <a class="code" href="namespacearm__compute.xhtml#ab4e88c89b3b7ea1735996cc4def22d58a6669348b484e3008dca2bfa8e85e40b5">Format::U8</a>);</div><div class="line"></div><div class="line"><span class="comment">// Use manual padding for the destination image</span></div><div class="line">dst.info()-&gt;init(src.info()-&gt;tensor_shape(), <a class="code" href="namespacearm__compute.xhtml#ab4e88c89b3b7ea1735996cc4def22d58a6669348b484e3008dca2bfa8e85e40b5">Format::U8</a>, strides_in_bytes, offset_first_element_in_bytes, total_size_in_bytes);</div><div class="line"></div><div class="line"><span class="comment">// Allocate all the images</span></div><div class="line">src.allocator()-&gt;allocate();</div><div class="line">dst.allocator()-&gt;allocate();</div><div class="line"><span class="comment">// Fill the input image with the content of the PPM image if a filename was provided:</span></div><div class="line">fill_image(src);</div><div class="line"></div><div class="line">NEGaussian3x3 gauss;</div><div class="line"></div><div class="line"><span class="comment">// Apply a Gaussian 3x3 filter to the source image (Note: if the padding provided is not enough then the execution window and valid region of the output will be shrunk)</span></div><div class="line">gauss.configure(&amp;src, &amp;dst, <a class="code" href="namespacearm__compute.xhtml#a15a05537a472ee742404821851529327a0db45d2a4141101bdfe48e3314cfbca3">BorderMode::UNDEFINED</a>);</div><div class="line"></div><div class="line"><span class="comment">//Execute the functions:</span></div><div 
class="line">gauss.run();</div></div><!-- fragment --><dl class="section warning"><dt>Warning</dt><dd>Some kernels need up to 3 neighbor values to calculate the value of a given pixel. Therefore, to be safe, we use a 4-pixel padding all around the image. In addition, some kernels read and write up to 32 pixels at the same time. To cover that case as well we add an extra 32 pixels of padding at the end of each row. As a result auto padded buffers waste a lot of memory and are less cache friendly. It is therefore recommended to use accurate padding or manual padding wherever possible.</dd></dl>
+<div class="fragment"><div class="line"><a class="code" href="struct_image.xhtml">Image</a>     <a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#a6743f0a130e8311e6f5b1a23df102472">src</a>, <a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#adbf67dcee294e673cf796f1ed8aeb6a4">dst</a>;</div><div class="line"></div><div class="line"><span class="comment">// Use auto padding for the input:</span></div><div class="line">src.info()-&gt;init_auto_padding(TensorShape(640u,480u), <a class="code" href="namespacearm__compute.xhtml#ab4e88c89b3b7ea1735996cc4def22d58a6669348b484e3008dca2bfa8e85e40b5">Format::U8</a>);</div><div class="line"></div><div class="line"><span class="comment">// Use manual padding for the destination image</span></div><div class="line">dst.info()-&gt;init(src.info()-&gt;tensor_shape(), <a class="code" href="namespacearm__compute.xhtml#ab4e88c89b3b7ea1735996cc4def22d58a6669348b484e3008dca2bfa8e85e40b5">Format::U8</a>, strides_in_bytes, offset_first_element_in_bytes, total_size_in_bytes);</div><div class="line"></div><div class="line"><span class="comment">// Allocate all the images</span></div><div class="line">src.allocator()-&gt;allocate();</div><div class="line">dst.allocator()-&gt;allocate();</div><div class="line"><span class="comment">// Fill the input image with the content of the PPM image if a filename was provided:</span></div><div class="line">fill_image(src);</div><div class="line"></div><div class="line">NEGaussian3x3 gauss;</div><div class="line"></div><div class="line"><span class="comment">// Apply a Gaussian 3x3 filter to the source image (Note: if the padding provided is not enough then the execution window and valid region of the output will be shrunk)</span></div><div class="line">gauss.configure(&amp;src, &amp;dst, <a class="code" href="namespacearm__compute.xhtml#a15a05537a472ee742404821851529327a0db45d2a4141101bdfe48e3314cfbca3">BorderMode::UNDEFINED</a>);</div><div 
class="line"></div><div class="line"><span class="comment">//Execute the functions:</span></div><div class="line">gauss.run();</div></div><!-- fragment --><dl class="section warning"><dt>Warning</dt><dd>Some kernels need up to 3 neighbor values to calculate the value of a given pixel. Therefore, to be safe, we use a 4-pixel padding all around the image. In addition, some kernels read and write up to 32 pixels at the same time. To cover that case as well we add an extra 32 pixels of padding at the end of each row. As a result auto padded buffers waste a lot of memory and are less cache friendly. It is therefore recommended to use accurate padding or manual padding wherever possible.</dd></dl>
 <h3><a class="anchor" id="valid_region"></a>
 Valid regions</h3>
 <p>Some kernels (like edge detectors for example) need to read values of neighboring pixels to calculate the value of a given pixel; it is therefore not possible to calculate the values of the pixels on the edges.</p>
@@ -257,7 +257,7 @@
 <p>Iteration windows are defined by an array of dimensions, each of which consists of a start, end and step.</p>
 <p>The <a class="el" href="namespacearm__compute.xhtml#a6c0dcc38187027dcb89cd9724bc5a823">execute_window_loop</a> function takes an execution window, a lambda function and one or more iterators. It will iterate through every element of the execution window and for each element it will update the iterators accordingly and call the lambda function.</p>
 <p>Here are a couple of examples of how to use the iterators to fill / read tensors:</p>
-<div class="fragment"><div class="line">    constexpr <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> width  = 4;</div><div class="line">    constexpr <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> height = 3;</div><div class="line">    constexpr <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> batch  = 2;</div><div class="line"></div><div class="line">    <span class="keyword">auto</span> *src_data = <span class="keyword">new</span> <span class="keywordtype">float</span>[width * height * batch];</div><div class="line">    <span class="keyword">auto</span> *dst_data = <span class="keyword">new</span> <span class="keywordtype">float</span>[width * height * batch];</div><div class="line"></div><div class="line">    <span class="comment">// Fill src_data with dummy values:</span></div><div class="line">    <span class="keywordflow">for</span>(<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> <a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#a7b8004eef325a40dd43eb80755610fff">b</a> = 0; <a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#a7b8004eef325a40dd43eb80755610fff">b</a> &lt; batch; <a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#a7b8004eef325a40dd43eb80755610fff">b</a>++)</div><div class="line">    {</div><div class="line">        <span class="keywordflow">for</span>(<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> h = 0; h &lt; height; h++)</div><div class="line">        {</div><div class="line">            <span class="keywordflow">for</span>(<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> w = 0; w &lt; width; w++)</div><div class="line">            {</div><div class="line">                src_data[<a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#a7b8004eef325a40dd43eb80755610fff">b</a> * 
(width * height) + h * width + w] = static_cast&lt;float&gt;(100 * <a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#a7b8004eef325a40dd43eb80755610fff">b</a> + 10 * h + w);</div><div class="line">            }</div><div class="line">        }</div><div class="line">    }</div><div class="line"></div><div class="line">    Tensor         input, output;</div><div class="line">    NESoftmaxLayer softmax;</div><div class="line"></div><div class="line">    <span class="comment">// Initialize the tensors dimensions and type:</span></div><div class="line">    <span class="keyword">const</span> TensorShape <a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#a45cde9abb508c62d67c3bb2b9bf566a5">shape</a>(width, height, batch);</div><div class="line">    input.allocator()-&gt;init(TensorInfo(<a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#a45cde9abb508c62d67c3bb2b9bf566a5">shape</a>, 1, <a class="code" href="namespacearm__compute.xhtml#ab4e88c89b3b7ea1735996cc4def22d58a44ad4ef5a76e6aa6fb3e3fa079a54fda">DataType::F32</a>));</div><div class="line">    output.allocator()-&gt;init(TensorInfo(<a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#a45cde9abb508c62d67c3bb2b9bf566a5">shape</a>, 1, <a class="code" href="namespacearm__compute.xhtml#ab4e88c89b3b7ea1735996cc4def22d58a44ad4ef5a76e6aa6fb3e3fa079a54fda">DataType::F32</a>));</div><div class="line"></div><div class="line">    <span class="comment">// Configure softmax:</span></div><div class="line">    softmax.configure(&amp;input, &amp;output);</div><div class="line"></div><div class="line">    <span class="comment">// Allocate the input / output tensors:</span></div><div class="line">    input.allocator()-&gt;allocate();</div><div class="line">    output.allocator()-&gt;allocate();</div><div class="line"></div><div class="line">    <span class="comment">// Fill the input tensor:</span></div><div class="line">    <span class="comment">// 
Simplest way: create an iterator to iterate through each element of the input tensor:</span></div><div class="line">    Window input_window;</div><div class="line">    input_window.use_tensor_dimensions(input.info()-&gt;tensor_shape());</div><div class="line">    std::cout &lt;&lt; <span class="stringliteral">&quot; Dimensions of the input&#39;s iterator:\n&quot;</span>;</div><div class="line">    std::cout &lt;&lt; <span class="stringliteral">&quot; X = [start=&quot;</span> &lt;&lt; input_window.x().start() &lt;&lt; <span class="stringliteral">&quot;, end=&quot;</span> &lt;&lt; input_window.x().end() &lt;&lt; <span class="stringliteral">&quot;, step=&quot;</span> &lt;&lt; input_window.x().step() &lt;&lt; <span class="stringliteral">&quot;]\n&quot;</span>;</div><div class="line">    std::cout &lt;&lt; <span class="stringliteral">&quot; Y = [start=&quot;</span> &lt;&lt; input_window.y().start() &lt;&lt; <span class="stringliteral">&quot;, end=&quot;</span> &lt;&lt; input_window.y().end() &lt;&lt; <span class="stringliteral">&quot;, step=&quot;</span> &lt;&lt; input_window.y().step() &lt;&lt; <span class="stringliteral">&quot;]\n&quot;</span>;</div><div class="line">    std::cout &lt;&lt; <span class="stringliteral">&quot; Z = [start=&quot;</span> &lt;&lt; input_window.z().start() &lt;&lt; <span class="stringliteral">&quot;, end=&quot;</span> &lt;&lt; input_window.z().end() &lt;&lt; <span class="stringliteral">&quot;, step=&quot;</span> &lt;&lt; input_window.z().step() &lt;&lt; <span class="stringliteral">&quot;]\n&quot;</span>;</div><div class="line"></div><div class="line">    <span class="comment">// Create an iterator:</span></div><div class="line">    Iterator input_it(&amp;input, input_window);</div><div class="line"></div><div class="line">    <span class="comment">// Iterate through the elements of src_data and copy them one by one to the input tensor:</span></div><div class="line">    <span class="comment">// This is equivalent to:</span></div><div 
class="line">    <span class="comment">// for( unsigned int z = 0; z &lt; batch; ++z)</span></div><div class="line">    <span class="comment">// {</span></div><div class="line">    <span class="comment">//   for( unsigned int y = 0; y &lt; height; ++y)</span></div><div class="line">    <span class="comment">//   {</span></div><div class="line">    <span class="comment">//     for( unsigned int x = 0; x &lt; width; ++x)</span></div><div class="line">    <span class="comment">//     {</span></div><div class="line">    <span class="comment">//       *reinterpret_cast&lt;float*&gt;( input.buffer() + input.info()-&gt;offset_element_in_bytes(Coordinates(x,y,z))) = src_data[ z * (width*height) + y * width + x];</span></div><div class="line">    <span class="comment">//     }</span></div><div class="line">    <span class="comment">//   }</span></div><div class="line">    <span class="comment">// }</span></div><div class="line">    <span class="comment">// Except it works for an arbitrary number of dimensions</span></div><div class="line">    <a class="code" href="namespacearm__compute.xhtml#a6c0dcc38187027dcb89cd9724bc5a823">execute_window_loop</a>(input_window, [&amp;](<span class="keyword">const</span> Coordinates &amp; <span class="keywordtype">id</span>)</div><div class="line">    {</div><div class="line">        std::cout &lt;&lt; <span class="stringliteral">&quot;Setting item [&quot;</span> &lt;&lt; <span class="keywordtype">id</span>.x() &lt;&lt; <span class="stringliteral">&quot;,&quot;</span> &lt;&lt; <span class="keywordtype">id</span>.y() &lt;&lt; <span class="stringliteral">&quot;,&quot;</span> &lt;&lt; <span class="keywordtype">id</span>.z() &lt;&lt; <span class="stringliteral">&quot;]\n&quot;</span>;</div><div class="line">        *<span class="keyword">reinterpret_cast&lt;</span><span class="keywordtype">float</span> *<span class="keyword">&gt;</span>(input_it.ptr()) = src_data[<span class="keywordtype">id</span>.z() * (width * height) + <span 
class="keywordtype">id</span>.y() * width + <span class="keywordtype">id</span>.x()];</div><div class="line">    },</div><div class="line">    input_it);</div><div class="line"></div><div class="line">    <span class="comment">// Run NEON softmax:</span></div><div class="line">    softmax.run();</div><div class="line"></div><div class="line">    <span class="comment">// More efficient way: create an iterator to iterate through each row (instead of each element) of the output tensor:</span></div><div class="line">    Window output_window;</div><div class="line">    output_window.use_tensor_dimensions(output.info()-&gt;tensor_shape(), <span class="comment">/* first_dimension =*/</span><a class="code" href="classarm__compute_1_1_window.xhtml#ad2d402364fa822b0b7775081291eeca9">Window::DimY</a>); <span class="comment">// Iterate through the rows (not each element)</span></div><div class="line">    std::cout &lt;&lt; <span class="stringliteral">&quot; Dimensions of the output&#39;s iterator:\n&quot;</span>;</div><div class="line">    std::cout &lt;&lt; <span class="stringliteral">&quot; X = [start=&quot;</span> &lt;&lt; output_window.x().start() &lt;&lt; <span class="stringliteral">&quot;, end=&quot;</span> &lt;&lt; output_window.x().end() &lt;&lt; <span class="stringliteral">&quot;, step=&quot;</span> &lt;&lt; output_window.x().step() &lt;&lt; <span class="stringliteral">&quot;]\n&quot;</span>;</div><div class="line">    std::cout &lt;&lt; <span class="stringliteral">&quot; Y = [start=&quot;</span> &lt;&lt; output_window.y().start() &lt;&lt; <span class="stringliteral">&quot;, end=&quot;</span> &lt;&lt; output_window.y().end() &lt;&lt; <span class="stringliteral">&quot;, step=&quot;</span> &lt;&lt; output_window.y().step() &lt;&lt; <span class="stringliteral">&quot;]\n&quot;</span>;</div><div class="line">    std::cout &lt;&lt; <span class="stringliteral">&quot; Z = [start=&quot;</span> &lt;&lt; output_window.z().start() &lt;&lt; <span class="stringliteral">&quot;, 
end=&quot;</span> &lt;&lt; output_window.z().end() &lt;&lt; <span class="stringliteral">&quot;, step=&quot;</span> &lt;&lt; output_window.z().step() &lt;&lt; <span class="stringliteral">&quot;]\n&quot;</span>;</div><div class="line"></div><div class="line">    <span class="comment">// Create an iterator:</span></div><div class="line">    Iterator output_it(&amp;output, output_window);</div><div class="line"></div><div class="line">    <span class="comment">// Iterate through the rows of the output tensor and copy them to dst_data:</span></div><div class="line">    <span class="comment">// This is equivalent to:</span></div><div class="line">    <span class="comment">// for( unsigned int z = 0; z &lt; batch; ++z)</span></div><div class="line">    <span class="comment">// {</span></div><div class="line">    <span class="comment">//   for( unsigned int y = 0; y &lt; height; ++y)</span></div><div class="line">    <span class="comment">//   {</span></div><div class="line">    <span class="comment">//     memcpy( dst_data + z * (width*height) + y * width, input.buffer() + input.info()-&gt;offset_element_in_bytes(Coordinates(0,y,z)), width * sizeof(float));</span></div><div class="line">    <span class="comment">//   }</span></div><div class="line">    <span class="comment">// }</span></div><div class="line">    <span class="comment">// Except it works for an arbitrary number of dimensions</span></div><div class="line">    <a class="code" href="namespacearm__compute.xhtml#a6c0dcc38187027dcb89cd9724bc5a823">execute_window_loop</a>(output_window, [&amp;](<span class="keyword">const</span> Coordinates &amp; <span class="keywordtype">id</span>)</div><div class="line">    {</div><div class="line">        std::cout &lt;&lt; <span class="stringliteral">&quot;Copying one row starting from [&quot;</span> &lt;&lt; <span class="keywordtype">id</span>.x() &lt;&lt; <span class="stringliteral">&quot;,&quot;</span> &lt;&lt; <span class="keywordtype">id</span>.y() &lt;&lt; <span 
class="stringliteral">&quot;,&quot;</span> &lt;&lt; <span class="keywordtype">id</span>.z() &lt;&lt; <span class="stringliteral">&quot;]\n&quot;</span>;</div><div class="line">        <span class="comment">// Copy one whole row:</span></div><div class="line">        memcpy(dst_data + <span class="keywordtype">id</span>.z() * (width * height) + <span class="keywordtype">id</span>.y() * width, output_it.ptr(), width * <span class="keyword">sizeof</span>(float));</div><div class="line">    },</div><div class="line">    output_it);</div><div class="line"></div><div class="line">    <span class="keyword">delete</span>[] src_data;</div><div class="line">    <span class="keyword">delete</span>[] dst_data;</div></div><!-- fragment --> <h1><a class="anchor" id="S4_7_memory_manager"></a>
+<div class="fragment"><div class="line">        constexpr <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> width  = 4;</div><div class="line">        constexpr <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> height = 3;</div><div class="line">        constexpr <span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> batch  = 2;</div><div class="line"></div><div class="line">        src_data = <span class="keyword">new</span> <span class="keywordtype">float</span>[width * height * batch];</div><div class="line">        dst_data = <span class="keyword">new</span> <span class="keywordtype">float</span>[width * height * batch];</div><div class="line"></div><div class="line">        <span class="comment">// Fill src_data with dummy values:</span></div><div class="line">        <span class="keywordflow">for</span>(<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> <a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#a7b8004eef325a40dd43eb80755610fff">b</a> = 0; <a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#a7b8004eef325a40dd43eb80755610fff">b</a> &lt; batch; <a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#a7b8004eef325a40dd43eb80755610fff">b</a>++)</div><div class="line">        {</div><div class="line">            <span class="keywordflow">for</span>(<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> h = 0; h &lt; height; h++)</div><div class="line">            {</div><div class="line">                <span class="keywordflow">for</span>(<span class="keywordtype">unsigned</span> <span class="keywordtype">int</span> w = 0; w &lt; width; w++)</div><div class="line">                {</div><div class="line">                    src_data[<a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#a7b8004eef325a40dd43eb80755610fff">b</a> * (width * height) + h 
* width + w] = static_cast&lt;float&gt;(100 * <a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#a7b8004eef325a40dd43eb80755610fff">b</a> + 10 * h + w);</div><div class="line">                }</div><div class="line">            }</div><div class="line">        }</div><div class="line"></div><div class="line">        <span class="comment">// Initialize the tensors dimensions and type:</span></div><div class="line">        <span class="keyword">const</span> TensorShape <a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#a45cde9abb508c62d67c3bb2b9bf566a5">shape</a>(width, height, batch);</div><div class="line">        input.allocator()-&gt;init(TensorInfo(<a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#a45cde9abb508c62d67c3bb2b9bf566a5">shape</a>, 1, <a class="code" href="namespacearm__compute.xhtml#ab4e88c89b3b7ea1735996cc4def22d58a44ad4ef5a76e6aa6fb3e3fa079a54fda">DataType::F32</a>));</div><div class="line">        output.allocator()-&gt;init(TensorInfo(<a class="code" href="namespacearm__compute_1_1test_1_1validation.xhtml#a45cde9abb508c62d67c3bb2b9bf566a5">shape</a>, 1, <a class="code" href="namespacearm__compute.xhtml#ab4e88c89b3b7ea1735996cc4def22d58a44ad4ef5a76e6aa6fb3e3fa079a54fda">DataType::F32</a>));</div><div class="line"></div><div class="line">        <span class="comment">// Configure softmax:</span></div><div class="line">        softmax.configure(&amp;input, &amp;output);</div><div class="line"></div><div class="line">        <span class="comment">// Allocate the input / output tensors:</span></div><div class="line">        input.allocator()-&gt;allocate();</div><div class="line">        output.allocator()-&gt;allocate();</div><div class="line"></div><div class="line">        <span class="comment">// Fill the input tensor:</span></div><div class="line">        <span class="comment">// Simplest way: create an iterator to iterate through each element of the input tensor:</span></div><div 
class="line">        Window input_window;</div><div class="line">        input_window.use_tensor_dimensions(input.info()-&gt;tensor_shape());</div><div class="line">        std::cout &lt;&lt; <span class="stringliteral">&quot; Dimensions of the input&#39;s iterator:\n&quot;</span>;</div><div class="line">        std::cout &lt;&lt; <span class="stringliteral">&quot; X = [start=&quot;</span> &lt;&lt; input_window.x().start() &lt;&lt; <span class="stringliteral">&quot;, end=&quot;</span> &lt;&lt; input_window.x().end() &lt;&lt; <span class="stringliteral">&quot;, step=&quot;</span> &lt;&lt; input_window.x().step() &lt;&lt; <span class="stringliteral">&quot;]\n&quot;</span>;</div><div class="line">        std::cout &lt;&lt; <span class="stringliteral">&quot; Y = [start=&quot;</span> &lt;&lt; input_window.y().start() &lt;&lt; <span class="stringliteral">&quot;, end=&quot;</span> &lt;&lt; input_window.y().end() &lt;&lt; <span class="stringliteral">&quot;, step=&quot;</span> &lt;&lt; input_window.y().step() &lt;&lt; <span class="stringliteral">&quot;]\n&quot;</span>;</div><div class="line">        std::cout &lt;&lt; <span class="stringliteral">&quot; Z = [start=&quot;</span> &lt;&lt; input_window.z().start() &lt;&lt; <span class="stringliteral">&quot;, end=&quot;</span> &lt;&lt; input_window.z().end() &lt;&lt; <span class="stringliteral">&quot;, step=&quot;</span> &lt;&lt; input_window.z().step() &lt;&lt; <span class="stringliteral">&quot;]\n&quot;</span>;</div><div class="line"></div><div class="line">        <span class="comment">// Create an iterator:</span></div><div class="line">        Iterator input_it(&amp;input, input_window);</div><div class="line"></div><div class="line">        <span class="comment">// Iterate through the elements of src_data and copy them one by one to the input tensor:</span></div><div class="line">        <span class="comment">// This is equivalent to:</span></div><div class="line">        <span class="comment">// for( unsigned int z = 0; z 
&lt; batch; ++z)</span></div><div class="line">        <span class="comment">// {</span></div><div class="line">        <span class="comment">//   for( unsigned int y = 0; y &lt; height; ++y)</span></div><div class="line">        <span class="comment">//   {</span></div><div class="line">        <span class="comment">//     for( unsigned int x = 0; x &lt; width; ++x)</span></div><div class="line">        <span class="comment">//     {</span></div><div class="line">        <span class="comment">//       *reinterpret_cast&lt;float*&gt;( input.buffer() + input.info()-&gt;offset_element_in_bytes(Coordinates(x,y,z))) = src_data[ z * (width*height) + y * width + x];</span></div><div class="line">        <span class="comment">//     }</span></div><div class="line">        <span class="comment">//   }</span></div><div class="line">        <span class="comment">// }</span></div><div class="line">        <span class="comment">// Except it works for an arbitrary number of dimensions</span></div><div class="line">        <a class="code" href="namespacearm__compute.xhtml#a6c0dcc38187027dcb89cd9724bc5a823">execute_window_loop</a>(input_window, [&amp;](<span class="keyword">const</span> Coordinates &amp; <span class="keywordtype">id</span>)</div><div class="line">        {</div><div class="line">            std::cout &lt;&lt; <span class="stringliteral">&quot;Setting item [&quot;</span> &lt;&lt; <span class="keywordtype">id</span>.x() &lt;&lt; <span class="stringliteral">&quot;,&quot;</span> &lt;&lt; <span class="keywordtype">id</span>.y() &lt;&lt; <span class="stringliteral">&quot;,&quot;</span> &lt;&lt; <span class="keywordtype">id</span>.z() &lt;&lt; <span class="stringliteral">&quot;]\n&quot;</span>;</div><div class="line">            *<span class="keyword">reinterpret_cast&lt;</span><span class="keywordtype">float</span> *<span class="keyword">&gt;</span>(input_it.ptr()) = src_data[<span class="keywordtype">id</span>.z() * (width * height) + <span 
class="keywordtype">id</span>.y() * width + <span class="keywordtype">id</span>.x()];</div><div class="line">        },</div><div class="line">        input_it);</div><div class="line"></div><div class="line">        <span class="comment">// More efficient way: create an iterator to iterate through each row (instead of each element) of the output tensor:</span></div><div class="line">        Window output_window;</div><div class="line">        output_window.use_tensor_dimensions(output.info()-&gt;tensor_shape(), <span class="comment">/* first_dimension =*/</span><a class="code" href="classarm__compute_1_1_window.xhtml#ad2d402364fa822b0b7775081291eeca9">Window::DimY</a>); <span class="comment">// Iterate through the rows (not each element)</span></div><div class="line">        std::cout &lt;&lt; <span class="stringliteral">&quot; Dimensions of the output&#39;s iterator:\n&quot;</span>;</div><div class="line">        std::cout &lt;&lt; <span class="stringliteral">&quot; X = [start=&quot;</span> &lt;&lt; output_window.x().start() &lt;&lt; <span class="stringliteral">&quot;, end=&quot;</span> &lt;&lt; output_window.x().end() &lt;&lt; <span class="stringliteral">&quot;, step=&quot;</span> &lt;&lt; output_window.x().step() &lt;&lt; <span class="stringliteral">&quot;]\n&quot;</span>;</div><div class="line">        std::cout &lt;&lt; <span class="stringliteral">&quot; Y = [start=&quot;</span> &lt;&lt; output_window.y().start() &lt;&lt; <span class="stringliteral">&quot;, end=&quot;</span> &lt;&lt; output_window.y().end() &lt;&lt; <span class="stringliteral">&quot;, step=&quot;</span> &lt;&lt; output_window.y().step() &lt;&lt; <span class="stringliteral">&quot;]\n&quot;</span>;</div><div class="line">        std::cout &lt;&lt; <span class="stringliteral">&quot; Z = [start=&quot;</span> &lt;&lt; output_window.z().start() &lt;&lt; <span class="stringliteral">&quot;, end=&quot;</span> &lt;&lt; output_window.z().end() &lt;&lt; <span class="stringliteral">&quot;, 
step=&quot;</span> &lt;&lt; output_window.z().step() &lt;&lt; <span class="stringliteral">&quot;]\n&quot;</span>;</div><div class="line"></div><div class="line">        <span class="comment">// Create an iterator:</span></div><div class="line">        Iterator output_it(&amp;output, output_window);</div><div class="line"></div><div class="line">        <span class="comment">// Iterate through the rows of the output tensor and copy them to dst_data:</span></div><div class="line">        <span class="comment">// This is equivalent to:</span></div><div class="line">        <span class="comment">// for( unsigned int z = 0; z &lt; batch; ++z)</span></div><div class="line">        <span class="comment">// {</span></div><div class="line">        <span class="comment">//   for( unsigned int y = 0; y &lt; height; ++y)</span></div><div class="line">        <span class="comment">//   {</span></div><div class="line">        <span class="comment">//     memcpy( dst_data + z * (width*height) + y * width, output.buffer() + output.info()-&gt;offset_element_in_bytes(Coordinates(0,y,z)), width * sizeof(float));</span></div><div class="line">        <span class="comment">//   }</span></div><div class="line">        <span class="comment">// }</span></div><div class="line">        <span class="comment">// Except it works for an arbitrary number of dimensions</span></div><div class="line">        <a class="code" href="namespacearm__compute.xhtml#a6c0dcc38187027dcb89cd9724bc5a823">execute_window_loop</a>(output_window, [&amp;](<span class="keyword">const</span> Coordinates &amp; <span class="keywordtype">id</span>)</div><div class="line">        {</div><div class="line">            std::cout &lt;&lt; <span class="stringliteral">&quot;Copying one row starting from [&quot;</span> &lt;&lt; <span class="keywordtype">id</span>.x() &lt;&lt; <span class="stringliteral">&quot;,&quot;</span> &lt;&lt; <span class="keywordtype">id</span>.y() &lt;&lt; <span class="stringliteral">&quot;,&quot;</span> 
&lt;&lt; <span class="keywordtype">id</span>.z() &lt;&lt; <span class="stringliteral">&quot;]\n&quot;</span>;</div><div class="line">            <span class="comment">// Copy one whole row:</span></div><div class="line">            memcpy(dst_data + <span class="keywordtype">id</span>.z() * (width * height) + <span class="keywordtype">id</span>.y() * width, output_it.ptr(), width * <span class="keyword">sizeof</span>(float));</div><div class="line">        },</div><div class="line">        output_it);</div><div class="line"></div></div><!-- fragment --> <h1><a class="anchor" id="S4_7_memory_manager"></a>
 MemoryManager</h1>
 <p><a class="el" href="classarm__compute_1_1_i_memory_manager.xhtml">IMemoryManager</a> is a memory managing interface that can be used to reduce the memory requirements of a given pipeline by recycling temporary buffers.</p>
 <h2><a class="anchor" id="S4_7_1_memory_manager_components"></a>
@@ -299,7 +299,7 @@
 <!-- start footer part -->
 <div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
   <ul>
-    <li class="footer">Generated on Thu Dec 14 2017 23:48:34 for Compute Library by
+    <li class="footer">Generated on Wed Jan 24 2018 14:30:43 for Compute Library by
     <a href="http://www.doxygen.org/index.html">
     <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.8.11 </li>
   </ul>
commit	f45d5a9be1bf4d315a227b80617582b8eb4214d2	[log] [tgz]
author	Anthony Barbier <anthony.barbier@arm.com>	Wed Jan 24 16:23:15 2018 +0000
committer	Anthony Barbier <anthony.barbier@arm.com>	Wed Jan 24 16:23:15 2018 +0000
tree	29f24fc5f51448e831080d76eef3ac75d43c1934
parent	6943bb00e79fe2ea4c127dc04b3440c5b0b29ce0 [diff] [blame]