Bugfix for NumPyBinLoader with SubTensor output. (#345)

* Bugfix for NumPyBinLoader with SubTensor output.

When a SubTensor is used as output from a NumPyBinLoader, data should
not be written to the beginning of the buffer.

Change-Id: I6ed4b24710ac09b41ca92c7e21f24d44a3ed2881

* Reduce the buffer size with the same amount

Change-Id: Id260f58209ce2fbee877f7d6f4afbe95c6c74036

* SubTensors have to be accessed through execution windows
diff --git a/utils/GraphUtils.cpp b/utils/GraphUtils.cpp
index 2ff40b7..f190af4 100644
--- a/utils/GraphUtils.cpp
+++ b/utils/GraphUtils.cpp
@@ -24,6 +24,7 @@
 
 #include "utils/GraphUtils.h"
 #include "utils/Utils.h"
+#include "arm_compute/runtime/SubTensor.h"
 
 #ifdef ARM_COMPUTE_CL
 #include "arm_compute/core/CL/OpenCL.h"
@@ -206,7 +207,7 @@
 {
     std::mt19937 gen(_seed);
 
-    if(tensor.info()->padding().empty())
+    if(tensor.info()->padding().empty() && !dynamic_cast<SubTensor*>(&tensor))
     {
         for(size_t offset = 0; offset < tensor.info()->total_size(); offset += tensor.info()->element_size())
         {
@@ -349,7 +350,7 @@
     }
 
     // Read data
-    if(tensor.info()->padding().empty())
+    if(tensor.info()->padding().empty() && !dynamic_cast<SubTensor*>(&tensor))
     {
         // If tensor has no padding read directly from stream.
         stream.read(reinterpret_cast<char *>(tensor.buffer()), tensor.info()->total_size());