(nuttx-apps) 11/22: delete the neon conv2D

xiaoxiang Thu, 17 Oct 2024 18:40:30 -0700

This is an automated email from the ASF dual-hosted git repository.

xiaoxiang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nuttx-apps.git


commit 570102c5018c6b4010227a044dde894b0b16feec
Author: xinhaiteng <xinhait...@xiaomi.com>
AuthorDate: Thu Apr 25 11:31:10 2024 +0800

    delete the neon conv2D
    
    The complete implementation now lives separately in
    mlearning/tflite-micro/operators/neon, so delete this part.
    
    Signed-off-by: xinhaiteng <xinhait...@xiaomi.com>
---
 mlearning/tflite-micro/tflite-micro.patch | 114 +++++-------------------------
 1 file changed, 19 insertions(+), 95 deletions(-)

diff --git a/mlearning/tflite-micro/tflite-micro.patch b/mlearning/tflite-micro/tflite-micro.patch
index d39038d5a..7c39ee527 100644
--- a/mlearning/tflite-micro/tflite-micro.patch
+++ b/mlearning/tflite-micro/tflite-micro.patch
@@ -1,5 +1,20 @@
+From e6049c8ddd7e36db4993e3c6d8b4e5a7114db2c5 Mon Sep 17 00:00:00 2001
+From: renzhiyuan1 <renzhiyu...@xiaomi.com>
+Date: Mon, 4 Dec 2023 12:30:20 +0800
+Subject: [PATCH] Patch files in tflite-micro
+
+VELAPLATFO-20049
+
+Patch files in tflite-micro
+
+Change-Id: Ic683fe4f12221d214ca77515c7b9487bb76a923f
+Signed-off-by: renzhiyuan1 <renzhiyu...@xiaomi.com>
+---
+ signal/src/circular_buffer.cc | 2 ++
+ 1 file changed, 2 insertions(+)
+
 diff --git a/signal/src/circular_buffer.cc b/signal/src/circular_buffer.cc
-index 7638d912..3261be56 100644
+index 7638d912e..3261be56c 100644
 --- a/signal/src/circular_buffer.cc
 +++ b/signal/src/circular_buffer.cc
 @@ -19,7 +19,9 @@ limitations under the License.
@@ -12,97 +27,6 @@ index 7638d912..3261be56 100644
  
  namespace tflite {
  namespace tflm_signal {
-diff --git a/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h 
b/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h
-index eac00576..abfdea8c 100644
---- a/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h
-+++ b/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h
-@@ -18,6 +18,9 @@ limitations under the License.
- #include <algorithm>
- 
- #include "tensorflow/lite/kernels/internal/common.h"
-+#ifdef USE_NEON
-+#include <arm_neon.h>
-+#endif
- 
- namespace tflite {
- namespace reference_integer_ops {
-@@ -133,6 +136,79 @@ inline void ConvPerChannel(
-       }
-     }
-   }
-+#ifdef USE_NEON
-+  for (int batch = 0; batch < batches; ++batch) {
-+    for (int out_y = 0; out_y < output_height; ++out_y) {
-+      int in_y_origin = (out_y * stride_height) - pad_height;
-+      for (int out_x = 0; out_x < output_width; ++out_x) {
-+        int in_x_origin = (out_x * stride_width) - pad_width;
-+        int filter_start_offset = 0;
-+        for (int out_channel = 0; out_channel < output_depth; ++out_channel) {
-+          auto group = out_channel / filters_per_group;
-+          int32_t acc = 0;
-+          int8x8_t input_v = vdup_n_s8(0);
-+          int8x8_t filter_v = vdup_n_s8(0);
-+          int16x8_t mid_mul = vdupq_n_s16(0);
-+          int32x4_t res_v = vdupq_n_s32(0);
-+          int32x4_t filter_offset_v = vdupq_n_s32(0);
-+          int input_offset_temp = 0;
-+          for (int filter_y = 0; filter_y < filter_height; ++filter_y) {
-+            int in_y = in_y_origin + dilation_height_factor * filter_y;
-+            for (int filter_x = 0; filter_x < filter_width; ++filter_x) {
-+              int in_x = in_x_origin + dilation_width_factor * filter_x;
-+              const bool is_point_inside_image = (in_x >= 0) && (in_x < 
input_width) && (in_y >= 0) && (in_y < input_height);
-+              if (!is_point_inside_image)
-+                continue;
-+              int input_start_offset = ((batch * input_height + in_y) * 
input_width + in_x) * input_depth + group * filter_input_depth;
-+              int in_channel = 0;
-+              for (; in_channel < (filter_input_depth & -8); in_channel += 8) 
{
-+                input_v = vld1_s8(input_data + input_start_offset);
-+                input_start_offset += 8;
-+                filter_v = vld1_s8(filter_data + filter_start_offset);
-+                filter_start_offset += 8;
-+
-+                mid_mul = vmovl_s8(filter_v);
-+                filter_offset_v = vaddw_s16(filter_offset_v, 
vget_low_s16(mid_mul));
-+                filter_offset_v = vaddw_s16(filter_offset_v, 
vget_high_s16(mid_mul));
-+                mid_mul = vmull_s8(input_v, filter_v);
-+                res_v = vaddw_s16(res_v, vget_low_s16(mid_mul));
-+                res_v = vaddw_s16(res_v, vget_high_s16(mid_mul));
-+
-+              }
-+
-+              for (; in_channel < filter_input_depth; ++in_channel) {
-+                acc += (input_data[input_start_offset] +  input_offset) * 
filter_data[filter_start_offset];
-+                ++input_start_offset;
-+                ++filter_start_offset;
-+              }
-+            }
-+          }
-+          acc += vgetq_lane_s32(res_v, 0);
-+          acc += vgetq_lane_s32(res_v, 1);
-+          acc += vgetq_lane_s32(res_v, 2);
-+          acc += vgetq_lane_s32(res_v, 3);
-+          input_offset_temp += vgetq_lane_s32(filter_offset_v, 0);
-+          input_offset_temp += vgetq_lane_s32(filter_offset_v, 1);
-+          input_offset_temp += vgetq_lane_s32(filter_offset_v, 2);
-+          input_offset_temp += vgetq_lane_s32(filter_offset_v, 3);
-+          acc += input_offset_temp * input_offset;
-+
-+          if (bias_data)
-+          {
-+            acc += bias_data[out_channel];
-+          }
-+          acc = MultiplyByQuantizedMultiplier(
-+              acc, output_multiplier[out_channel], output_shift[out_channel]);
-+          acc += output_offset;
-+          acc = std::max(acc, output_activation_min);
-+          acc = std::min(acc, output_activation_max);
-+          output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] 
=
-+              static_cast<int8_t>(acc);
-+        }
-+      }
-+    }
-+  }
-+#endif
- }
- 
- 
+-- 
+2.25.1
+

(nuttx-apps) 11/22: delete the neon conv2D

Reply via email to