Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • jvet-ahg-nnvc/sadl
  • msantamaria/sadl
  • Weijie/sadl-extension
  • chaoyi_lin/sadl
  • Yucong/sadl
  • qliu/sadl
  • duliu/sadl
  • jacob/sadl
  • Yun_li/sadl
  • XiangLi/sadl
  • ruiying/sadl
  • NianxiangFu/sadl
  • WenzhuoMa/sadl
13 results
Show changes
Commits on Source (39)
Showing with 528 additions and 94 deletions
......@@ -3,6 +3,23 @@ stages:
variables:
GIT_SUBMODULE_STRATEGY: none
.build_template_windows:
stage: build
script:
- |
echo "[INFO] BUILD $CI_COMMIT_SHORT_SHA";
mkdir -p build;
cd build;
& "C:\Program Files\CMake\bin\cmake.exe" -G "Visual Studio 16 2019" -DCMAKE_BUILD_TYPE=Release -DSPARSE_MATMULT_SUPPORT=1 ../sample;
& "C:\Program Files\CMake\bin\cmake.exe" --build ./;
only:
refs:
- master
- dev_for_transformers
- merge_requests
variables:
- $CI_PROJECT_URL == 'https://vcgit.hhi.fraunhofer.de/jvet-ahg-nnvc/sadl'
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME == 'master'
.build_template_linux:
......@@ -19,6 +36,14 @@ variables:
cd utests/build;
cmake -DCMAKE_BUILD_TYPE=Release -DSPARSE_MATMULT_SUPPORT=1 ..;
make;
only:
refs:
- master
- dev_for_transformers
- merge_requests
variables:
- $CI_PROJECT_URL == 'https://vcgit.hhi.fraunhofer.de/jvet-ahg-nnvc/sadl'
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME == 'master'
build_ubuntu2004:
......@@ -31,4 +56,8 @@ build_ubuntu2204:
tags:
- ubuntu2204
build_vc192x:
extends: .build_template_windows
tags:
- vc192x
......@@ -127,6 +127,7 @@ class OPTYPE(IntEnum):
Resize = (24,)
Compare = (25,)
Where = (26,)
Minimum = (27,)
# "BatchMatMulV2" did not exist in Tensorflow 1.9. It exists in
# Tensorflow 1.15.
......@@ -681,8 +682,8 @@ def parse_graph_node(
myGraph[node.output[0]]["additional"] = {}
myGraph[node.output[0]]["additional"]["data"] = node
map_onnx_to_myGraph[node.output[0]] = node.output[0]
elif node.op_type == "Identity":
elif node.op_type == "Identity" or node.op_type == "Cast":
myGraph[node.output[0]] = {}
myGraph[node.output[0]]["op_type"] = OPTYPE.Identity
myGraph[node.output[0]]["inputs"] = [map_onnx_to_myGraph[n0name]]
......@@ -767,7 +768,7 @@ def parse_graph_node(
myGraph[node.output[0]]["op_type"] = OPTYPE.Expand
map_onnx_to_myGraph[node.output[0]] = node.output[0]
elif node.op_type == "Reshape" or node.op_type == "MatMul":
elif node.op_type == "Reshape":
# Const
myGraph[node.input[1]] = {}
myGraph[node.input[1]]["op_type"] = OPTYPE.Const
......@@ -790,11 +791,65 @@ def parse_graph_node(
if node.op_type == "Reshape":
myGraph[node.output[0]]["op_type"] = OPTYPE.Reshape
elif node.op_type == "MatMul":
myGraph[node.output[0]]["op_type"] = OPTYPE.MatMul
map_onnx_to_myGraph[node.output[0]] = node.output[0]
elif node.op_type == "MatMul":
# check the inputs
if is_constant(n0name, model_onnx.graph.initializer) and is_constant(
node.input[1], model_onnx.graph.initializer
):
quit("[ERROR] unsupported double constants MatMul", node)
swap_inputs = False
if is_constant(n0name, model_onnx.graph.initializer):
additional = {}
additional["data"] = node
n2 = getNodesWithOutput(n0name, model_onnx)
additional["dims"], additional["raw_data"], additional[
"dtype"
] = extract_additional_data(
n0name, node_annotation[n2.name].to_transpose, model_onnx.graph, verbose
)
map_onnx_to_myGraph[n0name] = n0name
myGraph[n0name] = {}
myGraph[n0name]["inputs"] = []
myGraph[n0name]["additional"] = additional
myGraph[n0name]["op_type"] = OPTYPE.Const
swap_inputs = True
if is_constant(node.input[1], model_onnx.graph.initializer):
additional = {}
additional["data"] = node
n2 = getNodesWithOutput(node.input[1], model_onnx)
additional["dims"], additional["raw_data"], additional[
"dtype"
] = extract_additional_data(
node.input[1],
node_annotation[n2.name].to_transpose,
model_onnx.graph,
verbose,
)
map_onnx_to_myGraph[node.input[1]] = node.input[1]
myGraph[node.input[1]] = {}
myGraph[node.input[1]]["inputs"] = []
myGraph[node.input[1]]["additional"] = additional
myGraph[node.input[1]]["op_type"] = OPTYPE.Const
myGraph[node.output[0]] = {}
myGraph[node.output[0]]["op_type"] = OPTYPE.MatMul
if swap_inputs:
myGraph[node.output[0]]["inputs"] = [
map_onnx_to_myGraph[node.input[1]],
map_onnx_to_myGraph[n0name],
]
else:
myGraph[node.output[0]]["inputs"] = [
map_onnx_to_myGraph[n0name],
map_onnx_to_myGraph[node.input[1]],
]
myGraph[node.output[0]]["additional"] = {}
myGraph[node.output[0]]["additional"]["data"] = node
map_onnx_to_myGraph[node.output[0]] = node.output[0]
elif node.op_type == "Concat":
# Const
myGraph[node.output[0]] = {}
......@@ -831,6 +886,17 @@ def parse_graph_node(
myGraph[node.output[0]]["additional"]["data"] = node
map_onnx_to_myGraph[node.output[0]] = node.output[0]
elif node.op_type == "Min":
myGraph[node.output[0]] = {}
myGraph[node.output[0]]["op_type"] = OPTYPE.Minimum
myGraph[node.output[0]]["inputs"] = [
map_onnx_to_myGraph[n0name],
map_onnx_to_myGraph[node.input[1]],
]
myGraph[node.output[0]]["additional"] = {}
myGraph[node.output[0]]["additional"]["data"] = node
map_onnx_to_myGraph[node.output[0]] = node.output[0]
elif node.op_type == "Unsqueeze":
# No need to parse Unsqueeze as SADL can handle it.
map_onnx_to_myGraph[node.output[0]] = node.output[0]
......@@ -1199,6 +1265,37 @@ def parse_graph_node(
myGraph[node.output[0]]["additional"]["data"] = node
map_onnx_to_myGraph[node.output[0]] = node.output[0]
elif node.op_type == "Equal":
additional = {}
additional["data"] = node
if is_constant(node.input[1], model_onnx.graph.initializer):
n2 = getNodesWithOutput(node.input[1], model_onnx) # constant
(
additional["dims"],
additional["raw_data"],
additional["dtype"],
) = extract_additional_data(
node.input[1],
False,
model_onnx.graph,
verbose,
)
myGraph[node.input[1]] = {}
myGraph[node.input[1]]["op_type"] = OPTYPE.Const
myGraph[node.input[1]]["inputs"] = []
myGraph[node.input[1]]["additional"] = additional
map_onnx_to_myGraph[node.input[1]] = node.input[1]
myGraph[node.output[0]] = {}
myGraph[node.output[0]]["op_type"] = OPTYPE.Compare
myGraph[node.output[0]]["inputs"] = [map_onnx_to_myGraph[n0name]] + [
map_onnx_to_myGraph[node.input[1]]
]
myGraph[node.output[0]]["additional"] = {}
myGraph[node.output[0]]["additional"]["data"] = node
myGraph[node.output[0]]["additional"]["mode"] = 2
map_onnx_to_myGraph[node.output[0]] = node.output[0]
else:
raise Exception("[ERROR] node not supported:\n{})".format(node))
......@@ -1677,6 +1774,13 @@ def annotate_node(
node_annotation[n2.name].to_transpose = True
node_annotation[n2.name].layout_onnx = "nhwc"
elif node.op_type == "MatMul":
if global_data_layout == "nchw":
n2 = getNodesWithOutput(node.input[1], model_onnx)
node_annotation[n2.name].add_transpose_after = True
node_annotation[node.name].add_transpose_before = True
node_annotation[node.name].add_transpose_after = True
elif node.op_type == "Gemm":
n2 = getInitializer(node.input[1], model_onnx)
if global_data_layout == "nchw":
......@@ -1689,6 +1793,7 @@ def annotate_node(
node_annotation[n2.name].to_transpose = True
nexts = getNodesWithInput(node.output[0], model_onnx)
for n in nexts:
annotate_node(
n, model_onnx, node_annotation, global_data_layout, verbose
......
......@@ -107,7 +107,7 @@ void bilinear_in_channels_wo_simd(const Tensor<T> &data, const T2 coeffs[], cons
constexpr int im_nb = 0;
int in_D = data.dims()[3];
const int &x_ori_left = pos[0], &y_ori_top = pos[1], &x_ori_right = pos[2], &y_ori_bottom = pos[3];
const int pos_table[4][2] = { y_ori_top, x_ori_left, y_ori_top, x_ori_right, y_ori_bottom, x_ori_left, y_ori_bottom, x_ori_right };
const int pos_table[4][2] = { {y_ori_top, x_ori_left}, {y_ori_top, x_ori_right}, {y_ori_bottom, x_ori_left}, {y_ori_bottom, x_ori_right} };
static std::vector<T2> temp_buffer;
temp_buffer.resize(in_D);
......@@ -142,7 +142,7 @@ inline void bilinear_in_channels_simd256(const Tensor<float> &data, const float
int in_D = data.dims()[3];
assert(in_D % 8 == 0); // Should be used with mod8 data.
const int &x_ori_left = pos[0], &y_ori_top = pos[1], &x_ori_right = pos[2], &y_ori_bottom = pos[3];
const int pos_table[4][2] = { y_ori_top, x_ori_left, y_ori_top, x_ori_right, y_ori_bottom, x_ori_left, y_ori_bottom, x_ori_right };
const int pos_table[4][2] = { {y_ori_top, x_ori_left}, {y_ori_top, x_ori_right}, {y_ori_bottom, x_ori_left}, {y_ori_bottom, x_ori_right} };
static std::vector<float> temp_buffer;
temp_buffer.resize(in_D);
......@@ -182,7 +182,7 @@ inline void bilinear_in_channels_simd256(const Tensor<int16_t> &data, const int3
using T = int16_t;
#endif
const int &x_ori_left = pos[0], &y_ori_top = pos[1], &x_ori_right = pos[2], &y_ori_bottom = pos[3];
const int pos_table[4][2] = { y_ori_top, x_ori_left, y_ori_top, x_ori_right, y_ori_bottom, x_ori_left, y_ori_bottom, x_ori_right };
const int pos_table[4][2] = { {y_ori_top, x_ori_left}, {y_ori_top, x_ori_right}, {y_ori_bottom, x_ori_left}, {y_ori_bottom, x_ori_right} };
static std::vector<int32_t> temp_buffer;
temp_buffer.resize(in_D);
......@@ -227,7 +227,7 @@ inline void bilinear_in_channels_simd512(const Tensor<float> &data, const float
int in_D = data.dims()[3];
assert(in_D % 16 == 0); // Should be used with mod16 data.
const int &x_ori_left = pos[0], &y_ori_top = pos[1], &x_ori_right = pos[2], &y_ori_bottom = pos[3];
const int pos_table[4][2] = { y_ori_top, x_ori_left, y_ori_top, x_ori_right, y_ori_bottom, x_ori_left, y_ori_bottom, x_ori_right };
const int pos_table[4][2] = { {y_ori_top, x_ori_left}, {y_ori_top, x_ori_right}, {y_ori_bottom, x_ori_left}, {y_ori_bottom, x_ori_right} };
static std::vector<float> temp_buffer;
temp_buffer.resize(in_D);
......@@ -269,7 +269,7 @@ inline void bilinear_in_channels_simd512(const Tensor<int16_t> &data, const int3
using T = int16_t;
#endif
const int &x_ori_left = pos[0], &y_ori_top = pos[1], &x_ori_right = pos[2], &y_ori_bottom = pos[3];
const int pos_table[4][2] = { y_ori_top, x_ori_left, y_ori_top, x_ori_right, y_ori_bottom, x_ori_left, y_ori_bottom, x_ori_right };
const int pos_table[4][2] = { {y_ori_top, x_ori_left}, {y_ori_top, x_ori_right}, {y_ori_bottom, x_ori_left}, {y_ori_bottom, x_ori_right }};
static std::vector<int32_t> temp_buffer;
temp_buffer.resize(in_D);
......
......@@ -74,7 +74,8 @@ struct OperationType
Resize = 24,
Compare = 25,
Where = 26,
OperationTypeCount = 27
Minimum = 27,
OperationTypeCount = 28
};
};
......
......@@ -40,7 +40,8 @@ namespace layers
enum class Compare_mode
{
LessThan,
GreaterThan
GreaterThan,
EqualTo
};
template<typename T> class Compare : public Layer<T>
{
......@@ -56,6 +57,7 @@ protected:
virtual bool loadInternal(std::istream &file, Version) override;
bool apply_less(std::vector<Tensor<T> *> &in);
bool apply_greater(std::vector<Tensor<T> *> &in);
bool apply_equal_to(std::vector<Tensor<T> *> &in);
Compare_mode m_mode;
DUMP_MODEL_EXT;
......@@ -70,6 +72,8 @@ template<typename T> bool Compare<T>::apply(std::vector<Tensor<T> *> &in)
return apply_less(in);
else if(m_mode == Compare_mode::GreaterThan)
return apply_greater(in);
else if(m_mode == Compare_mode::EqualTo)
return apply_equal_to(in);
else return false;
}
......@@ -151,6 +155,55 @@ template<typename T> bool Compare<T>::apply_greater(std::vector<Tensor<T> *> &in
return true;
}
// Element-wise equality compare: m_out[i] = (in[0][i] == in[1][i]) as a 0/1 value.
// Both operands are first brought to a common fixed-point scale by left-shifting
// the operand with the smaller quantizer; the output is an unquantized boolean tensor.
template<typename T> bool Compare<T>::apply_equal_to(std::vector<Tensor<T> *> &in)
{
#if DEBUG_MODEL
// Exact equality on floats is fragile; warn once in debug builds.
if constexpr (std::is_same<T, float>::value)
{
static bool once = true;
if (once)
{
std::cout << "[WARNING] using equal layer with float: unexpected results can occur" << std::endl;
}
once = false;
}
#endif
const Tensor<T> &A = *in[0];
const Tensor<T> &B = *in[1];
const int &A_q = A.quantizer;
const int &B_q = B.quantizer;
// Shift whichever side has the smaller quantizer up to the larger one (never negative).
const int A_shift = std::max(0, B_q - A_q);
const int B_shift = std::max(0, A_q - B_q);
m_out.quantizer = 0;// bool tensor
if(B.dims().size() == 1)
{
// NOTE(review): this branch always compares against B[0], i.e. it treats a 1-D B as a
// scalar broadcast — confirm that init() only admits B.size() == 1 when B is 1-D.
for (int i = 0; i < m_out.size(); i++)
{
T A_i = A[i];
T B_i = B[0];
ComputationType<T>::shift_left(A_i, A_shift);//quantization
ComputationType<T>::shift_left(B_i, B_shift);//quantization
T z = A_i == B_i;
COUNTERS(z);
m_out[i] = z;
}
}
else
{
// Same-shape operands: straight element-wise comparison.
for (int i = 0; i < m_out.size(); i++)
{
T A_i = A[i];
T B_i = B[i];
ComputationType<T>::shift_left(A_i, A_shift);//quantization
ComputationType<T>::shift_left(B_i, B_shift);//quantization
T z = A_i == B_i;
COUNTERS(z);
m_out[i] = z;
}
}
return true;
}
template<typename T> bool Compare<T>::init(const std::vector<Tensor<T> *> &in)
{
if (in.size() != 2)
......@@ -168,6 +221,8 @@ template<typename T> bool Compare<T>::loadInternal(std::istream &file, Version)
m_mode = Compare_mode::LessThan;
else if(x == (int32_t) Compare_mode::GreaterThan)
m_mode = Compare_mode::GreaterThan;
else if(x == (int32_t) Compare_mode::EqualTo)
m_mode = Compare_mode::EqualTo;
else
{
std::cerr << "[ERROR] invalid mode: " << x << std::endl;
......
......@@ -73,8 +73,10 @@ template<typename T> bool Concat<T>::apply(std::vector<Tensor<T> *> &in)
}
m_out.quantizer = qmin; // adapt output width to last input
m_out.border_skip = in[0]->border_skip;
for (int i = 1; i < nb_in; ++i)
m_out.border_skip = std::max(m_out.border_skip, in[i]->border_skip);
for (int i = 1; i < nb_in; ++i) {
m_out.border_skip.first = std::max(m_out.border_skip.first, in[i]->border_skip.first);
m_out.border_skip.second = std::max(m_out.border_skip.second, in[i]->border_skip.second);
}
const Dimensions dim = in[0]->dims();
if (dim.size() == 2)
......
......@@ -158,11 +158,11 @@ template<typename T> bool Conv2D<T>::apply(std::vector<Tensor<T> *> &in)
{
return apply_s<1, 1>(A, kernel);
}
else if (m_strides[1] == 1 && m_strides[2] == 2 && m_groups == 1)
else if (m_strides[1] == 1 && m_strides[2] == 2)
{
return apply_s<1, 2>(A, kernel);
}
else if ((m_strides[1] == 2 && m_strides[2] == 1) && m_groups == 1)
else if (m_strides[1] == 2 && m_strides[2] == 1)
{
return apply_s<2, 1>(A, kernel);
}
......@@ -209,11 +209,12 @@ template<typename T> template<int s_h, int s_w> bool Conv2D<T>::apply_s(const Te
{ // skip border
if (s_h == 1 && s_w == 1)
{
start_h += m_out.border_skip;
start_w += m_out.border_skip;
in_H -= m_out.border_skip;
in_W -= m_out.border_skip;
m_out.border_skip++;
start_h += m_out.border_skip.first;
start_w += m_out.border_skip.second;
in_H -= m_out.border_skip.first;
in_W -= m_out.border_skip.second;
m_out.border_skip.first++;
m_out.border_skip.second++;
}
}
conv2d_3x3_s_core_dispatch<s_h, s_w>(A, kernel);
......
......@@ -113,7 +113,7 @@ template<typename T> bool Expand<T>::init(const std::vector<Tensor<T> *> &in)
std::cerr << "[ERROR] quantizer on reshape dimensions data layer" << std::endl;
return false;
}
copy(in[1]->begin(), in[1]->end(), dim.begin());
for(int64_t i=0;i<in[1]->size();i++) dim[i]=(int)((*in[1])[i]);
// current restriction: broadcast only scalar to shape or expand last channel =1 of a tensor of dim 4
bool ok = false;
if (in[0]->size() == 1)
......
......@@ -55,6 +55,8 @@ protected:
virtual bool loadInternal(std::istream &file, Version v) override;
template<int NN> bool apply_dim2(std::vector<Tensor<T> *> &in);
template<int NN> bool apply_dim3(std::vector<Tensor<T> *> &in);
template<int NN> bool apply_dim4(std::vector<Tensor<T> *> &in);
#if __AVX2__
bool apply_dim2_simd8(std::vector<Tensor<T> *> &in) { return apply_dim2<8>(in); }
bool apply_dim2_simd16(std::vector<Tensor<T> *> &in) { return apply_dim2_simd8(in); }
......@@ -127,6 +129,9 @@ template<typename T> bool MatMul<T>::apply(std::vector<Tensor<T> *> &in)
case 3:
return apply_dim3<1>(in);
break;
case 4:
return apply_dim4<1>(in);
break;
default:
std::cerr << "Logical error MatMul::apply(std::vector<Tensor<T> *> &in)" << A.dims() << ' ' << B.dims() << std::endl;
return false;
......@@ -348,6 +353,48 @@ template<typename T> template<int NN> bool MatMul<T>::apply_dim3(std::vector<Ten
}
return true;
}
// Generic (non-SIMD) batched matrix multiply for 4-D tensors:
// m_out(0,b,i,t) = sum_j A(0,b,i,j) * B(0,b,j,t), requantized by `shift`.
// Assumes the leading dimension of both tensors is 1 (A: [1,N,H,W], B: [1,N,W,R])
// — indexing below hard-codes index 0 for that axis; init() is expected to
// enforce this layout (TODO confirm against MatMul<T>::init).
// NN is the SIMD width granule; in this generic path it only rounds W down.
template<typename T> template<int NN> bool MatMul<T>::apply_dim4(std::vector<Tensor<T> *> &in)
{
const Tensor<T> &A{ *in[0] };
const Tensor<T> &B{ *in[1] };
// Requantization amount: weight quantizer plus the layer's internal shift m_q.
int shift{ in[1]->quantizer + m_q };
const int last = A.dims().size() - 1;
const int N{ A.dims()[last - 2] };   // batch count (axis 1 of a 4-D tensor)
const int H{ A.dims()[last - 1] };   // rows of each A matrix
const int R{ B.dims().back() };      // columns of each B matrix
const int W{ (A.dims()[last] / NN) * NN };   // contraction length, rounded down to a multiple of NN
(void) W;
#if __AVX2__ && DEBUG_SIMD
std::cout << "\n[WARN] generic version matmul dim4 " << A.dims() << ' ' << B.dims() << "(H=" << H << ") " << (N * R * H * W) / 1000 << " kMAC" << std::endl;
#endif // SIMD
constexpr int idx_start{ 0 };
const int idx_end{ W };
for (int b = 0; b < N; ++b)
{
for (int i = 0; i < H; ++i)
{
for (int t = 0; t < R; ++t)
{
// Accumulate in the wider ComputationType to limit overflow of T.
typename ComputationType<T>::type x = 0;
{
for (int j = idx_start; j < idx_end; ++j)
{
x += (typename ComputationType<T>::type) A(0, b, i, j) * B(0, b, j, t);
COUNTERS_MAC(B(0, b, j, t));
}
}
ComputationType<T>::quantize(x, shift);
COUNTERS(x);
SATURATE(x);
m_out(0, b, i, t) = (T) x;
}
}
}
return true;
}
#if SPARSE_SUPPORT
template<typename T> bool MatMul<T>::apply_sparse_matmul(std::vector<Tensor<T> *> &in)
......@@ -558,7 +605,7 @@ template<typename T> bool MatMul<T>::init(const std::vector<Tensor<T> *> &in)
// B: const (because assumed transposed)
// 1- A [x,y] B[y,z] || A [x,y,z] B[x,z,t] || A [1,x,y,z] B[1,x,z,t]
// 2- A [1,x,y] B[y,z] || A [1,x,y,z] B[x,z,t]
if (in[1]->dims().size() < 2 || in[1]->dims().size() > 3)
if (in[1]->dims().size() < 2 || in[1]->dims().size() > 4)
{
return false;
}
......
......@@ -72,35 +72,10 @@ template<typename T> bool MaxPool<T>::apply(std::vector<Tensor<T> *> &in)
const int offset_end = m_kernel[1] / 2;
const int offset_start = m_kernel[1] - 1 - offset_end;
const int step = m_strides[1];
const int in_H = in[0]->dims()[1];
// currently adhoc start
int start = 0;
if (step == 1)
{
start = 0;
}
else if (step == 2)
{
// if (in_H % 2 == 0)
// start = 1;
// else
start = 0;
}
else if (step == 3)
{
if (in_H % 2 == 0)
start = 0;
else
start = 1;
}
else
{
std::cerr << "[ERROR] to do" << std::endl;
assert(false);
exit(-1);
}
int start = offset_start;
m_out.quantizer = in[0]->quantizer; // adapt output width to bias
m_out.border_skip = in[0]->border_skip; // to check
......
/* The copyright in this software is being made available under the BSD
* License, included below. This software may be subject to other third party
* and contributor rights, including patent rights, and no such rights are
* granted under this license.
*
* Copyright (c) 2010-2024, ITU/ISO/IEC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include "layer.h"
namespace sadl
{
namespace layers
{
// Element-wise minimum layer (ONNX "Min" with two inputs).
// Computes min(in[0], in[1]); in[1] may be broadcast when it is a singleton or a
// vector matching the last dimension of in[0] (see init()). The layer mutates its
// first input (mutateInput() returns true): apply() swaps in[0] into m_out.
template<typename T> class Minimum : public Layer<T>
{
public:
using Layer<T>::Layer;
using Layer<T>::m_out; // to avoid this->
using Layer<T>::m_initDone;
// Compute m_out = min(in[0], in[1]) with broadcasting of in[1].
virtual bool apply(std::vector<Tensor<T> *> &in) override;
// Validate input shapes and size m_out to in[0]'s dims.
virtual bool init(const std::vector<Tensor<T> *> &in) override;
// The first input is consumed (swapped into m_out) by apply().
virtual bool mutateInput() const override { return true; }
protected:
// Minimum carries no serialized parameters; reads nothing from the stream.
virtual bool loadInternal(std::istream &file, Version) override;
};
// Element-wise minimum: m_out = min(in[0], in[1]), with in[1] broadcast when it is
// a singleton or a per-last-dimension vector. Mutates in[0] (see mutateInput()):
// the first input tensor is swapped into m_out and updated in place.
template<typename T> bool Minimum<T>::apply(std::vector<Tensor<T> *> &in)
{
assert(in.size() == 2);
if (in[0] == in[1])
{
// Aliased inputs would be clobbered by the swap below.
std::cerr << " input aliasing" << std::endl;
return false;
}
// Align in[1]'s fixed-point scale to in[0]'s: shift = in[0]->quantizer - in[1]->quantizer.
// NOTE(review): shift can be negative; assumes ComputationType<T>::shift_left treats a
// negative amount as a right shift — confirm against its definition.
const int shift = -(in[1]->quantizer - in[0]->quantizer);
swap(*in[0], m_out);
/*
m_out was resized to in[0]'s dims in init(), so after the swap in[0]->dims()
still equals the original first-input dims. If the condition below is false,
init() guarantees in[1] is a singleton or a broadcastable vector (1-D, or
2-D with a leading dimension of 1).
*/
if (in[0]->dims() == in[1]->dims())
{
// Same shape: straight element-wise minimum.
for (auto it0 = m_out.begin(), it1 = in[1]->begin(); it0 != m_out.end(); ++it0, ++it1)
{
T z = *it1;
ComputationType<T>::shift_left(z, shift);
*it0 = std::min(*it0, z);
}
}
else
{
const Tensor<T> &B{ *in[1] };
if (B.size() == 1)
{
// Scalar broadcast: requantize once, compare against every element.
T value{ B[0] };
ComputationType<T>::shift_left(value, shift);
for (auto it0 = m_out.begin(); it0 != m_out.end(); ++it0)
{
*it0 = std::min(*it0, value);
}
}
else if (in[0]->dims().size() == 2)
{
// Broadcast B along the last dimension of a 2-D tensor.
const int N{ in[0]->dims()[0] };
const int H{ in[0]->dims()[1] };
for (int n = 0; n < N; ++n)
for (int i = 0; i < H; ++i)
{
T z = B[i];
ComputationType<T>::shift_left(z, shift);
m_out(n, i) = std::min(m_out(n, i), z);
}
}
else if (in[0]->dims().size() == 3)
{
// Broadcast B along the last dimension of a 3-D tensor.
const int N{ in[0]->dims()[0] };
const int H{ in[0]->dims()[1] };
const int W{ in[0]->dims()[2] };
for (int n = 0; n < N; ++n)
for (int i = 0; i < H; ++i)
for (int j = 0; j < W; ++j)
{
T z = B[j];
ComputationType<T>::shift_left(z, shift);
m_out(n, i, j) = std::min(m_out(n, i, j), z);
}
}
else if (in[0]->dims().size() == 4)
{
// Broadcast B along the last dimension of a 4-D tensor.
const int N{ in[0]->dims()[0] };
const int H{ in[0]->dims()[1] };
const int W{ in[0]->dims()[2] };
const int K{ in[0]->dims()[3] };
for (int n = 0; n < N; ++n)
for (int i = 0; i < H; ++i)
for (int j = 0; j < W; ++j)
for (int k = 0; k < K; ++k)
{
T z = B[k];
ComputationType<T>::shift_left(z, shift);
m_out(n, i, j, k) = std::min(m_out(n, i, j, k), z);
}
}
}
return true;
}
// Validate the two inputs and size the output tensor.
// Accepted shapes for in[1]: same dims as in[0], a singleton, or a vector
// (1-D, or 2-D with a leading 1) whose length equals in[0]'s last dimension.
template<typename T> bool Minimum<T>::init(const std::vector<Tensor<T> *> &in)
{
SADL_DBG(std::cout << " - " << in[0]->dims() << ' ' << in[1]->dims() << std::endl);
if (in.size() != 2)
{
return false;
}
const bool is_singleton = (in[1]->size() == 1);
const bool is_vector_like =
  in[1]->dims().size() == 1 || (in[1]->dims().size() == 2 && in[1]->dims()[0] == 1);
if (!is_singleton)
{
  if (is_vector_like)
  {
    // Broadcast over the last dimension: lengths must agree.
    if (in[1]->size() != in[0]->dims().back())
    {
      return false;
    }
  }
  else if (!(in[0]->dims() == in[1]->dims()))
  {
    // Otherwise the shapes must match exactly.
    return false;
  }
}
m_out.resize(in[0]->dims());
m_initDone = true;
return true;
}
// Minimum has no extra serialized parameters: nothing to read from the stream.
template<typename T> bool Minimum<T>::loadInternal(std::istream &, Version)
{
  return true;
}
} // namespace layers
} // namespace sadl
......@@ -66,7 +66,7 @@ template<typename T> bool Placeholder<T>::apply(std::vector<Tensor<T> *> &in)
{ // v2
m_out.quantizer = m_q;
}
m_out.border_skip = 0;
m_out.border_skip = {0,0};
return true;
}
......
......@@ -193,7 +193,6 @@ template<typename T> template<bool multialpha> bool PReLU<T>::apply_scalar(std::
#if __AVX2__
template<> template<bool multialpha> inline bool PReLU<float>::apply_simd256(std::vector<Tensor<float> *> &in) // simd256 float
{
exit(-1); // to correct
Tensor<float> &A = *in[1];
swap(*in[0], m_out);
float *const data_ptr = m_out.data();
......@@ -230,8 +229,8 @@ template<> template<bool multialpha> inline bool PReLU<int16_t>::apply_simd256(s
const __m256i max = _mm256_set1_epi32(32767);
const __m256i min = _mm256_set1_epi32(-32768);
const __m256i zeros = _mm256_setzero_si256();
const int N = m_out.size();
for (int iter = 0; iter < N; iter += 16)
const auto N = m_out.size();
for (int64_t iter = 0; iter < N; iter += 16)
{
int16_t *aptr = data_ptr + iter;
auto a = _mm256_load_si256((__m256i *) aptr); // load
......@@ -294,7 +293,7 @@ template<> template<bool multialpha> inline bool PReLU<float>::apply_simd512(std
const float *const alpha_ptr = A.data();
const __m512 m_zeros = _mm512_setzero_ps();
__m512 alpha = _mm512_set1_ps(*A.data());
for (int iter = 0; iter < m_out.size(); iter += 16)
for (int64_t iter = 0; iter < m_out.size(); iter += 16)
{
if (multialpha)
alpha = _mm512_load_ps(alpha_ptr + iter % A.size());
......@@ -329,9 +328,9 @@ template<> template<bool multialpha> inline bool PReLU<int16_t>::apply_simd512(s
static constexpr int16_t data[]={0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62};
const auto shuffle= _mm512_loadu_si512((void *)data);
const int N = m_out.size();
const auto N = m_out.size();
for (int iter = 0; iter < N; iter += 32)
for (int64_t iter = 0; iter < N; iter += 32)
{
int16_t *aptr = data_ptr + iter;
auto a = _mm512_loadu_si512((__m512i *) aptr); // load
......
......@@ -85,7 +85,7 @@ template<typename T> bool Resize<T>::loadInternal(std::istream &file, Version v)
int32_t x = 0;
file.read((char *) &x, sizeof(x));
m_input_label = x;
SADL_DBG(std::cout << " - input_lable: " << m_input_label << std::endl);
SADL_DBG(std::cout << " - input_label: " << m_input_label << std::endl);
file.read((char *) &x, sizeof(x));
m_coordinate_transformation_mode = x;
SADL_DBG(std::cout << " - coordinate_transformation_mode: " << m_coordinate_transformation_mode << std::endl);
......@@ -132,24 +132,28 @@ template<typename T> bool Resize<T>::init(const std::vector<Tensor<T> *> &in)
int W_in = in[0]->dims()[2];
int C = in[0]->dims()[3];
// scale factor
float scale_N = 0, scale_C = 0, scale_H = 0, scale_W = 0;
int scale_N = 0, scale_C = 0, scale_H = 0, scale_W = 0;
if (m_input_label == 1) // inputs are X and sizes
{
scale_N = in[1]->data()[0] / (float)N;
scale_C = in[1]->data()[1] / (float)C;
scale_H = in[1]->data()[2] / (float)H_in;
scale_W = in[1]->data()[3] / (float)W_in;
scale_N = (int)round(in[1]->data()[0] / (float)N);
scale_C = (int)round(in[1]->data()[1] / (float)C);
scale_H = (int)round(in[1]->data()[2] / (float)H_in);
scale_W = (int)round(in[1]->data()[3] / (float)W_in);
}
else if (m_input_label == 2) // inputs are X and scales
{
scale_N = in[1]->data()[0];
scale_C = in[1]->data()[1];
scale_H = in[1]->data()[2];
scale_W = in[1]->data()[3];
scale_N = (int)round(in[1]->data()[0]);
scale_C = (int)round(in[1]->data()[1]);
scale_H = (int)round(in[1]->data()[2]);
scale_W = (int)round(in[1]->data()[3]);
} else {
std::cerr << "[ERROR] invalid type " << m_input_label<< std::endl;
return false;
}
if (scale_N != 1 || scale_H != 2 || scale_W != 2 || scale_C != 1)
{
std::cerr << "[ERROR] invalid scale factor: (" << scale_N << ", " << scale_H << ", " << scale_W << ", " << scale_C << ")" << std::endl;
std::cerr << "[ERROR] invalid scale factor: input: "<<in[0]->dims()<<" scales: "<<*in[1]<<" result=(" << scale_N << ", " << scale_H << ", " << scale_W << ", " << scale_C << ")" << std::endl;
return false;
}
scale_factors.resize(in[1]->dims());
......
......@@ -72,9 +72,9 @@ template<typename T> bool ScatterND<T>::apply(std::vector<Tensor<T> *> &in)
{
for (int c = 0; c < dim_C; c++)
{
index_H = indices(h, w, c, 1);
index_W = indices(h, w, c, 2);
index_C = indices(h, w, c, 3);
index_H = (int)indices(h, w, c, 1);
index_W = (int)indices(h, w, c, 2);
index_C = (int)indices(h, w, c, 3);
m_out(0, index_H, index_W, index_C) = updates(0, h, w, c); // n==1
}
}
......
......@@ -67,7 +67,7 @@ template<typename T> bool Shape<T>::init(const std::vector<Tensor<T> *> &in)
d.resize(1);
d[0] = in[0]->dims().size();
m_out.resize(d);
copy(in[0]->dims().begin(), in[0]->dims().end(), m_out.begin());
for(int i=0;i<in[0]->dims().size();i++) m_out[i]=(T)(in[0]->dims()[i]);
m_initDone = true;
return true;
}
......
......@@ -46,6 +46,7 @@ public:
virtual bool apply(std::vector<Tensor<T> *> &in) override;
virtual bool init(const std::vector<Tensor<T> *> &in) override;
virtual bool mutateInput() const override { return true; }
protected:
virtual bool loadInternal(std::istream &file, Version) override;
......@@ -55,22 +56,44 @@ protected:
template<typename T> bool Where<T>::apply(std::vector<Tensor<T> *> &in)
{
assert(in.size() == 3);
assert(in[0]->dims() == m_out.dims());
assert(in[0]->dims() == in[1]->dims() || (in[1]->dims().size() == 1 && in[1]->dims()[0] == 1));
assert(in[0]->dims() == in[2]->dims() || (in[2]->dims().size() == 1 && in[2]->dims()[0] == 1));
if (in[0]->size() != 1)
{
assert(in[0]->dims() == m_out.dims());
assert(in[0]->dims() == in[1]->dims() || (in[1]->dims().size() == 1 && in[1]->dims()[0] == 1));
assert(in[0]->dims() == in[2]->dims() || (in[2]->dims().size() == 1 && in[2]->dims()[0] == 1));
}
else
{
assert(in[1]->dims() == m_out.dims());
assert(in[1]->dims() == in[2]->dims());
}
const Tensor<T> &condition = *in[0];
const Tensor<T> &A = *in[1];
const Tensor<T> &B = *in[2];
m_out.quantizer = A.quantizer > B.quantizer ? A.quantizer : B.quantizer;
for (int i = 0; i < m_out.size(); i++)
if (condition.size() == 1)
{
if (condition[0])
{
swap(*in[1], m_out);
}
else
{
swap(*in[2], m_out);
}
}
else
{
const T A_i = (A.dims().size() == 1) ? A[0] : A[i];
const T B_i = (B.dims().size() == 1) ? B[0] : B[i];
typename ComputationType<T>::type z = condition[i] ? A_i : B_i;
const int z_q = condition[i] ? A.quantizer : B.quantizer ;
ComputationType<T>::shift_left(z, m_out.quantizer - z_q);
COUNTERS(z);
m_out[i] = z;
const Tensor<T> &A = *in[1];
const Tensor<T> &B = *in[2];
m_out.quantizer = A.quantizer > B.quantizer ? A.quantizer : B.quantizer;
for (int i = 0; i < m_out.size(); i++)
{
const T A_i = (A.dims().size() == 1) ? A[0] : A[i];
const T B_i = (B.dims().size() == 1) ? B[0] : B[i];
typename ComputationType<T>::type z = condition[i] ? A_i : B_i;
const int z_q = condition[i] ? A.quantizer : B.quantizer ;
ComputationType<T>::shift_left(z, m_out.quantizer - z_q);
COUNTERS(z);
m_out[i] = z;
}
}
return true;
}
......@@ -80,7 +103,10 @@ template<typename T> bool Where<T>::init(const std::vector<Tensor<T> *> &in)
{
if (in.size() != 3)
return false;
m_out.resize(in[0]->dims());//condition dims
if (in[0]->size() == 1)//condition dims
m_out.resize(in[1]->dims());
else
m_out.resize(in[0]->dims());
m_initDone = true;
return true;
}
......
......@@ -59,6 +59,7 @@
#include "layer_resize.h"
#include "layer_compare.h"
#include "layer_where.h"
#include "layer_minimum.h"
namespace sadl
{
......@@ -99,6 +100,8 @@ inline std::string opName(const OperationType::Type op)
DIRTYCASEPRINT(GridSample);
DIRTYCASEPRINT(Resize);
DIRTYCASEPRINT(Compare);
DIRTYCASEPRINT(Minimum);
DIRTYCASEPRINT(Where);
default:
oss << "??";
break;
......
......@@ -57,7 +57,7 @@ private:
std::vector<LayerData> m_data;
int32_t m_nb_inputs = 0;
static constexpr int kMaxInputByLayer = 2;
static constexpr int kMaxLayers = 2048;
static constexpr int kMaxLayers = 8192;
std::vector<typename layers::Layer<T>::Id> getLayerIdsWithInput(typename layers::Layer<T>::Id id) const;
void insertCopyLayers();
void reshapeConv2DFilters();
......@@ -75,7 +75,7 @@ public:
// aditionnal info
std::vector<Tensor<T>> getInputsTemplate() const;
const std::vector<typename layers::Layer<T>::Id> &getIdsOutput() const { return m_ids_output; }
int nbOutputs() const { return m_ids_output.size(); }
size_t nbOutputs() const { return m_ids_output.size(); }
std::vector<typename layers::Layer<T>::Id> getLayersId() const;
const LayerData & getLayer(const typename layers::Layer<T>::Id &id) const;
LayerData & getLayer(const typename layers::Layer<T>::Id &id);
......@@ -185,6 +185,9 @@ template<typename T> std::unique_ptr<layers::Layer<T>> createLayer(int32_t id, l
case layers::OperationType::Where:
return std::unique_ptr<layers::Layer<T>>(new layers::Where<T>{ id, op });
break;
case layers::OperationType::Minimum:
return std::unique_ptr<layers::Layer<T>>(new layers::Minimum<T>{ id, op });
break;
case layers::OperationType::OperationTypeCount:
break; // no default on purpose
}
......@@ -246,7 +249,7 @@ template<typename T> bool Model<T>::load(std::istream &file)
if ((std::is_same<T, float>::value && x != layers::TensorInternalType::Float) || (std::is_same<T, int32_t>::value && x != layers::TensorInternalType::Int32)
|| (std::is_same<T, int16_t>::value && x != layers::TensorInternalType::Int16))
{
std::cerr << "[ERROR] wrong model type and Model<T>" << std::endl;
std::cerr << "[ERROR] wrong model type and Model<T> " << std::endl;
return false;
}
SADL_DBG(std::cout << "[INFO] Model type: " << (int) x << std::endl);
......@@ -622,10 +625,13 @@ template<typename T> typename Model<T>::Stat Model<T>::printOverflow(bool printi
<< "]: overflow: " << m_data[layer_cnt].layer->cpt_overflow << '/' << m_data[layer_cnt].layer->cpt_op << " ("
<< m_data[layer_cnt].layer->cpt_overflow * 100. / m_data[layer_cnt].layer->cpt_op << "%)" << std::endl;
}
else if (printinfo && m_data[layer_cnt].layer->cpt_op > 0)
{
std::cout << "[INFO] layer " << m_data[layer_cnt].layer->id() << ' ' << m_data[layer_cnt].layer->name() << " [" << opName(m_data[layer_cnt].layer->op())
<< "]: " << m_data[layer_cnt].layer->cpt_op << " op" << std::endl;
else {
if (printinfo && (m_data[layer_cnt].layer->cpt_op > 0 || m_data[layer_cnt].layer->cpt_mac > 0 ) )
{
std::cout << "[INFO] layer " << m_data[layer_cnt].layer->id() << ' ' << m_data[layer_cnt].layer->name() << " [" << opName(m_data[layer_cnt].layer->op()) << "]: "
<< m_data[layer_cnt].layer->cpt_mac << " mac, "
<< m_data[layer_cnt].layer->cpt_op << " op" << std::endl;
}
}
}
#if DEBUG_COUNTERS && __AVX2__
......
......@@ -58,6 +58,7 @@ static constexpr float kSparsifySizeThreshold = 1000.0f;
// #define DEBUG_PRINT 1 // print model info
// #define DEBUG_SIMD 1 // tell about non simd version
// #define DEBUG_KEEP_OUTPUT 1 // keep a copy of the output tensor
// #define DEBUG_OVERFLOW 1 // set all accumulator to int16 to detect overflow in accumulator as well
#if SATURATE_RESULT
#define SATURATE(X) \
if (!std::is_same<T, float>::value) \
......