mirror of
https://github.com/zebrajr/pytorch.git
synced 2026-01-15 12:15:51 +00:00
540 lines
19 KiB
C++
540 lines
19 KiB
C++
|
|
#include <fmt/format.h>
|
||
|
|
#include <gtest/gtest.h>
|
||
|
|
#include <torch/torch.h>
|
||
|
|
#include <random>
|
||
|
|
#include "test/cpp/nativert/static_kernel_test_utils.h" // @manual
|
||
|
|
|
||
|
|
namespace torch::nativert {
|
||
|
|
|
||
|
|
namespace {
|
||
|
|
std::vector<c10::IValue> generateArgsForQuantizedEmbeddingBag() {
|
||
|
|
// Set seed for reproducibility
|
||
|
|
std::random_device rd;
|
||
|
|
std::mt19937 gen(rd());
|
||
|
|
std::uniform_int_distribution<int> int_dis(0, 15); // num_embeddings - 1
|
||
|
|
int num_embeddings = 16;
|
||
|
|
int embedding_dim = 32;
|
||
|
|
int num_lengths = 10;
|
||
|
|
|
||
|
|
auto weight =
|
||
|
|
at::randint(0, 255, {num_embeddings, embedding_dim}).to(at::kByte);
|
||
|
|
|
||
|
|
// Generate random lengths
|
||
|
|
std::vector<int> np_lengths(num_lengths);
|
||
|
|
for (auto& length : np_lengths) {
|
||
|
|
length = int_dis(gen);
|
||
|
|
}
|
||
|
|
int total_length = 0;
|
||
|
|
for (const auto& length : np_lengths) {
|
||
|
|
total_length += length;
|
||
|
|
}
|
||
|
|
// Generate random indices
|
||
|
|
at::Tensor indices =
|
||
|
|
torch::empty({total_length}, torch::dtype(torch::kInt32));
|
||
|
|
auto indices_accessor = indices.accessor<int, 1>();
|
||
|
|
for (int i = 0; i < total_length; ++i) {
|
||
|
|
indices_accessor[i] = int_dis(gen);
|
||
|
|
}
|
||
|
|
// Create lengths tensor
|
||
|
|
at::Tensor lengths = torch::from_blob(
|
||
|
|
np_lengths.data(), {num_lengths}, torch::dtype(torch::kInt32));
|
||
|
|
// Calculate offsets
|
||
|
|
at::Tensor offsets = torch::cat(
|
||
|
|
{torch::zeros({1}, torch::dtype(torch::kInt32)),
|
||
|
|
torch::cumsum(lengths, 0)});
|
||
|
|
offsets = offsets.to(torch::dtype(torch::kInt32));
|
||
|
|
|
||
|
|
at::Tensor per_sample_weights = at::randn(indices.sizes());
|
||
|
|
|
||
|
|
std::vector<c10::IValue> args{weight, indices, offsets, per_sample_weights};
|
||
|
|
return args;
|
||
|
|
}
|
||
|
|
|
||
|
|
std::vector<c10::IValue> generateArgsForEmbeddingBag(bool include_padding_idx) {
|
||
|
|
torch::Tensor weight = torch::randn({10, 3}, torch::dtype(torch::kFloat32));
|
||
|
|
torch::Tensor indices =
|
||
|
|
torch::randint(0, 10, {20}, torch::dtype(torch::kInt64));
|
||
|
|
torch::Tensor offsets =
|
||
|
|
torch::tensor({0, 5, 10, 15, 20}, torch::dtype(torch::kInt64));
|
||
|
|
torch::Tensor per_sample_weights =
|
||
|
|
torch::rand({20}, torch::dtype(torch::kFloat32));
|
||
|
|
// Define the padding_idx
|
||
|
|
int64_t padding_idx = 1;
|
||
|
|
// Create a vector of IValues to store the arguments
|
||
|
|
std::vector<c10::IValue> args;
|
||
|
|
args.emplace_back(weight);
|
||
|
|
args.emplace_back(indices);
|
||
|
|
args.emplace_back(offsets);
|
||
|
|
args.emplace_back(per_sample_weights);
|
||
|
|
if (include_padding_idx) {
|
||
|
|
args.emplace_back(padding_idx);
|
||
|
|
}
|
||
|
|
return args;
|
||
|
|
}
|
||
|
|
} // namespace
|
||
|
|
|
||
|
|
// Feeds quantized.embedding_bag_byte_rowwise_offsets (followed by a clone of
// its result) to testStaticKernelEquality, using the shared quantized
// embedding-bag inputs.
TEST(StaticKernelTest, QuantizedEmbeddingBagByteRowwiseOffsets) {
  const std::string graph_ir =
      R"(graph(%weight, %indices, %offsets, %per_sample_weights):
%out = torch.ops.quantized.embedding_bag_byte_rowwise_offsets.default(weight=%weight, indices=%indices, offsets=%offsets, scale_grad_by_freq=false, mode=0, pruned_weights=false, per_sample_weights=%per_sample_weights, compressed_indices_mapping=None, include_last_offset=true)
%res = torch.ops.aten.clone.default(self=%out, memory_format=None)
return (%res)
)";

  std::vector<c10::IValue> inputs = generateArgsForQuantizedEmbeddingBag();
  testStaticKernelEquality(graph_ir, inputs);
}
|
||
|
|
|
||
|
|
// Same shape as the byte-rowwise test, but targets
// quantized.embedding_bag_4bit_rowwise_offsets with the same generated inputs.
TEST(StaticKernelTest, QuantizedEmbeddingBag4BitRowwiseOffsets) {
  const std::string graph_ir =
      R"(graph(%weight, %indices, %offsets, %per_sample_weights):
%out = torch.ops.quantized.embedding_bag_4bit_rowwise_offsets.default(weight=%weight, indices=%indices, offsets=%offsets, scale_grad_by_freq=false, mode=0, pruned_weights=false, per_sample_weights=%per_sample_weights, compressed_indices_mapping=None, include_last_offset=true)
%res = torch.ops.aten.clone.default(self=%out, memory_format=None)
return (%res)
)";

  std::vector<c10::IValue> inputs = generateArgsForQuantizedEmbeddingBag();
  testStaticKernelEquality(graph_ir, inputs);
}
|
||
|
|
|
||
|
|
// Exercises aten.embedding_bag.default with two graphs: one that clones and
// returns all four op outputs, and one that leaves %out3 unused.
TEST(StaticKernelTest, EmbeddingBag) {
  // All four outputs are cloned and returned.
  const std::string all_outputs_graph =
      R"(graph(%weight, %indices, %offsets, %per_sample_weights):
%out0, %out1, %out2, %out3 = torch.ops.aten.embedding_bag.default(weight=%weight, indices=%indices, offsets=%offsets, scale_grad_by_freq=false, mode=0, sparse=false, per_sample_weights=%per_sample_weights, include_last_offset=true)
%res1 = torch.ops.aten.clone.default(self=%out0, memory_format=None)
%res2 = torch.ops.aten.clone.default(self=%out1, memory_format=None)
%res3 = torch.ops.aten.clone.default(self=%out2, memory_format=None)
%res4 = torch.ops.aten.clone.default(self=%out3, memory_format=None)
return (%res1, %res2, %res3, %res4)
)";

  // Test use_max_indices False: %out3 is never consumed and %out2 is returned
  // directly alongside its clone.
  const std::string unused_max_indices_graph =
      R"(graph(%weight, %indices, %offsets, %per_sample_weights):
%out0, %out1, %out2, %out3 = torch.ops.aten.embedding_bag.default(weight=%weight, indices=%indices, offsets=%offsets, scale_grad_by_freq=false, mode=0, sparse=false, per_sample_weights=%per_sample_weights, include_last_offset=true)
%res1 = torch.ops.aten.clone.default(self=%out0, memory_format=None)
%res2 = torch.ops.aten.clone.default(self=%out1, memory_format=None)
%res3 = torch.ops.aten.clone.default(self=%out2, memory_format=None)
return (%res1, %res2, %res3, %out2)
)";

  // Each graph gets its own freshly generated inputs (no padding index).
  std::vector<c10::IValue> first_inputs = generateArgsForEmbeddingBag(false);
  testStaticKernelEquality(all_outputs_graph, first_inputs);

  std::vector<c10::IValue> second_inputs = generateArgsForEmbeddingBag(false);
  testStaticKernelEquality(unused_max_indices_graph, second_inputs);
}
|
||
|
|
|
||
|
|
// Exercises the aten.embedding_bag.padding_idx overload with two graphs: one
// that clones and returns all four op outputs, and one that leaves %out3
// unused. Inputs include the trailing padding index.
TEST(StaticKernelTest, EmbeddingBagPaddingIdx) {
  // All four outputs are cloned and returned.
  const std::string all_outputs_graph =
      R"(graph(%weight, %indices, %offsets, %per_sample_weights, %padding_idx):
%out0, %out1, %out2, %out3 = torch.ops.aten.embedding_bag.padding_idx(weight=%weight, indices=%indices, offsets=%offsets, scale_grad_by_freq=false, mode=0, sparse=false, per_sample_weights=%per_sample_weights, include_last_offset=true, padding_idx=%padding_idx)
%res1 = torch.ops.aten.clone.default(self=%out0, memory_format=None)
%res2 = torch.ops.aten.clone.default(self=%out1, memory_format=None)
%res3 = torch.ops.aten.clone.default(self=%out2, memory_format=None)
%res4 = torch.ops.aten.clone.default(self=%out3, memory_format=None)
return (%res1, %res2, %res3, %res4)
)";

  // Test use_max_indices False: %out3 is never consumed and %out2 is returned
  // directly alongside its clone.
  const std::string unused_max_indices_graph =
      R"(graph(%weight, %indices, %offsets, %per_sample_weights, %padding_idx):
%out0, %out1, %out2, %out3 = torch.ops.aten.embedding_bag.padding_idx(weight=%weight, indices=%indices, offsets=%offsets, scale_grad_by_freq=false, mode=0, sparse=false, per_sample_weights=%per_sample_weights, include_last_offset=true, padding_idx=%padding_idx)
%res1 = torch.ops.aten.clone.default(self=%out0, memory_format=None)
%res2 = torch.ops.aten.clone.default(self=%out1, memory_format=None)
%res3 = torch.ops.aten.clone.default(self=%out2, memory_format=None)
return (%res1, %res2, %res3, %out2)
)";

  // Each graph gets its own freshly generated inputs (with padding index).
  std::vector<c10::IValue> first_inputs = generateArgsForEmbeddingBag(true);
  testStaticKernelEquality(all_outputs_graph, first_inputs);

  std::vector<c10::IValue> second_inputs = generateArgsForEmbeddingBag(true);
  testStaticKernelEquality(unused_max_indices_graph, second_inputs);
}
|
||
|
|
|
||
|
|
// Sweeps aten._to_copy over every combination of target dtype, target memory
// format, input dtype and (non-)permuted input layout, formatting a fresh
// graph for each combination.
TEST(StaticKernelTest, Aten_ToCopy) {
  const char* const kTargetDtypes[] = {
      "None",
      "ScalarType::FLOAT",
      "ScalarType::DOUBLE",
      "ScalarType::HALF",
      "ScalarType::INT",
      "ScalarType::LONG"};
  const char* const kTargetMemoryFormats[] = {
      "None",
      "MemoryFormat::PreserveFormat",
      "MemoryFormat::ContiguousFormat",
  };
  const at::ScalarType kInputDtypes[] = {
      at::kLong, at::kInt, at::kFloat, at::kDouble, at::kHalf};

  for (const char* target_dtype : kTargetDtypes) {
    for (const char* target_memory_format : kTargetMemoryFormats) {
      for (const at::ScalarType input_dtype : kInputDtypes) {
        for (const bool permute_input : {true, false}) {
          // The two {} placeholders receive the dtype and memory_format
          // spellings chosen by the outer loops.
          const std::string graph = fmt::format(
              R"(graph(%input):
%out = torch.ops.aten._to_copy.default(self=%input, dtype={}, memory_format={})
return (%out)
)",
              target_dtype,
              target_memory_format);

          // Integer-valued data converted to the dtype under test.
          at::Tensor input =
              at::randint(0, 128, {8, 8, 8, 8}, at::kLong).to(input_dtype);
          if (permute_input) {
            input = input.permute({1, 0, 3, 2});
          }

          testStaticKernelEquality(graph, {input});
        }
      }
    }
  }
}
|
||
|
|
|
||
|
|
// Runs a float -> float aten._to_copy graph repeatedly on one runner and
// asserts after every run that the output shares storage with the current
// input, while the input's storage, size and strides are changed between runs.
TEST(StaticKernelTest, Aten_ToCopy_Aliasing) {
  const std::string graph_ir =
      R"(graph(%input):
%out = torch.ops.aten._to_copy.default(self=%input, dtype=ScalarType::FLOAT, memory_format=None)
return (%out))";

  at::Tensor input =
      at::randint(0, 128, {8, 8, 8, 8}, at::kLong).to(at::kFloat);

  torch::nativert::ExecutorConfig config;
  config.enableStaticCPUKernels = true;
  SimpleTestModelRunner runner(graph_ir, config);

  // try standard aliasing case
  auto output = runner.run({input});

  // Checks the most recent `output` against the current `input`: shared
  // storage plus the expected rank and element count.
  const auto expectAliasedOutput = [&](int64_t want_dim, int64_t want_numel) {
    const at::Tensor& result = output[0].toTensor();
    EXPECT_TRUE(result.storage().is_alias_of(input.storage()));
    EXPECT_EQ(result.dim(), want_dim);
    EXPECT_EQ(result.numel(), want_numel);
  };

  expectAliasedOutput(4, 8 * 8 * 8 * 8);
  output = runner.run({input});
  expectAliasedOutput(4, 8 * 8 * 8 * 8);

  // try swap out input storage between runs
  at::Storage original_storage = input.storage();
  input.unsafeGetTensorImpl()->set_storage_keep_dtype(
      at::randint(0, 128, {8, 8, 8, 8}, at::kLong).to(at::kFloat).storage());
  output = runner.run({input});
  expectAliasedOutput(4, 8 * 8 * 8 * 8);
  // The output must follow the new storage, not the one it aliased before.
  EXPECT_FALSE(output[0].toTensor().storage().is_alias_of(original_storage));

  // try to upsize between runs
  input.resize_({16, 16, 16, 16, 16});
  output = runner.run({input});
  expectAliasedOutput(5, 16 * 16 * 16 * 16 * 16);

  // try to downsize between runs
  input.resize_({4});
  output = runner.run({input});
  expectAliasedOutput(1, 4);

  // try to restride between runs
  input.as_strided_({3, 2}, {3, 6}).random_();
  output = runner.run({input});
  expectAliasedOutput(2, 3 * 2);

  // Element-wise comparison through the strided view.
  for (int row = 0; row < 3; ++row) {
    for (int col = 0; col < 2; ++col) {
      EXPECT_EQ(
          output[0].toTensor().index({row, col}).item().toFloat(),
          input.index({row, col}).item().toFloat());
    }
  }
}
|
||
|
|
|
||
|
|
// Runs aten.mul.Scalar over tensors of several dtypes and layouts.
//
// Every scalar is held in a std::vector<double>, so it is handed to the graph
// as a double regardless of how the literal is spelled; this test does not
// cover integer-typed scalars. Cases that were identical once the scalar is
// converted to double (repeated `2`/`-2` values, a second int32 case, and a
// verbatim copy of the two strided cases) are listed only once.
TEST(StaticKernelTest, MulScalar) {
  const std::string graph = R"(graph(%in0_t, %in1_t):
%out = torch.ops.aten.mul.Scalar(self=%in0_t, other=%in1_t)
return (%out)
)";

  std::vector<std::pair<at::Tensor, std::vector<double>>> test_cases = {
      // default-dtype tensor with positive, negative, zero, large, small and
      // non-finite scalars
      {at::rand({3, 4}), {2.0, -2.0, 0.0, 1e6, 1e-6, NAN, INFINITY}},
      {at::rand({2, 3, 4}), {2.0}}, // 3-D default-dtype tensor
      {at::rand({3, 4}, at::kFloat), {3.0}}, // fp32 tensor
      {at::randint(0, 10, {3, 4}, at::kInt), {2.0}}, // int32 tensor
      {at::rand({3, 4}, at::kHalf), {2.0}}, // half tensor
      {at::rand({3, 4}, at::kBFloat16), {2.0}}, // bf16 tensor
      {at::randint(0, 10, {3, 4}, at::kLong), {2.0}}, // int64 tensor
      {at::rand({3, 4, 5}, at::kFloat).permute({2, 0, 1}),
       {2.0}}, // permuted (non-contiguous) fp32 tensor
      {at::rand({3, 4}, at::kFloat).t(),
       {2.0}}, // transposed (non-contiguous) fp32 tensor
  };

  for (const auto& [tensor, scalars] : test_cases) {
    for (double scalar : scalars) {
      std::vector<c10::IValue> inputs = {tensor, scalar};
      testStaticKernelEquality(graph, inputs);
    }
  }
}
|
||
|
|
|
||
|
|
// Queries aten.sym_size.int for every valid non-negative dim of a handful of
// differently shaped tensors.
TEST(StaticKernelTest, SymSizeInt) {
  const std::string graph_ir = R"(graph(%self, %dim):
%out = torch.ops.aten.sym_size.int(self=%self, dim=%dim)
return (%out)
)";

  // Shapes under test.
  const std::vector<at::Tensor> samples = {
      at::rand({3, 4, 5}), // standard 3D tensor
      at::rand({0, 4, 5}), // empty tensor
      at::rand({1}), // single-element tensor
      at::rand({2, 3, 4, 5, 6}), // high-dimensional tensor
      at::rand({3, 1, 5}) // tensor with one dimension as 1
  };

  for (const at::Tensor& sample : samples) {
    const int64_t rank = sample.dim();
    for (int64_t axis = 0; axis < rank; ++axis) {
      std::vector<c10::IValue> inputs = {sample, axis};
      testStaticKernelEquality(graph_ir, inputs);
    }
  }
}
|
||
|
|
|
||
|
|
// Runs aten.bucketize.Tensor for all four (out_int32, right) flag
// combinations on a fixed input and a fixed set of boundaries.
TEST(StaticKernelTest, BucketizeTensor) {
  const std::string graph_ir =
      R"(graph(%input, %boundaries, %out_int32, %right):
%out = torch.ops.aten.bucketize.Tensor(self=%input, boundaries=%boundaries, out_int32=%out_int32, right=%right)
return (%out)
)";

  // out_int32 varies fastest, giving the order
  // (false,false), (true,false), (false,true), (true,true).
  for (const bool right : {false, true}) {
    for (const bool out_int32 : {false, true}) {
      at::Tensor values = at::tensor({0.1, 2.5, 3.0, 4.5, 5.0}, at::kFloat);
      at::Tensor edges = at::tensor({1.0, 2.0, 3.0, 4.0}, at::kFloat);

      std::vector<c10::IValue> inputs = {values, edges, out_int32, right};
      testStaticKernelEquality(graph_ir, inputs);
    }
  }
}
|
||
|
|
|
||
|
|
// Scatters a {2, 5} source into rows [1, 3) of a {5, 5} tensor through
// aten.slice_scatter.default, with dim/start/end/step passed as graph inputs.
TEST(StaticKernelTest, SliceScatter) {
  const std::string graph_ir =
      R"(graph(%self, %src, %dim, %start, %end, %step):
%out = torch.ops.aten.slice_scatter.default(self=%self, src=%src, dim=%dim, start=%start, end=%end, step=%step)
return (%out)
)";

  // Destination and source tensors.
  at::Tensor destination = at::rand({5, 5}, at::kFloat);
  at::Tensor patch = at::rand({2, 5}, at::kFloat);

  // Slice parameters.
  const int64_t axis = 0;
  const int64_t first = 1;
  const int64_t last = 3;
  const int64_t stride = 1;

  std::vector<c10::IValue> inputs = {
      destination, patch, axis, first, last, stride};
  testStaticKernelEquality(graph_ir, inputs);
}
|
||
|
|
|
||
|
|
// Prepacks a random {8, 16} float tensor with
// quantized.embedding_bag_byte_prepack and returns a clone of the result.
TEST(StaticKernelTest, QuantizedEmbeddingBagBytePrepack) {
  const std::string graph_ir = R"(
graph(%input):
%weight = torch.ops.quantized.embedding_bag_byte_prepack.default(weight=%input)
%res = torch.ops.aten.clone.default(self=%weight, memory_format=None)
return (%res)
)";

  at::Tensor float_weight = torch::randn({8, 16}, at::ScalarType::Float);
  testStaticKernelEquality(graph_ir, {float_weight});
}
|
||
|
|
|
||
|
|
// Prepacks a random {8, 16} float tensor, unpacks it again with
// quantized.embedding_bag_byte_unpack, and returns a clone of the result.
TEST(StaticKernelTest, QuantizedEmbeddingBagByteUnpack) {
  const std::string graph_ir = R"(
graph(%input):
%weight = torch.ops.quantized.embedding_bag_byte_prepack.default(weight=%input)
%output = torch.ops.quantized.embedding_bag_byte_unpack.default(weight=%weight)
%res = torch.ops.aten.clone.default(self=%output, memory_format=None)
return (%res)
)";

  at::Tensor float_weight = torch::randn({8, 16}, at::ScalarType::Float);
  testStaticKernelEquality(graph_ir, {float_weight});
}
|
||
|
|
|
||
|
|
// Prepacks a qint8 weight, applies quantized.linear to a quint8 activation,
// and dequantizes the result.
TEST(StaticKernelTest, QuantizedLinear) {
  const std::string graph_ir = R"(
graph(%input, %weights):
%packed_params = torch.ops.quantized.linear_prepack.default(W=%weights, B=None)
%1254 = torch.ops.quantized.linear.default(X=%input, W_prepack=%packed_params, Y_scale_i=1.0, Y_zero_point_i=1)
%res = torch.ops.aten.dequantize.self(self=%1254)
return (%res)
)";

  // Both operands are {3, 2} and are quantized with scale 2 and zero point 3;
  // only the quantized dtype differs.
  at::Tensor activation =
      at::quantize_per_tensor(torch::randn({3, 2}), 2, 3, torch::kQUInt8);
  at::Tensor qweight =
      at::quantize_per_tensor(torch::randn({3, 2}), 2, 3, torch::kQInt8);

  testStaticKernelEquality(graph_ir, {activation, qweight});
}
|
||
|
|
|
||
|
|
// Views a {6, 6} tensor as a flat [36] tensor and returns a clone of the view.
// The trailing `true` is forwarded to testStaticKernelEquality; its meaning is
// defined by that helper, which is not visible in this file.
TEST(NativeKernelTest, View) {
  const std::string graph_ir =
      R"(graph(%self):
%ret = torch.ops.aten.view.default(self=%self, size=[36])
%cloned = torch.ops.aten.clone.default(self=%ret, memory_format=None)
return (%cloned)
)";

  auto input = at::rand({6, 6});
  std::vector<c10::IValue> inputs{input};
  testStaticKernelEquality(graph_ir, inputs, true);
}
|
||
|
|
|
||
|
|
// Swaps the two dimensions of a {2, 3} tensor with aten.permute and returns a
// clone of the result. The trailing `true` is forwarded to
// testStaticKernelEquality; its meaning is defined by that helper.
TEST(NativeKernelTest, Permute) {
  const std::string graph_ir =
      R"(graph(%self):
%ret = torch.ops.aten.permute.default(self=%self, dims=[1, 0])
%cloned = torch.ops.aten.clone.default(self=%ret, memory_format=None)
return (%cloned)
)";

  auto input = at::rand({2, 3});
  std::vector<c10::IValue> inputs{input};
  testStaticKernelEquality(graph_ir, inputs, true);
}
|
||
|
|
|
||
|
|
// Reshapes a {3, 3, 4} tensor to [9, 4] with aten.reshape and returns a clone
// of the result. The trailing `true` is forwarded to
// testStaticKernelEquality; its meaning is defined by that helper.
TEST(NativeKernelTest, Reshape) {
  const std::string graph_ir =
      R"(graph(%self):
%ret = torch.ops.aten.reshape.default(self=%self, shape=[9, 4])
%cloned = torch.ops.aten.clone.default(self=%ret, memory_format=None)
return (%cloned)
)";

  auto input = at::rand({3, 3, 4});
  std::vector<c10::IValue> inputs{input};
  testStaticKernelEquality(graph_ir, inputs, true);
}
|
||
|
|
|
||
|
|
// Selects index 0 along dim 1 of a {3, 3, 3} tensor with aten.select.int and
// returns a clone of the result. The trailing `true` is forwarded to
// testStaticKernelEquality; its meaning is defined by that helper.
TEST(NativeKernelTest, Select) {
  // Unlike the neighbouring tests, this graph is a compile-time string_view.
  static constexpr std::string_view kGraphIr =
      R"(graph(%self):
%ret = torch.ops.aten.select.int(self=%self, dim=1, index=0)
%cloned = torch.ops.aten.clone.default(self=%ret, memory_format=None)
return (%cloned)
)";

  auto input = at::rand({3, 3, 3});
  std::vector<c10::IValue> inputs{input};
  testStaticKernelEquality(kGraphIr, inputs, true);
}
|
||
|
|
|
||
|
|
// Slices rows [1, 3) with step 1 out of a {5, 5} tensor via aten.slice.Tensor
// and returns a clone of the result. The trailing `true` is forwarded to
// testStaticKernelEquality; its meaning is defined by that helper.
TEST(NativeKernelTest, Slice) {
  const std::string graph_ir =
      R"(graph(%self):
%ret = torch.ops.aten.slice.Tensor(self=%self, dim=0, start=1, end=3, step=1)
%cloned = torch.ops.aten.clone.default(self=%ret, memory_format=None)
return (%cloned)
)";

  auto input = at::rand({5, 5});
  std::vector<c10::IValue> inputs{input};
  testStaticKernelEquality(graph_ir, inputs, true);
}
|
||
|
|
|
||
|
|
// Splits a {6, 6} tensor along dim 0 with split_size=2 via aten.split.Tensor
// and returns the op's result directly. The trailing `true` is forwarded to
// testStaticKernelEquality; its meaning is defined by that helper.
TEST(NativeKernelTest, Split) {
  const std::string graph_ir =
      R"(graph(%self):
%ret = torch.ops.aten.split.Tensor(self=%self, split_size=2, dim=0)
return (%ret)
)";

  auto input = at::rand({6, 6});
  std::vector<c10::IValue> inputs{input};
  testStaticKernelEquality(graph_ir, inputs, true);
}
|
||
|
|
|
||
|
|
// Splits a {6, 6} tensor along dim 0 into pieces of sizes [2, 4] via
// aten.split_with_sizes and returns the op's result directly. The trailing
// `true` is forwarded to testStaticKernelEquality; its meaning is defined by
// that helper.
TEST(NativeKernelTest, SplitWithSizes) {
  const std::string graph_ir =
      R"(graph(%self):
%ret = torch.ops.aten.split_with_sizes.default(self=%self, split_sizes=[2, 4], dim=0)
return (%ret)
)";

  auto input = at::rand({6, 6});
  std::vector<c10::IValue> inputs{input};
  testStaticKernelEquality(graph_ir, inputs, true);
}
|
||
|
|
|
||
|
|
// Splits a {9, 3} tensor into 3 sections along dim 0 via
// aten.tensor_split.sections and returns the op's result directly. The
// trailing `true` is forwarded to testStaticKernelEquality; its meaning is
// defined by that helper.
TEST(NativeKernelTest, TensorSplitSections) {
  const std::string graph_ir =
      R"(graph(%self):
%ret = torch.ops.aten.tensor_split.sections(self=%self, sections=3, dim=0)
return (%ret)
)";

  auto input = at::rand({9, 3});
  std::vector<c10::IValue> inputs{input};
  testStaticKernelEquality(graph_ir, inputs, true);
}
|
||
|
|
|
||
|
|
// Stacks three {2, 3} tensors along dim 0 via aten.stack.default. The single
// graph input is a tensor list. The trailing `true` is forwarded to
// testStaticKernelEquality; its meaning is defined by that helper.
TEST(StaticKernelTest, Stack) {
  const std::string graph_ir =
      R"(graph(%tensors):
%ret = torch.ops.aten.stack.default(tensors=%tensors, dim=0)
return (%ret)
)";

  auto first = at::rand({2, 3});
  auto second = at::rand({2, 3});
  auto third = at::rand({2, 3});

  // One IValue holding the whole list, matching the graph's single parameter.
  std::vector<c10::IValue> inputs{
      std::vector<at::Tensor>{first, second, third}};
  testStaticKernelEquality(graph_ir, inputs, true);
}
|
||
|
|
|
||
|
|
// Extracts the value of a one-element tensor holding 42.0 via
// aten.item.default. The trailing `true` is forwarded to
// testStaticKernelEquality; its meaning is defined by that helper.
TEST(NativeKernelTest, Item) {
  const std::string graph_ir =
      R"(graph(%self):
%ret = torch.ops.aten.item.default(self=%self)
return (%ret)
)";

  auto single_value = at::tensor({42.0});
  std::vector<c10::IValue> inputs{single_value};
  testStaticKernelEquality(graph_ir, inputs, true);
}
|
||
|
|
|
||
|
|
// Narrows a {5, 5} tensor to 3 columns starting at column 1 via
// aten.narrow.default and returns a clone of the result; dim/start/length are
// passed as graph inputs. The trailing `true` is forwarded to
// testStaticKernelEquality; its meaning is defined by that helper.
TEST(NativeKernelTest, Narrow) {
  const std::string graph_ir =
      R"(graph(%self, %dim, %start, %length):
%ret = torch.ops.aten.narrow.default(self=%self, dim=%dim, start=%start, length=%length)
%cloned = torch.ops.aten.clone.default(self=%ret, memory_format=None)
return (%cloned)
)";

  auto input = at::rand({5, 5});
  const int64_t axis = 1;
  const int64_t first = 1;
  const int64_t count = 3;

  std::vector<c10::IValue> inputs{input, axis, first, count};
  testStaticKernelEquality(graph_ir, inputs, true);
}
|
||
|
|
} // namespace torch::nativert
|