// pytorch/test/cpp/nativert/test_static_kernel_ops.cpp

#include <fmt/format.h>
#include <gtest/gtest.h>
#include <torch/torch.h>
#include <random>
#include "test/cpp/nativert/static_kernel_test_utils.h" // @manual
namespace torch::nativert {
namespace {
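// Builds the (weight, indices, offsets, per_sample_weights) argument list
// shared by the quantized embedding_bag tests below. Weights are random
// uint8 rows; indices and offsets describe ten variable-length bags.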
std::vector<c10::IValue> generateArgsForQuantizedEmbeddingBag() {
  // Use a fixed seed so the generated inputs are reproducible across runs.
  std::mt19937 gen(42);
  std::uniform_int_distribution<int> int_dis(0, 15); // num_embeddings - 1
  int num_embeddings = 16;
  int embedding_dim = 32;
  int num_lengths = 10;
  auto weight =
      at::randint(0, 255, {num_embeddings, embedding_dim}).to(at::kByte);
  // Generate random bag lengths
  std::vector<int> np_lengths(num_lengths);
  for (auto& length : np_lengths) {
    length = int_dis(gen);
  }
  int total_length = 0;
  for (const auto& length : np_lengths) {
    total_length += length;
  }
  // Generate random indices into the embedding table
  at::Tensor indices =
      torch::empty({total_length}, torch::dtype(torch::kInt32));
  auto indices_accessor = indices.accessor<int, 1>();
  for (int i = 0; i < total_length; ++i) {
    indices_accessor[i] = int_dis(gen);
  }
  // Create lengths tensor (np_lengths outlives every use of this view)
  at::Tensor lengths = torch::from_blob(
      np_lengths.data(), {num_lengths}, torch::dtype(torch::kInt32));
  // Offsets are the exclusive prefix sum of lengths; the trailing total is
  // kept because the kernels are invoked with include_last_offset=true.
  at::Tensor offsets = torch::cat(
      {torch::zeros({1}, torch::dtype(torch::kInt32)),
       torch::cumsum(lengths, 0)});
  offsets = offsets.to(torch::dtype(torch::kInt32));
  at::Tensor per_sample_weights = at::randn(indices.sizes());
  std::vector<c10::IValue> args{weight, indices, offsets, per_sample_weights};
  return args;
}

std::vector<c10::IValue> generateArgsForEmbeddingBag(bool include_padding_idx) {
  torch::Tensor weight = torch::randn({10, 3}, torch::dtype(torch::kFloat32));
  torch::Tensor indices =
      torch::randint(0, 10, {20}, torch::dtype(torch::kInt64));
  torch::Tensor offsets =
      torch::tensor({0, 5, 10, 15, 20}, torch::dtype(torch::kInt64));
  torch::Tensor per_sample_weights =
      torch::rand({20}, torch::dtype(torch::kFloat32));
  // Define the padding_idx
  int64_t padding_idx = 1;
  // Create a vector of IValues to store the arguments
  std::vector<c10::IValue> args;
  args.emplace_back(weight);
  args.emplace_back(indices);
  args.emplace_back(offsets);
  args.emplace_back(per_sample_weights);
  if (include_padding_idx) {
    args.emplace_back(padding_idx);
  }
  return args;
}
} // namespace

TEST(StaticKernelTest, QuantizedEmbeddingBagByteRowwiseOffsets) {
  const std::string graph =
      R"(graph(%weight, %indices, %offsets, %per_sample_weights):
%out = torch.ops.quantized.embedding_bag_byte_rowwise_offsets.default(weight=%weight, indices=%indices, offsets=%offsets, scale_grad_by_freq=false, mode=0, pruned_weights=false, per_sample_weights=%per_sample_weights, compressed_indices_mapping=None, include_last_offset=true)
%res = torch.ops.aten.clone.default(self=%out, memory_format=None)
return (%res)
)";
  std::vector<c10::IValue> args = generateArgsForQuantizedEmbeddingBag();
  testStaticKernelEquality(graph, args);
}

TEST(StaticKernelTest, QuantizedEmbeddingBag4BitRowwiseOffsets) {
  const std::string graph =
      R"(graph(%weight, %indices, %offsets, %per_sample_weights):
%out = torch.ops.quantized.embedding_bag_4bit_rowwise_offsets.default(weight=%weight, indices=%indices, offsets=%offsets, scale_grad_by_freq=false, mode=0, pruned_weights=false, per_sample_weights=%per_sample_weights, compressed_indices_mapping=None, include_last_offset=true)
%res = torch.ops.aten.clone.default(self=%out, memory_format=None)
return (%res)
)";
  std::vector<c10::IValue> args = generateArgsForQuantizedEmbeddingBag();
  testStaticKernelEquality(graph, args);
}

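// aten.embedding_bag returns four tensors: the pooled output, offset2bag,
// bag_size, and max_indices (the last is only meaningful in max mode).
// Cloning each output forces the static kernel results to be materialized
// before comparison.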
TEST(StaticKernelTest, EmbeddingBag) {
  const std::string graph =
      R"(graph(%weight, %indices, %offsets, %per_sample_weights):
%out0, %out1, %out2, %out3 = torch.ops.aten.embedding_bag.default(weight=%weight, indices=%indices, offsets=%offsets, scale_grad_by_freq=false, mode=0, sparse=false, per_sample_weights=%per_sample_weights, include_last_offset=true)
%res1 = torch.ops.aten.clone.default(self=%out0, memory_format=None)
%res2 = torch.ops.aten.clone.default(self=%out1, memory_format=None)
%res3 = torch.ops.aten.clone.default(self=%out2, memory_format=None)
%res4 = torch.ops.aten.clone.default(self=%out3, memory_format=None)
return (%res1, %res2, %res3, %res4)
)";
  std::vector<c10::IValue> args = generateArgsForEmbeddingBag(false);
  testStaticKernelEquality(graph, args);

  // Also test a graph that never consumes the max_indices output (%out3)
  // and returns %out2 without cloning it.
  const std::string graph2 =
      R"(graph(%weight, %indices, %offsets, %per_sample_weights):
%out0, %out1, %out2, %out3 = torch.ops.aten.embedding_bag.default(weight=%weight, indices=%indices, offsets=%offsets, scale_grad_by_freq=false, mode=0, sparse=false, per_sample_weights=%per_sample_weights, include_last_offset=true)
%res1 = torch.ops.aten.clone.default(self=%out0, memory_format=None)
%res2 = torch.ops.aten.clone.default(self=%out1, memory_format=None)
%res3 = torch.ops.aten.clone.default(self=%out2, memory_format=None)
return (%res1, %res2, %res3, %out2)
)";
  std::vector<c10::IValue> args2 = generateArgsForEmbeddingBag(false);
  testStaticKernelEquality(graph2, args2);
}

TEST(StaticKernelTest, EmbeddingBagPaddingIdx) {
  const std::string graph =
      R"(graph(%weight, %indices, %offsets, %per_sample_weights, %padding_idx):
%out0, %out1, %out2, %out3 = torch.ops.aten.embedding_bag.padding_idx(weight=%weight, indices=%indices, offsets=%offsets, scale_grad_by_freq=false, mode=0, sparse=false, per_sample_weights=%per_sample_weights, include_last_offset=true, padding_idx=%padding_idx)
%res1 = torch.ops.aten.clone.default(self=%out0, memory_format=None)
%res2 = torch.ops.aten.clone.default(self=%out1, memory_format=None)
%res3 = torch.ops.aten.clone.default(self=%out2, memory_format=None)
%res4 = torch.ops.aten.clone.default(self=%out3, memory_format=None)
return (%res1, %res2, %res3, %res4)
)";
  std::vector<c10::IValue> args = generateArgsForEmbeddingBag(true);
  testStaticKernelEquality(graph, args);

  // Also test a graph that never consumes the max_indices output (%out3)
  // and returns %out2 without cloning it.
  const std::string graph2 =
      R"(graph(%weight, %indices, %offsets, %per_sample_weights, %padding_idx):
%out0, %out1, %out2, %out3 = torch.ops.aten.embedding_bag.padding_idx(weight=%weight, indices=%indices, offsets=%offsets, scale_grad_by_freq=false, mode=0, sparse=false, per_sample_weights=%per_sample_weights, include_last_offset=true, padding_idx=%padding_idx)
%res1 = torch.ops.aten.clone.default(self=%out0, memory_format=None)
%res2 = torch.ops.aten.clone.default(self=%out1, memory_format=None)
%res3 = torch.ops.aten.clone.default(self=%out2, memory_format=None)
return (%res1, %res2, %res3, %out2)
)";
  std::vector<c10::IValue> args2 = generateArgsForEmbeddingBag(true);
  testStaticKernelEquality(graph2, args2);
}

TEST(StaticKernelTest, Aten_ToCopy) {
  for (auto& target_dtype :
       {"None",
        "ScalarType::FLOAT",
        "ScalarType::DOUBLE",
        "ScalarType::HALF",
        "ScalarType::INT",
        "ScalarType::LONG"}) {
    for (auto& target_memory_format :
         {"None",
          "MemoryFormat::PreserveFormat",
          "MemoryFormat::ContiguousFormat"}) {
      for (auto& input_dtype :
           {at::kLong, at::kInt, at::kFloat, at::kDouble, at::kHalf}) {
        for (auto& permute_input : {true, false}) {
          const std::string graph = fmt::format(
              R"(graph(%input):
%out = torch.ops.aten._to_copy.default(self=%input, dtype={}, memory_format={})
return (%out)
)",
              target_dtype,
              target_memory_format);
          at::Tensor input =
              at::randint(0, 128, {8, 8, 8, 8}, at::kLong).to(input_dtype);
          if (permute_input) {
            input = input.permute({1, 0, 3, 2});
          }
          testStaticKernelEquality(graph, {input});
        }
      }
    }
  }
}

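// When the requested dtype already matches the input, the static _to_copy
// kernel is expected to return an alias of the input storage rather than a
// fresh copy. The checks below pin that behavior down across repeated runs,
// a storage swap, resizes, and a restride of the same input tensor.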
TEST(StaticKernelTest, Aten_ToCopy_Aliasing) {
  const std::string graph =
      R"(graph(%input):
%out = torch.ops.aten._to_copy.default(self=%input, dtype=ScalarType::FLOAT, memory_format=None)
return (%out))";
  at::Tensor input =
      at::randint(0, 128, {8, 8, 8, 8}, at::kLong).to(at::kFloat);
  torch::nativert::ExecutorConfig config;
  config.enableStaticCPUKernels = true;
  SimpleTestModelRunner runner(graph, config);

  // try standard aliasing case
  auto output = runner.run({input});
  EXPECT_TRUE(output[0].toTensor().storage().is_alias_of(input.storage()));
  EXPECT_EQ(output[0].toTensor().dim(), 4);
  EXPECT_EQ(output[0].toTensor().numel(), 8 * 8 * 8 * 8);
  output = runner.run({input});
  EXPECT_TRUE(output[0].toTensor().storage().is_alias_of(input.storage()));
  EXPECT_EQ(output[0].toTensor().dim(), 4);
  EXPECT_EQ(output[0].toTensor().numel(), 8 * 8 * 8 * 8);

  // try swapping out the input storage between runs
  at::Storage original_storage = input.storage();
  input.unsafeGetTensorImpl()->set_storage_keep_dtype(
      at::randint(0, 128, {8, 8, 8, 8}, at::kLong).to(at::kFloat).storage());
  output = runner.run({input});
  EXPECT_TRUE(output[0].toTensor().storage().is_alias_of(input.storage()));
  EXPECT_FALSE(output[0].toTensor().storage().is_alias_of(original_storage));
  EXPECT_EQ(output[0].toTensor().dim(), 4);
  EXPECT_EQ(output[0].toTensor().numel(), 8 * 8 * 8 * 8);

  // try upsizing between runs
  input.resize_({16, 16, 16, 16, 16});
  output = runner.run({input});
  EXPECT_TRUE(output[0].toTensor().storage().is_alias_of(input.storage()));
  EXPECT_EQ(output[0].toTensor().dim(), 5);
  EXPECT_EQ(output[0].toTensor().numel(), 16 * 16 * 16 * 16 * 16);

  // try downsizing between runs
  input.resize_({4});
  output = runner.run({input});
  EXPECT_TRUE(output[0].toTensor().storage().is_alias_of(input.storage()));
  EXPECT_EQ(output[0].toTensor().dim(), 1);
  EXPECT_EQ(output[0].toTensor().numel(), 4);

  // try restriding between runs
  input.as_strided_({3, 2}, {3, 6}).random_();
  output = runner.run({input});
  EXPECT_TRUE(output[0].toTensor().storage().is_alias_of(input.storage()));
  EXPECT_EQ(output[0].toTensor().dim(), 2);
  EXPECT_EQ(output[0].toTensor().numel(), 3 * 2);
  for (int i = 0; i < 3; i += 1) {
    for (int j = 0; j < 2; j += 1) {
      EXPECT_EQ(
          output[0].toTensor().index({i, j}).item().toFloat(),
          input.index({i, j}).item().toFloat());
    }
  }
}

TEST(StaticKernelTest, MulScalar) {
  const std::string graph = R"(graph(%in0_t, %in1_t):
%out = torch.ops.aten.mul.Scalar(self=%in0_t, other=%in1_t)
return (%out)
)";
  std::vector<std::pair<at::Tensor, std::vector<double>>> test_cases = {
      {at::rand({3, 4}), {2.0, -2.0, -2, 2, 0.0, 1e6, 1e-6, NAN, INFINITY}},
      {at::rand({2, 3, 4}), {2.0}},
      {at::rand({3, 4}, at::kFloat), {3.0}}, // fp32 tensor, double scalar
      {at::randint(0, 10, {3, 4}, at::kInt),
       {2.0}}, // int32 tensor, double scalar
      {at::rand({3, 4}, at::kHalf), {2.0}}, // fp16 tensor, double scalar
      {at::rand({3, 4}, at::kBFloat16), {2.0}}, // bf16 tensor, double scalar
      {at::randint(0, 10, {3, 4}, at::kInt),
       {2}}, // int32 tensor, integral-valued scalar
      {at::randint(0, 10, {3, 4}, at::kLong),
       {2}}, // int64 tensor, integral-valued scalar
      {at::rand({3, 4, 5}, at::kFloat).permute({2, 0, 1}),
       {2}}, // non-contiguous (permuted) fp32 tensor
      {at::rand({3, 4}, at::kFloat).t(),
       {2}}, // non-contiguous (transposed) fp32 tensor
  };
  for (const auto& [tensor, scalars] : test_cases) {
    for (double scalar : scalars) {
      std::vector<c10::IValue> inputs = {tensor, scalar};
      testStaticKernelEquality(graph, inputs);
    }
  }
}

TEST(StaticKernelTest, SymSizeInt) {
  const std::string graph = R"(graph(%self, %dim):
%out = torch.ops.aten.sym_size.int(self=%self, dim=%dim)
return (%out)
)";
  // Define test cases with different tensor shapes
  std::vector<at::Tensor> test_cases = {
      at::rand({3, 4, 5}), // standard 3D tensor
      at::rand({0, 4, 5}), // tensor with a zero-sized dimension
      at::rand({1}), // single-element tensor
      at::rand({2, 3, 4, 5, 6}), // high-dimensional tensor
      at::rand({3, 1, 5}) // tensor with a size-1 dimension
  };
  // Query every dimension of each test tensor
  for (const auto& tensor : test_cases) {
    for (int64_t dim = 0; dim < tensor.dim(); ++dim) {
      std::vector<c10::IValue> inputs = {tensor, dim};
      testStaticKernelEquality(graph, inputs);
    }
  }
}

TEST(StaticKernelTest, BucketizeTensor) {
  const std::string graph =
      R"(graph(%input, %boundaries, %out_int32, %right):
%out = torch.ops.aten.bucketize.Tensor(self=%input, boundaries=%boundaries, out_int32=%out_int32, right=%right)
return (%out)
)";
  std::vector<std::pair<bool, bool>> test_cases = {
      {false, false}, {true, false}, {false, true}, {true, true}};
  for (const auto& [out_int32, right] : test_cases) {
    at::Tensor input = at::tensor({0.1, 2.5, 3.0, 4.5, 5.0}, at::kFloat);
    at::Tensor boundaries = at::tensor({1.0, 2.0, 3.0, 4.0}, at::kFloat);
    std::vector<c10::IValue> args = {input, boundaries, out_int32, right};
    testStaticKernelEquality(graph, args);
  }
}

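// slice_scatter returns a copy of `self` with `src` written into the slice
// self[start:end:step] along `dim`; the case below overwrites rows 1 and 2
// of a 5x5 matrix with a 2x5 source.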
TEST(StaticKernelTest, SliceScatter) {
  const std::string graph =
      R"(graph(%self, %src, %dim, %start, %end, %step):
%out = torch.ops.aten.slice_scatter.default(self=%self, src=%src, dim=%dim, start=%start, end=%end, step=%step)
return (%out)
)";
  // Create input tensors
  at::Tensor self = at::rand({5, 5}, at::kFloat);
  at::Tensor src = at::rand({2, 5}, at::kFloat);
  int64_t dim = 0;
  int64_t start = 1;
  int64_t end = 3;
  int64_t step = 1;
  // Create a vector of IValues to pass as inputs
  std::vector<c10::IValue> inputs = {self, src, dim, start, end, step};
  // Run the kernel and verify the output
  testStaticKernelEquality(graph, inputs);
}

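// embedding_bag_byte_prepack quantizes each fp32 row to uint8 and appends a
// per-row fp32 scale and fp32 bias, so the packed result is expected to be
// {num_rows, embedding_dim + 8} bytes; byte_unpack reverses the transform.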
TEST(StaticKernelTest, QuantizedEmbeddingBagBytePrepack) {
  const std::string graph = R"(
graph(%input):
%weight = torch.ops.quantized.embedding_bag_byte_prepack.default(weight=%input)
%res = torch.ops.aten.clone.default(self=%weight, memory_format=None)
return (%res)
)";
  at::Tensor args1 = torch::randn({8, 16}, at::ScalarType::Float);
  testStaticKernelEquality(graph, {args1});
}

TEST(StaticKernelTest, QuantizedEmbeddingBagByteUnpack) {
  const std::string graph = R"(
graph(%input):
%weight = torch.ops.quantized.embedding_bag_byte_prepack.default(weight=%input)
%output = torch.ops.quantized.embedding_bag_byte_unpack.default(weight=%weight)
%res = torch.ops.aten.clone.default(self=%output, memory_format=None)
return (%res)
)";
  at::Tensor args1 = torch::randn({8, 16}, at::ScalarType::Float);
  testStaticKernelEquality(graph, {args1});
}

TEST(StaticKernelTest, QuantizedLinear) {
  const std::string graph = R"(
graph(%input, %weights):
%packed_params = torch.ops.quantized.linear_prepack.default(W=%weights, B=None)
%1254 = torch.ops.quantized.linear.default(X=%input, W_prepack=%packed_params, Y_scale_i=1.0, Y_zero_point_i=1)
%res = torch.ops.aten.dequantize.self(self=%1254)
return (%res)
)";
  at::Tensor input =
      at::quantize_per_tensor(torch::randn({3, 2}), 2, 3, torch::kQUInt8);
  at::Tensor weight =
      at::quantize_per_tensor(torch::randn({3, 2}), 2, 3, torch::kQInt8);
  testStaticKernelEquality(graph, {input, weight});
}

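// The NativeKernelTest cases below cover view-producing ops (view, permute,
// reshape, select, slice, narrow). Each graph clones the aliasing result so
// the equality check compares materialized tensors rather than views.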
TEST(NativeKernelTest, View) {
  const std::string source =
      R"(graph(%self):
%ret = torch.ops.aten.view.default(self=%self, size=[36])
%cloned = torch.ops.aten.clone.default(self=%ret, memory_format=None)
return (%cloned)
)";
  auto self0 = at::rand({6, 6});
  std::vector<c10::IValue> args{self0};
  testStaticKernelEquality(source, args, true);
}

TEST(NativeKernelTest, Permute) {
  const std::string source =
      R"(graph(%self):
%ret = torch.ops.aten.permute.default(self=%self, dims=[1, 0])
%cloned = torch.ops.aten.clone.default(self=%ret, memory_format=None)
return (%cloned)
)";
  auto self0 = at::rand({2, 3});
  std::vector<c10::IValue> args{self0};
  testStaticKernelEquality(source, args, true);
}

TEST(NativeKernelTest, Reshape) {
  const std::string source =
      R"(graph(%self):
%ret = torch.ops.aten.reshape.default(self=%self, shape=[9, 4])
%cloned = torch.ops.aten.clone.default(self=%ret, memory_format=None)
return (%cloned)
)";
  auto self0 = at::rand({3, 3, 4});
  std::vector<c10::IValue> args{self0};
  testStaticKernelEquality(source, args, true);
}

TEST(NativeKernelTest, Select) {
  static constexpr std::string_view source =
      R"(graph(%self):
%ret = torch.ops.aten.select.int(self=%self, dim=1, index=0)
%cloned = torch.ops.aten.clone.default(self=%ret, memory_format=None)
return (%cloned)
)";
  auto self0 = at::rand({3, 3, 3});
  std::vector<c10::IValue> args{self0};
  testStaticKernelEquality(source, args, true);
}

TEST(NativeKernelTest, Slice) {
  const std::string graph =
      R"(graph(%self):
%ret = torch.ops.aten.slice.Tensor(self=%self, dim=0, start=1, end=3, step=1)
%cloned = torch.ops.aten.clone.default(self=%ret, memory_format=None)
return (%cloned)
)";
  auto self0 = at::rand({5, 5});
  std::vector<c10::IValue> args{self0};
  testStaticKernelEquality(graph, args, true);
}

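// aten.split and its variants return a list of views into `self`, so these
// graphs return a tensor list rather than a single tensor.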
TEST(NativeKernelTest, Split) {
  const std::string graph =
      R"(graph(%self):
%ret = torch.ops.aten.split.Tensor(self=%self, split_size=2, dim=0)
return (%ret)
)";
  auto self0 = at::rand({6, 6});
  std::vector<c10::IValue> args{self0};
  testStaticKernelEquality(graph, args, true);
}

TEST(NativeKernelTest, SplitWithSizes) {
  const std::string graph =
      R"(graph(%self):
%ret = torch.ops.aten.split_with_sizes.default(self=%self, split_sizes=[2, 4], dim=0)
return (%ret)
)";
  auto self0 = at::rand({6, 6});
  std::vector<c10::IValue> args{self0};
  testStaticKernelEquality(graph, args, true);
}

TEST(NativeKernelTest, TensorSplitSections) {
  const std::string graph =
      R"(graph(%self):
%ret = torch.ops.aten.tensor_split.sections(self=%self, sections=3, dim=0)
return (%ret)
)";
  auto self0 = at::rand({9, 3});
  std::vector<c10::IValue> args{self0};
  testStaticKernelEquality(graph, args, true);
}

TEST(StaticKernelTest, Stack) {
  const std::string graph =
      R"(graph(%tensors):
%ret = torch.ops.aten.stack.default(tensors=%tensors, dim=0)
return (%ret)
)";
  auto tensor1 = at::rand({2, 3});
  auto tensor2 = at::rand({2, 3});
  auto tensor3 = at::rand({2, 3});
  std::vector<c10::IValue> args{
      std::vector<at::Tensor>{tensor1, tensor2, tensor3}};
  testStaticKernelEquality(graph, args, true);
}

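// aten.item extracts the value from a single-element tensor, so the graph
// below returns a scalar IValue rather than a tensor.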
TEST(NativeKernelTest, Item) {
  const std::string graph =
      R"(graph(%self):
%ret = torch.ops.aten.item.default(self=%self)
return (%ret)
)";
  auto self0 = at::tensor({42.0});
  std::vector<c10::IValue> args{self0};
  testStaticKernelEquality(graph, args, true);
}

TEST(NativeKernelTest, Narrow) {
  const std::string graph =
      R"(graph(%self, %dim, %start, %length):
%ret = torch.ops.aten.narrow.default(self=%self, dim=%dim, start=%start, length=%length)
%cloned = torch.ops.aten.clone.default(self=%ret, memory_format=None)
return (%cloned)
)";
  auto self = at::rand({5, 5});
  int64_t dim = 1;
  int64_t start = 1;
  int64_t length = 3;
  std::vector<c10::IValue> args{self, dim, start, length};
  testStaticKernelEquality(graph, args, true);
}

} // namespace torch::nativert