From 9e3a2babfaa8bff5715e5f81006d70c15dc9e5cd Mon Sep 17 00:00:00 2001
From: Priya Ramani <priyaramani@fb.com>
Date: Sat, 16 Oct 2021 20:01:19 -0700
Subject: [PATCH] Make aotCompile support multiple input sizes (#66727)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/66727

Make aotCompile support multiple input sizes

Test Plan:
Able to compile and run a model with multiple inputs
```
(pytorch)  ~/fbsource/fbcode/caffe2/fb/nnc
└─ $ PYTORCH_JIT_LOG_LEVEL=aot_compiler buck run //caffe2/binaries:aot_model_compiler -- --model aot_test_model.pt --model_name=aot_test_model --model_version=v1 --input_dims="2,2,2;2,2,2"
Building: finished in 3.2 sec (100%) 7461/7461 jobs, 0/7461 updated
  Total time: 3.4 sec
BUILD SUCCEEDED
[DUMP aot_compiler.cpp:097] graph before shape propagation
[DUMP aot_compiler.cpp:097] graph(%x.1 : Tensor,
[DUMP aot_compiler.cpp:097]       %y.1 : Tensor):
[DUMP aot_compiler.cpp:097]   %3 : int = prim::Constant[value=1]() # :0:0
[DUMP aot_compiler.cpp:097]   %4 : Tensor = aten::add(%x.1, %y.1, %3) # /data/users/priyaramani/fbsource/fbcode/caffe2/test/mobile/nnc/aot_test_model.py:10:15
[DUMP aot_compiler.cpp:097]   return (%4)
(1,.,.) =
  0.3357  0.6137
  0.8472  0.0858

(2,.,.) =
  0.8406  0.2959
  0.6012  0.7184
[ CPUFloatType{2,2,2} ]
(1,.,.) =
  0.7086  0.6398
  0.0579  0.1913

(2,.,.) =
  0.8598  0.3641
  0.5925  0.0200
[ CPUFloatType{2,2,2} ]
here
2
2
graph 0x6130001ee2d0
[DUMP aot_compiler.cpp:118] graph after shape propagation
[DUMP aot_compiler.cpp:118] graph(%x.1 : Float(2, 2, 2, strides=[4, 2, 1], requires_grad=0, device=cpu),
[DUMP aot_compiler.cpp:118]       %y.1 : Float(2, 2, 2, strides=[4, 2, 1], requires_grad=0, device=cpu)):
[DUMP aot_compiler.cpp:118]   %3 : int = prim::Constant[value=1]() # :0:0
[DUMP aot_compiler.cpp:118]   %4 : Tensor(2, 2, 2) = aten::add(%x.1, %y.1, %3) # /data/users/priyaramani/fbsource/fbcode/caffe2/test/mobile/nnc/aot_test_model.py:10:15
[DUMP aot_compiler.cpp:118]   return (%4)
The compiled llvm assembly code was saved to aot_test_model.compiled.ll
The compiled model was saved to aot_test_model.compiled.pt

└─ $ ./compile_model.sh -m aot_test_model -p /data/users/priyaramani/fbsource/fbcode/caffe2/fb/nnc/aot_test_model.pt -v v1 -i "2,2,2;2,2,2"
+ VERSION=v1
+ getopts m:p:v:i:h opt
+ case $opt in
+ MODEL=aot_test_model
+ getopts m:p:v:i:h opt
+ case $opt in
+ MODEL_PATH=/data/users/priyaramani/fbsource/fbcode/caffe2/fb/nnc/aot_test_model.pt
+ getopts m:p:v:i:h opt
+ case $opt in
+ VERSION=v1
+ getopts m:p:v:i:h opt
+ case $opt in
+ INPUT_DIMS='2,2,2;2,2,2'
+ getopts m:p:v:i:h opt
+ require_arg m aot_test_model
+ '[' -n aot_test_model ']'
+ require_arg p /data/users/priyaramani/fbsource/fbcode/caffe2/fb/nnc/aot_test_model.pt
+ '[' -n /data/users/priyaramani/fbsource/fbcode/caffe2/fb/nnc/aot_test_model.pt ']'
+ require_arg i '2,2,2;2,2,2'
+ '[' -n '2,2,2;2,2,2' ']'
+ '[' '!' -f /data/users/priyaramani/fbsource/fbcode/caffe2/fb/nnc/aot_test_model.pt ']'
+++ dirname ./compile_model.sh
++ cd .
++ pwd -P
+ SRC_DIR=/data/users/priyaramani/fbsource/fbcode/caffe2/fb/nnc
+ FBCODE_DIR=/data/users/priyaramani/fbsource/fbcode/caffe2/fb/nnc/../../..
+ FBSOURCE_DIR=/data/users/priyaramani/fbsource/fbcode/caffe2/fb/nnc/../../../..
+ KERNEL_DIR=/data/users/priyaramani/fbsource/fbcode/caffe2/fb/nnc/../../../../xplat/pytorch_models/build/aot_test_model/v1/nnc
++ echo /data/users/priyaramani/fbsource/fbcode/caffe2/fb/nnc/aot_test_model.pt
++ sed 's/.pt.*//'
+ MODEL_PATH_PREFIX=/data/users/priyaramani/fbsource/fbcode/caffe2/fb/nnc/aot_test_model
+ LLVM_CODE_PATH=/data/users/priyaramani/fbsource/fbcode/caffe2/fb/nnc/aot_test_model.compiled.ll
+ ASSEMBLY_CODE_PATH=/data/users/priyaramani/fbsource/fbcode/caffe2/fb/nnc/aot_test_model.compiled.s
+ COMPILED_MODEL_FILE_PATH=/data/users/priyaramani/fbsource/fbcode/caffe2/fb/nnc/aot_test_model.compiled.pt
+ KERNEL_FUNC_NAME=nnc_aot_test_model_v1_forward
+ cd /data/users/priyaramani/fbsource/fbcode/caffe2/fb/nnc/../../../..
+ buck run //xplat/caffe2/fb/lite_predictor:lite_predictor_nnc -- --model /data/users/priyaramani/fbsource/fbcode/caffe2/fb/nnc/aot_test_model.compiled.pt --print_output true --input_dims '2,2,2;2,2,2' --input_type 'float;float' --input_memory_format 'contiguous_format;contiguous_format'
clang-9: warning: argument unused during compilation: '-pthread' [-Wunused-command-line-argument]

Downloaded 1/4 artifacts, 2.11 Kbytes, 50.0% cache miss (for updated rules)
Building: finished in 12.2 sec (100%) 4572/4572 jobs, 3/4572 updated
  Total time: 12.2 sec
BUILD SUCCEEDED
Run with 56 threads
Run with 56 threads
Loading model...
Model loaded: /data/users/priyaramani/fbsource/fbcode/caffe2/fb/nnc/aot_test_model.compiled.pt
Running forward ...
(1,.,.) =
 -0.7451 -0.7451
 -0.7451 -0.7451

(2,.,.) =
 -0.7451 -0.7451
 -0.7451 -0.7451
[ CPUFloatType{2,2,2} ]
Starting benchmark.
Running warmup runs.
Main runs.
Main run finished. Milliseconds per iter: 0.0887. Iters per second: 11274
Memory usage before main runs: 71262208 bytes
Memory usage after main runs: 71573504 bytes
Average memory increase per iter: 31129.6 bytes
0 value means "not available" in above
```

Reviewed By: ljk53

Differential Revision: D31631975

fbshipit-source-id: 7956787b3e121f9c14f4733398a64c2f7ae84373
---
 binaries/aot_model_compiler.cc             | 21 ++++++++-------
 torch/csrc/jit/mobile/nnc/aot_compiler.cpp | 30 +++++++++++++++-------
 torch/csrc/jit/mobile/nnc/aot_compiler.h   |  2 +-
 3 files changed, 32 insertions(+), 21 deletions(-)

diff --git a/binaries/aot_model_compiler.cc b/binaries/aot_model_compiler.cc
index eab9938e19e..ba751ac84bc 100644
--- a/binaries/aot_model_compiler.cc
+++ b/binaries/aot_model_compiler.cc
@@ -67,21 +67,20 @@ c10::Dict<c10::IValue, c10::IValue> createCompileSpec() {
   c10::Dict<c10::IValue, c10::IValue> method_spec(
       c10::StringType::get(), c10::AnyType::get());
   auto input_shapes = parseInputShapes();
-  TORCH_CHECK(
-      input_shapes.size() == 1,
-      "Wrong # of input shapes: ",
-      input_shapes.size());
-  method_spec.insert("sizes", input_shapes[0]); // TODO: support multiple inputs
+  method_spec.insert("sizes", input_shapes);
   compile_spec.insert(FLAGS_method_name, method_spec);
   return compile_spec;
 }
 
-std::vector<int64_t> getInputSizesForMethod(
+std::vector<std::vector<int64_t>> getInputSizes (
     const c10::Dict<c10::IValue, c10::IValue>& method_compile_spec) {
-  return method_compile_spec.at(FLAGS_method_name)
-      .toGenericDict()
-      .at("sizes")
-      .toIntVector();
+  auto input_shapes = method_compile_spec.at(FLAGS_method_name).toGenericDict().at("sizes").toList();
+  std::vector<std::vector<int64_t>> inputSizes;
+  for (const auto& input_shape : input_shapes) {
+    auto sizes = ((c10::IValue) input_shape).toIntVector();
+    inputSizes.emplace_back(sizes);
+  }
+  return inputSizes;
 }
 
 std::string getNncKernelId() {
@@ -117,7 +116,7 @@ c10::IValue preprocess(
 
   auto method = mod.get_method(FLAGS_method_name);
   auto graph = method.function().graph()->copy();
-  auto sizes = getInputSizesForMethod(method_compile_spec);
+  auto sizes = getInputSizes(method_compile_spec);
 
   std::string llvm_asm_code;
   auto compiled = torch::jit::mobile::nnc::aotCompile(FLAGS_method_name, graph, sizes);
diff --git a/torch/csrc/jit/mobile/nnc/aot_compiler.cpp b/torch/csrc/jit/mobile/nnc/aot_compiler.cpp
index 234594c4d82..f979cd51056 100644
--- a/torch/csrc/jit/mobile/nnc/aot_compiler.cpp
+++ b/torch/csrc/jit/mobile/nnc/aot_compiler.cpp
@@ -33,20 +33,27 @@ std::vector<int64_t> getConstSizes(const BufPtr b) {
   return r;
 }
 
+std::vector<mobile::nnc::InputSpec> toInputSpecs(
+    const std::vector<std::vector<int64_t>>& inputSizes) {
+  std::vector<mobile::nnc::InputSpec> specs;
+  for (const auto& sizes : inputSizes) {
+    mobile::nnc::InputSpec spec;
+    spec.sizes_ = sizes;
+    spec.dtype_ = c10::ScalarType::Float;
+    specs.emplace_back(std::move(spec));
+  }
+  return specs;
+}
+
 std::unique_ptr<Function> compileMethod(
     std::shared_ptr<tensorexpr::TensorExprKernel> kernel,
     const std::string& method_name,
-    const std::vector<int64_t>& sizes) {
+    const std::vector<std::vector<int64_t>>& sizes) {
   auto func = std::make_unique<Function>();
   func->set_name(method_name);
-
-  InputSpec input;
-  input.sizes_ = sizes;
-  input.dtype_ = c10::ScalarType::Float;
-  func->set_input_specs({input});
+  func->set_input_specs(toInputSpecs(sizes));
 
   std::vector<at::Tensor> parameters;
-
   auto const_descriptors = kernel->getConstantDescriptors();
   for (const auto& cd : const_descriptors) {
     auto sizes = getConstSizes(cd.buf);
@@ -80,7 +87,7 @@ std::unique_ptr<Function> compileMethod(
 std::pair<std::unique_ptr<Function>, const std::string> aotCompile(
     const std::string& method_name,
     std::shared_ptr<Graph>& g,
-    const std::vector<int64_t>& sizes) {
+    const std::vector<std::vector<int64_t>>& sizes) {
   GRAPH_DEBUG("Input sizes ", sizes);
   GRAPH_DEBUG("Method name ", method_name);
 
@@ -89,7 +96,12 @@ std::pair<std::unique_ptr<Function>, const std::string> aotCompile(
   g = tensorexpr::removeUnusedSelfArgument(g);
   GRAPH_DUMP("graph before shape propagation ", g);
 
-  std::vector<c10::optional<at::Tensor>> example_inputs = {at::rand(sizes)};
+  std::vector<c10::optional<at::Tensor>> example_inputs;
+  for (const auto& size : sizes) {
+    auto example_input = at::rand(size);
+    example_inputs.emplace_back(example_input);
+  }
+
   tensorexpr::annotateInputShapes(g, example_inputs);
 
   PropagateShapesOnGraph(g);
diff --git a/torch/csrc/jit/mobile/nnc/aot_compiler.h b/torch/csrc/jit/mobile/nnc/aot_compiler.h
index 966337e71a3..d0a7a1ec4a8 100644
--- a/torch/csrc/jit/mobile/nnc/aot_compiler.h
+++ b/torch/csrc/jit/mobile/nnc/aot_compiler.h
@@ -14,7 +14,7 @@ namespace nnc {
 TORCH_API std::pair<std::unique_ptr<Function>, const std::string> aotCompile(
     const std::string& method_name,
     std::shared_ptr<Graph>& subgraph,
-    const std::vector<int64_t>& sizes);
+    const std::vector<std::vector<int64_t>>& sizes);
 
 } // namespace nnc
 } // namespace mobile