From 9e3a2babfaa8bff5715e5f81006d70c15dc9e5cd Mon Sep 17 00:00:00 2001 From: Priya Ramani Date: Sat, 16 Oct 2021 20:01:19 -0700 Subject: [PATCH] Make aotCompile support multiple input sizes (#66727) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/66727 Make aotCompile support multiple input sizes Test Plan: Able to compile and run a model with multiple inputs ``` (pytorch) ~/fbsource/fbcode/caffe2/fb/nnc └─ $ PYTORCH_JIT_LOG_LEVEL=aot_compiler buck run //caffe2/binaries:aot_model_compiler -- --model aot_test_model.pt --model_name=aot_test_model --model_version=v1 --input_dims="2,2,2;2,2,2" Building: finished in 3.2 sec (100%) 7461/7461 jobs, 0/7461 updated Total time: 3.4 sec BUILD SUCCEEDED [DUMP aot_compiler.cpp:097] graph before shape propagation [DUMP aot_compiler.cpp:097] graph(%x.1 : Tensor, [DUMP aot_compiler.cpp:097] %y.1 : Tensor): [DUMP aot_compiler.cpp:097] %3 : int = prim::Constant[value=1]() # :0:0 [DUMP aot_compiler.cpp:097] %4 : Tensor = aten::add(%x.1, %y.1, %3) # /data/users/priyaramani/fbsource/fbcode/caffe2/test/mobile/nnc/aot_test_model.py:10:15 [DUMP aot_compiler.cpp:097] return (%4) (1,.,.) = 0.3357 0.6137 0.8472 0.0858 (2,.,.) = 0.8406 0.2959 0.6012 0.7184 [ CPUFloatType{2,2,2} ] (1,.,.) = 0.7086 0.6398 0.0579 0.1913 (2,.,.) = 0.8598 0.3641 0.5925 0.0200 [ CPUFloatType{2,2,2} ] here 2 2 graph 0x6130001ee2d0 [DUMP aot_compiler.cpp:118] graph after shape propagation [DUMP aot_compiler.cpp:118] graph(%x.1 : Float(2, 2, 2, strides=[4, 2, 1], requires_grad=0, device=cpu), [DUMP aot_compiler.cpp:118] %y.1 : Float(2, 2, 2, strides=[4, 2, 1], requires_grad=0, device=cpu)): [DUMP aot_compiler.cpp:118] %3 : int = prim::Constant[value=1]() # :0:0 [DUMP aot_compiler.cpp:118] %4 : Tensor(2, 2, 2) = aten::add(%x.1, %y.1, %3) # /data/users/priyaramani/fbsource/fbcode/caffe2/test/mobile/nnc/aot_test_model.py:10:15 [DUMP aot_compiler.cpp:118] return (%4) The compiled llvm assembly code was saved to aot_test_model.compiled.ll The compiled model was saved to aot_test_model.compiled.pt └─ $ ./compile_model.sh -m aot_test_model -p /data/users/priyaramani/fbsource/fbcode/caffe2/fb/nnc/aot_test_model.pt -v v1 -i "2,2,2;2,2,2" + VERSION=v1 + getopts m:p:v:i:h opt + case $opt in + MODEL=aot_test_model + getopts m:p:v:i:h opt + case $opt in + MODEL_PATH=/data/users/priyaramani/fbsource/fbcode/caffe2/fb/nnc/aot_test_model.pt + getopts m:p:v:i:h opt + case $opt in + VERSION=v1 + getopts m:p:v:i:h opt + case $opt in + INPUT_DIMS='2,2,2;2,2,2' + getopts m:p:v:i:h opt + require_arg m aot_test_model + '[' -n aot_test_model ']' + require_arg p /data/users/priyaramani/fbsource/fbcode/caffe2/fb/nnc/aot_test_model.pt + '[' -n /data/users/priyaramani/fbsource/fbcode/caffe2/fb/nnc/aot_test_model.pt ']' + require_arg i '2,2,2;2,2,2' + '[' -n '2,2,2;2,2,2' ']' + '[' '!' -f /data/users/priyaramani/fbsource/fbcode/caffe2/fb/nnc/aot_test_model.pt ']' +++ dirname ./compile_model.sh ++ cd . ++ pwd -P + SRC_DIR=/data/users/priyaramani/fbsource/fbcode/caffe2/fb/nnc + FBCODE_DIR=/data/users/priyaramani/fbsource/fbcode/caffe2/fb/nnc/../../.. + FBSOURCE_DIR=/data/users/priyaramani/fbsource/fbcode/caffe2/fb/nnc/../../../.. + KERNEL_DIR=/data/users/priyaramani/fbsource/fbcode/caffe2/fb/nnc/../../../../xplat/pytorch_models/build/aot_test_model/v1/nnc ++ echo /data/users/priyaramani/fbsource/fbcode/caffe2/fb/nnc/aot_test_model.pt ++ sed 's/.pt.*//' + MODEL_PATH_PREFIX=/data/users/priyaramani/fbsource/fbcode/caffe2/fb/nnc/aot_test_model + LLVM_CODE_PATH=/data/users/priyaramani/fbsource/fbcode/caffe2/fb/nnc/aot_test_model.compiled.ll + ASSEMBLY_CODE_PATH=/data/users/priyaramani/fbsource/fbcode/caffe2/fb/nnc/aot_test_model.compiled.s + COMPILED_MODEL_FILE_PATH=/data/users/priyaramani/fbsource/fbcode/caffe2/fb/nnc/aot_test_model.compiled.pt + KERNEL_FUNC_NAME=nnc_aot_test_model_v1_forward + cd /data/users/priyaramani/fbsource/fbcode/caffe2/fb/nnc/../../../.. + buck run //xplat/caffe2/fb/lite_predictor:lite_predictor_nnc -- --model /data/users/priyaramani/fbsource/fbcode/caffe2/fb/nnc/aot_test_model.compiled.pt --print_output true --input_dims '2,2,2$ 2,2,2' --input_type 'float;float' --input_memory_format 'contiguous_format;contiguous_format' clang-9: warning: argument unused during compilation: '-pthread' [-Wunused-command-line-argument] Downloaded 1/4 artifacts, 2.11 Kbytes, 50.0% cache miss (for updated rules) Building: finished in 12.2 sec (100%) 4572/4572 jobs, 3/4572 updated Total time: 12.2 sec BUILD SUCCEEDED Run with 56 threads Run with 56 threads Loading model... Model loaded: /data/users/priyaramani/fbsource/fbcode/caffe2/fb/nnc/aot_test_model.compiled.pt Running forward ... (1,.,.) = -0.7451 -0.7451 -0.7451 -0.7451 (2,.,.) = -0.7451 -0.7451 -0.7451 -0.7451 [ CPUFloatType{2,2,2} ] Starting benchmark. Running warmup runs. Main runs. Main run finished. Milliseconds per iter: 0.0887. Iters per second: 11274 Memory usage before main runs: 71262208 bytes Memory usage after main runs: 71573504 bytes Average memory increase per iter: 31129.6 bytes 0 value means "not available" in above ``` Reviewed By: ljk53 Differential Revision: D31631975 fbshipit-source-id: 7956787b3e121f9c14f4733398a64c2f7ae84373 --- binaries/aot_model_compiler.cc | 21 ++++++++------- torch/csrc/jit/mobile/nnc/aot_compiler.cpp | 30 +++++++++++++++------- torch/csrc/jit/mobile/nnc/aot_compiler.h | 2 +- 3 files changed, 32 insertions(+), 21 deletions(-) diff --git a/binaries/aot_model_compiler.cc b/binaries/aot_model_compiler.cc index eab9938e19e..ba751ac84bc 100644 --- a/binaries/aot_model_compiler.cc +++ b/binaries/aot_model_compiler.cc @@ -67,21 +67,20 @@ c10::Dict createCompileSpec() { c10::Dict method_spec( c10::StringType::get(), c10::AnyType::get()); auto input_shapes = parseInputShapes(); - TORCH_CHECK( - input_shapes.size() == 1, - "Wrong # of input shapes: ", - input_shapes.size()); - method_spec.insert("sizes", input_shapes[0]); // TODO: support multiple inputs + method_spec.insert("sizes", input_shapes); compile_spec.insert(FLAGS_method_name, method_spec); return compile_spec; } -std::vector getInputSizesForMethod( +std::vector> getInputSizes ( const c10::Dict& method_compile_spec) { - return method_compile_spec.at(FLAGS_method_name) - .toGenericDict() - .at("sizes") - .toIntVector(); + auto input_shapes = method_compile_spec.at(FLAGS_method_name).toGenericDict().at("sizes").toList(); + std::vector> inputSizes; + for (const auto& input_shape : input_shapes) { + auto sizes = ((c10::IValue) input_shape).toIntVector(); + inputSizes.emplace_back(sizes); + } + return inputSizes; } std::string getNncKernelId() { @@ -117,7 +116,7 @@ c10::IValue preprocess( auto method = mod.get_method(FLAGS_method_name); auto graph = method.function().graph()->copy(); - auto sizes = getInputSizesForMethod(method_compile_spec); + auto sizes = getInputSizes(method_compile_spec); std::string llvm_asm_code; auto compiled = torch::jit::mobile::nnc::aotCompile(FLAGS_method_name, graph, sizes); diff --git a/torch/csrc/jit/mobile/nnc/aot_compiler.cpp b/torch/csrc/jit/mobile/nnc/aot_compiler.cpp index 234594c4d82..f979cd51056 100644 --- a/torch/csrc/jit/mobile/nnc/aot_compiler.cpp +++ b/torch/csrc/jit/mobile/nnc/aot_compiler.cpp @@ -33,20 +33,27 @@ std::vector getConstSizes(const BufPtr b) { return r; } +std::vector toInputSpecs( + const std::vector>& inputSizes) { + std::vector specs; + for (const auto& sizes : inputSizes) { + mobile::nnc::InputSpec spec; + spec.sizes_ = sizes; + spec.dtype_ = c10::ScalarType::Float; + specs.emplace_back(std::move(spec)); + } + return specs; +} + std::unique_ptr compileMethod( std::shared_ptr kernel, const std::string& method_name, - const std::vector& sizes) { + const std::vector>& sizes) { auto func = std::make_unique(); func->set_name(method_name); - - InputSpec input; - input.sizes_ = sizes; - input.dtype_ = c10::ScalarType::Float; - func->set_input_specs({input}); + func->set_input_specs(toInputSpecs(sizes)); std::vector parameters; - auto const_descriptors = kernel->getConstantDescriptors(); for (const auto& cd : const_descriptors) { auto sizes = getConstSizes(cd.buf); @@ -80,7 +87,7 @@ std::unique_ptr compileMethod( std::pair, const std::string> aotCompile( const std::string& method_name, std::shared_ptr& g, - const std::vector& sizes) { + const std::vector>& sizes) { GRAPH_DEBUG("Input sizes ", sizes); GRAPH_DEBUG("Method name ", method_name); @@ -89,7 +96,12 @@ std::pair, const std::string> aotCompile( g = tensorexpr::removeUnusedSelfArgument(g); GRAPH_DUMP("graph before shape propagation ", g); - std::vector> example_inputs = {at::rand(sizes)}; + std::vector> example_inputs; + for (const auto& size : sizes) { + auto example_input = at::rand(size); + example_inputs.emplace_back(example_input); + } + tensorexpr::annotateInputShapes(g, example_inputs); PropagateShapesOnGraph(g); diff --git a/torch/csrc/jit/mobile/nnc/aot_compiler.h b/torch/csrc/jit/mobile/nnc/aot_compiler.h index 966337e71a3..d0a7a1ec4a8 100644 --- a/torch/csrc/jit/mobile/nnc/aot_compiler.h +++ b/torch/csrc/jit/mobile/nnc/aot_compiler.h @@ -14,7 +14,7 @@ namespace nnc { TORCH_API std::pair, const std::string> aotCompile( const std::string& method_name, std::shared_ptr& subgraph, - const std::vector& sizes); + const std::vector>& sizes); } // namespace nnc } // namespace mobile