From e80f7506c2abe7ae41286bb92fc31d961a4a7300 Mon Sep 17 00:00:00 2001 From: Jeremy Lilley Date: Fri, 8 Nov 2019 15:01:53 -0800 Subject: [PATCH] In torch::save(), make padding computation faster. (#29425) Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/29425 This change saves roughly 5-6% in the TorchSaveSmallTensor benchmark (torch::save() on a tensor with 64 random floats) by reusing the padding string across records. ghstack-source-id: 93517961 Test Plan: Correctness: buck test mode/dev-nosan caffe2/test/... Benchmark buck build mode/opt experimental/jeremyl/c2/... buck-out/opt/gen/experimental/jeremyl/c2/SerializationBench Differential Revision: D18385731 fbshipit-source-id: 20bcbe1efd2fb7e3012dd68080542f2a74a7d4f2 --- caffe2/serialize/inline_container.cc | 33 ++++++++++++++++++---------- caffe2/serialize/inline_container.h | 1 + 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/caffe2/serialize/inline_container.cc b/caffe2/serialize/inline_container.cc index 120eda8c9b6..b36c1bea0f5 100644 --- a/caffe2/serialize/inline_container.cc +++ b/caffe2/serialize/inline_container.cc @@ -148,12 +148,17 @@ constexpr int MZ_ZIP_LOCAL_DIR_HEADER_SIZE = 30; constexpr int MZ_ZIP_LDH_FILENAME_LEN_OFS = 26; constexpr int MZ_ZIP_LDH_EXTRA_LEN_OFS = 28; -static std::string getPadding(size_t cursor, const std::string& filename, size_t size) { - size_t start = cursor + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + filename.size() + sizeof(mz_uint16) * 2; +static size_t getPadding( + size_t cursor, + size_t filename_size, + size_t size, + std::string& padding_buf) { + size_t start = cursor + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + filename_size + + sizeof(mz_uint16) * 2; if (size >= MZ_UINT32_MAX || cursor >= MZ_UINT32_MAX) { start += sizeof(mz_uint16) * 2; if (size >= MZ_UINT32_MAX) { - start += 2*sizeof(mz_uint64); + start += 2 * sizeof(mz_uint64); } if (cursor >= MZ_UINT32_MAX) { start += sizeof(mz_uint64); @@ -162,13 +167,16 @@ static std::string getPadding(size_t
cursor, const std::string& filename, size_t size_t mod = start % kFieldAlignment; size_t next_offset = (mod == 0) ? start : (start + kFieldAlignment - mod); size_t padding_size = next_offset - start; - std::string buf(padding_size + 4, 'Z'); + size_t padding_size_plus_fbxx = padding_size + 4; + if (padding_buf.size() < padding_size_plus_fbxx) { + padding_buf.append(padding_size_plus_fbxx - padding_buf.size(), 'Z'); + } // zip extra encoding (key, size_of_extra_bytes) - buf[0] = 'F'; - buf[1] = 'B'; - buf[2] = (uint8_t) padding_size; - buf[3] = (uint8_t) (padding_size >> 8); - return buf; + padding_buf[0] = 'F'; + padding_buf[1] = 'B'; + padding_buf[2] = (uint8_t)padding_size; + padding_buf[3] = (uint8_t)(padding_size >> 8); + return padding_size_plus_fbxx; } bool PyTorchStreamReader::hasRecord(const std::string& name) { @@ -297,7 +305,8 @@ void PyTorchStreamWriter::writeRecord( AT_ASSERT(!finalized_); AT_ASSERT(!archive_name_plus_slash_.empty()); std::string full_name = archive_name_plus_slash_ + name; - std::string padding = getPadding(ar_->m_archive_size, full_name, size); + size_t padding_size = + getPadding(ar_->m_archive_size, full_name.size(), size, padding_); uint32_t flags = compress ? MZ_BEST_COMPRESSION : 0; mz_zip_writer_add_mem_ex_v2( ar_.get(), @@ -310,8 +319,8 @@ void PyTorchStreamWriter::writeRecord( 0, 0, nullptr, - padding.c_str(), - padding.size(), + padding_.c_str(), + padding_size, nullptr, 0); valid("writing file ", name.c_str()); diff --git a/caffe2/serialize/inline_container.h b/caffe2/serialize/inline_container.h index d1823edd3fb..1cb2b0b7fe1 100644 --- a/caffe2/serialize/inline_container.h +++ b/caffe2/serialize/inline_container.h @@ -156,6 +156,7 @@ class CAFFE2_API PyTorchStreamWriter final { std::unique_ptr ar_; std::string archive_name_; std::string archive_name_plus_slash_; + std::string padding_; std::ofstream file_stream_; std::function writer_func_; bool finalized_ = false;