From e5a8faaa70337ffad6ee2a99e00c0ea3b3c69ca0 Mon Sep 17 00:00:00 2001 From: Paul Kmiec Date: Mon, 1 May 2023 08:45:54 -0700 Subject: Introduce BufferedZipOutputStream to avoid lots of small writes to Zip::OutputStream The BufferedZipOutputStream is a drop-in replacement for Zip::OutputStream similar to ZipCommand. --- lib/axlsx.rb | 1 + lib/axlsx/package.rb | 7 ++-- lib/axlsx/util/buffered_zip_output_stream.rb | 60 ++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+), 3 deletions(-) create mode 100644 lib/axlsx/util/buffered_zip_output_stream.rb diff --git a/lib/axlsx.rb b/lib/axlsx.rb index 912be81d..4f15ba23 100644 --- a/lib/axlsx.rb +++ b/lib/axlsx.rb @@ -11,6 +11,7 @@ require 'axlsx/util/accessors.rb' require 'axlsx/util/serialized_attributes' require 'axlsx/util/options_parser' require 'axlsx/util/mime_type_utils' +require 'axlsx/util/buffered_zip_output_stream' require 'axlsx/util/zip_command' require 'axlsx/stylesheet/styles.rb' diff --git a/lib/axlsx/package.rb b/lib/axlsx/package.rb index dd0432cd..b4116f76 100644 --- a/lib/axlsx/package.rb +++ b/lib/axlsx/package.rb @@ -111,7 +111,7 @@ module Axlsx zip_provider = if zip_command ZipCommand.new(zip_command) else - Zip::OutputStream + BufferedZipOutputStream end Relationship.initialize_ids_cache zip_provider.open(output) do |zip| @@ -133,8 +133,9 @@ module Axlsx return false unless !confirm_valid || self.validate.empty? 
# frozen_string_literal: true

module Axlsx
  # The BufferedZipOutputStream buffers the output in order to avoid appending many small strings directly to
  # the `Zip::OutputStream`.
  #
  # The methods provided here mimic `Zip::OutputStream` so that this class can be used as a drop-in replacement.
  class BufferedZipOutputStream
    # Flush threshold, in bytes. The 4_096 was chosen somewhat arbitrarily; however, it was difficult to see any
    # obvious improvement with larger buffer sizes.
    BUFFER_SIZE = 4_096

    # @param zos [#<<, #put_next_entry] the underlying stream that receives the buffered output
    def initialize(zos)
      @zos = zos
      # Pre-size the buffer so a write that crosses the threshold usually still fits without reallocating.
      @buffer = String.new(capacity: BUFFER_SIZE * 2)
    end

    # Opens a `Zip::OutputStream` via `Zip::OutputStream.open` and yields a buffered wrapper around it.
    #
    # Any content still sitting in the buffer is flushed to the underlying stream when the block finishes,
    # including when the block raises.
    #
    # @param file_name forwarded unchanged to `Zip::OutputStream.open`
    # @param encrypter forwarded unchanged to `Zip::OutputStream.open`
    # @yieldparam bzos [BufferedZipOutputStream]
    # @return whatever `Zip::OutputStream.open` returns
    def self.open(file_name, encrypter = nil, &block)
      Zip::OutputStream.open(file_name, encrypter) do |zos|
        wrap(zos, &block)
      end
    end

    # Same as {.open}, but delegates to `Zip::OutputStream.write_buffer` so the archive is written to `io`.
    #
    # @param io forwarded unchanged to `Zip::OutputStream.write_buffer` (a fresh `StringIO` by default)
    # @param encrypter forwarded unchanged to `Zip::OutputStream.write_buffer`
    # @yieldparam bzos [BufferedZipOutputStream]
    # @return whatever `Zip::OutputStream.write_buffer` returns
    def self.write_buffer(io = ::StringIO.new, encrypter = nil, &block)
      Zip::OutputStream.write_buffer(io, encrypter) do |zos|
        wrap(zos, &block)
      end
    end

    # Yields a buffered wrapper around `zos` and guarantees the buffer is flushed afterwards.
    def self.wrap(zos)
      bzos = new(zos)
      yield(bzos)
    ensure
      # bzos is nil only if construction itself failed; there is nothing to flush then.
      bzos&.flush
    end
    private_class_method :wrap

    # Closes the current entry and opens a new one for writing.
    #
    # Buffered content is flushed first so that it lands in the entry it was written for.
    def put_next_entry(entry)
      flush
      @zos.put_next_entry(entry)
    end

    # Write to a buffer that will be written to the current entry.
    #
    # @param content [#to_s]
    # @return [self] so calls can be chained with `<<`
    def write(content)
      @buffer << content.to_s
      # Compare bytes, not characters: the capacity above is in bytes and the zip stream consumes bytes, so
      # multibyte text must not be allowed to grow the buffer to several times BUFFER_SIZE.
      flush if @buffer.bytesize > BUFFER_SIZE
      self
    end
    alias << write

    # Hands the buffered content to the underlying stream and empties the buffer. Does nothing when the buffer
    # is already empty.
    def flush
      return if @buffer.empty?

      @zos << @buffer
      @buffer.clear
    end
  end
end