diff options
| author | Ryan Winograd <[email protected]> | 2020-08-13 21:05:59 -0500 |
|---|---|---|
| committer | Ryan Winograd <[email protected]> | 2020-08-19 14:59:17 -0500 |
| commit | e7673f431813cde7aab2d032a4417d4ddeeb9342 (patch) | |
| tree | 95d2758cd969c9c4b7c3d38a887ae75a65185f7e | |
| parent | 33a53474a90f1825ce20c66dab481fdcfa1106bf (diff) | |
| download | caxlsx-e7673f431813cde7aab2d032a4417d4ddeeb9342.tar.gz caxlsx-e7673f431813cde7aab2d032a4417d4ddeeb9342.zip | |
Add option to `#serialize` with system zip command
Add a `:zip_command` option to `Axlsx::Package#serialize` that allows
the user to specify an alternate command to use to perform the zip
operation on the XLSX file contents.
The default zip operation is provided by RubyZip. On large documents,
users may experience faster zip times using a zip binary.
Resolves #55
| -rw-r--r-- | CHANGELOG.md | 1 | ||||
| -rw-r--r-- | lib/axlsx.rb | 1 | ||||
| -rw-r--r-- | lib/axlsx/package.rb | 21 | ||||
| -rw-r--r-- | lib/axlsx/util/zip_command.rb | 73 | ||||
| -rw-r--r-- | test/tc_package.rb | 27 |
5 files changed, 117 insertions, 6 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index 1fb3a588..af3432c7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ CHANGELOG --------- - **Unreleased** + - [PR #56](https://github.com/caxlsx/caxlsx/pull/56) - Add `zip_command` option to `#serialize` for faster serialization of large Excel files by using a zip binary - [PR #54](https://github.com/caxlsx/caxlsx/pull/54) - Fix type detection for floats with out-of-rage exponents - **July.16.20**: 3.0.2 diff --git a/lib/axlsx.rb b/lib/axlsx.rb index e60660b6..be4a2cd5 100644 --- a/lib/axlsx.rb +++ b/lib/axlsx.rb @@ -10,6 +10,7 @@ require 'axlsx/util/accessors.rb' require 'axlsx/util/serialized_attributes' require 'axlsx/util/options_parser' require 'axlsx/util/mime_type_utils' +require 'axlsx/util/zip_command' require 'axlsx/stylesheet/styles.rb' diff --git a/lib/axlsx/package.rb b/lib/axlsx/package.rb index 124375d4..3afb6f8d 100644 --- a/lib/axlsx/package.rb +++ b/lib/axlsx/package.rb @@ -75,6 +75,9 @@ module Axlsx # # @param [String] output The name of the file you want to serialize your package to # @param [Boolean] confirm_valid Validate the package prior to serialization. + # @param [String, nil] zip_command When `nil`, `#serialize` with RubyZip to + # zip the XLSX file contents. When a String, the provided zip command (e.g., + # "zip") is used to zip the file contents (may be faster for large files) # @return [Boolean] False if confirm_valid and validation errors exist. True if the package was serialized # @note A tremendous amount of effort has gone into ensuring that you cannot create invalid xlsx documents. # confirm_valid should be used in the rare case that you cannot open the serialized file. @@ -88,13 +91,23 @@ module Axlsx # # ......add cool stuff to your workbook...... 
# p.serialize("example.xlsx") # + # # Serialize to a file, using a system zip binary + # p.serialize("example.xlsx", false, zip_command: "zip") + # p.serialize("example.xlsx", false, zip_command: "/path/to/zip") + # p.serialize("example.xlsx", false, zip_command: "zip -1") + # # # Serialize to a stream # s = p.to_stream() # File.open('example_streamed.xlsx', 'w') { |f| f.write(s.read) } - def serialize(output, confirm_valid=false) + def serialize(output, confirm_valid=false, zip_command: nil) return false unless !confirm_valid || self.validate.empty? + zip_provider = if zip_command + ZipCommand.new(zip_command) + else + Zip::OutputStream + end Relationship.initialize_ids_cache - Zip::OutputStream.open(output) do |zip| + zip_provider.open(output) do |zip| write_parts(zip) end true @@ -153,8 +166,8 @@ module Axlsx private # Writes the package parts to a zip archive. - # @param [Zip::OutputStream] zip - # @return [Zip::OutputStream] + # @param [Zip::OutputStream, ZipCommand] zip + # @return [Zip::OutputStream, ZipCommand] def write_parts(zip) p = parts p.each do |part| diff --git a/lib/axlsx/util/zip_command.rb b/lib/axlsx/util/zip_command.rb new file mode 100644 index 00000000..fb336209 --- /dev/null +++ b/lib/axlsx/util/zip_command.rb @@ -0,0 +1,73 @@ +# encoding: UTF-8 +require 'open3' +require 'shellwords' + +module Axlsx + + # The ZipCommand class supports zipping the Excel file contents using + # a binary zip program instead of RubyZip's `Zip::OutputStream`. + # + # The methods provided here mimic `Zip::OutputStream` so that `ZipCommand` can + # be used as a drop-in replacement. Note that method signatures are not + # identical to `Zip::OutputStream`, they are only sufficiently close so that + # `ZipCommand` and `Zip::OutputStream` can be interchangeably used within + # `caxlsx`. + class ZipCommand + # Raised when the zip command exits with a non-zero status. 
+ class ZipError < StandardError; end + + def initialize(zip_command) + @current_file = nil + @files = [] + @zip_command = zip_command + end + + # Create a temporary directory for writing files to. + # + # The directory and its contents are removed at the end of the block. + def open(output, &block) + Dir.mktmpdir do |dir| + @dir = dir + block.call(self) + write_file + zip_parts(output) + end + end + + # Closes the current entry and opens a new for writing. + def put_next_entry(entry) + write_file + @current_file = "#{@dir}/#{entry.name}" + @files << entry.name + FileUtils.mkdir_p(File.dirname(@current_file)) + end + + # Write to a buffer that will be written to the current entry + def write(content) + @buffer << content + end + alias << write + + private + + def write_file + if @current_file + @buffer.rewind + File.open(@current_file, "wb") { |f| f.write @buffer.read } + end + @current_file = nil + @buffer = StringIO.new + end + + def zip_parts(output) + output = Shellwords.shellescape(File.absolute_path(output)) + inputs = Shellwords.shelljoin(@files) + escaped_dir = Shellwords.shellescape(@dir) + command = "cd #{escaped_dir} && #{@zip_command} #{output} #{inputs}" + stdout_and_stderr, status = Open3.capture2e(command) + if !status.success? 
+ raise(ZipError.new(stdout_and_stderr)) + end + end + end +end diff --git a/test/tc_package.rb b/test/tc_package.rb index 23078862..18c08f5c 100644 --- a/test/tc_package.rb +++ b/test/tc_package.rb @@ -128,11 +128,34 @@ class TestPackage < Test::Unit::TestCase def test_serialization @package.serialize(@fname) - zf = Zip::File.open(@fname) - @package.send(:parts).each{ |part| zf.get_entry(part[:entry]) } + assert_zip_file_matches_package(@fname, @package) File.delete(@fname) end + def test_serialization_with_zip_command + @package.serialize(@fname, false, zip_command: "zip") + assert_zip_file_matches_package(@fname, @package) + File.delete(@fname) + end + + def test_serialization_with_zip_command_and_absolute_path + fname = "#{Dir.tmpdir}/#{@fname}" + @package.serialize(fname, false, zip_command: "zip") + assert_zip_file_matches_package(fname, @package) + File.delete(fname) + end + + def test_serialization_with_invalid_zip_command + assert_raises Axlsx::ZipCommand::ZipError do + @package.serialize(@fname, false, zip_command: "invalid_zip") + end + end + + def assert_zip_file_matches_package(fname, package) + zf = Zip::File.open(fname) + package.send(:parts).each{ |part| zf.get_entry(part[:entry]) } + end + # See comment for Package#zip_entry_for_part def test_serialization_creates_identical_files_at_any_time_if_created_at_is_set @package.core.created = Time.now |
