summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorRyan Winograd <[email protected]>2020-08-13 21:05:59 -0500
committerRyan Winograd <[email protected]>2020-08-19 14:59:17 -0500
commite7673f431813cde7aab2d032a4417d4ddeeb9342 (patch)
tree95d2758cd969c9c4b7c3d38a887ae75a65185f7e
parent33a53474a90f1825ce20c66dab481fdcfa1106bf (diff)
downloadcaxlsx-e7673f431813cde7aab2d032a4417d4ddeeb9342.tar.gz
caxlsx-e7673f431813cde7aab2d032a4417d4ddeeb9342.zip
Add option to `#serialize` with system zip command
Add a `:zip_command` option to `Axlsx::Package#serialize` that allows the user to specify an alternate command to use to perform the zip operation on the XLSX file contents. The default zip operation is provided by RubyZip. On large documents users may experience faster zip times using a zip binary. Resolves #55
-rw-r--r--CHANGELOG.md1
-rw-r--r--lib/axlsx.rb1
-rw-r--r--lib/axlsx/package.rb21
-rw-r--r--lib/axlsx/util/zip_command.rb73
-rw-r--r--test/tc_package.rb27
5 files changed, 117 insertions, 6 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1fb3a588..af3432c7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,7 @@ CHANGELOG
---------
- **Unreleased**
+ - [PR #56](https://github.com/caxlsx/caxlsx/pull/56) - Add `zip_command` option to `#serialize` for faster serialization of large Excel files by using a zip binary
- [PR #54](https://github.com/caxlsx/caxlsx/pull/54) - Fix type detection for floats with out-of-range exponents
- **July.16.20**: 3.0.2
diff --git a/lib/axlsx.rb b/lib/axlsx.rb
index e60660b6..be4a2cd5 100644
--- a/lib/axlsx.rb
+++ b/lib/axlsx.rb
@@ -10,6 +10,7 @@ require 'axlsx/util/accessors.rb'
require 'axlsx/util/serialized_attributes'
require 'axlsx/util/options_parser'
require 'axlsx/util/mime_type_utils'
+require 'axlsx/util/zip_command'
require 'axlsx/stylesheet/styles.rb'
diff --git a/lib/axlsx/package.rb b/lib/axlsx/package.rb
index 124375d4..3afb6f8d 100644
--- a/lib/axlsx/package.rb
+++ b/lib/axlsx/package.rb
@@ -75,6 +75,9 @@ module Axlsx
#
# @param [String] output The name of the file you want to serialize your package to
# @param [Boolean] confirm_valid Validate the package prior to serialization.
+ # @param [String, nil] zip_command When `nil`, `#serialize` uses RubyZip to
+ # zip the XLSX file contents. When a String, the provided zip command (e.g.,
+ # "zip") is used to zip the file contents (may be faster for large files)
# @return [Boolean] False if confirm_valid and validation errors exist. True if the package was serialized
# @note A tremendous amount of effort has gone into ensuring that you cannot create invalid xlsx documents.
# confirm_valid should be used in the rare case that you cannot open the serialized file.
@@ -88,13 +91,23 @@ module Axlsx
# # ......add cool stuff to your workbook......
# p.serialize("example.xlsx")
#
+ # # Serialize to a file, using a system zip binary
+ # p.serialize("example.xlsx", false, zip_command: "zip")
+ # p.serialize("example.xlsx", false, zip_command: "/path/to/zip")
+ # p.serialize("example.xlsx", false, zip_command: "zip -1")
+ #
# # Serialize to a stream
# s = p.to_stream()
# File.open('example_streamed.xlsx', 'w') { |f| f.write(s.read) }
- def serialize(output, confirm_valid=false)
+ def serialize(output, confirm_valid=false, zip_command: nil)
return false unless !confirm_valid || self.validate.empty?
+ zip_provider = if zip_command
+ ZipCommand.new(zip_command)
+ else
+ Zip::OutputStream
+ end
Relationship.initialize_ids_cache
- Zip::OutputStream.open(output) do |zip|
+ zip_provider.open(output) do |zip|
write_parts(zip)
end
true
@@ -153,8 +166,8 @@ module Axlsx
private
# Writes the package parts to a zip archive.
- # @param [Zip::OutputStream] zip
- # @return [Zip::OutputStream]
+ # @param [Zip::OutputStream, ZipCommand] zip
+ # @return [Zip::OutputStream, ZipCommand]
def write_parts(zip)
p = parts
p.each do |part|
diff --git a/lib/axlsx/util/zip_command.rb b/lib/axlsx/util/zip_command.rb
new file mode 100644
index 00000000..fb336209
--- /dev/null
+++ b/lib/axlsx/util/zip_command.rb
@@ -0,0 +1,73 @@
+# encoding: UTF-8
+require 'open3'
+require 'shellwords'
+
+module Axlsx
+
+  # The ZipCommand class supports zipping the Excel file contents using a binary zip program
+  # instead of RubyZip's `Zip::OutputStream`. Each entry is buffered in memory, written to a file in a
+  # temporary directory, and the zip command is then run from that directory via `Open3.capture2e`.
+  # The command string is interpolated into the shell line unescaped (only the output path and entry
+  # names are shell-escaped), so it may carry flags (e.g. "zip -1") but must come from a trusted source.
+  # NOTE(review): `Dir.mktmpdir`, `FileUtils` and `StringIO` are used but not required in this file; presumably loaded elsewhere, confirm.
+  # The methods provided here mimic `Zip::OutputStream` just closely enough that the two can be used
+  # interchangeably within `caxlsx`; the method signatures are not identical.
+  class ZipCommand
+    # Raised (from `#open`) when the zip command exits with a non-zero status; the message is the command's combined stdout and stderr.
+    class ZipError < StandardError; end
+
+    def initialize(zip_command)
+      @current_file = nil
+      @files = []
+      @zip_command = zip_command
+    end
+
+    # Create a temporary directory, pass `self` to the block so entries can be written into it, then
+    # write out the last buffered entry and run the zip command to produce `output` (resolved to an absolute path).
+    # The directory and its contents are removed at the end of the block.
+    def open(output, &block)
+      Dir.mktmpdir do |dir|
+        @dir = dir
+        block.call(self)
+        write_file
+        zip_parts(output)
+      end
+    end
+
+    # Closes the current entry and opens a new one for writing; `entry.name` is used as its path inside the temporary directory (parent directories are created).
+    def put_next_entry(entry)
+      write_file
+      @current_file = "#{@dir}/#{entry.name}"
+      @files << entry.name
+      FileUtils.mkdir_p(File.dirname(@current_file))
+    end
+
+    # Append content to the in-memory buffer that will be written to the current entry
+    def write(content)
+      @buffer << content
+    end
+    alias << write
+
+    private
+
+    def write_file
+      if @current_file
+        @buffer.rewind
+        File.open(@current_file, "wb") { |f| f.write @buffer.read }
+      end
+      @current_file = nil
+      @buffer = StringIO.new
+    end
+
+    def zip_parts(output)
+      output = Shellwords.shellescape(File.absolute_path(output))
+      inputs = Shellwords.shelljoin(@files)
+      escaped_dir = Shellwords.shellescape(@dir)
+      command = "cd #{escaped_dir} && #{@zip_command} #{output} #{inputs}"
+      stdout_and_stderr, status = Open3.capture2e(command)
+      if !status.success?
+        raise(ZipError.new(stdout_and_stderr))
+      end
+    end
+  end
+end
diff --git a/test/tc_package.rb b/test/tc_package.rb
index 23078862..18c08f5c 100644
--- a/test/tc_package.rb
+++ b/test/tc_package.rb
@@ -128,11 +128,34 @@ class TestPackage < Test::Unit::TestCase
def test_serialization
@package.serialize(@fname)
- zf = Zip::File.open(@fname)
- @package.send(:parts).each{ |part| zf.get_entry(part[:entry]) }
+ assert_zip_file_matches_package(@fname, @package)
File.delete(@fname)
end
+ def test_serialization_with_zip_command
+ @package.serialize(@fname, false, zip_command: "zip")
+ assert_zip_file_matches_package(@fname, @package)
+ File.delete(@fname)
+ end
+
+ def test_serialization_with_zip_command_and_absolute_path
+ fname = "#{Dir.tmpdir}/#{@fname}"
+ @package.serialize(fname, false, zip_command: "zip")
+ assert_zip_file_matches_package(fname, @package)
+ File.delete(fname)
+ end
+
+ def test_serialization_with_invalid_zip_command
+ assert_raises Axlsx::ZipCommand::ZipError do
+ @package.serialize(@fname, false, zip_command: "invalid_zip")
+ end
+ end
+
+ def assert_zip_file_matches_package(fname, package)
+ zf = Zip::File.open(fname)
+ package.send(:parts).each{ |part| zf.get_entry(part[:entry]) }
+ end
+
# See comment for Package#zip_entry_for_part
def test_serialization_creates_identical_files_at_any_time_if_created_at_is_set
@package.core.created = Time.now