diff options
| author | dearblue <[email protected]> | 2019-09-15 23:50:24 +0900 |
|---|---|---|
| committer | dearblue <[email protected]> | 2019-09-16 00:26:41 +0900 |
| commit | 992ba476a95136eaad5b9b208d4ca5a1ca31324d (patch) | |
| tree | 5a5959fca27b3ae2f3fafcae6afa766463c94983 | |
| parent | 7cc8c7d2fff9b0dd629c8c614c4b066a4f490de4 (diff) | |
| download | mruby-992ba476a95136eaad5b9b208d4ca5a1ca31324d.tar.gz mruby-992ba476a95136eaad5b9b208d4ca5a1ca31324d.zip | |
Fix broken UTF-8 characters by `IO#getc`
A multi-byte UTF-8 character is broken into separate bytes when it spans
an `IO::BUF_SIZE` (4096 bytes) boundary.
- Prepare file:
```ruby
File.open("sample", "wb") { |f| f << "●" * 1370 }
```
- Before the patch:
```ruby
File.open("sample") { |f| a = []; while ch = f.getc; a << ch; end; p a }
# => ["●", "●", ..., "●", "\xe2", "\x97", "\x8f", "●", "●", "●", "●"]
```
- After the patch:
```ruby
File.open("sample") { |f| a = []; while ch = f.getc; a << ch; end; p a }
# => ["●", "●", ..., "●", "●", "●", "●", "●", "●"]
```
| -rw-r--r-- | mrbgems/mruby-io/mrblib/io.rb | 10 |
1 files changed, 8 insertions, 2 deletions
```diff
diff --git a/mrbgems/mruby-io/mrblib/io.rb b/mrbgems/mruby-io/mrblib/io.rb
index 32bac1f0d..6b83644ef 100644
--- a/mrbgems/mruby-io/mrblib/io.rb
+++ b/mrbgems/mruby-io/mrblib/io.rb
@@ -170,8 +170,14 @@ class IO
   end
 
   def _read_buf
-    return @buf if @buf && @buf.bytesize > 0
-    @buf = sysread(BUF_SIZE)
+    return @buf if @buf && @buf.bytesize >= 4 # maximum UTF-8 character is 4 bytes
+    @buf ||= ""
+    begin
+      @buf += sysread(BUF_SIZE)
+    rescue EOFError => e
+      raise e if @buf.empty?
+    end
+    @buf
   end
 
   def ungetc(substr)
```
