From 992ba476a95136eaad5b9b208d4ca5a1ca31324d Mon Sep 17 00:00:00 2001
From: dearblue <dearblue@users.noreply.github.com>
Date: Sun, 15 Sep 2019 23:50:24 +0900
Subject: Fix broken UTF-8 characters by `IO#getc`
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A multi-byte UTF-8 character is broken into separate bytes by `IO#getc`
when it spans an `IO::BUF_SIZE` (4096 bytes) read-buffer boundary.

- Prepare file:

  ```ruby
  File.open("sample", "wb") { |f| f << "●" * 1370 }
  ```

- Before the patch:

  ```ruby
  File.open("sample") { |f| a = []; while ch = f.getc; a << ch; end; p a }
  # => ["●", "●", ..., "●", "\xe2", "\x97", "\x8f", "●", "●", "●", "●"]
  ```

- After the patch:

  ```ruby
  File.open("sample") { |f| a = []; while ch = f.getc; a << ch; end; p a }
  # => ["●", "●", ..., "●", "●", "●", "●", "●", "●"]
  ```
---
 mrbgems/mruby-io/mrblib/io.rb | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/mrbgems/mruby-io/mrblib/io.rb b/mrbgems/mruby-io/mrblib/io.rb
index 32bac1f0d..6b83644ef 100644
--- a/mrbgems/mruby-io/mrblib/io.rb
+++ b/mrbgems/mruby-io/mrblib/io.rb
@@ -170,8 +170,14 @@ class IO
   end
 
   def _read_buf
-    return @buf if @buf && @buf.bytesize > 0
-    @buf = sysread(BUF_SIZE)
+    return @buf if @buf && @buf.bytesize >= 4 # maximum UTF-8 character is 4 bytes
+    @buf ||= ""
+    begin
+      @buf += sysread(BUF_SIZE)
+    rescue EOFError => e
+      raise e if @buf.empty?
+    end
+    @buf
   end
 
   def ungetc(substr)
-- 
cgit v1.2.3