Translates an offset measured in bytes into one measured in characters,
for instance an offset received from a regular expression match.
# File activesupport/lib/active_support/multibyte/handlers/utf8_handler.rb, line 238
# Translates +byte_offset+, a position in +str+ counted in bytes, into the
# same position counted in UTF-8 characters (codepoints). A typical input is
# an offset reported by a regular expression match.
#
# An offset that points into the middle of a multibyte character yields the
# index of that character. An offset at or past the end of +str+ yields the
# number of characters in +str+.
#
# Returns 0 when +str+ is empty and +nil+ when +byte_offset+ is +nil+.
#
# Raises EncodingError when the bytes leading up to the offset are not valid
# UTF-8.
def translate_offset(str, byte_offset)
  return 0 if str == ''
  return nil if byte_offset.nil?

  # String#[] and String#length count characters, not bytes, on
  # encoding-aware Rubies, so work on a raw byte copy of the string instead.
  raw = str.unpack('a*').first
  last = byte_offset
  begin
    codepoints = raw[0..last].unpack('U*')
  rescue ArgumentError
    # The slice may end inside a multibyte character: take in one more byte
    # and decode again. unpack('U*') accepts sequences of at most six bytes,
    # so five extra bytes always complete a valid character; whatever still
    # fails after that, or at the end of the string, is malformed.
    last += 1
    retry if last < raw.length && last <= byte_offset + 5
    raise EncodingError, 'malformed UTF-8 character'
  end

  # Inside the string the slice includes the character at the offset, so its
  # index is one less than the count. At or past the end there is no such
  # character and the count itself is the offset.
  byte_offset < raw.length ? codepoints.length - 1 : codepoints.length
end