read_file(filename, encoding, force_transcode = false)
public
Reads the contents of filename and handles any encoding directives
in the file.
The content will be converted to the given encoding. If the file cannot
be converted, a warning will be printed and nil will be returned.
If force_transcode is true, the document will be transcoded and any
character that is invalid or undefined in the target encoding will be replaced with ‘?’.
Show source
##
# Loads +filename+ as raw bytes and returns its text converted to +encoding+
# (Encoding.default_external when +encoding+ is nil).
#
# A leading UTF-8 byte order mark is stripped, and "\r\n" is normalized to
# "\n" when RUBY_PLATFORM matches mswin or mingw.  If
# RDoc::Encoding.detect_encoding reports an encoding for the content
# (presumably taken from an encoding directive in the file -- confirm against
# that helper), the content is tagged with it before conversion.
#
# Returns nil, after printing a warning, when the content cannot be converted
# or when an unknown encoding name is reported.  Returns nil silently when
# +filename+ is a directory or does not exist.
#
# When +force_transcode+ is true, a failed conversion is retried with invalid
# and undefined characters replaced by '?' instead of giving up.

def self.read_file(filename, encoding, force_transcode = false)
  text = File.open(filename, "rb", &:read)
  text.gsub!("\r\n", "\n") if RUBY_PLATFORM =~ /mswin|mingw/

  # sub! returns nil when nothing was replaced, so this is truthy only when
  # a BOM was actually removed.
  had_bom = text.sub!(/\A\xef\xbb\xbf/, '')

  detected = RDoc::Encoding.detect_encoding(text)
  text = RDoc::Encoding.change_encoding(text, detected) if detected

  begin
    encoding ||= Encoding.default_external
    source_encoding = text.encoding

    if !source_encoding.ascii_compatible?
      text = text.encode(encoding)
    elsif had_bom
      # Each step is assigned separately: the rescue below reads +text+ as it
      # stood when the failure happened.
      text = RDoc::Encoding.change_encoding(text, Encoding::UTF_8)
      text = text.encode(encoding)
    else
      text = RDoc::Encoding.change_encoding(text, encoding)
    end

    # Second attempt: go back to the source encoding and transcode for real.
    unless text.valid_encoding?
      text = RDoc::Encoding.change_encoding(text, source_encoding)
      text = text.encode(encoding)
    end

    if text.valid_encoding?
      text
    else
      warn "unable to convert #{filename} to #{encoding}, skipping"
      nil
    end
  rescue Encoding::InvalidByteSequenceError,
         Encoding::UndefinedConversionError => error
    unless force_transcode
      warn "unable to convert #{error.message} for #{filename}, skipping"
      return nil
    end

    text = RDoc::Encoding.change_encoding(text, source_encoding)
    return text.encode(encoding, invalid: :replace, undef: :replace, replace: '?')
  end
rescue ArgumentError => e
  # Only the "unknown encoding name" failure is handled here; any other
  # ArgumentError is re-raised untouched.
  raise unless e.message =~ /unknown encoding name - (.*)/

  warn "unknown encoding name \"#{Regexp.last_match(1)}\" for #{filename}, skipping"
  nil
rescue Errno::EISDIR, Errno::ENOENT
  nil
end