##
# Splits +input+ into tokens, appending each one to @tokens, and returns
# self.
#
# Every token is an Array of the form <tt>[type, value, *token_pos(offset)]</tt>
# where +offset+ is the scanner position at which the token started.  The
# position bookkeeping (@line, @line_pos) is reset at the start of every call
# and updated whenever a newline is consumed.
#
# Token types emitted: :NEWLINE, :INDENT, :HEADER, :TEXT, :RULE, :BULLET,
# :SPACE, :LALPHA, :UALPHA, :NUMBER, :LABEL and :NOTE.
#
# Raises ParseError if a list label is matched that is neither a letter nor
# a number (which would indicate a bug in the tokenizer itself).

def tokenize input
  s = StringScanner.new input

  @line     = 0
  @line_pos = 0

  until s.eos? do
    pos = s.pos

    # Each branch evaluates to the last token for the matched construct;
    # branches that produce two tokens push the first one themselves.
    @tokens << case
               when s.scan(/\r?\n/) then
                 # Build the token before touching the line bookkeeping so
                 # the newline is reported on the line it terminates.
                 token = [:NEWLINE, s.matched, *token_pos(pos)]
                 @line_pos = s.pos
                 @line += 1
                 token
               when s.scan(/ +/) then
                 [:INDENT, s.matched_size, *token_pos(pos)]
               when s.scan(/(=+)\s*/) then
                 # NOTE(review): \s* also matches line terminators, so a
                 # bare "=" at end of line consumes the newline without
                 # updating @line/@line_pos -- confirm this is intended.
                 level = s[1].length
                 level = 6 if level > 6 # header levels are capped at 6
                 @tokens << [:HEADER, level, *token_pos(pos)]

                 # The rest of the line is the header text.
                 pos = s.pos
                 s.scan(/.*/)
                 [:TEXT, s.matched, *token_pos(pos)]
               when s.scan(/^(-{3,}) *$/) then
                 # Rule weight is the number of dashes beyond the first two.
                 [:RULE, s[1].length - 2, *token_pos(pos)]
               when s.scan(/([*-])\s+/) then
                 @tokens << [:BULLET, :BULLET, *token_pos(pos)]
                 [:SPACE, s.matched_size, *token_pos(pos)]
               when s.scan(/([a-z]|\d+)\.[ \t]+\S/i) then
                 # The pattern is case-insensitive so that upper-case labels
                 # ("A. item") are recognised; without the flag the :UALPHA
                 # branch below could never be reached.
                 list_label = s[1]
                 width      = s.matched_size - 1 # exclude the trailing \S

                 s.pos -= 1 # unget the \S, it belongs to the item text

                 list_type = case list_label
                             when /[a-z]/ then :LALPHA
                             when /[A-Z]/ then :UALPHA
                             when /\d/    then :NUMBER
                             else
                               raise ParseError, "BUG token #{list_label}"
                             end

                 @tokens << [list_type, list_label, *token_pos(pos)]
                 [:SPACE, width, *token_pos(pos)]
               when s.scan(/\[(.*?)\]( +|$)/) then
                 @tokens << [:LABEL, s[1], *token_pos(pos)]
                 [:SPACE, s.matched_size, *token_pos(pos)]
               when s.scan(/(.*?)::( +|$)/) then
                 @tokens << [:NOTE, s[1], *token_pos(pos)]
                 [:SPACE, s.matched_size, *token_pos(pos)]
               else
                 # Anything else is plain text up to the end of the line.
                 s.scan(/.*/)
                 [:TEXT, s.matched, *token_pos(pos)]
               end
  end

  self
end