Method deprecated or moved
This method is deprecated or moved on the latest stable version. The last existing version (v2_2_9) is shown here.
tokenize(input) public
Turns text input into a stream of tokens
# File lib/rdoc/markup/parser.rb, line 456
##
# Turns text +input+ into a stream of tokens.
#
# The scanner is (re)initialised through +setup_scanner+, then +input+ is
# consumed left to right.  Every token is appended to <tt>@tokens</tt> as an
# Array of the form <tt>[type, value, *token_pos(offset)]</tt>, where
# +offset+ is the scanner position (<tt>@s.pos</tt>) at which the token
# starts.  Token types produced here are :NEWLINE, :HEADER, :TEXT, :RULE,
# :BULLET, :LALPHA, :UALPHA, :NUMBER, :LABEL, :NOTE and :BREAK.
#
# <tt>@line</tt> and <tt>@line_pos</tt> are updated whenever a newline is
# consumed.
#
# Raises ParseError if a list label is matched but cannot be classified
# (this indicates a bug in the tokenizer itself).
#
# Returns +self+.
def tokenize input
  setup_scanner input

  until @s.eos? do
    pos = @s.pos

    # leading spaces will be reflected by the column of the next token
    # the only thing we lose are trailing spaces at the end of the file
    next if @s.scan(/ +/)

    # note: after BULLET, LABEL, etc.,
    # indent will be the column of the next non-newline token

    @tokens << case
               # [CR]LF => :NEWLINE
               when @s.scan(/\r?\n/) then
                 token = [:NEWLINE, @s.matched, *token_pos(pos)]
                 # the next line starts right after the newline just consumed
                 @line_pos = char_pos @s.pos
                 @line += 1
                 token
               # === text => :HEADER then :TEXT
               when @s.scan(/(=+)(\s*)/) then
                 level = @s[1].length
                 header = [:HEADER, level, *token_pos(pos)]

                 if @s[2] =~ /^\r?\n/ then
                   # header marker with no text on its line: give the
                   # whitespace (including the newline) back to the scanner
                   # so the newline is tokenized on its own
                   @s.pos -= @s[2].bytesize
                   header
                 else
                   # the rest of the line is the header text; the header
                   # token must precede it, so push it explicitly
                   text_pos = @s.pos
                   @s.scan(/.*/)
                   @tokens << header
                   [:TEXT, @s.matched.sub(/\r$/, ''), *token_pos(text_pos)]
                 end
               # --- (at least 3) and nothing else on the line => :RULE
               when @s.scan(/(-{3,}) *\r?$/) then
                 [:RULE, @s[1].length - 2, *token_pos(pos)]
               # * or - followed by white space and text => :BULLET
               when @s.scan(/([*-]) +(\S)/) then
                 @s.pos -= @s[2].bytesize # unget \S
                 [:BULLET, @s[1], *token_pos(pos)]
               # A. text, a. text, 12. text => :UALPHA, :LALPHA, :NUMBER
               #
               # The letter class must accept both cases, otherwise the
               # :UALPHA classification below can never be reached.
               when @s.scan(/([A-Za-z]|\d+)\. +(\S)/) then
                 # FIXME if tab(s), the column will be wrong
                 # either support tabs everywhere by first expanding them to
                 # spaces, or assume that they will have been replaced
                 # before (and provide a check for that at least in debug
                 # mode)
                 list_label = @s[1]
                 @s.pos -= @s[2].bytesize # unget \S
                 list_type =
                   case list_label
                   when /[a-z]/ then :LALPHA
                   when /[A-Z]/ then :UALPHA
                   when /\d/    then :NUMBER
                   else
                     raise ParseError, "BUG token #{list_label}"
                   end
                 [list_type, list_label, *token_pos(pos)]
               # [text] followed by spaces or end of line => :LABEL
               when @s.scan(/\[(.*?)\]( +|\r?$)/) then
                 [:LABEL, @s[1], *token_pos(pos)]
               # text:: followed by spaces or end of line => :NOTE
               when @s.scan(/(.*?)::( +|\r?$)/) then
                 [:NOTE, @s[1], *token_pos(pos)]
               # anything else: :TEXT, optionally followed by :BREAK when
               # the trailing-space group matched
               else @s.scan(/(.*?)( )?\r?$/)
                 token = [:TEXT, @s[1], *token_pos(pos)]

                 if @s[2] then
                   @tokens << token
                   # pos is a scanner (byte) offset, so advance it by the
                   # byte size of the text, not its character count
                   [:BREAK, @s[2], *token_pos(pos + @s[1].bytesize)]
                 else
                   token
                 end
               end
  end

  self
end
# Related methods
- Instance methods
- build_heading
- build_list
- build_paragraph
- build_verbatim
- char_pos
- get
- parse
- parse_text
- peek_token
- setup_scanner
- skip
- token_pos
- tokenize
- unget
- Class methods
- new
- parse
- tokenize