Using the Pull Parser
This API is experimental, and subject to change.
    parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
    while parser.has_next?
      res = parser.next
      puts res[1]['att'] if res.start_tag? and res[0] == 'b'
    end
See the PullEvent class for information on the content of the results. The data is identical to the arguments passed for the various events to the StreamListener API.
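Notice that a parse error may be delivered as an event rather than raised, so check each result with error? and raise its payload yourself if needed: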
    parser = PullParser.new( "<a>BAD DOCUMENT" )
    while parser.has_next?
      res = parser.next
      raise res[1] if res.error?
    end
Nat Price gave me some good ideas for the API.
Constants
ATTDEF = "\\\\s+#{NAME}\\\\s+#{ATTTYPE}\\\\s+#{DEFAULTDECL}"
ATTDEF_RE = /#{ATTDEF}/
ATTLISTDECL_PATTERN = /\\A\\s*<!ATTLIST\\s+#{NAME}(?:#{ATTDEF})*\\s*>/um
ATTLISTDECL_START = /\\A\\s*<!ATTLIST/um
ATTRIBUTE_PATTERN = /\\s*(#{NAME_STR})\\s*=\\s*(["'])(.*?)\\4/um
ATTTYPE = "(CDATA|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS|#{ENUMERATEDTYPE})"
ATTVALUE = "(?:\\"((?:[^<&\\"]|#{REFERENCE})*)\\")|(?:'((?:[^<&']|#{REFERENCE})*)')"
CDATA_END = /\\A\\s*\\]\\s*>/um
CDATA_PATTERN = /<!\\[CDATA\\[(.*?)\\]\\]>/um
CDATA_START = /\\A<!\\[CDATA\\[/u
CLOSE_MATCH = /^\\s*<\\/(#{NAME_STR})\\s*>/um
COMBININGCHAR = ''
COMMENT_PATTERN = /<!--(.*?)-->/um
COMMENT_START = /\\A<!--/u
DEFAULTDECL = "(#REQUIRED|#IMPLIED|(?:(#FIXED\\\\s+)?#{ATTVALUE}))"
DEFAULT_ENTITIES = {
  'gt' => [/&gt;/, '&gt;', '>', />/],
  'lt' => [/&lt;/, '&lt;', '<', /</],
  'quot' => [/&quot;/, '&quot;', '"', /"/],
  "apos" => [/&apos;/, "&apos;", "'", /'/]
}
DIGIT = '[:digit:]'
DOCTYPE_END = /\\A\\s*\\]\\s*>/um
DOCTYPE_PATTERN = /\\s*<!DOCTYPE\\s+(.*?)(\\[|>)/um
DOCTYPE_START = /\\A\\s*<!DOCTYPE\\s/um
ELEMENTDECL_PATTERN = /\\A\\s*(<!ELEMENT.*?)>/um
ELEMENTDECL_START = /\\A\\s*<!ELEMENT/um
ENCODING = /\\bencoding\\s*=\\s*["'](.*?)['"]/um
ENTITYDECL = /\\s*(?:#{GEDECL})|(?:#{PEDECL})/um
ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
ENTITY_START = /\\A\\s*<!ENTITY/
ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))"
ENUMERATION = "\\\\(\\\\s*#{NMTOKEN}(?:\\\\s*\\\\|\\\\s*#{NMTOKEN})*\\\\s*\\\\)"
EREFERENCE = /&(?!#{NAME};)/
EXTENDER = ''
EXTERNALID = "(?:(?:(SYSTEM)\\\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\\\s+#{PUBIDLITERAL}\\\\s+#{SYSTEMLITERAL}))"
GEDECL = "<!ENTITY\\\\s+#{NAME}\\\\s+#{ENTITYDEF}\\\\s*>"
IDENTITY = /^([!\\*\\w\\-]+)(\\s+#{NCNAME_STR})?(\\s+["'](.*?)['"])?(\\s+['"](.*?)["'])?/u
INSTRUCTION_PATTERN = /<\\?(.*?)(\\s+.*?)?\\?>/um
INSTRUCTION_START = /\\A<\\?/u
LETTER = '[:alpha:]'
MISSING_ATTRIBUTE_QUOTES = /^<#{NAME_STR}\\s+#{NAME_STR}\\s*=\\s*[^"']/um
NAME = "([\\\\w:]#{NAMECHAR}*)"
NAMECHAR = '[\\-\\w\\.:]'
NAME_STR = "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})"
NCNAME_STR = "[#{LETTER}_:][-[:alnum:]._:#{COMBININGCHAR}#{EXTENDER}]*"
NDATADECL = "\\\\s+NDATA\\\\s+#{NAME}"
NMTOKEN = "(?:#{NAMECHAR})+"
NMTOKENS = "#{NMTOKEN}(\\\\s+#{NMTOKEN})*"
NOTATIONDECL_START = /\\A\\s*<!NOTATION/um
NOTATIONTYPE = "NOTATION\\\\s+\\\\(\\\\s*#{NAME}(?:\\\\s*\\\\|\\\\s*#{NAME})*\\\\s*\\\\)"
PEDECL = "<!ENTITY\\\\s+(%)\\\\s+#{NAME}\\\\s+#{PEDEF}\\\\s*>"
PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
PEREFERENCE = "%#{NAME};"
PUBIDCHAR = "\\x20\\x0D\\x0Aa-zA-Z0-9\\\\-()+,./:=?;!*@$_%#"
PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
PUBLIC = /\\A\\s*<!NOTATION\\s+(\\w[\\-\\w]*)\\s+(PUBLIC)\\s+(["'])(.*?)\\3(?:\\s+(["'])(.*?)\\5)?\\s*>/um
REFERENCE = "&(?:#{NAME};|#\\\\d+;|#x[0-9a-fA-F]+;)"
REFERENCE_RE = /#{REFERENCE}/
STANDALONE = /\\bstandalone\\s*=\\s*["'](.*?)['"]/um
SYSTEM = /\\A\\s*<!NOTATION\\s+(\\w[\\-\\w]*)\\s+(SYSTEM)\\s+(["'])(.*?)\\3\\s*>/um
SYSTEMENTITY = /\\A\\s*(%.*?;)\\s*$/um
SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
TAG_MATCH = /^<((?>#{NAME_STR}))\\s*((?>\\s+#{UNAME_STR}\\s*=\\s*(["']).*?\\5)*)\\s*(\\/)?>/um
TEXT_PATTERN = /\\A([^<]*)/um
UNAME_STR = "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
VERSION = /\\bversion\\s*=\\s*["'](.*?)['"]/um
XMLDECL_PATTERN = /<\\?xml\\s+(.*?)\\?>/um
XMLDECL_START = /\\A<\\?xml\\s/u;
Attributes
| [R] | source |
Files
- lib/rexml/parsers/baseparser.rb