Repositorio del curso CCOM4030 el semestre B91 del proyecto kilometro0

sax.js 43KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563
  1. ;(function (sax) { // wrapper for non-node envs
  // Public entry points: a factory plus the constructors themselves.
  sax.parser = function (strict, opt) {
    return new SAXParser(strict, opt)
  }
  sax.SAXParser = SAXParser
  sax.SAXStream = SAXStream
  sax.createStream = createStream

  // Buffer overrun checks start once the parse position passes
  // MAX_BUFFER_LENGTH. Each check schedules the next one at
  // MAX_BUFFER_LENGTH - (length of the longest buffer), which is the earliest
  // point at which an overrun could happen, so checks are as rare as possible
  // while still guaranteeing the bound is enforced.
  // Buffers are tested at most once per write(), so a very large string passed
  // to write() may be copied in full once; callers control chunk size, so this
  // is considered acceptable.
  // Set to Infinity to have unlimited buffers.
  sax.MAX_BUFFER_LENGTH = 64 * 1024

  // Names of the parser's string accumulators (reset by clearBuffers and
  // measured by checkBufferLength).
  var buffers = [
    'comment',
    'sgmlDecl',
    'textNode',
    'tagName',
    'doctype',
    'procInstName',
    'procInstBody',
    'entity',
    'attribName',
    'attribValue',
    'cdata',
    'script'
  ]

  // Event names; the parser invokes the handler stored under 'on' + name.
  sax.EVENTS = [
    'text', 'processinginstruction', 'sgmldeclaration', 'doctype', 'comment',
    'attribute', 'opentag', 'closetag',
    'opencdata', 'cdata', 'closecdata',
    'error', 'end', 'ready', 'script',
    'opennamespace', 'closenamespace'
  ]
  /**
   * Parser constructor; also used by end() to reset an existing instance.
   *
   * @param {boolean} strict - truthy enables strict mode (and disables the
   *   special <script> handling).
   * @param {Object} [opt] - options object; it is stored on the parser and its
   *   `lowercase` field is normalised in place.
   */
  function SAXParser (strict, opt) {
    // Support being called without `new`.
    if (!(this instanceof SAXParser)) return new SAXParser(strict, opt)

    var self = this
    var options = opt || {}

    clearBuffers(self)
    self.q = self.c = ''
    self.bufferCheckPosition = sax.MAX_BUFFER_LENGTH
    self.opt = options
    options.lowercase = options.lowercase || options.lowercasetags
    self.looseCase = options.lowercase ? 'toLowerCase' : 'toUpperCase'
    self.tags = []
    self.closed = self.closedRoot = self.sawRoot = false
    self.tag = self.error = null
    self.strict = !!strict
    self.noscript = !!(strict || options.noscript)
    self.state = S.BEGIN
    self.strictEntities = options.strictEntities
    // Per-parser entity table that inherits from the shared one.
    self.ENTITIES = self.strictEntities
      ? Object.create(sax.XML_ENTITIES)
      : Object.create(sax.ENTITIES)
    self.attribList = []

    // Namespaces form a prototype chain: `ns` always belongs to the current
    // tag and inherits from its parent tag's bindings.
    if (options.xmlns) self.ns = Object.create(rootNS)

    // Position tracking is mostly for error reporting.
    self.trackPosition = options.position !== false
    if (self.trackPosition) self.position = self.line = self.column = 0

    emit(self, 'onready')
  }
  // Minimal fallbacks for engines that lack Object.create / Object.keys.
  if (!Object.create) {
    Object.create = function (proto) {
      var Ctor = function () {}
      Ctor.prototype = proto
      return new Ctor()
    }
  }

  if (!Object.keys) {
    Object.keys = function (obj) {
      var names = []
      for (var key in obj) {
        if (obj.hasOwnProperty(key)) names.push(key)
      }
      return names
    }
  }
  /**
   * Inspect every accumulator listed in `buffers`. Oversized text, cdata and
   * script buffers are emitted early and emptied; any other oversized buffer
   * is reported through error(). Finally the next check is scheduled.
   */
  function checkBufferLength (parser) {
    var limit = Math.max(sax.MAX_BUFFER_LENGTH, 10)
    var longest = 0

    buffers.forEach(function (name) {
      var size = parser[name].length
      if (size > limit) {
        // Text/cdata/script nodes can legitimately grow large because they
        // are buffered; emit what we have so they stop growing.
        if (name === 'textNode') {
          closeText(parser)
        } else if (name === 'cdata') {
          emitNode(parser, 'oncdata', parser.cdata)
          parser.cdata = ''
        } else if (name === 'script') {
          emitNode(parser, 'onscript', parser.script)
          parser.script = ''
        } else {
          error(parser, 'Max buffer length exceeded: ' + name)
        }
      }
      longest = Math.max(longest, size)
    })

    // Earliest position at which some buffer could overrun.
    parser.bufferCheckPosition = sax.MAX_BUFFER_LENGTH - longest + parser.position
  }
  // Reset every accumulator named in `buffers` to the empty string.
  function clearBuffers (parser) {
    buffers.forEach(function (name) {
      parser[name] = ''
    })
  }
  // Emit pending text first, then any buffered cdata and script content.
  function flushBuffers (parser) {
    closeText(parser)

    var pending = [['cdata', 'oncdata'], ['script', 'onscript']]
    for (var i = 0; i < pending.length; i++) {
      var field = pending[i][0]
      if (parser[field] !== '') {
        emitNode(parser, pending[i][1], parser[field])
        parser[field] = ''
      }
    }
  }
  // Instance API; each method delegates to a function in this file.
  SAXParser.prototype = {
    end: function () {
      end(this)
    },
    write: write,
    // Clear the stored error so write() can be called again.
    resume: function () {
      this.error = null
      return this
    },
    // Writing null is how the parser is told to finish.
    close: function () {
      return this.write(null)
    },
    flush: function () {
      flushBuffers(this)
    }
  }
  // Use the host's `stream` module when require() succeeds; otherwise fall
  // back to an empty constructor.
  var Stream
  try {
    Stream = require('stream').Stream
  } catch (ex) {
    Stream = function () {}
  }

  // Every event except 'error' and 'end' is forwarded generically by
  // SAXStream; those two are wired explicitly in the constructor.
  var streamWraps = sax.EVENTS.filter(function (name) {
    return !(name === 'error' || name === 'end')
  })
  // Factory for SAXStream; takes the same arguments as the constructor.
  function createStream (strict, opt) {
    var stream = new SAXStream(strict, opt)
    return stream
  }
  /**
   * Stream wrapper around a SAXParser.
   *
   * @param {boolean} strict - forwarded to SAXParser.
   * @param {Object} [opt] - forwarded to SAXParser.
   */
  function SAXStream (strict, opt) {
    // Support being called without `new`.
    if (!(this instanceof SAXStream)) return new SAXStream(strict, opt)

    Stream.apply(this)

    var stream = this
    stream._parser = new SAXParser(strict, opt)
    stream.writable = true
    stream.readable = true

    stream._parser.onend = function () {
      stream.emit('end')
    }

    stream._parser.onerror = function (er) {
      stream.emit('error', er)
      // If emit() did not throw, the error was handled; clear it so that
      // further writes are accepted.
      stream._parser.error = null
    }

    stream._decoder = null

    // Expose an on<event> accessor for every forwarded event.
    streamWraps.forEach(function (ev) {
      var slot = 'on' + ev
      Object.defineProperty(stream, slot, {
        enumerable: true,
        configurable: false,
        get: function () {
          return stream._parser[slot]
        },
        set: function (h) {
          if (h) {
            stream.on(ev, h)
            return
          }
          // A falsy handler unsubscribes everything for this event.
          stream.removeAllListeners(ev)
          stream._parser[slot] = h
          return h
        }
      })
    })
  }
  // Inherit from Stream and restore the `constructor` reference.
  SAXStream.prototype = Object.create(
    Stream.prototype,
    { constructor: { value: SAXStream } }
  )
  /**
   * Feed a chunk to the parser and re-emit it as a 'data' event.
   * Buffer chunks are decoded as UTF-8 through a lazily created StringDecoder.
   *
   * @param {*} data - chunk to parse; Buffers are decoded, anything else is
   *   passed through its toString().
   * @returns {boolean} always true.
   */
  SAXStream.prototype.write = function (data) {
    var isBinary = typeof Buffer === 'function' &&
      typeof Buffer.isBuffer === 'function' &&
      Buffer.isBuffer(data)

    if (isBinary) {
      if (!this._decoder) {
        var SD = require('string_decoder').StringDecoder
        this._decoder = new SD('utf8')
      }
      data = this._decoder.write(data)
    }

    this._parser.write(data.toString())
    this.emit('data', data)
    return true
  }
  /**
   * Finish the stream: write the optional final chunk, flush the decoder and
   * end the underlying parser.
   *
   * @param {*} [chunk] - optional last chunk; written only when it has a
   *   non-zero length.
   * @returns {boolean} always true.
   */
  SAXStream.prototype.end = function (chunk) {
    if (chunk && chunk.length) {
      this.write(chunk)
    }
    // Bytes of an incomplete multi-byte character may still be held by the
    // decoder. Flush them so truncated input reaches the parser instead of
    // being dropped silently.
    if (this._decoder) {
      var rest = this._decoder.end()
      if (rest) {
        this._parser.write(rest)
        this.emit('data', rest)
      }
    }
    this._parser.end()
    return true
  }
  /**
   * Subscribe to an event. The first subscription to a forwarded parser event
   * installs a parser handler that re-emits the event on the stream.
   */
  SAXStream.prototype.on = function (ev, handler) {
    var stream = this
    var slot = 'on' + ev
    var forwarded = streamWraps.indexOf(ev) !== -1

    if (forwarded && !stream._parser[slot]) {
      stream._parser[slot] = function () {
        // emit(ev, ...original arguments)
        var args = [ev].concat(Array.prototype.slice.call(arguments))
        stream.emit.apply(stream, args)
      }
    }

    return Stream.prototype.on.call(stream, ev, handler)
  }
  // Character classes and tokens.
  // The string-based sets are turned into lookup objects by charClass().
  var whitespace = charClass('\r\n\t ')
  // This really should be replaced with proper character classes: XML allows
  // far more digits and letters than these.
  var number = charClass('0124356789')
  var letter = charClass('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')
  var quote = charClass('\'"')
  // Whitespace or '>'.
  var attribEnd = charClass('\r\n\t ' + '>')

  var CDATA = '[CDATA['
  var DOCTYPE = 'DOCTYPE'
  var XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace'
  var XMLNS_NAMESPACE = 'http://www.w3.org/2000/xmlns/'
  var rootNS = { xml: XML_NAMESPACE, xmlns: XMLNS_NAMESPACE }

  // http://www.w3.org/TR/REC-xml/#NT-NameStartChar
  // Input is examined one UTF-16 code unit at a time, so astral-plane
  // characters (10000-EFFFF) cannot be matched by these classes.
  var nameStart = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/
  var nameBody = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040\.\d-]/
  // Same as the name classes, plus '#'.
  var entityStart = /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/
  var entityBody = /[#:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040\.\d-]/
  /**
   * Build a lookup object with one `true` entry per UTF-16 code unit of `str`.
   *
   * @param {string} str - characters that belong to the class.
   * @returns {Object} map from single character to true.
   */
  function charClass (str) {
    var members = {}
    for (var i = 0; i < str.length; i++) {
      members[str.charAt(i)] = true
    }
    return members
  }
  // True when `c` is a RegExp, judged by its Object.prototype.toString tag.
  function isRegExp (c) {
    var tag = Object.prototype.toString.call(c)
    return tag === '[object RegExp]'
  }
  /**
   * Test whether character `c` belongs to `charclass`.
   *
   * @param {RegExp|Object} charclass - a regular expression, or a lookup
   *   object as produced by charClass().
   * @param {string} c - the character to test.
   * @returns {*} a boolean for regular expressions; for lookup objects the
   *   stored entry (undefined when absent).
   */
  function is (charclass, c) {
    if (isRegExp(charclass)) {
      return !!c.match(charclass)
    }
    return charclass[c]
  }
  // Negation of is(): true when `c` is not a member of `charclass`.
  function not (charclass, c) {
    var member = is(charclass, c)
    return !member
  }
  // Parser states, numbered consecutively from 0 in the order listed.
  // `S` serves as the counter here.
  var S = 0
  var STATE_NAMES = [
    'BEGIN', // leading byte order mark or whitespace
    'BEGIN_WHITESPACE', // leading whitespace
    'TEXT', // general stuff
    'TEXT_ENTITY', // &amp and such.
    'OPEN_WAKA', // <
    'SGML_DECL', // <!BLARG
    'SGML_DECL_QUOTED', // <!BLARG foo "bar
    'DOCTYPE', // <!DOCTYPE
    'DOCTYPE_QUOTED', // <!DOCTYPE "//blah
    'DOCTYPE_DTD', // <!DOCTYPE "//blah" [ ...
    'DOCTYPE_DTD_QUOTED', // <!DOCTYPE "//blah" [ "foo
    'COMMENT_STARTING', // <!-
    'COMMENT', // <!--
    'COMMENT_ENDING', // <!-- blah -
    'COMMENT_ENDED', // <!-- blah --
    'CDATA', // <![CDATA[ something
    'CDATA_ENDING', // ]
    'CDATA_ENDING_2', // ]]
    'PROC_INST', // <?hi
    'PROC_INST_BODY', // <?hi there
    'PROC_INST_ENDING', // <?hi "there" ?
    'OPEN_TAG', // <strong
    'OPEN_TAG_SLASH', // <strong /
    'ATTRIB', // <a
    'ATTRIB_NAME', // <a foo
    'ATTRIB_NAME_SAW_WHITE', // <a foo _
    'ATTRIB_VALUE', // <a foo=
    'ATTRIB_VALUE_QUOTED', // <a foo="bar
    'ATTRIB_VALUE_CLOSED', // <a foo="bar"
    'ATTRIB_VALUE_UNQUOTED', // <a foo=bar
    'ATTRIB_VALUE_ENTITY_Q', // <foo bar="&quot;"
    'ATTRIB_VALUE_ENTITY_U', // <foo bar=&quot
    'CLOSE_TAG', // </a
    'CLOSE_TAG_SAW_WHITE', // </a >
    'SCRIPT', // <script> ...
    'SCRIPT_ENDING' // <script> ... <
  ]
  sax.STATE = {}
  STATE_NAMES.forEach(function (name) {
    sax.STATE[name] = S++
  })
  // Entity table used when the strictEntities option is set.
  sax.XML_ENTITIES = {
    'amp': '&', 'gt': '>', 'lt': '<', 'quot': '"', 'apos': "'"
  }
  // Default entity table. Values are either the replacement string itself or
  // a numeric code unit that is converted to a string right after this table.
  sax.ENTITIES = {
    // XML predefined entities
    'amp': '&', 'gt': '>', 'lt': '<', 'quot': '"', 'apos': "'",

    // accented Latin letters, upper case
    'AElig': 198, 'Aacute': 193, 'Acirc': 194, 'Agrave': 192, 'Aring': 197,
    'Atilde': 195, 'Auml': 196, 'Ccedil': 199, 'ETH': 208, 'Eacute': 201,
    'Ecirc': 202, 'Egrave': 200, 'Euml': 203, 'Iacute': 205, 'Icirc': 206,
    'Igrave': 204, 'Iuml': 207, 'Ntilde': 209, 'Oacute': 211, 'Ocirc': 212,
    'Ograve': 210, 'Oslash': 216, 'Otilde': 213, 'Ouml': 214, 'THORN': 222,
    'Uacute': 218, 'Ucirc': 219, 'Ugrave': 217, 'Uuml': 220, 'Yacute': 221,

    // accented Latin letters, lower case
    'aacute': 225, 'acirc': 226, 'aelig': 230, 'agrave': 224, 'aring': 229,
    'atilde': 227, 'auml': 228, 'ccedil': 231, 'eacute': 233, 'ecirc': 234,
    'egrave': 232, 'eth': 240, 'euml': 235, 'iacute': 237, 'icirc': 238,
    'igrave': 236, 'iuml': 239, 'ntilde': 241, 'oacute': 243, 'ocirc': 244,
    'ograve': 242, 'oslash': 248, 'otilde': 245, 'ouml': 246, 'szlig': 223,
    'thorn': 254, 'uacute': 250, 'ucirc': 251, 'ugrave': 249, 'uuml': 252,
    'yacute': 253, 'yuml': 255,

    // Latin-1 symbols
    'copy': 169, 'reg': 174, 'nbsp': 160, 'iexcl': 161, 'cent': 162,
    'pound': 163, 'curren': 164, 'yen': 165, 'brvbar': 166, 'sect': 167,
    'uml': 168, 'ordf': 170, 'laquo': 171, 'not': 172, 'shy': 173,
    'macr': 175, 'deg': 176, 'plusmn': 177, 'sup1': 185, 'sup2': 178,
    'sup3': 179, 'acute': 180, 'micro': 181, 'para': 182, 'middot': 183,
    'cedil': 184, 'ordm': 186, 'raquo': 187, 'frac14': 188, 'frac12': 189,
    'frac34': 190, 'iquest': 191, 'times': 215, 'divide': 247,

    // extended Latin and spacing modifiers
    'OElig': 338, 'oelig': 339, 'Scaron': 352, 'scaron': 353, 'Yuml': 376,
    'fnof': 402, 'circ': 710, 'tilde': 732,

    // Greek, upper case
    'Alpha': 913, 'Beta': 914, 'Gamma': 915, 'Delta': 916, 'Epsilon': 917,
    'Zeta': 918, 'Eta': 919, 'Theta': 920, 'Iota': 921, 'Kappa': 922,
    'Lambda': 923, 'Mu': 924, 'Nu': 925, 'Xi': 926, 'Omicron': 927,
    'Pi': 928, 'Rho': 929, 'Sigma': 931, 'Tau': 932, 'Upsilon': 933,
    'Phi': 934, 'Chi': 935, 'Psi': 936, 'Omega': 937,

    // Greek, lower case and variants
    'alpha': 945, 'beta': 946, 'gamma': 947, 'delta': 948, 'epsilon': 949,
    'zeta': 950, 'eta': 951, 'theta': 952, 'iota': 953, 'kappa': 954,
    'lambda': 955, 'mu': 956, 'nu': 957, 'xi': 958, 'omicron': 959,
    'pi': 960, 'rho': 961, 'sigmaf': 962, 'sigma': 963, 'tau': 964,
    'upsilon': 965, 'phi': 966, 'chi': 967, 'psi': 968, 'omega': 969,
    'thetasym': 977, 'upsih': 978, 'piv': 982,

    // spacing, directional marks and general punctuation
    'ensp': 8194, 'emsp': 8195, 'thinsp': 8201, 'zwnj': 8204, 'zwj': 8205,
    'lrm': 8206, 'rlm': 8207, 'ndash': 8211, 'mdash': 8212, 'lsquo': 8216,
    'rsquo': 8217, 'sbquo': 8218, 'ldquo': 8220, 'rdquo': 8221, 'bdquo': 8222,
    'dagger': 8224, 'Dagger': 8225, 'bull': 8226, 'hellip': 8230,
    'permil': 8240, 'prime': 8242, 'Prime': 8243, 'lsaquo': 8249,
    'rsaquo': 8250, 'oline': 8254, 'frasl': 8260, 'euro': 8364,

    // letter-like symbols
    'image': 8465, 'weierp': 8472, 'real': 8476, 'trade': 8482,
    'alefsym': 8501,

    // arrows
    'larr': 8592, 'uarr': 8593, 'rarr': 8594, 'darr': 8595, 'harr': 8596,
    'crarr': 8629, 'lArr': 8656, 'uArr': 8657, 'rArr': 8658, 'dArr': 8659,
    'hArr': 8660,

    // mathematical operators
    'forall': 8704, 'part': 8706, 'exist': 8707, 'empty': 8709, 'nabla': 8711,
    'isin': 8712, 'notin': 8713, 'ni': 8715, 'prod': 8719, 'sum': 8721,
    'minus': 8722, 'lowast': 8727, 'radic': 8730, 'prop': 8733, 'infin': 8734,
    'ang': 8736, 'and': 8743, 'or': 8744, 'cap': 8745, 'cup': 8746,
    'int': 8747, 'there4': 8756, 'sim': 8764, 'cong': 8773, 'asymp': 8776,
    'ne': 8800, 'equiv': 8801, 'le': 8804, 'ge': 8805, 'sub': 8834,
    'sup': 8835, 'nsub': 8836, 'sube': 8838, 'supe': 8839, 'oplus': 8853,
    'otimes': 8855, 'perp': 8869, 'sdot': 8901,

    // technical and geometric symbols
    'lceil': 8968, 'rceil': 8969, 'lfloor': 8970, 'rfloor': 8971,
    'lang': 9001, 'rang': 9002, 'loz': 9674,

    // card suits
    'spades': 9824, 'clubs': 9827, 'hearts': 9829, 'diams': 9830
  }
  // Turn numeric entity values into their one-character strings.
  Object.keys(sax.ENTITIES).forEach(function (name) {
    var value = sax.ENTITIES[name]
    if (typeof value === 'number') {
      sax.ENTITIES[name] = String.fromCharCode(value)
    }
  })

  // Add the reverse mapping (state number -> state name) to sax.STATE.
  Object.keys(sax.STATE).forEach(function (stateName) {
    sax.STATE[sax.STATE[stateName]] = stateName
  })

  // shorthand
  S = sax.STATE
  /**
   * Invoke the handler stored on the parser under `event`, if any.
   *
   * @param {Object} parser - object carrying the handler properties.
   * @param {string} event - handler property name, e.g. 'ontext'.
   * @param {*} [data] - single argument passed to the handler.
   */
  function emit (parser, event, data) {
    if (parser[event]) {
      parser[event](data)
    }
  }
  // Emit a node event, first flushing any pending text so that events are
  // delivered in document order.
  function emitNode (parser, nodeType, data) {
    if (parser.textNode) {
      closeText(parser)
    }
    emit(parser, nodeType, data)
  }
  // Apply the text options to the pending text, emit it when non-empty, and
  // reset the text buffer.
  function closeText (parser) {
    var text = textopts(parser.opt, parser.textNode)
    parser.textNode = text
    if (text) {
      emit(parser, 'ontext', text)
    }
    parser.textNode = ''
  }
  /**
   * Apply the `trim` and `normalize` options to a piece of text.
   *
   * @param {Object} opt - options; `trim` strips surrounding whitespace,
   *   `normalize` collapses each whitespace run to a single space.
   * @param {string} text - text to process.
   * @returns {string} the processed text.
   */
  function textopts (opt, text) {
    var result = text
    if (opt.trim) {
      result = result.trim()
    }
    if (opt.normalize) {
      result = result.replace(/\s+/g, ' ')
    }
    return result
  }
  /**
   * Record and emit a parse error.
   *
   * @param {Object} parser - the parser to flag.
   * @param {string} er - message; line/column/char details are appended when
   *   position tracking is on.
   * @returns {Object} the parser.
   */
  function error (parser, er) {
    closeText(parser)

    var message = er
    if (parser.trackPosition) {
      message += '\nLine: ' + parser.line
      message += '\nColumn: ' + parser.column
      message += '\nChar: ' + parser.c
    }

    var failure = new Error(message)
    parser.error = failure
    emit(parser, 'onerror', failure)
    return parser
  }
  /**
   * Finish parsing: report an unclosed root (strict mode) or an unexpected
   * end, flush text, emit 'onend', then reinitialise the parser in place.
   *
   * @returns {Object} the parser.
   */
  function end (parser) {
    if (parser.sawRoot && !parser.closedRoot) {
      strictFail(parser, 'Unclosed root tag')
    }

    // Only these states are valid places for the input to stop.
    var current = parser.state
    var mayStopHere = current === S.BEGIN ||
      current === S.BEGIN_WHITESPACE ||
      current === S.TEXT
    if (!mayStopHere) {
      error(parser, 'Unexpected end')
    }

    closeText(parser)
    parser.c = ''
    parser.closed = true
    emit(parser, 'onend')
    // Re-run the constructor on this instance to reset all parser state.
    SAXParser.call(parser, parser.strict, parser.opt)
    return parser
  }
  /**
   * Report `message` through error(), but only for strict parsers.
   *
   * @throws {Error} when `parser` is not a SAXParser instance.
   */
  function strictFail (parser, message) {
    var isParser = typeof parser === 'object' && parser instanceof SAXParser
    if (!isParser) {
      throw new Error('bad call to strictFail')
    }
    if (!parser.strict) return
    error(parser, message)
  }
  // Start a new tag record from the accumulated tag name.
  function newTag (parser) {
    // Loose mode folds tag names to the configured case.
    if (!parser.strict) {
      parser.tagName = parser.tagName[parser.looseCase]()
    }

    var node = { name: parser.tagName, attributes: {} }
    parser.tag = node

    if (parser.opt.xmlns) {
      // Inherit the enclosing scope's bindings; this is overridden if the tag
      // contains an xmlns="foo" or xmlns:foo="bar" attribute.
      var enclosing = parser.tags[parser.tags.length - 1] || parser
      node.ns = enclosing.ns
    }

    parser.attribList.length = 0
  }
  /**
   * Split a qualified name into prefix and local part.
   *
   * @param {string} name - tag or attribute name.
   * @param {boolean} [attribute] - truthy when `name` is an attribute name;
   *   a bare `xmlns` attribute is then reported as prefix 'xmlns' with an
   *   empty local part.
   * @returns {{prefix: string, local: string}}
   */
  function qname (name, attribute) {
    // <x "xmlns"="http://foo">
    if (attribute && name === 'xmlns') {
      return { prefix: 'xmlns', local: '' }
    }

    if (name.indexOf(':') < 0) {
      return { prefix: '', local: name }
    }

    var pieces = name.split(':')
    return { prefix: pieces[0], local: pieces[1] }
  }
  /**
   * Commit the attribute accumulated in parser.attribName/attribValue to the
   * current tag. A repeated attribute name is ignored (first value wins).
   * In xmlns mode the attribute is queued on parser.attribList so that its
   * event can be emitted once all namespace bindings of the tag are known;
   * otherwise it is stored and emitted immediately.
   */
  function attrib (parser) {
    if (!parser.strict) {
      parser.attribName = parser.attribName[parser.looseCase]()
    }

    // attribList holds [name, value] pairs, so compare against the name slot.
    var attribName = parser.attribName
    var alreadyQueued = parser.attribList.some(function (pair) {
      return pair[0] === attribName
    })
    // Call hasOwnProperty through Object.prototype: the document may contain
    // an attribute literally named "hasOwnProperty", which would shadow it.
    if (alreadyQueued ||
      Object.prototype.hasOwnProperty.call(parser.tag.attributes, attribName)) {
      parser.attribName = parser.attribValue = ''
      return
    }

    if (parser.opt.xmlns) {
      var qn = qname(parser.attribName, true)
      var prefix = qn.prefix
      var local = qn.local

      if (prefix === 'xmlns') {
        // namespace binding attribute. push the binding into scope
        if (local === 'xml' && parser.attribValue !== XML_NAMESPACE) {
          strictFail(parser,
            'xml: prefix must be bound to ' + XML_NAMESPACE + '\n' +
            'Actual: ' + parser.attribValue)
        } else if (local === 'xmlns' && parser.attribValue !== XMLNS_NAMESPACE) {
          strictFail(parser,
            'xmlns: prefix must be bound to ' + XMLNS_NAMESPACE + '\n' +
            'Actual: ' + parser.attribValue)
        } else {
          var tag = parser.tag
          var parent = parser.tags[parser.tags.length - 1] || parser
          // Give the tag its own scope object the first time it binds a
          // prefix, so the parent's bindings are not modified.
          if (tag.ns === parent.ns) {
            tag.ns = Object.create(parent.ns)
          }
          tag.ns[local] = parser.attribValue
        }
      }

      // defer onattribute events until all attributes have been seen
      // so any new bindings can take effect. preserve attribute order
      // so deferred events can be emitted in document order
      parser.attribList.push([parser.attribName, parser.attribValue])
    } else {
      // in non-xmlns mode, we can emit the event right away
      parser.tag.attributes[parser.attribName] = parser.attribValue
      emitNode(parser, 'onattribute', {
        name: parser.attribName,
        value: parser.attribValue
      })
    }

    parser.attribName = parser.attribValue = ''
  }
  /**
   * Finish an opening tag: resolve namespaces and emit the deferred attribute
   * events (xmlns mode), push the tag on the stack, emit 'onopentag' and pick
   * the next parser state.
   *
   * @param {Object} parser
   * @param {boolean} [selfClosing] - truthy for <tag/>; the tag is then kept
   *   in parser.tag and the state is left for the caller to set.
   */
  function openTag (parser, selfClosing) {
    if (parser.opt.xmlns) {
      var tag = parser.tag

      // add namespace info to tag
      var tagName = qname(parser.tagName)
      tag.prefix = tagName.prefix
      tag.local = tagName.local
      tag.uri = tag.ns[tagName.prefix] || ''

      if (tag.prefix && !tag.uri) {
        strictFail(parser, 'Unbound namespace prefix: ' +
          JSON.stringify(parser.tagName))
        tag.uri = tagName.prefix
      }

      // emit namespace binding events when this tag has its own scope
      var enclosing = parser.tags[parser.tags.length - 1] || parser
      if (tag.ns && enclosing.ns !== tag.ns) {
        Object.keys(tag.ns).forEach(function (boundPrefix) {
          emitNode(parser, 'onopennamespace', {
            prefix: boundPrefix,
            uri: tag.ns[boundPrefix]
          })
        })
      }

      // handle deferred onattribute events
      // Note: do not apply default ns to attributes:
      // http://www.w3.org/TR/REC-xml-names/#defaulting
      for (var idx = 0, count = parser.attribList.length; idx < count; idx++) {
        var pair = parser.attribList[idx]
        var attrName = pair[0]
        var parts = qname(attrName, true)
        var attrPrefix = parts.prefix
        var attrUri = attrPrefix === '' ? '' : (tag.ns[attrPrefix] || '')
        var attr = {
          name: attrName,
          value: pair[1],
          prefix: attrPrefix,
          local: parts.local,
          uri: attrUri
        }

        // attributes whose prefix has no binding fail now
        if (attrPrefix && attrPrefix !== 'xmlns' && !attrUri) {
          strictFail(parser, 'Unbound namespace prefix: ' +
            JSON.stringify(attrPrefix))
          attr.uri = attrPrefix
        }

        parser.tag.attributes[attrName] = attr
        emitNode(parser, 'onattribute', attr)
      }
      parser.attribList.length = 0
    }

    parser.tag.isSelfClosing = !!selfClosing

    // process the tag
    parser.sawRoot = true
    parser.tags.push(parser.tag)
    emitNode(parser, 'onopentag', parser.tag)

    if (!selfClosing) {
      // special case for <script> in non-strict mode.
      var entersScript = !parser.noscript &&
        parser.tagName.toLowerCase() === 'script'
      parser.state = entersScript ? S.SCRIPT : S.TEXT
      parser.tag = null
      parser.tagName = ''
    }

    parser.attribName = parser.attribValue = ''
    parser.attribList.length = 0
  }
  /**
   * Handle a closing tag whose name is in parser.tagName.
   *
   * An empty name or a name that matches no open tag is reported (strict
   * mode) and appended to the text buffer as literal text. Otherwise every
   * tag from the top of the stack down to the matching one is popped, with
   * 'onclosetag' (and, in xmlns mode, 'onclosenamespace') emitted for each.
   * While script content is buffered, any closing tag other than </script>
   * is treated as part of the script.
   */
  function closeTag (parser) {
    if (!parser.tagName) {
      strictFail(parser, 'Weird empty close tag.')
      parser.textNode += '</>'
      parser.state = S.TEXT
      return
    }

    if (parser.script) {
      // Compare case-insensitively, matching how openTag() decides to enter
      // script mode; otherwise <SCRIPT>...</SCRIPT> would never be closed.
      if (parser.tagName.toLowerCase() !== 'script') {
        parser.script += '</' + parser.tagName + '>'
        parser.tagName = ''
        parser.state = S.SCRIPT
        return
      }
      emitNode(parser, 'onscript', parser.script)
      parser.script = ''
    }

    // first make sure that the closing tag actually exists.
    // <a><b></c></b></a> will close everything, otherwise.
    var t = parser.tags.length
    var tagName = parser.tagName
    if (!parser.strict) {
      tagName = tagName[parser.looseCase]()
    }
    var closeTo = tagName
    while (t--) {
      var close = parser.tags[t]
      if (close.name !== closeTo) {
        // strict mode reports every open tag that is skipped over
        strictFail(parser, 'Unexpected close tag')
      } else {
        break
      }
    }

    // didn't find it. we already failed for strict, so just abort.
    if (t < 0) {
      strictFail(parser, 'Unmatched closing tag: ' + parser.tagName)
      parser.textNode += '</' + parser.tagName + '>'
      parser.state = S.TEXT
      return
    }

    parser.tagName = tagName
    var s = parser.tags.length
    while (s-- > t) {
      var tag = parser.tag = parser.tags.pop()
      parser.tagName = parser.tag.name
      emitNode(parser, 'onclosetag', parser.tagName)

      var parent = parser.tags[parser.tags.length - 1] || parser
      if (parser.opt.xmlns && tag.ns !== parent.ns) {
        // remove namespace bindings introduced by tag
        Object.keys(tag.ns).forEach(function (p) {
          var n = tag.ns[p]
          emitNode(parser, 'onclosenamespace', { prefix: p, uri: n })
        })
      }
    }

    // t === 0 means the bottom-most (root) tag was the one matched.
    if (t === 0) parser.closedRoot = true
    parser.tagName = parser.attribValue = parser.attribName = ''
    parser.attribList.length = 0
    parser.state = S.TEXT
  }
  /**
   * Resolve the entity whose name is in parser.entity (without '&' and ';').
   *
   * Named entities are looked up in parser.ENTITIES, first verbatim and then
   * lower-cased. Numeric references (#NNN decimal, #xHHH hexadecimal) are
   * converted to the corresponding code point.
   *
   * @returns {string} the replacement text, or the original '&name;' text
   *   when the entity is unknown or malformed (also reported via strictFail).
   */
  function parseEntity (parser) {
    var entity = parser.entity
    var entityLC = entity.toLowerCase()
    var num
    var numStr = ''

    // Accept only string replacements. The lookup walks the prototype chain,
    // so a name such as "constructor" would otherwise resolve to a function
    // inherited from Object.prototype.
    var named = parser.ENTITIES[entity]
    if (named && typeof named === 'string') {
      return named
    }
    named = parser.ENTITIES[entityLC]
    if (named && typeof named === 'string') {
      return named
    }

    entity = entityLC
    if (entity.charAt(0) === '#') {
      if (entity.charAt(1) === 'x') {
        entity = entity.slice(2)
        num = parseInt(entity, 16)
        numStr = num.toString(16)
      } else {
        entity = entity.slice(1)
        num = parseInt(entity, 10)
        numStr = num.toString(10)
      }
    }

    // Round-trip check: apart from leading zeros, the digits must be exactly
    // what parseInt consumed.
    entity = entity.replace(/^0+/, '')
    // String.fromCodePoint throws a RangeError for NaN, negative values and
    // values above 0x10FFFF, so reject those as invalid references.
    var outOfRange = isNaN(num) || num < 0 || num > 0x10FFFF
    if (outOfRange || numStr.toLowerCase() !== entity) {
      strictFail(parser, 'Invalid character entity')
      return '&' + parser.entity + ';'
    }

    return String.fromCodePoint(num)
  }
  /**
   * Handle one character seen before the first tag of the document.
   *
   * - `<` switches to S.OPEN_WAKA and records where the tag started.
   * - whitespace is ignored.
   * - anything else is reported through `strictFail` and then becomes the
   *   start of a text node (state S.TEXT).
   *
   * @param {Object} parser parser state to update.
   * @param {string} c the current character.
   */
  function beginWhiteSpace (parser, c) {
    if (c === '<') {
      parser.state = S.OPEN_WAKA
      parser.startTagPosition = parser.position
      return
    }

    var isBlank = !not(whitespace, c)
    if (isBlank) {
      return
    }

    // Stray text ahead of the root element: odd, but it does occur, so it
    // is kept as the beginning of a text node.
    strictFail(parser, 'Non-whitespace before first tag.')
    parser.textNode = c
    parser.state = S.TEXT
  }
  /**
   * Feed one chunk of input through the parser's character state machine.
   *
   * Must be invoked with the parser as `this`. Every character of `chunk`
   * is dispatched on `parser.state`; the per-state branches accumulate text
   * into parser buffers (`textNode`, `tagName`, `attribValue`, `comment`,
   * `cdata`, `doctype`, `sgmlDecl`, ...) and call the emit/tag helpers when a
   * construct is complete. State is kept on the parser, so a construct may
   * span several calls.
   *
   * @param {?(string|Object)} chunk text to parse. `null` ends the document
   *   (delegates to `end(parser)`). Any other object (e.g. a Buffer) is
   *   converted with its own `toString()` first; note that this converts
   *   each chunk independently.
   * @returns {*} the parser for a normal write; otherwise whatever
   *   `end(parser)` or `error(parser, ...)` returns.
   * @throws rethrows `this.error` when a previous error is still set, and
   *   throws an Error if `parser.state` is not a known state.
   */
  function write (chunk) {
    var parser = this
    if (this.error) {
      throw this.error
    }
    if (parser.closed) {
      return error(parser,
        'Cannot write after close. Assign an onready handler.')
    }
    if (chunk === null) {
      return end(parser)
    }
    // Objects such as Buffers have no charAt(); without this the loop below
    // would die with a TypeError on the first character.
    if (typeof chunk === 'object') {
      chunk = chunk.toString()
    }
    var i = 0
    var c = ''
    while (true) {
      c = chunk.charAt(i++)
      parser.c = c

      // charAt() past the end yields '', which terminates the loop.
      if (!c) {
        break
      }

      if (parser.trackPosition) {
        parser.position++
        if (c === '\n') {
          parser.line++
          parser.column = 0
        } else {
          parser.column++
        }
      }

      switch (parser.state) {
        case S.BEGIN:
          parser.state = S.BEGIN_WHITESPACE
          // a leading byte-order mark is skipped
          if (c === '\uFEFF') {
            continue
          }
          beginWhiteSpace(parser, c)
          continue

        case S.BEGIN_WHITESPACE:
          beginWhiteSpace(parser, c)
          continue

        case S.TEXT:
          if (parser.sawRoot && !parser.closedRoot) {
            // Fast path inside the root element: scan ahead to the next
            // '<' or '&' (or end of chunk) and append the run in one go.
            var starti = i - 1
            while (c && c !== '<' && c !== '&') {
              c = chunk.charAt(i++)
              if (c && parser.trackPosition) {
                parser.position++
                if (c === '\n') {
                  parser.line++
                  parser.column = 0
                } else {
                  parser.column++
                }
              }
            }
            parser.textNode += chunk.substring(starti, i - 1)
          }
          if (c === '<' && !(parser.sawRoot && parser.closedRoot && !parser.strict)) {
            parser.state = S.OPEN_WAKA
            parser.startTagPosition = parser.position
          } else {
            if (not(whitespace, c) && (!parser.sawRoot || parser.closedRoot)) {
              strictFail(parser, 'Text data outside of root node.')
            }
            if (c === '&') {
              parser.state = S.TEXT_ENTITY
            } else {
              parser.textNode += c
            }
          }
          continue

        case S.SCRIPT:
          // only non-strict
          if (c === '<') {
            parser.state = S.SCRIPT_ENDING
          } else {
            parser.script += c
          }
          continue

        case S.SCRIPT_ENDING:
          if (c === '/') {
            parser.state = S.CLOSE_TAG
          } else {
            parser.script += '<' + c
            parser.state = S.SCRIPT
          }
          continue

        case S.OPEN_WAKA:
          // either a /, ?, !, or text is coming next.
          if (c === '!') {
            parser.state = S.SGML_DECL
            parser.sgmlDecl = ''
          } else if (is(whitespace, c)) {
            // wait for it...
          } else if (is(nameStart, c)) {
            parser.state = S.OPEN_TAG
            parser.tagName = c
          } else if (c === '/') {
            parser.state = S.CLOSE_TAG
            parser.tagName = ''
          } else if (c === '?') {
            parser.state = S.PROC_INST
            parser.procInstName = parser.procInstBody = ''
          } else {
            strictFail(parser, 'Unencoded <')
            // if there was some whitespace, then add that in.
            if (parser.startTagPosition + 1 < parser.position) {
              var pad = parser.position - parser.startTagPosition
              c = new Array(pad).join(' ') + c
            }
            parser.textNode += '<' + c
            parser.state = S.TEXT
          }
          continue

        case S.SGML_DECL:
          if ((parser.sgmlDecl + c).toUpperCase() === CDATA) {
            emitNode(parser, 'onopencdata')
            parser.state = S.CDATA
            parser.sgmlDecl = ''
            parser.cdata = ''
          } else if (parser.sgmlDecl + c === '--') {
            parser.state = S.COMMENT
            parser.comment = ''
            parser.sgmlDecl = ''
          } else if ((parser.sgmlDecl + c).toUpperCase() === DOCTYPE) {
            parser.state = S.DOCTYPE
            if (parser.doctype || parser.sawRoot) {
              strictFail(parser,
                'Inappropriately located doctype declaration')
            }
            parser.doctype = ''
            parser.sgmlDecl = ''
          } else if (c === '>') {
            emitNode(parser, 'onsgmldeclaration', parser.sgmlDecl)
            parser.sgmlDecl = ''
            parser.state = S.TEXT
          } else if (is(quote, c)) {
            parser.state = S.SGML_DECL_QUOTED
            // Remember which quote opened the string; SGML_DECL_QUOTED
            // compares against parser.q to find the matching close quote.
            parser.q = c
            parser.sgmlDecl += c
          } else {
            parser.sgmlDecl += c
          }
          continue

        case S.SGML_DECL_QUOTED:
          if (c === parser.q) {
            parser.state = S.SGML_DECL
            parser.q = ''
          }
          parser.sgmlDecl += c
          continue

        case S.DOCTYPE:
          if (c === '>') {
            parser.state = S.TEXT
            emitNode(parser, 'ondoctype', parser.doctype)
            parser.doctype = true // just remember that we saw it.
          } else {
            parser.doctype += c
            if (c === '[') {
              parser.state = S.DOCTYPE_DTD
            } else if (is(quote, c)) {
              parser.state = S.DOCTYPE_QUOTED
              parser.q = c
            }
          }
          continue

        case S.DOCTYPE_QUOTED:
          parser.doctype += c
          if (c === parser.q) {
            parser.q = ''
            parser.state = S.DOCTYPE
          }
          continue

        case S.DOCTYPE_DTD:
          parser.doctype += c
          if (c === ']') {
            parser.state = S.DOCTYPE
          } else if (is(quote, c)) {
            parser.state = S.DOCTYPE_DTD_QUOTED
            parser.q = c
          }
          continue

        case S.DOCTYPE_DTD_QUOTED:
          parser.doctype += c
          if (c === parser.q) {
            parser.state = S.DOCTYPE_DTD
            parser.q = ''
          }
          continue

        case S.COMMENT:
          if (c === '-') {
            parser.state = S.COMMENT_ENDING
          } else {
            parser.comment += c
          }
          continue

        case S.COMMENT_ENDING:
          if (c === '-') {
            parser.state = S.COMMENT_ENDED
            parser.comment = textopts(parser.opt, parser.comment)
            if (parser.comment) {
              emitNode(parser, 'oncomment', parser.comment)
            }
            parser.comment = ''
          } else {
            // a lone '-' is ordinary comment content
            parser.comment += '-' + c
            parser.state = S.COMMENT
          }
          continue

        case S.COMMENT_ENDED:
          if (c !== '>') {
            strictFail(parser, 'Malformed comment')
            // allow <!-- blah -- bloo --> in non-strict mode,
            // which is a comment of " blah -- bloo "
            parser.comment += '--' + c
            parser.state = S.COMMENT
          } else {
            parser.state = S.TEXT
          }
          continue

        case S.CDATA:
          if (c === ']') {
            parser.state = S.CDATA_ENDING
          } else {
            parser.cdata += c
          }
          continue

        case S.CDATA_ENDING:
          if (c === ']') {
            parser.state = S.CDATA_ENDING_2
          } else {
            parser.cdata += ']' + c
            parser.state = S.CDATA
          }
          continue

        case S.CDATA_ENDING_2:
          if (c === '>') {
            if (parser.cdata) {
              emitNode(parser, 'oncdata', parser.cdata)
            }
            emitNode(parser, 'onclosecdata')
            parser.cdata = ''
            parser.state = S.TEXT
          } else if (c === ']') {
            // "]]]": the oldest ']' is content, two are still pending
            parser.cdata += ']'
          } else {
            parser.cdata += ']]' + c
            parser.state = S.CDATA
          }
          continue

        case S.PROC_INST:
          if (c === '?') {
            parser.state = S.PROC_INST_ENDING
          } else if (is(whitespace, c)) {
            parser.state = S.PROC_INST_BODY
          } else {
            parser.procInstName += c
          }
          continue

        case S.PROC_INST_BODY:
          if (!parser.procInstBody && is(whitespace, c)) {
            // skip whitespace between the name and the body
            continue
          } else if (c === '?') {
            parser.state = S.PROC_INST_ENDING
          } else {
            parser.procInstBody += c
          }
          continue

        case S.PROC_INST_ENDING:
          if (c === '>') {
            emitNode(parser, 'onprocessinginstruction', {
              name: parser.procInstName,
              body: parser.procInstBody
            })
            parser.procInstName = parser.procInstBody = ''
            parser.state = S.TEXT
          } else {
            parser.procInstBody += '?' + c
            parser.state = S.PROC_INST_BODY
          }
          continue

        case S.OPEN_TAG:
          if (is(nameBody, c)) {
            parser.tagName += c
          } else {
            newTag(parser)
            if (c === '>') {
              openTag(parser)
            } else if (c === '/') {
              parser.state = S.OPEN_TAG_SLASH
            } else {
              if (not(whitespace, c)) {
                strictFail(parser, 'Invalid character in tag name')
              }
              parser.state = S.ATTRIB
            }
          }
          continue

        case S.OPEN_TAG_SLASH:
          if (c === '>') {
            openTag(parser, true)
            closeTag(parser)
          } else {
            strictFail(parser, 'Forward-slash in opening tag not followed by >')
            parser.state = S.ATTRIB
          }
          continue

        case S.ATTRIB:
          // haven't read the attribute name yet.
          if (is(whitespace, c)) {
            continue
          } else if (c === '>') {
            openTag(parser)
          } else if (c === '/') {
            parser.state = S.OPEN_TAG_SLASH
          } else if (is(nameStart, c)) {
            parser.attribName = c
            parser.attribValue = ''
            parser.state = S.ATTRIB_NAME
          } else {
            strictFail(parser, 'Invalid attribute name')
          }
          continue

        case S.ATTRIB_NAME:
          if (c === '=') {
            parser.state = S.ATTRIB_VALUE
          } else if (c === '>') {
            strictFail(parser, 'Attribute without value')
            parser.attribValue = parser.attribName
            attrib(parser)
            openTag(parser)
          } else if (is(whitespace, c)) {
            parser.state = S.ATTRIB_NAME_SAW_WHITE
          } else if (is(nameBody, c)) {
            parser.attribName += c
          } else {
            strictFail(parser, 'Invalid attribute name')
          }
          continue

        case S.ATTRIB_NAME_SAW_WHITE:
          if (c === '=') {
            parser.state = S.ATTRIB_VALUE
          } else if (is(whitespace, c)) {
            continue
          } else {
            strictFail(parser, 'Attribute without value')
            parser.tag.attributes[parser.attribName] = ''
            parser.attribValue = ''
            emitNode(parser, 'onattribute', {
              name: parser.attribName,
              value: ''
            })
            parser.attribName = ''
            if (c === '>') {
              openTag(parser)
            } else if (is(nameStart, c)) {
              parser.attribName = c
              parser.state = S.ATTRIB_NAME
            } else {
              strictFail(parser, 'Invalid attribute name')
              parser.state = S.ATTRIB
            }
          }
          continue

        case S.ATTRIB_VALUE:
          if (is(whitespace, c)) {
            continue
          } else if (is(quote, c)) {
            parser.q = c
            parser.state = S.ATTRIB_VALUE_QUOTED
          } else {
            strictFail(parser, 'Unquoted attribute value')
            parser.state = S.ATTRIB_VALUE_UNQUOTED
            parser.attribValue = c
          }
          continue

        case S.ATTRIB_VALUE_QUOTED:
          if (c !== parser.q) {
            if (c === '&') {
              parser.state = S.ATTRIB_VALUE_ENTITY_Q
            } else {
              parser.attribValue += c
            }
            continue
          }
          attrib(parser)
          parser.q = ''
          parser.state = S.ATTRIB_VALUE_CLOSED
          continue

        case S.ATTRIB_VALUE_CLOSED:
          if (is(whitespace, c)) {
            parser.state = S.ATTRIB
          } else if (c === '>') {
            openTag(parser)
          } else if (c === '/') {
            parser.state = S.OPEN_TAG_SLASH
          } else if (is(nameStart, c)) {
            strictFail(parser, 'No whitespace between attributes')
            parser.attribName = c
            parser.attribValue = ''
            parser.state = S.ATTRIB_NAME
          } else {
            strictFail(parser, 'Invalid attribute name')
          }
          continue

        case S.ATTRIB_VALUE_UNQUOTED:
          if (not(attribEnd, c)) {
            if (c === '&') {
              parser.state = S.ATTRIB_VALUE_ENTITY_U
            } else {
              parser.attribValue += c
            }
            continue
          }
          attrib(parser)
          if (c === '>') {
            openTag(parser)
          } else {
            parser.state = S.ATTRIB
          }
          continue

        case S.CLOSE_TAG:
          if (!parser.tagName) {
            if (is(whitespace, c)) {
              continue
            } else if (not(nameStart, c)) {
              if (parser.script) {
                // "</" inside script content that is not a closing tag
                parser.script += '</' + c
                parser.state = S.SCRIPT
              } else {
                strictFail(parser, 'Invalid tagname in closing tag.')
              }
            } else {
              parser.tagName = c
            }
          } else if (c === '>') {
            closeTag(parser)
          } else if (is(nameBody, c)) {
            parser.tagName += c
          } else if (parser.script) {
            parser.script += '</' + parser.tagName
            parser.tagName = ''
            parser.state = S.SCRIPT
          } else {
            if (not(whitespace, c)) {
              strictFail(parser, 'Invalid tagname in closing tag')
            }
            parser.state = S.CLOSE_TAG_SAW_WHITE
          }
          continue

        case S.CLOSE_TAG_SAW_WHITE:
          if (is(whitespace, c)) {
            continue
          }
          if (c === '>') {
            closeTag(parser)
          } else {
            strictFail(parser, 'Invalid characters in closing tag')
          }
          continue

        case S.TEXT_ENTITY:
        case S.ATTRIB_VALUE_ENTITY_Q:
        case S.ATTRIB_VALUE_ENTITY_U:
          // The three entity states share one body; they differ only in
          // which buffer receives the result and which state resumes.
          var returnState
          var buffer
          switch (parser.state) {
            case S.TEXT_ENTITY:
              returnState = S.TEXT
              buffer = 'textNode'
              break

            case S.ATTRIB_VALUE_ENTITY_Q:
              returnState = S.ATTRIB_VALUE_QUOTED
              buffer = 'attribValue'
              break

            case S.ATTRIB_VALUE_ENTITY_U:
              returnState = S.ATTRIB_VALUE_UNQUOTED
              buffer = 'attribValue'
              break
          }

          if (c === ';') {
            parser[buffer] += parseEntity(parser)
            parser.entity = ''
            parser.state = returnState
          } else if (is(parser.entity.length ? entityBody : entityStart, c)) {
            parser.entity += c
          } else {
            strictFail(parser, 'Invalid character in entity name')
            // not an entity after all: put the raw text back
            parser[buffer] += '&' + parser.entity + c
            parser.entity = ''
            parser.state = returnState
          }
          continue

        default:
          // Error takes the message as its first argument; passing the
          // parser there would stringify it and drop the real message.
          throw new Error('Unknown state: ' + parser.state)
      }
    } // while

    if (parser.position >= parser.bufferCheckPosition) {
      checkBufferLength(parser)
    }
    return parser
  }
  1382. /*! http://mths.be/fromcodepoint v0.1.0 by @mathias */
  // Install String.fromCodePoint only where the runtime lacks it.
  if (!String.fromCodePoint) {
    (function () {
      // Code units are flushed in batches so that the argument list handed
      // to fromCharCode.apply stays bounded.
      var FLUSH_THRESHOLD = 0x4000
      var charsFromUnits = String.fromCharCode
      var roundDown = Math.floor

      /**
       * Build a string from any number of Unicode code points.
       * Takes its inputs through `arguments`; each is converted with Number().
       * Returns '' when called without arguments and throws a RangeError for
       * anything that is not an integer in 0..0x10FFFF.
       */
      var fromCodePoint = function () {
        var total = arguments.length
        if (!total) {
          return ''
        }

        var units = []
        var text = ''
        var lead
        var trail

        for (var k = 0; k < total; k++) {
          var cp = Number(arguments[k])
          var acceptable =
            isFinite(cp) && // rules out NaN and both infinities
            cp >= 0 && // lower bound of the code point range
            cp <= 0x10FFFF && // upper bound of the code point range
            roundDown(cp) === cp // must be an integer
          if (!acceptable) {
            throw RangeError('Invalid code point: ' + cp)
          }

          if (cp <= 0xFFFF) {
            // Basic Multilingual Plane: a single code unit
            units.push(cp)
          } else {
            // Astral plane: encode as a surrogate pair
            // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
            cp -= 0x10000
            lead = (cp >> 10) + 0xD800
            trail = (cp % 0x400) + 0xDC00
            units.push(lead, trail)
          }

          var isLast = k + 1 === total
          if (isLast || units.length > FLUSH_THRESHOLD) {
            text += charsFromUnits.apply(null, units)
            units.length = 0
          }
        }

        return text
      }

      if (Object.defineProperty) {
        Object.defineProperty(String, 'fromCodePoint', {
          value: fromCodePoint,
          configurable: true,
          writable: true
        })
      } else {
        String.fromCodePoint = fromCodePoint
      }
    }())
  }
  1435. })(typeof exports === 'undefined' ? this.sax = {} : exports)