No Description

elementtree.js 13KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611
  1. /**
  2. * Copyright 2011 Rackspace
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. *
  16. */
  17. var sprintf = require('./sprintf').sprintf;
  18. var utils = require('./utils');
  19. var ElementPath = require('./elementpath');
  20. var TreeBuilder = require('./treebuilder').TreeBuilder;
  21. var get_parser = require('./parser').get_parser;
  22. var constants = require('./constants');
  /* Counter that supplies each Element with a unique, increasing _id. */
  var element_ids = 0;

  /**
   * A single node in the tree.
   *
   * @param {*} tag Tag of the node (a string, a QName, or one of the marker
   *   functions Comment / CData / ProcessingInstruction).
   * @param {Object} [attrib] Initial attributes; they are merged into a fresh
   *   object so the caller's object is not stored on the element.
   */
  function Element(tag, attrib)
  {
    var own_attrib = {};

    this._id = element_ids++;
    this.tag = tag;
    this.attrib = own_attrib;
    this.text = null;
    this.tail = null;
    this._children = [];

    if (attrib) {
      this.attrib = utils.merge(own_attrib, attrib);
    }
  }
  /**
   * Debug representation: "<Element TAG at ID>".
   */
  Element.prototype.toString = function()
  {
    var label = sprintf("<Element %s at %s>", this.tag, this._id);
    return label;
  };

  /**
   * Create a new, detached element; it is NOT appended to this one.
   */
  Element.prototype.makeelement = function(tag, attrib)
  {
    var created = new Element(tag, attrib);
    return created;
  };

  /**
   * Number of direct children.
   */
  Element.prototype.len = function()
  {
    var kids = this._children;
    return kids.length;
  };
  /**
   * Child at the given position (undefined when out of range).
   */
  Element.prototype.getItem = function(index)
  {
    var kids = this._children;
    return kids[index];
  };

  /**
   * Overwrite the child stored at the given position.
   */
  Element.prototype.setItem = function(index, element)
  {
    var kids = this._children;
    kids[index] = element;
  };

  /**
   * Remove the single child at the given position.
   */
  Element.prototype.delItem = function(index)
  {
    var kids = this._children;
    kids.splice(index, 1);
  };

  /**
   * Shallow copy of the children in [start, stop).
   */
  Element.prototype.getSlice = function(start, stop)
  {
    var kids = this._children;
    return kids.slice(start, stop);
  };
  /**
   * Replace the children in [start, stop) with the given elements, like a
   * Python slice assignment: the replacement may be shorter or longer than
   * the range it replaces, and the child list shrinks or grows accordingly.
   *
   * The previous index-by-index copy stored `undefined` children when fewer
   * elements than positions were supplied and silently dropped the surplus
   * when more were supplied.
   *
   * @param {Number} start Index of the first child to replace.
   * @param {Number} stop Index one past the last child to replace.
   * @param {Array} elements Replacement children.
   */
  Element.prototype.setSlice = function(start, stop, elements)
  {
    var replacement = elements || [];
    /* splice(start, deleteCount, item1, item2, ...) edits the array in place. */
    var splice_args = [start, stop - start].concat(replacement);

    Array.prototype.splice.apply(this._children, splice_args);
  };
  /**
   * Remove the children in [start, stop).
   */
  Element.prototype.delSlice = function(start, stop)
  {
    var count = stop - start;
    this._children.splice(start, count);
  };

  /**
   * Add one element as the last child.
   */
  Element.prototype.append = function(element)
  {
    var kids = this._children;
    kids.push(element);
  };
  /**
   * Append every element of the given array as a child, in order.
   *
   * Array.prototype.concat returns a new array and leaves the receiver
   * untouched, so the previous implementation discarded its result and never
   * added anything. The children are pushed in place here so that references
   * previously handed out by getchildren() keep observing the same array.
   *
   * @param {Array} elements Elements to append.
   */
  Element.prototype.extend = function(elements)
  {
    var i;
    /* Capture the length first so extend(this._children) terminates. */
    var count = elements.length;

    for (i = 0; i < count; i++) {
      this._children.push(elements[i]);
    }
  };
  /**
   * Insert an element at the given position, shifting the child currently at
   * that position (and everything after it) one place to the right.
   *
   * The previous implementation assigned to the index, which replaced the
   * existing child instead of inserting before it.
   *
   * @param {Number} index Position the new child will occupy.
   * @param {Element} element Child to insert.
   */
  Element.prototype.insert = function(index, element)
  {
    this._children.splice(index, 0, element);
  };
  /**
   * Drop the given element from the direct children. Children are matched by
   * their _id, and the child list is replaced by a new filtered array.
   */
  Element.prototype.remove = function(element)
  {
    /* TODO: is this the right way to do this? */
    var survivors = this._children.filter(function(child) {
      return child._id !== element._id;
    });

    this._children = survivors;
  };
  /**
   * The live array of direct children (not a copy).
   */
  Element.prototype.getchildren = function() {
    var kids = this._children;
    return kids;
  };

  /**
   * Delegates to ElementPath.find with this element as the starting point.
   */
  Element.prototype.find = function(path)
  {
    var match = ElementPath.find(this, path);
    return match;
  };

  /**
   * Delegates to ElementPath.findtext with this element as the starting point.
   */
  Element.prototype.findtext = function(path, defvalue)
  {
    var found = ElementPath.findtext(this, path, defvalue);
    return found;
  };

  /**
   * Delegates to ElementPath.findall with this element as the starting point.
   */
  Element.prototype.findall = function(path, defvalue)
  {
    var matches = ElementPath.findall(this, path, defvalue);
    return matches;
  };
  /**
   * Reset the element: no attributes, no children, no text, no tail.
   * The tag and _id are kept.
   */
  Element.prototype.clear = function()
  {
    this.attrib = {};
    this._children = [];
    this.text = this.tail = null;
  };

  /**
   * Value of an attribute, or defvalue when the attribute is undefined.
   */
  Element.prototype.get = function(key, defvalue)
  {
    var value = this.attrib[key];
    return (value === undefined) ? defvalue : value;
  };

  /**
   * Set (or overwrite) one attribute.
   */
  Element.prototype.set = function(key, value)
  {
    var attrs = this.attrib;
    attrs[key] = value;
  };

  /**
   * Names of all attributes.
   */
  Element.prototype.keys = function()
  {
    var attrs = this.attrib;
    return Object.keys(attrs);
  };

  /**
   * Attributes as returned by utils.items for the attribute object.
   */
  Element.prototype.items = function()
  {
    var attrs = this.attrib;
    return utils.items(attrs);
  };
  /*
   * Python's ElementTree exposes this as a generator; here every matching
   * element is handed to a callback instead.
   *
   * Walks this element and then its descendants depth-first. A tag of "*"
   * or null matches every element; any other tag must be strictly equal to
   * the element's tag for the callback to fire.
   **/
  Element.prototype.iter = function(tag, callback)
  {
    var wanted = (tag === "*") ? null : tag;
    var index = 0;

    if (wanted === null || this.tag === wanted) {
      callback(this);
    }

    /* Length is re-read on every pass, as in a plain index loop. */
    while (index < this._children.length) {
      this._children[index].iter(wanted, callback);
      index += 1;
    }
  };
  /**
   * Pass every non-empty piece of text inside this element to the callback,
   * in document order: the element's own text, then each child's text
   * (recursively), and each element's tail only after that element's whole
   * subtree has been emitted.
   *
   * The previous implementation reported an element's text and tail back to
   * back, before visiting its children, so for <b>1<c>2</c>3</b>4 the pieces
   * arrived as 1, 4, 2, 3 instead of 1, 2, 3, 4. The set of strings reported
   * is unchanged (this element's own tail is still included, last); only the
   * order is corrected.
   *
   * @param {Function} callback Called with one string per text piece.
   */
  Element.prototype.itertext = function(callback)
  {
    var i;

    if (this.text) {
      callback(this.text);
    }

    for (i = 0; i < this._children.length; i++) {
      /* The child reports its own tail once its subtree is done. */
      this._children[i].itertext(callback);
    }

    if (this.tail) {
      callback(this.tail);
    }
  };
  /**
   * Create an element via parent.makeelement, append it to the parent and
   * return the new child.
   */
  function SubElement(parent, tag, attrib) {
    var child = parent.makeelement(tag, attrib);

    parent.append(child);

    return child;
  }
  /**
   * Build a comment node: an Element whose tag is the Comment function
   * itself. A truthy text becomes the node's text.
   */
  function Comment(text) {
    var node = new Element(Comment);

    if (!text) {
      return node;
    }

    node.text = text;
    return node;
  }

  /**
   * Build a CDATA node: an Element whose tag is the CData function itself.
   * A truthy text becomes the node's text.
   */
  function CData(text) {
    var node = new Element(CData);

    if (!text) {
      return node;
    }

    node.text = text;
    return node;
  }
  /**
   * Build a processing-instruction node: an Element whose tag is the
   * ProcessingInstruction function itself. Its text is the target, followed
   * by a space and the instruction text when a truthy text is given.
   */
  function ProcessingInstruction(target, text)
  {
    var node = new Element(ProcessingInstruction);

    node.text = text ? target + " " + text : target;

    return node;
  }
  /**
   * Qualified-name wrapper. With one argument the text is stored as given;
   * with a truthy second argument the two are combined as "{uri}tag".
   */
  function QName(text_or_uri, tag)
  {
    this.text = tag ? sprintf("{%s}%s", text_or_uri, tag) : text_or_uri;
  }

  /**
   * The stored qualified name.
   */
  QName.prototype.toString = function() {
    var name = this.text;
    return name;
  };
  /**
   * Wrapper around a root element.
   *
   * @param {Element} [element] Root of the tree; may be supplied later by
   *   parse() or _setroot().
   */
  function ElementTree(element)
  {
    this._root = element;
  }

  /**
   * The current root element.
   */
  ElementTree.prototype.getroot = function() {
    var root = this._root;
    return root;
  };

  /**
   * Replace the root element.
   */
  ElementTree.prototype._setroot = function(element) {
    var replacement = element;
    this._root = replacement;
  };
  /**
   * Feed the source to a parser and adopt the element it returns from
   * close() as the new root.
   *
   * @param {*} source Data handed to parser.feed().
   * @param {Object} [parser] Parser exposing feed() and close(). When omitted,
   *   the module returned by get_parser(constants.DEFAULT_PARSER) is used to
   *   build an XMLParser around a new TreeBuilder.
   * @return {*} The new root.
   */
  ElementTree.prototype.parse = function(source, parser) {
    var active = parser;
    var backend;

    if (!active) {
      backend = get_parser(constants.DEFAULT_PARSER);
      active = new backend.XMLParser(new TreeBuilder());
    }

    active.feed(source);
    this._root = active.close();

    return this._root;
  };
  /**
   * Forwards to the root element's iter(); nothing is returned.
   */
  ElementTree.prototype.iter = function(tag, callback) {
    var root = this._root;
    root.iter(tag, callback);
  };

  /**
   * Forwards to the root element's find().
   */
  ElementTree.prototype.find = function(path) {
    var root = this._root;
    return root.find(path);
  };

  /**
   * Forwards to the root element's findtext().
   */
  ElementTree.prototype.findtext = function(path, defvalue) {
    var root = this._root;
    return root.findtext(path, defvalue);
  };

  /**
   * Forwards to the root element's findall().
   */
  ElementTree.prototype.findall = function(path) {
    var root = this._root;
    return root.findall(path);
  };
  /**
   * Serialize the tree and return it as a string (unlike Python's
   * ElementTree, nothing is written to a file).
   *
   * Recognised options (defaults in parentheses):
   *   - encoding ('utf-8'): name placed in the XML declaration and passed on
   *     to the serializer helpers.
   *   - xml_declaration (null): pass false to omit the declaration.
   *   - default_namespace (null): handed to _namespaces().
   *   - method ('xml'): 'xml' or 'text'.
   *   - indent (absent): when present, the number of spaces per nesting
   *     level; its presence also switches on newlines between elements.
   *
   * The 'text' method previously referenced names that do not exist in this
   * file and therefore always threw a ReferenceError. It now returns the
   * concatenation of the pieces reported by the root's itertext(), without
   * an XML declaration since the result is not XML.
   *
   * @param {Object} [options] See above.
   * @return {String} The serialized tree.
   * @throws {Error} When options.method is neither 'xml' nor 'text'.
   */
  ElementTree.prototype.write = function(options) {
    var sb = [];
    var nsinfo, qnames, namespaces, indent, indent_string;

    options = utils.merge({
      encoding: 'utf-8',
      xml_declaration: null,
      default_namespace: null,
      method: 'xml'}, options);

    if (options.method === "text") {
      this._root.itertext(function(part) {
        sb.push(part);
      });
      return sb.join("");
    }

    if (options.xml_declaration !== false) {
      sb.push("<?xml version='1.0' encoding='" + options.encoding + "'?>\n");
    }

    nsinfo = _namespaces(this._root, options.encoding, options.default_namespace);
    qnames = nsinfo[0];
    namespaces = nsinfo[1];

    if (Object.prototype.hasOwnProperty.call(options, 'indent')) {
      /* indent is the current depth; indent_string is one level of padding. */
      indent = 0;
      indent_string = new Array(options.indent + 1).join(' ');
    }
    else {
      indent = false;
    }

    if (options.method !== "xml") {
      /* TODO: html */
      throw new Error("unknown serialization method "+ options.method);
    }

    _serialize_xml(function(data) {
      sb.push(data);
    }, this._root, options.encoding, qnames, namespaces, indent, indent_string);

    return sb.join("");
  };
  /*
   * Maps namespace URIs to the prefix used for them. _namespaces() consults
   * this map when it meets a URI that has no prefix assigned yet, and
   * register_namespace() adds entries to it.
   */
  var _namespace_map = {
    /* "well-known" namespace prefixes */
    "http://www.w3.org/XML/1998/namespace": "xml",
    "http://www.w3.org/1999/xhtml": "html",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
    "http://schemas.xmlsoap.org/wsdl/": "wsdl",
    /* xml schema */
    "http://www.w3.org/2001/XMLSchema": "xs",
    "http://www.w3.org/2001/XMLSchema-instance": "xsi",
    /* dublin core */
    "http://purl.org/dc/elements/1.1/": "dc",
  };
  /**
   * Register a prefix for a namespace URI so that serialization uses it
   * instead of a generated "nsN" prefix.
   *
   * Any existing mapping for the same URI or for the same prefix is removed
   * first, so a prefix never ends up bound to two URIs. (The previous
   * delete-then-set logic only touched the entry that was about to be
   * overwritten anyway, so a stale mapping for the same prefix survived.)
   *
   * Only prefixes of the exact form "ns<digits>" are reserved for the
   * generated prefixes; the pattern is anchored at both ends so that names
   * such as "dns1" are accepted.
   *
   * @param {String} prefix Prefix to use for the namespace.
   * @param {String} uri Namespace URI.
   * @throws {Error} When the prefix has the reserved "ns<digits>" form.
   */
  function register_namespace(prefix, uri) {
    if (/^ns\d+$/.test(prefix)) {
      throw new Error('Prefix format reserved for internal use');
    }

    Object.keys(_namespace_map).forEach(function(known_uri) {
      if (known_uri === uri || _namespace_map[known_uri] === prefix) {
        delete _namespace_map[known_uri];
      }
    });

    _namespace_map[uri] = prefix;
  }
  /**
   * Escape markup characters in a value.
   *
   * Falsy input is returned untouched. Otherwise the value is converted with
   * toString() and &, < and > are replaced by entities. Line feeds and
   * carriage returns become character references unless isText is truthy,
   * and double quotes are escaped only when isAttribute is truthy. The
   * encoding argument is accepted but not used.
   */
  function _escape(text, encoding, isAttribute, isText) {
    var escaped;

    if (!text) {
      return text;
    }

    escaped = text.toString()
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;');

    if (!isText) {
      escaped = escaped
        .replace(/\n/g, '&#xA;')
        .replace(/\r/g, '&#xD;');
    }

    if (isAttribute) {
      escaped = escaped.replace(/"/g, '&quot;');
    }

    return escaped;
  }

  /* TODO: benchmark single regex */

  /** Escape a value for use inside a double-quoted attribute. */
  function _escape_attrib(text, encoding) {
    var as_attribute = true;
    return _escape(text, encoding, as_attribute);
  }

  /** Escape without quote handling; line breaks are still converted. */
  function _escape_cdata(text, encoding) {
    var as_attribute = false;
    return _escape(text, encoding, as_attribute);
  }

  /** Escape element text; line breaks and quotes are left as they are. */
  function _escape_text(text, encoding) {
    var as_attribute = false;
    var as_text = true;
    return _escape(text, encoding, as_attribute, as_text);
  }
  /**
   * Walk the tree rooted at elem and work out how every tag, attribute name
   * and QName value will be written.
   *
   * Names in "{uri}local" form are given a prefix: the one already assigned
   * during this walk, else the one registered in _namespace_map, else a
   * generated "nsN". The "xml" prefix is never added to the returned
   * namespace table.
   *
   * Fixes relative to the previous version:
   *   - A QName tag that had already been registered fell through to the
   *     "Invalid tag type" error, so a tree using the same QName tag on two
   *     elements could not be serialized.
   *   - "Already registered?" checks now look at own properties only, so an
   *     attribute called e.g. "constructor" is no longer mistaken for a
   *     registered name because of Object.prototype.
   *
   * @param {Element} elem Root of the subtree to scan.
   * @param {String} encoding Accepted for signature compatibility; unused.
   * @param {String} [default_namespace] URI to serialize without a prefix.
   * @return {Array} [qnames, namespaces]: qnames maps each original name to
   *   its serialized form, namespaces maps each URI to its prefix.
   * @throws {Error} For a tag of an unsupported type, or for an unqualified
   *   name when default_namespace is set.
   */
  function _namespaces(elem, encoding, default_namespace) {
    var qnames = {};
    var namespaces = {};

    if (default_namespace) {
      namespaces[default_namespace] = "";
    }

    function add_qname(qname) {
      if (qname[0] === "{") {
        var tmp = qname.substring(1).split("}", 2);
        var uri = tmp[0];
        var tag = tmp[1];
        var prefix = namespaces[uri];

        if (prefix === undefined) {
          prefix = _namespace_map[uri];
          if (prefix === undefined) {
            prefix = "ns" + Object.keys(namespaces).length;
          }
          if (prefix !== "xml") {
            namespaces[uri] = prefix;
          }
        }

        if (prefix) {
          qnames[qname] = sprintf("%s:%s", prefix, tag);
        }
        else {
          /* Empty prefix: the name lives in the default namespace. */
          qnames[qname] = tag;
        }
      }
      else {
        if (default_namespace) {
          throw new Error('cannot use non-qualified names with default_namespace option');
        }
        qnames[qname] = qname;
      }
    }

    /* Register a name unless this walk has already done so. */
    function add_qname_once(qname) {
      if (!Object.prototype.hasOwnProperty.call(qnames, qname)) {
        add_qname(qname);
      }
    }

    elem.iter(null, function(e) {
      var tag = e.tag;
      var text = e.text;
      var items = e.items();

      if (tag instanceof QName) {
        add_qname_once(tag.text);
      }
      else if (typeof(tag) === "string") {
        add_qname(tag);
      }
      else if (tag !== null && tag !== Comment && tag !== CData && tag !== ProcessingInstruction) {
        throw new Error('Invalid tag type for serialization: '+ tag);
      }

      if (text instanceof QName) {
        add_qname_once(text.text);
      }

      items.forEach(function(item) {
        var key = item[0],
            value = item[1];

        if (key instanceof QName) {
          key = key.text;
        }
        add_qname_once(key);

        if (value instanceof QName) {
          add_qname_once(value.text);
        }
      });
    });

    return [qnames, namespaces];
  }
  /**
   * Serialize one element (and, recursively, its children) as XML by passing
   * string fragments to write().
   *
   * Fixes relative to the previous version:
   *   - Children of an element whose tag has no entry in qnames were walked
   *     with elem.iter(fn), which passed the callback in the tag position,
   *     so they were never serialized. The direct children are visited now.
   *   - The comparator used to order namespace declarations returned a
   *     boolean; Array.prototype.sort needs a negative / zero / positive
   *     number, so the order was unreliable. Declarations are now sorted by
   *     prefix.
   *
   * @param {Function} write Receives each output fragment.
   * @param {Element} elem Element to serialize.
   * @param {String} encoding Passed through to the escape helpers.
   * @param {Object} qnames Map from original names to serialized names.
   * @param {Object} namespaces URI-to-prefix map to declare on this element,
   *   or null (children are always serialized with null).
   * @param {Number|Boolean} indent Current nesting depth, or false when
   *   pretty-printing is off.
   * @param {String} indent_string Padding for one nesting level.
   */
  function _serialize_xml(write, elem, encoding, qnames, namespaces, indent, indent_string) {
    var tag = elem.tag;
    var text = elem.text;
    var items;
    /* Depth 0 is falsy but still means "pretty-print". */
    var newlines = indent || (indent === 0);
    var child_indent = newlines ? indent + 1 : false;

    function serialize_child(child) {
      _serialize_xml(write, child, encoding, qnames, null, child_indent, indent_string);
    }

    function by_prefix(a, b) {
      if (a[1] < b[1]) {
        return -1;
      }
      if (a[1] > b[1]) {
        return 1;
      }
      return 0;
    }

    write(Array(indent + 1).join(indent_string));

    if (tag === Comment) {
      write(sprintf("<!--%s-->", _escape_cdata(text, encoding)));
    }
    else if (tag === ProcessingInstruction) {
      write(sprintf("<?%s?>", _escape_cdata(text, encoding)));
    }
    else if (tag === CData) {
      text = text || '';
      write(sprintf("<![CDATA[%s]]>", text));
    }
    else {
      tag = qnames[tag];
      if (tag === undefined) {
        /* No serialized name: emit only the text and the children. */
        if (text) {
          write(_escape_text(text, encoding));
        }
        elem._children.forEach(serialize_child);
      }
      else {
        write("<" + tag);

        items = elem.items();
        items.sort(); // lexical order
        items.forEach(function(item) {
          var k = item[0],
              v = item[1];

          if (k instanceof QName) {
            k = k.text;
          }

          if (v instanceof QName) {
            v = qnames[v.text];
          }
          else {
            v = _escape_attrib(v, encoding);
          }
          write(sprintf(" %s=\"%s\"", qnames[k], v));
        });

        if (namespaces) {
          items = utils.items(namespaces);
          items.sort(by_prefix);
          items.forEach(function(item) {
            var k = item[1],
                v = item[0];

            if (k) {
              k = ':' + k;
            }
            write(sprintf(" xmlns%s=\"%s\"", k, _escape_attrib(v, encoding)));
          });
        }

        if (text || elem.len()) {
          /* Whitespace-only text is treated as no text at all. */
          if (text && text.toString().match(/^\s*$/)) {
            text = null;
          }

          write(">");
          if (!text && newlines) {
            write("\n");
          }

          if (text) {
            write(_escape_text(text, encoding));
          }
          elem._children.forEach(serialize_child);

          if (!text && indent) {
            write(Array(indent + 1).join(indent_string));
          }
          write("</" + tag + ">");
        }
        else {
          write(" />");
        }
      }
    }

    if (newlines) {
      write("\n");
    }
  }
  /**
   * Parse the source into a new ElementTree and return the tree (not the
   * root element).
   */
  function parse(source, parser) {
    var parsed = new ElementTree();

    parsed.parse(source, parser);

    return parsed;
  }

  /**
   * Serialize an element by wrapping it in a temporary ElementTree and
   * calling write() with the given options.
   */
  function tostring(element, options) {
    var wrapper = new ElementTree(element);

    return wrapper.write(options);
  }
  495. exports.PI = ProcessingInstruction;
  496. exports.Comment = Comment;
  497. exports.CData = CData;
  498. exports.ProcessingInstruction = ProcessingInstruction;
  499. exports.SubElement = SubElement;
  500. exports.QName = QName;
  501. exports.ElementTree = ElementTree;
  502. exports.ElementPath = ElementPath;
  503. exports.Element = function(tag, attrib) {
  504. return new Element(tag, attrib);
  505. };
  506. exports.XML = function(data) {
  507. var et = new ElementTree();
  508. return et.parse(data);
  509. };
  510. exports.parse = parse;
  511. exports.register_namespace = register_namespace;
  512. exports.tostring = tostring;