/**
 *  Copyright 2011 Rackspace
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 */

var sprintf = require('./sprintf').sprintf;

var utils = require('./utils');
var ElementPath = require('./elementpath');
var TreeBuilder = require('./treebuilder').TreeBuilder;
var get_parser = require('./parser').get_parser;
var constants = require('./constants');

/* Monotonic counter used to give every Element a unique `_id`. */
var element_ids = 0;

/**
 * A single node of the tree.
 *
 * @param {*} tag Tag of the element. Normally a string; the Comment, CData
 *   and ProcessingInstruction factories below pass themselves as the tag.
 * @param {Object} [attrib] Initial attributes. They are merged into a fresh
 *   object through utils.merge (presumably a shallow copy -- confirm against
 *   ./utils).
 */
function Element(tag, attrib) {
  this._id = element_ids++;
  this.tag = tag;
  this.attrib = {};
  this.text = null;
  this.tail = null;
  this._children = [];

  if (attrib) {
    this.attrib = utils.merge(this.attrib, attrib);
  }
}

/**
 * @return {String} Debug representation built from the tag and the
 *   internal id.
 */
Element.prototype.toString = function() {
  return sprintf("<Element %s at %s>", this.tag, this._id);
};

/**
 * Create a new, detached element (it is not added as a child).
 *
 * @param {*} tag Tag of the new element.
 * @param {Object} [attrib] Attributes of the new element.
 * @return {Element} The new element.
 */
Element.prototype.makeelement = function(tag, attrib) {
  return new Element(tag, attrib);
};

/**
 * @return {Number} Number of direct children.
 */
Element.prototype.len = function() {
  return this._children.length;
};

/**
 * @param {Number} index Position of the child.
 * @return {Element} Child stored at `index` (undefined when out of range).
 */
Element.prototype.getItem = function(index) {
  return this._children[index];
};

/**
 * Replace the child stored at `index`.
 *
 * @param {Number} index Position to overwrite.
 * @param {Element} element New child.
 */
Element.prototype.setItem = function(index, element) {
  this._children[index] = element;
};

/**
 * Remove the child stored at `index`.
 *
 * @param {Number} index Position to remove.
 */
Element.prototype.delItem = function(index) {
  this._children.splice(index, 1);
};

/**
 * @param {Number} start First index (inclusive).
 * @param {Number} stop Last index (exclusive).
 * @return {Array} Copy of the children in [start, stop).
 */
Element.prototype.getSlice = function(start, stop) {
  return this._children.slice(start, stop);
};

/**
 * Replace the children in [start, stop) with `elements`.
 *
 * The range is replaced as a whole, so `elements` may be shorter or longer
 * than the range without leaving holes or dropping items.
 *
 * @param {Number} start First index (inclusive).
 * @param {Number} stop Last index (exclusive).
 * @param {Array} elements Replacement children.
 */
Element.prototype.setSlice = function(start, stop, elements) {
  var replacement = Array.prototype.slice.call(elements || []);
  var args = [start, Math.max(stop - start, 0)].concat(replacement);

  Array.prototype.splice.apply(this._children, args);
};

/**
 * Remove the children in [start, stop).
 *
 * @param {Number} start First index (inclusive).
 * @param {Number} stop Last index (exclusive).
 */
Element.prototype.delSlice = function(start, stop) {
  this._children.splice(start, stop - start);
};

/**
 * Add `element` as the last child.
 *
 * @param {Element} element Child to add.
 */
Element.prototype.append = function(element) {
  this._children.push(element);
};

/**
 * Add every item of `elements` after the current last child.
 *
 * The children array is mutated in place (like append), so a reference
 * obtained earlier from getchildren() sees the new items.
 *
 * @param {Array} elements Children to add.
 */
Element.prototype.extend = function(elements) {
  var i;

  for (i = 0; i < elements.length; i++) {
    this._children.push(elements[i]);
  }
};

/**
 * Insert `element` before the child currently stored at `index`; the
 * existing children from `index` onwards shift one position up.
 *
 * @param {Number} index Position of the new child.
 * @param {Element} element Child to insert.
 */
Element.prototype.insert = function(index, element) {
  this._children.splice(index, 0, element);
};

/**
 * Remove every direct child whose internal id equals the id of `element`.
 *
 * @param {Element} element Child to remove.
 */
Element.prototype.remove = function(element) {
  this._children = this._children.filter(function(e) {
    /* TODO: is this the right way to do this? */
    if (e._id === element._id) {
      return false;
    }

    return true;
  });
};

/**
 * @return {Array} The live array of direct children (not a copy).
 */
Element.prototype.getchildren = function() {
  return this._children;
};

/**
 * Delegates to ElementPath.find with this element as the context.
 *
 * @param {String} path Path expression.
 * @return {*} Whatever ElementPath.find returns.
 */
Element.prototype.find = function(path) {
  return ElementPath.find(this, path);
};

/**
 * Delegates to ElementPath.findtext with this element as the context.
 *
 * @param {String} path Path expression.
 * @param {*} [defvalue] Forwarded to ElementPath.findtext.
 * @return {*} Whatever ElementPath.findtext returns.
 */
Element.prototype.findtext = function(path, defvalue) {
  return ElementPath.findtext(this, path, defvalue);
};

/**
 * Delegates to ElementPath.findall with this element as the context.
 *
 * @param {String} path Path expression.
 * @param {*} [defvalue] Forwarded unchanged to ElementPath.findall.
 * @return {*} Whatever ElementPath.findall returns.
 */
Element.prototype.findall = function(path, defvalue) {
  return ElementPath.findall(this, path, defvalue);
};

/**
 * Reset the element: drops all attributes and children and clears the text
 * and the tail. The tag is kept.
 */
Element.prototype.clear = function() {
  this.attrib = {};
  this._children = [];
  this.text = null;
  this.tail = null;
};

/**
 * @param {String} key Attribute name.
 * @param {*} [defvalue] Value returned when the attribute is undefined.
 * @return {*} The attribute value, or `defvalue`.
 */
Element.prototype.get = function(key, defvalue) {
  if (this.attrib[key] !== undefined) {
    return this.attrib[key];
  }
  else {
    return defvalue;
  }
};

/**
 * @param {String} key Attribute name.
 * @param {*} value Attribute value.
 */
Element.prototype.set = function(key, value) {
  this.attrib[key] = value;
};

/**
 * @return {Array} Names of the attributes set on this element.
 */
Element.prototype.keys = function() {
  return Object.keys(this.attrib);
};

/**
 * @return {Array} Result of utils.items on the attribute object; the
 *   serializer in this file consumes it as a list of [key, value] pairs.
 */
Element.prototype.items = function() {
  return utils.items(this.attrib);
};

/*
 * In Python this uses a generator, but in v8 we don't have them,
 * so we use a callback instead.
 */

/**
 * Walk this element and all of its descendants depth-first (an element is
 * visited before its children) and call `callback` for every element whose
 * tag matches.
 *
 * @param {*} tag Tag to match; `null` or "*" matches every element.
 * @param {Function} callback Called with each matching element.
 */
Element.prototype.iter = function(tag, callback) {
  var i;

  if (tag === "*") {
    tag = null;
  }

  if (tag === null || this.tag === tag) {
    callback(this);
  }

  for (i = 0; i < this._children.length; i++) {
    this._children[i].iter(tag, callback);
  }
};

/**
 * Call `callback` with every non-empty text and tail found in this subtree,
 * in document order: the text of an element, then the content of its
 * children, then its own tail.
 *
 * @param {Function} callback Called with each text fragment.
 */
Element.prototype.itertext = function(callback) {
  function walk(elem) {
    var i;

    if (elem.text) {
      callback(elem.text);
    }

    for (i = 0; i < elem._children.length; i++) {
      walk(elem._children[i]);
    }

    /* The tail follows the element's end tag, so it comes after the
     * content of the children. */
    if (elem.tail) {
      callback(elem.tail);
    }
  }

  walk(this);
};

/**
 * Create an element through parent.makeelement and append it to `parent`.
 *
 * @param {Element} parent Element receiving the new child.
 * @param {*} tag Tag of the new child.
 * @param {Object} [attrib] Attributes of the new child.
 * @return {Element} The new child.
 */
function SubElement(parent, tag, attrib) {
  var element = parent.makeelement(tag, attrib);
  parent.append(element);
  return element;
}

/**
 * Create a comment node: an Element whose tag is the Comment function
 * itself and whose text is the comment body.
 *
 * @param {String} [text] Comment body.
 * @return {Element} The comment node.
 */
function Comment(text) {
  var element = new Element(Comment);

  if (text) {
    element.text = text;
  }

  return element;
}

/**
 * Create a CDATA node: an Element whose tag is the CData function itself
 * and whose text is the section content.
 *
 * @param {String} [text] Section content.
 * @return {Element} The CDATA node.
 */
function CData(text) {
  var element = new Element(CData);

  if (text) {
    element.text = text;
  }

  return element;
}

/**
 * Create a processing instruction node: an Element whose tag is the
 * ProcessingInstruction function itself and whose text is the target,
 * followed by a space and `text` when `text` is given.
 *
 * @param {String} target Target of the instruction.
 * @param {String} [text] Content of the instruction.
 * @return {Element} The processing instruction node.
 */
function ProcessingInstruction(target, text) {
  var element = new Element(ProcessingInstruction);
  element.text = target;

  if (text) {
    element.text = element.text + " " + text;
  }

  return element;
}

/**
 * Qualified name wrapper.
 *
 * @param {String} text_or_uri Either the full name, or the namespace URI
 *   when `tag` is given.
 * @param {String} [tag] Local name; when given the stored text becomes
 *   "{uri}tag".
 */
function QName(text_or_uri, tag) {
  if (tag) {
    text_or_uri = sprintf("{%s}%s", text_or_uri, tag);
  }

  this.text = text_or_uri;
}

/**
 * @return {String} The stored name.
 */
QName.prototype.toString = function() {
  return this.text;
};

/**
 * Wrapper around a root element.
 *
 * @param {Element} [element] Root element of the tree.
 */
function ElementTree(element) {
  this._root = element;
}

/**
 * @return {Element} The root element.
 */
ElementTree.prototype.getroot = function() {
  return this._root;
};

/**
 * Replace the root element.
 *
 * @param {Element} element New root element.
 */
ElementTree.prototype._setroot = function(element) {
  this._root = element;
};

/**
 * Feed `source` to a parser and use the value returned by parser.close()
 * as the new root.
 *
 * @param {*} source Data handed to parser.feed.
 * @param {Object} [parser] Object exposing feed() and close(). When omitted
 *   an XMLParser from the module selected by constants.DEFAULT_PARSER is
 *   created around a new TreeBuilder.
 * @return {Element} The new root element.
 */
ElementTree.prototype.parse = function(source, parser) {
  if (!parser) {
    parser = get_parser(constants.DEFAULT_PARSER);
    parser = new parser.XMLParser(new TreeBuilder());
  }

  parser.feed(source);
  this._root = parser.close();
  return this._root;
};

/**
 * Same as Element.prototype.iter, applied to the root element.
 *
 * @param {*} tag Tag to match; `null` or "*" matches every element.
 * @param {Function} callback Called with each matching element.
 */
ElementTree.prototype.iter = function(tag, callback) {
  this._root.iter(tag, callback);
};

/**
 * Same as Element.prototype.find, applied to the root element.
 *
 * @param {String} path Path expression.
 * @return {*} Whatever the root's find returns.
 */
ElementTree.prototype.find = function(path) {
  return this._root.find(path);
};

/**
 * Same as Element.prototype.findtext, applied to the root element.
 *
 * @param {String} path Path expression.
 * @param {*} [defvalue] Forwarded to the root's findtext.
 * @return {*} Whatever the root's findtext returns.
 */
ElementTree.prototype.findtext = function(path, defvalue) {
  return this._root.findtext(path, defvalue);
};

/**
 * Same as Element.prototype.findall, applied to the root element.
 *
 * @param {String} path Path expression.
 * @return {*} Whatever the root's findall returns.
 */
ElementTree.prototype.findall = function(path) {
  return this._root.findall(path);
};
/** * Unlike ElementTree, we don't write to a file, we return you a string. */ ElementTree.prototype.write = function(options) { var sb = []; options = utils.merge({ encoding: 'utf-8', xml_declaration: null, default_namespace: null, method: 'xml'}, options); if (options.xml_declaration !== false) { sb.push("\n"); } if (options.method === "text") { _serialize_text(sb, self._root, encoding); } else { var qnames, namespaces, indent, indent_string; var x = _namespaces(this._root, options.encoding, options.default_namespace); qnames = x[0]; namespaces = x[1]; if (options.hasOwnProperty('indent')) { indent = 0; indent_string = new Array(options.indent + 1).join(' '); } else { indent = false; } if (options.method === "xml") { _serialize_xml(function(data) { sb.push(data); }, this._root, options.encoding, qnames, namespaces, indent, indent_string); } else { /* TODO: html */ throw new Error("unknown serialization method "+ options.method); } } return sb.join(""); }; var _namespace_map = { /* "well-known" namespace prefixes */ "http://www.w3.org/XML/1998/namespace": "xml", "http://www.w3.org/1999/xhtml": "html", "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf", "http://schemas.xmlsoap.org/wsdl/": "wsdl", /* xml schema */ "http://www.w3.org/2001/XMLSchema": "xs", "http://www.w3.org/2001/XMLSchema-instance": "xsi", /* dublic core */ "http://purl.org/dc/elements/1.1/": "dc", }; function register_namespace(prefix, uri) { if (/ns\d+$/.test(prefix)) { throw new Error('Prefix format reserved for internal use'); } if (_namespace_map.hasOwnProperty(uri) && _namespace_map[uri] === prefix) { delete _namespace_map[uri]; } _namespace_map[uri] = prefix; } function _escape(text, encoding, isAttribute, isText) { if (text) { text = text.toString(); text = text.replace(/&/g, '&'); text = text.replace(//g, '>'); if (!isText) { text = text.replace(/\n/g, ' '); text = text.replace(/\r/g, ' '); } if (isAttribute) { text = text.replace(/"/g, '"'); } } return text; } /* TODO: benchmark single 
regex */ function _escape_attrib(text, encoding) { return _escape(text, encoding, true); } function _escape_cdata(text, encoding) { return _escape(text, encoding, false); } function _escape_text(text, encoding) { return _escape(text, encoding, false, true); } function _namespaces(elem, encoding, default_namespace) { var qnames = {}; var namespaces = {}; if (default_namespace) { namespaces[default_namespace] = ""; } function encode(text) { return text; } function add_qname(qname) { if (qname[0] === "{") { var tmp = qname.substring(1).split("}", 2); var uri = tmp[0]; var tag = tmp[1]; var prefix = namespaces[uri]; if (prefix === undefined) { prefix = _namespace_map[uri]; if (prefix === undefined) { prefix = "ns" + Object.keys(namespaces).length; } if (prefix !== "xml") { namespaces[uri] = prefix; } } if (prefix) { qnames[qname] = sprintf("%s:%s", prefix, tag); } else { qnames[qname] = tag; } } else { if (default_namespace) { throw new Error('cannot use non-qualified names with default_namespace option'); } qnames[qname] = qname; } } elem.iter(null, function(e) { var i; var tag = e.tag; var text = e.text; var items = e.items(); if (tag instanceof QName && qnames[tag.text] === undefined) { add_qname(tag.text); } else if (typeof(tag) === "string") { add_qname(tag); } else if (tag !== null && tag !== Comment && tag !== CData && tag !== ProcessingInstruction) { throw new Error('Invalid tag type for serialization: '+ tag); } if (text instanceof QName && qnames[text.text] === undefined) { add_qname(text.text); } items.forEach(function(item) { var key = item[0], value = item[1]; if (key instanceof QName) { key = key.text; } if (qnames[key] === undefined) { add_qname(key); } if (value instanceof QName && qnames[value.text] === undefined) { add_qname(value.text); } }); }); return [qnames, namespaces]; } function _serialize_xml(write, elem, encoding, qnames, namespaces, indent, indent_string) { var tag = elem.tag; var text = elem.text; var items; var i; var newlines = indent || 
(indent === 0); write(Array(indent + 1).join(indent_string)); if (tag === Comment) { write(sprintf("", _escape_cdata(text, encoding))); } else if (tag === ProcessingInstruction) { write(sprintf("", _escape_cdata(text, encoding))); } else if (tag === CData) { text = text || ''; write(sprintf("", text)); } else { tag = qnames[tag]; if (tag === undefined) { if (text) { write(_escape_text(text, encoding)); } elem.iter(function(e) { _serialize_xml(write, e, encoding, qnames, null, newlines ? indent + 1 : false, indent_string); }); } else { write("<" + tag); items = elem.items(); if (items || namespaces) { items.sort(); // lexical order items.forEach(function(item) { var k = item[0], v = item[1]; if (k instanceof QName) { k = k.text; } if (v instanceof QName) { v = qnames[v.text]; } else { v = _escape_attrib(v, encoding); } write(sprintf(" %s=\"%s\"", qnames[k], v)); }); if (namespaces) { items = utils.items(namespaces); items.sort(function(a, b) { return a[1] < b[1]; }); items.forEach(function(item) { var k = item[1], v = item[0]; if (k) { k = ':' + k; } write(sprintf(" xmlns%s=\"%s\"", k, _escape_attrib(v, encoding))); }); } } if (text || elem.len()) { if (text && text.toString().match(/^\s*$/)) { text = null; } write(">"); if (!text && newlines) { write("\n"); } if (text) { write(_escape_text(text, encoding)); } elem._children.forEach(function(e) { _serialize_xml(write, e, encoding, qnames, null, newlines ? 
indent + 1 : false, indent_string); }); if (!text && indent) { write(Array(indent + 1).join(indent_string)); } write(""); } else { write(" />"); } } } if (newlines) { write("\n"); } } function parse(source, parser) { var tree = new ElementTree(); tree.parse(source, parser); return tree; } function tostring(element, options) { return new ElementTree(element).write(options); } exports.PI = ProcessingInstruction; exports.Comment = Comment; exports.CData = CData; exports.ProcessingInstruction = ProcessingInstruction; exports.SubElement = SubElement; exports.QName = QName; exports.ElementTree = ElementTree; exports.ElementPath = ElementPath; exports.Element = function(tag, attrib) { return new Element(tag, attrib); }; exports.XML = function(data) { var et = new ElementTree(); return et.parse(data); }; exports.parse = parse; exports.register_namespace = register_namespace; exports.tostring = tostring;