/**
 *  Copyright 2011 Rackspace
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 */

var sprintf = require('./sprintf').sprintf;

var utils = require('./utils');
var SyntaxError = require('./errors').SyntaxError;

/* Borrowed so that lookups keep working on prototype-less dictionaries and
 * on objects whose own keys could shadow `hasOwnProperty`. */
var hasOwn = Object.prototype.hasOwnProperty;

/* Number of cached paths above which the whole cache is dropped. */
var MAX_CACHE_SIZE = 100;

/* Compiled selectors keyed by path string. Created without a prototype so
 * that a path such as "hasOwnProperty" or "__proto__" is just another key. */
var _cache = Object.create(null);

/* Tokenizer pattern. The first capture group holds quoted strings and
 * operators, the second holds tag names (optionally prefixed with a
 * "{...}" part); whitespace matches without filling either group. */
var RE = new RegExp(
  "(" +
  "'[^']*'|\"[^\"]*\"|" +
  "::|" +
  "//?|" +
  "\\.\\.|" +
  "\\(\\)|" +
  "[/.*:\\[\\]\\(\\)@=])|" +
  "((?:\\{[^}]+\\})?[^/\\[\\]\\(\\)@=\\s]+)|" +
  "\\s+", 'g'
);

/* Splits a path into tokens. The code below reads `token[1]` as the
 * operator / quoted-string text and `token[0]` as the tag text when
 * `token[1]` is empty.
 * NOTE(review): presumably each token is a RegExp match array produced by
 * utils.findall -- confirm against ./utils. */
var xpath_tokenizer = utils.findall.bind(null, RE);

/**
 * Builds a selector for a plain tag step.
 *
 * @param {Function} next Returns the next token (unused here).
 * @param {Array} token Current token; token[0] is the tag name.
 * @return {Function} select(context, result) returning the direct children
 *     of every element in `result` whose `tag` equals the tag name.
 */
function prepare_tag(next, token) {
  var tag = token[0];

  return function select(context, result) {
    var rv = [];

    result.forEach(function(elem) {
      elem._children.forEach(function(child) {
        if (child.tag === tag) {
          rv.push(child);
        }
      });
    });

    return rv;
  };
}

/**
 * Builds a selector for the "*" step.
 *
 * @param {Function} next Returns the next token (unused here).
 * @param {Array} token Current token (unused here).
 * @return {Function} select(context, result) returning every direct child
 *     of every element in `result`.
 */
function prepare_star(next, token) {
  return function select(context, result) {
    var rv = [];

    result.forEach(function(elem) {
      elem._children.forEach(function(child) {
        rv.push(child);
      });
    });

    return rv;
  };
}

/**
 * Builds a selector for the "." step.
 *
 * @param {Function} next Returns the next token (unused here).
 * @param {Array} token Current token (unused here).
 * @return {Function} select(context, result) returning a shallow copy of
 *     `result`.
 */
function prepare_dot(next, token) {
  return function select(context, result) {
    return result.slice();
  };
}

/**
 * Builds a selector for the "//" step. Consumes the following token, which
 * must be "*" or a tag name.
 *
 * @param {Function} next Returns the next token.
 * @param {Array} token Current token (replaced by the following one).
 * @return {Function} select(context, result) collecting, for every element
 *     in `result`, everything `elem.iter(tag, cb)` reports except the
 *     element itself.
 * @throws {SyntaxError} When "//" is the last token or is followed by
 *     another operator.
 */
function prepare_iter(next, token) {
  var tag;

  token = next();

  if (!token) {
    // A path ending in "//" used to crash with a TypeError here.
    throw new SyntaxError(null, 'Invalid descendant selector');
  }

  if (token[1] === '*') {
    tag = '*';
  }
  else if (!token[1]) {
    tag = token[0] || '';
  }
  else {
    throw new SyntaxError(token);
  }

  return function select(context, result) {
    var rv = [];

    result.forEach(function(elem) {
      elem.iter(tag, function(e) {
        if (e !== elem) {
          rv.push(e);
        }
      });
    });

    return rv;
  };
}

/**
 * Builds a selector for the ".." step.
 *
 * The child -> parent map is built on first use by walking `context.root`
 * and is stored on the context so later ".." steps of the same query
 * reuse it.
 *
 * NOTE(review): the map is a plain object, so elements are coerced to
 * strings when used as keys. This relies on every element stringifying to
 * a unique value -- confirm against the Element implementation.
 *
 * @param {Function} next Returns the next token (unused here).
 * @param {Array} token Current token (unused here).
 * @return {Function} select(context, result) returning the recorded parent
 *     of every element in `result` that has one.
 */
function prepare_dot_dot(next, token) {
  return function select(context, result) {
    var rv = [], parent_map = context.parent_map;

    if (!parent_map) {
      context.parent_map = parent_map = {};

      context.root.iter(null, function(p) {
        p._children.forEach(function(e) {
          parent_map[e] = p;
        });
      });
    }

    result.forEach(function(elem) {
      if (hasOwn.call(parent_map, elem)) {
        rv.push(parent_map[elem]);
      }
    });

    return rv;
  };
}

/**
 * Throws unless `token` is the closing "]" of a predicate.
 *
 * @param {?Array} token Token to check; may be undefined at end of input.
 * @param {String} message Error message to use.
 * @throws {SyntaxError} When the token is missing or is not "]".
 */
function expect_predicate_end(token, message) {
  if (!token || token[1] !== ']') {
    throw new SyntaxError(token || null, message);
  }
}

/**
 * Builds a selector for a "[...]" predicate. Supported forms:
 *
 *   [@key]           the attribute lookup yields a value
 *   [@key='value']   the attribute lookup yields exactly `value`
 *   [tag]            `elem.find(tag)` yields something
 *
 * @param {Function} next Returns the next token.
 * @param {Array} token Current token (the "[").
 * @return {Function} select(context, result) returning the elements of
 *     `result` that satisfy the predicate.
 * @throws {SyntaxError} When the predicate is malformed or truncated.
 */
function prepare_predicate(next, token) {
  var tag, key, value, target, quote, select;

  token = next();

  if (!token) {
    throw new SyntaxError(null, 'Invalid predicate');
  }

  if (token[1] === '@') {
    // attribute
    token = next();

    if (!token || token[1]) {
      throw new SyntaxError(token || null, 'Invalid attribute predicate');
    }

    key = token[0];
    token = next();

    if (token && token[1] === '=') {
      target = next();
      value = target ? target[1] : undefined;
      quote = (typeof value === 'string') ? value.charAt(0) : '';

      // The comparison target must be a complete quoted string token.
      if ((quote !== '"' && quote !== '\'') || value.length < 2 ||
          value.charAt(value.length - 1) !== quote) {
        throw new SyntaxError(token, 'Invalid comparison target');
      }

      value = value.slice(1, value.length - 1);
      token = next();

      select = function(context, result) {
        return result.filter(function(elem) {
          return elem.get(key) === value;
        });
      };
    }
    else {
      // Only reachable as a valid predicate when the token is "]", which is
      // verified right below.
      select = function(context, result) {
        return result.filter(function(elem) {
          var attr = elem.get(key);

          // An empty-string attribute value still counts as present.
          return attr !== undefined && attr !== null;
        });
      };
    }

    expect_predicate_end(token, 'Invalid attribute predicate');
  }
  else if (!token[1]) {
    tag = token[0] || '';
    token = next();

    expect_predicate_end(token, 'Invalid node predicate');

    select = function(context, result) {
      return result.filter(function(elem) {
        return elem.find(tag);
      });
    };
  }
  else {
    throw new SyntaxError(null, 'Invalid predicate');
  }

  return select;
}

/* Operator text (first capture group, or "" for a tag) -> selector factory. */
var ops = {
  "": prepare_tag,
  "*": prepare_star,
  ".": prepare_dot,
  "..": prepare_dot_dot,
  "//": prepare_iter,
  "[": prepare_predicate
};

/**
 * Per-query state shared by the selector steps.
 *
 * @param {Object} root Element the query was started on.
 */
function _SelectorContext(root) {
  this.parent_map = null;
  this.root = root;
}

/**
 * Compiles a path into a list of select(context, result) functions.
 *
 * @param {String} path Path expression.
 * @return {Array} Selector steps in execution order.
 * @throws {SyntaxError} When the path is absolute, empty, or contains a
 *     step that cannot be compiled.
 */
function compile_path(path) {
  var tokens, token, op, step, selector = [];

  if (path.charAt(0) === '/') {
    throw new SyntaxError(null, 'Cannot use absolute path on element');
  }

  tokens = xpath_tokenizer(path);

  function next_token() {
    return tokens.shift();
  }

  token = next_token();

  if (!token) {
    throw new SyntaxError(null, sprintf('Invalid path: %s', path));
  }

  while (token) {
    op = token[1] || '';

    // Tokens such as "]", "@" or "=" cannot start a step.
    if (!hasOwn.call(ops, op)) {
      throw new SyntaxError(null, sprintf('Invalid path: %s', path));
    }

    step = ops[op](next_token, token);

    if (!step) {
      throw new SyntaxError(null, sprintf('Invalid path: %s', path));
    }

    selector.push(step);
    token = next_token();

    // A single "/" only separates steps; skip it.
    if (token && token[1] === '/') {
      token = next_token();
    }
  }

  return selector;
}

/**
 * Returns all elements matching `path`, evaluated relative to `elem`.
 *
 * Compiled selectors are cached per path string; the cache is emptied
 * wholesale once it holds more than MAX_CACHE_SIZE entries.
 *
 * @param {Object} elem Element to start from.
 * @param {String} path Path expression (must not start with "/").
 * @return {Array} Matching elements, possibly empty.
 * @throws {SyntaxError} When the path cannot be compiled.
 */
function findall(elem, path) {
  var selector, result, context, i, len;

  if (hasOwn.call(_cache, path)) {
    selector = _cache[path];
  }
  else {
    // TODO: Use smarter cache purging approach
    if (Object.keys(_cache).length > MAX_CACHE_SIZE) {
      _cache = Object.create(null);
    }

    selector = compile_path(path);
    _cache[path] = selector;
  }

  // Execute selector pattern
  result = [elem];
  context = new _SelectorContext(elem);

  for (i = 0, len = selector.length; i < len; i++) {
    result = selector[i](context, result);
  }

  return result || [];
}

/**
 * Returns the first element matching `path`.
 *
 * @param {Object} element Element to start from.
 * @param {String} path Path expression.
 * @return {?Object} First match, or null when nothing matches.
 */
function find(element, path) {
  var resultElements = findall(element, path);

  if (resultElements && resultElements.length > 0) {
    return resultElements[0];
  }

  return null;
}

/**
 * Returns the `text` property of the first element matching `path`.
 *
 * @param {Object} element Element to start from.
 * @param {String} path Path expression.
 * @param {*} defvalue Value returned when nothing matches.
 * @return {*} The first match's `text`, or `defvalue`.
 */
function findtext(element, path, defvalue) {
  var resultElements = findall(element, path);

  if (resultElements && resultElements.length > 0) {
    return resultElements[0].text;
  }

  return defvalue;
}

exports.find = find;
exports.findall = findall;
exports.findtext = findtext;