XML helpers.
@package PHPUnit @subpackage Util @author Sebastian Bergmann <sebastian@phpunit.de> @copyright 2001-2013 Sebastian Bergmann <sebastian@phpunit.de> @license http://www.opensource.org/licenses/BSD-3-Clause The BSD 3-Clause License @link http://www.phpunit.de/ @since Class available since Release 3.2.0
Expanded class hierarchy of PHPUnit_Util_XML
class PHPUnit_Util_XML {
/**
 * Escapes a string for embedding in XML output.
 *
 * The input is converted to UTF-8 by PHPUnit_Util_String::convertToUtf8(),
 * passed through htmlspecialchars(), and finally every control character
 * matched by the pattern below is rewritten as a hexadecimal character
 * reference (for example "&#x1b;").
 *
 * @param string $string
 * @return string
 * @author Kore Nordmann <mail@kore-nordmann.de>
 * @since Method available since Release 3.4.6
 */
public static function prepareString($string) {
    $escaped = htmlspecialchars(
        PHPUnit_Util_String::convertToUtf8($string),
        ENT_COMPAT,
        'UTF-8'
    );

    // Turns one matched control byte into its "&#xNN;" form.
    $toCharacterReference = function ($hit) {
        return sprintf('&#x%02x;', ord($hit[0]));
    };

    return preg_replace_callback('/[\\x00-\\x04\\x0b\\x0c\\x0e-\\x1f\\x7f]/', $toCharacterReference, $escaped);
}
/**
 * Reads a file and hands its contents to load() to build a DOMDocument.
 *
 * @param string $filename Path of the XML (or HTML) file to read.
 * @param boolean $isHtml Forwarded to load(): parse as HTML when TRUE.
 * @param boolean $xinclude Forwarded to load(): run XInclude when TRUE.
 * @return DOMDocument
 * @throws PHPUnit_Framework_Exception When the file cannot be read.
 * @since Method available since Release 3.3.0
 */
public static function loadFile($filename, $isHtml = FALSE, $xinclude = FALSE) {
    // Mute PHP diagnostics only for the duration of the read; the failure
    // is reported through the exception below instead.
    $previousLevel = error_reporting(0);
    $xml = file_get_contents($filename);
    error_reporting($previousLevel);

    if ($xml !== FALSE) {
        return self::load($xml, $isHtml, $filename, $xinclude);
    }

    throw new PHPUnit_Framework_Exception(sprintf('Could not read "%s".', $filename));
}
/**
 * Load an $actual document into a DOMDocument. This is called
 * from the selector assertions.
 *
 * If $actual is already a DOMDocument, it is returned with
 * no changes. Otherwise, $actual is loaded into a new DOMDocument
 * as either HTML or XML, depending on the value of $isHtml. If $isHtml is
 * false and $xinclude is true, xinclude is performed on the loaded
 * DOMDocument.
 *
 * While parsing, PHP error reporting is switched off and libxml errors are
 * collected internally; both settings are restored before returning or
 * throwing. Only the libxml errors raised by this call end up in the
 * exception message.
 *
 * Note: prior to PHPUnit 3.3.0, this method loaded a file and
 * not a string as it currently does. To load a file into a
 * DOMDocument, use loadFile() instead.
 *
 * @param string|DOMDocument $actual
 * @param boolean $isHtml
 * @param string $filename Used as the document URI (needed for xinclude)
 *                         and named in the failure message.
 * @param boolean $xinclude
 * @return DOMDocument
 * @throws PHPUnit_Framework_Exception When the input is empty or cannot be parsed.
 * @since Method available since Release 3.3.0
 * @author Mike Naberezny <mike@maintainable.com>
 * @author Derek DeVries <derek@maintainable.com>
 * @author Tobias Schlitt <toby@php.net>
 */
public static function load($actual, $isHtml = FALSE, $filename = '', $xinclude = FALSE) {
    if ($actual instanceof DOMDocument) {
        return $actual;
    }

    $document = new DOMDocument();
    $message = '';
    $loaded = FALSE;

    if ($actual === '') {
        // The DOM loaders reject empty input (PHP 8 raises a ValueError),
        // so report it here before any global setting has been touched.
        $message = 'Could not load XML from empty string';
    }
    else {
        $internal = libxml_use_internal_errors(TRUE);
        // Errors the caller already had buffered must not be blamed on
        // this document.
        $alreadyBuffered = count(libxml_get_errors());
        $reporting = error_reporting(0);

        if ($isHtml) {
            $loaded = $document->loadHTML($actual);
        }
        else {
            $loaded = $document->loadXML($actual);
        }

        if ('' !== $filename) {
            // Necessary for xinclude
            $document->documentURI = $filename;
        }

        if (!$isHtml && $xinclude) {
            $document->xinclude();
        }

        foreach (array_slice(libxml_get_errors(), $alreadyBuffered) as $error) {
            $message .= $error->message;
        }

        libxml_use_internal_errors($internal);
        error_reporting($reporting);
    }

    if ($loaded === FALSE) {
        if ($filename != '') {
            throw new PHPUnit_Framework_Exception(sprintf('Could not load "%s".%s', $filename, $message != '' ? "\n" . $message : ''));
        }

        if ($message == '') {
            // Never surface an exception without any explanation.
            $message = 'Could not load XML for unknown reason';
        }

        throw new PHPUnit_Framework_Exception($message);
    }

    return $document;
}
/**
 * Returns the content of a node as a string.
 *
 * A node with exactly one child yields its nodeValue. Otherwise every child
 * is serialized with DOMDocument::saveXML() and the pieces are concatenated,
 * so a node without children yields an empty string.
 *
 * @param DOMNode $node
 * @return string
 * @since Method available since Release 3.4.0
 */
public static function nodeToText(DOMNode $node) {
    if ($node->childNodes->length == 1) {
        return $node->nodeValue;
    }

    // A DOMDocument has no ownerDocument (the property is NULL), so it has
    // to act as its own serializer.
    $serializer = $node instanceof DOMDocument ? $node : $node->ownerDocument;

    $result = '';
    foreach ($node->childNodes as $childNode) {
        $result .= $serializer->saveXML($childNode);
    }

    return $result;
}
/**
 * Removes every direct child of $node that is a DOMCharacterData instance.
 *
 * Only the immediate children are inspected; deeper levels of the tree are
 * left untouched.
 *
 * @param DOMNode $node
 * @since Method available since Release 3.3.0
 * @author Mattis Stordalen Flister <mattis@xait.no>
 */
public static function removeCharacterDataNodes(DOMNode $node) {
    if (!$node->hasChildNodes()) {
        return;
    }

    // Collect first: removing while walking the live node list would make
    // the iteration skip entries.
    $doomed = array();
    foreach ($node->childNodes as $candidate) {
        if ($candidate instanceof DOMCharacterData) {
            $doomed[] = $candidate;
        }
    }

    // Detach from the last child towards the first.
    foreach (array_reverse($doomed) as $candidate) {
        $node->removeChild($candidate);
    }
}
/**
 * "Convert" a DOMElement object into a PHP variable.
 *
 * The tag name selects the conversion:
 *  - array: each direct <element> child contributes one entry; its first
 *    child element is converted recursively and stored under the "key"
 *    attribute when present, otherwise appended.
 *  - object: instantiates the class named by the "class" attribute. When
 *    the element has a child element, that child's element children are
 *    converted and passed as constructor arguments.
 *  - boolean: TRUE only for the literal text "true".
 *  - integer, double, string: the node value cast with settype().
 * Any other tag name yields NULL.
 *
 * @param DOMElement $element
 * @return mixed
 * @since Method available since Release 3.4.0
 */
public static function xmlToVariable(DOMElement $element) {
    $variable = NULL;

    switch ($element->tagName) {
        case 'array':
            $variable = array();

            // Walk direct children only: a descendant search would also
            // pick up the entries of nested arrays and merge them into
            // this one.
            foreach ($element->childNodes as $entry) {
                if (!$entry instanceof DOMElement || $entry->tagName !== 'element') {
                    continue;
                }

                $payload = self::firstChildElement($entry);
                $value = $payload !== NULL ? self::xmlToVariable($payload) : NULL;

                if ($entry->hasAttribute('key')) {
                    $variable[(string) $entry->getAttribute('key')] = $value;
                }
                else {
                    $variable[] = $value;
                }
            }
            break;

        case 'object':
            $className = $element->getAttribute('class');
            $argumentList = self::firstChildElement($element);

            if ($argumentList !== NULL) {
                $constructorArgs = array();

                foreach ($argumentList->childNodes as $argument) {
                    if ($argument instanceof DOMElement) {
                        $constructorArgs[] = self::xmlToVariable($argument);
                    }
                }

                $class = new ReflectionClass($className);
                $variable = $class->newInstanceArgs($constructorArgs);
            }
            else {
                $variable = new $className();
            }
            break;

        case 'boolean':
            $variable = $element->nodeValue == 'true' ? TRUE : FALSE;
            break;

        case 'integer':
        case 'double':
        case 'string':
            $variable = $element->nodeValue;
            settype($variable, $element->tagName);
            break;
    }

    return $variable;
}

/**
 * Returns the first child of $node that is an element, or NULL if none is.
 *
 * Looking for the first element instead of a fixed child index makes the
 * conversion independent of whitespace or comments between the tags.
 *
 * @param DOMNode $node
 * @return DOMElement|null
 */
private static function firstChildElement(DOMNode $node) {
    foreach ($node->childNodes as $child) {
        if ($child instanceof DOMElement) {
            return $child;
        }
    }

    return NULL;
}
/**
 * Validate list of keys in the associative array.
 *
 * $validKeys may mix two notations: an entry with an integer index names a
 * valid key whose default is NULL, an entry with a string index names a
 * valid key together with its default value. Every key of $hash must be one
 * of the valid keys. Valid keys that are missing from $hash (or are NULL
 * there) are filled in with their default.
 *
 * @param array $hash
 * @param array $validKeys
 * @return array $hash completed with the defaults.
 * @throws PHPUnit_Framework_Exception When $hash contains a key that is not valid.
 * @since Method available since Release 3.3.0
 * @author Mike Naberezny <mike@maintainable.com>
 * @author Derek DeVries <derek@maintainable.com>
 */
public static function assertValidKeys(array $hash, array $validKeys) {
    // Normalize validation keys so that we can use both indexed and
    // associative arrays.
    $valids = array();
    foreach ($validKeys as $key => $val) {
        if (is_int($key)) {
            $valids[$val] = NULL;
        }
        else {
            $valids[$key] = $val;
        }
    }

    // Check for invalid keys. The lookup is by array key rather than a
    // loose in_array() comparison, which on PHP < 8 treats the integer
    // key 0 as equal to any non-numeric string.
    $unknown = array();
    foreach (array_keys($hash) as $key) {
        if (!array_key_exists($key, $valids)) {
            $unknown[] = $key;
        }
    }

    if (!empty($unknown)) {
        throw new PHPUnit_Framework_Exception('Unknown key(s): ' . implode(', ', $unknown));
    }

    // Add default values for any valid keys that are empty.
    foreach ($valids as $key => $value) {
        if (!isset($hash[$key])) {
            $hash[$key] = $value;
        }
    }

    return $hash;
}
/**
 * Parse a CSS selector into an associative array suitable for
 * use with findNodes().
 *
 * The selector is split on whitespace and processed from right to left.
 * Each part may contribute the keys "tag", "id", "class" (several classes
 * joined by a space) and "attributes". The part to its right is nested
 * under "descendant", or under "child" when the two are separated by ">".
 * The array describing the leftmost part is returned.
 *
 * Attribute selectors: [name=value] is stored literally, [name~=value] and
 * [name*=value] are stored as "regexp:" patterns that match the value as a
 * whole word or as a substring. The value is escaped for use inside the
 * pattern, so characters such as "/" or "." are matched literally.
 *
 * @param string $selector
 * @param mixed $content When a string, it is stored under "content" in
 *                       every generated array; other values are ignored.
 * @return array
 * @since Method available since Release 3.3.0
 * @author Mike Naberezny <mike@maintainable.com>
 * @author Derek DeVries <derek@maintainable.com>
 */
public static function convertSelectToTag($selector, $content = TRUE) {
    $selector = trim(preg_replace("/\\s+/", " ", $selector));

    // substitute spaces within attribute value
    while (preg_match('/\\[[^\\]]+"[^"]+\\s[^"]+"\\]/', $selector)) {
        $selector = preg_replace('/(\\[[^\\]]+"[^"]+)\\s([^"]+"\\])/', "\$1__SPACE__\$2", $selector);
    }

    // explode() yields a one-element array when there is no space at all.
    $elements = explode(' ', $selector);

    // Defined up front so that a selector without any element part still
    // returns an array.
    $tag = array();
    $previousTag = array();

    foreach (array_reverse($elements) as $element) {
        $element = str_replace('__SPACE__', ' ', $element);

        // child selector
        if ($element == '>') {
            $previousTag = array(
                'child' => isset($previousTag['descendant']) ? $previousTag['descendant'] : NULL,
            );
            continue;
        }

        $tag = array();

        // match element tag
        preg_match("/^([^\\.#\\[]*)/", $element, $eltMatches);
        if (!empty($eltMatches[1])) {
            $tag['tag'] = $eltMatches[1];
        }

        // match attributes (\[[^\]]*\]*), ids (#[^\.#\[]*),
        // and classes (\.[^\.#\[]*))
        preg_match_all("/(\\[[^\\]]*\\]*|#[^\\.#\\[]*|\\.[^\\.#\\[]*)/", $element, $matches);

        if (!empty($matches[1])) {
            $classes = array();
            $attrs = array();

            foreach ($matches[1] as $match) {
                if (substr($match, 0, 1) == '#') {
                    // id matched
                    $tag['id'] = substr($match, 1);
                }
                elseif (substr($match, 0, 1) == '.') {
                    // class matched
                    $classes[] = substr($match, 1);
                }
                elseif (substr($match, 0, 1) == '[' && substr($match, -1, 1) == ']') {
                    // attribute matched
                    $attribute = substr($match, 1, strlen($match) - 2);
                    $attribute = str_replace('"', '', $attribute);

                    // Split on the first operator only, so that the value
                    // itself may contain the operator characters.
                    if (strstr($attribute, '~=')) {
                        // match single word
                        list($key, $value) = explode('~=', $attribute, 2);
                        $value = 'regexp:/.*\\b' . preg_quote($value, '/') . '\\b.*/';
                    }
                    elseif (strstr($attribute, '*=')) {
                        // match substring
                        list($key, $value) = explode('*=', $attribute, 2);
                        $value = 'regexp:/.*' . preg_quote($value, '/') . '.*/';
                    }
                    else {
                        // exact match; a bare [name] has no value part
                        list($key, $value) = array_pad(explode('=', $attribute, 2), 2, NULL);
                    }

                    $attrs[$key] = $value;
                }
            }

            if ($classes) {
                $tag['class'] = join(' ', $classes);
            }

            if ($attrs) {
                $tag['attributes'] = $attrs;
            }
        }

        // tag content
        if (is_string($content)) {
            $tag['content'] = $content;
        }

        // determine previous child/descendants
        if (!empty($previousTag['descendant'])) {
            $tag['descendant'] = $previousTag['descendant'];
        }
        elseif (!empty($previousTag['child'])) {
            $tag['child'] = $previousTag['child'];
        }

        $previousTag = array(
            'descendant' => $tag,
        );
    }

    return $tag;
}
/**
 * Finds the nodes of a document that match a CSS selector.
 *
 * The selector is translated by convertSelectToTag(), the document is
 * obtained through load(), and the actual search is done by findNodes(),
 * whose result is returned unchanged.
 *
 * To only return nodes containing a certain content, give
 * the $content to match as a string. Otherwise, setting
 * $content to TRUE will return all nodes matching $selector.
 *
 * The $actual document may be a DOMDocument or a string
 * containing XML or HTML, identified by $isHtml.
 *
 * @param string $selector
 * @param mixed $content
 * @param mixed $actual
 * @param boolean $isHtml
 * @return false|array
 * @since Method available since Release 3.3.0
 * @author Mike Naberezny <mike@maintainable.com>
 * @author Derek DeVries <derek@maintainable.com>
 * @author Tobias Schlitt <toby@php.net>
 */
public static function cssSelect($selector, $content, $actual, $isHtml = TRUE) {
    $criteria = self::convertSelectToTag($selector, $content);

    return self::findNodes(self::load($actual, $isHtml), $criteria, $isHtml);
}
/**
 * Finds the elements of a document that satisfy a set of matching options.
 *
 * Candidates are the elements named by the "tag" option or, without it,
 * the elements of a fixed list of HTML tag names. The remaining options
 * are then applied as successive filters:
 *  - id, class: shorthand for the corresponding "attributes" entries.
 *  - attributes: name => value pairs. A value of the form "regexp:/.../" is
 *    matched as a regular expression; "class" requires every listed class
 *    to be present; anything else is compared with the attribute value.
 *  - content: "regexp:/.../" pattern, empty string (node text must be
 *    empty) or a substring of the node text.
 *  - parent / ancestor: options for a nested search; the first node found
 *    must be the parent / an ancestor of the candidate.
 *  - child / descendant: options for a nested search; one of the nodes
 *    found must be a child / descendant of the candidate.
 *  - children: array with "count", "greater_than", "less_than" and "only",
 *    evaluated against the candidate's children that are neither text nor
 *    CDATA nodes. Candidates without such children never pass.
 *
 * @param DOMDocument $dom
 * @param array $options
 * @param boolean $isHtml When TRUE, tag names are looked up in lower case
 *                        and, failing that, in upper case.
 * @return array|false The matching nodes, or FALSE when nothing matches.
 * @throws PHPUnit_Framework_Exception When $options contains an unknown key.
 * @since Method available since Release 3.3.0
 * @author Mike Naberezny <mike@maintainable.com>
 * @author Derek DeVries <derek@maintainable.com>
 * @author Tobias Schlitt <toby@php.net>
 */
public static function findNodes(DOMDocument $dom, array $options, $isHtml = TRUE) {
    $valid = array(
        'id', 'class', 'tag', 'content', 'attributes',
        'parent', 'child', 'ancestor', 'descendant', 'children',
    );

    $nodes = array();
    $filtered = array();
    $options = self::assertValidKeys($options, $valid);

    // find the element by id
    if ($options['id']) {
        $options['attributes']['id'] = $options['id'];
    }

    if ($options['class']) {
        $options['attributes']['class'] = $options['class'];
    }

    // find the element by a tag type, or fall back to the known HTML tags
    if ($options['tag']) {
        $tagNames = array($options['tag']);
    }
    else {
        $tagNames = array(
            'a', 'abbr', 'acronym', 'address', 'area', 'b', 'base', 'bdo',
            'big', 'blockquote', 'body', 'br', 'button', 'caption', 'cite',
            'code', 'col', 'colgroup', 'dd', 'del', 'div', 'dfn', 'dl',
            'dt', 'em', 'fieldset', 'form', 'frame', 'frameset', 'h1', 'h2',
            'h3', 'h4', 'h5', 'h6', 'head', 'hr', 'html', 'i', 'iframe',
            'img', 'input', 'ins', 'kbd', 'label', 'legend', 'li', 'link',
            'map', 'meta', 'noframes', 'noscript', 'object', 'ol', 'optgroup',
            'option', 'p', 'param', 'pre', 'q', 'samp', 'script', 'select',
            'small', 'span', 'strong', 'style', 'sub', 'sup', 'table',
            'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'title', 'tr',
            'tt', 'ul', 'var',
        );
    }

    foreach ($tagNames as $tagName) {
        if ($isHtml) {
            $elements = self::getElementsByCaseInsensitiveTagName($dom, $tagName);
        }
        else {
            $elements = $dom->getElementsByTagName($tagName);
        }

        foreach ($elements as $element) {
            $nodes[] = $element;
        }
    }

    if (empty($nodes)) {
        return FALSE;
    }

    // filter by attributes
    if ($options['attributes']) {
        foreach ($nodes as $node) {
            $invalid = FALSE;

            foreach ($options['attributes'] as $name => $value) {
                // match by regexp if like "regexp:/foo/i"
                if (preg_match('/^regexp\\s*:\\s*(.*)/i', $value, $matches)) {
                    if (!preg_match($matches[1], $node->getAttribute($name))) {
                        $invalid = TRUE;
                    }
                }
                elseif ($name == 'class') {
                    // split to individual classes
                    $findClasses = explode(' ', preg_replace("/\\s+/", " ", $value));
                    $allClasses = explode(' ', preg_replace("/\\s+/", " ", $node->getAttribute($name)));

                    // make sure each class given is in the actual node
                    foreach ($findClasses as $findClass) {
                        if (!in_array($findClass, $allClasses)) {
                            $invalid = TRUE;
                        }
                    }
                }
                elseif ($node->getAttribute($name) != $value) {
                    $invalid = TRUE;
                }
            }

            // if every attribute given matched
            if (!$invalid) {
                $filtered[] = $node;
            }
        }

        $nodes = $filtered;
        $filtered = array();

        if (empty($nodes)) {
            return FALSE;
        }
    }

    // filter by content
    if ($options['content'] !== NULL) {
        foreach ($nodes as $node) {
            $invalid = FALSE;

            // match by regexp if like "regexp:/foo/i"
            if (preg_match('/^regexp\\s*:\\s*(.*)/i', $options['content'], $matches)) {
                if (!preg_match($matches[1], self::getNodeText($node))) {
                    $invalid = TRUE;
                }
            }
            elseif ($options['content'] === '') {
                // an empty string demands a node without any text
                if (self::getNodeText($node) !== '') {
                    $invalid = TRUE;
                }
            }
            elseif (strstr(self::getNodeText($node), $options['content']) === FALSE) {
                $invalid = TRUE;
            }

            if (!$invalid) {
                $filtered[] = $node;
            }
        }

        $nodes = $filtered;
        $filtered = array();

        if (empty($nodes)) {
            return FALSE;
        }
    }

    // filter by parent node
    if ($options['parent']) {
        $parentNodes = self::findNodes($dom, $options['parent'], $isHtml);
        // only the first node found is considered as the parent
        $parentNode = isset($parentNodes[0]) ? $parentNodes[0] : NULL;

        foreach ($nodes as $node) {
            if ($parentNode !== $node->parentNode) {
                continue;
            }

            $filtered[] = $node;
        }

        $nodes = $filtered;
        $filtered = array();

        if (empty($nodes)) {
            return FALSE;
        }
    }

    // filter by child node
    if ($options['child']) {
        $childNodes = self::findNodes($dom, $options['child'], $isHtml);
        $childNodes = !empty($childNodes) ? $childNodes : array();

        foreach ($nodes as $node) {
            foreach ($node->childNodes as $child) {
                foreach ($childNodes as $childNode) {
                    if ($childNode === $child) {
                        $filtered[] = $node;
                    }
                }
            }
        }

        $nodes = $filtered;
        $filtered = array();

        if (empty($nodes)) {
            return FALSE;
        }
    }

    // filter by ancestor
    if ($options['ancestor']) {
        $ancestorNodes = self::findNodes($dom, $options['ancestor'], $isHtml);
        // only the first node found is considered as the ancestor
        $ancestorNode = isset($ancestorNodes[0]) ? $ancestorNodes[0] : NULL;

        foreach ($nodes as $node) {
            $parent = $node->parentNode;

            while ($parent && $parent->nodeType != XML_HTML_DOCUMENT_NODE) {
                if ($parent === $ancestorNode) {
                    $filtered[] = $node;
                }

                $parent = $parent->parentNode;
            }
        }

        $nodes = $filtered;
        $filtered = array();

        if (empty($nodes)) {
            return FALSE;
        }
    }

    // filter by descendant
    if ($options['descendant']) {
        $descendantNodes = self::findNodes($dom, $options['descendant'], $isHtml);
        $descendantNodes = !empty($descendantNodes) ? $descendantNodes : array();

        foreach ($nodes as $node) {
            foreach (self::getDescendants($node) as $descendant) {
                foreach ($descendantNodes as $descendantNode) {
                    if ($descendantNode === $descendant) {
                        $filtered[] = $node;
                    }
                }
            }
        }

        $nodes = $filtered;
        $filtered = array();

        if (empty($nodes)) {
            return FALSE;
        }
    }

    // filter by children
    if ($options['children']) {
        $validChild = array('count', 'greater_than', 'less_than', 'only');
        $childOptions = self::assertValidKeys($options['children'], $validChild);

        // The nodes every child has to be one of; the lookup does not
        // depend on the candidate, so it is done once.
        $onlyNodes = array();
        if ($childOptions['only']) {
            $onlyNodes = self::findNodes($dom, $childOptions['only'], $isHtml);
            $onlyNodes = !empty($onlyNodes) ? $onlyNodes : array();
        }

        foreach ($nodes as $node) {
            // Start from scratch for every candidate so that the children
            // of earlier candidates are not counted again.
            $children = array();

            foreach ($node->childNodes as $childNode) {
                if ($childNode->nodeType !== XML_CDATA_SECTION_NODE && $childNode->nodeType !== XML_TEXT_NODE) {
                    $children[] = $childNode;
                }
            }

            // we must have children to pass this filter
            if (empty($children)) {
                continue;
            }

            $total = count($children);

            // A candidate that fails a check is skipped; the remaining
            // candidates are still examined.
            if ($childOptions['count'] !== NULL) {
                // exact count of children
                if ($total !== $childOptions['count']) {
                    continue;
                }
            }
            elseif ($childOptions['less_than'] !== NULL && $childOptions['greater_than'] !== NULL) {
                // range of children
                if ($total >= $childOptions['less_than'] || $total <= $childOptions['greater_than']) {
                    continue;
                }
            }
            elseif ($childOptions['less_than'] !== NULL) {
                // less than a given count
                if ($total >= $childOptions['less_than']) {
                    continue;
                }
            }
            elseif ($childOptions['greater_than'] !== NULL) {
                // more than a given count
                if ($total <= $childOptions['greater_than']) {
                    continue;
                }
            }

            // match each child against a specific tag
            if ($childOptions['only']) {
                foreach ($children as $child) {
                    // strict comparison: the very same node object is required
                    if (!in_array($child, $onlyNodes, TRUE)) {
                        continue 2;
                    }
                }
            }

            $filtered[] = $node;
        }

        $nodes = $filtered;
        $filtered = array();

        if (empty($nodes)) {
            return FALSE;
        }
    }

    // return the nodes that match all criteria
    return !empty($nodes) ? $nodes : array();
}
/**
 * Collects all descendants of a node into one flat array.
 *
 * Text and CDATA nodes are left out (and not descended into). For every
 * remaining child, the child's own descendants are listed first, followed
 * by the child itself.
 *
 * @param DOMNode $node
 * @return array
 * @since Method available since Release 3.3.0
 * @author Mike Naberezny <mike@maintainable.com>
 * @author Derek DeVries <derek@maintainable.com>
 */
protected static function getDescendants(DOMNode $node) {
    $collected = array();
    $kids = $node->childNodes ? $node->childNodes : array();

    foreach ($kids as $kid) {
        $isCharacterData = $kid->nodeType === XML_CDATA_SECTION_NODE
            || $kid->nodeType === XML_TEXT_NODE;

        if (!$isCharacterData) {
            foreach (self::getDescendants($kid) as $deeper) {
                $collected[] = $deeper;
            }

            $collected[] = $kid;
        }
    }

    return $collected;
}
/**
 * Looks up elements by tag name, trying lower case first and upper case
 * second.
 *
 * The upper-case lookup only happens when the lower-case one finds nothing;
 * tag names written in mixed case are not covered by either lookup.
 *
 * @param DOMDocument $dom
 * @param string $tag
 * @return DOMNodeList
 * @since Method available since Release 3.4.0
 */
protected static function getElementsByCaseInsensitiveTagName(DOMDocument $dom, $tag) {
    $lowerCaseHits = $dom->getElementsByTagName(strtolower($tag));

    if ($lowerCaseHits->length != 0) {
        return $lowerCaseHits;
    }

    return $dom->getElementsByTagName(strtoupper($tag));
}
/**
 * Get the text contained in a node, including the text of nested elements.
 *
 * The data of every text and CDATA node is trimmed and followed by a single
 * space; other child nodes are processed recursively. Runs of spaces, which
 * arise from whitespace-only text nodes between elements, are collapsed to
 * one space.
 *
 * @param DOMNode $node
 * @return string Empty string when $node has no child node list.
 * @since Method available since Release 3.3.0
 * @author Mike Naberezny <mike@maintainable.com>
 * @author Derek DeVries <derek@maintainable.com>
 */
protected static function getNodeText(DOMNode $node) {
    if (!$node->childNodes instanceof DOMNodeList) {
        return '';
    }

    $result = '';

    foreach ($node->childNodes as $childNode) {
        if ($childNode->nodeType === XML_TEXT_NODE || $childNode->nodeType === XML_CDATA_SECTION_NODE) {
            $result .= trim($childNode->data) . ' ';
        }
        else {
            $result .= self::getNodeText($childNode);
        }
    }

    // A whitespace-only text node contributes a lone space right after the
    // separator of the preceding text, so squeeze such runs.
    return preg_replace('/ {2,}/', ' ', $result);
}
}
Name | Modifiers | Type | Description | Overrides |
---|---|---|---|---|
PHPUnit_Util_XML::assertValidKeys | public static | function | Validate list of keys in the associative array. | |
PHPUnit_Util_XML::convertSelectToTag | public static | function | Parse a CSS selector into an associative array suitable for use with findNodes(). | |
PHPUnit_Util_XML::cssSelect | public static | function | Parse an $actual document and return an array of DOMNodes matching the CSS $selector. If an error occurs, it will return FALSE. | |
PHPUnit_Util_XML::findNodes | public static | function | Parse out the options from the tag using DOM object tree. | |
PHPUnit_Util_XML::getDescendants | protected static | function | Recursively get flat array of all descendants of this node. | |
PHPUnit_Util_XML::getElementsByCaseInsensitiveTagName | protected static | function | Gets elements by case insensitive tagname. | |
PHPUnit_Util_XML::getNodeText | protected static | function | Get the text value of this node's child text node. | |
PHPUnit_Util_XML::load | public static | function | Load an $actual document into a DOMDocument. This is called from the selector assertions. | |
PHPUnit_Util_XML::loadFile | public static | function | Loads an XML (or HTML) file into a DOMDocument object. | |
PHPUnit_Util_XML::nodeToText | public static | function | @since Method available since Release 3.4.0 | |
PHPUnit_Util_XML::prepareString | public static | function | @author Kore Nordmann <mail@kore-nordmann.de> @since Method available since Release 3.4.6 | |
PHPUnit_Util_XML::removeCharacterDataNodes | public static | function | @since Method available since Release 3.3.0 @author Mattis Stordalen Flister <mattis@xait.no> | |
PHPUnit_Util_XML::xmlToVariable | public static | function | "Convert" a DOMElement object into a PHP variable. | |