Parse out the options from the tag using the DOM object tree.
@since Method available since Release 3.3.0 @author Mike Naberezny <mike@maintainable.com> @author Derek DeVries <derek@maintainable.com> @author Tobias Schlitt <toby@php.net>
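DOMDocument $dom: The document whose element nodes are searched.
array $options: The match criteria; recognised keys are 'id', 'class', 'tag', 'content', 'attributes', 'parent', 'child', 'ancestor', 'descendant' and 'children'.
boolean $isHtml: When TRUE, elements are looked up by case-insensitive tag name; otherwise DOMDocument::getElementsByTagName() is used.
array: The nodes that satisfy every given criterion; a falsy value is returned instead when nothing matches.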
/**
 * Finds the element nodes of a DOM tree that satisfy every given criterion.
 *
 * Candidates are collected by tag name first and then narrowed down by each
 * filter that is present in $options, in this order: attributes (including
 * the 'id' and 'class' shortcuts), content, parent, child, ancestor,
 * descendant, children.
 *
 * Recognised option keys:
 * - id, class: shortcuts that are merged into 'attributes'.
 * - tag: tag name to look up. Without it, a fixed list of HTML tag names is
 *   searched, so elements with other tag names are never candidates.
 * - attributes: map of attribute name => expected value. A value of the form
 *   "regexp:/foo/i" is matched as a regular expression. For 'class', every
 *   whitespace-separated class in the expected value must be present.
 * - content: substring the node text must contain, or a "regexp:" pattern.
 *   An empty string requires the node text to be empty.
 * - parent, ancestor: nested option arrays. Only the FIRST node matching the
 *   nested options is used for the comparison.
 * - child, descendant: nested option arrays. Any matching node qualifies.
 * - children: array with 'count', 'greater_than', 'less_than' and/or 'only'
 *   (a nested option array that every child must match). Text and CDATA
 *   children are ignored, and a node without any remaining children never
 *   passes this filter.
 *
 * @param DOMDocument $dom
 *   The document to search.
 * @param array $options
 *   The match criteria described above.
 * @param bool $isHtml
 *   TRUE to look elements up by case-insensitive tag name, FALSE to use
 *   DOMDocument::getElementsByTagName().
 *
 * @return array|false
 *   The matching nodes (never an empty array), or FALSE if nothing matches.
 */
public static function findNodes(DOMDocument $dom, array $options, $isHtml = TRUE) {
  $valid = array(
    'id',
    'class',
    'tag',
    'content',
    'attributes',
    'parent',
    'child',
    'ancestor',
    'descendant',
    'children',
  );
  // NOTE(review): the code below reads every key unconditionally, so
  // assertValidKeys() presumably fills absent keys with NULL - confirm.
  $options = self::assertValidKeys($options, $valid);
  $nodes = array();
  $filtered = array();

  // The id and class shortcuts are just additional attribute constraints.
  if ($options['id']) {
    $options['attributes']['id'] = $options['id'];
  }
  if ($options['class']) {
    $options['attributes']['class'] = $options['class'];
  }

  // Collect the candidates: either the requested tag or every known tag.
  if ($options['tag']) {
    $tagNames = array($options['tag']);
  }
  else {
    $tagNames = array(
      'a', 'abbr', 'acronym', 'address', 'area', 'b', 'base', 'bdo', 'big',
      'blockquote', 'body', 'br', 'button', 'caption', 'cite', 'code', 'col',
      'colgroup', 'dd', 'del', 'div', 'dfn', 'dl', 'dt', 'em', 'fieldset',
      'form', 'frame', 'frameset', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head',
      'hr', 'html', 'i', 'iframe', 'img', 'input', 'ins', 'kbd', 'label',
      'legend', 'li', 'link', 'map', 'meta', 'noframes', 'noscript', 'object',
      'ol', 'optgroup', 'option', 'p', 'param', 'pre', 'q', 'samp', 'script',
      'select', 'small', 'span', 'strong', 'style', 'sub', 'sup', 'table',
      'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'title', 'tr', 'tt',
      'ul', 'var',
    );
  }
  foreach ($tagNames as $tagName) {
    if ($isHtml) {
      $elements = self::getElementsByCaseInsensitiveTagName($dom, $tagName);
    }
    else {
      $elements = $dom->getElementsByTagName($tagName);
    }
    foreach ($elements as $element) {
      $nodes[] = $element;
    }
  }
  if (empty($nodes)) {
    return FALSE;
  }

  // Filter by attributes.
  if ($options['attributes']) {
    foreach ($nodes as $node) {
      $invalid = FALSE;
      foreach ($options['attributes'] as $name => $value) {
        // Match by regexp if like "regexp:/foo/i".
        if (preg_match('/^regexp\\s*:\\s*(.*)/i', $value, $matches)) {
          if (!preg_match($matches[1], $node->getAttribute($name))) {
            $invalid = TRUE;
          }
        }
        elseif ($name === 'class') {
          // Split both sides into individual classes.
          $findClasses = explode(' ', preg_replace("/\\s+/", " ", $value));
          $allClasses = explode(' ', preg_replace("/\\s+/", " ", $node->getAttribute($name)));
          // Every requested class must be present on the node. Strict
          // comparison keeps numeric-looking names such as "1" and "01"
          // from being treated as equal.
          foreach ($findClasses as $findClass) {
            if (!in_array($findClass, $allClasses, TRUE)) {
              $invalid = TRUE;
            }
          }
        }
        elseif ($node->getAttribute($name) != $value) {
          $invalid = TRUE;
        }
      }
      // Keep the node only if every attribute given matched.
      if (!$invalid) {
        $filtered[] = $node;
      }
    }
    $nodes = $filtered;
    $filtered = array();
    if (empty($nodes)) {
      return FALSE;
    }
  }

  // Filter by content.
  if ($options['content'] !== NULL) {
    foreach ($nodes as $node) {
      $text = self::getNodeText($node);
      // Match by regexp if like "regexp:/foo/i".
      if (preg_match('/^regexp\\s*:\\s*(.*)/i', $options['content'], $matches)) {
        $invalid = !preg_match($matches[1], $text);
      }
      elseif ($options['content'] === '') {
        // An empty expectation means the node must have no text at all.
        $invalid = ($text !== '');
      }
      else {
        $invalid = (strstr($text, $options['content']) === FALSE);
      }
      if (!$invalid) {
        $filtered[] = $node;
      }
    }
    $nodes = $filtered;
    $filtered = array();
    if (empty($nodes)) {
      return FALSE;
    }
  }

  // Filter by parent node; only the first node matching the parent options
  // is considered.
  if ($options['parent']) {
    $parentNodes = self::findNodes($dom, $options['parent'], $isHtml);
    $parentNode = isset($parentNodes[0]) ? $parentNodes[0] : NULL;
    foreach ($nodes as $node) {
      if ($parentNode === $node->parentNode) {
        $filtered[] = $node;
      }
    }
    $nodes = $filtered;
    $filtered = array();
    if (empty($nodes)) {
      return FALSE;
    }
  }

  // Filter by child node.
  if ($options['child']) {
    $childNodes = self::findNodes($dom, $options['child'], $isHtml);
    $childNodes = !empty($childNodes) ? $childNodes : array();
    foreach ($nodes as $node) {
      foreach ($node->childNodes as $child) {
        // Strict in_array() compares objects by identity.
        if (in_array($child, $childNodes, TRUE)) {
          $filtered[] = $node;
          // One matching child is enough; do not list the node twice.
          break;
        }
      }
    }
    $nodes = $filtered;
    $filtered = array();
    if (empty($nodes)) {
      return FALSE;
    }
  }

  // Filter by ancestor; only the first node matching the ancestor options
  // is considered.
  if ($options['ancestor']) {
    $ancestorNodes = self::findNodes($dom, $options['ancestor'], $isHtml);
    $ancestorNode = isset($ancestorNodes[0]) ? $ancestorNodes[0] : NULL;
    foreach ($nodes as $node) {
      $parent = $node->parentNode;
      while ($parent && $parent->nodeType != XML_HTML_DOCUMENT_NODE) {
        if ($parent === $ancestorNode) {
          $filtered[] = $node;
          break;
        }
        $parent = $parent->parentNode;
      }
    }
    $nodes = $filtered;
    $filtered = array();
    if (empty($nodes)) {
      return FALSE;
    }
  }

  // Filter by descendant.
  if ($options['descendant']) {
    $descendantNodes = self::findNodes($dom, $options['descendant'], $isHtml);
    $descendantNodes = !empty($descendantNodes) ? $descendantNodes : array();
    foreach ($nodes as $node) {
      foreach (self::getDescendants($node) as $descendant) {
        if (in_array($descendant, $descendantNodes, TRUE)) {
          $filtered[] = $node;
          // One matching descendant is enough; do not list the node twice.
          break;
        }
      }
    }
    $nodes = $filtered;
    $filtered = array();
    if (empty($nodes)) {
      return FALSE;
    }
  }

  // Filter by children.
  if ($options['children']) {
    $validChild = array(
      'count',
      'greater_than',
      'less_than',
      'only',
    );
    $childOptions = self::assertValidKeys($options['children'], $validChild);

    // The set of nodes allowed as children does not depend on the candidate
    // node, so it is resolved once. A failed lookup yields no allowed nodes.
    $onlyNodes = array();
    if ($childOptions['only']) {
      $onlyNodes = self::findNodes($dom, $childOptions['only'], $isHtml);
      $onlyNodes = !empty($onlyNodes) ? $onlyNodes : array();
    }

    foreach ($nodes as $node) {
      // Start from a clean list for every node; text and CDATA children do
      // not count.
      $children = array();
      foreach ($node->childNodes as $childNode) {
        if ($childNode->nodeType !== XML_CDATA_SECTION_NODE && $childNode->nodeType !== XML_TEXT_NODE) {
          $children[] = $childNode;
        }
      }

      // We must have children to pass this filter.
      if (empty($children)) {
        continue;
      }

      // A failed check only rejects the current node; later nodes still get
      // evaluated.
      $childCount = count($children);
      if ($childOptions['count'] !== NULL) {
        // Exact count of children; the range options are ignored.
        if ($childCount !== $childOptions['count']) {
          continue;
        }
      }
      else {
        if ($childOptions['less_than'] !== NULL && $childCount >= $childOptions['less_than']) {
          continue;
        }
        if ($childOptions['greater_than'] !== NULL && $childCount <= $childOptions['greater_than']) {
          continue;
        }
      }

      // Every child must be one of the 'only' nodes.
      if ($childOptions['only']) {
        foreach ($children as $child) {
          if (!in_array($child, $onlyNodes, TRUE)) {
            continue 2;
          }
        }
      }

      $filtered[] = $node;
    }
    $nodes = $filtered;
    $filtered = array();
    if (empty($nodes)) {
      return FALSE;
    }
  }

  // Return all nodes that match every criterion.
  return $nodes;
}