<?php
/**
 * Collects metrics of DOM elements for sandbox usage (no async).
 *
 * Parses $html with DOMDocument and walks the tree in document order
 * (pre-order), emitting one entry per element and one per non-empty text node:
 *   - element: ['tag' => string, 'attributes' => array<string, string>, 'children_count' => int]
 *   - text:    ['type' => 'text', 'content' => string]
 *
 * 'children_count' is the number of direct child nodes of the element
 * (DOMNode::$childNodes), so it counts text nodes as well as child elements.
 *
 * @param string $html The HTML string to analyze.
 * @return array<int, array<string, mixed>> Metrics in document order; empty when
 *                                          $html is empty or yields no root element.
 */
function collectDomMetrics(string $html): array
{
    // DOMDocument::loadHTML() rejects an empty string (ValueError on PHP 8),
    // and there is nothing to measure anyway.
    if ($html === '') {
        return [];
    }

    $dom = new DOMDocument();

    // Buffer libxml parse errors for potentially invalid HTML instead of
    // silencing every error with "@"; restore the caller's setting afterwards.
    $previousSetting = libxml_use_internal_errors(true);
    try {
        $dom->loadHTML($html);
    } finally {
        libxml_clear_errors();
        libxml_use_internal_errors($previousSetting);
    }

    $root = $dom->documentElement;
    if ($root === null) {
        return [];
    }

    $metrics = [];

    // Iterative pre-order traversal with an explicit stack. This avoids declaring
    // a nested named function (which would be redeclared, fatally, on a second
    // call) and does not depend on the call-stack depth for deeply nested markup.
    $stack = [$root];
    while (($node = array_pop($stack)) !== null) {
        if ($node->nodeType === XML_ELEMENT_NODE) {
            $attributes = [];
            foreach ($node->attributes as $attr) {
                $attributes[$attr->name] = $attr->value;
            }

            $children = $node->childNodes;
            $metrics[] = [
                'tag' => $node->tagName,
                'attributes' => $attributes,
                'children_count' => $children->length,
            ];

            // Push children last-to-first so they are popped in document order.
            for ($i = $children->length - 1; $i >= 0; $i--) {
                $stack[] = $children->item($i);
            }
        } elseif ($node->nodeType === XML_TEXT_NODE) {
            $text = $node->textContent;
            // Compare against '' explicitly: empty() would also discard the text "0".
            if ($text !== '') {
                $metrics[] = [
                    'type' => 'text',
                    'content' => $text,
                ];
            }
        }
    }

    return $metrics;
}
// Example usage (for testing): analyze the HTML passed as the first
// command-line argument, falling back to a built-in sample document
// when no argument is supplied, then dump the collected metrics.
$html = $_SERVER['argv'][1]
    ?? '<html><body><h1>Hello World</h1><p>This is a paragraph.</p><a href="#">Link</a></body></html>';
$metrics = collectDomMetrics($html);
print_r($metrics);
?>
Add your comment