1. <?php
  /**
   * Collects metrics for the nodes of an HTML document, synchronously.
   *
   * The document is walked depth-first in document order, starting at the
   * root element. Each element contributes one entry of the form
   * `['tag' => string, 'attributes' => array<string, string>, 'children_count' => int]`,
   * where `children_count` is the number of direct child *elements*. Each
   * non-empty text node contributes `['type' => 'text', 'content' => string]`.
   * Other node types (comments, CDATA sections, ...) are ignored.
   *
   * @param string $html The HTML string to analyze.
   * @return array<int, array<string, mixed>> Node metrics in document order;
   *                                          empty when nothing could be parsed.
   */
  function collectDomMetrics(string $html): array
  {
      // DOMDocument::loadHTML() rejects an empty source outright (ValueError on
      // PHP 8, warning on PHP 7), and there is nothing to measure anyway.
      if ($html === '') {
          return [];
      }

      $dom = new DOMDocument();

      // Real-world HTML is frequently invalid. Buffer libxml's diagnostics
      // instead of silencing everything with "@", then restore the caller's
      // error-handling mode so this function has no lasting side effect.
      $previousErrorMode = libxml_use_internal_errors(true);
      try {
          $loaded = $dom->loadHTML($html);
      } finally {
          libxml_clear_errors();
          libxml_use_internal_errors($previousErrorMode);
      }

      $root = $dom->documentElement;
      if ($loaded === false || $root === null) {
          return [];
      }

      $metrics = [];

      // Iterative pre-order traversal with an explicit stack: no nested named
      // function (which would be re-declared on every call and could not see
      // $metrics), and no recursion-depth limit on deeply nested documents.
      $pending = [$root];
      while ($pending !== []) {
          $node = array_pop($pending);

          if ($node instanceof DOMElement) {
              $attributes = [];
              foreach ($node->attributes as $attr) {
                  $attributes[$attr->name] = $attr->value;
              }

              // Push children last-to-first so they are popped (and therefore
              // reported) in document order; count element children on the way.
              $childElementCount = 0;
              for ($child = $node->lastChild; $child !== null; $child = $child->previousSibling) {
                  $pending[] = $child;
                  if ($child->nodeType === XML_ELEMENT_NODE) {
                      ++$childElementCount;
                  }
              }

              $metrics[] = [
                  'tag' => $node->tagName,
                  'attributes' => $attributes,
                  'children_count' => $childElementCount,
              ];
          } elseif ($node->nodeType === XML_TEXT_NODE) {
              $text = $node->textContent;
              // Strict comparison: empty() would also discard the valid text "0".
              if ($text !== '') {
                  $metrics[] = [
                      'type' => 'text',
                      'content' => $text,
                  ];
              }
          }
      }

      return $metrics;
  }
  // Example usage (for testing): analyze the HTML given as the first
  // command-line argument, or fall back to a small built-in sample document,
  // then dump the collected metrics.
  $html = $_SERVER['argv'][1]
      ?? '<html><body><h1>Hello World</h1><p>This is a paragraph.</p><a href="#">Link</a></body></html>';
  $metrics = collectDomMetrics($html);
  print_r($metrics);