1. <?php
  /**
   * Archives an HTML page to disk, optionally replacing parts of it first.
   *
   * The page is read from $url, every node matched by an XPath expression in
   * $overrides is replaced with the associated HTML, and the result is written
   * to "<sanitised basename of $url>.html" inside $archiveDir. When no
   * overrides are given the fetched content is stored byte-for-byte; when
   * overrides are applied the document is re-serialised by the DOM extension,
   * so insignificant formatting may differ from the source.
   *
   * Every failure is reported through error_log() and signalled by returning
   * false; the function itself does not throw.
   *
   * @param string $url        An http(s) URL, or any path/stream for which
   *                           is_readable() succeeds (e.g. a local file).
   * @param string $archiveDir Existing, writable directory for the archive.
   * @param array  $overrides  Key: XPath expression, Value: replacement HTML.
   *                           An empty replacement removes the matched nodes.
   *
   * @return bool True on success, false on failure.
   */
  function archiveHtmlPage(string $url, string $archiveDir, array $overrides = []): bool
  {
      // The http(s) stream wrappers do not support stat(), so is_readable()
      // reports false for every remote URL. Validate those syntactically and
      // keep the is_readable() check for everything else.
      $scheme = parse_url($url, PHP_URL_SCHEME);
      $isRemote = is_string($scheme) && in_array(strtolower($scheme), ['http', 'https'], true);
      $sourceOk = $isRemote
          ? filter_var($url, FILTER_VALIDATE_URL) !== false
          : is_readable($url);
      if (!$sourceOk) {
          error_log("Error: URL is not readable: " . $url);
          return false;
      }

      if (!is_dir($archiveDir) || !is_writable($archiveDir)) {
          error_log("Error: Archive directory is not valid or writable: " . $archiveDir);
          return false;
      }

      // Fetch HTML content. file_get_contents() signals failure with false;
      // the catch only matters when an error handler turns warnings into
      // exceptions.
      try {
          $html = file_get_contents($url);
      } catch (Exception $e) {
          error_log("Error fetching HTML content: " . $e->getMessage());
          return false;
      }
      if ($html === false) {
          error_log("Error: Failed to fetch HTML content from: " . $url);
          return false;
      }

      // Apply overrides. Skipped entirely when there is nothing to do so the
      // archived copy stays identical to what was fetched.
      if ($overrides !== [] && $html !== '') {
          $html = archiveHtmlApplyOverrides($html, $overrides);
          if ($html === null) {
              return false;
          }
      }

      // Generate filename: anything outside [a-zA-Z0-9._-] becomes "_".
      $filename = preg_replace("/[^a-zA-Z0-9._-]/", "_", basename($url)) . ".html";
      $filepath = rtrim($archiveDir, '/') . '/' . $filename;

      // Save the archived content
      if (file_put_contents($filepath, $html) === false) {
          error_log("Error: Failed to save archived content to: " . $filepath);
          return false;
      }

      return true;
  }

  /**
   * Replaces every node matched by each XPath expression with the given HTML.
   *
   * Private helper of archiveHtmlPage(); not meant to be called directly.
   *
   * @param string $html      Source document.
   * @param array  $overrides Key: XPath expression, Value: replacement HTML.
   *
   * @return string|null The rewritten document, or null on failure (the
   *                     reason is written to error_log()).
   */
  function archiveHtmlApplyOverrides(string $html, array $overrides): ?string
  {
      if ($html === '') {
          return '';
      }

      // Processing-instruction prefix that makes libxml decode the input as
      // UTF-8 instead of falling back to ISO-8859-1.
      $utf8Hint = '<?xml encoding="UTF-8">';

      // Real-world HTML is rarely valid; collect libxml complaints internally
      // instead of emitting a PHP warning for each one.
      $previousSetting = libxml_use_internal_errors(true);

      try {
          // Only force UTF-8 when the page declares no charset of its own and
          // its bytes actually are valid UTF-8 (an empty /u pattern matches
          // exactly when the subject is well-formed UTF-8).
          $hinted = stripos($html, 'charset') === false && preg_match('//u', $html) === 1;

          $dom = new DOMDocument();
          if ($dom->loadHTML($hinted ? $utf8Hint . $html : $html) === false) {
              error_log("Error: Failed to parse HTML content for overrides.");
              return null;
          }

          if ($hinted) {
              // Drop the helper instruction again so it is not archived.
              foreach ($dom->childNodes as $child) {
                  if ($child instanceof DOMProcessingInstruction) {
                      $dom->removeChild($child);
                      break;
                  }
              }
              $dom->encoding = 'UTF-8';
          }

          $finder = new DOMXPath($dom);

          foreach ($overrides as $xpath => $replacement) {
              $matches = $finder->query((string) $xpath);
              if ($matches === false) {
                  error_log("Error applying override for XPath '$xpath': invalid XPath expression");
                  return null;
              }

              // Parse the replacement as HTML (not XML) so snippets such as
              // "<br>" are accepted. The wrapper <div> only collects the
              // parsed nodes and is never inserted itself.
              // NOTE(review): replacement strings are assumed to be UTF-8.
              $snippetDoc = new DOMDocument();
              $snippetDoc->loadHTML(
                  $utf8Hint . '<html><body><div>' . (string) $replacement . '</div></body></html>'
              );
              $wrapper = $snippetDoc->getElementsByTagName('div')->item(0);
              if ($wrapper === null) {
                  error_log("Error applying override for XPath '$xpath': replacement HTML could not be parsed");
                  return null;
              }

              try {
                  foreach ($matches as $target) {
                      $parent = $target->parentNode;
                      if ($parent === null) {
                          continue;
                      }
                      foreach ($wrapper->childNodes as $piece) {
                          $parent->insertBefore($dom->importNode($piece, true), $target);
                      }
                      $parent->removeChild($target);
                  }
              } catch (DOMException $e) {
                  // Raised e.g. when the expression selects something that
                  // cannot be swapped for element content.
                  error_log("Error applying override for XPath '$xpath': " . $e->getMessage());
                  return null;
              }
          }

          $result = $dom->saveHTML();
          if ($result === false) {
              error_log("Error: Failed to serialise HTML content after applying overrides.");
              return null;
          }

          return $result;
      } finally {
          // Leave libxml's global error state exactly as we found it.
          libxml_clear_errors();
          libxml_use_internal_errors($previousSetting);
      }
  }
  // Demonstration run: swap the placeholder values below for real ones.
  $url = 'https://www.example.com';
  $archiveDir = '/path/to/archive';

  // Each key is an XPath expression; its value is the HTML that takes the
  // place of whatever the expression matches.
  $overrides = [];
  $overrides['//div[@class="header"]'] = '<div class="custom-header">Custom Header</div>';

  // Report the outcome of the archive attempt on standard output.
  echo archiveHtmlPage($url, $archiveDir, $overrides)
      ? "Archive created successfully!\n"
      : "Archive creation failed.\n";
  64. ?>

Add your comment