<?php
/**
 * Archives an HTML page to disk, optionally replacing parts of it first.
 *
 * The page is read with file_get_contents(), so $url may be a local path or
 * any URL whose scheme has a stream wrapper available. Each override key is
 * evaluated as an XPath expression against the parsed document and every
 * matching node is replaced by the nodes parsed from the replacement HTML
 * (an empty replacement removes the match).
 *
 * When no override changes the document, the fetched content is stored
 * untouched; otherwise the page is re-serialised by the DOM extension, which
 * may normalise the markup.
 *
 * The archive is written to "<archiveDir>/<sanitised basename of $url>.html".
 * Failures are reported through error_log() and a false return value.
 *
 * @param string $url        Local path or URL of the HTML page to archive.
 * @param string $archiveDir Existing, writable directory that receives the archive.
 * @param array  $overrides  Map of XPath expression => replacement HTML.
 *
 * @return bool True on success, false on failure.
 */
function archiveHtmlPage(string $url, string $archiveDir, array $overrides = []): bool
{
    // Validate inputs.
    // is_readable() depends on stat(), which the http(s) stream wrappers do not
    // support, so only local paths and file:// URLs are pre-checked. Remote
    // URLs are validated by the fetch itself.
    $isRemote = preg_match('#^(?!file://)[a-z][a-z0-9+.\-]*://#i', $url) === 1;
    if ($url === '' || (!$isRemote && !is_readable($url))) {
        error_log("Error: URL is not readable: " . $url);
        return false;
    }
    if (!is_dir($archiveDir) || !is_writable($archiveDir)) {
        error_log("Error: Archive directory is not valid or writable: " . $archiveDir);
        return false;
    }

    // Fetch HTML content.
    try {
        $html = file_get_contents($url);
    } catch (Exception $e) {
        // Only reachable when an error handler converts warnings to exceptions.
        error_log("Error fetching HTML content: " . $e->getMessage());
        return false;
    }
    if ($html === false) {
        error_log("Error: Failed to fetch HTML content from: " . $url);
        return false;
    }

    // Apply overrides. An empty page has nothing to match, and loadHTML()
    // rejects empty input, so it is stored as-is.
    if ($overrides !== [] && $html !== '') {
        // Real-world HTML is rarely valid; keep libxml's parse warnings quiet.
        $previousErrorMode = libxml_use_internal_errors(true);
        try {
            $document = new DOMDocument();

            // Without a declared charset libxml falls back to Latin-1; assume
            // UTF-8 instead by prepending an encoding hint, removed again below.
            $needsEncodingHint = preg_match('/<meta[^>]+charset/i', $html) !== 1;
            $source = $needsEncodingHint ? '<?xml encoding="UTF-8">' . $html : $html;

            if (!$document->loadHTML($source, LIBXML_HTML_NODEFDTD)) {
                error_log("Error: Failed to parse HTML content from: " . $url);
                return false;
            }
            if ($needsEncodingHint) {
                foreach (iterator_to_array($document->childNodes) as $topLevelNode) {
                    if ($topLevelNode->nodeType === XML_PI_NODE) {
                        $document->removeChild($topLevelNode);
                    }
                }
            }

            $finder = new DOMXPath($document);
            $changed = false;

            foreach ($overrides as $xpath => $replacement) {
                $xpath = (string) $xpath;
                try {
                    $matches = $finder->query($xpath);
                    if ($matches === false) {
                        error_log("Error applying override for XPath '$xpath': invalid XPath expression");
                        return false;
                    }
                    if ($matches->length === 0) {
                        continue;
                    }

                    // Parse the replacement with the HTML parser so that
                    // non-XML markup such as "<br>" is accepted.
                    $snippet = new DOMDocument();
                    $snippet->loadHTML(
                        '<?xml encoding="UTF-8"><html><body>' . (string) $replacement . '</body></html>'
                    );
                    $snippetBody = $snippet->getElementsByTagName('body')->item(0);
                    $newNodes = $snippetBody === null ? [] : iterator_to_array($snippetBody->childNodes);

                    // Snapshot the matches so replacing nodes cannot disturb iteration.
                    foreach (iterator_to_array($matches) as $node) {
                        $parent = $node->parentNode;
                        if ($parent === null) {
                            continue;
                        }
                        if ($newNodes === []) {
                            // Empty replacement: drop the matched node.
                            $parent->removeChild($node);
                        } else {
                            $fragment = $document->createDocumentFragment();
                            foreach ($newNodes as $newNode) {
                                $fragment->appendChild($document->importNode($newNode, true));
                            }
                            $parent->replaceChild($fragment, $node);
                        }
                        $changed = true;
                    }
                } catch (Exception $e) {
                    error_log("Error applying override for XPath '$xpath': " . $e->getMessage());
                    return false;
                }
            }

            // Re-serialise only when something changed, so an unmatched
            // override does not alter the stored markup.
            if ($changed) {
                $serialised = $document->saveHTML();
                if ($serialised === false) {
                    error_log("Error: Failed to serialise HTML content from: " . $url);
                    return false;
                }
                $html = $serialised;
            }
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previousErrorMode);
        }
    }

    // Generate filename.
    $filename = preg_replace("/[^a-zA-Z0-9._-]/", "_", basename($url)) . ".html";
    $filepath = rtrim($archiveDir, '/') . '/' . $filename;

    // Save the archived content.
    if (file_put_contents($filepath, $html) === false) {
        error_log("Error: Failed to save archived content to: " . $filepath);
        return false;
    }
    return true;
}
// Demonstration run: substitute real values for the placeholders below.
$url = 'https://www.example.com';
$archiveDir = '/path/to/archive';

// Each key is an XPath expression; its value is the HTML that replaces the match.
$overrides = ['//div[@class="header"]' => '<div class="custom-header">Custom Header</div>'];

// Print a one-line status message describing the outcome of the archive attempt.
echo archiveHtmlPage($url, $archiveDir, $overrides)
    ? "Archive created successfully!\n"
    : "Archive creation failed.\n";
?>
Add your comment