TEST VERSION - Weebly Content Extractor";
// Progress banner: a blank line, then how many HTML files are about to be scanned.
// $files is populated outside this excerpt — presumably a list of file paths.
echo "\nProcessing ", count($files), " HTML files...\n";
// Walk every entry of $files, report whether it carries the Weebly marker,
// and append one CSV row (title, slug, content, file name) to $csv for each
// page that yields content. $count tracks the number of rows written.
// $files and $csv are set up outside this excerpt — presumably a list of
// HTML file paths and an open, writable file handle; verify against the caller.
$count = 0;
foreach ($files as $file) {
    $name = basename($file);

    $html = file_get_contents($file);
    if ($html === false) {
        // Unreadable file: report it and move on rather than feeding `false`
        // into the string/regex functions below.
        echo "Could not read: " . $name . "\n";
        continue;
    }

    // Diagnostic only: a plain substring test, looser than the content regex
    // further down, so a file can report "Found" here and still produce
    // "No content found".
    if (strpos($html, 'wsite-elements') !== false) {
        echo "Found wsite-elements in: " . $name . "\n";
    } else {
        echo "NO wsite-elements in: " . $name . "\n";
    }

    // Get title: default to the file name, override with the text of the
    // first <title> element when it is present and non-empty. The pattern is
    // anchored on the opening tag so that nothing preceding <title> (doctype,
    // inline script/style text) can leak into the title.
    $title = $name;
    if (preg_match('/<title\b[^>]*>(.*?)<\/title>/is', $html, $match)) {
        $pageTitle = trim(strip_tags($match[1]));
        if ($pageTitle !== '') {
            $title = $pageTitle;
        }
    }

    // Find Weebly content area: everything after the literal attribute
    // class="wsite-elements" up to (not including) the first occurrence of
    // "wsite-footer" or "</body>".
    // NOTE(review): the capture starts inside the opening tag, so it includes
    // the remainder of that tag; and an element whose class attribute lists
    // additional classes will not match. Left as-is — confirm whether that is
    // intended before tightening.
    $content = "";
    if (preg_match(
        '/class="wsite-elements"(.*?)(?=wsite-footer|<\/body>)/is',
        $html,
        $match
    )) {
        $content = $match[1];
    }

    if ($content === "") {
        echo "No content found: " . $name . "\n";
        continue;
    }

    $slug = basename($file, ".html");
    if (fputcsv($csv, [$title, $slug, $content, $name]) === false) {
        // Do not count a page whose row never reached the CSV.
        echo "Failed to write CSV row for: " . $name . "\n";
        continue;
    }

    $count++;
    echo "Imported: " . htmlspecialchars($title) . "\n";
}
// Release the CSV handle ($csv is opened outside this excerpt) before
// printing the run summary.
fclose($csv);

// Summary: blank line, completion notice, number of rows written, and the
// output file name as hard-coded in the message.
echo "\n"
    . "Done!\n"
    . "Total pages extracted: {$count}\n"
    . "CSV created: weebly_content.csv\n";
?>