/**
 * Rebuild the firmware table from the mirror's directory listings.
 *
 * Walks year -> device -> carrier listing pages below the mirror base URL,
 * picks the most recently modified ZIP link on each carrier page and inserts
 * one row per device/carrier pair into the `{prefix}lenovo` table. Progress
 * is echoed as the scrape runs. After every `$batch_limit` successful inserts
 * the scrape pauses for `$pause_duration` seconds, and it sleeps one second
 * after each processed carrier that yielded a file.
 *
 * The table is truncated only after both preconditions (table exists,
 * devices.html is present) have passed, so an aborted run leaves the
 * existing rows untouched.
 *
 * NOTE(review): the three listing/model patterns below were reconstructed
 * from tag-stripped fragments; the exact cell markup of the mirror and of
 * devices.html is an assumption and should be confirmed against live pages.
 *
 * @return string Status message: an "Error: ..." text when a precondition
 *                fails, otherwise a summary with the number of inserted rows.
 */
function lenovo_firmware_scrape(): string
{
    global $wpdb;

    $table_name = $wpdb->prefix . 'lenovo';

    // Messages emitted from the device-model lookup onwards carry a four-space
    // pad in the existing output; keep that text unchanged.
    $nested = '    ';

    // Emit one progress message. Callers must escape any dynamic part themselves.
    $log = static function (string $message, string $pad = ''): void {
        echo "\n\n" . $pad . $message . "\n\n" . $pad;
    };

    // Fetch a page body; returns WP_Error on transport failure or non-2xx status
    // so that error pages are never parsed as directory listings.
    $fetch_body = static function (string $url) {
        $response = wp_remote_get($url, ['timeout' => 15]);
        if (is_wp_error($response)) {
            return $response;
        }

        $status = (int) wp_remote_retrieve_response_code($response);
        if ($status < 200 || $status >= 300) {
            return new WP_Error('lenovo_http_status', "Unexpected HTTP status $status");
        }

        return wp_remote_retrieve_body($response);
    };

    // Extract folder names from a listing page: a cell ending in "folder",
    // followed by a cell whose link text is the folder name.
    $list_folders = static function (string $html): array {
        preg_match_all(
            '/folder<\/td>\s*<td[^>]*>\s*<a\b[^>]*>([^<]+)<\/a>\s*<\/td>/',
            $html,
            $matches
        );

        return array_map('trim', $matches[1]);
    };

    $log("Checking database table: $table_name");

    // esc_like() stops the "_" in the table name acting as a LIKE wildcard.
    $existing_table = $wpdb->get_var(
        $wpdb->prepare('SHOW TABLES LIKE %s', $wpdb->esc_like($table_name))
    );
    if ($existing_table !== $table_name) {
        $log("Table $table_name does not exist. Cannot proceed with scrape.");

        return "Error: Database table does not exist. Please reactivate the plugin to create it.";
    }

    $devices_file = LENOVO_PLUGIN_DIR . 'devices.html';
    if (!file_exists($devices_file)) {
        $log("devices.html not found in plugin directory: $devices_file");

        return "Error: devices.html not found in plugin directory. Please add it.";
    }

    // An unreadable file degrades to "no model information" rather than aborting.
    $raw_devices = file_get_contents($devices_file);
    $devices_content = is_string($raw_devices) ? $raw_devices : '';
    $log("Loaded devices.html from: $devices_file");
    $log('devices.html content preview: ' . esc_html(substr($devices_content, 0, 200)) . '...');

    $log("Truncating table $table_name");
    $wpdb->query("TRUNCATE TABLE $table_name");

    $mirror_origin = 'https://mirrors.lolinet.com';
    $base_url = $mirror_origin . '/firmware/lenowow/';
    $years = ['2024'];
    $inserted_count = 0;
    $batch_count = 0;
    $batch_limit = 200;
    $pause_duration = 60;

    $log("Starting firmware scrape with slow import: $batch_limit entries per batch, $pause_duration seconds pause");

    foreach ($years as $year) {
        $log("Processing year: $year");

        $year_url = $base_url . $year . '/';
        $log("Fetching URL: $year_url");

        $year_body = $fetch_body($year_url);
        if (is_wp_error($year_body)) {
            $log("Error fetching year $year: " . esc_html($year_body->get_error_message()));
            continue;
        }
        $log('Year page fetched successfully. First 200 chars: ' . esc_html(substr($year_body, 0, 200)) . '...');

        // Device folders never contain "_" or "."; anything else is skipped.
        $devices = array_filter(
            $list_folders($year_body),
            static fn (string $name): bool => !str_contains($name, '_') && !str_contains($name, '.')
        );

        $log("Found devices for $year: " . (empty($devices) ? 'None' : esc_html(implode(', ', $devices))));
        if (empty($devices)) {
            $log("No devices found for $year. Check if the page structure matches the expected HTML.");
            continue;
        }

        foreach ($devices as $device_name) {
            $safe_device = esc_html($device_name);
            $log("Processing device: $safe_device");

            // Look up "<li>NAME (brand) model</li>" in devices.html.
            $device_model = '';
            if (!empty($devices_content)) {
                $pattern = '/<li[^>]*>\s*' . preg_quote($device_name, '/')
                    . '\s*\(([^)]+)\)\s*([^\s<]+(?:\s+[^\s<]+)*)\s*<\/li>/i';
                if (preg_match($pattern, $devices_content, $model_match)) {
                    $device_model = trim($model_match[1]) . ' ' . trim($model_match[2]);
                    $log("Found device model for $safe_device: " . esc_html($device_model), $nested);
                } else {
                    $log("No device model found for $safe_device in devices.html", $nested);
                }
            }
            $safe_model = esc_html($device_model);

            $device_url = $year_url . rawurlencode($device_name) . '/';
            $log('Fetching device URL: ' . esc_html($device_url), $nested);

            $device_body = $fetch_body($device_url);
            if (is_wp_error($device_body)) {
                $log(
                    'Error fetching device ' . esc_html($device_url) . ': '
                        . esc_html($device_body->get_error_message()),
                    $nested
                );
                continue;
            }
            $log(
                'Device page fetched successfully. First 200 chars: '
                    . esc_html(substr($device_body, 0, 200)) . '...',
                $nested
            );

            $carriers = $list_folders($device_body);
            $log(
                "Found carriers for $safe_device: "
                    . (empty($carriers) ? 'None' : esc_html(implode(', ', $carriers))),
                $nested
            );
            if (empty($carriers)) {
                $log("No carriers found for $safe_device", $nested);
                continue;
            }

            foreach ($carriers as $carrier_name) {
                $safe_carrier = esc_html($carrier_name);
                $log("Processing carrier: $safe_carrier", $nested);

                $carrier_url = $device_url . rawurlencode($carrier_name) . '/';
                $log('Fetching carrier URL: ' . esc_html($carrier_url), $nested);

                $carrier_body = $fetch_body($carrier_url);
                if (is_wp_error($carrier_body)) {
                    $log(
                        'Error fetching carrier ' . esc_html($carrier_url) . ': '
                            . esc_html($carrier_body->get_error_message()),
                        $nested
                    );
                    continue;
                }
                $log(
                    'Carrier page fetched successfully. First 200 chars: '
                        . esc_html(substr($carrier_body, 0, 200)) . '...',
                    $nested
                );

                // Groups: 1 = href of the ZIP, 2 = link text, 3 = text of the next cell,
                // which is read as the last-modified date.
                preg_match_all(
                    '/<a\b[^>]*\bhref="([^"]+\.zip)"[^>]*>([^<]+)<\/a>\s*<\/td>\s*<td[^>]*>([^<]+)<\/td>/',
                    $carrier_body,
                    $file_matches,
                    PREG_SET_ORDER
                );

                $files_found = array_column($file_matches, 1);
                $log(
                    "Found ZIP files for $safe_carrier: "
                        . (empty($files_found) ? 'None' : esc_html(implode(', ', $files_found))),
                    $nested
                );

                $latest_file = null;
                $latest_time = 0;
                foreach ($file_matches as $file) {
                    $href = $file[1];
                    $parsed = isset($file[3]) ? strtotime(trim($file[3])) : false;
                    $last_modified = $parsed === false ? 0 : $parsed;

                    // The first file is always a candidate, so a listing whose dates
                    // cannot be parsed still yields a row (stamped with the current time).
                    if ($latest_file !== null && $last_modified <= $latest_time) {
                        continue;
                    }

                    // hrefs may be absolute, root-relative or relative to the carrier page.
                    if (preg_match('#^https?://#i', $href)) {
                        $file_url = $href;
                    } elseif (str_starts_with($href, '/')) {
                        $file_url = $mirror_origin . $href;
                    } else {
                        $file_url = $carrier_url . $href;
                    }

                    $latest_time = $last_modified;
                    $latest_file = [
                        'url' => $file_url,
                        'version' => str_replace('.zip', '', basename($href)),
                        'modified' => $last_modified ? date('Y-m-d H:i:s', $last_modified) : current_time('mysql'),
                    ];
                }

                if ($latest_file === null) {
                    $log("No valid ZIP file found for $safe_device/$safe_carrier", $nested);
                    continue;
                }

                $safe_version = esc_html($latest_file['version']);
                $log("Attempting to insert: $safe_device ($safe_model)/$safe_carrier - $safe_version", $nested);

                $insert_result = $wpdb->insert($table_name, [
                    'file_url' => $latest_file['url'],
                    'file_version' => $latest_file['version'],
                    'carrier' => $carrier_name,
                    'device_name' => $device_name,
                    'device_model' => $device_model,
                    'last_modified' => $latest_file['modified'],
                ]);

                if ($insert_result === false) {
                    $log(
                        "Failed to insert: $safe_device/$safe_carrier - DB Error: " . esc_html($wpdb->last_error),
                        $nested
                    );
                } else {
                    $inserted_count++;
                    $batch_count++;
                    $log(
                        "Inserted: $safe_device ($safe_model)/$safe_carrier - $safe_version (Total: $inserted_count)",
                        $nested
                    );
                }

                if ($batch_count >= $batch_limit) {
                    $log(
                        "Reached batch limit of $batch_limit entries. Pausing for $pause_duration seconds...",
                        $nested
                    );
                    sleep($pause_duration);
                    $batch_count = 0;
                    $log('Resuming scrape...', $nested);
                }

                // Throttle requests to the mirror between carriers.
                sleep(1);
            }
        }
    }

    $log("Scrape completed with $inserted_count insertions", $nested);

    lenovo_generate_sitemap();

    return "Scraping completed! Inserted $inserted_count records.";
}