🧠 Step-by-Step Guide
1. Setting Up the Environment
First, let's get our environment ready by installing the necessary packages. Open your terminal and run:
pip install selenium webdriver-manager
2. Creating the Script
Now, let's create a Python script named take_screenshots.py. This script will:
- Load URLs from a CSV file.
- Use Selenium to open each URL.
- Handle cookie consent pop-ups.
- Take a screenshot and save it with a sanitized filename.
Here's the script:
import csv
import os
import re
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
def load_urls_from_csv(csv_filename):
    """Return the URLs stored in the first column of a CSV file.

    The first row is treated as a header and skipped. Blank rows and rows
    whose first cell is empty or whitespace-only are ignored, and surrounding
    whitespace is stripped from each URL.

    Args:
        csv_filename: Path to a UTF-8 encoded CSV file.

    Returns:
        A list of URL strings in file order; empty if the file has no data
        rows (or is completely empty).

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(csv_filename, newline='', encoding='utf-8') as csvfile:
        csvreader = csv.reader(csvfile)
        # The default keeps a completely empty file from raising StopIteration.
        next(csvreader, None)  # Skip header row
        first_cells = (row[0].strip() for row in csvreader if row)
        return [url for url in first_cells if url]
def accept_cookies(driver, timeout=5):
    """Click the first visible, enabled cookie-consent button, if there is one.

    All candidate XPaths are polled together inside a single wait, so a page
    without a consent banner costs at most ``timeout`` seconds in total
    instead of one full timeout per XPath. Candidates are checked in list
    order, so earlier (more specific) XPaths win when several match.

    This is best-effort: failures are reported on stdout and never raised.

    Args:
        driver: A Selenium WebDriver with the target page already loaded.
        timeout: Maximum number of seconds to wait for a button to appear.

    Returns:
        True if a button was clicked, False otherwise.
    """
    # Imported here so this function does not rely on changes to the
    # file-level import section.
    from selenium.common.exceptions import (
        StaleElementReferenceException,
        TimeoutException,
        WebDriverException,
    )

    # NOTE: the class-based patterns are substring matches and therefore
    # broad (e.g. 'ok' also matches a class such as 'book'); they are kept
    # last so the text-based patterns take priority.
    consent_button_xpaths = [
        "//button[contains(text(), 'Accept')]",
        "//button[contains(text(), 'accept')]",
        "//button[contains(text(), 'Agree')]",
        "//button[contains(text(), 'agree')]",
        "//button[contains(text(), 'Allow')]",
        "//button[contains(text(), 'allow')]",
        "//button[contains(text(), 'OK')]",
        "//button[contains(text(), 'Ok')]",
        "//button[contains(text(), 'Got it')]",
        "//button[contains(text(), 'got it')]",
        "//button[contains(text(), 'I agree')]",
        "//button[contains(text(), 'I accept')]",
        "//button[contains(@class, 'accept')]",
        "//button[contains(@class, 'agree')]",
        "//button[contains(@class, 'allow')]",
        "//button[contains(@class, 'ok')]",
        "//button[contains(@class, 'consent')]",
        "//button[contains(@class, 'cookie')]",
    ]

    def first_clickable(drv):
        """Return (button, xpath) for the first clickable match, else False."""
        for xpath in consent_button_xpaths:
            for button in drv.find_elements(By.XPATH, xpath):
                if button.is_displayed() and button.is_enabled():
                    return button, xpath
        return False

    try:
        # Elements can be re-rendered between lookup and inspection while the
        # banner animates in; treat that as "not ready yet" and poll again.
        button, xpath = WebDriverWait(
            driver, timeout, ignored_exceptions=(StaleElementReferenceException,)
        ).until(first_clickable)
        button.click()
    except TimeoutException:
        print("No cookie consent button found.")
        return False
    except WebDriverException as e:
        print(f"Error finding cookie consent button: {e}")
        return False

    print(f"Cookie consent button clicked: {xpath}")
    return True
def sanitize_filename(url):
    """Turn a URL into a string that is safe to use as a file name stem.

    Every character other than an ASCII letter, digit, underscore or hyphen
    is replaced with an underscore, and the result is cut to at most 255
    characters. No extension is added; a caller that appends one has to
    account for its length itself.
    """
    unsafe_chars = re.compile(r'[^a-zA-Z0-9_\-]')
    cleaned = unsafe_chars.sub('_', url)
    max_length = 255
    return cleaned[:max_length]
def take_screenshot(url, output_path):
    """Open ``url`` in headless Chrome and save a screenshot to ``output_path``.

    A fresh browser is started for each call. After the page body is present,
    a cookie-consent button is clicked if one can be found, and the viewport
    (1920x1080) is captured. The browser is always shut down, even when
    loading the page or taking the screenshot fails.

    This is best-effort: any error is reported on stdout and not raised, so a
    single bad URL does not stop a batch run.

    Args:
        url: Address of the page to capture.
        output_path: Destination file path for the PNG image.
    """
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--disable-gpu")
    chrome_options.add_argument("--window-size=1920,1080")

    try:
        driver = webdriver.Chrome(
            service=ChromeService(ChromeDriverManager().install()),
            options=chrome_options,
        )
        try:
            driver.get(url)
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.TAG_NAME, 'body'))
            )
            accept_cookies(driver)
            # Clicking a consent button may trigger a reload; make sure a
            # body is present again before capturing.
            WebDriverWait(driver, 2).until(
                EC.presence_of_element_located((By.TAG_NAME, 'body'))
            )
            # save_screenshot() reports I/O failures through its return
            # value rather than by raising, so check it before claiming
            # success.
            if driver.save_screenshot(output_path):
                print(f"Screenshot saved to {output_path}")
            else:
                print(f"Error taking screenshot of {url}: could not write {output_path}")
        finally:
            # Always release the browser process, including on failure.
            driver.quit()
    except Exception as e:
        # Deliberately broad: this is the per-URL boundary of a batch job.
        print(f"Error taking screenshot of {url}: {e}")
# Script entry: capture every URL listed in urls.csv into ./screenshots/.
csv_filename = 'urls.csv'
urls = load_urls_from_csv(csv_filename)
os.makedirs('screenshots', exist_ok=True)
extension = '.png'
for url in urls:
    # sanitize_filename() caps the stem at 255 characters, but the extension
    # is appended afterwards. Trim the stem so the complete file name stays
    # within 255 characters, the per-name limit on most filesystems.
    stem = sanitize_filename(url)[:255 - len(extension)]
    sanitized_filename = stem + extension
    output_path = os.path.join('screenshots', sanitized_filename)
    take_screenshot(url, output_path)
print("Screenshots taken for all URLs.")