📸 Automate Website Screenshots with Python and Selenium

🔧 Step-by-Step Guide

1. Setting Up the Environment

First, let's get our environment ready by installing the necessary packages. Open your terminal and run:

pip install selenium webdriver-manager

2. Creating the Script

Now, let's create a Python script named take_screenshots.py. This script will:

  • Load URLs from a CSV file.
  • Use Selenium to open each URL.
  • Handle cookie consent pop-ups.
  • Take a screenshot and save it with a sanitized filename.

Here's the script:

import csv
import os
import re
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager

def load_urls_from_csv(csv_filename):
    """Return the URLs found in the first column of a CSV file.

    The first row is treated as a header and skipped. Surrounding
    whitespace is stripped from each URL, and rows that are empty or
    whose first cell is blank are ignored.

    Args:
        csv_filename: Path of the UTF-8 encoded CSV file to read.

    Returns:
        A list of URL strings in file order. The list is empty when the
        file has no data rows (including a completely empty file).

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(csv_filename, newline='', encoding='utf-8') as csvfile:
        csvreader = csv.reader(csvfile)
        # A default keeps an empty file from raising StopIteration.
        next(csvreader, None)  # Skip header row
        first_cells = (row[0].strip() for row in csvreader if row)
        # Drop blank cells so callers never receive an empty URL.
        return [url for url in first_cells if url]

def accept_cookies(driver, timeout=5):
    """Click the first visible cookie-consent button on the current page.

    This is a best-effort helper: it reports what happened on stdout and
    never raises for WebDriver errors, so a missing or unclickable banner
    does not abort the caller.

    All candidate XPaths share a single wait of ``timeout`` seconds.
    Waiting ``timeout`` seconds per XPath would stall for the sum of all
    waits on every page that has no banner.

    Args:
        driver: Selenium WebDriver with the target page already loaded.
        timeout: Maximum number of seconds to wait, in total, for any
            consent button to become visible and enabled.

    Returns:
        None.
    """
    # Imported locally so this function does not depend on additional
    # module-level imports.
    from selenium.common.exceptions import (
        StaleElementReferenceException,
        TimeoutException,
        WebDriverException,
    )

    # Ordered by priority: earlier expressions win when several match.
    consent_button_xpaths = (
        "//button[contains(text(), 'Accept')]",
        "//button[contains(text(), 'accept')]",
        "//button[contains(text(), 'Agree')]",
        "//button[contains(text(), 'agree')]",
        "//button[contains(text(), 'Allow')]",
        "//button[contains(text(), 'allow')]",
        "//button[contains(text(), 'OK')]",
        "//button[contains(text(), 'Ok')]",
        "//button[contains(text(), 'Got it')]",
        "//button[contains(text(), 'got it')]",
        "//button[contains(text(), 'I agree')]",
        "//button[contains(text(), 'I accept')]",
        "//button[contains(@class, 'accept')]",
        "//button[contains(@class, 'agree')]",
        "//button[contains(@class, 'allow')]",
        "//button[contains(@class, 'ok')]",
        "//button[contains(@class, 'consent')]",
        "//button[contains(@class, 'cookie')]",
    )

    def find_clickable_button(current_driver):
        """Return ``(xpath, element)`` for the first clickable match, else False."""
        for xpath in consent_button_xpaths:
            for button in current_driver.find_elements(By.XPATH, xpath):
                if button.is_displayed() and button.is_enabled():
                    return xpath, button
        return False

    # Elements can be re-rendered between lookup and inspection; treat a
    # stale reference as "not ready yet" and poll again.
    wait = WebDriverWait(
        driver, timeout, ignored_exceptions=(StaleElementReferenceException,)
    )
    try:
        xpath, consent_button = wait.until(find_clickable_button)
        consent_button.click()
    except TimeoutException:
        print("No cookie consent button found.")
    except WebDriverException as e:
        print(f"Error finding cookie consent button: {e}")
    else:
        print(f"Cookie consent button clicked: {xpath}")

def sanitize_filename(url, max_length=240):
    """Turn a URL into a string that is safe to use as a file name.

    Every character other than ASCII letters, digits, ``_`` and ``-`` is
    replaced with ``_``. Names longer than ``max_length`` are shortened
    and suffixed with a short digest of the full URL, so long URLs that
    share a prefix still map to different names.

    Args:
        url: The URL (or any string) to convert.
        max_length: Maximum length of the returned name. The default
            leaves headroom for an extension such as ``.png`` under the
            common 255-byte file name limit.

    Returns:
        The sanitized name, at most ``max_length`` characters long for
        any ``max_length`` of 11 or more.
    """
    import hashlib

    filename = re.sub(r'[^a-zA-Z0-9_\-]', '_', url)
    if len(filename) <= max_length:
        return filename

    # Truncating alone would make distinct long URLs collide, so keep a
    # fingerprint of the original URL at the end of the name.
    digest = hashlib.sha256(url.encode('utf-8', 'replace')).hexdigest()[:10]
    keep = max(max_length - len(digest) - 1, 0)
    return f"{filename[:keep]}_{digest}"

def take_screenshot(url, output_path):
    """Open ``url`` in headless Chrome and save a screenshot of the page.

    A new browser is started for each call and is always shut down
    again, even when loading the page or saving the image fails. Errors
    are reported on stdout instead of being raised, so one bad URL does
    not stop a batch run.

    Args:
        url: Address of the page to capture.
        output_path: File path where the PNG screenshot is written.

    Returns:
        None.
    """
    driver = None
    try:
        chrome_options = Options()
        chrome_options.add_argument("--headless")
        chrome_options.add_argument("--disable-gpu")
        chrome_options.add_argument("--window-size=1920,1080")

        driver = webdriver.Chrome(
            service=ChromeService(ChromeDriverManager().install()),
            options=chrome_options,
        )
        driver.get(url)
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.TAG_NAME, 'body'))
        )
        accept_cookies(driver)
        WebDriverWait(driver, 2).until(
            EC.presence_of_element_located((By.TAG_NAME, 'body'))
        )
        # save_screenshot signals a write failure by returning False
        # rather than raising, so check it before reporting success.
        if not driver.save_screenshot(output_path):
            raise OSError(f"could not write {output_path}")
        print(f"Screenshot saved to {output_path}")
    except Exception as e:
        # Deliberately broad: this is the per-URL boundary of a batch job.
        print(f"Error taking screenshot of {url}: {e}")
    finally:
        # Always release the browser; otherwise every failed URL leaves
        # a Chrome process running.
        if driver is not None:
            driver.quit()

# Batch driver: read the URL list, make sure the output folder exists,
# then capture one screenshot per URL.
csv_filename = 'urls.csv'
urls = load_urls_from_csv(csv_filename)

os.makedirs('screenshots', exist_ok=True)

for url in urls:
    # The image is named after the sanitized URL.
    sanitized_filename = f"{sanitize_filename(url)}.png"
    output_path = os.path.join('screenshots', sanitized_filename)
    take_screenshot(url, output_path)

print("Screenshots taken for all URLs.")

We will be happy to hear your thoughts

Leave a reply

Experience the Power of Preinstalled OS Drives and Booting Guides
Logo
Compare items
  • Total (0)
Compare
0
Ninja Silhouette 9 hours ago

Joe Doe in London, England purchased a

Joe Doe in London?

Joe Doe in London, England purchased a

Joe Doe in London?

Joe Doe in London, England purchased a

Joe Doe in London?

Joe Doe in London, England purchased a

Shopping cart