pip install playwright
playwright install chromium # 安装 Chromium 浏览器
Playwright 支持 Chromium、Firefox 和 WebKit,这里以 Chromium 为例。

import asyncio
from playwright.async_api import async_playwright
import os
# Pages to capture; append further URLs to this list as needed.
URLS = [
    "https://www.jsonla.com",
    "https://www.jsonla.com/kuaizhao/",
]

# Directory that receives the screenshots; created at import time if missing.
OUTPUT_DIR = "screenshots"
os.makedirs(OUTPUT_DIR, exist_ok=True)
async def take_screenshot(page, url, filename, *, timeout_ms=30000):
    """Navigate ``page`` to ``url`` and save a full-page screenshot.

    The image is written to ``OUTPUT_DIR/filename``. Any failure is reported
    on stdout and is not re-raised, so one bad URL does not abort the other
    captures running alongside it.

    Args:
        page: Page object exposing the async ``goto``,
            ``wait_for_load_state`` and ``screenshot`` methods.
        url: Address to load.
        filename: Name of the PNG file, relative to ``OUTPUT_DIR``.
        timeout_ms: Upper bound in milliseconds, applied separately to the
            navigation and to the network-idle wait.

    Returns:
        ``True`` if the screenshot was saved, ``False`` if any step failed.
    """
    try:
        print(f"正在访问: {url}")
        await page.goto(url, timeout=timeout_ms)
        # Wait until the network is idle so late-loading resources are
        # rendered before the capture.
        await page.wait_for_load_state("networkidle", timeout=timeout_ms)
        # full_page=True captures the whole scrollable page, not just the
        # visible viewport.
        await page.screenshot(path=os.path.join(OUTPUT_DIR, filename), full_page=True)
        print(f"✅ 已保存: {filename}")
        return True
    except Exception as e:
        # Deliberate best-effort boundary: report the failure and carry on.
        print(f"❌ 失败 ({url}): {e}")
        return False
async def main():
    """Capture a screenshot of every URL in ``URLS`` using headless Chromium.

    One browser context is shared by all pages. A semaphore caps the number
    of pages open at once so a long URL list does not exhaust memory. Each
    screenshot is named after its URL with the scheme removed; characters
    that are path separators or invalid in file names are replaced by ``_``
    so every image becomes a single file directly inside the output
    directory.
    """
    # Path separators and characters rejected in file names on common
    # platforms. URLs with a path, query or port would otherwise produce
    # nested directories or unusable names.
    unsafe_chars = str.maketrans({ch: "_" for ch in '\\/:*?"<>|'})

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)  # headless mode
        try:
            context = await browser.new_context(
                viewport={"width": 1920, "height": 1080},
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
            )
            # Limit concurrency: at most 5 pages are open at the same time.
            semaphore = asyncio.Semaphore(5)

            async def bounded_screenshot(url):
                async with semaphore:
                    page = await context.new_page()
                    try:
                        domain = url.replace("https://", "").replace("http://", "").rstrip("/")
                        filename = f"{domain.translate(unsafe_chars)}.png"
                        await take_screenshot(page, url, filename)
                    finally:
                        # Close the page even on error or cancellation.
                        await page.close()

            await asyncio.gather(*(bounded_screenshot(url) for url in URLS))
        finally:
            # Always shut the browser down, whatever happened above.
            await browser.close()
if __name__ == "__main__":
    asyncio.run(main())

- full_page=True:截取整个页面(包括滚动区域)。
- wait_for_load_state("networkidle"):确保页面资源基本加载完成再截图。
- asyncio.Semaphore 控制同时打开的页面数量,防止内存溢出。
- 截图文件名根据 URL 自动生成(例如 www.baidu.com.png)。

| 工具 | 优点 | 缺点 |
|---|---|---|
| Playwright | 快、现代、支持并发、API 简洁 | 需要安装浏览器 |
| Selenium + ChromeDriver | 成熟、文档多 | 较慢、资源占用高 |
| requests + html2image | 轻量 | 无法执行 JS,截图不完整 |
对于“真实渲染截图”,必须使用浏览器引擎(Playwright/Selenium),纯 HTTP 请求无法获取动态内容。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。