Playwright
Playwright 是微软开发的现代浏览器自动化工具,比 Selenium 更快、更稳定、功能更强大。
安装
pip install playwright
playwright install # download the browsers that Playwright automates
基本使用
from playwright.sync_api import sync_playwright
# Minimal session: launch Chromium, open a page, print its title,
# save a screenshot, then close the browser.
with sync_playwright() as playwright:
    # headless=False keeps the browser window visible while the script runs.
    browser = playwright.chromium.launch(headless=False)
    page = browser.new_page()

    # Navigate to the target URL.
    page.goto('https://example.com')

    # Read the document title and report it.
    title = page.title()
    print(f"标题: {title}")

    # Save a screenshot to example.png.
    page.screenshot(path='example.png')

    # Shut the browser down.
    browser.close()
元素定位和操作
from playwright.sync_api import sync_playwright
# Tour of the page-level helpers for locating and driving elements.
with sync_playwright() as playwright:
    browser = playwright.chromium.launch(headless=False)
    page = browser.new_page()
    page.goto('https://example.com')

    # Click an element.
    page.click('button#submit')

    # Type text into an input.
    page.fill('input#username', '张三')

    # Read an element's text content.
    text = page.text_content('div.content')

    # Read an attribute value.
    href = page.get_attribute('a.link', 'href')

    # Block until a matching element shows up.
    page.wait_for_selector('.dynamic-content')

    # Block until the page reaches the 'networkidle' load state.
    page.wait_for_load_state('networkidle')

    # Run a JavaScript expression in the page and get its result back.
    result = page.evaluate('() => document.title')

    browser.close()
表单操作
from playwright.sync_api import sync_playwright
# Fill in and submit a login form, then wait for the redirect.
with sync_playwright() as playwright:
    browser = playwright.chromium.launch(headless=False)
    page = browser.new_page()
    page.goto('https://example.com/login')

    # Text fields.
    page.fill('input[name="username"]', 'admin')
    page.fill('input[name="password"]', 'password123')

    # Drop-down list: pick the 'China' option.
    page.select_option('select#country', 'China')

    # Tick the checkbox.
    page.check('input[type="checkbox"]')

    # Pick the radio button whose value is "male".
    page.click('input[type="radio"][value="male"]')

    # Submit the form.
    page.click('button[type="submit"]')

    # Wait until the URL matches the dashboard pattern.
    page.wait_for_url('**/dashboard')

    browser.close()
网络拦截
from playwright.sync_api import sync_playwright
def handle_route(route):
    """Forward an intercepted request with one extra header attached.

    Args:
        route: Intercepted route; must expose ``request.headers`` (a mapping
            of header names to values) and ``continue_(headers=...)``.
    """
    # Build a fresh mapping instead of mutating the one exposed by the
    # request, so the override only affects the forwarded request.
    headers = {**route.request.headers, 'X-Custom-Header': 'test'}
    route.continue_(headers=headers)
# Register request interceptors and a response listener, then load a page.
with sync_playwright() as playwright:
    browser = playwright.chromium.launch(headless=False)
    page = browser.new_page()

    def block_image(route):
        """Abort the intercepted request."""
        route.abort()

    def log_api_response(response):
        """Print the status of any response whose URL contains 'api'."""
        if 'api' not in response.url:
            return
        print(f"API 响应: {response.status}")

    # Intercept requests: abort image downloads, rewrite headers on API calls.
    page.route('**/*.{png,jpg,jpeg}', block_image)
    page.route('**/api/**', handle_route)

    # Listen for responses.
    page.on('response', log_api_response)

    page.goto('https://example.com')
    browser.close()
多页面和上下文
from playwright.sync_api import sync_playwright
# Two separate browser contexts in one browser, plus popup handling.
with sync_playwright() as p:
    browser = p.chromium.launch(headless=False)

    # Create several contexts (each behaves like an incognito window).
    context1 = browser.new_context()
    context2 = browser.new_context()

    # Open one page in each context.
    page1 = context1.new_page()
    page2 = context2.new_page()
    page1.goto('https://example.com')
    page2.goto('https://test.com')

    # Handle a link that opens in a new tab. The popup is triggered from
    # page1; the name `page` is never defined in this script, so using it
    # here would raise NameError.
    with page1.expect_popup() as popup_info:
        page1.click('a[target="_blank"]')
    new_page = popup_info.value

    browser.close()
异步模式
import asyncio
from playwright.async_api import async_playwright
async def main():
    """Open example.com in Chromium through the async API and print its title."""
    async with async_playwright() as playwright:
        browser = await playwright.chromium.launch(headless=False)
        page = await browser.new_page()
        await page.goto('https://example.com')

        # Every page operation is a coroutine in async mode, so await the
        # title before formatting it.
        title = await page.title()
        print(f"标题: {title}")

        await browser.close()


asyncio.run(main())
爬虫示例
from playwright.sync_api import sync_playwright
# Scrape every quote and its author from a JavaScript-rendered page.
with sync_playwright() as p:
    browser = p.chromium.launch(headless=True)
    page = browser.new_page()

    # Open the target page.
    page.goto('https://quotes.toscrape.com/js/')

    # Wait until at least one quote is present, so the list of matches
    # taken below is not empty just because the page is still rendering.
    page.wait_for_selector('.quote')

    # Iterate over the matched quote elements directly instead of
    # indexing with count()/nth(i).
    for quote in page.locator('.quote').all():
        text = quote.locator('.text').text_content()
        author = quote.locator('.author').text_content()
        print(f"{author}: {text}")

    browser.close()
Playwright 优势:
- ⚡ 更快:执行速度通常明显快于 Selenium
- 🎯 更稳定:内置智能等待机制
- 🌐 多浏览器:支持 Chromium、Firefox、WebKit
- 📱 移动端:支持移动设备模拟
- 🔧 强大的 API:支持网络拦截、表单处理等
💡 提示:Playwright 是现代 Web 自动化的最佳选择,推荐优先使用。