Use cases: web scraping, API/endpoint monitoring, content-update detection
1. Uses aiohttp + asyncio for efficient concurrent requests
2. Logic is encapsulated in a WebCrawler class
3. Automatically aggregates successful and failed results
4. Includes summary statistics
5. Code stays short and efficient
6. A complete usage example is provided
import asyncio
import aiohttp
from typing import List, Dict, Any


class WebCrawler:
    def __init__(self, urls: List[str], timeout: int = 10):
        self.urls = urls
        self.timeout = timeout
        self.results = []
        self.errors = []

    async def fetch(self, session: aiohttp.ClientSession, url: str) -> Dict[str, Any]:
        # Fetch one URL and normalize both outcomes into a result dict.
        try:
            async with session.get(url, timeout=aiohttp.ClientTimeout(total=self.timeout)) as response:
                content = await response.text()
                return {
                    'url': url,
                    'status': response.status,
                    'content_length': len(content),
                    'success': True
                }
        except Exception as e:
            return {
                'url': url,
                'error': str(e),
                'success': False
            }

    async def crawl(self) -> Dict[str, Any]:
        # Issue all requests concurrently over one shared session.
        async with aiohttp.ClientSession() as session:
            tasks = [self.fetch(session, url) for url in self.urls]
            responses = await asyncio.gather(*tasks)
        # fetch() never raises, so every entry is a plain result dict.
        for res in responses:
            if res['success']:
                self.results.append(res)
            else:
                self.errors.append(res)
        return {
            'success': self.results,
            'failed': self.errors,
            'total': len(self.urls),
            'succeeded': len(self.results),
            'failed_count': len(self.errors)
        }
# Usage example
if __name__ == "__main__":
    urls = [
        "https://url",
        "https://url",
        "https://url",
        "https://url"
    ]  # placeholder URLs; replace with the targets you want to crawl
    crawler = WebCrawler(urls)
    result = asyncio.run(crawler.crawl())

    print(f"Total requests: {result['total']}")
    print(f"Succeeded: {result['succeeded']}, Failed: {result['failed_count']}")

    print("\nSuccessful results:")
    for r in result['success']:
        print(f"  {r['url']} -> status: {r['status']}, content length: {r['content_length']}")

    print("\nFailed results:")
    for e in result['failed']:
        print(f"  {e['url']} -> error: {e['error']}")
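If the URL list grows large, launching every request at once can overwhelm the target server or exhaust local connections. Below is a minimal sketch of bounding concurrency with asyncio.Semaphore; it is an illustrative extension built on the WebCrawler class above, and the BoundedCrawler name and max_concurrency parameter are assumptions, not part of the original code.

import asyncio
import aiohttp
from typing import Any, Dict, List


class BoundedCrawler(WebCrawler):
    # Illustrative variant of WebCrawler that caps the number of in-flight requests.
    def __init__(self, urls: List[str], timeout: int = 10, max_concurrency: int = 5):
        super().__init__(urls, timeout)
        self.max_concurrency = max_concurrency
        self._semaphore = None  # created lazily inside the running event loop

    async def fetch(self, session: aiohttp.ClientSession, url: str) -> Dict[str, Any]:
        if self._semaphore is None:
            self._semaphore = asyncio.Semaphore(self.max_concurrency)
        # At most max_concurrency fetches may pass this point at the same time.
        async with self._semaphore:
            return await super().fetch(session, url)

It is called exactly like WebCrawler, e.g. asyncio.run(BoundedCrawler(urls, max_concurrency=10).crawl()); crawl() is untouched because only fetch() is wrapped.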