crawler_81tv/scrapy_proj/spiders/example.py
2025-06-08 16:25:53 +08:00

32 lines
1.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import scrapy
from typing import Optional
class ExampleSpider(scrapy.Spider):
    """Example spider that scrapes quotes, starting from a configurable URL."""

    name = "example"

    def __init__(self, url: Optional[str] = None, *args, **kwargs):
        """Initialize the spider.

        Args:
            url: Start URL; it can be passed in through the API. When it is
                omitted or empty, the crawl starts at
                http://quotes.toscrape.com instead.
            *args: Forwarded unchanged to the parent initializer.
            **kwargs: Forwarded unchanged to the parent initializer.
        """
        super().__init__(*args, **kwargs)
        # Any falsy url (None or an empty string) selects the default page.
        self.start_urls = [url or "http://quotes.toscrape.com"]

    def parse(self, response):
        """Parse the page data.

        This is an example parser that scrapes quotes and authors from
        quotes.toscrape.com.

        Yields:
            One dict per ``div.quote`` element with the keys ``text``,
            ``author`` and ``tags``; afterwards, when a ``li.next`` link is
            present, the result of ``response.follow`` for that link with
            this method as the callback.
        """
        for entry in response.css("div.quote"):
            text = entry.css("span.text::text").get()
            author = entry.css("small.author::text").get()
            tags = entry.css("div.tags a.tag::text").getall()
            yield {"text": text, "author": author, "tags": tags}

        # Get the link to the next page; stop when there is none.
        next_href = response.css("li.next a::attr(href)").get()
        if next_href is None:
            return
        yield response.follow(next_href, callback=self.parse)