import scrapy
from typing import Optional


class ExampleSpider(scrapy.Spider):
    """Example spider that extracts quotes and follows "next page" links."""

    name = "example"

    def __init__(self, url: Optional[str] = None, *args, **kwargs):
        """Initialise the spider.

        Args:
            url: Start URL; may be passed in through the API. When it is
                missing or empty, the default quotes.toscrape.com URL is
                used instead.
        """
        super().__init__(*args, **kwargs)
        if url:
            self.start_urls = [url]
        else:
            self.start_urls = ["http://quotes.toscrape.com"]

    def parse(self, response):
        """Parse the page data.

        This is a sample parser that scrapes quotes and their authors from
        quotes.toscrape.com. It yields one dict per ``div.quote`` element
        and then, if the page has a "next" link, a follow-up request that
        is handled by this same method.
        """
        for quote_block in response.css('div.quote'):
            text = quote_block.css('span.text::text').get()
            author = quote_block.css('small.author::text').get()
            tags = quote_block.css('div.tags a.tag::text').getall()
            yield {
                'text': text,
                'author': author,
                'tags': tags,
            }

        # Look up the link to the next page; stop when there is none.
        next_href = response.css('li.next a::attr(href)').get()
        if next_href is None:
            return
        yield response.follow(next_href, callback=self.parse)