# crawler_81tv/scrapy_proj/settings.py
# Scrapy settings for scrapy_proj project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
# https://docs.scrapy.org/en/latest/topics/settings.html
# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
BOT_NAME = "scrapy_proj"
SPIDER_MODULES = ["scrapy_proj.spiders"]
NEWSPIDER_MODULE = "scrapy_proj.spiders"
# Crawl responsibly by identifying yourself (and your website) on the user-agent
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
# Obey robots.txt rules
ROBOTSTXT_OBEY = False
# Configure maximum concurrent requests performed by Scrapy (default: 16)
CONCURRENT_REQUESTS = 8
# Configure a delay for requests for the same website (default: 0)
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
DOWNLOAD_DELAY = 1
# The download delay setting will honor only one of:
CONCURRENT_REQUESTS_PER_DOMAIN = 8
CONCURRENT_REQUESTS_PER_IP = 8
# Disable cookies (enabled by default)
#COOKIES_ENABLED = False
# Disable Telnet Console (enabled by default)
#TELNETCONSOLE_ENABLED = False
# Override the default request headers:
DEFAULT_REQUEST_HEADERS = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
}
# Enable or disable spider middlewares
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
#SPIDER_MIDDLEWARES = {
#    "scrapy_proj.middlewares.ScrapyProjSpiderMiddleware": 543,
#}
# Enable or disable downloader middlewares
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
DOWNLOADER_MIDDLEWARES = {
    "scrapy_proj.middlewares.ScrapyProjDownloaderMiddleware": 543,
}
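# The ScrapyProjDownloaderMiddleware enabled above is expected to live in
# scrapy_proj/middlewares.py. A minimal sketch of what such a downloader
# middleware looks like (assumed structure; the actual class may do more,
# e.g. proxy rotation or header injection):
#
#   class ScrapyProjDownloaderMiddleware:
#       def process_request(self, request, spider):
#           # Returning None lets the request continue through the chain.
#           return None
#
#       def process_response(self, request, response, spider):
#           # Must return a Response (or a new Request to retry/redirect).
#           return response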
# Enable or disable extensions
# See https://docs.scrapy.org/en/latest/topics/extensions.html
#EXTENSIONS = {
#    "scrapy.extensions.telnet.TelnetConsole": None,
#}
# Configure item pipelines
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
ITEM_PIPELINES = {
    # "scrapy_proj.pipelines.AliyunVodPipeline": 300,  # upload to Alibaba Cloud VOD
    # "scrapy_proj.pipelines.SQLitePipeline": 400,  # save to SQLite
    # "scrapy_proj.pipelines.MariaDBPipeline": 500,  # finally, save to MariaDB
}
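# The (currently disabled) SQLitePipeline referenced above would typically
# resolve SQLITE_FILE against PROJECT_ROOT (both defined further down in this
# file). A minimal sketch, assuming a hypothetical pipeline of that shape:
#
#   import os
#   import sqlite3
#
#   class SQLitePipeline:
#       @classmethod
#       def from_crawler(cls, crawler):
#           pipe = cls()
#           pipe.db_path = os.path.join(
#               crawler.settings.get("PROJECT_ROOT", ""),
#               crawler.settings.get("SQLITE_FILE", "data/videos.db"),
#           )
#           return pipe
#
#       def open_spider(self, spider):
#           os.makedirs(os.path.dirname(self.db_path), exist_ok=True)
#           self.conn = sqlite3.connect(self.db_path)
#
#       def process_item(self, item, spider):
#           ...  # INSERT the item fields, then self.conn.commit()
#           return item
#
#       def close_spider(self, spider):
#           self.conn.close()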
# Enable and configure the AutoThrottle extension (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
AUTOTHROTTLE_ENABLED = True
# The initial download delay
AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
#AUTOTHROTTLE_DEBUG = False
# Enable and configure HTTP caching (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
#HTTPCACHE_ENABLED = True
#HTTPCACHE_EXPIRATION_SECS = 0
#HTTPCACHE_DIR = "httpcache"
#HTTPCACHE_IGNORE_HTTP_CODES = []
#HTTPCACHE_STORAGE = "scrapy.extensions.httpcache.FilesystemCacheStorage"
# Set settings whose default value is deprecated to a future-proof value
REQUEST_FINGERPRINTER_IMPLEMENTATION = "2.7"
TWISTED_REACTOR = "twisted.internet.asyncioreactor.AsyncioSelectorReactor"
FEED_EXPORT_ENCODING = "utf-8"
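# With FEED_EXPORT_ENCODING set to UTF-8, Chinese text in exported feeds is
# written as-is rather than as \uXXXX escapes. Example (hypothetical spider
# name, -O overwrites the output file):
#   scrapy crawl some_spider -O output.json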
import os
from dotenv import load_dotenv
load_dotenv()  # load environment variables from the .env file
# Project root, used to resolve the SQLite database file path
PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# Database configuration
SQLITE_FILE = os.getenv("SQLITE_FILE", "data/videos.db")
# MariaDB configuration
MYSQL_HOST = os.getenv("MYSQL_HOST", "localhost")  # database host
MYSQL_PORT = int(os.getenv("MYSQL_PORT", "3306"))  # database port
MYSQL_USER = os.getenv("MYSQL_USER", "root")  # database user
MYSQL_PASSWORD = os.getenv("MYSQL_PASSWORD")  # database password
MYSQL_DATABASE = os.getenv("MYSQL_DATABASE", "dev_yszy")  # database name
# Alibaba Cloud configuration
ALIYUN_ACCESS_KEY_ID = os.getenv("ALIYUN_ACCESS_KEY_ID")  # AccessKey ID
ALIYUN_ACCESS_KEY_SECRET = os.getenv("ALIYUN_ACCESS_KEY_SECRET")  # AccessKey Secret
ALIYUN_TEMPLATE_GROUP_ID = os.getenv("ALIYUN_TEMPLATE_GROUP_ID")  # VOD transcoding template group ID
# Alibaba Cloud OSS configuration
ALIYUN_OSS_BUCKET = os.getenv("ALIYUN_OSS_BUCKET")  # OSS bucket name
ALIYUN_OSS_ENDPOINT = os.getenv("ALIYUN_OSS_ENDPOINT")  # OSS endpoint
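# The settings above are read from a .env file via python-dotenv. An example
# .env matching the keys used in this file (all values are placeholders):
#
#   SQLITE_FILE=data/videos.db
#   MYSQL_HOST=localhost
#   MYSQL_PORT=3306
#   MYSQL_USER=root
#   MYSQL_PASSWORD=change-me
#   MYSQL_DATABASE=dev_yszy
#   ALIYUN_ACCESS_KEY_ID=...
#   ALIYUN_ACCESS_KEY_SECRET=...
#   ALIYUN_TEMPLATE_GROUP_ID=...
#   ALIYUN_OSS_BUCKET=...
#   ALIYUN_OSS_ENDPOINT=...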