If you are using Scrapy, you can define your start URLs in the `start_requests` method and then filter out the links you don't want when following them in the `parse` callback:
```python
import scrapy


class MySpider(scrapy.Spider):
    name = "my_spider"

    def start_requests(self):
        urls = ['<URL>']  # replace with your start URL(s)
        for url in urls:
            yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        # Follow every link on the page except the unwanted ones
        for href in response.css('a::attr(href)').getall():
            if 'Wf6hTr0x' not in href:  # exclude unwanted files
                yield response.follow(href, self.save_file)

    def save_file(self, response):
        # Minimal example of saving the file: write the response body
        # to a local file named after the last path segment of the URL
        filename = response.url.split('/')[-1] or 'index'
        with open(filename, 'wb') as f:
            f.write(response.body)
```
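Run the spider with `scrapy crawl my_spider` from inside a Scrapy project. One thing to keep in mind: the substring check above runs on the raw `href` value, while `response.follow` resolves relative URLs for you; if the marker you want to exclude only appears in the absolute URL, build it first with `response.urljoin(href)` and test that instead.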