```
Traceback (most recent call last):
  File "D:\News-Title-Generation\ElNashra-WebScraping\venv\Lib\site-packages\twisted\internet\defer.py", line 2003, in _inlineCallbacks
    result = context.run(gen.send, result)
  File "D:\News-Title-Generation\ElNashra-WebScraping\venv\Lib\site-packages\scrapy\crawler.py", line 158, in crawl
    self.engine = self._create_engine()
  File "D:\News-Title-Generation\ElNashra-WebScraping\venv\Lib\site-packages\scrapy\crawler.py", line 172, in _create_engine
    return ExecutionEngine(self, lambda _: self.stop())
  File "D:\News-Title-Generation\ElNashra-WebScraping\venv\Lib\site-packages\scrapy\core\engine.py", line 99, in __init__
    self.downloader: Downloader = downloader_cls(crawler)
  File "D:\News-Title-Generation\ElNashra-WebScraping\venv\Lib\site-packages\scrapy\core\downloader\__init__.py", line 97, in __init__
    DownloaderMiddlewareManager.from_crawler(crawler)
  File "D:\News-Title-Generation\ElNashra-WebScraping\venv\Lib\site-packages\scrapy\middleware.py", line 90, in from_crawler
    return cls.from_settings(crawler.settings, crawler)
  File "D:\News-Title-Generation\ElNashra-WebScraping\venv\Lib\site-packages\scrapy\middleware.py", line 67, in from_settings
    mw = create_instance(mwcls, settings, crawler)
  File "D:\News-Title-Generation\ElNashra-WebScraping\venv\Lib\site-packages\scrapy\utils\misc.py", line 188, in create_instance
    instance = objcls.from_crawler(crawler, *args, **kwargs)
  File "D:\News-Title-Generation\ElNashra-WebScraping\venv\Lib\site-packages\scrapy_selenium\middlewares.py", line 96, in from_crawler
    middleware = cls(
  File "D:\News-Title-Generation\ElNashra-WebScraping\venv\Lib\site-packages\scrapy_selenium\middlewares.py", line 76, in __init__
    service=ChromeService(ChromeDriverManager().install()))
  File "D:\News-Title-Generation\ElNashra-WebScraping\venv\Lib\site-packages\webdriver_manager\chrome.py", line 40, in install
    driver_path = self._get_driver_binary_path(self.driver)
  File "D:\News-Title-Generation\ElNashra-WebScraping\venv\Lib\site-packages\webdriver_manager\core\manager.py", line 40, in _get_driver_binary_path
    file = self._download_manager.download_file(driver.get_driver_download_url(os_type))
  File "D:\News-Title-Generation\ElNashra-WebScraping\venv\Lib\site-packages\webdriver_manager\drivers\chrome.py", line 32, in get_driver_download_url
    driver_version_to_download = self.get_driver_version_to_download()
  File "D:\News-Title-Generation\ElNashra-WebScraping\venv\Lib\site-packages\webdriver_manager\core\driver.py", line 48, in get_driver_version_to_download
    return self.get_latest_release_version()
  File "D:\News-Title-Generation\ElNashra-WebScraping\venv\Lib\site-packages\webdriver_manager\drivers\chrome.py", line 64, in get_latest_release_version
    determined_browser_version = ".".join(determined_browser_version.split(".")[:3])
AttributeError: 'NoneType' object has no attribute 'split'
```
I'm scraping a news website with Scrapy and Selenium. The scraping logic itself seems correct, but I hit the error above as soon as I try to integrate Selenium to handle the site's infinite scroll.
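My reading of the last traceback frames is that webdriver-manager cannot detect the Chrome version installed on my machine (`determined_browser_version` is `None`), so it crashes on `.split(".")` before it can pick a chromedriver to download. One workaround I've seen suggested is pinning the driver version so no browser auto-detection is needed; this is only a sketch, and I'm not sure it applies cleanly here because `ChromeDriverManager()` is constructed inside `scrapy_selenium/middlewares.py` rather than in my own code. As far as I know, recent webdriver-manager releases take a `driver_version` keyword (the 3.x series called it `version`), and the version string below is just a placeholder for whatever Chrome I actually have installed:

```python
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager

# Placeholder version string: replace with the version of the Chrome actually
# installed on the machine (visible at chrome://version), so webdriver-manager
# skips its browser-version auto-detection entirely.
driver = webdriver.Chrome(
    service=ChromeService(
        ChromeDriverManager(driver_version="120.0.6099.109").install()
    )
)
```

If that is the right direction, I assume I'd have to apply it wherever the middleware builds its driver, or point the middleware at a locally downloaded chromedriver instead.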
Here is my spider:

```python
import scrapy
from newsscraper.items import NewsItem
from scrapy_selenium import SeleniumRequest
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
import time


class NewsspiderSpider(scrapy.Spider):
    name = "newsspider"
    allowed_domains = ["www.elnashra.com"]

    def start_requests(self):
        url = "https://www.elnashra.com/"
        yield SeleniumRequest(url=url, callback=self.parse)

    def parse(self, response):
        self.scroll_down(response)
        news = response.css('div.news')
        for new in news:
            url = new.css('a.news-title').attrib['href']
            yield scrapy.Request(url, callback=self.parse_news_page)

    def parse_news_page(self, response):
        article = response.css('div.news_body')[0]
        news_item = NewsItem()
        news_item['url'] = response.url
        news_item['title'] = article.css('h1.topTitle::text').get()
        news_item['body'] = article.css('.articleBody p::text').getall()
        yield news_item

    def scroll_down(self, response):
        # Initialize a Chrome WebDriver
        service = Service(executable_path='./chromedriver.exe')
        options = webdriver.ChromeOptions()
        driver = webdriver.Chrome(service=service, options=options)

        # Scroll down to the bottom of the page to trigger infinite scroll
        last_height = driver.execute_script("return document.body.scrollHeight")
        while True:
            # Scroll down to the bottom
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            # Wait for the page to load
            time.sleep(2)  # Adjust sleep time as needed
            # Calculate new scroll height and compare with last scroll height
            new_height = driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                break
            last_height = new_height
        driver.quit()
```

settings.py:

```python
SELENIUM_DRIVER_NAME = 'chrome'
# SELENIUM_DRIVER_EXECUTABLE_PATH = "D:\\News-Title-Generation\\ElNashra-WebScraping\\newsscraper\\chromedriver.exe"
SELENIUM_DRIVER_ARGUMENTS = ['--headless']

DOWNLOADER_MIDDLEWARES = {
    'scrapy_selenium.SeleniumMiddleware': 800,
}
```
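Separately from the middleware error, I realize my `scroll_down` opens a second Chrome via `webdriver.Chrome(...)` rather than reusing the browser that scrapy-selenium already drives, so the page it scrolls is never the one the spider parses. As far as I understand, the SeleniumMiddleware exposes its driver on `request.meta['driver']`, so what I'm aiming for is something like the sketch below (`scroll_down_and_collect` is just a placeholder name, and re-parsing `driver.page_source` is my assumption about how to see the articles loaded by the infinite scroll):

```python
import time

import scrapy


class NewsspiderSpider(scrapy.Spider):
    # ... same spider as above ...

    def scroll_down_and_collect(self, response):
        # Reuse the browser that SeleniumMiddleware opened for this request
        # instead of starting a second Chrome instance.
        driver = response.request.meta["driver"]

        last_height = driver.execute_script("return document.body.scrollHeight")
        while True:
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)  # crude wait; explicit waits would be more robust
            new_height = driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                break
            last_height = new_height

        # The Scrapy response was built before any scrolling happened, so
        # re-parse the live DOM to pick up articles loaded by infinite scroll.
        return scrapy.Selector(text=driver.page_source)
```

In `parse` I would then read the links from `self.scroll_down_and_collect(response).css('div.news')` instead of from `response.css('div.news')`, which only contains the pre-scroll HTML.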
Please help me fix this error. Thank you!