Quantcast
Viewing all articles
Browse latest Browse all 14040

'NoneType' object has no attribute 'split' _Error

Traceback (most recent call last):
  File "D:\News-Title-Generation\ElNashra-WebScraping\venv\Lib\site-packages\twisted\internet\defer.py", line 2003, in _inlineCallbacks
    result = context.run(gen.send, result)
  File "D:\News-Title-Generation\ElNashra-WebScraping\venv\Lib\site-packages\scrapy\crawler.py", line 158, in crawl
    self.engine = self._create_engine()
  File "D:\News-Title-Generation\ElNashra-WebScraping\venv\Lib\site-packages\scrapy\crawler.py", line 172, in _create_engine
    return ExecutionEngine(self, lambda _: self.stop())
  File "D:\News-Title-Generation\ElNashra-WebScraping\venv\Lib\site-packages\scrapy\core\engine.py", line 99, in __init__
    self.downloader: Downloader = downloader_cls(crawler)
  File "D:\News-Title-Generation\ElNashra-WebScraping\venv\Lib\site-packages\scrapy\core\downloader\__init__.py", line 97, in __init__
    DownloaderMiddlewareManager.from_crawler(crawler)
  File "D:\News-Title-Generation\ElNashra-WebScraping\venv\Lib\site-packages\scrapy\middleware.py", line 90, in from_crawler
    return cls.from_settings(crawler.settings, crawler)
  File "D:\News-Title-Generation\ElNashra-WebScraping\venv\Lib\site-packages\scrapy\middleware.py", line 67, in from_settings
    mw = create_instance(mwcls, settings, crawler)
  File "D:\News-Title-Generation\ElNashra-WebScraping\venv\Lib\site-packages\scrapy\utils\misc.py", line 188, in create_instance
    instance = objcls.from_crawler(crawler, *args, **kwargs)
  File "D:\News-Title-Generation\ElNashra-WebScraping\venv\Lib\site-packages\scrapy_selenium\middlewares.py", line 96, in from_crawler
    middleware = cls(
  File "D:\News-Title-Generation\ElNashra-WebScraping\venv\Lib\site-packages\scrapy_selenium\middlewares.py", line 76, in __init__
    service=ChromeService(ChromeDriverManager().install()))
  File "D:\News-Title-Generation\ElNashra-WebScraping\venv\Lib\site-packages\webdriver_manager\chrome.py", line 40, in install
    driver_path = self._get_driver_binary_path(self.driver)
  File "D:\News-Title-Generation\ElNashra-WebScraping\venv\Lib\site-packages\webdriver_manager\core\manager.py", line 40, in _get_driver_binary_path
    file = self._download_manager.download_file(driver.get_driver_download_url(os_type))
  File "D:\News-Title-Generation\ElNashra-WebScraping\venv\Lib\site-packages\webdriver_manager\drivers\chrome.py", line 32, in get_driver_download_url
    driver_version_to_download = self.get_driver_version_to_download()
  File "D:\News-Title-Generation\ElNashra-WebScraping\venv\Lib\site-packages\webdriver_manager\core\driver.py", line 48, in get_driver_version_to_download
    return self.get_latest_release_version()
  File "D:\News-Title-Generation\ElNashra-WebScraping\venv\Lib\site-packages\webdriver_manager\drivers\chrome.py", line 64, in get_latest_release_version
    determined_browser_version = ".".join(determined_browser_version.split(".")[:3])
AttributeError: 'NoneType' object has no attribute 'split'

I'm working on web scraping a news website using Scrapy and Selenium. The logic of my script seems correct, but I'm encountering issues when trying to integrate the Selenium script for handling infinite scroll.

import time

import scrapy
from newsscraper.items import NewsItem
from scrapy_selenium import SeleniumRequest
from selenium import webdriver
from selenium.webdriver.chrome.service import Service


class NewsspiderSpider(scrapy.Spider):
    """Scrape article URLs, titles and bodies from the elnashra.com front page.

    The front page is fetched through ``SeleniumRequest`` so that it can be
    scrolled to trigger infinite-scroll loading before the article links
    are collected; the article pages themselves use plain Scrapy requests.
    """

    name = "newsspider"
    allowed_domains = ["www.elnashra.com"]

    def start_requests(self):
        """Yield the single Selenium-rendered request for the front page."""
        url = "https://www.elnashra.com/"
        yield SeleniumRequest(url=url, callback=self.parse)

    def parse(self, response):
        """Scroll the front page, then yield one request per news teaser.

        Args:
            response: The response produced for the ``SeleniumRequest``; its
                request carries the live WebDriver under ``meta["driver"]``.

        Yields:
            ``scrapy.Request`` objects routed to :meth:`parse_news_page`.
        """
        driver = response.request.meta["driver"]
        self.scroll_down(response)

        # ``response`` holds the HTML captured *before* scrolling, so re-read
        # the DOM from the browser to see the teasers loaded by the scroll.
        page = scrapy.Selector(text=driver.page_source)
        for new in page.css('div.news'):
            # ``attrib`` is an empty mapping when the teaser has no title
            # link, so use ``get`` instead of indexing to skip such teasers.
            href = new.css('a.news-title').attrib.get('href')
            if not href:
                continue
            # ``urljoin`` leaves absolute URLs untouched and resolves
            # relative ones against the front-page URL.
            yield scrapy.Request(
                response.urljoin(href), callback=self.parse_news_page
            )

    def parse_news_page(self, response):
        """Extract the URL, title and paragraph texts of one article page.

        Yields:
            A populated ``NewsItem``.
        """
        article = response.css('div.news_body')[0]
        news_item = NewsItem()
        news_item['url'] = response.url
        news_item['title'] = article.css('h1.topTitle::text').get()
        news_item['body'] = article.css('.articleBody p::text').getall()
        yield news_item

    def scroll_down(self, response):
        """Scroll the already-loaded page until its height stops growing.

        The WebDriver that rendered ``response`` is reused. Starting a new
        ``webdriver.Chrome`` here would open a blank browser that never
        visited the page, so scrolling it would have no effect.

        Args:
            response: The response of a ``SeleniumRequest``; the driver is
                read from ``response.request.meta["driver"]``.
        """
        driver = response.request.meta["driver"]

        last_height = driver.execute_script("return document.body.scrollHeight")
        while True:
            # Jump to the bottom to trigger the next infinite-scroll batch.
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            # Give the page time to load; adjust sleep time as needed.
            time.sleep(2)
            # An unchanged height means no further content was appended.
            new_height = driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                break
            last_height = new_height
        # Do not call ``driver.quit()`` here: the driver is created by the
        # scrapy-selenium middleware, not by this spider, and ``parse``
        # still reads ``driver.page_source`` after this method returns.


# setting.py
# NOTE(review): the AttributeError in the traceback is raised inside
# webdriver_manager while it determines the installed Chrome version (the
# detected version is None) - presumably Chrome is not installed or not
# detectable on this machine; verify the browser installation and the
# webdriver_manager version.
SELENIUM_DRIVER_NAME = 'chrome'
# SELENIUM_DRIVER_EXECUTABLE_PATH = "D:\\News-Title-Generation\\ElNashra-WebScraping\\newsscraper\\chromedriver.exe"
SELENIUM_DRIVER_ARGUMENTS = ['--headless']
DOWNLOADER_MIDDLEWARES = {
    'scrapy_selenium.SeleniumMiddleware': 800,
}

Please help me fix this error. Thank you.


Viewing all articles
Browse latest Browse all 14040

Trending Articles