I have a Python script that fetches data from an API (specifically www.nseindia.com/api/corporate-announcements). When I run the script locally in PyCharm, it executes without any issues and fetches the data quickly. However, when I run the same script on PythonAnywhere, I encounter a timeout error:
HTTPSConnectionPool(host='www.nseindia.com', port=443): Read timed out. (read timeout=300)

My Python code:
import requestsimport pandas as pdimport gspreadfrom oauth2client.service_account import ServiceAccountCredentialsimport scheduleimport timeimport numpy as np# Define Google Sheets credentialsscope = ['https://spreadsheets.google.com/feeds','https://www.googleapis.com/auth/drive']credentials = ServiceAccountCredentials.from_json_keyfile_name('announcement-capture-7bef8a7ce6eb.json', scope)client = gspread.authorize(credentials)# Google Sheets document IDsheet_id = '1NKSFgnIUB3dW7Y0ZHCfXRd-B6QEYmCdW3km8g61X_bM'headers = {'Accept-Encoding': 'gzip, deflate, br, zstd','Accept-Language': 'en-US,en;q=0.9','User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'}baseurl = 'https://www.nseindia.com/'url = 'https://www.nseindia.com/api/corporate-announcements?index=equities'def get_announcements_nse(): try: session = requests.Session() request = session.get(baseurl, headers=headers, timeout=300) cookies = dict(request.cookies) data = session.get(url, headers=headers, timeout=300, cookies=cookies) data.raise_for_status() # Raise HTTPError for bad responses json_data = data.json() if not json_data: # Check if JSON data is empty print("No announcements found.") return pd.DataFrame( columns=['Symbol', 'Name', 'Description', 'Announcement_Date', 'Announcement_Text', 'Attachment_File']) else: # Select only the desired columns from the JSON response selected_columns = ['symbol', 'sm_name', 'desc', 'an_dt', 'attchmntText', 'attchmntFile'] # Create the DataFrame with selected columns announcements = pd.DataFrame(json_data)[selected_columns] # Rename the columns to match your Google Sheets column names announcements.columns = ['Symbol', 'Name', 'Description', 'Announcement_Date', 'Announcement_Text','Attachment_File'] # Filter out announcements with specific keywords in 'Description' column keywords_to_skip = ['Disclosure', 'Certificate', 'Spurt', 'Loss', 'Duplicate', 'Share Certificate','Price Movement', 'Price'] 
announcements_filtered = announcements[ ~announcements['Description'].str.contains('|'.join(keywords_to_skip), case=False)] return announcements_filtered except (requests.exceptions.RequestException, ValueError) as e: print(f"An error occurred: {e}") return pd.DataFrame( columns=['Symbol', 'Name', 'Description', 'Announcement_Date', 'Announcement_Text', 'Attachment_File'])def update_google_sheet(): # Get NSE announcements announcements_nse = get_announcements_nse() # Open the Google Sheets document by ID sheet = client.open_by_key(sheet_id).sheet1 # Define the expected headers expected_headers = ['Symbol', 'Name', 'Description', 'Announcement_Date', 'Attachment_Text', 'Attachment_File'] # If the sheet is empty with only column names, directly add all NSE announcements if len(sheet.get_all_values()) == 1: # Check if the sheet has only column names # Replace NaN values with a placeholder announcements_nse.replace({np.nan: ''}, inplace=True) for index, row in announcements_nse.iterrows(): # Construct the URL with the symbol url = f'https://www.nseindia.com/get-quotes/equity?symbol={row["Symbol"]}' # Insert the URL into the appropriate cell in the sheet sheet.insert_row([url] + row.tolist()[1:], index + 2) else: # Get existing data in the sheet existing_data = sheet.get_all_records(expected_headers=expected_headers) # Convert existing data to DataFrame existing_df = pd.DataFrame(existing_data) # Filter out existing announcements from new NSE announcements new_announcements_nse = announcements_nse.merge(existing_df, on=['Symbol', 'Name', 'Announcement_Date'], how='left', indicator=True) new_announcements_nse = new_announcements_nse[new_announcements_nse['_merge'] == 'left_only'].drop( columns=['_merge']) # Replace NaN values with a placeholder new_announcements_nse.replace({np.nan: ''}, inplace=True) # Append new NSE announcements to the top of the sheet for index, row in new_announcements_nse.iterrows(): # Construct the URL with the symbol url = 
f'https://www.nseindia.com/get-quotes/equity?symbol={row["Symbol"]}' # Insert the URL into the appropriate cell in the sheet sheet.insert_row([url] + row.tolist()[1:], index + 2) print("NSE data updated successfully!")# Schedule the job to run every 120 secondsschedule.every(120).seconds.do(update_google_sheet)while True: schedule.run_pending() time.sleep(1)The timeout occurs consistently on PythonAnywhere, even after adjusting timeout settings in the script. What could be causing this timeout issue on PythonAnywhere, and how can I troubleshoot or resolve it? Are there any specific configurations or settings I need to adjust on PythonAnywhere to prevent the timeout?
Any insights or suggestions would be greatly appreciated! Thank you.
I tried increasing the timeout to 300 seconds, but it still gives the timeout error. I expect this to run without errors on PythonAnywhere, the same as it runs on my local computer.