I have a Python script that executes every time a file is uploaded to a storage container. If I upload many files at once, only 2 rows show up — for the oldest file and the most recent, I think. Is there a way to have Cloud Functions pause for a second in between each execution?
I have tried changing the maximum concurrent requests per instance when editing the function, and I increased the CPU; however, I am still missing rows. I changed it to 10 concurrent instances, then uploaded 7 files, and only 5 rows appeared.
Code example:
from google.cloud import storage
from cloudevents.http import CloudEvent
import functions_framework
import pandas as pd
import re
import datetime
import gspread
import glob
import gcsfs
from gspread_dataframe import set_with_dataframe
from google.oauth2.service_account import Credentials
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive

# Scopes needed to both edit the spreadsheet and reach it via Drive.
scopes = [
    'https://www.googleapis.com/auth/spreadsheets',
    'https://www.googleapis.com/auth/drive',
]
credentials = Credentials.from_service_account_file('test.json', scopes=scopes)
gc = gspread.authorize(credentials)
gauth = GoogleAuth()
drive = GoogleDrive(gauth)

# Open the Google Sheet we are working with and select the Total worksheet.
# NOTE: this runs at cold start, once per function instance, not per event.
gs = gc.open_by_url('URL OF SHEET')
main_Sheet = gs.worksheet('Total')

# Date stamp embedded in the filename, e.g. "7-Mar-2023".
# Compiled once at import time (raw string also avoids the deprecated
# "\-" escape the original pattern used; "-" needs no escaping here).
_DATE_PATTERN = re.compile(
    r'(([0-9])|([0-2][0-9])|([3][0-1]))-'
    r'(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)-\d{4}'
)


# Triggered by a change in a storage bucket.
@functions_framework.cloud_event
def find_file(cloud_event: CloudEvent) -> None:
    """Cloud Function triggered by Cloud Storage when a file is changed.

    Gets the names of the newly created object and its bucket, then cleans
    the file and appends a row to a Google Sheet.

    Raises:
        ValueError: if the event is not an object-finalized event.
    """
    # Only object.finalized (upload complete) events are expected.
    expected_type = "google.cloud.storage.object.v1.finalized"
    received_type = cloud_event["type"]
    if received_type != expected_type:
        raise ValueError(f"Expected {expected_type} but received {received_type}")

    # Extract the bucket and file names of the uploaded file.
    data = cloud_event.data
    bucket = data["bucket"]
    filename = data["name"]

    # Process the file only when its name contains the expected marker string.
    if re.search('STRING', filename):
        hello_gcs(bucket, filename)
    else:
        print("no valid file found")


def hello_gcs(bucket: str, filename: str):
    """Read the uploaded object, clean it, and append one row to the sheet.

    Args:
        bucket: name of the Cloud Storage bucket the event fired for.
        filename: object name within that bucket.

    Raises:
        ValueError: if the filename does not contain a d-Mon-YYYY date.
    """
    # Pull the date stamp out of the filename; fail loudly (instead of an
    # opaque AttributeError on .group()) when it is missing.
    day = _DATE_PATTERN.search(str(filename))
    if day is None:
        raise ValueError(f"No d-Mon-YYYY date found in filename: {filename!r}")
    date = datetime.datetime.strptime(day.group(), '%d-%b-%Y').date()

    fs = gcsfs.GCSFileSystem(project='PROJECT_NAME')
    # Read the object the event actually fired for: the original hard-coded
    # the bucket in the path, so concurrent events for other buckets/objects
    # could read the wrong file. Context manager closes the handle (the
    # original leaked it).
    with fs.open(f'{bucket}/{filename}', "r") as file:
        read = file.readlines()

    # This is a safe range for the data we are after and should account for
    # any changes in the rows that may happen.
    Main_DF = read[50:100]
    # Split the data and clean out the '=' dividers.
    Main_DF = [row.strip().replace('=', '').split(', ') for row in Main_DF]
    # Get rid of whitespace.
    Main_DF = [row[0].split() for row in Main_DF]
    # Convert to data frame.
    Main_DF = pd.DataFrame(Main_DF)

    ##
    ## edited out all of the cleaning and data work here
    ## Mostly using pandas to manipulate the data and clean it to import it
    ## into a google sheet. (var_1..var_5 are produced by that elided code.)
    ##

    # Combine everything into one DF.
    df_Output = pd.concat([var_1, var_2, var_3, var_4, var_5])
    df_Output = pd.DataFrame(df_Output)
    # Add the column for type.
    df_Output['Type'] = ['typeA', 'typeB', 'typeC', 'typeD', 'typeE']
    df_Output = df_Output.rename(columns={0: 'Amount'})

    # Reshape and create the date field.
    df_Output_Main = df_Output.set_index('Type').transpose()
    df_Output_Main['Date'] = str(date)

    # Append the row(s) to the Total worksheet. NOTE(review): each
    # values_append call is a single API request; if rows still go missing
    # under concurrent uploads, check the function logs for failed/retried
    # executions rather than the append itself — confirm.
    df_values = df_Output_Main.values.tolist()
    gs.values_append('Total', {'valueInputOption': 'RAW'}, {'values': df_values})