Quantcast
Channel: Active questions tagged python - Stack Overflow
Viewing all articles
Browse latest Browse all 19054

Python script to sort mails by year and month (nested labels & speed)

$
0
0

I need a Python script to automatically sort emails by year and month. In general, it would be possible to perform a search or filter operation directly in Gmail, but because of the huge amount of emails in this inbox all these operations are failing.

Therefore, I'm trying to achieve this with the following script:

"""Label every Gmail message with a "YYYY-MM" label derived from its Date header.

The script lists all messages matching a query, reads each message's Date
header, and adds a label named after the year and month (e.g. "2024-03"),
creating the label first when it does not exist yet.
"""

import datetime
import os
import re
from collections import defaultdict
from email.utils import parsedate_to_datetime

from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

# gmail.modify is required to list/read messages and to change their labels;
# it also covers creating labels.  A token.json issued for a different scope
# has to be deleted so that the consent flow runs again.
SCOPES = ['https://www.googleapis.com/auth/gmail.modify']

# Largest page size accepted by users.messages.list.
LIST_PAGE_SIZE = 500

# Largest number of message ids accepted by one users.messages.batchModify call.
BATCH_MODIFY_LIMIT = 1000

# Timezone used to decide which month a message belongs to.  None means the
# local timezone of the machine running the script; set it to e.g.
# datetime.timezone.utc to get results that do not depend on the machine.
LABEL_TIMEZONE = None


def create_label_if_not_exists(service, user_id, label_name):
    """Make sure a label called *label_name* exists.

    Labels are addressed by id in the API, so the existence check is done by
    listing the labels and comparing names.

    Returns the label id, or None when the label could neither be found nor
    created.
    """
    try:
        label_id = get_label_id(service, user_id, label_name)
    except HttpError as e:
        print('Error checking label:', e)
        return None
    if label_id:
        print('Label already exists:', label_name)
        return label_id
    return create_label(service, user_id, label_name)


def create_label(service, user_id, label_name):
    """Create a visible user label and return its id (None on failure)."""
    label = {
        'name': label_name,
        'messageListVisibility': 'show',
        'labelListVisibility': 'labelShow',
    }
    try:
        created_label = service.users().labels().create(
            userId=user_id, body=label).execute()
    except HttpError as e:
        if e.resp.status == 409:
            print('Label already exists or conflicts:', label_name)
        else:
            print('Error creating label:', e)
        return None
    print('Label created:', created_label['name'])
    return created_label['id']


def apply_label(service, user_id, message_id, label_id):
    """Add *label_id* to a single message; API errors are printed, not raised."""
    body = {'addLabelIds': [label_id]}
    try:
        service.users().messages().modify(
            userId=user_id, id=message_id, body=body).execute()
        print('Label applied to email:', label_id)
    except HttpError as e:
        print('Error applying label to email:', e)


def extract_year_month(date_str):
    """Turn an e-mail Date header into a "YYYY-MM" label name.

    The header is parsed with the standard library's RFC 2822 parser, which
    copes with an optional leading weekday, numeric or named timezones and
    trailing comments such as "(CET)".  The timestamp is then converted to
    LABEL_TIMEZONE so that messages sent around midnight from other
    timezones end up in a consistent month.

    Returns None (after printing a message) when the header cannot be parsed.
    """
    try:
        date_obj = parsedate_to_datetime(date_str)
        if date_obj.tzinfo is None:
            # No usable offset in the header ("-0000" or missing): treat the
            # timestamp as UTC instead of guessing a local zone.
            date_obj = date_obj.replace(tzinfo=datetime.timezone.utc)
        date_obj = date_obj.astimezone(LABEL_TIMEZONE)
    except (TypeError, ValueError, OverflowError, OSError):
        # TypeError: unparsable header on older Python versions;
        # ValueError: unparsable header or out-of-range fields;
        # OverflowError/OSError: timestamp outside what the platform's
        # local-time conversion supports.
        print("Failed to parse 'Created at' date:", date_str)
        return None
    return f"{date_obj.year}-{date_obj.month:02d}"  # e.g. "2024-03"


def get_label_id(service, user_id, label_name):
    """Return the id of the label named *label_name*, or None if there is none."""
    labels_list = service.users().labels().list(userId=user_id).execute()
    for label in labels_list.get('labels', []):
        if label['name'] == label_name:
            return label['id']
    return None


def get_emails(service, user_id, query):
    """Return the {'id', 'threadId'} stubs of all messages matching *query*."""
    messages = []
    page_token = None
    while True:
        response = service.users().messages().list(
            userId=user_id,
            q=query,
            pageToken=page_token,
            maxResults=LIST_PAGE_SIZE,  # fewer round trips than the default page size
        ).execute()
        messages.extend(response.get('messages', []))
        page_token = response.get('nextPageToken')
        if not page_token:
            break
    return messages


def apply_or_create_label(service, user_id, message_id, label_name):
    """Label one message with *label_name*, creating the label when needed.

    Best effort: any error is printed and swallowed so that one bad message
    does not abort a long run.
    """
    try:
        label_id = get_label_id(service, user_id, label_name)
        if not label_id:
            label_id = create_label(service, user_id, label_name)
        if label_id:
            apply_label(service, user_id, message_id, label_id)
        else:
            print("Failed to apply label:", label_name)
    except Exception as e:
        print("Error applying or creating label:", e)


def _load_label_ids(service, user_id):
    """Fetch all labels once and return a {name: id} mapping."""
    labels_list = service.users().labels().list(userId=user_id).execute()
    return {label['name']: label['id'] for label in labels_list.get('labels', [])}


def _flush_label(service, user_id, label_name, message_ids, label_ids):
    """Apply *label_name* to *message_ids* with as few API calls as possible.

    *label_ids* is the {name: id} cache; it is updated when the label has to
    be created.
    """
    if not message_ids:
        return
    label_id = label_ids.get(label_name)
    if label_id is None:
        label_id = create_label(service, user_id, label_name)
        if label_id is None:
            print("Failed to apply label:", label_name)
            return
        label_ids[label_name] = label_id
    for start in range(0, len(message_ids), BATCH_MODIFY_LIMIT):
        chunk = message_ids[start:start + BATCH_MODIFY_LIMIT]
        body = {'ids': chunk, 'addLabelIds': [label_id]}
        try:
            service.users().messages().batchModify(
                userId=user_id, body=body).execute()
        except HttpError as e:
            print('Error applying label to email:', e)
        else:
            print('Label applied to', len(chunk), 'emails:', label_name)


def main():
    """Authorise against Gmail and label every message by year and month."""
    creds = None
    if os.path.exists('token.json'):
        creds = Credentials.from_authorized_user_file('token.json', SCOPES)
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file('credentials.json', SCOPES)
            creds = flow.run_local_server(port=0)
        with open('token.json', 'w') as token:
            token.write(creds.to_json())

    service = build('gmail', 'v1', credentials=creds)
    user_id = 'me'
    query = 'in:all'

    messages = get_emails(service, user_id, query)
    # One labels.list call up front instead of one (or two) per message.
    label_ids = _load_label_ids(service, user_id)
    # label name -> ids of messages waiting to receive that label
    pending = defaultdict(list)
    previous_label = None

    for message in messages:
        try:
            # Only the Date header is needed, so do not download the bodies.
            msg = service.users().messages().get(
                userId=user_id,
                id=message['id'],
                format='metadata',
                metadataHeaders=['Date'],
            ).execute()
        except HttpError as e:
            print('Error fetching email:', e)
            continue

        headers = msg.get('payload', {}).get('headers', [])
        # Header names are case-insensitive.
        date_header = next(
            (header['value'] for header in headers if header['name'].lower() == 'date'),
            None,
        )
        if date_header:
            label_name = extract_year_month(date_header)
            if not label_name:
                continue
            previous_label = label_name
        else:
            print("Date header not found in email, skipping...")
            if not previous_label:
                print("No previous label found to apply.")
                continue
            # Fall back to the label of the previously processed message.
            label_name = previous_label

        queue = pending[label_name]
        queue.append(message['id'])
        if len(queue) >= BATCH_MODIFY_LIMIT:
            # Flush full batches right away so progress is not lost if the
            # run is interrupted later.
            _flush_label(service, user_id, label_name, queue, label_ids)
            pending[label_name] = []

    for label_name, message_ids in pending.items():
        _flush_label(service, user_id, label_name, message_ids, label_ids)


if __name__ == '__main__':
    main()

Script briefly explained:

  • Scan "All Mails" and extract the date to sort them by year and month.
  • I tried to normalize the date, as it sometimes has a weekday at the beginning or a different time zone value ((CET), GMT, etc.) at the end.
  • When the date of an email cannot be fetched, the label of the previous email is applied as a fallback.

Questions:

  1. I would like to create a parent label "yearly overview", it should contain the years as sub labels and each year should contain the months. I tried to do so, but whenever I created the "yearly overview" parent label, the sub labels kept showing up on the same level. (I can post my non-working code here if needed).

  2. Is there any way to improve the speed of this script, or is there not much I can do because no data is stored on the local machine?

  3. A minor issue I can live with: as emails have different time zones, emails sent around midnight are sometimes not sorted correctly. If there is a quick fix I will take it, but it is not really bothering me.

Thanks for any help!


Viewing all articles
Browse latest Browse all 19054

Trending Articles



<script src="https://jsc.adskeeper.com/r/s/rssing.com.1596347.js" async> </script>