I'm trying to build a Streamlit app that accepts a WhatsApp chat .txt file as input and renders graphical visualizations of the analysis I run on it in Python. It lets me select a file, but then shows error 403. I'll be honest: I wanted a website-type thing where I'd input a text file and get graphs as output, and GPT generated the Streamlit part of the code (the analysis code is still mine).
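From searching around, a 403 on file upload in Streamlit often comes from its built-in XSRF protection rejecting the upload request (especially when the app is accessed through a proxy or an embedded page) rather than from anything in the analysis code. One workaround I've seen suggested, assuming that's actually the cause here (I haven't confirmed it for my setup), is to relax those checks in .streamlit/config.toml:

[server]
enableXsrfProtection = false
enableCORS = false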
Here's the full code if necessary; I've marked where the Streamlit part starts with a comment.
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import re
from collections import Counter
from nltk.corpus import stopwords
from nltk.tokenize import TweetTokenizer
from wordcloud import WordCloud
import warnings

# Silence all warnings
warnings.filterwarnings("ignore")
st.set_option('deprecation.showPyplotGlobalUse', False)


class WhatsappChatAnalysis:
    def __init__(self, txt):
        self.txt = txt
        sns.set(style='whitegrid')

    def cleaning_organizing_etx(self):
        content = self.txt.strip()
        # Android-style export timestamp: "dd/mm/yyyy, hh:mm - "
        dttpattern = r'\d{2}/\d{2}/\d{4},\s\d{2}:\d{2}\s-\s'
        dates = re.findall(pattern=dttpattern, string=content)[1:]
        messages = re.split(pattern=dttpattern, string=content)[2:]
        df = pd.DataFrame({'date': dates, 'msg': messages})
        df['datetime'] = pd.to_datetime(df['date'], format='%d/%m/%Y, %H:%M - ')
        df['time'] = df['datetime'].dt.time
        df['date'] = df['datetime'].dt.date
        df['day'] = df['datetime'].dt.day_name()
        df['yearmonth'] = df['datetime'].dt.to_period('M')
        df['msg'] = df['msg'].apply(lambda a: a.strip())
        # Keep only rows with a "sender: message" structure, then split them
        df = df[df['msg'].str.contains(r'^([\s\S]+?):.*')]
        df['sender'] = df['msg'].apply(lambda a: re.search(pattern=r'^([\s\S]+?):.*', string=a).groups()[0])
        df['msg'] = df['msg'].apply(lambda a: ''.join(re.findall(pattern=r'.*:\s(.*)', string=a, flags=re.DOTALL)))
        df['message_word_count'] = df['msg'].apply(lambda a: len(a.split()))
        df['hour'] = df['datetime'].dt.hour
        bins = [0, 6, 12, 18, 24]
        labels = ['00-6', '6-12', '12-18', '18-24']
        df['time_of_day'] = pd.cut(df['hour'], bins=bins, labels=labels, right=False)
        return df

    def delays_per_month(self, dfm):
        tempdf = dfm[['sender', 'datetime', 'yearmonth']]
        # Keep only rows where the sender changes, i.e. actual replies
        f = tempdf[tempdf['sender'].ne(tempdf['sender'].shift())]
        alldiffs = f['datetime'].diff()
        s1, s2 = f['sender'].iloc[0], f['sender'].iloc[1]
        # Alternating rows belong to alternating senders; diffs in minutes
        s2delays = alldiffs.iloc[1::2].dt.total_seconds() / 60
        s1delays = alldiffs.iloc[0::2].dt.total_seconds() / 60
        s2delaysbymonth = pd.DataFrame({'yearmonth': f[f['sender'] == s2]['yearmonth'], 'delays': s2delays})
        s1delaysbymonth = pd.DataFrame({'yearmonth': f[f['sender'] == s1]['yearmonth'], 'delays': s1delays})
        ys = s2delaysbymonth.groupby(s2delaysbymonth['yearmonth']).mean()
        xs = [str(i) for i in ys.index]
        ys1 = s1delaysbymonth.groupby(s1delaysbymonth['yearmonth']).mean()
        xs1 = [str(i) for i in ys1.index]
        plt.figure(figsize=(max(len(xs), len(xs1), 10), max(len(xs), len(xs1), 10)))
        plt.plot(xs, ys, color='black', marker='o', label='{}'.format(s2))
        plt.plot(xs1, ys1, color='red', marker='s', label='{}'.format(s1))
        plt.title('Average delay between replies per month (in minutes)')
        plt.yscale('log')
        plt.legend()
        st.pyplot()

    def most_common_words(self, dfm):
        content = " ".join(dfm['msg'])
        words = TweetTokenizer().tokenize(content.lower())
        stop_words = set(stopwords.words('english'))
        stop_words.add('omitted')
        stop_words.add('media')
        stop_words.update(
            ['yes', 'no', 'yeah', 'na', 'nah', 'neh', 'ill', "i'll", 'go', 'okay', 'would', 'could', 'lol',
             'dont', 'ig', 'idk', 'imao', 'lmao', 'like', 'u', 'wanna', 'im', "i'm", 'i'])
        # Hinglish stopword list, expected alongside the script
        with open('stop_hinglish.txt') as f:
            hinglish_stopwords = f.read().split('\n')
        stop_words.update(hinglish_stopwords)
        filtered_words = [word for word in words if word.isalnum() and word not in stop_words]
        word_counts = Counter(filtered_words)
        newdic = {key: value for key, value in word_counts.most_common(200)}
        wordcloud = WordCloud(width=800, height=400, background_color='white').generate_from_frequencies(newdic)
        plt.figure(figsize=(10, 5))
        plt.imshow(wordcloud, interpolation='bilinear')
        plt.axis('off')
        st.pyplot()

    def no_of_convos_by_time_of_day_per_month(self, dfm):
        fd = pd.DataFrame(dfm.groupby(['yearmonth', 'time_of_day']).size().reset_index(name='count'))
        xl = len(fd['yearmonth'])
        plt.figure(figsize=(xl, xl))
        sns.barplot(data=fd, x=fd['yearmonth'], y=fd['count'], hue=fd['time_of_day'])
        plt.title('No. of conversations by time of day per month')
        st.pyplot()

    def no_of_msgs_user_month(self, dfm):
        # Assumes a two-person chat: one line per sender
        c = dfm.groupby(['yearmonth', 'sender'])['msg'].count().reset_index(name='count')
        senders = dfm['sender'].unique()
        cj = c[c['sender'] == senders[0]]
        ci = c[c['sender'] == senders[1]]
        x1, y1 = cj['yearmonth'].apply(lambda a: str(a)), cj['count']
        x2, y2 = ci['yearmonth'].apply(lambda a: str(a)), ci['count']
        plt.figure(figsize=(max(len(x1), len(x2), 10), max(len(x1), len(x2), 10)))
        sns.set_theme(style='whitegrid')
        plt.plot(x1, y1, marker='o', color='black', label=senders[0])
        plt.plot(x2, y2, marker='o', color='red', label=senders[1])
        plt.legend()
        plt.title('No. of messages per user per month')
        st.pyplot()

    def days_with_most_msgs(self, dfm):
        dfm.groupby('date')['msg'].count().sort_values(ascending=False).head(5).plot.barh()
        plt.title('Days with most messages')
        st.pyplot()

    def analysis(self):
        df = self.cleaning_organizing_etx()
        self.days_with_most_msgs(dfm=df)
        self.delays_per_month(dfm=df)
        self.most_common_words(dfm=df)
        self.no_of_convos_by_time_of_day_per_month(dfm=df)
        self.no_of_msgs_user_month(dfm=df)
        sns.heatmap(
            pd.pivot_table(df, values='msg', index='day', columns='time_of_day', aggfunc='count').fillna(0))
        st.pyplot()


# ---- Streamlit part starts here (the GPT-generated section) ----
st.title('WhatsApp Chat Analysis App')

# Flag to check if the analysis has been performed
analysis_done = False

# The uploader has to be created outside the button branch: Streamlit reruns
# the whole script on every interaction, and st.button() is only True during
# the single run in which it is clicked, so a file_uploader nested inside the
# button branch vanishes before a file can ever be chosen.
uploaded_file = st.file_uploader("Choose a WhatsApp chat text file")

# Button to trigger the analysis
if st.button("Run Analysis") and uploaded_file is not None:
    # Read chat data
    content = uploaded_file.read().decode('utf-8', errors='ignore')
    # Analyze chat using the WhatsappChatAnalysis class
    chat_analysis = WhatsappChatAnalysis(txt=content)
    chat_analysis.analysis()
    # Set the flag to indicate that analysis has been performed
    analysis_done = True

# Display additional information after analysis
if analysis_done:
    st.text("Additional information can be displayed here.")
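For context, the date regex and the '%d/%m/%Y, %H:%M - ' format assume the Android-style 24-hour WhatsApp export, where each line looks like this (names made up):

12/03/2023, 14:05 - Alice: are we still on for tonight?
12/03/2023, 14:07 - Bob: yes, see you at 8

I run the app with streamlit run app.py (app.py being whatever the script is saved as), and the parsing itself works fine when I test the class on its own; only the upload step gives the 403.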