
Streamlit app for WhatsApp chat analysis showing error code 403


I'm trying to build a Streamlit app that accepts a WhatsApp chat .txt file as input and renders graphical visualizations of the analysis I do on it in Python. It lets me select a file, but then shows error 403. I'll be honest: I wanted a website-type thing where I upload a text file and get graphs back, and GPT generated the Streamlit part of the code (the analysis code is still mine).
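The error shows up around the file upload, so stripped down, the flow I'm going for is roughly this (cut from the full code below; the st.write line is only there to confirm the file was read and isn't in my real code):

import streamlit as st

st.title('WhatsApp Chat Analysis App')

# Pick an exported WhatsApp chat (.txt) file
uploaded_file = st.file_uploader("Choose a WhatsApp chat text file")

if uploaded_file is not None:
    # file_uploader returns a file-like object, so decode its bytes to text
    content = uploaded_file.read().decode('utf-8', errors='ignore')
    st.write('Loaded {} characters of chat text'.format(len(content)))
    # ...the pandas/matplotlib analysis would run on content here...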

Here's the full code if necessary. I've marked where the Streamlit part starts with a comment.

import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import re
from collections import Counter
from nltk.corpus import stopwords
from nltk.tokenize import TweetTokenizer
from wordcloud import WordCloud
import warnings

# Silence all warnings
warnings.filterwarnings("ignore")
st.set_option('deprecation.showPyplotGlobalUse', False)


class WhatsappChatAnalysis:
    def __init__(self, txt):
        self.txt = txt
        sns.set(style='whitegrid')

    def cleaning_organizing_etx(self):
        content = self.txt.strip()
        dttpattern = '\d{2}/\d{2}/\d{4},\s\d{2}:\d{2}\s-\s'
        dates = re.findall(pattern=dttpattern, string=content)[1:]
        messages = re.split(pattern=dttpattern, string=content)[2:]
        df = pd.DataFrame({'date': dates, 'msg': messages})
        df['datetime'] = pd.to_datetime(df['date'], format='%d/%m/%Y, %H:%M - ')
        df['time'] = df['datetime'].dt.time
        df['date'] = df['datetime'].dt.date
        df['day'] = df['datetime'].dt.day_name()
        df['yearmonth'] = df['datetime'].dt.to_period('M')
        df['msg'] = df['msg'].apply(lambda a: a.strip())
        df = df[df['msg'].str.contains('^([\s\S]+?):.*')]
        df['sender'] = df['msg'].apply(lambda a: re.search(pattern='^([\s\S]+?):.*', string=a).groups()[0])
        df['msg'] = df['msg'].apply(lambda a: ''.join(re.findall(pattern='.*:\s(.*)', string=a, flags=re.DOTALL)))
        df['message_word_count'] = df['msg'].apply(lambda a: len(a.split()))
        df['hour'] = df['datetime'].dt.hour
        bins = [0, 6, 12, 18, 24]
        labels = ['00-6', '6-12', '12-18', '18-24']
        df['time_of_day'] = pd.cut(df['hour'], bins=bins, labels=labels, right=False)
        return df

    def delays_per_month(self, dfm):
        tempdf = dfm[['sender', 'datetime', 'yearmonth']]
        f = tempdf[tempdf['sender'].ne(tempdf['sender'].shift())]
        alldiffs = f['datetime'].diff()
        s1, s2 = f['sender'].iloc[0], f['sender'].iloc[1]
        s2delays = alldiffs.iloc[1::2].dt.total_seconds() / 60
        s1delays = alldiffs.iloc[0::2].dt.total_seconds() / 60
        s2delaysbymonth = pd.DataFrame({'yearmonth': f[f['sender'] == s2]['yearmonth'], 'delays': s2delays})
        s1delaysbymonth = pd.DataFrame({'yearmonth': f[f['sender'] == s1]['yearmonth'], 'delays': s1delays})
        ys = s2delaysbymonth.groupby(s2delaysbymonth['yearmonth']).mean()
        xs = [str(i) for i in ys.index]
        ys1 = s1delaysbymonth.groupby(s1delaysbymonth['yearmonth']).mean()
        xs1 = [str(i) for i in ys1.index]
        plt.figure(figsize=(max(len(xs), len(xs1), 10), max(len(xs), len(xs1), 10)))
        plt.plot(xs, ys, 'black', marker='o', label='{}'.format(s2))
        plt.plot(xs1, ys1, 'red', marker='s', label='{}'.format(s1))
        plt.title('Average delay between replies per month (in minutes)')
        plt.yscale('log')
        plt.legend()
        st.pyplot()

    def most_common_words(self, dfm):
        content = " ".join(dfm['msg'])
        words = TweetTokenizer().tokenize(content.lower())
        stop_words = set(stopwords.words('english'))
        stop_words.add('omitted')
        stop_words.add('media')
        stop_words.update(
            ['yes', 'no', 'yeah', 'na', 'nah', 'neh', 'ill', "i'll", 'go', 'okay', 'would', 'could', 'lol', 'dont',
             'ig', 'idk', 'imao', 'lmao', 'like', 'u', 'wanna', 'im', "i'm", 'i'])
        with open('stop_hinglish.txt') as f:
            hinglish_stopwords = f.read().split('\n')
        stop_words.update(hinglish_stopwords)
        filtered_words = [word for word in words if word.isalnum() and word not in stop_words]
        word_counts = Counter(filtered_words)
        newdic = {key: value for key, value in word_counts.most_common(200)}
        wordcloud = WordCloud(width=800, height=400, background_color='white').generate_from_frequencies(newdic)
        plt.figure(figsize=(10, 5))
        plt.imshow(wordcloud, interpolation='bilinear')
        plt.axis('off')
        st.pyplot()

    def no_of_convos_by_time_of_day_per_month(self, dfm):
        fd = pd.DataFrame(dfm.groupby(['yearmonth', 'time_of_day']).size().reset_index(name='count'))
        xl = len(fd['yearmonth'])
        plt.figure(figsize=(xl, xl))
        plt.title('Number of texts per month grouped by time of day')
        sns.barplot(data=fd, x=fd['yearmonth'], y=fd['count'], hue=fd['time_of_day'])
        plt.title('No. of conversations by time of day per month')
        st.pyplot()

    def no_of_msgs_user_month(self, dfm):
        c = dfm.groupby(['yearmonth', 'sender'])['msg'].count().reset_index(name='count')
        senders = dfm['sender'].unique()
        cj = c[c['sender'] == senders[0]]
        ci = c[c['sender'] == senders[1]]
        x1, y1 = cj['yearmonth'].apply(lambda a: str(a)), cj['count']
        x2, y2 = ci['yearmonth'].apply(lambda a: str(a)), ci['count']
        plt.figure(figsize=(max(len(x1), len(x2), 10), max(len(x1), len(x2), 10)))
        sns.set_theme(style='whitegrid')
        plt.plot(x1, y1, marker='o', color='black', label=senders[0])
        plt.plot(x2, y2, marker='o', color='red', label=senders[1])
        plt.legend()
        plt.title('No. of messages per user per month')
        st.pyplot()

    def days_with_most_msgs(self, dfm):
        dfm.groupby('date')['msg'].count().sort_values(ascending=False).head(5).plot.barh()
        plt.title('Days with most messages')
        st.pyplot()

    def analysis(self):
        df = self.cleaning_organizing_etx()
        self.days_with_most_msgs(dfm=df)
        self.delays_per_month(dfm=df)
        self.most_common_words(dfm=df)
        self.no_of_convos_by_time_of_day_per_month(dfm=df)
        self.no_of_msgs_user_month(dfm=df)
        sns.heatmap(
            pd.pivot_table(df, values='msg', index='day', columns='time_of_day', aggfunc='count').fillna(0))
        st.pyplot()


# ---- The Streamlit (GPT-generated) part starts here ----
st.title('WhatsApp Chat Analysis App')

# Flag to check if the analysis has been performed
analysis_done = False

# Button to trigger the analysis
if st.button("Run Analysis"):
    # Upload chat data
    uploaded_file = st.file_uploader("Choose a WhatsApp chat text file")

    # Check if a file has been uploaded
    if uploaded_file is not None:
        # Read chat data
        content = uploaded_file.read().decode('utf-8', errors='ignore')

        # Analyze chat using WhatsappChatAnalysis class
        chat_analysis = WhatsappChatAnalysis(txt=content)
        chat_analysis.analysis()

        # Set the flag to indicate that analysis has been performed
        analysis_done = True

# Display additional information after analysis
if analysis_done:
    st.text("Additional information can be displayed here.")
