Initial Commit

9bab9fb0 · Siegfried Albrecht · d051a57b · 9bab9fb0
Commit 9bab9fb0 authored Sep 2, 2023 by Siegfried Albrecht
--- a/app.py
+++ b/app.py
+from math import floor
+from dash import Dash, html, dcc, callback, Output, Input, dash_table
+import plotly.express as px
+from dash.exceptions import PreventUpdate
+import pandas as pd
+from helper import load_json, convert_minutes_to_hours_and_minutes
+from datetime import datetime
+
+
+def get_streams_on_daytime(stream_hist: list[dict], bucket_size_in_min: int = 15) -> dict:
+    """Count streams per time-of-day bucket.
+
+    The ``endTime`` of every entry (format ``%Y-%m-%d %H:%M``) is reduced to
+    its minute of the day and counted in the bucket covering that minute.
+
+    Args:
+        stream_hist: Iterable of streaming-history entries, each providing an
+            ``"endTime"`` string.
+        bucket_size_in_min: Width of one bucket in minutes. Values outside
+            ``1..1439`` fall back to 15-minute buckets.
+
+    Returns:
+        Dict mapping a ``"<start> - <end>"`` label (both ends formatted by
+        ``convert_minutes_to_hours_and_minutes``) to the number of streams in
+        that bucket, in order from midnight onwards.
+
+    Raises:
+        ValueError: If an ``endTime`` does not match the expected format.
+    """
+    minutes_per_day = 24 * 60
+    # Reset the bucket size itself (not only the bucket count) when it is
+    # invalid, so indexing and labels agree with the 96 fallback buckets.
+    if not 0 < bucket_size_in_min < minutes_per_day:
+        bucket_size_in_min = 15
+    # Ceiling division: a size that does not divide the day evenly still gets
+    # a final, shorter bucket instead of an IndexError for late streams.
+    amount_of_buckets = -(-minutes_per_day // bucket_size_in_min)
+
+    buckets = [0] * amount_of_buckets
+    for item in stream_hist:
+        end_time = datetime.strptime(item["endTime"], "%Y-%m-%d %H:%M")
+        minute_of_day = end_time.hour * 60 + end_time.minute
+        buckets[minute_of_day // bucket_size_in_min] += 1
+
+    bucket_dict = {}
+    for i, stream_count in enumerate(buckets):
+        start = i * bucket_size_in_min
+        # Clamp the last bucket to 23:59 when the size does not divide the day.
+        end = min(start + bucket_size_in_min, minutes_per_day) - 1
+        label = f"{convert_minutes_to_hours_and_minutes(start)} - {convert_minutes_to_hours_and_minutes(end)}"
+        bucket_dict[label] = stream_count
+
+    return bucket_dict
+
+# Raw streaming history entries, used for the time-of-day buckets.
+stream_hist_json = load_json("./StreamingHistory.json")
+# Userdata as a two-column frame: column 0 holds the key, column 1 the value.
+userdata_items = load_json('./Userdata.json').items()
+u_data_df = pd.DataFrame(userdata_items)
+
+def get_inference_collector(row):
+    """Name who collected an inference, judged by the prefix of ``row[0]``.
+
+    Returns ``'Third Party'`` for a ``3P_`` prefix, ``'Spotify'`` for a
+    ``1P_`` prefix and ``'Undefined'`` for anything else.
+    """
+    inference = row[0]
+    if inference.startswith('3P_'):
+        return 'Third Party'
+    if inference.startswith('1P_'):
+        return 'Spotify'
+    return 'Undefined'
+# Inferences as a frame with an extra column naming who collected each one.
+inferences_json = load_json('./Inferences.json')
+inferences_df = pd.DataFrame(inferences_json)
+
+# Classify first: the collector is derived from the prefixes removed below.
+collected_by = inferences_df.apply(get_inference_collector, axis=1)
+inferences_df['Collected By'] = collected_by
+# Patterns removed from every string cell to leave only the readable name.
+prefix_patterns = {'3P_': '', '1P_': '', 'Custom__': '', 'Custom_': ''}
+inferences_df = inferences_df.replace(prefix_patterns, regex=True)
+inferences_df = inferences_df.rename(columns={'inferences': 'Inference'})
+
+# Streaming history as a frame with display-friendly column names plus the
+# derived per-artist, per-track and per-date/weekday/time stream counts.
+stream_hist = pd.read_json("./StreamingHistory.json")
+stream_hist = stream_hist.rename(columns={"endTime": "Stream stopped", "artistName": "Artist", "trackName": "Track", "msPlayed": "Track Duration"})
+# Format as total minutes and seconds. Formatting a timestamp with '%M:%S'
+# would wrap at 60 minutes and show e.g. 75 minutes as "15:00".
+_duration_seconds = stream_hist['Track Duration'] // 1000
+stream_hist["Track Duration Formatted"] = (
+    (_duration_seconds // 60).astype(str).str.zfill(2)
+    + ':'
+    + (_duration_seconds % 60).astype(str).str.zfill(2)
+)
+stream_hist['Artist Streams'] = stream_hist.groupby(['Artist'])['Artist'].transform('count')
+stream_hist['Track Streams'] = stream_hist.groupby(['Track'])['Track'].transform('count')
+# Parse the end timestamps once and derive date, weekday and time from them.
+_stream_stopped = pd.to_datetime(stream_hist["Stream stopped"])
+stream_hist['Date'] = _stream_stopped.dt.date
+stream_hist['Weekday'] = _stream_stopped.dt.day_of_week
+stream_hist['Time'] = _stream_stopped.dt.time
+stream_hist['Streams on Weekday'] = stream_hist.groupby(['Weekday'])['Weekday'].transform('count')
+stream_hist['Streams on Day'] = stream_hist.groupby(['Date'])['Date'].transform('count')
+stream_hist['Streams on Time'] = stream_hist.groupby(['Time'])['Time'].transform('count')
+# Streams per one-hour bucket of the day.
+streams_on_daytime = pd.DataFrame(get_streams_on_daytime(stream_hist=stream_hist_json, bucket_size_in_min=60).items())
+streams_on_daytime.columns = ["Timerange", "Streams"]
+# DataTable expects a list of row dicts whose keys match the column ids, so
+# both are taken from the userdata frame, with the integer labels as strings.
+_userdata_records = u_data_df.rename(columns=str).to_dict('records')
+userdata_table = dash_table.DataTable(data=_userdata_records, columns=[{"name": str(i), "id": str(i)} for i in u_data_df.columns])
+
+#####Graphs
+# Figures built once at import time: streams per date, per time-of-day bucket
+# and per weekday.
+streaming_hist_over_time = px.bar(data_frame=stream_hist.drop_duplicates(subset='Date', keep='first'),
+                                  x="Date",
+                                  y='Streams on Day',
+                                  color_discrete_sequence=["#1ed760"])
+streaming_hist_over_time.update_layout({'plot_bgcolor': '#d8f0d8'})
+streaming_hist_daily = px.bar(data_frame=streams_on_daytime,
+                              x="Timerange",
+                              y='Streams',
+                              labels={"Timerange": "Timerange", "Streams": "Streams per Hour"},
+                              color_discrete_sequence=["#1ed760"])
+streaming_hist_daily.update_layout({'plot_bgcolor': '#d8f0d8'})
+# drop_duplicates keeps rows in order of first occurrence, so sort by weekday
+# number (0 = Monday) and derive each label from the row itself. A fixed
+# Mon..Sun list would mislabel the bars and fail when a weekday is missing.
+_weekday_labels = ["Mon", "Tue", "Wed", "Thurs", "Fri", "Sat", "Sun"]
+_streams_per_weekday = stream_hist.drop_duplicates(subset='Weekday', keep='first').sort_values(by='Weekday')
+streaming_hist_weekly = px.bar(data_frame=_streams_per_weekday,
+                               x=[_weekday_labels[day] for day in _streams_per_weekday['Weekday']],
+                               y='Streams on Weekday',
+                               color_discrete_sequence=["#1ed760"])
+streaming_hist_weekly.update_layout({'plot_bgcolor': '#d8f0d8'})
+##### app
+
+
+
+
+
+# Dash application instance; its layout is defined below.
+app = Dash(__name__)
+
+# Page layout: one outer 'section' Div whose children are the heading, the
+# userdata table, the inferences table, the streaming history table with the
+# streams-per-period graph, the top artists/tracks graphs and the artist
+# timeline controls.
+app.layout = html.Div(className= 'section', children=[
+    html.H1(children='Spotify Data Dashboard'),
+    # Userdata section: two-column HTML table with one row per row of
+    # u_data_df (first cell capitalized, second cell shown as a string).
+    html.Div(id="userdata_container", className='section',
+        children=[
+            html.H2(children='Your Userdata'),
+            html.Table(id="userdata_table", style={'width': '100%',"border-collapse": "collapse", },
+                children=[html.Colgroup(
+                    children=[
+                        html.Col(style={'width': '50%'}), 
+                        html.Col(style={'width': '50%'})]),
+                        html.Tbody(style={"max-inline-size": "100%","inline-size": "100%","overflow-wrap": "break-word"},
+                            children=[
+                                html.Tr(style={"border-bottom": "1px solid var(--decorative-subdued,#dedede)","color": "var(--text-subdued,#6a6a6a)"},
+                                    children=[
+                                        html.Td(style={"text-align": "start","color": "var(--text-subdued,#6a6a6a)","padding": "1em","padding-inline-start": "0px","white-space": "nowrap"},
+                                            children=[
+                                                str(row[0]).capitalize()
+                                                ]), 
+                                        html.Td(style={"text-align": "start","color": "var(--text-base,#000000)","padding": "1em 0px","word-break": "break-all"},
+                                            children=[
+                                                str(row[1])
+                                                ])
+                                        ]) 
+                                        for index, row in u_data_df.iterrows()])])],),
+    # Inferences section: paged, sortable and filterable table of inferences_df.
+    html.Div(id='inferences_container', className='section',
+             children=
+             [
+                 html.H2(children='Inferences'),
+                 html.P(children='Spotify has Inferences, that they conclude themself and Inferences from Third Parties, eg. Google or Facebook or other Data Brokers.'),
+                 html.Div(children=[
+                    dash_table.DataTable(id='inferences_table',
+                                           data=inferences_df.to_dict('records'),
+                                           page_size=10, 
+                                           columns=[{"name": i, "id": i, 'hideable': 'last'} for i in inferences_df.columns],
+                                           style_table={"overflowX": "scroll"},
+                                           hidden_columns=[],
+                                           style_cell={'text-align':'left', 'font': '1em/1.3 My Gill Sans, Lato, sans-serif','font-size-adjust': '0.45'},
+                                           style_header={'text-align':'left','font-weight': 'bold', 'font': '1.156em/1.3 My Gill Sans, Lato, sans-serif'},
+                                           sort_action='native',
+                                           filter_action='native'
+                                           )]),
+                 
+             ]
+             ),
+    # Streaming history section: paged, sortable and filterable table of stream_hist.
+    html.Div(id="streaming_history_container", className='section',
+             children=[html.H2(children='Streaming History'),
+                       html.P(children=['In the following section, you can see and play around with data from your streaming history.']),
+                       html.P(children=['''In this table you can sort each column and filter the data for each column. 
+                                        You can also filter multiple columns. The filter is case sensitive, 
+                                        but can be made case insensitive by clicking on the 'Aa' Button
+                                        
+                                        ''']),
+                       html.Div(children=[
+                           dash_table.DataTable(id='stream_history_table',
+                                            data=stream_hist.to_dict('records'),
+                                           page_size=10, 
+                                           columns=[{"name": i, "id": i, 'hideable': 'last'} for i in stream_hist.columns],
+                                           style_table={"overflowX": "scroll"},
+                                           hidden_columns=[],
+                                           style_cell={'text-align':'left', 'font': '1em/1.3 My Gill Sans, Lato, sans-serif','font-size-adjust': '0.45'},
+                                           style_header={'text-align':'left','font-weight': 'bold', 'font': '1.156em/1.3 My Gill Sans, Lato, sans-serif'},
+                                           sort_action='native',
+                                           filter_action='native'
+                                           )])]),
+                       
+                        # NOTE: despite the indentation, the paragraph, dropdown and graph below
+                        # are children of the outer Div; streaming_history_container is closed above.
+                        # The dropdown value selects the figure rendered into 'streams_per_x_output'.
+                        html.P(children=['The following Graphs show you when you listened to music on Spotify. ',
+                                         'You can zoom, pan and explore the graph with the menu in the upper right corner.']),
+                        dcc.Dropdown(options = ['Daily Streams', 'Weekly Streams', 'Streams Overall'],
+                                    value = 'Daily Streams',
+                                    id = 'streams_per_x_dropdown'),
+                        dcc.Graph(id='streams_per_x_output'),
+    # Top artists and top tracks section: each graph has a "Show Top" number
+    # input and a "Rate by" radio selection (Streams or Time).
+    html.Div(className='section', id='top_section',
+        children=[
+            html.H3(children=['Top Artists']),
+            html.Div(style={ 'display': 'flex'},children=[
+                html.Label(children=['Show Top:']),
+                dcc.Input(style={'margin-left': '1em'}, id='top_x_artist_input', type='number', value=10),
+                html.Label(style={'margin-left': '4em'}, children=['Rate by:']),
+                dcc.RadioItems(style={'margin-left': '1em'},id='switch_top__artist_method_radioItems', options=['Streams', 'Time'], value='Streams', inline=True)    
+                ]),
+            dcc.Graph(id='top_artists_graph'),
+            
+            html.H3(children=['Top Tracks']),
+            html.Div(style={ 'display': 'flex'},children=[
+                html.Label(children=['Show Top:']),
+                dcc.Input(style={'margin-left': '1em'}, id='top_x_track_input', type='number', value=10),    
+                html.Label(style={'margin-left': '4em'}, children=['Rate by:']),
+                dcc.RadioItems(style={'margin-left': '1em'},id='switch_top_track_method_radioItems', options=['Streams', 'Time'], value='Streams', inline=True)
+                ]),
+            
+            dcc.Graph(id='top_tracks_graph'),
+            
+        ]),
+    # Show when you listened to which artists: date range picker bounded by
+    # the first and last date found in stream_hist.
+    html.Div(className='section', id='listenening_habits_over_time',
+            children=[
+             dcc.DatePickerRange(id='artist_listened_datepicker',
+                                 min_date_allowed=stream_hist['Date'].min(),
+                                 max_date_allowed=stream_hist['Date'].max(),
+                                 start_date=stream_hist['Date'].min(),
+                                 end_date=stream_hist['Date'].max()),
+            ]),
+            # NOTE: the artist dropdown and the timeline graph are children of the
+            # outer Div; the Div above only contains the date picker.
+            dcc.Dropdown(id='artist_listened_dropdown', options=stream_hist['Artist'].drop_duplicates(), multi=True),
+            dcc.Graph(id='artist_listened_timeline')
+                        
+    
+    
+    ])
+@callback(Output('artist_listened_timeline', 'figure'), [Input('artist_listened_dropdown', 'value'), Input('artist_listened_datepicker', 'start_date'), Input('artist_listened_datepicker', 'end_date')])
+def update_artist_listened_timeline(artist_list, start_date, end_date):
+    """Build the timeline of the days on which the selected artists were streamed.
+
+    Args:
+        artist_list: Artists chosen in the dropdown, or None when nothing was
+            selected yet.
+        start_date: First day of the range as a ``YYYY-MM-DD`` string, or None
+            when the picker was cleared.
+        end_date: Last day of the range (inclusive) as a ``YYYY-MM-DD``
+            string, or None when the picker was cleared.
+
+    Returns:
+        A timeline figure with one one-day bar per (artist, date) pair.
+
+    Raises:
+        PreventUpdate: If any input is None, so the current figure is kept.
+    """
+    # A cleared date picker delivers None, which strptime cannot parse.
+    if artist_list is None or start_date is None or end_date is None:
+        raise PreventUpdate
+    s_date = datetime.strptime(start_date, '%Y-%m-%d').date()
+    e_date = datetime.strptime(end_date, '%Y-%m-%d').date()
+    in_range = (stream_hist['Date'] >= s_date) & (stream_hist['Date'] <= e_date)
+    by_artist = stream_hist['Artist'].isin(artist_list)
+    data = stream_hist.loc[in_range & by_artist].drop_duplicates(subset=['Artist', 'Date'])
+    # assign() returns a new frame, so the shared stream_hist is never written
+    # to and no chained-assignment warning is triggered.
+    data = data.assign(enddate=data["Date"] + pd.Timedelta('1 day'))
+    timeline = px.timeline(data_frame=data, x_start="Date", x_end='enddate', y='Artist', color_discrete_sequence=["#1ed760"])
+    timeline.update_layout({'plot_bgcolor': '#d8f0d8'})
+    return timeline
+    
+def _build_top_figure(group_column, streams_column, time_column, top_x_number, rating_method):
+    """Build the horizontal bar chart of the top entries of one stream_hist column.
+
+    Args:
+        group_column: Column whose distinct values are ranked ('Artist' or 'Track').
+        streams_column: Existing column holding the stream count per value.
+        time_column: Name for the computed total-minutes column.
+        top_x_number: How many entries to show, or None for an empty input.
+        rating_method: 'Streams' ranks by stream count, 'Time' by total minutes.
+
+    Returns:
+        A bar figure with the highest ranked entry at the top.
+
+    Raises:
+        PreventUpdate: If ``top_x_number`` is None or ``rating_method`` is unknown.
+    """
+    if top_x_number is None:
+        raise PreventUpdate
+    match rating_method:
+        case 'Streams':
+            value_column = streams_column
+            ranked = stream_hist
+        case 'Time':
+            value_column = time_column
+            # Milliseconds summed per group, converted to whole minutes.
+            total_minutes = stream_hist.groupby(group_column)['Track Duration'].transform('sum') // 60000
+            # assign() works on a copy: callbacks must not modify the shared
+            # module-level frame.
+            ranked = stream_hist.assign(**{time_column: total_minutes})
+        case _:
+            raise PreventUpdate
+    # nlargest already returns every row, sorted descending, when fewer than
+    # the requested number exist. int() guards against a fractional input.
+    data = ranked.drop_duplicates(subset=group_column).nlargest(int(top_x_number), columns=[value_column])
+    fig = px.bar(data_frame=data, x=value_column, y=group_column, color_discrete_sequence=["#1ed760"])
+    fig.update_layout(yaxis=dict(autorange="reversed"))
+    fig.update_layout({'plot_bgcolor': '#d8f0d8'})
+    return fig
+
+
+@callback(Output('top_artists_graph', 'figure'), [Input('top_x_artist_input', 'value'), Input('switch_top__artist_method_radioItems', 'value')])
+def update_top_artists(top_x_number, rating_method):
+    """Return the top artists chart, rated by stream count or total minutes.
+
+    Raises PreventUpdate when the number input is empty or the rating method
+    is unknown.
+    """
+    return _build_top_figure('Artist', 'Artist Streams', 'Total Time By Artist', top_x_number, rating_method)
+
+
+@callback(Output('top_tracks_graph', 'figure'), [Input('top_x_track_input', 'value'), Input('switch_top_track_method_radioItems', 'value')])
+def update_top_tracks(top_x_number, rating_method):
+    """Return the top tracks chart, rated by stream count or total minutes.
+
+    Raises PreventUpdate when the number input is empty or the rating method
+    is unknown.
+    """
+    return _build_top_figure('Track', 'Track Streams', 'Total Time By Track', top_x_number, rating_method)
+            
+   
+
+@callback(Output('streams_per_x_output', 'figure'), Input('streams_per_x_dropdown', 'value'))
+def update_streams_over_time_output(value):
+    """Return the stream-count bar chart for the period chosen in the dropdown.
+
+    Args:
+        value: 'Daily Streams' (per time-of-day bucket), 'Weekly Streams'
+            (per weekday) or 'Streams Overall' (per date).
+
+    Returns:
+        The matching bar figure.
+
+    Raises:
+        PreventUpdate: For any other value, e.g. None after the dropdown was
+            cleared, so the current figure is kept.
+    """
+    match value:
+        case 'Daily Streams':
+            fig = px.bar(data_frame=streams_on_daytime,
+                         x="Timerange",
+                         y='Streams',
+                         color_discrete_sequence=["#1ed760"])
+        case 'Weekly Streams':
+            weekday_labels = ["Mon", "Tue", "Wed", "Thurs", "Fri", "Sat", "Sun"]
+            # drop_duplicates keeps first-occurrence order, so sort by weekday
+            # number (0 = Monday) and label each bar from its own row. A fixed
+            # Mon..Sun list would mislabel bars and fail on a missing weekday.
+            per_weekday = stream_hist.drop_duplicates(subset='Weekday', keep='first').sort_values(by='Weekday')
+            fig = px.bar(data_frame=per_weekday,
+                         x=[weekday_labels[day] for day in per_weekday['Weekday']],
+                         y='Streams on Weekday',
+                         labels={'Streams on Weekday': 'Streams', 'x': "Weekday"},
+                         color_discrete_sequence=["#1ed760"])
+        case 'Streams Overall':
+            fig = px.bar(data_frame=stream_hist.drop_duplicates(subset='Date', keep='first'),
+                         x="Date",
+                         y='Streams on Day',
+                         labels={'Streams on Day': 'Streams'},
+                         color_discrete_sequence=["#1ed760"])
+        case _:
+            raise PreventUpdate
+    fig.update_layout({'plot_bgcolor': '#d8f0d8'})
+    return fig
+
+if __name__ == '__main__':
+    # Local import: only needed when the module is run as a script.
+    import os
+
+    # A hard-coded LAN address only binds on the one machine that owns it.
+    # Default to loopback and let HOST / PORT override it, e.g.
+    # HOST=192.168.0.109 to reach the dashboard from other devices.
+    host = os.environ.get('HOST', '127.0.0.1')
+    port = int(os.environ.get('PORT', '8050'))
+    # WARNING: debug mode enables the development tools and debugger. Set
+    # DASH_DEBUG=false before binding to an address other machines can reach.
+    debug = os.environ.get('DASH_DEBUG', 'true').lower() in ('1', 'true', 'yes')
+    app.run(debug=debug, host=host, port=port)
+    
+
+
+
+
+