Skip to content
Snippets Groups Projects
Commit 9bab9fb0 authored by Siegfried Albrecht's avatar Siegfried Albrecht
Browse files

Initial Commit

parent d051a57b
No related branches found
No related tags found
No related merge requests found
app.py 0 → 100644
from math import floor
from dash import Dash, html, dcc, callback, Output, Input, dash_table
import plotly.express as px
from dash.exceptions import PreventUpdate
import pandas as pd
from helper import load_json, convert_minutes_to_hours_and_minutes
from datetime import datetime
def get_streams_on_daytime(stream_hist: list[dict], bucket_size_in_min: int = 15) -> dict:
    """Count streams per time-of-day bucket.

    Args:
        stream_hist: Iterable of streaming-history records. Each record must
            provide an ``"endTime"`` string formatted as ``"%Y-%m-%d %H:%M"``.
        bucket_size_in_min: Width of one bucket in minutes. Values outside
            ``1..1439`` fall back to the default width of 15 minutes.

    Returns:
        A dict mapping a ``"<start> - <end>"`` label (both ends rendered by
        ``convert_minutes_to_hours_and_minutes``) to the number of streams
        whose end time falls into that bucket. Keys are inserted in
        chronological order starting at minute 0 of the day.
    """
    minutes_per_day = 24 * 60
    if not 0 < bucket_size_in_min < minutes_per_day:
        # An unusable width would otherwise divide by zero or index past the
        # bucket list; use the documented default instead.
        bucket_size_in_min = 15

    # Ceiling division: a width that does not divide the day evenly still
    # needs a final, shorter bucket for the last minutes before midnight.
    amount_of_buckets = -(-minutes_per_day // bucket_size_in_min)
    buckets = [0] * amount_of_buckets

    for item in stream_hist:
        end_time = datetime.strptime(item["endTime"], "%Y-%m-%d %H:%M")
        minute_of_day = end_time.hour * 60 + end_time.minute
        buckets[minute_of_day // bucket_size_in_min] += 1

    bucket_dict = {}
    for i, stream_count in enumerate(buckets):
        first_minute = i * bucket_size_in_min
        # Clamp so the last (possibly shorter) bucket never runs past 23:59.
        last_minute = min((i + 1) * bucket_size_in_min, minutes_per_day) - 1
        label = (
            f"{convert_minutes_to_hours_and_minutes(first_minute)} - "
            f"{convert_minutes_to_hours_and_minutes(last_minute)}"
        )
        bucket_dict[label] = stream_count
    return bucket_dict
# Raw streaming-history records as returned by load_json; passed to
# get_streams_on_daytime further down in this module.
stream_hist_json = load_json("./StreamingHistory.json")
# Userdata export as a two-column DataFrame: one row per (key, value) pair of
# the loaded mapping (column 0 = key, column 1 = value).
u_data_df = pd.DataFrame(load_json('./Userdata.json').items())
def get_inference_collector(row):
    """Classify an inference by the prefix of the row's first field.

    Args:
        row: A pandas Series (as handed over by ``DataFrame.apply(..., axis=1)``)
            or any indexable sequence whose first element is the inference
            string.

    Returns:
        ``'Third Party'`` for a ``3P_`` prefix, ``'Spotify'`` for a ``1P_``
        prefix and ``'Undefined'`` for everything else.
    """
    # ``row[0]`` on a label-indexed Series relies on deprecated positional
    # fallback; use explicit positional access when the row offers it.
    inference = row.iloc[0] if hasattr(row, 'iloc') else row[0]
    if inference.startswith('3P_'):
        return 'Third Party'
    if inference.startswith('1P_'):
        return 'Spotify'
    return 'Undefined'
# Load the inferences export and tag every row with who collected it. The tag
# is derived from the raw value, so it has to be computed before the prefixes
# are stripped below.
inferences_json = load_json('./Inferences.json')
inferences_df = pd.DataFrame(inferences_json)
inferences_df['Collected By'] = inferences_df.apply(get_inference_collector, axis=1)
# Remove the collector/custom prefixes from the displayed values and give the
# inference column its display name.
inferences_df = (
    inferences_df
    .replace({'3P_':'','1P_':'', 'Custom__': '', 'Custom_': ''}, regex=True)
    .rename(columns={'inferences': 'Inference'})
)
# Streaming history as a DataFrame with display-friendly column names.
stream_hist = pd.read_json("./StreamingHistory.json")
stream_hist = stream_hist.rename(columns={"endTime": "Stream stopped", "artistName": "Artist", "trackName": "Track", "msPlayed": "Track Duration"})
# Render the played milliseconds as "<total minutes>:<seconds>". Going through
# a datetime with '%M:%S' would wrap the minutes at 60 for plays of an hour or
# longer.
stream_hist["Track Duration Formatted"] = stream_hist['Track Duration'].map(
    lambda ms: f"{int(ms) // 60000:02d}:{int(ms) // 1000 % 60:02d}"
)
# Per-row totals: how often the row's artist / track occurs in the history.
stream_hist['Artist Streams'] = stream_hist.groupby(['Artist'])['Artist'].transform('count')
stream_hist['Track Streams'] = stream_hist.groupby(['Track'])['Track'].transform('count')
# Parse the stop timestamp once and derive the calendar parts from it.
_stream_stopped_at = pd.to_datetime(stream_hist["Stream stopped"])
stream_hist['Date'] = _stream_stopped_at.dt.date
stream_hist['Weekday'] = _stream_stopped_at.dt.day_of_week
stream_hist['Time'] = _stream_stopped_at.dt.time
# Per-row totals for the row's weekday, date and time of day.
stream_hist['Streams on Weekday'] = stream_hist.groupby(['Weekday'])['Weekday'].transform('count')
stream_hist['Streams on Day'] = stream_hist.groupby(['Date'])['Date'].transform('count')
stream_hist['Streams on Time'] = stream_hist.groupby(['Time'])['Time'].transform('count')
# Stream counts per one-hour bucket of the day, as a two-column frame.
streams_on_daytime = pd.DataFrame(get_streams_on_daytime(stream_hist=stream_hist_json, bucket_size_in_min=60).items())
streams_on_daytime.columns = ["Timerange", "Streams"]
# Userdata as a DataTable. The rows must come from u_data_df (not from the
# streaming history), and the column ids are stringified so that they match
# the record keys.
userdata_table = dash_table.DataTable(
    data=u_data_df.rename(columns=str).to_dict('records'),
    columns=[{"name": str(i), "id": str(i)} for i in u_data_df.columns],
)
##### Graphs
# Module-level default figures, all sharing the same bar colour and plot
# background.

# Streams per calendar day: one representative row per date carries the
# precomputed 'Streams on Day' total.
streaming_hist_over_time = px.bar(data_frame=stream_hist.drop_duplicates(subset='Date', keep='first'),
                                  x="Date",
                                  y='Streams on Day',
                                  color_discrete_sequence=["#1ed760"])
streaming_hist_over_time.update_layout({'plot_bgcolor': '#d8f0d8'})

# Streams per time-of-day bucket.
streaming_hist_daily = px.bar(data_frame=streams_on_daytime,
                              x="Timerange",
                              y='Streams',
                              labels={"Timerange": "Timerange", "Streams": "Streams per Hour"},
                              color_discrete_sequence=["#1ed760"])
streaming_hist_daily.update_layout({'plot_bgcolor': '#d8f0d8'})

# Streams per weekday. The representative rows are sorted by weekday number
# and each label is looked up from that number, so every bar gets the name of
# the weekday it actually belongs to (first-occurrence order in the history is
# arbitrary, and a weekday may be missing altogether).
_WEEKDAY_LABELS = ["Mon", "Tue", "Wed", "Thurs", "Fri", "Sat", "Sun"]
_streams_per_weekday = stream_hist.drop_duplicates(subset='Weekday', keep='first').sort_values(by='Weekday')
streaming_hist_weekly = px.bar(data_frame=_streams_per_weekday,
                               x=[_WEEKDAY_LABELS[day] for day in _streams_per_weekday['Weekday']],
                               y='Streams on Weekday',
                               color_discrete_sequence=["#1ed760"])
streaming_hist_weekly.update_layout({'plot_bgcolor': '#d8f0d8'})
##### app
app = Dash(__name__)
# Page layout: a single root Div whose children are, in order, the userdata
# table, the inferences table, the streaming-history table, the streams-per-x
# graph, the top artists/tracks section and the artist timeline controls.
app.layout = html.Div(className= 'section', children=[
    html.H1(children='Spotify Data Dashboard'),
    # Userdata Section
    html.Div(id="userdata_container", className='section',
        children=[
            html.H2(children='Your Userdata'),
            html.Table(id="userdata_table", style={'width': '100%',"border-collapse": "collapse", },
                # Two equally wide columns: first field on the left, second on the right.
                children=[html.Colgroup(
                    children=[
                        html.Col(style={'width': '50%'}),
                        html.Col(style={'width': '50%'})]),
                    html.Tbody(style={"max-inline-size": "100%","inline-size": "100%","overflow-wrap": "break-word"},
                        children=[
                            # One table row per row of u_data_df.
                            html.Tr(style={"border-bottom": "1px solid var(--decorative-subdued,#dedede)","color": "var(--text-subdued,#6a6a6a)"},
                                children=[
                                    html.Td(style={"text-align": "start","color": "var(--text-subdued,#6a6a6a)","padding": "1em","padding-inline-start": "0px","white-space": "nowrap"},
                                        children=[
                                            str(row[0]).capitalize()
                                        ]),
                                    html.Td(style={"text-align": "start","color": "var(--text-base,#000000)","padding": "1em 0px","word-break": "break-all"},
                                        children=[
                                            str(row[1])
                                        ])
                                ])
                            for index, row in u_data_df.iterrows()])])],),
    # Inferences Section
    html.Div(id='inferences_container', className='section',
        children=
        [
            html.H2(children='Inferences'),
            html.P(children='Spotify has Inferences, that they conclude themself and Inferences from Third Parties, eg. Google or Facebook or other Data Brokers.'),
            html.Div(children=[
                # Paged table (10 rows per page) over every column of inferences_df.
                dash_table.DataTable(id='inferences_table',
                    data=inferences_df.to_dict('records'),
                    page_size=10,
                    columns=[{"name": i, "id": i, 'hideable': 'last'} for i in inferences_df.columns],
                    style_table={"overflowX": "scroll"},
                    hidden_columns=[],
                    style_cell={'text-align':'left', 'font': '1em/1.3 My Gill Sans, Lato, sans-serif','font-size-adjust': '0.45'},
                    style_header={'text-align':'left','font-weight': 'bold', 'font': '1.156em/1.3 My Gill Sans, Lato, sans-serif'},
                    sort_action='native',
                    filter_action='native'
                )]),
        ]
    ),
    # Streaming History Section
    html.Div(id="streaming_history_container", className='section',
        children=[html.H2(children='Streaming History'),
            html.P(children=['In the following section, you can see and play around with data from your streaming history.']),
            # NOTE: the continuation lines of this triple-quoted string are part
            # of the displayed text and therefore stay unindented.
            html.P(children=['''In this table you can sort each column and filter the data for each column.
You can also filter multiple columns. The filter is case sensitive,
but can be made case insensitive by clicking on the 'Aa' Button
''']),
            html.Div(children=[
                # Paged table (10 rows per page) over every column of stream_hist.
                dash_table.DataTable(id='stream_history_table',
                    data=stream_hist.to_dict('records'),
                    page_size=10,
                    columns=[{"name": i, "id": i, 'hideable': 'last'} for i in stream_hist.columns],
                    style_table={"overflowX": "scroll"},
                    hidden_columns=[],
                    style_cell={'text-align':'left', 'font': '1em/1.3 My Gill Sans, Lato, sans-serif','font-size-adjust': '0.45'},
                    style_header={'text-align':'left','font-weight': 'bold', 'font': '1.156em/1.3 My Gill Sans, Lato, sans-serif'},
                    sort_action='native',
                    filter_action='native'
                )])]),
    # The streaming_history_container Div is closed above; the elements from
    # here on are direct children of the root Div.
    html.P(children=['The following Graphs show you when you listened to music on Spotify. ',
        'You can zoom, pan and explore the graph with the menu in the upper right corner.']),
    # Selects which figure update_streams_over_time_output renders below.
    dcc.Dropdown(options = ['Daily Streams', 'Weekly Streams', 'Streams Overall'],
        value = 'Daily Streams',
        id = 'streams_per_x_dropdown'),
    dcc.Graph(id='streams_per_x_output'),
    # Show Top Artists and Tracks Section
    html.Div(className='section', id='top_section',
        children=[
            html.H3(children=['Top Artists']),
            # Inputs of the update_top_artists callback.
            html.Div(style={ 'display': 'flex'},children=[
                html.Label(children=['Show Top:']),
                dcc.Input(style={'margin-left': '1em'}, id='top_x_artist_input', type='number', value=10),
                html.Label(style={'margin-left': '4em'}, children=['Rate by:']),
                dcc.RadioItems(style={'margin-left': '1em'},id='switch_top__artist_method_radioItems', options=['Streams', 'Time'], value='Streams', inline=True)
            ]),
            dcc.Graph(id='top_artists_graph'),
            html.H3(children=['Top Tracks']),
            # Inputs of the update_top_tracks callback.
            html.Div(style={ 'display': 'flex'},children=[
                html.Label(children=['Show Top:']),
                dcc.Input(style={'margin-left': '1em'}, id='top_x_track_input', type='number', value=10),
                html.Label(style={'margin-left': '4em'}, children=['Rate by:']),
                dcc.RadioItems(style={'margin-left': '1em'},id='switch_top_track_method_radioItems', options=['Streams', 'Time'], value='Streams', inline=True)
            ]),
            dcc.Graph(id='top_tracks_graph'),
        ]),
    # Show when you listened to which artists and tracks
    html.Div(className='section', id='listenening_habits_over_time',
        children=[
            # Date range defaults to, and is limited to, the span covered by
            # the streaming history.
            dcc.DatePickerRange(id='artist_listened_datepicker',
                min_date_allowed=stream_hist['Date'].min(),
                max_date_allowed=stream_hist['Date'].max(),
                start_date=stream_hist['Date'].min(),
                end_date=stream_hist['Date'].max()),
        ]),
    # Artist multi-select and timeline; both are direct children of the root
    # Div and feed / are fed by update_artist_listened_timeline.
    dcc.Dropdown(id='artist_listened_dropdown', options=stream_hist['Artist'].drop_duplicates(), multi=True),
    dcc.Graph(id='artist_listened_timeline')
])
@callback(Output('artist_listened_timeline', 'figure'), [Input('artist_listened_dropdown', 'value'), Input('artist_listened_datepicker', 'start_date'), Input('artist_listened_datepicker', 'end_date')])
def update_artist_listened_timeline(artist_list, start_date, end_date):
    """Build the timeline of days on which the selected artists were played.

    Args:
        artist_list: Artists chosen in the dropdown; ``None`` if nothing has
            been selected yet.
        start_date: Lower bound of the date picker as a ``'%Y-%m-%d'`` string.
        end_date: Upper bound of the date picker as a ``'%Y-%m-%d'`` string.

    Returns:
        A plotly timeline figure with one one-day bar per (artist, date)
        combination inside the selected range.

    Raises:
        PreventUpdate: If no artist is selected or either date is missing.
    """
    # A missing date cannot be parsed; keep the current figure until the
    # range is complete again.
    if artist_list is None or start_date is None or end_date is None:
        raise PreventUpdate
    s_date = datetime.strptime(start_date, '%Y-%m-%d').date()
    e_date = datetime.strptime(end_date, '%Y-%m-%d').date()

    in_range = (stream_hist['Date'] >= s_date) & (stream_hist['Date'] <= e_date)
    is_selected = stream_hist['Artist'].isin(artist_list)
    # One row per artist and day is enough to draw that day's bar.
    data = stream_hist.loc[in_range & is_selected].drop_duplicates(subset=['Artist', 'Date'])
    # assign() returns a new frame, so the shared stream_hist frame is never
    # written to; each bar spans exactly one day.
    data = data.assign(enddate=data["Date"] + pd.Timedelta('1 day'))

    timeline = px.timeline(data_frame=data, x_start="Date", x_end='enddate', y='Artist', color_discrete_sequence=["#1ed760"])
    timeline.update_layout({'plot_bgcolor': '#d8f0d8'})
    return timeline
@callback(Output('top_artists_graph', 'figure'), [Input('top_x_artist_input', 'value'), Input('switch_top__artist_method_radioItems', 'value')])
def update_top_artists(top_x_number, rating_method):
    """Build the "Top Artists" bar chart.

    Args:
        top_x_number: How many artists to show; ``None`` while the number
            input is empty.
        rating_method: ``'Streams'`` ranks artists by their number of streams,
            ``'Time'`` by their summed play time in whole minutes.

    Returns:
        A bar figure with the ranking value on the x axis and the artist on
        the y axis, highest value first.

    Raises:
        PreventUpdate: If ``top_x_number`` is ``None`` or ``rating_method`` is
            not one of the two known values.
    """
    if top_x_number is None:
        raise PreventUpdate

    # One representative row per artist.
    artists = stream_hist.drop_duplicates(subset='Artist')
    if rating_method == 'Streams':
        rank_column = 'Artist Streams'
    elif rating_method == 'Time':
        rank_column = 'Total Time By Artist'
        # Milliseconds -> whole minutes. The totals are attached to a local
        # frame only, so the shared stream_hist frame is not mutated from
        # inside a callback.
        minutes_by_artist = stream_hist.groupby('Artist')['Track Duration'].sum() // 60000
        artists = artists.assign(**{rank_column: artists['Artist'].map(minutes_by_artist)})
    else:
        raise PreventUpdate

    # nlargest already returns every row (sorted descending) when asked for
    # more rows than exist; int() guards against a fractional number input.
    data = artists.nlargest(int(top_x_number), columns=[rank_column])
    fig = px.bar(data_frame=data, x=rank_column, y='Artist', color_discrete_sequence=["#1ed760"])
    # Reverse the y axis so the top-ranked artist is drawn at the top.
    fig.update_layout(yaxis=dict(autorange="reversed"))
    fig.update_layout({'plot_bgcolor': '#d8f0d8'})
    return fig
@callback(Output('top_tracks_graph', 'figure'), [Input('top_x_track_input', 'value'), Input('switch_top_track_method_radioItems', 'value')])
def update_top_tracks(top_x_number, rating_method):
    """Build the "Top Tracks" bar chart.

    Args:
        top_x_number: How many tracks to show; ``None`` while the number
            input is empty.
        rating_method: ``'Streams'`` ranks tracks by their number of streams,
            ``'Time'`` by their summed play time in whole minutes.

    Returns:
        A bar figure with the ranking value on the x axis and the track on
        the y axis, highest value first.

    Raises:
        PreventUpdate: If ``top_x_number`` is ``None`` or ``rating_method`` is
            not one of the two known values.
    """
    if top_x_number is None:
        raise PreventUpdate

    # One representative row per track name.
    tracks = stream_hist.drop_duplicates(subset='Track')
    if rating_method == 'Streams':
        rank_column = 'Track Streams'
    elif rating_method == 'Time':
        rank_column = 'Total Time By Track'
        # Milliseconds -> whole minutes. The totals are attached to a local
        # frame only, so the shared stream_hist frame is not mutated from
        # inside a callback.
        minutes_by_track = stream_hist.groupby('Track')['Track Duration'].sum() // 60000
        tracks = tracks.assign(**{rank_column: tracks['Track'].map(minutes_by_track)})
    else:
        raise PreventUpdate

    # nlargest already returns every row (sorted descending) when asked for
    # more rows than exist; int() guards against a fractional number input.
    data = tracks.nlargest(int(top_x_number), columns=[rank_column])
    fig = px.bar(data_frame=data, x=rank_column, y='Track', color_discrete_sequence=["#1ed760"])
    # Reverse the y axis so the top-ranked track is drawn at the top.
    fig.update_layout(yaxis=dict(autorange="reversed"))
    fig.update_layout({'plot_bgcolor': '#d8f0d8'})
    return fig
@callback(Output('streams_per_x_output', 'figure'), Input('streams_per_x_dropdown', 'value'))
def update_streams_over_time_output(value):
    """Build the stream-count bar chart selected in the dropdown.

    Args:
        value: ``'Daily Streams'`` (per time-of-day bucket), ``'Weekly
            Streams'`` (per weekday) or ``'Streams Overall'`` (per calendar
            date).

    Returns:
        The matching plotly bar figure.

    Raises:
        PreventUpdate: If ``value`` is none of the three options, e.g. when
            the dropdown has been cleared.
    """
    if value == 'Daily Streams':
        fig = px.bar(data_frame=streams_on_daytime,
                     x="Timerange",
                     y='Streams',
                     color_discrete_sequence=["#1ed760"])
    elif value == 'Weekly Streams':
        weekday_names = ["Mon", "Tue", "Wed", "Thurs", "Fri", "Sat", "Sun"]
        # Sort the representative rows by weekday number and derive each
        # label from that number. A fixed Mon..Sun list would be paired with
        # rows in first-occurrence order and mislabel the bars (or fail
        # outright when a weekday is absent from the history).
        per_weekday = stream_hist.drop_duplicates(subset='Weekday', keep='first').sort_values(by='Weekday')
        fig = px.bar(data_frame=per_weekday,
                     x=[weekday_names[day] for day in per_weekday['Weekday']],
                     y='Streams on Weekday',
                     labels={'Streams on Weekday': 'Streams', 'x': "Weekday"},
                     color_discrete_sequence=["#1ed760"])
    elif value == 'Streams Overall':
        fig = px.bar(data_frame=stream_hist.drop_duplicates(subset='Date', keep='first'),
                     x="Date",
                     y='Streams on Day',
                     labels={'Streams on Day': 'Streams'},
                     color_discrete_sequence=["#1ed760"])
    else:
        # Keep the current figure instead of handing None to the graph.
        raise PreventUpdate
    fig.update_layout({'plot_bgcolor': '#d8f0d8'})
    return fig
if __name__ == '__main__':
    import os

    # The bind address and port were hard-coded to one machine's LAN address;
    # HOST / PORT environment variables now override them, with the original
    # values kept as defaults.
    # NOTE(review): debug=True is combined with a network-reachable host here;
    # confirm this is only ever started on a trusted network.
    app.run(debug=True,
            host=os.getenv('HOST', '192.168.0.109'),
            port=int(os.getenv('PORT', '8050')))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment