import os
import contextlib
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture

# Global notebook setup: silence every warning and use seaborn's whitegrid theme.
warnings.filterwarnings('ignore')
sns.set_theme(style='whitegrid')  

# Root folder of the data and the name of the consumption column analysed below.
DATA_PATH = '../data'
TARGET_COL = 'out.electricity.total.energy_consumption'
WEEKDAY_NAMES = np.array(['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'])
MONTH_NAMES = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']

# One parquet file per building, numbered 1..1277. The list is 0-based, so
# building k lives at building_data[k - 1].
building_data = [
    pd.read_parquet(f'{DATA_PATH}/data/{building_num}.parquet')
    for building_num in range(1, 1278)
]

# Raw time series of two example buildings (3 and 601) on stacked axes that
# share the x axis.
fig, axes = plt.subplots(2, 1, figsize=(10, 4), sharex=True)
for i, building_num in enumerate([3, 601]):
    df = building_data[building_num - 1]
    sns.lineplot(x='timestamp', y=TARGET_COL, data=df, ax=axes[i],
                 label=f'Building {building_num}', color=f'C{i}', linewidth=0.5)
    axes[i].set_ylabel('Energy (kWh)')
    # The first panel shows two decimals on its y ticks, the second whole numbers.
    axes[i].yaxis.set_major_formatter('{x:.2f}' if i == 0 else '{x:0.0f}')
    axes[i].legend(loc='upper right')
# plt.xlabel acts on the current axes only (the last panel drawn here).
plt.xlabel('')
plt.tight_layout()
plt.show()

def plot_building_energy(building_num, color='k'):
    """Plot a building's mean weekly load profile, one panel per month.

    Each panel highlights that month's average consumption by time-of-week
    slot and draws every month's profile behind it in grey for comparison.

    Parameters
    ----------
    building_num : int
        1-based building number; the data is read from
        ``building_data[building_num - 1]``.
    color : str, optional
        Matplotlib colour of the highlighted (current-month) line.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    source = building_data[building_num - 1]
    dt = source.timestamp.dt
    # Derive the helper columns on a copy: plotting should not add columns to
    # the shared frame stored in building_data.
    df = source.assign(
        # Slot index within the week: 96 slots per day, 4 per hour.
        time_of_week=(dt.dayofweek * 96) + (dt.hour * 4) + (dt.minute // 15),
        month_name=dt.month_name().str.slice(0, 3),
        month=dt.month,
    )

    # Mean consumption per (month, slot); sorted by month number so the
    # panels appear in calendar order rather than alphabetically.
    tmp = df.groupby(['month_name', 'month', 'time_of_week'])[TARGET_COL].mean()\
        .reset_index().sort_values('month', ignore_index=True)

    g = sns.relplot(
        data=tmp,
        x='time_of_week', y=TARGET_COL, col='month_name', color=color,
        kind='line', linewidth=2, zorder=5, label='Current Month',
        col_wrap=3, height=2, aspect=2.0, legend=False,
    )

    for i, (month, ax) in enumerate(g.axes_dict.items()):
        ax.text(0.85, 0.85, month, transform=ax.transAxes, fontweight='bold')
        # Background: one grey line per month (units=...), drawn without
        # aggregation. Only the first panel's copy carries a legend label.
        sns.lineplot(
            data=tmp, x='time_of_week', y=TARGET_COL, units='month_name',
            label='Other Months' if i == 0 else None,
            estimator=None, alpha=0.3, color='0.5', linewidth=1, ax=ax, legend=False,
        )

        # Show tick labels on every panel; one tick per day boundary.
        ax.xaxis.set_tick_params(which='both', labelbottom=True)
        ax.set_xticks(96*np.arange(8), [WEEKDAY_NAMES[i % 7][:3] for i in np.arange(8)])

    # Build a single figure-level legend from the first panel's two entries.
    handles, labels = g.axes_dict[list(g.axes_dict.keys())[0]].get_legend_handles_labels()
    g.fig.legend(handles[:2], labels[:2], loc='upper left', ncol=2, bbox_to_anchor=(0.065, 1.05))

    g.fig.subplots_adjust(hspace=0.1, wspace=0.1)
    g.set_axis_labels('', 'Energy (kWh)')
    g.set_titles('')
    plt.tight_layout()
    plt.show()

    
# Monthly profile grids for the two example buildings, each preceded by a
# printed heading (with two blank lines before the second one).
for i, building_num in enumerate([3, 601]):
    print((2*'\n' if i == 1 else '') + f'Building {building_num}')
    # plot_building_energy returns None, so its result is not bound to a name.
    plot_building_energy(building_num, color=f'C{i}')

Building 3


Building 601

# Size feature: sum of the consumption column over each building's data.
total_yearly_energy_consumption = np.array([df[TARGET_COL].sum() for df in building_data])


plt.figure(figsize=(10, 3.5))

# Left: per-building totals on a log axis, split at building id 600.
# x positions are the 1-based building ids (array index + 1) so that the
# "Building Number" axis agrees with the ids used in the legend labels.
plt.subplot(1, 2, 1)
plt.semilogy(range(1, 601), total_yearly_energy_consumption[:600], '.', alpha=0.5, label='bldg_id <= 600')
plt.semilogy(range(601, 1278), total_yearly_energy_consumption[600:], '.', alpha=0.5, label='bldg_id > 600')
plt.axhline(35000, linestyle=':', color='k', alpha=0.5)
plt.xlabel('Building Number')
plt.ylabel('Total Yearly Energy\nConsumption (kWh)')
plt.legend()

# Right: kernel density of log10(total) for the same two id ranges; the dotted
# line marks the same 35,000 threshold as in the left panel.
plt.subplot(1, 2, 2)
sns.kdeplot(np.log10(total_yearly_energy_consumption[:600]), fill=True, alpha=0.5, label='bldg_id <= 600')
sns.kdeplot(np.log10(total_yearly_energy_consumption[600:]), fill=True, alpha=0.5, label='bldg_id > 600')
plt.axvline(np.log10(35000), color='k', linestyle=':', alpha=0.5)
plt.xticks(range(3, 8), [f'1e{x}' for x in range(3, 8)])
plt.xlabel('Total Yearly Energy Consumption (kWh)')
plt.ylabel('Probability Density')
plt.legend()

plt.tight_layout()
plt.show()

# Seasonality feature: for every building, the mean consumption per
# (month, time-of-week slot), z-scored within the building so that profiles
# of differently sized buildings are comparable.
temporal_features = []
for df in building_data:
    dt = df.timestamp.dt
    # These helper columns are stored on the frame; later cells group by them.
    df['month'] = dt.month
    df['time_of_week'] = (dt.dayofweek * 96) + (dt.hour * 4) + (dt.minute // 15)
    x = df.groupby(['month', 'time_of_week'])[TARGET_COL].mean().values
    temporal_features.append((x - np.mean(x))/np.std(x))

temporal_features = np.array(temporal_features)
# Average the monthly profiles into one weekly profile per building. The
# leading dimension follows the number of loaded buildings instead of a
# hard-coded count.
weekly_average = temporal_features.reshape((len(building_data), -1, 7*96)).mean(1)

# Across-building summary per (month, slot): the mean plus the 10th..90th
# percentiles in steps of ten, stored as columns value_10 ... value_90.
df = pd.DataFrame(dict(
    month_name=np.repeat([x[:3] for x in MONTH_NAMES], 7*96),
    time_of_week=np.tile(range(7*96), 12),
    value_mean=temporal_features.mean(0),
    **{
        f'value_{pct}': np.quantile(temporal_features, pct / 100, 0)
        for pct in range(10, 100, 10)
    },
))

# Fan chart of the standardized seasonal profiles: the median across buildings
# as a black line, with nested percentile bands behind it, one panel per month.
g = sns.relplot(
    data=df,
    x='time_of_week', y='value_50', col='month_name', color='k', legend=False, label='Median',
    estimator=None, kind='line', linewidth=2, zorder=5, col_wrap=3, height=2, aspect=2.0,
)

# (lower percentile, upper percentile, fill colour), widest band first so the
# narrower bands are painted on top of it.
bands = [
    (10, 90, '#fde724'),
    (20, 80, '#79d151'),
    (30, 70, '#22a784'),
    (40, 60, '#29788e'),
]

for month, ax in g.axes_dict.items():
    tmp = df.loc[df.month_name == month]
    for lo, hi, band_color in bands:
        ax.fill_between(tmp.time_of_week, tmp[f'value_{lo}'], tmp[f'value_{hi}'],
                        alpha=1.0, color=band_color, label=f'{lo}th-{hi}th Percentile')
    ax.text(.8, .85, month, transform=ax.transAxes, fontweight='bold')
    # Show tick labels on every panel; one tick per day boundary.
    ax.xaxis.set_tick_params(which='both', labelbottom=True)
    ax.set_xticks(96*np.arange(8), [WEEKDAY_NAMES[i % 7][:3] for i in np.arange(8)])

g.fig.subplots_adjust(hspace=0.1, wspace=0.1)
# The plotted values are the z-scored features, not kWh, so label them as the
# other standardized plots in this file do.
g.set_axis_labels('', 'Standardized\nConsumption')
g.set_titles('')

# One figure-level legend built from the first panel's entries.
handles, labels = g.axes_dict[list(g.axes_dict.keys())[0]].get_legend_handles_labels()
g.fig.legend(handles, labels, loc='upper center', ncol=5, bbox_to_anchor=(0.5, 1.05))
plt.tight_layout()
plt.show()

# Weekday/weekend ratio per building: mean consumption over the first five
# days' slots (Mon-Fri) divided by the mean over the last two (Sat-Sun).
# Relies on the 'time_of_week' column already being present on each frame.
weekday_weekend_consumption_ratio = []
for df in building_data:
    x = df.groupby('time_of_week')[TARGET_COL].mean().values
    weekday_weekend_consumption_ratio.append(x[:96*5].mean() / x[96*5:].mean())
weekday_weekend_consumption_ratio = np.array(weekday_weekend_consumption_ratio)    


# 3x2 grid of weekly profiles: left column = three lowest ratios, right column
# = three highest. In both loops `building_num` is a 0-based index, hence the
# "+ 1" when printing the building number.
plt.figure(figsize=(10, 6))
# Three lowest ratios, reversed so the very lowest ends up in the bottom row.
for i, building_num in enumerate(np.argsort(weekday_weekend_consumption_ratio)[:3][::-1]):
    plt.subplot(3, 2, 2*i+1)
    plt.plot(weekly_average[building_num], label=f'Building {building_num + 1}: {weekday_weekend_consumption_ratio[building_num]:0.5g}')
    # Extra space above the curve (presumably so the legend does not cover it).
    plt.ylim(None, 1.5 * weekly_average[building_num].max())
    plt.legend(loc=2)
    plt.ylabel('Standardized\nConsumption')
    if i == 0: 
        plt.title('Lowest Weekday Weekend Consumption Ratio')    
    # Only the bottom row gets weekday tick labels; the others get blank ticks.
    if i == 2:
        plt.xticks(np.linspace(0, 672, 8), [WEEKDAY_NAMES[i % 7][:3] for i in np.arange(8)])
    else:
        plt.xticks(np.linspace(0, 672, 8), [])
        
# Three highest ratios, the highest in the top row.
for i, building_num in enumerate(np.argsort(-weekday_weekend_consumption_ratio)[:3]):
    plt.subplot(3, 2, 2*i+2)
    plt.plot(weekly_average[building_num], label=f'Building {building_num + 1}: {weekday_weekend_consumption_ratio[building_num]:0.5g}')
    plt.ylim(None, 1.75 * weekly_average[building_num].max())
    plt.legend(loc=1)
    if i == 0: 
        plt.title('Highest Weekday Weekend Consumption Ratio')    
    if i == 2:
        plt.xticks(np.linspace(0, 672, 8), [WEEKDAY_NAMES[i % 7][:3] for i in np.arange(8)])
    else:
        plt.xticks(np.linspace(0, 672, 8), [])
                
plt.tight_layout()
plt.show()

def calculate_spectral_flatness(signal):
    """Return the spectral flatness of a 1-D signal.

    The signal is scaled to unit RMS, its power spectrum is taken with a
    real FFT, and the geometric mean of that spectrum is divided by its
    arithmetic mean.
    """
    rms = np.sqrt(np.mean(signal**2))
    normalized = signal / rms
    power = np.abs(np.fft.rfft(normalized))**2
    # Geometric mean computed in log space: exp(mean(log(p))).
    mean_log_power = np.mean(np.log(power))
    return np.exp(mean_log_power) / np.mean(power)

# Spectral flatness of each building's full consumption series.
spectral_flatness = np.array([calculate_spectral_flatness(df[TARGET_COL]) for df in building_data])


# 3x2 grid of weekly profiles: left column = three lowest flatness values,
# right column = three highest. `building_num` is a 0-based index here, hence
# the "+ 1" in the labels.
plt.figure(figsize=(10, 6))
# Three lowest values, reversed so the very lowest ends up in the bottom row.
for i, building_num in enumerate(np.argsort(spectral_flatness)[:3][::-1]):
    plt.subplot(3, 2, 2*i+1)
    plt.plot(weekly_average[building_num], label=f'Building {building_num + 1}: {spectral_flatness[building_num]:0.5g}')
    # Extra space above the curve (presumably so the legend does not cover it).
    plt.ylim(None, 1.75 * weekly_average[building_num].max())
    plt.legend(loc=2)
    plt.ylabel('Standardized\nConsumption')
    if i == 0: 
        plt.title('Lowest Spectral Flatness')    
    # Only the bottom row gets weekday tick labels; the others get blank ticks.
    if i == 2:
        plt.xticks(np.linspace(0, 672, 8), [WEEKDAY_NAMES[i % 7][:3] for i in np.arange(8)])
    else:
        plt.xticks(np.linspace(0, 672, 8), [])
        
# Three highest values, the highest in the top row.
for i, building_num in enumerate(np.argsort(-spectral_flatness)[:3]):
    plt.subplot(3, 2, 2*i+2)
    plt.plot(weekly_average[building_num], label=f'Building {building_num + 1}: {spectral_flatness[building_num]:0.5g}')
    plt.ylim(None, 1.4 * weekly_average[building_num].max())
    plt.legend(loc=1)
    if i == 0: 
        plt.title('Highest Spectral Flatness')
    if i == 2:
        plt.xticks(np.linspace(0, 672, 8), [WEEKDAY_NAMES[i % 7][:3] for i in np.arange(8)])
    else:
        plt.xticks(np.linspace(0, 672, 8), [])
                
plt.tight_layout()
plt.show()

def smooth(signal, n=15, period=96):
    """Seasonal moving average: smooth each within-period slot across periods.

    The signal is laid out as one row per period and one column per slot.
    Every column is then averaged with a centred box window spanning ``n``
    periods on each side; the first and last rows are repeated at the edges
    so the output has the same length as the input.

    Parameters
    ----------
    signal : array-like
        1-D sequence whose length is a multiple of ``period``.
    n : int, optional
        Half-width of the window, in periods (window length ``2 * n + 1``).
    period : int, optional
        Number of samples per period. Defaults to 96, the value that was
        previously hard-coded.

    Returns
    -------
    numpy.ndarray
        1-D smoothed signal of the same length as ``signal``.

    Raises
    ------
    ValueError
        If the length of ``signal`` is not a multiple of ``period``
        (raised by the reshape).
    """
    window = 2 * n + 1
    kernel = np.full(window, 1 / window)

    by_period = np.asarray(signal).reshape(-1, period)
    if by_period.shape[0] == 0:
        # Nothing to smooth; edge padding is undefined for an empty axis.
        return by_period.flatten()

    # Repeat the first/last period n times so the 'valid' convolution below
    # yields exactly one output row per input row.
    padded = np.pad(by_period, ((n, n), (0, 0)), mode='edge')
    smoothed = np.apply_along_axis(np.convolve, 0, padded, kernel, mode='valid')
    return smoothed.flatten()

# Predictability per building: 1 - RMSE between the z-scored consumption and
# its smooth() baseline. The standardized series and the baseline are stored
# on each frame as 'standardized_energy' and 'prediction'.
predictability = []
for i, df in enumerate(building_data):
    mu, sigma = df[TARGET_COL].mean(), df[TARGET_COL].std()
    df['standardized_energy'] = (df[TARGET_COL].values - mu) / sigma
    df['prediction'] = smooth(df.standardized_energy.values)
    predictability.append(np.sqrt(np.mean((df.prediction - df.standardized_energy)**2)))
predictability =  1 - np.array(predictability)


# Month (3 = March) shown in the example plots below.
month = 3

# 3x2 grid: left column = three least predictable buildings, right column =
# three most predictable. `building_num` is a 0-based index here, hence the
# "+ 1" in the labels.
plt.figure(figsize=(10, 6))

# Lowest three scores in ascending order (the lowest in the top row).
for i, building_num in enumerate(np.argsort(predictability)[:3]):

    df = building_data[building_num]
    df = df.loc[df.timestamp.dt.month == month]

    plt.subplot(3, 2, 2 * i + 1)
    # NOTE(review): the plotted series is the raw TARGET_COL while the y label
    # below says "Standardized Consumption" - confirm which one is intended.
    plt.plot(df.timestamp, df[TARGET_COL],
             label=f'Building {building_num + 1}: {predictability[building_num]:0.5f}')
    plt.gca().yaxis.set_major_formatter('{x:.1f}')
    # Four evenly spaced date ticks; labels are blanked except on the bottom row.
    plt.xticks(pd.date_range('2018-03-01', '2018-04-01', periods=4), None if i == 2 else [])
    plt.ylim(None, 1.4 * df[TARGET_COL].max())
    plt.ylabel('Standardized\nConsumption')
    if i == 0: 
        plt.title('Lowest Predictability')
    plt.legend(loc=2)

    
# Highest three scores in descending order (the highest in the top row).
for i, building_num in enumerate(np.argsort(predictability)[-3:][::-1]):

    df = building_data[building_num]
    df = df.loc[df.timestamp.dt.month == month]

    plt.subplot(3, 2, 2 * i + 2)
    plt.plot(df.timestamp, df[TARGET_COL],
             label=f'Building {building_num + 1}: {predictability[building_num]:0.5f}')
    plt.xticks(pd.date_range('2018-03-01', '2018-04-01', periods=4), None if i == 2 else [])
    plt.ylim(None, 1.4 * df[TARGET_COL].max())
    if i == 0: plt.title('Highest Predictability')
    plt.legend(loc=2)

plt.tight_layout()
plt.show()

# Anomaly example for the building at index 15 (building number 16),
# restricted to the month selected in `month`.
df = building_data[15]
df = df.loc[df.timestamp.dt.month == month]
# An observation is flagged when the baseline prediction and the standardized
# consumption differ by more than 2.0 in absolute value.
anomalies = df.loc[np.abs(df.prediction - df.standardized_energy) > 2.0]

plt.figure(figsize=(10, 3))
# NOTE(review): raw TARGET_COL values are plotted while the y label says
# "Standardized Consumption" - confirm which one is intended.
plt.plot(df.timestamp, df[TARGET_COL])
# Flagged observations are overlaid as black crosses.
plt.plot(anomalies.timestamp, anomalies[TARGET_COL], 'kx')
plt.xticks(pd.date_range('2018-03-01', '2018-04-01', periods=7))
plt.ylabel('Standardized Consumption')
plt.tight_layout()
plt.show()

def summarize_cluster(cluster_metadata, cluster_name):
    """Build a display table that summarizes each group of a clustering.

    Parameters
    ----------
    cluster_metadata : pandas.DataFrame
        One row per building. Must contain the column named by
        ``cluster_name`` plus ``size_cluster``,
        ``total_yearly_energy_consumption``,
        ``weekday_weekend_consumption_ratio``, ``spectral_flatness`` and
        ``predictability``.
    cluster_name : str
        Column holding the cluster label to group by.

    Returns
    -------
    pandas.DataFrame
        Transposed summary: one row per metric and one column per cluster.
        The columns are a two-level index of (title-cased cluster name,
        cluster label). Counts and energy totals are pre-formatted strings;
        the remaining metrics are group means.
    """
    # Count percentages are relative to the number of buildings passed in,
    # not to a hard-coded dataset size.
    n_buildings = len(cluster_metadata)

    df = cluster_metadata.groupby(cluster_name).agg(
        building_count=(cluster_name, 'size'),
        residential_count=('size_cluster', lambda x: (x == 'Residential').sum()),
        commercial_count=('size_cluster', lambda x: (x == 'Commercial').sum()),
        sum_total_yearly_energy_consumption=('total_yearly_energy_consumption', 'sum'),
        mean_total_yearly_energy_consumption=('total_yearly_energy_consumption', 'mean'),
        mean_weekday_weekend_consumption_ratio=('weekday_weekend_consumption_ratio', 'mean'),
        mean_spectral_flatness=('spectral_flatness', 'mean'),
        mean_predictability=('predictability', 'mean'),
    )

    # Each group's share of the total consumption (computed before the sum
    # column is replaced by formatted text below).
    df['fraction_total_yearly_energy_consumption'] = (
        df['sum_total_yearly_energy_consumption'] / df['sum_total_yearly_energy_consumption'].sum()
    )

    # format: counts as "N (pct%)"; energy is divided by 1e6 / 1e3 and
    # labelled GWh / MWh respectively.
    for count_col in ('building_count', 'residential_count', 'commercial_count'):
        df[count_col] = [f'{x} ({100 * x / n_buildings:0.2f}%)' for x in df[count_col]]
    df['sum_total_yearly_energy_consumption'] = [
        f'{x/1e6:0.2f} GWh ({100*f:0.2f}%)'
        for x, f in zip(df.sum_total_yearly_energy_consumption, df.fraction_total_yearly_energy_consumption)
    ]
    df['mean_total_yearly_energy_consumption'] = [
        f'{x/1e3:0.2f} MWh' for x in df.mean_total_yearly_energy_consumption
    ]

    # Display order and display names of the rows (the helper fraction column
    # is dropped here because it is not listed).
    display_names = {
        'building_count': 'Building Count',
        'residential_count': 'Residential Building Count',
        'commercial_count': 'Commercial Building Count',
        'sum_total_yearly_energy_consumption': 'Group Consumption (yearly)',
        'mean_total_yearly_energy_consumption': 'Average Building Consumption (yearly)',
        'mean_weekday_weekend_consumption_ratio': 'Weekday/Weekend Consumption Ratio',
        'mean_spectral_flatness': 'Log Spectral Flatness',
        'mean_predictability': 'Predictability',
    }
    df = df[list(display_names)].rename(columns=display_names).T

    cluster_display_name = cluster_name.replace('_', ' ').title()
    df.columns = pd.MultiIndex.from_tuples([(cluster_display_name, x) for x in df.columns])

    return df


# Per-building feature table used by all clusterings below. Spectral flatness
# is stored on a log10 scale.
cluster_metadata = pd.DataFrame(dict(
    bldg_id = range(1,1278),
    total_yearly_energy_consumption=total_yearly_energy_consumption,
    weekday_weekend_consumption_ratio=weekday_weekend_consumption_ratio,
    spectral_flatness=np.log10(spectral_flatness),
    predictability=predictability,
))

# Size-based clusters: strictly above 35,000 is "Commercial", everything else
# is "Residential". The index arrays use the same boundary (<= for the small
# group) so a building exactly at the cutoff is not dropped from the histogram.
cluster_metadata['size_cluster'] = np.where(total_yearly_energy_consumption > 35000, 'Commercial', 'Residential')
large_indices = np.where(total_yearly_energy_consumption > 35000)[0]
small_indices = np.where(total_yearly_energy_consumption <= 35000)[0]

plt.figure(figsize=(10, 4))

# 50 bins on a log10 scale, with a bin edge exactly on the cutoff.
bins = np.linspace(-2, 3, 51) + np.log10(35000)
plt.hist(np.log10(total_yearly_energy_consumption[large_indices]), bins=bins, label='Large Buildings', color='C0')
plt.hist(np.log10(total_yearly_energy_consumption[small_indices]), bins=bins, label='Small Buildings', color='C1')
plt.axvline(np.log10(35000), linestyle='--', color='k', label='Cutoff Value (35,000 kWh)')
plt.xticks(range(3, 8), [f'1e{x}' for x in range(3, 8)])
plt.xlabel('Total Yearly Energy Consumption (kWh)')
plt.ylabel('Count')
plt.xlim(2.7, 7.6)
plt.legend()

plt.tight_layout()
plt.show()

# 'size_cluster' was assigned above and is not recomputed here.
summarize_cluster(cluster_metadata, 'size_cluster')

n_clusters = 3

# Cluster the standardized seasonal profiles with k-means. stderr is
# redirected to the null device while fitting; the handle is opened in a
# `with` so it is closed again afterwards.
with open(os.devnull, 'w') as devnull, contextlib.redirect_stderr(devnull):
    kmeans = KMeans(n_clusters=n_clusters, n_init='auto', random_state=1)
    seasonality_cluster_labels = kmeans.fit_predict(temporal_features)
cluster_metadata['seasonality_cluster'] = seasonality_cluster_labels

# Mean profile of each cluster, in long form with one row per
# (cluster, month, time-of-week slot).
df = pd.concat([
    pd.DataFrame(dict(
        cluster_num=cluster_num,
        month_name=np.repeat([x[:3] for x in MONTH_NAMES], 7*96),
        time_of_week=np.tile(range(7*96), 12),
        value=temporal_features[seasonality_cluster_labels == cluster_num].mean(0)
    ))
    for cluster_num in range(n_clusters)
])

color_palette = sns.color_palette('husl', n_colors=n_clusters)

g = sns.relplot(
    data=df,
    x='time_of_week', y='value', col='month_name', hue='cluster_num', units='cluster_num',
    estimator=None, kind='line', linewidth=2, zorder=5, col_wrap=3, height=2, aspect=2.0,
    palette=color_palette
)

for month, ax in g.axes_dict.items():
    ax.text(.8, .85, month, transform=ax.transAxes, fontweight='bold')
    # Show tick labels on every panel; one tick per day boundary.
    ax.xaxis.set_tick_params(which='both', labelbottom=True)
    ax.set_xticks(96*np.arange(8), [WEEKDAY_NAMES[i % 7][:3] for i in np.arange(8)])

g.fig.subplots_adjust(hspace=0.1, wspace=0.1)
# Replace seaborn's side legend with a horizontal one above the grid.
g._legend.set_visible(False)
g.fig.legend(handles=g._legend.legend_handles, ncol=n_clusters, loc='upper center',
             labels=[f'Cluster {i}' for i in range(n_clusters)], bbox_to_anchor=(0.5, 1.05))
# The plotted values are means of the z-scored features, not kWh.
g.set_axis_labels('', 'Standardized\nConsumption')
g.set_titles('')
plt.tight_layout()
plt.show()

# Give the k-means labels readable names: label 1 -> 'Winter Morning',
# label 2 -> 'Summer Evening', every other label -> 'Winter and Summer Noon'.
cluster_metadata['seasonality_cluster'] = np.where(
    seasonality_cluster_labels == 1,
    'Winter Morning',
    np.where(seasonality_cluster_labels == 2, 'Summer Evening', 'Winter and Summer Noon'),
)
summarize_cluster(cluster_metadata, 'seasonality_cluster')

# Weekday/weekend clusters: a ratio strictly above 1 means higher weekday
# consumption; a ratio of 1 or below falls in the weekend group.
cluster_metadata['weekday_weekend_cluster'] = np.where(weekday_weekend_consumption_ratio > 1, 'Higher Weekday Consumption', 'Higher Weekend Consumption')
weekday_indices = np.where(weekday_weekend_consumption_ratio > 1)[0]
weekend_indices = np.where(weekday_weekend_consumption_ratio <= 1)[0]

plt.figure(figsize=(10, 4))

# 50 equal-width bins from 0.5 to 3.0, with a bin edge exactly on the cutoff.
bins = np.linspace(0.5, 3.0, 51)
plt.hist(weekday_weekend_consumption_ratio[weekday_indices], bins=bins, label='Higher Weekday Consumption', color="C0")
plt.hist(weekday_weekend_consumption_ratio[weekend_indices], bins=bins, label='Higher Weekend Consumption', color="C1")
plt.axvline(1.0, linestyle='--', color='k', label='Cutoff Value (1.0)')
plt.xlabel('Weekday Weekend Consumption Ratio')
# The histograms are not normalised (no density=True), so the axis shows counts.
plt.ylabel('Count')
plt.legend()

plt.tight_layout()
plt.show()

summarize_cluster(cluster_metadata, 'weekday_weekend_cluster')

n_clusters = 2
# Two uncertainty features per building: predictability and log10 spectral
# flatness, each standardized to zero mean and unit variance.
uncertainty_features = np.vstack([predictability, np.log10(spectral_flatness)]).T
uncertainty_features -= uncertainty_features.mean(0, keepdims=True)
uncertainty_features /= uncertainty_features.std(0, keepdims=True)

# Fit a two-component Gaussian mixture. stderr is redirected to the null
# device while fitting; the handle is opened in a `with` so it is closed
# again afterwards.
with open(os.devnull, 'w') as devnull, contextlib.redirect_stderr(devnull):
    gmm = GaussianMixture(n_components=n_clusters, random_state=1)
    uncertainty_cluster_labels = gmm.fit_predict(uncertainty_features)

data = pd.DataFrame(uncertainty_features, columns=['Predictability', 'Spectral Flatness'])
data['Cluster'] = uncertainty_cluster_labels

sns.pairplot(data, hue='Cluster', palette='tab10', diag_kind='kde', markers='.',
             plot_kws={'alpha': 0.5}, height=2, aspect=2.5)
plt.suptitle('Pairplot of Uncertainty Features', y=1.02)
plt.show()

# Component 1 is named 'Low Uncertainty', the other 'High Uncertainty'.
cluster_metadata['uncertainty_cluster'] = np.where(uncertainty_cluster_labels == 1, 'Low Uncertainty', 'High Uncertainty')
summarize_cluster(cluster_metadata, 'uncertainty_cluster')

Feature Name	Type	Description
Total Yearly Energy Consumption	Size	Total energy consumption for the year.
Seasonality	Temporal	Average energy consumption for each month, day of week, and time of day (vector of length 8064).
Weekday Weekend Consumption Ratio	Temporal	Average weekday consumption divided by the average weekend consumption.
Spectral Flatness	Uncertainty	Logarithm of the ratio of the geometric mean to the arithmetic mean of the power spectrum of a signal.
Predictability	Uncertainty	One minus the RMSE between the standardized consumption and a baseline model's predictions (seasonal moving average).
Anomaly Detection	Uncertainty	Average consumption of observations that differ significantly from a baseline model's predictions.

Cluster Name	Type	Description
Size-Based Clusters	Size	Groups buildings into small (residential) and large (commercial).
Seasonality Clusters	Temporal	Groups buildings based on their seasonal profile.
Weekday vs Weekend Clusters	Temporal	Groups buildings by whether they have higher or lower average weekday consumption.
Uncertainty-Based Clusters	Uncertainty	Groups buildings into low and high uncertainty.

	Size Cluster
	Commercial	Residential
Building Count	661 (51.76%)	616 (48.24%)
Residential Building Count	0 (0.00%)	616 (48.24%)
Commercial Building Count	661 (51.76%)	0 (0.00%)
Group Consumption (yearly)	968.97 GWh (99.41%)	5.72 GWh (0.59%)
Average Building Consumption (yearly)	1465.92 MWh	9.28 MWh
Weekday/Weekend Consumption Ratio	1.357101	1.021364
Log Spectral Flatness	-3.114477	-1.805511
Predictability	0.432953	0.253383

Cluster Name: number (color)	Month-to-Month Peaks	Day-to-Day Pattern	Weekday/Weekend
Winter and Summer Noon: 0 (red)	Winter (Jan); Summer (June, July, Aug, Sep)	Large noon peaks throughout the year.	Different
Winter Morning: 1 (green)	Winter (Jan, Dec)	Large winter morning peaks.	Similar
Summer Evening: 2 (blue)	Summer (Jul, Aug)	Large evening peaks throughout the year.	Similar

	Seasonality Cluster
	Summer Evening	Winter Morning	Winter and Summer Noon
Building Count	339 (26.55%)	430 (33.67%)	508 (39.78%)
Residential Building Count	284 (22.24%)	308 (24.12%)	24 (1.88%)
Commercial Building Count	55 (4.31%)	122 (9.55%)	484 (37.90%)
Group Consumption (yearly)	49.60 GWh (5.09%)	96.41 GWh (9.89%)	828.68 GWh (85.02%)
Average Building Consumption (yearly)	146.33 MWh	224.21 MWh	1631.25 MWh
Weekday/Weekend Consumption Ratio	1.014854	1.08817	1.406015
Log Spectral Flatness	-1.998687	-2.135374	-3.100586
Predictability	0.268443	0.309385	0.429582

Power Patterns: Harnessing Electricity for Innovation¶

Exploratory Data Analysis (EDA)¶

Feature Engineering ¶

Total Yearly Energy Consumption ¶

Temporal Features ¶

Seasonality ¶

Weekday Weekend Consumption Ratio ¶

Uncertainty Features ¶

Spectral Flatness ¶

Predictability ¶

Anomaly Detection ¶

Unsupervised Learning ¶

Size-Based Clusters ¶

Temporal-Based Clusters ¶

Seasonality ¶

Weekday vs Weekend ¶

Uncertainty-Based Clusters ¶

Interpretation ¶

Small Building Aggregation ¶

Seasonal Demand Response ¶

Weekday Peak Reduction ¶

Reducing Grid Uncertainty with Battery Storage ¶

Conclusion ¶

References ¶

	Weekday Weekend Cluster
	Higher Weekday Consumption	Higher Weekend Consumption
Building Count	936 (73.30%)	341 (26.70%)
Residential Building Count	363 (28.43%)	253 (19.81%)
Commercial Building Count	573 (44.87%)	88 (6.89%)
Group Consumption (yearly)	889.18 GWh (91.23%)	85.51 GWh (8.77%)
Average Building Consumption (yearly)	949.98 MWh	250.76 MWh
Weekday/Weekend Consumption Ratio	1.291106	0.931756
Log Spectral Flatness	-2.642379	-2.045738
Predictability	0.362198	0.302781

	Uncertainty Cluster
	High Uncertainty	Low Uncertainty
Building Count	743 (58.18%)	534 (41.82%)
Residential Building Count	565 (44.24%)	51 (3.99%)
Commercial Building Count	178 (13.94%)	483 (37.82%)
Group Consumption (yearly)	162.01 GWh (16.62%)	812.68 GWh (83.38%)
Average Building Consumption (yearly)	218.05 MWh	1521.87 MWh
Weekday/Weekend Consumption Ratio	1.118174	1.302248
Log Spectral Flatness	-1.840491	-3.377114
Predictability	0.267943	0.4554

Program Name	Description
Small Building Aggregation	Aggregates small buildings' energy flexibility through partnerships with VPPs or aggregators.
Seasonal Demand Response	Optimizes energy consumption based on seasonal patterns using targeted pricing and efficiency measures.
Weekday Peak Reduction	Reduces peak weekday demand using time-of-use pricing to incentivize load shifting.
Reducing Grid Uncertainty with Battery Storage	Deploys battery storage in high uncertainty buildings to smooth out demand fluctuations.