Case 2: $CO_2$ emissions from traffic
**What's the propagated uncertainty? *How large will the $CO_2$ emissions be?***
In this project, you have chosen to work on the uncertainty in the number of cars and heavy vehicles in order to estimate their $CO_2$ emissions. You have observations every five minutes of the number of cars, $C$, and the number of heavy vehicles, $H$ (more on the dataset here). As you know, traffic is an important source of the $CO_2$ emissions that contribute to the greenhouse effect. Here, the emitted $CO_2$ will be estimated for a trip of 1 km using the emission intensities from Nishimura and Haga (2023), assuming that the cars run on gasoline and the heavy vehicles on diesel. Emissions can be computed as
$$ CO_2 = 143\,C + 469\,H $$
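As a quick sanity check of the formula, here is a minimal evaluation for illustrative values (these numbers are made up for the example, not taken from the dataset):
# Illustrative check of the emissions formula (example values, not data)
C_example, H_example = 70, 30
co2_example = 143 * C_example + 469 * H_example
print(co2_example, 'g')  # 143*70 + 469*30 = 24080 g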
The goal of this project is:
- Choose a reasonable distribution function for $H$ and $C$.
- Fit the chosen distributions to the observations of $H$ and $C$.
- Assuming $H$ and $C$ are independent, propagate their distributions to obtain the distribution of emissions of $CO_2$.
- Analyze the distribution of emissions of $CO_2$.
Importing packages
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from math import ceil, trunc
plt.rcParams.update({'font.size': 14})
1. Explore the data
The first step in the analysis is to explore the data, both visually and through its statistics.
# Import
C, H = np.genfromtxt('dataset_traffic.csv', delimiter=",", unpack=True, skip_header=True)
# plot time series
fig, ax = plt.subplots(2, 1, figsize=(10, 7), layout = 'constrained')
ax[0].plot(H,'k')
ax[0].set_xlabel('Time')
ax[0].set_ylabel('Number of heavy vehicles, H')
ax[0].grid()
ax[1].plot(C,'k')
ax[1].set_xlabel('Time')
ax[1].set_ylabel('Number of cars, C')
ax[1].grid()
# Statistics for H
print(stats.describe(H))
DescribeResult(nobs=2976, minmax=(np.float64(0.0), np.float64(68.0)), mean=np.float64(30.604502688172044), variance=np.float64(151.77932761814404), skewness=np.float64(0.12898824408904086), kurtosis=np.float64(-0.4573152711666002))
# Statistics for C
print(stats.describe(C))
DescribeResult(nobs=2976, minmax=(np.float64(6.0), np.float64(180.0)), mean=np.float64(68.69657258064517), variance=np.float64(2102.2860521143944), skewness=np.float64(0.29528679621238874), kurtosis=np.float64(-1.1608245630406528))
Task 1:
Describe the data based on the previous statistics:
- Which variable presents a higher variability?
- What does the skewness coefficient mean? Which kind of distribution functions should we consider to fit them?
- $C$ presents a higher variance, but also a higher mean. If we compute the coefficient of variation (standard deviation divided by mean) to standardize that variability, we obtain $CV(H)=\sqrt{151.8}/30.6 \approx 0.40$ and $CV(C)=\sqrt{2102.3}/68.7 \approx 0.67$. Thus, $C$ has a higher variability than $H$ (verified in the snippet after this list).
- Both $H$ and $C$ have a positive skewness that is close to zero, so the data do not present a clear right tail and we can assume that their distributions are roughly symmetric. An appropriate distribution for $H$ and $C$ would be one that (1) is bounded at 0 (negative values of $H$ or $C$ are physically impossible), and (2) is symmetric. Among the distributions you have been introduced to, the Uniform or the Gaussian (although the latter is not bounded) would be possibilities.
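The coefficients of variation quoted above can be verified directly from the data; a minimal check with NumPy:
# Coefficient of variation: sample standard deviation divided by the mean
print(f'CV(H) = {np.std(H, ddof=1) / np.mean(H):.2f}')  # approx. 0.40
print(f'CV(C) = {np.std(C, ddof=1) / np.mean(C):.2f}')  # approx. 0.67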
2. Empirical distribution functions
Now, we are going to compute and plot the empirical PDF and CDF for each variable. Note that you have the pseudo-code for the empirical CDF in the reader.
Task 2:
Define a function to compute the empirical CDF. Plot the empirical PDF and CDF.
def ecdf(var):
    x = np.sort(var)  # sort the values from small to large
    n = x.size  # determine the number of datapoints
    y = np.arange(1, n+1) / (n+1)  # empirical non-exceedance probabilities
    return [y, x]
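As a quick usage example of ecdf (illustrative, not required by the task), the empirical non-exceedance probability of a given traffic level can be read off directly:
# Empirical P[H <= 30], read from the ECDF defined above
y_H, x_H = ecdf(H)
print('P[H <= 30] =', round(y_H[x_H <= 30][-1], 2))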
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
axes[0].hist(H, edgecolor='k', linewidth=0.2,
color='cornflowerblue', label='Number of heavy vehicles, H', density = True)
axes[0].set_xlabel('Random variable (X)')
axes[0].set_ylabel('density')
axes[0].hist(C, edgecolor='k', linewidth=0.2, alpha = 0.5,
color='grey', label='Number of cars, C', density = True)
axes[0].set_title('PDF', fontsize=18)
axes[0].grid()
axes[0].legend()
axes[1].step(ecdf(H)[1], ecdf(H)[0],
color='cornflowerblue', label='Number of heavy vehicles, H')
axes[1].set_xlabel('Random variable (X)')
axes[1].set_ylabel('${P[X \\leq x]}$')
axes[1].step(ecdf(C)[1], ecdf(C)[0],
color='grey', label='Number of cars, C')
axes[1].set_title('CDF', fontsize=18)
axes[1].legend()
axes[1].grid()
Task 3:
Based on the results of Task 1 and the empirical PDF and CDF, select one distribution to fit to each variable. For $H$, select between the Gumbel and Gaussian distributions; for $C$, choose between the Uniform and Lognormal.
- $H$: Gaussian, consistent with its roughly symmetric, bell-shaped empirical PDF.
- $C$: Uniform, consistent with its roughly symmetric, flat empirical PDF (its kurtosis of $-1.16$ is close to the Uniform's $-1.2$).
3. Fitting a distribution
Task 4:
Fit the selected distributions to the observations using MLE.
Hint: Use SciPy's built-in functions (watch out for the parameter definitions!).
params_H = stats.norm.fit(H)
params_C = stats.uniform.fit(C)
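Note that SciPy returns the fitted parameters as a (loc, scale) tuple for both of these distributions: for stats.norm, loc is the mean and scale the standard deviation, while for stats.uniform the support runs from loc to loc + scale. A quick inspection:
# stats.norm: loc = mean, scale = standard deviation
print(f'Gaussian fit for H: mean = {params_H[0]:.1f}, std = {params_H[1]:.1f}')
# stats.uniform: support runs from loc to loc + scale
print(f'Uniform fit for C: lower = {params_C[0]:.1f}, upper = {params_C[0] + params_C[1]:.1f}')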
4. Assessing goodness of fit
Task 5:
Assess the goodness of fit of the selected distribution using:
- One graphical method: QQ-plot or log-scale plot. Choose one.
- Kolmogorov-Smirnov test.
Hint: You have the Kolmogorov-Smirnov test implemented in SciPy.
#Graphical method
#Logscale
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
axes[0].step(ecdf(H)[1], 1-ecdf(H)[0],
color='k', label='Number of heavy vehicles, H')
axes[0].plot(ecdf(H)[1], 1-stats.norm.cdf(ecdf(H)[1], *params_H),
'--', color = 'grey', label='Gaussian')
axes[0].set_xlabel('Number of heavy vehicles, H')
axes[0].set_ylabel('${P[X > x]}$')
axes[0].set_title('H', fontsize=18)
axes[0].set_yscale('log')
axes[0].legend()
axes[0].grid()
axes[1].step(ecdf(C)[1], 1-ecdf(C)[0],
color='k', label='Number of cars, C')
axes[1].plot(ecdf(C)[1], 1-stats.uniform.cdf(ecdf(C)[1], *params_C),
'--', color = 'grey', label='Uniform')
axes[1].set_xlabel('Number of cars, C')
axes[1].set_ylabel('${P[X > x]}$')
axes[1].set_title('C', fontsize=18)
axes[1].set_yscale('log')
axes[1].legend()
axes[1].grid()
# QQplot
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
axes[0].plot([trunc(min(H)), ceil(max(H))], [trunc(min(H)), ceil(max(H))], 'k')
axes[0].scatter(ecdf(H)[1], stats.norm.ppf(ecdf(H)[0], *params_H),
color='grey', label='Gaussian')
axes[0].set_xlabel('Observed H')
axes[0].set_ylabel('Estimated H')
axes[0].set_title('H', fontsize=18)
axes[0].set_xlim([trunc(min(H)), ceil(max(H))])
axes[0].set_ylim([trunc(min(H)), ceil(max(H))])
axes[0].legend()
axes[0].grid()
axes[1].plot([trunc(min(C)), ceil(max(C))], [trunc(min(C)), ceil(max(C))], 'k')
axes[1].scatter(ecdf(C)[1], stats.uniform.ppf(ecdf(C)[0], *params_C),
color='grey', label='Uniform')
axes[1].set_xlabel('Observed C')
axes[1].set_ylabel('Estimated C')
axes[1].set_title('C', fontsize=18)
axes[1].set_xlim([trunc(min(C)), ceil(max(C))])
axes[1].set_ylim([trunc(min(C)), ceil(max(C))])
axes[1].legend()
axes[1].grid()
# Kolmogorov-Smirnov test
_, p_H = stats.kstest(H,stats.norm.cdf, args=params_H)
_, p_C = stats.kstest(C,stats.uniform.cdf, args=params_C)
print('The p-value for the fitted Gaussian distribution to H is:', round(p_H, 3))
print('The p-value for the fitted Uniform distribution to C is:', round(p_C, 3))
The p-value for the fitted Gaussian distribution to H is: 0.001
The p-value for the fitted Uniform distribution to C is: 0.0
Task 6:
Interpret the results of the GOF techniques. How does the selected parametric distribution perform?
- Log-scale plot: this technique allows us to visually assess the fit of the parametric distribution to the tail of the empirical distribution. We can confirm here that $H$ and $C$ present fairly symmetric distributions, as the Gaussian and Uniform distributions capture the behavior in the tail reasonably well.
- QQ-plot: the Gaussian distribution provides a very good fit for $H$, whether we look at the central part of the distribution or at the tails. On the contrary, the Uniform distribution performs poorly for $C$, as the observations lie far from the 45-degree line along the whole range of values.
- Kolmogorov-Smirnov test: remember that the test statistic measures the difference between two distributions. The p-value then represents the probability of observing a difference at least that large for a sample drawn from the assumed distribution. If the p-value is lower than the significance level ($\alpha=0.05$, for instance), the null hypothesis is rejected. Taking $\alpha=0.05$ here, we reject that $H$ comes from a Gaussian distribution and that $C$ comes from a Uniform distribution. Visually, however, the Gaussian distribution seems a fairly good fit for $H$. (The decision rule is written out in the snippet after this list.)
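A minimal check of the rejection decision, using the p-values computed above:
alpha = 0.05  # significance level
print('Reject Gaussian for H:', p_H < alpha)  # True, since p = 0.001 < 0.05
print('Reject Uniform for C:', p_C < alpha)   # True, since p = 0.0 < 0.05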
5. Propagating the uncertainty
Using the fitted distributions, we are going to propagate the uncertainty from $H$ and $C$ to the emissions of $CO_2$ assuming that $H$ and $C$ are independent.
Task 7:
- Draw 10,000 random samples from the fitted distribution functions for $H$ and $C$.
- Compute the $CO_2$ emissions for each pair of samples.
- Compute the $CO_2$ emissions for the observations.
- Plot the PDF and the exceedance curve in log-scale of the $CO_2$ emissions computed from both the simulations and the observations.
# Here, the solution is shown for the Gaussian (H) and Uniform (C) distributions
# Draw random samples
rs_H = stats.norm.rvs(*params_H, size = 10000)
rs_C = stats.uniform.rvs(*params_C, size = 10000)
# Compute CO2 emissions for each pair of samples
rs_CO2 = 469 * rs_H + 143 * rs_C
# Repeat for the observations
CO2 = 469 * H + 143 * C
# Plot the PDF and the exceedance curve
fig, axes = plt.subplots(1, 2, figsize=(12, 7))
axes[0].hist(rs_CO2, edgecolor='k', linewidth=0.2, density = True, label = 'From simulations')
axes[0].hist(CO2, edgecolor='k', facecolor = 'orange', alpha = 0.5, linewidth=0.2,
density = True, label = 'From observations')
axes[0].set_xlabel('Emissions of CO2 (g)')
axes[0].set_ylabel('density')
axes[0].set_title('PDF', fontsize=18)
axes[0].legend()
axes[0].grid()
axes[1].step(ecdf(rs_CO2)[1], 1-ecdf(rs_CO2)[0], label = 'From simulations')
axes[1].step(ecdf(CO2)[1], 1-ecdf(CO2)[0], color = 'orange', label = 'From observations')
axes[1].set_xlabel('Emissions of CO2 (g)')
axes[1].set_ylabel('${P[X > x]}$')
axes[1].set_title('Exceedance plot', fontsize=18)
axes[1].set_yscale('log')
axes[1].legend()
axes[1].grid()
Task 8:
Interpret the figures above, answering the following questions:
- In the PDF plot, we can see that the shape of the distribution is similar, although the mode of the simulations is higher than that of the observations. In the exceedance plot, we can also see this shift of the $CO_2$ distribution towards higher values when using the simulations. Thus, for the same probability, higher values of $CO_2$ are obtained from the simulated distribution, which is on the safe side.
- Disadvantages: we are assuming that $H$ and $C$ are independent (we will see how to address this issue next week), but is that true? Also, the results are conditioned on how good a model the selected parametric distribution is. In this case, since the Uniform distribution performs poorly for $C$, the obtained distribution of $CO_2$ deviates from the one obtained from the observations. Advantages: we can draw as many samples as we want, allowing the computation of events that have not yet been observed (extreme events). (A numerical comparison is given below this list.)
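To back the visual comparison with numbers, a quick summary of both $CO_2$ samples (reusing the rs_CO2 and CO2 arrays computed above):
# Compare summary statistics of the simulated and observed CO2 emissions
for label, sample in [('Simulated', rs_CO2), ('Observed', CO2)]:
    print(f'{label}: mean = {np.mean(sample):.0f} g, std = {np.std(sample, ddof=1):.0f} g, '
          f'99th percentile = {np.percentile(sample, 99):.0f} g')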
If you run the code in the cell below, you will obtain a scatter plot of both variables. Explore the relationship between both variables and answer the following questions:
Task 9:
- Observe the plot below. What differences do you observe between the generated samples and the observations?
- Compute the correlation between $H$ and $C$ for the samples and for the observations. Are there differences?
- What could be improved in the previous analysis? Do you have any ideas/suggestions on how to implement those improvements?
fig, axes = plt.subplots(1, 1, figsize=(7, 7))
axes.scatter(rs_H, rs_C, 40, 'k', label = 'Simulations')
axes.scatter(H, C, 40, 'r','x', label = 'Observations')
axes.set_xlabel('Number of heavy vehicles, H ')
axes.set_ylabel('Number of cars, C')
axes.legend()
axes.grid()
#Correlation
correl = stats.pearsonr(H, C)
correl_rs = stats.pearsonr(rs_H, rs_C)
print('The correlation between the observations is:', correl[0])
print('The correlation between the simulations is:', correl_rs[0])
The correlation between the observations is: 0.2660768676548393
The correlation between the simulations is: -0.013637688852272484
- The observations are concentrated in one area of the plot, while the simulations are somewhat more spread out. This is because the observations are dependent on each other: there is a physical relationship between the number of cars and the number of heavy vehicles, while the simulations were generated under the independence assumption. Moreover, negative numbers of vehicles are sampled (from the Gaussian fitted to $H$), which have no physical meaning.
- There is a correlation of 0.27 between the observed $H$ and $C$, indicating dependence between the variables. On the contrary, no significant correlation is observed between the generated samples.
- Some suggestions: improve the fit of $C$ with a different distribution, and account for the dependence between the two variables (a possible sketch is shown below).
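One possible way to account for the dependence, sketched here under the assumption that a Gaussian copula is adequate (dependence modeling is covered next week): draw correlated standard-normal pairs, map them to uniforms, and transform them through the fitted marginals with the inverse CDF.
# Sketch: induce the observed correlation between H and C via a Gaussian copula
rho = correl[0]  # Pearson correlation estimated from the observations (approx. 0.27)
z = stats.multivariate_normal.rvs(mean=[0, 0], cov=[[1, rho], [rho, 1]], size=10000)
u = stats.norm.cdf(z)  # map to uniform marginals on [0, 1]
rs_H_dep = stats.norm.ppf(u[:, 0], *params_H)     # fitted Gaussian marginal for H
rs_C_dep = stats.uniform.ppf(u[:, 1], *params_C)  # fitted Uniform marginal for C
# The resulting Pearson correlation is close to (though not exactly) rho
print('Correlation of the dependent samples:', stats.pearsonr(rs_H_dep, rs_C_dep)[0])
These dependent samples could then replace rs_H and rs_C in the propagation step above.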
End of notebook.