"""Fit a line to housing data (sqft vs. price) via stochastic gradient descent.

Both columns are z-score normalized before fitting, so the learned slope and
intercept are in normalized coordinates. Produces three figures: the raw data,
the normalized data with the fitted line, and the training error per iteration.
"""
import numpy as np
import matplotlib.pyplot as plt

# Data: [square footage, price]
X = np.array([
    [1100, 199000],
    [1400, 245000],
    [1425, 319000],
    [1550, 240000],
    [1600, 312000],
    [1700, 279000],
    [1700, 310000],
    [1875, 308000],
    [2350, 405000],
    [2450, 324000]
])

# Step 0: visualize the raw data on its own figure. Raw and normalized
# scales differ by orders of magnitude, so they must not share axes.
plt.figure(1)
plt.plot(X[:, 0], X[:, 1], 'k^-')
plt.title("Original Data")

# Normalize each column to zero mean / unit variance (z-score).
mx = np.mean(X, axis=0)
sx = np.std(X, axis=0)
Xs = (X - mx) / sx

# BUG FIX: the normalized data previously overplotted figure 1 on top of
# the raw data (and clobbered its title); give it its own figure.
plt.figure(2)
plt.plot(Xs[:, 0], Xs[:, 1], 'k^-')
plt.title("Normalized Data")

# Split into input (sqft, normalized) and target (price, normalized).
x = Xs[:, 0]
t = Xs[:, 1]

# Step 1: initialize parameters.
m = np.random.randn()   # slope (random start)
b = np.random.randn()   # intercept (random start)
MaxIters = 150          # iteration cap
alpha = 0.01            # learning rate
tol = 1e-6              # stop when the squared gradient norm falls below this
Error = []              # sum of squared errors over ALL points, per iteration

# Main loop: stochastic gradient descent — one random sample per update.
for i in range(MaxIters):
    y = m * x + b
    ErrVec = t - y
    Error.append(np.sum(ErrVec ** 2))

    # Step 3: gradient of the squared error at one randomly chosen point.
    r = np.random.randint(len(x))
    Em = -2 * x[r] * ErrVec[r]   # d/dm of (t_r - (m*x_r + b))**2
    Eb = -2 * ErrVec[r]          # d/db of (t_r - (m*x_r + b))**2

    # Step 4: gradient-descent update.
    m -= alpha * Em
    b -= alpha * Eb

    grad_norm_sq = Em ** 2 + Eb ** 2
    print(f'Gradient is {grad_norm_sq:.6f}')
    if grad_norm_sq <= tol:
        print(f'Solution found in {i + 1} steps')
        break

# Closing: error history on its own figure.
plt.figure(3)
plt.plot(Error)
plt.title("Error vs Iteration")
plt.xlabel("Iteration")
plt.ylabel("Error")

# BUG FIX: the fitted line is in normalized coordinates, so it belongs on
# the normalized-data figure (2), not on the raw-data figure. The legend now
# matches exactly the two artists present on that figure.
plt.figure(2)
xfit = np.linspace(np.min(x), np.max(x), 100)
plt.plot(xfit, m * xfit + b, 'r-')
plt.legend(['Data', 'Fitted Line'])
plt.show()