import numpy as np import matplotlib.pyplot as plt ############################### #Datos originales ############################### X = 2 * np.random.rand(100, 1) y = 4 + 3 * X + np.random.randn(100,1) ############################### eta = 0.1 n_iterations = 1000 m = 100 X_b = np.c_[np.ones((100, 1)), X] # add x0 = 1 to each instance theta = np.random.randn(2,1) X_new = np.array([[0], [2]]) X_new_b = np.c_[np.ones((2, 1)), X_new] # add x0 = 1 to each instance for iteration in range(n_iterations): gradients = 2/m * X_b.T.dot(X_b.dot(theta) - y) theta = theta - eta * gradients theta_path_bgd = [] def plot_gradient_descent(theta, eta, theta_path=None): m = len(X_b) plt.plot(X, y, "b.") n_iterations = 1000 for iteration in range(n_iterations): if iteration < 10: y_predict = X_new_b.dot(theta) style = "b-" if iteration > 0 else "r--" plt.plot(X_new, y_predict, style) gradients = 2/m * X_b.T.dot(X_b.dot(theta) - y) theta = theta - eta * gradients if theta_path is not None: theta_path.append(theta) plt.xlabel("$x_1$", fontsize=18) plt.axis([0, 2, 0, 15]) plt.title(r"$\eta = {}$".format(eta), fontsize=16) np.random.seed(42) theta = np.random.randn(2,1) # random initialization plt.figure(figsize=(10,4)) plt.subplot(131); plot_gradient_descent(theta, eta=0.02) plt.ylabel("$y$", rotation=0, fontsize=18) plt.subplot(132); plot_gradient_descent(theta, eta=0.1, theta_path=theta_path_bgd) plt.subplot(133); plot_gradient_descent(theta, eta=0.5) plt.show() theta_path_sgd = [] m = len(X_b) np.random.seed(42) n_epochs = 50 t0, t1 = 5, 50 # learning schedule hyperparameters def learning_schedule(t): return t0 / (t + t1) theta = np.random.randn(2,1) # random initialization for epoch in range(n_epochs): for i in range(m): if epoch == 0 and i < 20: # not shown in the book y_predict = X_new_b.dot(theta) # not shown style = "b-" if i > 0 else "r--" # not shown plt.plot(X_new, y_predict, style) # not shown random_index = np.random.randint(m) xi = X_b[random_index:random_index+1] yi = y[random_index:random_index+1] gradients = 2 * xi.T.dot(xi.dot(theta) - yi) eta = learning_schedule(epoch * m + i) theta = theta - eta * gradients theta_path_sgd.append(theta) # not shown plt.plot(X, y, "b.") # not shown plt.xlabel("$x_1$", fontsize=18) # not shown plt.ylabel("$y$", rotation=0, fontsize=18) # not shown plt.axis([0, 2, 0, 15]) # not shown plt.show() # not shown theta_path_mgd = [] n_iterations = 50 minibatch_size = 20 np.random.seed(42) theta = np.random.randn(2,1) # random initialization t0, t1 = 200, 1000 def learning_schedule(t): return t0 / (t + t1) t = 0 for epoch in range(n_iterations): shuffled_indices = np.random.permutation(m) X_b_shuffled = X_b[shuffled_indices] y_shuffled = y[shuffled_indices] for i in range(0, m, minibatch_size): t += 1 xi = X_b_shuffled[i:i+minibatch_size] yi = y_shuffled[i:i+minibatch_size] gradients = 2/minibatch_size * xi.T.dot(xi.dot(theta) - yi) eta = learning_schedule(t) theta = theta - eta * gradients theta_path_mgd.append(theta) theta_path_bgd = np.array(theta_path_bgd) theta_path_sgd = np.array(theta_path_sgd) theta_path_mgd = np.array(theta_path_mgd) plt.figure(figsize=(7,4)) plt.plot(theta_path_sgd[:, 0], theta_path_sgd[:, 1], "r-s", linewidth=1, label="Stochastic") plt.plot(theta_path_mgd[:, 0], theta_path_mgd[:, 1], "g-+", linewidth=2, label="Mini-batch") plt.plot(theta_path_bgd[:, 0], theta_path_bgd[:, 1], "b-o", linewidth=3, label="Batch") plt.legend(loc="upper left", fontsize=16) plt.xlabel(r"$\theta_0$", fontsize=20) plt.ylabel(r"$\theta_1$ ", fontsize=20, rotation=0) plt.axis([2.5, 4.5, 2.3, 3.9]) plt.show()