| @ -0,0 +1,131 @@ | |||
| import numpy as np | |||
| import matplotlib.pyplot as plt | |||
###############################
# Original data
###############################
# Number of training samples. Every array below is sized from this single
# value so the sample count only has to be changed in one place.
m = 100
# Synthetic linear data: y = 4 + 3x plus standard-normal noise, x in [0, 2).
X = 2 * np.random.rand(m, 1)
y = 4 + 3 * X + np.random.randn(m, 1)
###############################
# Batch gradient descent on the full training set.
eta = 0.1  # learning rate
n_iterations = 1000
X_b = np.c_[np.ones((m, 1)), X]  # add x0 = 1 to each instance
theta = np.random.randn(2, 1)  # random initialization
# The two endpoints x = 0 and x = 2, used when drawing the fitted line.
X_new = np.array([[0], [2]])
X_new_b = np.c_[np.ones((2, 1)), X_new]  # add x0 = 1 to each instance
for iteration in range(n_iterations):
    # Gradient of the mean squared error computed over all m samples.
    gradients = 2 / m * X_b.T.dot(X_b.dot(theta) - y)
    # Rebind theta to a fresh array instead of updating it in place.
    theta = theta - eta * gradients
# Receives the parameter vectors visited by a recorded batch-GD run.
theta_path_bgd = []
def plot_gradient_descent(theta, eta, theta_path=None, n_iterations=1000):
    """Run batch gradient descent from ``theta`` and plot its first steps.

    Draws the training points on the current matplotlib axes, then the
    model's line for each of the first ten iterations: the starting line
    dashed red, the following ones solid blue.  Reads the module-level
    ``X``, ``y``, ``X_b``, ``X_new`` and ``X_new_b``.

    Args:
        theta: Initial parameter vector.  The caller's array is not
            modified; the local name is rebound to a new array each step.
        eta: Learning rate; also shown in the plot title.
        theta_path: Optional list.  When given, the parameter vector
            obtained after every step is appended to it.
        n_iterations: Number of gradient steps to run.  Defaults to 1000,
            the value that used to be hard-coded in the body.
    """
    m = len(X_b)
    plt.plot(X, y, "b.")
    for iteration in range(n_iterations):
        if iteration < 10:
            # Only the first ten models are drawn to keep the plot readable.
            y_predict = X_new_b.dot(theta)
            style = "b-" if iteration > 0 else "r--"
            plt.plot(X_new, y_predict, style)
        gradients = 2 / m * X_b.T.dot(X_b.dot(theta) - y)
        # Rebind rather than update in place: arrays already stored in
        # theta_path (and the caller's theta) must keep their values.
        theta = theta - eta * gradients
        if theta_path is not None:
            theta_path.append(theta)
    plt.xlabel("$x_1$", fontsize=18)
    plt.axis([0, 2, 0, 15])
    plt.title(r"$\eta = {}$".format(eta), fontsize=16)
# Fixed seed so the starting point shared by the three panels is reproducible.
np.random.seed(42)
theta = np.random.randn(2, 1)  # random initialization

# One panel per learning rate, side by side in a single row.
plt.figure(figsize=(10, 4))

plt.subplot(1, 3, 1)
plot_gradient_descent(theta, eta=0.02)
plt.ylabel("$y$", rotation=0, fontsize=18)  # y label on the left panel only

# The middle run is the only one whose parameter path is recorded.
plt.subplot(1, 3, 2)
plot_gradient_descent(theta, eta=0.1, theta_path=theta_path_bgd)

plt.subplot(1, 3, 3)
plot_gradient_descent(theta, eta=0.5)

plt.show()

# Stochastic gradient descent configuration.
theta_path_sgd = []
m = len(X_b)
np.random.seed(42)
n_epochs = 50
# Learning schedule hyperparameters.
t0 = 5
t1 = 50
def learning_schedule(t):
    """Return the learning rate for step ``t``, computed as ``t0 / (t + t1)``.

    ``t0`` and ``t1`` are module-level names looked up each time the
    function is called.
    """
    shifted_step = t + t1
    return t0 / shifted_step
theta = np.random.randn(2, 1)  # random initialization
for epoch in range(n_epochs):
    for i in range(m):
        # Draw the current model's line for the first 20 steps of epoch 0.
        if i < 20 and epoch == 0:
            y_predict = X_new_b.dot(theta)
            if i > 0:
                style = "b-"
            else:
                style = "r--"  # the starting line is dashed red
            plt.plot(X_new, y_predict, style)
        # Pick one training instance at random; an independent draw per step.
        random_index = np.random.randint(m)
        # One-row slices keep xi and yi two-dimensional.
        xi = X_b[random_index:random_index + 1]
        yi = y[random_index:random_index + 1]
        gradients = 2 * xi.T.dot(xi.dot(theta) - yi)
        # The step counter passed to the schedule grows across epochs.
        eta = learning_schedule(epoch * m + i)
        # Rebind instead of updating in place so that the arrays already
        # stored in theta_path_sgd keep their values.
        theta = theta - eta * gradients
        theta_path_sgd.append(theta)
# Overlay the training points and label the axes.
plt.plot(X, y, "b.")
plt.xlabel("$x_1$", fontsize=18)
plt.ylabel("$y$", rotation=0, fontsize=18)
plt.axis([0, 2, 0, 15])
plt.show()
# Mini-batch gradient descent configuration.
minibatch_size = 20
n_iterations = 50
theta_path_mgd = []  # receives the parameter vector after every update

# Learning schedule hyperparameters.
t0 = 200
t1 = 1000

# Seed immediately before drawing so the starting point is reproducible.
np.random.seed(42)
theta = np.random.randn(2, 1)  # random initialization
def learning_schedule(t):
    """Return the learning rate for step ``t``, computed as ``t0 / (t + t1)``.

    ``t0`` and ``t1`` are module-level names looked up each time the
    function is called.
    """
    shifted_step = t + t1
    return t0 / shifted_step
t = 0  # running count of updates, fed to the learning schedule
for epoch in range(n_iterations):
    # Reshuffle every epoch; the same permutation is applied to the inputs
    # and the targets so rows stay paired.
    shuffled_indices = np.random.permutation(m)
    X_b_shuffled = X_b[shuffled_indices]
    y_shuffled = y[shuffled_indices]
    for i in range(0, m, minibatch_size):
        t += 1
        xi = X_b_shuffled[i:i + minibatch_size]
        yi = y_shuffled[i:i + minibatch_size]
        # Average over the rows actually present in this batch: the final
        # slice is shorter than minibatch_size whenever m is not a multiple
        # of it, and dividing by the nominal size would under-scale it.
        gradients = 2 / len(xi) * xi.T.dot(xi.dot(theta) - yi)
        eta = learning_schedule(t)
        # Rebind instead of updating in place so that the arrays already
        # stored in theta_path_mgd keep their values.
        theta = theta - eta * gradients
        theta_path_mgd.append(theta)
# Turn each recorded list of parameter vectors into a single array so the
# two parameters can be sliced out along the second axis.
theta_path_bgd, theta_path_sgd, theta_path_mgd = map(
    np.array, (theta_path_bgd, theta_path_sgd, theta_path_mgd)
)

# Compare the three optimisers' trajectories in parameter space.
plt.figure(figsize=(7, 4))
for path, fmt, width, name in (
    (theta_path_sgd, "r-s", 1, "Stochastic"),
    (theta_path_mgd, "g-+", 2, "Mini-batch"),
    (theta_path_bgd, "b-o", 3, "Batch"),
):
    plt.plot(path[:, 0], path[:, 1], fmt, linewidth=width, label=name)
plt.legend(loc="upper left", fontsize=16)
plt.xlabel(r"$\theta_0$", fontsize=20)
plt.ylabel(r"$\theta_1$ ", fontsize=20, rotation=0)
plt.axis([2.5, 4.5, 2.3, 3.9])
plt.show()