batch gradient descent using softmax regression - stuck

I've written batch gradient descent with softmax regression, but the predicted class is always either 0 or 6.

I think I'm missing something but can't figure out where!

import numpy as np

def softmax(z):
    total = sum(np.exp(x) for x in z)
    return np.array([(np.exp(k) / total) for k in z])

def one_hot_encoding(ys):
    if len(ys) == 0:
        return np.array([])
    k = np.max(ys)
    array = []
    for y in ys:
        row = np.zeros(k + 1, dtype=int)
        row[y] = 1
        array.append(row)
    return np.array(array).reshape(len(ys), k + 1)

def softmax_regression(xs, ys, learning_rate, num_iterations):
    tau = one_hot_encoding(ys)
    # number of features = xs.shape[1], number of classes = tau.shape[1]
    theta = np.zeros((xs.shape[1], tau.shape[1]))
    bias = np.zeros(tau.shape[1])
    for _ in range(num_iterations):
        total_mean_error = 0
        total_gradient = 0
        for i in range(xs.shape[0]):
            z = np.dot(xs[i], theta) + bias
            o = softmax(z)
            error = o - tau[i]
            total_mean_error += np.mean(error)
            total_gradient += (error * xs[i])
        theta += (learning_rate * total_gradient) + (theta * learning_rate)
        bias += learning_rate * total_mean_error
    print(theta, bias)

    def model(xs, theta=theta, bias=bias):
        z = (theta.T * xs) + bias
        soft = softmax(z)
        return soft.argmax()

    return model

# Test
training_data = np.array([
    (0.17, 0),
    (0.79, 0),
    (2.66, 2),
    (2.81, 2),
    (1.58, 1),
    (1.86, 1),
    (2.97, 2),
    (2.70, 2),
    (1.64, 1),
    (1.68, 1)
])
xs = training_data[:, 0].reshape((-1, 1))  # a 2D n-by-1 array
ys = training_data[:, 1].astype(int)       # a 1D array of length n

h = softmax_regression(xs, ys, 0.05, 750)

test_inputs = [(1.30, 1), (2.25, 2), (0.97, 0), (1.07, 1), (1.51, 1)]
print(f"{'prediction':^10}{'true':^10}")
for x, y in test_inputs:
    print(f"{h(x):^10}{y:^10}")

# prediction   true
#     1         1
#     2         2
#     0         0
#     1         1
#     1         1
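For reference, my understanding of the standard batch update is the sketch below (the function and variable names here are my own, and it assumes cross-entropy loss): the per-example gradient with respect to theta is the outer product of the input with the error vector, the bias gradient is the error vector itself, and both are subtracted from the parameters.

import numpy as np

def softmax_ref(z):
    e = np.exp(z - np.max(z))  # shift for numerical stability
    return e / e.sum()

def softmax_regression_reference(xs, tau, learning_rate, num_iterations):
    """Minimal sketch of the textbook batch update, assuming cross-entropy loss.

    xs: (n, d) inputs, tau: (n, k) one-hot targets.
    """
    n, d = xs.shape
    k = tau.shape[1]
    theta = np.zeros((d, k))
    bias = np.zeros(k)
    for _ in range(num_iterations):
        grad_theta = np.zeros((d, k))
        grad_bias = np.zeros(k)
        for i in range(n):
            error = softmax_ref(xs[i] @ theta + bias) - tau[i]  # shape (k,)
            grad_theta += np.outer(xs[i], error)                # (d, k) outer product
            grad_bias += error                                  # per-class, not a mean over classes
        theta -= learning_rate * grad_theta / n                 # descent: subtract the gradient
        bias -= learning_rate * grad_bias / n
    return theta, bias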

Switching the theta and bias updates from addition to subtraction makes the output always 6, and vice versa. I've read through the course notes and all the other documents, but I seriously can't spot where it's going wrong.
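One check that should isolate a bad update term is comparing the analytic gradient against a finite-difference estimate of the loss gradient. A minimal sketch of that check (the helper names and eps are my own choices, assuming mean cross-entropy loss):

import numpy as np

def cross_entropy_loss(theta, bias, xs, tau):
    # mean cross-entropy over the batch; tau is one-hot
    total = 0.0
    for i in range(xs.shape[0]):
        z = xs[i] @ theta + bias
        p = np.exp(z - np.max(z))
        p /= p.sum()
        total -= np.log(p[tau[i].argmax()])
    return total / xs.shape[0]

def numerical_grad_theta(theta, bias, xs, tau, eps=1e-6):
    # central finite differences, one entry of theta at a time
    grad = np.zeros_like(theta)
    for idx in np.ndindex(theta.shape):
        t_plus, t_minus = theta.copy(), theta.copy()
        t_plus[idx] += eps
        t_minus[idx] -= eps
        grad[idx] = (cross_entropy_loss(t_plus, bias, xs, tau)
                     - cross_entropy_loss(t_minus, bias, xs, tau)) / (2 * eps)
    return grad

If the analytic gradient from the training loop doesn't match this estimate entry by entry, the mismatch points at the faulty term.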

