Decision Tree Regression Without Sklearn

I finally finished the single-split optimization step.


"""Single-split decision tree regression without scikit-learn.

Scans candidate split positions over the (sorted-by-cluster) data, measures
the spread of each side around its centroid, and keeps the split with the
smallest combined spread. Each candidate split is also plotted.
"""

# Imports
from matplotlib import pyplot as plt
from math import sqrt
import numpy as np

# Data: two visually separable clusters of 2-D points.
X = np.array([1, 1, 2, 1, 2, 5, 6, 5, 7, 5])
y = np.array([2, 3, 2, 3, 3, 6, 7, 6, 7, 7])


def distance(x1, x2, y1, y2):
    """Euclidean distance between points (x1, y1) and (x2, y2)."""
    return sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)


# NOTE: shadows the builtin `sum`; name kept for interface compatibility.
def sum(a):
    """Return the sum of the elements of iterable `a`."""
    final = 0
    for i in a:
        final += i
    return final


def deviate(xleft, xright, yleft, yright, centroids):
    """Standard deviation of point-to-centroid distances for each side.

    centroids: [(left_x, left_y), (right_x, right_y)]
    Returns (left_std, right_std).
    """
    lxc, lyc = centroids[0]
    rxc, ryc = centroids[1]
    # BUG FIX: the original iterated range(0, len(...) - 1), silently
    # dropping the last point on each side. Include every point.
    distance1 = [distance(lxc, xleft[i], lyc, yleft[i])
                 for i in range(len(xleft))]
    distance2 = [distance(rxc, xright[i], ryc, yright[i])
                 for i in range(len(xright))]
    return (np.std(distance1), np.std(distance2))


def _evaluate(X, y, split):
    """Evaluate one candidate split: compute centroids, plot, return stds.

    Splits the data at index `split`, computes the centroid of each side,
    draws the split line, the data points, and the centroids, and returns
    the (left_std, right_std) pair from deviate().
    """
    xleft, xright = X[:split], X[split:]
    yleft, yright = y[:split], y[split:]
    # Centroid of each side.
    x1, x2 = sum(xleft) / len(xleft), sum(xright) / len(xright)
    y1, y2 = sum(yleft) / len(yleft), sum(yright) / len(yright)
    centers = [(x1, y1), (x2, y2)]
    std = deviate(xleft, xright, yleft, yright, centers)
    # Plot the vertical split line, the raw data, and both centroids.
    plt.plot(np.array([split - 1, split - 1]), np.array([0, 10]))
    plt.scatter(X, y, marker="o")
    plt.scatter(np.array(centers[0][0]), np.array(centers[0][1]), marker="v")
    plt.scatter(np.array(centers[1][0]), np.array(centers[1][1]), marker="v")
    plt.show()
    return std


def train(X, y, split=2, step=1):
    """Entry point: evaluate the initial split, then recurse over the rest.

    Returns (opt, optdev) from _train: the best split found and its
    (left_std, right_std) deviations.
    """
    split = split + 1
    std = _evaluate(X, y, split)
    # Recurse over the remaining candidate splits, carrying the best so far.
    return _train(np.delete(X, 0), np.delete(y, 0), split + 1,
                  opt=(0, split), optdev=std, step=step, initX=X, inity=y)


def _train(X, y, split, opt=None, optdev=None, step=None, initX=None, inity=None):
    """Recursively evaluate candidate splits, keeping the lowest-spread one.

    `X`/`y` shrink by one element per call and only control termination;
    the actual data is always read from `initX`/`inity`.
    """
    if len(X) < split + step:
        print("DONE WITH RECURSIVE PROCESS")
        print(f"RESULTS: Optimal Split:{opt}, Optimized Standard Deviation: left:{optdev[0]} right:{optdev[1]}")
        return (opt, optdev)
    std = _evaluate(initX, inity, split)
    # BUG FIX: the original threaded opt/optdev through unchanged, so the
    # "optimization" always returned the initial split. Keep whichever
    # split has the smaller combined deviation.
    if std[0] + std[1] < optdev[0] + optdev[1]:
        opt, optdev = (0, split), std
    return _train(np.delete(X, 0), np.delete(y, 0), split + 1,
                  opt=opt, optdev=optdev, step=step, initX=initX, inity=inity)


x = train(X, y, split=4)

Decision Tree Regression Without Sklearn

The main purpose of this program is to do decision tree regression without using any machine learning libraries (Keras, Sklearn, etc.). I'm currently in the process of creating the regressor. I'm using a recursive function to act as the regressor for my task.


"""Decision tree regression without scikit-learn (work in progress).

Evaluates one split of the data, plotting the split line, the data, and the
per-side centroids. The recursive regressor `_train` is not implemented yet.
"""

# Imports
from matplotlib import pyplot as plt
from math import sqrt
import numpy as np

# Data: two visually separable clusters of 2-D points.
X = np.array([1, 1, 2, 1, 2, 5, 6, 5, 7, 5])
y = np.array([2, 3, 2, 3, 3, 6, 7, 6, 7, 7])


def distance(x1, x2, y1, y2):
    """Euclidean distance between points (x1, y1) and (x2, y2)."""
    return sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)


# NOTE: shadows the builtin `sum`; name kept for interface compatibility.
def sum(a):
    """Return the sum of the elements of iterable `a`."""
    final = 0
    for i in a:
        final += i
    return final


def deviate(xleft, xright, yleft, yright, centroids):
    """Standard deviation of point-to-centroid distances for both sides.

    centroids: [(left_x, left_y), (right_x, right_y)]
    Returns (left_std, right_std).
    """
    lxc, lyc = centroids[0]
    rxc, ryc = centroids[1]
    # BUG FIX: the original iterated range(0, len(...) - 1), silently
    # dropping the last point on each side. Include every point.
    distance1 = [distance(lxc, xleft[i], lyc, yleft[i])
                 for i in range(len(xleft))]
    distance2 = [distance(rxc, xright[i], ryc, yright[i])
                 for i in range(len(xright))]
    return (np.std(distance1), np.std(distance2))


def train(X, y, split=2):
    """Helper that evaluates the initial split and will call the recursive
    regressor once it is implemented."""
    # Splits
    xleft, xright = X[:split], X[split:]
    yleft, yright = y[:split], y[split:]
    # Centroid of each side.
    x1, x2 = sum(xleft) / len(xleft), sum(xright) / len(xright)
    y1, y2 = sum(yleft) / len(yleft), sum(yright) / len(yright)
    centers = [(x1, y1), (x2, y2)]
    std = deviate(xleft, xright, yleft, yright, centers)
    # Plot the vertical split line, the raw data, and both centroids.
    plt.plot(np.array([split - 1, split - 1]), np.array([0, 10]))
    plt.scatter(X, y, marker="o")
    plt.scatter(np.array(centers[0][0]), np.array(centers[0][1]), marker="v")
    plt.scatter(np.array(centers[1][0]), np.array(centers[1][1]), marker="v")
    plt.show()


# Recursive regressor — not implemented yet.
def _train(X, y, split, opt=None, optdev=None):
    pass


train(X, y, split=5)
Akhil Apr 11

nice work!

Sports Clothing Image Recognition

A project I worked on early last year involved taking an image of sports clothing and matching the image to the actual product on Amazon. It would be cool to see if someone could add extra features to this such that the program could retrieve the product from other clothing sites as well and filter for the cheapest ones. The video shows the current status of the project and the github link is here: https://github.com/akhily1/Sports-Apparel-Matching


1 ... 31 32