Logistic Regression

This is logistic regression implemented from scratch in Python.

import math

import numpy as np
import pandas as pd

# Training samples: x is the input feature and y the 0/1 label. The prompts in
# main() treat x as a tumor size in cm and the model output as "malignant".
x = [0, 1, 2, 3, 5, 6, 7, 8]
y = [0, 0, 0, 0, 1, 1, 1, 1]


def slopeDerivative(x, y, slope, yInt):
    """Return the mean-squared-error gradient with respect to the slope.

    The model is the line ``slope * x + yInt``. ``x`` and ``y`` are indexable
    sequences of numbers; ``y`` needs at least ``len(x)`` entries and ``x``
    must not be empty (the sum is divided by ``len(x)``).
    """
    total = 0
    for i, xValue in enumerate(x):
        predicted = (slope * xValue) + yInt
        total += (y[i] - predicted) * xValue
    return (-2 / len(x)) * total


def interceptDerivative(x, y, slope, yInt):
    """Return the mean-squared-error gradient with respect to the intercept.

    Takes the same arguments, with the same requirements, as
    ``slopeDerivative``.
    """
    total = 0
    for i, xValue in enumerate(x):
        predicted = (slope * xValue) + yInt
        total += y[i] - predicted
    return (-2 / len(x)) * total


def sigmoid(z):
    """Return ``1 / (1 + e**-z)`` without overflowing for large ``|z|``."""
    if z >= 0:
        return 1 / (1 + math.exp(-z))
    # For negative z, exp(-z) can overflow; exp(z) cannot.
    expZ = math.exp(z)
    return expZ / (1 + expZ)


def gradientDescent(x, y, learningRate=0.0001, iterations=100000):
    """Fit ``slope * x + yInt`` to the samples by batch gradient descent.

    Both parameters start at 0 and are updated together from gradients
    evaluated at the same point. Returns the tuple ``(slope, yInt)``.
    """
    m = 0
    c = 0
    for _ in range(iterations):
        derivativeSlope = slopeDerivative(x, y, m, c)
        derivativeIntercept = interceptDerivative(x, y, m, c)
        m = m - (learningRate * derivativeSlope)
        c = c - (learningRate * derivativeIntercept)
    return m, c


def sigmoidCurve(slope, yInt, start=0, stop=8, step=0.0001):
    """Sample ``sigmoid(slope * x + yInt)`` on ``[start, stop)``.

    Returns two equal-length lists ``(xValues, yValues)``. Each x is computed
    as ``start + i * step`` so rounding error does not accumulate the way a
    running ``+= step`` would.
    """
    count = round((stop - start) / step)
    xValues = [start + (i * step) for i in range(count)]
    yValues = [sigmoid((value * slope) + yInt) for value in xValues]
    return xValues, yValues


def main():
    """Plot the data, fit the model, ask for one size and plot the curve."""
    # Imported here so the numeric helpers above can be imported and tested
    # on machines that have no plotting backend installed.
    import matplotlib.pyplot as plt

    plt.plot(x, y)
    plt.savefig('plot.png')

    # NOTE(review): this fits a least-squares line to the 0/1 labels and then
    # passes the line through a sigmoid; it does not minimise log-loss, so the
    # outputs are not calibrated logistic-regression probabilities.
    m, c = gradientDescent(x, y)

    # Round for display only; predictions below use the full-precision fit.
    print("The slope is " + str(round(m, 3)) + " and the y - intercept = " + str(round(c, 3)))

    howBigIsTheTumor = float(input("How big is the tumor in cm? "))
    userPrediction = sigmoid((howBigIsTheTumor * m) + c)
    print("There is a " + str(userPrediction) + " that the tumor is malignant!")

    # Drawn on the same axes as the raw data plotted above.
    biggest, newY = sigmoidCurve(m, c)
    plt.plot(biggest, newY)
    plt.savefig("graph")


if __name__ == "__main__":
    main()

Linear Regression with any number of variables

def readDataSet():
    """Prompt on stdin for the training data.

    Returns ``(dataSet, hm, boo)`` where ``hm`` is the number of input
    variables, ``boo`` the number of samples, and ``dataSet`` a list of
    ``hm + 1`` rows: one row of ``boo`` values per input variable followed by
    one final row holding the ``boo`` outputs.
    """
    hm = int(input("Enter how many categories or inputs do you want: "))
    boo = int(input("How many inputs per category do you want: "))
    dataSet = []
    for be in range(hm):
        print("This is your " + str(be) + "th input set.")
        dataSet.append([float(input("Enter value: ")) for _ in range(boo)])
    outputs = []
    for _ in range(boo):
        print("Enter you output.")
        outputs.append(float(input("Enter value: ")))
    dataSet.append(outputs)
    return dataSet, hm, boo


def _residual(dataSet, values, yIntercept, sample):
    """Return the output minus the model's prediction for one sample."""
    predicted = 0
    for i, weight in enumerate(values):
        predicted += weight * dataSet[i][sample]
    predicted += yIntercept
    # The last row of dataSet holds the outputs.
    return dataSet[-1][sample] - predicted


def slopeDerivativeOne(dataSet, values, hm, boo, yIntercept, wanted):
    """Return the mean-squared-error gradient for the weight at ``wanted``.

    ``dataSet`` has one row per input variable plus a final output row;
    ``values`` holds one weight per input variable; ``boo`` is the number of
    samples and must be at least 1. ``hm`` is accepted for interface
    compatibility but not needed: the sum runs over the ``boo`` samples.
    """
    total = 0
    for sample in range(boo):
        residual = _residual(dataSet, values, yIntercept, sample)
        total += residual * dataSet[wanted][sample]
    return (-2 / boo) * total


def DerivativeOne(dataSet, values, hm, boo, yIntercept, wanted):
    """Return the mean-squared-error gradient for the y-intercept.

    Arguments are as for ``slopeDerivativeOne``. ``hm`` and ``wanted`` are
    accepted for interface compatibility and ignored.
    """
    total = 0
    for sample in range(boo):
        total += _residual(dataSet, values, yIntercept, sample)
    return (-2 / boo) * total


def getCost(dataSet, values, hm, boo, yIntercept, wanted):
    """Return the mean squared error of the model over all ``boo`` samples.

    This is the cost whose gradients ``slopeDerivativeOne`` and
    ``DerivativeOne`` compute. Residuals are squared so that positive and
    negative errors cannot cancel. ``hm`` and ``wanted`` are ignored.
    """
    total = 0
    for sample in range(boo):
        total += _residual(dataSet, values, yIntercept, sample) ** 2
    return total / boo


def fitLinearModel(dataSet, hm, boo, learningRate=0.0001, iterations=1000000):
    """Fit one weight per input variable plus an intercept by gradient descent.

    Every gradient in an iteration is evaluated at the same point before any
    parameter is changed. Returns ``(values, yIntercept)`` where ``values`` is
    a new list of ``hm`` weights.

    Raises:
        ValueError: if ``boo`` is less than 1.
    """
    if boo < 1:
        raise ValueError("at least one sample is required")
    values = [0] * hm
    yIntercept = 0
    for _ in range(iterations):
        slopeGradients = [
            slopeDerivativeOne(dataSet, values, hm, boo, yIntercept, i)
            for i in range(hm)
        ]
        interceptGradient = DerivativeOne(dataSet, values, hm, boo, yIntercept, 0)
        values = [
            weight - (learningRate * gradient)
            for weight, gradient in zip(values, slopeGradients)
        ]
        yIntercept = yIntercept - (learningRate * interceptGradient)
    return values, yIntercept


def main():
    """Read a data set from stdin, fit the model and print the parameters."""
    dataSet, hm, boo = readDataSet()
    values, yIntercept = fitLinearModel(dataSet, hm, boo)
    for weight in values:
        print(round(weight, 2))
    print("y-intercept" + str(round(yIntercept, 2)))


if __name__ == "__main__":
    main()

Multi-variable linear regression

This is linear regression with more than one input variable.

# Sample data: two input features (x, y) and the target z, index-aligned.
x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
y = [1, 4, 9, 16, 25, 36, 49, 64, 81, 100]
z = [1, 8, 27, 64, 125, 216, 343, 512, 729, 1000]


def _residuals(x, y, z, slope, slope1, yInt):
    """Return ``z[i] - prediction`` for every sample, in order.

    The prediction is ``slope * x[i] + slope1 * y[i] + yInt``.
    """
    return [
        z[i] - ((slope * xValue) + (slope1 * y[i]) + yInt)
        for i, xValue in enumerate(x)
    ]


def slopeDerivativeOne(x, y, z, slope, slope1, yInt):
    """Return the mean-squared-error gradient with respect to ``slope``.

    ``x``, ``y`` and ``z`` are index-aligned sequences; ``x`` must not be
    empty because the sum is divided by ``len(x)``.
    """
    total = 0
    for residual, xValue in zip(_residuals(x, y, z, slope, slope1, yInt), x):
        total += residual * xValue
    return (-2 / len(x)) * total


def slopeDerivativeTwo(x, y, z, slope, slope1, yInt):
    """Return the mean-squared-error gradient with respect to ``slope1``.

    Takes the same arguments as ``slopeDerivativeOne``.
    """
    total = 0
    for residual, yValue in zip(_residuals(x, y, z, slope, slope1, yInt), y):
        total += residual * yValue
    return (-2 / len(x)) * total


def interceptDerivative(x, y, z, slope, slope1, yInt):
    """Return the mean-squared-error gradient with respect to ``yInt``.

    Takes the same arguments as ``slopeDerivativeOne``.
    """
    total = 0
    for residual in _residuals(x, y, z, slope, slope1, yInt):
        total += residual
    return (-2 / len(x)) * total


def gradientDescent(x, y, z, learningRate=0.0001, iterations=1000000):
    """Fit ``z = slope * x + slope1 * y + yInt`` by batch gradient descent.

    All three parameters start at 0; each iteration evaluates the three
    gradients at the same point and then applies them together. Returns the
    tuple ``(slope, slope1, yInt)``.
    """
    m = 0
    m1 = 0
    c = 0
    for _ in range(iterations):
        d = slopeDerivativeOne(x, y, z, m, m1, c)
        d1 = slopeDerivativeTwo(x, y, z, m, m1, c)
        d2 = interceptDerivative(x, y, z, m, m1, c)
        m = m - (learningRate * d)
        m1 = m1 - (learningRate * d1)
        c = c - (learningRate * d2)
    return m, m1, c


def main():
    """Fit the sample data and print the two slopes and the intercept."""
    m, m1, c = gradientDescent(x, y, z)
    print(m, m1, c)


if __name__ == "__main__":
    main()

Linear Regression

This is linear regression from scratch. I used a Coursera course for help with this.

import numpy as np
import pandas as pd

# Sample data: index-aligned inputs (x) and targets (y).
x = [1, 5, 3, 4, 7, 9, 12, 13, 15, 16, 17, 4, 5, 2, 10, 23, 25]
y = [5, 12, 23, 14, 17, 8, 20, 21, 25, 38, 42, 10, 13, 7, 23, 50, 55]


def slopeDerivative(x, y, slope, yInt):
    """Return the mean-squared-error gradient with respect to the slope.

    The model is the line ``slope * x + yInt``. ``x`` and ``y`` are indexable
    sequences of numbers; ``y`` needs at least ``len(x)`` entries and ``x``
    must not be empty (the sum is divided by ``len(x)``).
    """
    total = 0
    for i, xValue in enumerate(x):
        predicted = (slope * xValue) + yInt
        total += (y[i] - predicted) * xValue
    return (-2 / len(x)) * total


def interceptDerivative(x, y, slope, yInt):
    """Return the mean-squared-error gradient with respect to the intercept.

    Takes the same arguments, with the same requirements, as
    ``slopeDerivative``.
    """
    total = 0
    for i, xValue in enumerate(x):
        predicted = (slope * xValue) + yInt
        total += y[i] - predicted
    return (-2 / len(x)) * total


def gradientDescent(x, y, learningRate=0.0001, iterations=1000000):
    """Fit ``slope * x + yInt`` to the samples by batch gradient descent.

    Both parameters start at 0 and are updated together from gradients
    evaluated at the same point. Returns the tuple ``(slope, yInt)``.
    """
    m = 0
    c = 0
    for _ in range(iterations):
        derivativeSlope = slopeDerivative(x, y, m, c)
        derivativeIntercept = interceptDerivative(x, y, m, c)
        m = m - (learningRate * derivativeSlope)
        c = c - (learningRate * derivativeIntercept)
    return m, c


def main():
    """Show the raw data, fit the line and print its slope and intercept."""
    # Imported here so the numeric helpers above can be imported and tested
    # on machines that have no plotting backend installed.
    import matplotlib.pyplot as plt

    plt.plot(x, y)
    plt.show()

    m, c = gradientDescent(x, y)
    print(m, c)


if __name__ == "__main__":
    main()

Naive Bayes Classifier

This is an example of the naive Bayes classifier. In this code, I used old data to determine whether it was a good day to golf.


import math
from statistics import mean

# Past observations, one entry per day; all five lists are index-aligned.
outlook = ["sunny", "sunny", "overcast", "rainy", "rainy", "rainy", "overcast", "sunny", "sunny", "rainy", "sunny", "overcast", "overcast", "rainy"]
temperature = ["hot", "hot", "hot", "mild", "cool", "cool", "cool", "mild", "cool", "mild", "mild", "mild", "hot", "mild"]
humidity = ["high", "high", "high", "high", "normal", "normal", "normal", "high", "normal", "normal", "normal", "high", "normal", "high"]
windy = ["false", "true", "false", "false", "false", "true", "true", "false", "false", "false", "true", "true", "false", "true"]
play = ["no", "no", "yes", "yes", "yes", "no", "yes", "no", "yes", "yes", "yes", "yes", "yes", "no"]


def likelihood(featureValues, labels, value, label):
    """Return the fraction of ``label`` samples whose feature equals ``value``.

    ``featureValues`` and ``labels`` are index-aligned sequences. This is the
    counting estimate of P(feature == value | class == label).

    Raises:
        ZeroDivisionError: if ``label`` never occurs in ``labels``.
    """
    labelCount = 0
    matches = 0
    for featureValue, observed in zip(featureValues, labels):
        if observed == label:
            labelCount += 1
            if featureValue == value:
                matches += 1
    return matches / labelCount


def posteriors(features, labels, query):
    """Return the naive Bayes posterior of every label for ``query``.

    Args:
        features: mapping of feature name to its list of observed values,
            each index-aligned with ``labels``.
        labels: the observed class of every sample.
        query: mapping of feature name to the value being asked about.

    Returns:
        A dict mapping each distinct label (in first-seen order) to its
        posterior probability. Each label's score is its prior times the
        likelihood of every queried value; the scores are divided by their
        sum so the posteriors add up to 1.

    Raises:
        ValueError: if every label scores zero for the query.
    """
    scores = {}
    for label in dict.fromkeys(labels):
        score = labels.count(label) / len(labels)
        for name, value in query.items():
            score *= likelihood(features[name], labels, value, label)
        scores[label] = score

    evidence = sum(scores.values())
    if evidence == 0:
        raise ValueError("no label has a non-zero likelihood for this query")
    return {label: score / evidence for label, score in scores.items()}


def main():
    """Decide whether to play for one fixed set of conditions."""
    # here we will find the answer to playing assuming the outlook is sunny,
    # the temperature is cool, the humidity is high, and the wind is strong
    features = {
        "outlook": outlook,
        "temperature": temperature,
        "humidity": humidity,
        "windy": windy,
    }
    query = {
        "outlook": "sunny",
        "temperature": "cool",
        "humidity": "high",
        "windy": "true",
    }
    result = posteriors(features, play, query)
    finalYes = result["yes"]
    finalNo = result["no"]

    if finalYes > finalNo:
        print("Go out and play. The chances of you playing are " + str(finalYes) + " and the chances you don't are " + str(finalNo))
    else:
        print("Do not go out and play. The chances of you playing are " + str(finalYes) + " and the chances you don't are " + str(finalNo))


if __name__ == "__main__":
    main()

K Means Clustering

import math
import statistics
import random

# Three randomly placed starting centroids, each tagged with a color name.
centroids = [
    (random.randint(0, 12), random.randint(0, 12), random.randint(0, 12), "blue"),
    (random.randint(0, 12), random.randint(0, 12), random.randint(0, 12), "green"),
    (random.randint(0, 12), random.randint(0, 12), random.randint(0, 12), "red"),
]
data = [(3, 5, 10), (4, 12, 1), (7, 8, 5), (2, 6, 11), (3, 1, 4), (4, 1, 7), (1, 2, 3), (5, 1, 3), (6, 2, 3)]


def FindClosest(centroid, xvalue, yvalue, zvalue):
    """Return the index of the entry in ``centroid`` nearest to the point.

    ``centroid`` is a sequence of ``(x, y, z, color)`` tuples. Distance is
    Euclidean; on a tie the lowest index wins.

    Raises:
        ValueError: if ``centroid`` is empty.
    """
    if not centroid:
        raise ValueError("centroid list must not be empty")
    distances = [
        math.sqrt(
            (xvalue - entry[0]) ** 2
            + (yvalue - entry[1]) ** 2
            + (zvalue - entry[2]) ** 2
        )
        for entry in centroid
    ]
    return distances.index(min(distances))


def ChooseColor(place, centroids):
    """Return the color stored in the centroid at index ``place``."""
    return centroids[place][3]


def kMeans(data, centroids, maxIterations=100):
    """Cluster ``data`` around ``centroids`` with Lloyd's k-means iteration.

    Each pass assigns every point to its nearest centroid and then moves each
    centroid to the mean of its assigned points. A centroid with no points
    stays where it is. The loop stops when a pass changes no centroid, or
    after ``maxIterations`` passes.

    Args:
        data: sequence of ``(x, y, z)`` points.
        centroids: sequence of ``(x, y, z, color)`` starting centroids; it is
            not modified.
        maxIterations: upper bound on the number of passes.

    Returns:
        ``(labelled, finalCentroids)`` where ``labelled`` is a list of
        ``(x, y, z, color)`` tuples in the order of ``data``.
    """
    current = list(centroids)
    for _ in range(maxIterations):
        owners = [
            FindClosest(current, point[0], point[1], point[2])
            for point in data
        ]
        updated = []
        for index, entry in enumerate(current):
            members = [
                point for point, owner in zip(data, owners) if owner == index
            ]
            if members:
                center = tuple(statistics.mean(axis) for axis in zip(*members))
                updated.append(center[:3] + (entry[3],))
            else:
                updated.append(entry)
        if updated == current:
            break
        current = updated

    labelled = [
        (
            point[0],
            point[1],
            point[2],
            ChooseColor(FindClosest(current, point[0], point[1], point[2]), current),
        )
        for point in data
    ]
    return labelled, current


def main():
    """Cluster the sample data and print each point with its color."""
    NewData, _ = kMeans(data, centroids)
    for a in NewData:
        print("(" + str(a[0]) + ", " + str(a[1]) + ", " + str(a[2]) + ") , color = " + a[3])


if __name__ == "__main__":
    main()

K Nearest Neighbor Algorithm

KNN without using sklearn

import math
import statistics

# Labelled sample points as (x, y, z, color).
data = [(3, 5, 10, "red"), (4, 12, 1, "blue"), (7, 8, 5, "blue"), (2, 6, 11, "red"), (3, 1, 4, "yellow"), (4, 1, 7, "yellow"), (1, 2, 3, "green"), (5, 1, 3, "green"), (6, 2, 3, "purple")]


def _distance(entry, x, y, z):
    """Return the Euclidean distance from ``entry``'s coordinates to (x, y, z)."""
    return math.sqrt(
        ((x - entry[0]) ** 2) + ((y - entry[1]) ** 2) + ((z - entry[2]) ** 2)
    )


def findClosest(x, y, z, points=None):
    """Return the index of the entry nearest to ``(x, y, z)``.

    ``points`` is a sequence of ``(x, y, z, color)`` tuples and defaults to
    the module-level ``data``. On a tie the lowest index wins.

    Raises:
        ValueError: if there are no points to search.
    """
    if points is None:
        points = data
    if not points:
        raise ValueError("there are no points to search")
    distances = [_distance(entry, x, y, z) for entry in points]
    return distances.index(min(distances))


def D(n, data, point=(3, 4, 5)):
    """Return the most common color among the ``n`` entries nearest ``point``.

    ``data`` is a sequence of ``(x, y, z, color)`` tuples and is not modified.
    Entries at equal distance keep their order in ``data``. If ``n`` exceeds
    ``len(data)`` every entry votes.

    Raises:
        statistics.StatisticsError: if no entry is selected (``n < 1`` or
            ``data`` is empty).
    """
    # random point I chose was (3, 4, 5)
    x, y, z = point
    ranked = sorted(data, key=lambda entry: _distance(entry, x, y, z))
    colors = [entry[3] for entry in ranked[:max(n, 0)]]
    return statistics.mode(colors)


def main():
    """Classify the default query point using its three nearest neighbors."""
    print(D(3, data))


if __name__ == "__main__":
    main()
1