Guessing Game

A game where the player guesses a random number generated by the computer. Written in Python using a while loop.
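A minimal sketch of this kind of game, assuming a 1-100 range and simple text prompts (the exact range and messages are assumptions for illustration):

import random

# pick a secret number; the 1-100 range is an assumption for this sketch
secret = random.randint(1, 100)
guess = None

while guess != secret:
    guess = int(input("Guess the number (1-100): "))
    if guess < secret:
        print("Too low, try again.")
    elif guess > secret:
        print("Too high, try again.")

print("Correct! The number was", secret)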

Correlation Coefficient

Uses for loops and lists to find the correlation coefficient of two given lists.
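One way this idea can be written as a small helper, assuming both lists have the same length and using the population (divide-by-n) form of the Pearson formula:

def correlation(a, b):
    # Pearson correlation via the E[XY] - E[X]E[Y] identities
    n = len(a)
    mean_a = sum(a) / n
    mean_b = sum(b) / n
    cov = sum(a[i] * b[i] for i in range(n)) / n - mean_a * mean_b
    var_a = sum(v ** 2 for v in a) / n - mean_a ** 2
    var_b = sum(v ** 2 for v in b) / n - mean_b ** 2
    return cov / ((var_a * var_b) ** 0.5)

print(correlation([1, 2, 3, 4], [2, 4, 6, 8]))  # prints 1.0 for perfectly correlated lists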

Correlation Coefficient with TradingView data

This program computes the correlation coefficient of two securities using price data taken from TradingView. It uses for loops over the original lists of data from the website to find the variances, the covariance, and the correlation coefficient.

Security_1 = [170.66, 170.95, 170.70, 169.73, 169.18, 169.80, 169.31, 169.11, 169.61, 168.74, 166.38, 165.83]
Security_2 = [56.54, 56.40, 56.10, 55.49, 55.30, 54.83, 54.52, 54.09, 54.29, 54.15, 53.29, 51.83]

squared1 = []
squared2 = []
multiply = []
Totalsecurity1 = 0
Totalsecurity2 = 0
Totalsquare1 = 0
Totalsquare2 = 0
Totalmultiply = 0

# square each price and collect the products of the two series
for x in range(0, len(Security_1), 1):
    item = Security_1[x]
    squared1.append(item**2)

for x in range(0, len(Security_1), 1):
    item = Security_2[x]
    squared2.append(item**2)

for x in range(0, len(Security_1), 1):
    item1 = Security_1[x]
    item2 = Security_2[x]
    stuff = item1 * item2
    multiply.append(stuff)

# averages of the raw prices
for x in range(0, len(Security_1), 1):
    Totalsecurity1 = Totalsecurity1 + Security_1[x]
AvgSecurity1 = Totalsecurity1 / len(Security_1)
print('Average of SPY =', AvgSecurity1)

for x in range(0, len(Security_2), 1):
    Totalsecurity2 = Totalsecurity2 + Security_2[x]
AvgSecurity2 = Totalsecurity2 / len(Security_2)
print('Average of JPM =', AvgSecurity2)

# averages of the squared prices and of the products
for x in range(0, len(squared1), 1):
    Totalsquare1 = Totalsquare1 + squared1[x]
Avgsquare1 = Totalsquare1 / len(squared1)
print('Average of SPY squared =', Avgsquare1)

for x in range(0, len(squared2), 1):
    Totalsquare2 = Totalsquare2 + squared2[x]
Avgsquare2 = Totalsquare2 / len(squared2)
print('Average of JPM squared =', Avgsquare2)

for x in range(0, len(multiply), 1):
    Totalmultiply = Totalmultiply + multiply[x]
Avgmultiply = Totalmultiply / len(multiply)
print('The average of SPY and JPM multiplied is ', Avgmultiply)

# variance = E[X^2] - E[X]^2, covariance = E[XY] - E[X]E[Y]
AvgSPYsquared = AvgSecurity1 ** 2
SPYvariance = Avgsquare1 - AvgSPYsquared
print('SPY variance is ', SPYvariance)

AvgJPMsquared = AvgSecurity2 ** 2
JPMvariance = Avgsquare2 - AvgJPMsquared
print('JPM variance is ', JPMvariance)

AvgSPYJPM = AvgSecurity1 * AvgSecurity2
SPYJPMcovariance = Avgmultiply - AvgSPYJPM
print('SPY and JPM covariance is ', SPYJPMcovariance)

# correlation = covariance / (std of SPY * std of JPM)
SPYJPM = SPYvariance * JPMvariance
stuff = SPYJPM ** 0.5
final = SPYJPMcovariance / stuff
print('SPY & JPM Correlation Coefficient = ', final)
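As a quick sanity check, the same correlation coefficient can be computed with NumPy (assuming NumPy is installed, and reusing the Security_1 and Security_2 lists from above):

import numpy as np

spy = np.array(Security_1)
jpm = np.array(Security_2)
# corrcoef returns a 2x2 matrix; the off-diagonal entry is the correlation
print(np.corrcoef(spy, jpm)[0, 1])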

Logistic Regression

This is logistic regression implemented from scratch in Python.

x = [0, 1, 2, 3, 5, 6, 7, 8]
y = [0, 0, 0, 0, 1, 1, 1, 1]

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

plt.plot(x, y)
plt.savefig('plot.png')

def slopeDerivative(x, y, slope, yInt):
    length = len(x)
    total = 0
    for i in range(length):
        predicted = (slope * x[i]) + yInt
        difference = (y[i] - predicted) * x[i]
        total += difference
    returnValue = (-2 / length) * total
    return returnValue

def interceptDerivative(x, y, slope, yInt):
    length = len(x)
    total = 0
    for i in range(length):
        predicted = (slope * x[i]) + yInt
        difference = (y[i] - predicted)
        total += difference
    returnValue = (-2 / length) * total
    return returnValue

# gradient descent on the linear fit
m = 0
c = 0
l = 0.0001
iterations = 100000
for i in range(iterations):
    derivativeSlope = slopeDerivative(x, y, m, c)
    derivativeIntercept = interceptDerivative(x, y, m, c)
    m = m - (l * derivativeSlope)
    c = c - (l * derivativeIntercept)

m = round(m, 3)
c = round(c, 3)
print("The slope is " + str(m) + " and the y-intercept = " + str(c))

newY = []
biggest = []
howBigIsTheTumor = float(input("How big is the tumor in cm? "))
# squash the linear prediction through a sigmoid to turn it into a probability
userPrediction = 1 / (1 + (2.71828 ** (-1 * ((howBigIsTheTumor * m) + c))))
print("There is a " + str(userPrediction) + " chance that the tumor is malignant!")

# plot the sigmoid curve over the range 0 to 8
for f in range(0, 8):
    xValues = []
    recentValue = f
    for i in range(10000):
        xValues.append(recentValue)
        recentValue += 0.0001
    for i in range(len(xValues)):
        yprime = 1 / (1 + (2.71828 ** (-1 * ((xValues[i] * m) + c))))
        newY.append(yprime)
    for n in range(len(xValues)):
        biggest.append(xValues[n])

plt.plot(biggest, newY)
plt.savefig("graph")
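For comparison, a textbook logistic regression update uses the log-loss gradient rather than the squared-error gradient used above. A minimal sketch with the same x and y lists (the learning rate and iteration count here are assumptions) might look like this:

import math

x = [0, 1, 2, 3, 5, 6, 7, 8]
y = [0, 0, 0, 0, 1, 1, 1, 1]

w, b = 0.0, 0.0
lr = 0.1
for _ in range(10000):
    grad_w = 0.0
    grad_b = 0.0
    for xi, yi in zip(x, y):
        p = 1 / (1 + math.exp(-(w * xi + b)))  # sigmoid of the linear score
        grad_w += (p - yi) * xi                # log-loss gradient w.r.t. the weight
        grad_b += (p - yi)                     # log-loss gradient w.r.t. the bias
    w -= lr * grad_w / len(x)
    b -= lr * grad_b / len(x)

print("weight:", round(w, 3), "bias:", round(b, 3))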

Linear Regression with any number of variables

hm = int(input("Enter how many categories or inputs do you want: "))
boo = int(input("How many inputs per category do you want: "))

# read hm feature lists of boo values each
dataSet = []
for be in range(hm):
    print("This is your " + str(be) + "th input set.")
    inputer = []
    for i in range(boo):
        point = float(input("Enter value: "))
        inputer.append(point)
    dataSet.append(inputer)

# read the boo output values and store them as the last row of dataSet
outputs = []
for i in range(boo):
    print("Enter your output.")
    output = float(input("Enter value: "))
    outputs.append(output)
dataSet.append(outputs)

values = []
for i in range(hm):
    values.append(0)
yIntercept = 0

def slopeDerivativeOne(dataSet, values, hm, boo, yIntercept, wanted):
    # partial derivative of the squared error with respect to the weight for feature `wanted`
    length = boo
    total = 0
    wantedValues = []
    a = dataSet[wanted]
    for i in range(len(a)):
        wantedValues.append(a[i])
    for f in range(length):
        innerValues = []
        for i in range(len(dataSet)):
            b = dataSet[i]
            val = b[f]
            innerValues.append(val)
        innerTotal = 0
        for i in range(len(values)):
            bobby = values[i] * innerValues[i]
            innerTotal += bobby
        innerTotal += yIntercept
        jj = len(dataSet) - 1
        nn = dataSet[jj]
        difference = (nn[f] - innerTotal) * wantedValues[f]
        total += difference
    returnValue = (-2 / length) * total
    return returnValue

def DerivativeOne(dataSet, values, hm, boo, yIntercept, wanted):
    # partial derivative of the squared error with respect to the y-intercept
    length = boo
    total = 0
    for f in range(length):
        innerValues = []
        for i in range(len(dataSet)):
            z = dataSet[i]
            val = z[f]
            innerValues.append(val)
        innerTotal = 0
        for i in range(len(values)):
            bobby = values[i] * innerValues[i]
            innerTotal += bobby
        innerTotal += yIntercept
        jj = len(dataSet) - 1
        nn = dataSet[jj]
        difference = (nn[f] - innerTotal)
        total += difference
    returnValue = (-2 / length) * total
    return returnValue

def getCost(dataSet, values, hm, boo, yIntercept, wanted):
    # total absolute error of the current fit
    total = 0
    length = boo
    for f in range(length):
        innerValues = []
        for i in range(len(dataSet)):
            z = dataSet[i]
            val = z[f]
            innerValues.append(val)
        innerTotal = 0
        for i in range(len(values)):
            bobby = values[i] * innerValues[i]
            innerTotal += bobby
        innerTotal += yIntercept
        jj = len(dataSet) - 1
        nn = dataSet[jj]
        difference = (nn[f] - innerTotal)
        total += difference
    return abs(total)

l = 0.0001
previousValues = []
for x in range(len(values)):
    previousValues.append(5)
previousYInt = 5

# gradient descent: update every weight and the intercept each pass
j = 0
while True:
    j += 1
    for i in range(len(values)):
        d = slopeDerivativeOne(dataSet, values, hm, boo, yIntercept, i)
        values[i] = values[i] - (l * d)
        d1 = DerivativeOne(dataSet, values, hm, boo, yIntercept, i)
        yIntercept = yIntercept - (l * d1)
    if j > 1000000:
        break
    previousValues = values
    previousYInt = yIntercept

for i in range(len(values)):
    bob = round(values[i], 2)
    print(bob)
bobby = round(yIntercept, 2)
print("y-intercept: " + str(bobby))
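As a cross-check under the same data layout (hm feature lists of length boo, followed by the outputs list at the end of dataSet), the coefficients could also be found in closed form with NumPy's least-squares solver; this is a sketch, assuming NumPy is available:

import numpy as np

features = np.array(dataSet[:-1]).T                          # shape (boo, hm)
targets = np.array(dataSet[-1])                              # shape (boo,)
design = np.hstack([features, np.ones((len(targets), 1))])   # extra column for the intercept
coeffs, *_ = np.linalg.lstsq(design, targets, rcond=None)
print("weights:", coeffs[:-1], "y-intercept:", coeffs[-1])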

Linear Regression

A linear regression program that prints the slope and y-intercept of the fitted line.

import numpy as np
import matplotlib.pyplot as plt

def estimate_coef(x, y):
    # closed-form least-squares estimates of intercept (b_0) and slope (b_1)
    n = np.size(x)
    m_x, m_y = np.mean(x), np.mean(y)
    SS_xy = np.sum(y*x) - n*m_y*m_x
    SS_xx = np.sum(x*x) - n*m_x*m_x
    b_1 = SS_xy / SS_xx
    b_0 = m_y - b_1*m_x
    return (b_0, b_1)

def plot_regression_line(x, y, b):
    plt.scatter(x, y, color="m", marker="o", s=30)
    y_pred = b[0] + b[1]*x
    plt.plot(x, y_pred, color="g")
    plt.xlabel('x')
    plt.ylabel('y')
    plt.show()

def main():
    x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12])
    b = estimate_coef(x, y)
    print("Estimated coefficients:\nb_0 = {}\nb_1 = {}".format(b[0], b[1]))
    plot_regression_line(x, y, b)

if __name__ == "__main__":
    main()
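The same coefficients can be verified with NumPy's built-in degree-1 polynomial fit; this sketch reuses the x and y arrays from main() and assumes NumPy is available:

import numpy as np

x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12])
b_1, b_0 = np.polyfit(x, y, 1)   # polyfit returns [slope, intercept] for degree 1
print("b_0 =", b_0, "b_1 =", b_1)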

Linear Regression

This is simple linear regression fit with gradient descent.


x = [15, 9, 12, 1, 10, 11, 4, 16, 2, 30, 4, 15, 18, 12, 14]
y = [13, 19, 16, 15, 24, 7, 18, 23, 28, 2, 26, 12, 18, 24, 17]

def DerivativeSlope(x, y, slope, yInt):
    length = len(x)
    total = 0
    for i in range(length):
        predicted = (slope * x[i]) + yInt
        difference = (y[i] - predicted) * x[i]
        total += difference
    returnValue = (-2 / length) * total
    return returnValue

def DerivativeIntercept(x, y, slope, yInt):
    length = len(x)
    total = 0
    for i in range(length):
        predicted = (slope * x[i]) + yInt
        difference = (y[i] - predicted)
        total += difference
    returnValue = (-2 / length) * total
    return returnValue

m = 0
c = 0
l = 0.000001
iterations = 99999
for i in range(iterations):
    slope = DerivativeSlope(x, y, m, c)
    intercept = DerivativeIntercept(x, y, m, c)
    m = m - (l * slope)
    c = c - (l * intercept)

print(m, c)

Multi-variable linear regression

This is linear regression with more than one input variable.

x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
y = [1, 4, 9, 16, 25, 36, 49, 64, 81, 100]
z = [1, 8, 27, 64, 125, 216, 343, 512, 729, 1000]

def slopeDerivativeOne(x, y, z, slope, slope1, yInt):
    length = len(x)
    total = 0
    for i in range(length):
        predicted = (slope * x[i]) + (slope1 * y[i]) + yInt
        difference = (z[i] - predicted) * x[i]
        total += difference
    returnValue = (-2 / length) * total
    return returnValue

def slopeDerivativeTwo(x, y, z, slope, slope1, yInt):
    length = len(x)
    total = 0
    for i in range(length):
        predicted = (slope * x[i]) + (slope1 * y[i]) + yInt
        difference = (z[i] - predicted) * y[i]
        total += difference
    returnValue = (-2 / length) * total
    return returnValue

def interceptDerivative(x, y, z, slope, slope1, yInt):
    length = len(x)
    total = 0
    for i in range(length):
        predicted = (slope * x[i]) + (slope1 * y[i]) + yInt
        difference = (z[i] - predicted)
        total += difference
    returnValue = (-2 / length) * total
    return returnValue

m = 0
m1 = 0
c = 0
l = 0.0001
iterations = 1000000
for i in range(iterations):
    d = slopeDerivativeOne(x, y, z, m, m1, c)
    d1 = slopeDerivativeTwo(x, y, z, m, m1, c)
    d2 = interceptDerivative(x, y, z, m, m1, c)
    m = m - (l * d)
    m1 = m1 - (l * d1)
    c = c - (l * d2)

print(m, m1, c)
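A quick way to verify the gradient-descent result is to compare it against NumPy's closed-form least-squares solution. This is a sketch that reuses the x, y, and z lists above and assumes NumPy is available:

import numpy as np

X = np.column_stack([x, y, np.ones(len(x))])   # columns: x, y, and a constant for the intercept
coeffs, *_ = np.linalg.lstsq(X, np.array(z), rcond=None)
print("m:", coeffs[0], "m1:", coeffs[1], "c:", coeffs[2])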

Linear Regression

This is linear regression from scratch. I used a Coursera course for help with this.

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

x = [1, 5, 3, 4, 7, 9, 12, 13, 15, 16, 17, 4, 5, 2, 10, 23, 25]
y = [5, 12, 23, 14, 17, 8, 20, 21, 25, 38, 42, 10, 13, 7, 23, 50, 55]
plt.plot(x, y)
plt.show()

def slopeDerivative(x, y, slope, yInt):
    length = len(x)
    total = 0
    for i in range(length):
        predicted = (slope * x[i]) + yInt
        difference = (y[i] - predicted) * x[i]
        total += difference
    returnValue = (-2 / length) * total
    return returnValue

def interceptDerivative(x, y, slope, yInt):
    length = len(x)
    total = 0
    for i in range(length):
        predicted = (slope * x[i]) + yInt
        difference = (y[i] - predicted)
        total += difference
    returnValue = (-2 / length) * total
    return returnValue

m = 0
c = 0
l = 0.0001
iterations = 1000000
for i in range(iterations):
    derivativeSlope = slopeDerivative(x, y, m, c)
    derivativeIntercept = interceptDerivative(x, y, m, c)
    m = m - (l * derivativeSlope)
    c = c - (l * derivativeIntercept)

print(m, c)

Naive Bayes Classifier

This is an example of the naive Bayes classifier. The code uses past weather data to determine whether it is a good day to play golf.


import math
from statistics import mean

# Find the probability of playing, assuming the outlook is sunny, the temperature is cool,
# the humidity is high, and the wind is strong.
outlook = ["sunny", "sunny", "overcast", "rainy", "rainy", "rainy", "overcast", "sunny", "sunny", "rainy", "sunny", "overcast", "overcast", "rainy"]
temperature = ["hot", "hot", "hot", "mild", "cool", "cool", "cool", "mild", "cool", "mild", "mild", "mild", "hot", "mild"]
humidity = ["high", "high", "high", "high", "normal", "normal", "normal", "high", "normal", "normal", "normal", "high", "normal", "high"]
windy = ["false", "true", "false", "false", "false", "true", "true", "false", "false", "false", "true", "true", "false", "true"]
play = ["no", "no", "yes", "yes", "yes", "no", "yes", "no", "yes", "yes", "yes", "yes", "yes", "no"]

# counts of the evidence values across all 14 days
sunny = 0
cool = 0
high = 0
strong = 0
for i in range(14):
    if outlook[i] == "sunny":
        sunny += 1
    if temperature[i] == "cool":
        cool += 1
    if humidity[i] == "high":
        high += 1
    if windy[i] == "true":
        strong += 1
print(sunny)
print(cool)
print(high)
print(strong)

# prior counts for playing vs. not playing
countYes = 0
countNo = 0
length = len(play)
for i in range(length):
    if play[i] == "no":
        countNo += 1
    else:
        countYes += 1
print(countNo)
print(countYes)

probYes = countYes / 14
probNo = countNo / 14

# conditional counts for wind
probYesWindy = 0
probNoWindy = 0
probYesNoWindy = 0
probNoNoWindy = 0
for i in range(length):
    if play[i] == "yes":
        if windy[i] == "false":
            probYesNoWindy += 1
        if windy[i] == "true":
            probYesWindy += 1
    if play[i] == "no":
        if windy[i] == "false":
            probNoNoWindy += 1
        if windy[i] == "true":
            probNoWindy += 1
YesWindy = probYesWindy / countYes
NoWindy = probNoWindy / countNo
YesNoWindy = probYesNoWindy / countYes
NoNoWindy = probNoNoWindy / countNo
print(YesWindy)
print(NoWindy)

# conditional counts for humidity
probYesHigh = 0
probNoHigh = 0
probYesNormal = 0
probNoNormal = 0
for i in range(length):
    if play[i] == "yes":
        if humidity[i] == "high":
            probYesHigh += 1
        if humidity[i] == "normal":
            probYesNormal += 1
    if play[i] == "no":
        if humidity[i] == "high":
            probNoHigh += 1
        if humidity[i] == "normal":
            probNoNormal += 1
yesHigh = probYesHigh / countYes
noHigh = probNoHigh / countNo
yesNormal = probYesNormal / countYes
noNormal = probNoNormal / countNo
print(yesHigh)
print(noHigh)

# conditional counts for outlook
probYesSunny = 0
probNoSunny = 0
probYesOvercast = 0
probNoOvercast = 0
probYesRainy = 0
probNoRainy = 0
for i in range(length):
    if play[i] == "yes":
        if outlook[i] == "sunny":
            probYesSunny += 1
        if outlook[i] == "overcast":
            probYesOvercast += 1
        if outlook[i] == "rainy":
            probYesRainy += 1
    if play[i] == "no":
        if outlook[i] == "sunny":
            probNoSunny += 1
        if outlook[i] == "overcast":
            probNoOvercast += 1
        if outlook[i] == "rainy":
            probNoRainy += 1
YesSunny = probYesSunny / countYes
NoSunny = probNoSunny / countNo
YesOvercast = probYesOvercast / countYes
NoOvercast = probNoOvercast / countNo
YesRainy = probYesRainy / countYes
NoRainy = probNoRainy / countNo
print(YesSunny)
print(NoSunny)

# conditional counts for temperature
probYesHot = 0
probNoHot = 0
probYesMild = 0
probNoMild = 0
probYesCool = 0
probNoCool = 0
for i in range(length):
    if play[i] == "yes":
        if temperature[i] == "hot":
            probYesHot += 1
        if temperature[i] == "mild":
            probYesMild += 1
        if temperature[i] == "cool":
            probYesCool += 1
    if play[i] == "no":
        if temperature[i] == "hot":
            probNoHot += 1
        if temperature[i] == "mild":
            probNoMild += 1
        if temperature[i] == "cool":
            probNoCool += 1
YesHot = probYesHot / countYes
NoHot = probNoHot / countNo
YesMild = probYesMild / countYes
NoMild = probNoMild / countNo
YesCool = probYesCool / countYes
NoCool = probNoCool / countNo
print(YesCool)
print(NoCool)

# multiply the conditional probabilities by the prior, then divide by the evidence term
pYes = YesSunny * YesCool * yesHigh * YesWindy * probYes
pNo = NoSunny * NoCool * noHigh * NoWindy * probNo
print(pYes)
print(pNo)
commonP = (sunny / 14) * (high / 14) * (cool / 14) * (strong / 14)
finalYes = pYes / commonP
finalNo = pNo / commonP
if finalYes > finalNo:
    print("Go out and play. The chances of you playing are " + str(finalYes) + " and the chances you don't are " + str(finalNo))
else:
    print("Do not go out and play. The chances of you playing are " + str(finalYes) + " and the chances you don't are " + str(finalNo))
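A note on the final step: dividing by commonP treats the product of the individual evidence frequencies as the denominator, so finalYes and finalNo are not guaranteed to sum to 1. An alternative, reusing pYes and pNo from above, is to normalize the two scores directly:

pTotal = pYes + pNo
print("P(play | evidence) =", pYes / pTotal)
print("P(don't play | evidence) =", pNo / pTotal)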

Implementing Hierarchical Clustering on a dataset

This code applies hierarchical clustering to a real dataset of credit card account information. It outputs the centroids of however many clusters the user wants the data split into.


import pandas as pd
import math
import random

# load the credit card dataset and keep six numeric columns per account
csv = pd.read_csv('Card.csv', header=0)
data = []
for x in range(len(csv)):
    balance = csv.iloc[x].iloc[1]
    balanceFrequency = csv.iloc[x].iloc[2]
    purchases = csv.iloc[x].iloc[3]
    oneOffPurchases = csv.iloc[x].iloc[4]
    installmentsPurchases = csv.iloc[x].iloc[5]
    cashAdvance = csv.iloc[x].iloc[6]
    accountTuple = (balance, balanceFrequency, purchases, oneOffPurchases, installmentsPurchases, cashAdvance)
    data.append(accountTuple)

k = int(input("How many final clusters do you want (max is 97): "))

def distance(x, y):
    # Euclidean distance in six dimensions
    return math.sqrt((x[0] - y[0]) ** 2 + (x[1] - y[1]) ** 2 + (x[2] - y[2]) ** 2 + (x[3] - y[3]) ** 2 + (x[4] - y[4]) ** 2 + (x[5] - y[5]) ** 2)

def minDistance(clusters):
    # find the closest pair of points in the current set of clusters
    minDist = float('inf')
    for x in clusters:
        for y in clusters:
            dist = distance(x, y)
            if dist < minDist and dist != 0:
                minDist = dist
                minX = x
                minY = y
    minPoints = (minX, minY)
    return minPoints

def findCentroid(minPoints):
    # the new centroid is the midpoint of the two merged points
    x = minPoints[0]
    y = minPoints[1]
    centroid = []
    for d in range(len(x)):
        centroid.append((x[d] + y[d]) / 2)
    return centroid

# keep merging the two closest points until only k clusters remain
while k < len(data):
    minPoints = minDistance(data)
    data.append(findCentroid(minPoints))
    data.remove(minPoints[0])
    data.remove(minPoints[1])

print("Centroids:", data)
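For comparison, SciPy's hierarchical clustering utilities can cluster the same six columns; this is a sketch assuming SciPy is installed, and it reports cluster labels rather than midpoint centroids. It re-reads Card.csv so it runs on the original points, not the merged list produced above, and reuses k from the prompt:

import pandas as pd
from scipy.cluster.hierarchy import linkage, fcluster

csv = pd.read_csv('Card.csv', header=0)
points = csv.iloc[:, 1:7].values          # the same six numeric columns
Z = linkage(points, method='ward')        # agglomerative merging with Ward linkage
labels = fcluster(Z, t=k, criterion='maxclust')
print(labels[:20])                        # cluster labels for the first 20 accounts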

Implementing K-Means Clustering on a dataset

This code takes a real dataset of credit card users and applies k-means clustering to group the users into three clusters. It outputs the lowest variation found and the corresponding centroids.


import pandas as pd
import math
import random

# load the credit card dataset and keep six numeric columns per account
csv = pd.read_csv('Card.csv', header=0)
data = []
for x in range(len(csv)):
    balance = csv.iloc[x].iloc[1]
    balanceFrequency = csv.iloc[x].iloc[2]
    purchases = csv.iloc[x].iloc[3]
    oneOffPurchases = csv.iloc[x].iloc[4]
    installmentsPurchases = csv.iloc[x].iloc[5]
    cashAdvance = csv.iloc[x].iloc[6]
    accountTuple = (balance, balanceFrequency, purchases, oneOffPurchases, installmentsPurchases, cashAdvance)
    data.append(accountTuple)

clstr1 = []
clstr2 = []
clstr3 = []
centroids = []
lowestVarCentroids = []

def originalCentroids():
    # pick three random data points as the starting centroids
    for a in range(3):
        rand = data[random.randrange(len(data))]
        centroids.append(rand)

def findDistance(x, y):
    # Euclidean distance in six dimensions
    return math.sqrt((x[0] - y[0]) ** 2 + (x[1] - y[1]) ** 2 + (x[2] - y[2]) ** 2 + (x[3] - y[3]) ** 2 + (x[4] - y[4]) ** 2 + (x[5] - y[5]) ** 2)

def sortIntoClstr():
    # assign each point to the nearest centroid
    clstr1.clear()
    clstr2.clear()
    clstr3.clear()
    for d in data:
        coorCentClstr1 = centroids[0]
        coorCentClstr2 = centroids[1]
        coorCentClstr3 = centroids[2]
        distClstr1 = findDistance(coorCentClstr1, d)
        distClstr2 = findDistance(coorCentClstr2, d)
        distClstr3 = findDistance(coorCentClstr3, d)
        if distClstr1 < distClstr2 and distClstr1 < distClstr3:
            clstr1.append(d)
        elif distClstr2 < distClstr1 and distClstr2 < distClstr3:
            clstr2.append(d)
        elif distClstr3 < distClstr1 and distClstr3 < distClstr2:
            clstr3.append(d)

def newCentroid(cluster):
    # average the coordinates of every point in the cluster
    x1 = 0
    x2 = 0
    x3 = 0
    x4 = 0
    x5 = 0
    x6 = 0
    for c in cluster:
        x1 += c[0]
        x2 += c[1]
        x3 += c[2]
        x4 += c[3]
        x5 += c[4]
        x6 += c[5]
    n = len(cluster) + 1   # the +1 avoids division by zero if a cluster ends up empty
    x1 /= n
    x2 /= n
    x3 /= n
    x4 /= n
    x5 /= n
    x6 /= n
    return (x1, x2, x3, x4, x5, x6)

def findNewCentroids():
    centroids.clear()
    centroids.append(newCentroid(clstr1))
    centroids.append(newCentroid(clstr2))
    centroids.append(newCentroid(clstr3))

def varCluster(cluster, centroid):
    # total distance from the centroid to every point in its cluster
    var = 0
    for c in cluster:
        var += findDistance(centroid, c)
    return var

def findVariation():
    variation = 0
    variation += varCluster(clstr1, centroids[0])
    variation += varCluster(clstr2, centroids[1])
    variation += varCluster(clstr3, centroids[2])
    return variation

def oneCycle():
    # alternate between assigning points and recomputing centroids
    for x in range(0, 100, 1):
        sortIntoClstr()
        findNewCentroids()
    return findVariation()

originalCentroids()
lowestVariation = float('inf')   # track the best result across all cycles
for c in range(21):
    variation = oneCycle()
    if variation < lowestVariation:
        lowestVariation = variation
        lowestVarCentroids = list(centroids)   # snapshot so later updates don't overwrite it

print('Lowest Variation: ')
print(lowestVariation)
print('Centroids: ')
print(lowestVarCentroids)
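The result can be cross-checked with scikit-learn's KMeans; this is a sketch assuming scikit-learn is installed and reusing the data list built above. Note that inertia_ is the sum of squared distances, so it is an analogue of, not identical to, the variation computed here:

from sklearn.cluster import KMeans

km = KMeans(n_clusters=3, n_init=21, random_state=0)
km.fit(data)
print("Centroids:")
print(km.cluster_centers_)
print("Inertia (sum of squared distances):", km.inertia_)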