## K-means clustering


# Infinite-dimensional K-means clustering

import math
import random
from copy import deepcopy

# Built-in data set. Each row holds four feature values followed by two zero
# placeholder columns; the placeholders are dropped before clustering and
# re-created per attempt as [seed marker, cluster label].
sample = [
    [2, 1, 1, 1, 0, 0],
    [1, 3, 4, 5, 0, 0],
    [4, 3, 5, 2, 0, 0],
    [2, 6, 0, 2, 0, 0],
    [0, 2, 3, 3, 0, 0],
    [3, 1, 1, 2, 0, 0],
    [7, 5, 0, 2, 0, 0],
    [1, 1, 2, 1, 0, 0],
    [3, 2, 2, 2, 0, 0],
    [2, 5, 6, 3, 0, 0],
]


def squared_distance(point_a, point_b, dimension):
    """Return the squared Euclidean distance over the first *dimension* coordinates."""
    return sum((point_a[i] - point_b[i]) ** 2 for i in range(dimension))


def nearest_index(distances):
    """Return the index of the smallest value in *distances*.

    Ties resolve to the LAST smallest entry, matching the original script's
    selection loop. *distances* must be non-empty.
    """
    best = 0
    for index, value in enumerate(distances):
        if value <= distances[best]:
            best = index
    return best


def assign_to_seeds(graph, seed_indices, dimension):
    """Mark the seed rows and label every other row with its nearest seed.

    Each row of *graph* is ``features + [seed marker, label]``. Seed number
    ``n`` (1-based, in the order given by *seed_indices*) gets marker ``n``
    and label ``n``; rows whose marker is still 0 get the label of the
    closest seed. *graph* is modified in place and also returned.
    """
    for label, index in enumerate(seed_indices, start=1):
        graph[index][dimension] = label
        graph[index][dimension + 1] = label
    seeds = [graph[index] for index in seed_indices]
    for row in graph:
        if row[dimension] == 0:
            distances = [squared_distance(row, seed, dimension) for seed in seeds]
            row[dimension + 1] = nearest_index(distances) + 1
    return graph


def compute_centroids(graph, k, dimension):
    """Return one centroid (list of per-column means) per label ``1..k``.

    A label with no member rows yields ``None`` instead of dividing by zero.
    """
    centroids = []
    for label in range(1, k + 1):
        members = [row for row in graph if row[dimension + 1] == label]
        if not members:
            centroids.append(None)
            continue
        centroids.append(
            [sum(row[i] for row in members) / len(members) for i in range(dimension)]
        )
    return centroids


def assign_to_centroids(graph, centroids, dimension):
    """Return a deep copy of *graph* with every row relabelled to its nearest centroid."""
    updated = deepcopy(graph)
    for row in updated:
        # An empty cluster (centroid None) is infinitely far away, so it can
        # never be chosen while at least one real centroid exists.
        distances = [
            math.inf if centroid is None else squared_distance(centroid, row, dimension)
            for centroid in centroids
        ]
        row[dimension + 1] = nearest_index(distances) + 1
    return updated


def realign(graph, k, dimension, max_iterations=1000):
    """Repeat the centroid/relabel step until the labels stop changing.

    Returns a new list of rows; *graph* itself is never modified, so callers
    must use the return value. The original recursive version discarded the
    result of its recursive call (and the script discarded the outer result),
    which meant the realigned labels were never used.

    *max_iterations* is a safety bound; the latest labelling is returned if
    it is reached.
    """
    current = graph
    for _ in range(max_iterations):
        centroids = compute_centroids(current, k, dimension)
        updated = assign_to_centroids(current, centroids, dimension)
        if updated == current:
            return updated
        current = updated
    return current


def cluster_distances(graph, centroids, dimension):
    """Return, per cluster, the Euclidean distance of each member to its centroid.

    NOTE(review): the original source had lost its indentation. It appended
    squared terms to one list for a whole cluster, so each value also
    contained the terms of earlier points. This assumes one independent
    distance per point was intended - confirm.
    """
    per_cluster = []
    for label, centroid in enumerate(centroids, start=1):
        per_cluster.append(
            [
                math.sqrt(squared_distance(row, centroid, dimension))
                for row in graph
                if row[dimension + 1] == label
            ]
        )
    return per_cluster


def run_attempt(points, k, dimension, seed_indices=None):
    """Run one k-means attempt on *points* and return ``(graph, distances)``.

    *points* are rows of feature values; only the first *dimension* columns
    are used. *seed_indices* selects the k starting rows; when omitted, k
    distinct rows are drawn at random.

    ``graph`` rows are ``features[:dimension] + [seed marker, label]`` and
    ``distances`` is the output of :func:`cluster_distances`.

    Raises:
        ValueError: if k, dimension or seed_indices do not fit the data.
    """
    if not 1 <= k <= len(points):
        raise ValueError("k must be between 1 and the number of points")
    if dimension < 1 or any(len(row) < dimension for row in points):
        raise ValueError("dimension must be between 1 and the number of feature columns")
    if seed_indices is None:
        # Distinct rows are required; the original retry loop never ended
        # when k exceeded the number of points.
        seed_indices = random.sample(range(len(points)), k)
    elif len(seed_indices) != k:
        raise ValueError("seed_indices must contain exactly k entries")

    graph = [list(row[:dimension]) + [0, 0] for row in points]
    assign_to_seeds(graph, seed_indices, dimension)
    graph = realign(graph, k, dimension)
    centroids = compute_centroids(graph, k, dimension)
    return graph, cluster_distances(graph, centroids, dimension)


def main():
    """Prompt for k, attempts and dimension, run the attempts and print the results."""
    k = int(input("what is k"))
    attempts = int(input("How many attempts?"))
    dimension = int(input("how many dimesions u want?"))
    if attempts < 1:
        raise ValueError("attempts must be at least 1")

    # Drop the two placeholder columns so they are never treated as features.
    features = [row[:-2] for row in sample]

    totalgraph = []
    totalvariance = []
    graph = []
    for _ in range(attempts):
        graph, distances = run_attempt(features, k, dimension)
        varianc = 0
        for cluster in distances:
            print(cluster)
            varianc = varianc + sum(cluster)
            print(varianc)
            print("hi")  # separator line kept from the original output
        totalvariance.append(varianc)
        totalgraph.append(graph)
    print(totalvariance)
    # As in the original, this shows the labelling of the last attempt.
    print(graph)


if __name__ == "__main__":
    main()
1