K-Means Clustering


/ Published in: Python
Save to your folder(s)

This is from the book: Programming Collective Intelligence


Copy this code and paste it in your HTML
  def kcluster(rows, distance=pearson, k=4):
      """Group the rows into ``k`` clusters with the k-means algorithm.

      Centroids start at random positions inside the per-column value range
      of ``rows``. Each pass assigns every row to its closest centroid and
      then moves each centroid to the mean of its members. The loop ends when
      an assignment repeats the previous one, or after 100 passes.

      Prints ``Iteration N`` at the start of every pass.

      Args:
          rows: Sequence of numeric sequences, one per item. The width of
              the first row is used as the number of columns.
          distance: Callable ``distance(centroid, row)`` returning a
              comparable number; the smallest value wins. The default,
              ``pearson``, is expected to be defined elsewhere in this
              module.
          k: Number of clusters to build.

      Returns:
          A list of ``k`` lists. Entry ``i`` holds the indices into ``rows``
          of the items assigned to centroid ``i``. Empty ``rows`` yields
          ``k`` empty lists.
      """
      # Nothing to cluster: every cluster is empty (and rows[0] would fail).
      if not rows:
          return [[] for _ in range(k)]

      width = len(rows[0])

      # Minimum and maximum value of each column, extracting it only once.
      ranges = []
      for i in range(width):
          column = [row[i] for row in rows]
          ranges.append((min(column), max(column)))

      # Create k randomly placed centroids inside those ranges.
      clusters = [[random.random() * (high - low) + low for low, high in ranges]
                  for _ in range(k)]

      lastmatches = None
      for t in range(100):
          # Parenthesised single argument: same output on Python 2 and 3.
          print('Iteration %d' % t)
          bestmatches = [[] for _ in range(k)]

          # Find which centroid is the closest for each row.
          for j, row in enumerate(rows):
              bestmatch = 0
              bestdist = distance(clusters[0], row)
              for i in range(1, k):
                  d = distance(clusters[i], row)
                  # Strict comparison keeps the lowest index on ties.
                  if d < bestdist:
                      bestmatch, bestdist = i, d
              bestmatches[bestmatch].append(j)

          # If the results are the same as last time, this is complete.
          if bestmatches == lastmatches:
              break
          lastmatches = bestmatches

          # Move the centroids to the average of their members.
          for i, members in enumerate(bestmatches):
              if not members:
                  # An empty cluster keeps its current centroid.
                  continue
              avgs = [0.0] * width
              for rowid in members:
                  for m, value in enumerate(rows[rowid]):
                      avgs[m] += value
              clusters[i] = [total / len(members) for total in avgs]

      return bestmatches

Report this snippet


Comments

RSS Icon Subscribe to comments

You need to log in to post a comment.