PageRenderTime 590ms CodeModel.GetById 25ms RepoModel.GetById 0ms app.codeStats 0ms

/clarans/clarans.py

https://bitbucket.org/Rebane/dm_project
Python | 131 lines | 91 code | 18 blank | 22 comment | 18 complexity | 0855ff18619d0e47c0b1041817a187a4 MD5 | raw file
  1. import math
  2. import random
  3. import numpy as np
  4. import sys
  5. """
  6. 1. Input parameters numlocal and maxneighbor. Initialize i to 1, and mincost to a large number.
  7. 2. Set current to an arbitrary node in G_{n,k}
  8. 3. Set j to 1.
  9. 4. Consider a random neighbor S of current, and based on Equation (5) calculate the cost differential
  10. of the two nodes.
  11. 5. If S has a lower cost, set current to S, and go to Step (3).
  12. 6. Otherwise, increment j by 1. If j<=maxneighbor,go to Step (4).
  13. 7. Otherwise, when j > maxneighbor, compare the cost of current with mincost. If the former is less than mincost,
  14. set mincost to the cost of current, and set bestnode to current.
  15. 8. Increment i by 1. If i > numlocal, output bestnode and halt. Otherwise, go to Step (2).
  16. """
  17. def clarans_basic(points, numlocal, maxneighbor, mincost,k):
  18. # random.seed(1)
  19. # np.random.seed(1)
  20. i=1
  21. N = len(points)
  22. d_mat = np.asmatrix(np.empty((k,N)))
  23. local_best = []
  24. bestnode = []
  25. while i<=numlocal:
  26. #Step 2 - pick k random medoids from data points - medoids_nr from points
  27. node = np.random.permutation(range(N))[:k]
  28. fill_distances(d_mat, points, node)
  29. cls = assign_to_closest(points, node, d_mat)
  30. cost = total_dist(d_mat, cls)
  31. copy_node = node.copy()
  32. print 'new start \n'
  33. #increase neighbor count
  34. j = 1
  35. while j<=maxneighbor:
  36. #Step 4 - pick a random neighbor of current node - i.e change randomly one medoid
  37. #calculate the cost differential of the initial node and the random neighbor
  38. changing_node = copy_node.copy()
  39. idx = pick_random_neighbor(copy_node, N)
  40. update_distances(d_mat, points, copy_node, idx)
  41. cls = assign_to_closest(points, copy_node, d_mat)
  42. new_cost = total_dist(d_mat, cls)
  43. #check if new cost is smaller
  44. if new_cost < cost:
  45. cost = new_cost
  46. local_best = copy_node.copy()
  47. print 'Best cost: ' + str(cost) + ' '
  48. print local_best
  49. print '\n'
  50. j = 1
  51. continue
  52. else:
  53. #copy_node = changing_node
  54. j=j+1
  55. if j<=maxneighbor:
  56. continue
  57. elif j>maxneighbor:
  58. if mincost>cost:
  59. mincost = cost
  60. print "change bestnode "
  61. print bestnode
  62. print " into"
  63. bestnode = local_best.copy()
  64. print bestnode
  65. print '\n'
  66. i = i+1
  67. if i>numlocal:
  68. fill_distances(d_mat, points, bestnode)
  69. cls = assign_to_closest(points, bestnode, d_mat)
  70. print "Final cost: " + str(mincost) + ' '
  71. print bestnode
  72. print '\n'
  73. return cls, bestnode
  74. else:
  75. break
  76. def pick_random_neighbor(current_node, set_size):
  77. #pick a random item from the set and check that it is not selected
  78. node = random.randrange(0, set_size, 1)
  79. while node in current_node:
  80. node = random.randrange(0, set_size, 1)
  81. #replace a random node
  82. i = random.randrange(0, len(current_node))
  83. current_node[i]=node
  84. return i
  85. def dist_euc((x1, y1), (x2, y2)):
  86. return math.sqrt((x1 - x2)**2 + (y1 - y2)**2)
  87. def assign_to_closest(points, meds, d_mat):
  88. cluster =[]
  89. for i in xrange(len(points)):
  90. if i in meds:
  91. cluster.append(np.where(meds==i))
  92. continue
  93. d = sys.maxint
  94. idx=i
  95. for j in xrange(len(meds)):
  96. d_tmp = d_mat[j,i]
  97. if d_tmp < d:
  98. d = d_tmp
  99. idx=j
  100. cluster.append(idx)
  101. return cluster
  102. def fill_distances(d_mat, points, current_node):
  103. for i in range(len(points)):
  104. for k in range(len(current_node)):
  105. d_mat[k,i]=dist_euc(points[current_node[k]], points[i])
  106. def total_dist(d_mat, cls):
  107. tot_dist = 0
  108. for i in xrange(len(cls)):
  109. tot_dist += d_mat[cls[i],i]
  110. return tot_dist
  111. def update_distances(d_mat, points, node, idx):
  112. for j in range(len(points)):
  113. d_mat[idx,j]=dist_euc(points[node[idx]], points[j])