# Preview: from bokeh.models import ColumnDataSource, Button, Select, Div; from bokeh.sampledata.iris import flowers

# Category: data mining


#!/usr/bin/env python
# coding: utf-8

# In[1]:

import numpy as np
from bokeh.models import ColumnDataSource, Button, Select, Div
from bokeh.sampledata.iris import flowers
from bokeh.plotting import figure, curdoc, show
from bokeh.layouts import column, row

# In[2]:

# Work on an independent deep copy of the iris table, with the 'species'
# column dropped so only the remaining columns are kept.
data = flowers.copy(deep=True).drop(columns=['species'])

# In[6]:

# k-medoid algorithm using given medoids
#
# NOTE: this cell must run before the distance computation below, because it
# defines m, k, pca_data and initial_medoids that the distance code reads.

# Number of samples (rows) in the dataset.
m = len(data)

# Dimension reduction: project the data onto 3 principal components.
from sklearn.decomposition import PCA
pca_components = PCA(n_components=3)
pca_data = pca_components.fit_transform(data)

# Initialize the given medoids.
# `medoids` holds ROW INDICES into the dataset; `initial_medoids` holds the
# coordinates of those rows in PCA space (one medoid per row, shape (k, 3)).
# The indices themselves must not be used as coordinates: distances are
# measured between points in PCA space.
medoids = [24, 74, 124]
k = len(medoids)
initial_medoids = pca_data[medoids]

# In[194]:

# Manhattan (L1) distance from every sample to every medoid, row by row.
# dist_matrix[i, j] is the distance between sample i and medoid j.
dist_matrix = np.empty((m, k))
for i in range(m):
    # (3,) - (k, 3) broadcasts to (k, 3); the L1 norm over axis 1 gives (k,).
    dist = np.linalg.norm(pca_data[i, :] - initial_medoids, ord=1, axis=1)
    dist_matrix[i, :] = dist

# The same distances in one broadcast expression, as a cross-check of the
# loop above: (m, 1, 3) - (k, 3) -> (m, k, 3), summed over the last axis.
dist_another = np.sum(
    np.abs(pca_data[:, np.newaxis, :] - initial_medoids), axis=-1
)
dist_matrix_another = dist_another.reshape((m, k))

# Related questions in the data mining category