K-Mean

The K-Means algorithm is one of the simplest unsupervised learning algorithms; it is used to solve clustering problems in data mining. Let's first understand how the K-Means algorithm works with an example. Say we have random data like this:
random Cluster data
There appear to be two clusters in this data, and we can easily group them using the K-Means algorithm. K-Means creates k clusters and assigns each data item to the cluster whose centroid is closest to it, i.e. the cluster with the minimum difference between its centroid and the data item. Each centroid is then recomputed as the mean of the items assigned to it, and the two steps repeat until the centroids stop changing. In this example the algorithm will create two clusters and add each data item to one of them based on its difference from centroid 1 and centroid 2:
K-Mean Applied data
Here is the implementation of this algorithm in C++ and Java.

C++ Code

#include <iostream>
#include <stdlib.h>
#include<conio.h>
#include<math.h>
#include<vector>
#include <climits>
using namespace std;

// Returns the smallest value among the first maxIndex elements of arr.
//
// arr      - array to scan; must hold at least maxIndex elements.
// maxIndex - number of elements to examine.
//
// Returns INT_MAX when maxIndex is zero or negative (nothing to examine).
int min(int arr[], int maxIndex)
{
	// Seed with the largest representable int so that any element can replace
	// it. The previous seed (100000000000) does not fit in an int and was
	// silently truncated, which broke the result for large inputs.
	int smallest=INT_MAX;
	for(int i=0;i<maxIndex;i++)
	{
		if(arr[i]<smallest)
			smallest=arr[i];
	}
	return smallest;
}
// Finds the first position of number within the first maxIndex elements of arr.
//
// number   - value to look for.
// arr      - array to search; must hold at least maxIndex elements.
// maxIndex - number of elements to examine.
//
// Returns the zero-based index of the first match, or -1 when the value is
// not present (previously an uninitialised value was returned in that case).
int indexOf(int number,int arr[], int maxIndex)
{
	for(int i=0;i<maxIndex;i++)
	{
		if(number==arr[i])
			return i;
	}
	return -1;
}
// Computes the integer mean of the values in vc.
//
// vc - values to average.
//
// Returns the sum divided by the element count, truncated toward zero.
// Returns 0 for an empty vector instead of dividing by zero.
int mean(vector<int> vc )
{
	if(vc.empty())
		return 0;
	// Accumulate in a wider type so that many large values cannot overflow.
	long long sum=0;
	for(vector<int>::size_type i=0;i<vc.size();i++)
		sum+=vc[i];
	// Divide in signed arithmetic: dividing by the unsigned size() directly
	// would convert a negative sum to a huge unsigned number first.
	return static_cast<int>(sum/static_cast<long long>(vc.size()));
}
// Writes every element of vc to standard output, each one followed by a comma
// (including the last). Nothing is written for an empty vector.
void show(vector<int> vc )
{
	vector<int>::iterator it=vc.begin();
	while(it!=vc.end())
	{
		cout<<*it<<",";
		++it;
	}
}
// Reports whether the first maxIndex elements of arr1 and arr2 are pairwise
// equal. Returns true when maxIndex is zero or negative, since there is then
// nothing to compare.
bool isEqual(int arr1[], int arr2[], int maxIndex){
	int pos=0;
	// Advance while the elements agree; stopping early means a mismatch.
	while(pos<maxIndex && arr1[pos]==arr2[pos])
		++pos;
	return pos>=maxIndex;
}
int main()
{
	
	int noOfItems;
	int k;	
	cout<<"Total numbers you want enter?"<<endl;
	cin>> noOfItems;
	cout<<"Enter value of K:"<<endl;
	cin>> k;
	int cluster[k];
	int oldCluster[k];
	int objects[noOfItems];
	int row[k];
	vector< vector<int> > groups; 
	
	for(int i=0;i<noOfItems;i++) 
	{
		cout<<"Enter Value "<<(i+1)<<endl;
		cin>>objects[i];
		if(i<k) 
		cluster[i]=objects[i];
	}
	for(int i=0;i<k;i++)
	{
	vector<int> newGroup;
	groups.push_back(newGroup);
	}
	int iter =1;
	do
	{
	for(int i=0;i<noOfItems;i++)
	{
		for(int j=0;j<k;j++){
		row[j] = abs(cluster[j]-objects[i]); 
		}		
	    groups[indexOf(min(row,k),row,k)].push_back(objects[i]); 
	}
	
	for(int j=0;j<k;j++)
	{
    	if(!groups[j].empty())
		{
		oldCluster[j]=cluster[j]; 
		cluster[j] = mean(groups[j]); 	
		}
	}
	if(!isEqual(oldCluster,cluster,k))
	{
		for(int i=0;i<k;i++)
		groups[i].clear();
	}
	iter++;	
	}while(!isEqual(oldCluster,cluster,k)); 
	cout<<"nn"; 
	for(int i=0;i<k;i++) 
	{
		cout<<"C"<<(i+1)<<" : "<<cluster[i]<<endl;
	}
	for(int i=0;i<k;i++)
	{
		cout<<"nnGroup "<<(i+1)<<" : n"<<endl;
		show(groups[i]);
	}
	cout<<"nnNumber of Iterations "<<iter<<endl;
	getch();
	return 0;
}

Java Code

import static java.lang.Math.abs;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Scanner;

/**
 * Interactive one-dimensional k-means clustering over integer values.
 *
 * <p>Constructing an instance reads {@code noOfItems} integers from standard
 * input, seeds the {@code k} centroids with the first {@code k} values, and
 * then repeats two steps until no centroid changes: assign every item to the
 * centroid with the smallest absolute difference (ties go to the
 * lowest-numbered centroid), and replace each non-empty group's centroid with
 * the integer average of its members. The final centroids, the groups and the
 * number of passes are printed to standard output.
 */
public class KMean {

    /** Number of clusters. */
    int k;
    /** Number of data items read from input. */
    int noOfItems;
    /** The data items, in input order. */
    ArrayList<Integer> dataItems;
    /** Current centroids, one per cluster. */
    ArrayList<Integer> cz;
    /** Centroids as they were before the most recent update. */
    ArrayList<Integer> oldCz;
    /** Scratch list: differences between one item and every centroid. */
    ArrayList<Integer> row;
    /** Members of each cluster after the most recent assignment pass. */
    ArrayList<ArrayList<Integer>> groups;
    /** Source of the data items. */
    Scanner input;

    /**
     * Reads the data items from standard input, clusters them and prints the
     * result.
     *
     * @param k number of clusters; must satisfy {@code 1 <= k <= noOfItems}
     * @param noOfItems number of integers to read
     * @throws IllegalArgumentException if {@code k < 1} or {@code noOfItems < k}
     */
    public KMean(int k, int noOfItems) {
        this(k, noOfItems, new Scanner(System.in));
    }

    /**
     * Same as the public constructor, but reads from a caller-supplied
     * scanner. {@link #main} uses this so the whole program shares one
     * scanner: a second buffering Scanner on System.in can miss input that
     * the first one has already consumed.
     */
    private KMean(int k, int noOfItems, Scanner input) {
        // Each centroid is seeded from one of the first k items.
        if (k < 1 || noOfItems < k) {
            throw new IllegalArgumentException(
                    "Need 1 <= k <= noOfItems, got k=" + k + ", noOfItems=" + noOfItems);
        }
        this.k = k;
        this.noOfItems = noOfItems;
        this.input = input;
        dataItems = new ArrayList<>();
        cz = new ArrayList<>();
        oldCz = new ArrayList<>();
        row = new ArrayList<>();
        groups = new ArrayList<>();
        for (int i = 0; i < k; i++) {
            groups.add(new ArrayList<>());
        }

        readItems();
        int passes = cluster();
        report(passes);
    }

    /** Prompts for and reads every data item; the first k become the initial centroids. */
    private void readItems() {
        for (int i = 0; i < noOfItems; i++) {
            System.out.println("Enter Value for: " + (i + 1) + " item");
            dataItems.add(input.nextInt());
            if (i < k) {
                cz.add(dataItems.get(i));
                System.out.println("C" + (i + 1) + " is " + cz.get(i));
            }
        }
    }

    /**
     * Runs assignment/update passes until the centroids stop changing.
     *
     * @return the number of passes performed
     */
    private int cluster() {
        int passes = 0;
        boolean changed;
        do {
            // Start each pass with empty groups; the groups from the final
            // pass are kept for reporting.
            for (ArrayList<Integer> group : groups) {
                group.clear();
            }
            for (int item : dataItems) {
                row.clear();
                for (int c : cz) {
                    row.add(abs(c - item));
                }
                groups.get(row.indexOf(Collections.min(row))).add(item);
            }
            row.clear();

            oldCz.clear();
            oldCz.addAll(cz);
            for (int i = 0; i < k; i++) {
                // A centroid whose group is empty keeps its previous value.
                if (!groups.get(i).isEmpty()) {
                    cz.set(i, average(groups.get(i)));
                }
            }
            passes++;
            changed = !cz.equals(oldCz);
        } while (changed);
        return passes;
    }

    /** Prints the final centroids, the groups and the number of passes. */
    private void report(int passes) {
        for (int i = 0; i < cz.size(); i++) {
            System.out.println("New C" + (i + 1) + " " + cz.get(i));
        }
        for (int i = 0; i < groups.size(); i++) {
            System.out.println("Group " + (i + 1));
            System.out.println(groups.get(i).toString());
        }
        System.out.println("Number of Itrations: " + passes);
    }

    /**
     * Prompts for k and the number of data items, then runs the clustering.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Scanner input = new Scanner(System.in);
        System.out.println("Enter Value of K");
        int k = input.nextInt();
        System.out.println("Enter No of Data Items");
        int noOfItems = input.nextInt();
        new KMean(k, noOfItems, input);
    }

    /**
     * Computes the integer average of a list, truncated toward zero.
     *
     * @param list values to average; must not be empty
     * @return the sum of the values divided by their count
     * @throws ArithmeticException if {@code list} is empty
     */
    public static int average(ArrayList<Integer> list) {
        // A long accumulator keeps the sum of many large ints from overflowing.
        long sum = 0;
        for (int value : list) {
            sum += value;
        }
        return (int) (sum / list.size());
    }
}
I hope this post helped you to understand the K-Means algorithm :)