PageRenderTime 367ms CodeModel.GetById 201ms app.highlight 17ms RepoModel.GetById 147ms app.codeStats 0ms

/articles/machine-learning-sample-color-quantization-using-k-means-clustering.md

https://github.com/jeffwilcox/azure-content
Markdown | 148 lines | 107 code | 41 blank | 0 comment | 0 complexity | d73cfe633ca71ff1c1e7c4d1fe211e48 MD5 | raw file
  1<properties title="Azure Machine Learning Sample: Color quantization using K-Means clustering" pageTitle="Machine Learning Sample: Color quantization using K-Means clustering | Azure" description="A sample Azure Machine Learning experiment that evaluates using different K-Means clustering values for quantizing a color image." metaKeywords="" services="machine-learning" solutions="" documentationCenter="" authors="garye" manager="paulettm" editor="cgronlun"  videoId="" scriptId="" />
  2
  3<tags ms.service="machine-learning" ms.workload="data-services" ms.tgt_pltfrm="na" ms.devlang="na" ms.topic="article" ms.date="09/19/2014" ms.author="garye" />
  4
  5
  6# Azure Machine Learning Sample: Color quantization using K-Means clustering
  7
  8*You can find the sample experiment associated with this model in ML Studio in the **EXPERIMENTS** section under the **SAMPLES** tab. The experiment name is:*
  9
 10	Sample Experiment - Color Based Image Compression using K-Means Clustering - Development
 11
 12##Problem description
 13
 14[Color quantization](http://en.wikipedia.org/wiki/Color_quantization "Color quantization") is the process of reducing the number of distinct colors in an image, thereby compressing it. Normally, the intent is to preserve the color appearance of the image as much as possible while reducing the number of colors, whether because of memory limitations or for compression.
 15
 16##Data
 17
 18In this sample experiment, we assume that any given 24-bit RGB image has 256 x 256 x 256 possible colors. We could, of course, build standard color histograms based on these intensity values. But another approach is to explicitly quantize the image and *reduce* the number of colors to, say, 16 or 64. This creates a substantially smaller color space and (ideally) less noise and variance. For this, we passed the pixel data (each pixel as a dataset row) to our K-Means Clustering module.
 19
 20##Model
 21
 22The model is created as shown in the image below:
 23
 24![Model][image1]
 25
 26We ran K-Means clustering with K=10 through 500 in 5 different branches. First we calculated the clusters, and then we aggregated each cluster to the mean of its pixels' colors (using an R script).
 27Finally, we associated each pixel with its aggregated cluster color and sent the new image out in CSV format. Meanwhile, we also calculated the Root Mean Squared Difference between the new pixel colors and the original image, and showed it in an R plot (using Execute R Script).
 28
 29##Results
 30
 31We tested the outcome with different numbers of clusters (colors), as shown in the experiment model above. As you can see below, more clusters produce higher-quality images with less compression:
 32
 33||
 34------------ | ---------
 35**Original** | ![Original][image2a]
 36**K=10**     | ![K=10][image2b]
 37**K=20**     | ![K=20][image2c]
 38**K=50**     | ![K=50][image2d]
 39**K=100**    | ![K=100][image2e]
 40**K=500**    | ![K=500][image2f]
 41
 42
 43We have also measured the accuracy using Root Mean Squared Difference to the Original Image Colors which can be seen from the second output port of the "Execute R Script" Module:
 44
 45![Output of Execute R Script module][image3]
 46
 47As the plot shows, the more color clusters there are, the more closely the colors match the original image (better quality).
 48
 49##Code to convert images to CSV and reverse
 50
 51In order to feed the images into ML Studio, we wrote a simple converter that can convert image files to a CSV format that ML Studio can use, and also one that converts them back to an image. Please feel free to use the following code. In the future we are planning to add a module for reading in images as well.
 52
 53	using System;
 54	using System.Collections.Generic;
 55	using System.Linq;
 56	using System.Text;
 57	using System.Threading.Tasks;
 58	using System.Drawing;
 59	using System.Drawing.Imaging;
 60	using System.IO;
 61	 
	namespace Text2Image
	{
	    /// <summary>
	    /// Console tool that converts an image file to an "X,Y,R,G,B" CSV file
	    /// (one row per pixel) and converts such a CSV file back to a PNG image.
	    /// </summary>
	    class Program
	    {
	        // Characters accepted as field separators when reading a CSV row.
	        // '.' is intentionally NOT a separator so that a decimal value such as
	        // "100.5" stays in a single field instead of shifting the columns.
	        static readonly char[] field_separators = new char[] { ' ', ',', ':', '\t', '{', '}' };
	
	        /// <summary>
	        /// Writes every pixel of the image at <paramref name="img_path"/> to a CSV file
	        /// with the same path but a ".csv" extension. The file starts with the header
	        /// "X,Y,R,G,B" followed by one row per pixel, column by column.
	        /// </summary>
	        /// <param name="img_path">Path of the image file to read.</param>
	        static void img2csv(string img_path)
	        {
	            // Path.ChangeExtension copes with extensions of any length (or none),
	            // unlike removing a fixed four characters from the file name.
	            string destination_file_path = Path.ChangeExtension(img_path, ".csv");
	
	            // 'using' guarantees the bitmap and the output file are released
	            // even if reading a pixel or writing a row throws.
	            using (Bitmap img = new Bitmap(img_path))
	            using (StreamWriter file = new StreamWriter(destination_file_path))
	            {
	                file.WriteLine("X,Y,R,G,B");
	
	                for (int x = 0; x < img.Width; x++)
	                {
	                    for (int y = 0; y < img.Height; y++)
	                    {
	                        // Fetch the pixel once instead of once per channel.
	                        Color pixel = img.GetPixel(x, y);
	                        file.WriteLine(x + "," + y + "," + pixel.R + "," + pixel.G + "," + pixel.B);
	                    }
	                }
	            }
	        }
	
	        /// <summary>
	        /// Reads an "X,Y,R,G,B" CSV file and saves the pixels it describes as a PNG
	        /// file with the same path but a ".png" extension. The first line is treated
	        /// as a header and skipped; rows with fewer than five fields are ignored.
	        /// The image size is the largest X and Y found in the rows, plus one.
	        /// </summary>
	        /// <param name="csv_path">Path of the CSV file to read.</param>
	        /// <exception cref="InvalidDataException">The file contains no pixel rows.</exception>
	        static void csv2img(string csv_path)
	        {
	            string destination_file_path = Path.ChangeExtension(csv_path, ".png");
	
	            string[] lines = File.ReadAllLines(csv_path);
	
	            // Parse every row first so the image size can be derived from the
	            // largest coordinates rather than from whatever the last line holds
	            // (which may be blank or out of order).
	            List<int[]> pixels = new List<int[]>(lines.Length);
	            int img_width = 0;
	            int img_height = 0;
	
	            // Index 0 is the header row.
	            for (int i = 1; i < lines.Length; i++)
	            {
	                string[] values = lines[i].Split(field_separators, StringSplitOptions.RemoveEmptyEntries);
	
	                // All five fields (X, Y, R, G, B) are read below, so require five.
	                if (values.Length < 5)
	                {
	                    continue;
	                }
	
	                int[] pixel = new int[5];
	                for (int k = 0; k < pixel.Length; k++)
	                {
	                    pixel[k] = parse_number(values[k]);
	                }
	
	                pixels.Add(pixel);
	                img_width = Math.Max(img_width, pixel[0] + 1);
	                img_height = Math.Max(img_height, pixel[1] + 1);
	            }
	
	            if (pixels.Count == 0)
	            {
	                throw new InvalidDataException("No pixel rows (X,Y,R,G,B) found in " + csv_path);
	            }
	
	            using (Bitmap bmp_img = new Bitmap(img_width, img_height))
	            {
	                foreach (int[] pixel in pixels)
	                {
	                    bmp_img.SetPixel(pixel[0], pixel[1], Color.FromArgb(pixel[2], pixel[3], pixel[4]));
	                }
	
	                // State the format explicitly so the bytes written match the ".png" name.
	                bmp_img.Save(destination_file_path, ImageFormat.Png);
	            }
	        }
	
	        /// <summary>
	        /// Parses one CSV field as a number using the invariant culture and rounds it
	        /// to the nearest integer (midpoints away from zero), so both "128" and
	        /// "127.6" are accepted.
	        /// </summary>
	        /// <param name="text">The field text to parse.</param>
	        /// <returns>The field value rounded to an integer.</returns>
	        static int parse_number(string text)
	        {
	            double value = double.Parse(
	                text,
	                System.Globalization.NumberStyles.Float,
	                System.Globalization.CultureInfo.InvariantCulture);
	            return (int)Math.Round(value, MidpointRounding.AwayFromZero);
	        }
	
	        /// <summary>
	        /// Entry point. Converts the file named by the first command-line argument:
	        /// a ".csv" file becomes an image, anything else is read as an image and
	        /// becomes a CSV file.
	        /// </summary>
	        /// <param name="args">Command-line arguments; args[0] is the source file path.</param>
	        static void Main(string[] args)
	        {
	            if (args.Length < 1)
	            {
	                Console.Error.WriteLine("Usage: Text2Image <path to image or .csv file>");
	                Environment.ExitCode = 1;
	                return;
	            }
	
	            // In C# the args array does not include the executable name,
	            // so the first user-supplied argument is args[0].
	            string source_path = args[0];
	
	            if (string.Equals(Path.GetExtension(source_path), ".csv", StringComparison.OrdinalIgnoreCase))
	            {
	                csv2img(source_path);
	            }
	            else
	            {
	                img2csv(source_path);
	            }
	        }
	    }
	}
138
139
140
141[image1]:./media/machine-learning-sample-color-quantization-using-k-means-clustering/image1.png
142[image2a]:./media/machine-learning-sample-color-quantization-using-k-means-clustering/image2a.jpg
143[image2b]:./media/machine-learning-sample-color-quantization-using-k-means-clustering/image2b.png
144[image2c]:./media/machine-learning-sample-color-quantization-using-k-means-clustering/image2c.png
145[image2d]:./media/machine-learning-sample-color-quantization-using-k-means-clustering/image2d.png
146[image2e]:./media/machine-learning-sample-color-quantization-using-k-means-clustering/image2e.png
147[image2f]:./media/machine-learning-sample-color-quantization-using-k-means-clustering/image2f.png
148[image3]:./media/machine-learning-sample-color-quantization-using-k-means-clustering/image3.png