PageRenderTime 587ms CodeModel.GetById 483ms app.highlight 19ms RepoModel.GetById 82ms app.codeStats 0ms

/articles/machine-learning-sample-color-quantization-using-k-means-clustering.md

https://github.com/bgoldy/azure-content
Markdown | 154 lines | 117 code | 37 blank | 0 comment | 0 complexity | 484c2023841a7de41f58b4ca00f3c714 MD5 | raw file
  1<properties title="Azure Machine Learning Sample: Color quantization using K-Means clustering" pageTitle="Machine Learning Sample: Color quantization using K-Means clustering | Azure" description="A sample Azure Machine Learning experiment that evaluates using different K-Means clustering values for quantizing a color image." metaKeywords="" services="" solutions="" documentationCenter="" authors="garye" videoId="" scriptId="" />
  2
  3<tags ms.service="machine-learning" ms.workload="tbd" ms.tgt_pltfrm="na" ms.devlang="na" ms.topic="article" ms.date="01/01/1900" ms.author="garye" />
  4
  5#Azure Machine Learning Sample: Color quantization using K-Means clustering
  6
  7##Problem description
  8
  9[Color quantization](http://en.wikipedia.org/wiki/Color_quantization "Color quantization") is the process of reducing the number of distinct colors in an image, thereby compressing it. Normally, the intent is to preserve the color appearance of the image as much as possible while reducing the number of colors, whether because of memory limitations or for compression. 
 10
 11##Data
 12
 13In this sample experiment, we assume that any given 24-bit RGB image has 256 x 256 x 256 possible colors. We could build standard color histograms based on these intensity values, but another approach is to explicitly quantize the image and *reduce* the number of colors to, say, 16 or 64. This creates a substantially smaller color space and (ideally) less noise and variance. To do this, we passed the pixel data (each pixel as a dataset row) to the K-Means clustering module. 
 14
 15##Model
 16
 17The model is created as shown in the image below:
 18
 19![Model][image1]
 20
 21We ran K-Means clustering with K=10 through 500 in 5 different branches. First we calculated the clusters and then aggregated each cluster to the mean of its pixels' colors (using an R Script). 
 22Finally, we associated each pixel with the aggregated cluster color and sent the new image out in CSV format. Meanwhile, we also calculated the Root Mean Squared Difference between the new pixel colors and the original image and showed the results in an R plot (using Execute R Script). 
 23
 24##Results
 25
 26We tested the outcome with different numbers of clusters (colors), as shown below. As can be seen, more clusters produce higher-quality images with less compression:
 27
 28<table>
 29<tr><th>Original</th>
 30<td><img alt="Original" src="./media/machine-learning-sample-color-quantization-using-k-means-clustering/image2a.jpg"></td>
 31</tr>
 32<tr><th>K=10</th>
 33<td><img alt="K=10" src="./media/machine-learning-sample-color-quantization-using-k-means-clustering/image2b.png"></td>
 34</tr>
 35<tr><th>K=20</th>
 36<td><img alt="K=20" src="./media/machine-learning-sample-color-quantization-using-k-means-clustering/image2c.png"></td>
 37</tr>
 38<tr><th>K=50</th>
 39<td><img alt="K=50" src="./media/machine-learning-sample-color-quantization-using-k-means-clustering/image2d.png"></td>
 40</tr>
 41<tr><th>K=100</th>
 42<td><img alt="K=100" src="./media/machine-learning-sample-color-quantization-using-k-means-clustering/image2e.png"></td>
 43</tr>
 44<tr><th>K=500</th>
 45<td><img alt="K=500" src="./media/machine-learning-sample-color-quantization-using-k-means-clustering/image2f.png"></td>
 46</tr>
 47</table>
 48
 49We have also measured the accuracy using the Root Mean Squared Difference to the original image colors, which can be seen from the second output port of the "Execute R Script" module:
 50
 51![Output of Execute R Script module][image3]
 52
 53As can be seen, the more color clusters are used, the more closely the colors match the original image (better quality). 
 54
 55##Code to convert images to CSV and reverse
 56
 57In order to feed the images into ML Studio, we wrote a simple converter that converts image files to a CSV format that ML Studio can use, and another that converts the CSV back to an image. Please feel free to use the following code. In the future we are planning to add a module for reading in images as well. 
 58
 59	using System;
 60	using System.Collections.Generic;
 61	using System.Linq;
 62	using System.Text;
 63	using System.Threading.Tasks;
 64	using System.Drawing;
 65	using System.Drawing.Imaging;
 66	using System.IO;
 67	 
 68	namespace Text2Image
 69	{
 70	    class Program
 71	    {
 72	        static void img2csv(string img_path)
 73	        {
 74	            FileInfo img_info = new FileInfo(img_path);
 75	            string destination_file_directory = img_info.DirectoryName + "\\";
 76	            string destination_file_name = img_info.Name.Remove(img_info.Name.LastIndexOf('.'), 4);
 77	            string destination_file_path = destination_file_directory + destination_file_name + ".csv";
 78	 
 79	            // Read the image
 80	            Bitmap img = new Bitmap(img_path);
 81	 
 82	            // Create the CSV File and write the header values
 83	            System.IO.StreamWriter file = new System.IO.StreamWriter(destination_file_path);
 84	            file.WriteLine("X,Y,R,G,B");
 85	 
 86	            // Write the Pixel values
 87	            for (int x = 0; x < img.Width; x++)
 88	                for (int y = 0; y < img.Height; y++)
 89	                {
 90	                    string line = x + "," + y + "," + img.GetPixel(x, y).R + "," + img.GetPixel(x, y).G + "," + img.GetPixel(x, y).B ;
 91	                    file.WriteLine(line);
 92	                }
 93	 
 94	            file.Close();
 95	        }
 96	 
 97	        static void csv2img(string csv_path)
 98	        {
 99	            FileInfo csv_info = new FileInfo(csv_path);
100	            string destination_file_directory = csv_info.DirectoryName + "\\";
101	            string destination_file_name = csv_info.Name.Remove(csv_info.Name.LastIndexOf('.'), 4);
102	            string destination_file_path = destination_file_directory + destination_file_name + ".png";
103	            
104	            // Read all the lines in the CSV file
105	            string[] lines = System.IO.File.ReadAllLines(csv_path);
106	 
107	            // set a new bitmap image with the provided width and height in the header
108	            string[] wh = lines.Last().Split(new Char[] { ' ', ',', '.', ':', '\t', '{', '}' });
109	            int img_width = Convert.ToInt32(wh[0])+1;
110	            int img_height = Convert.ToInt32(wh[1])+1;
111	 
112	            Bitmap bmp_img = new Bitmap(img_width, img_height);
113	 
114	            for (int i = 1; i < lines.Length ;i++ )
115	            {
116	                string[] values = lines[i].Split(new Char[] { ' ', ',', '.', ':', '\t', '{', '}' });
117	                if (values.Length < 3)
118	                    continue;
119	 
120	                int x = Convert.ToInt16(values[0]);
121	                int y = Convert.ToInt32(values[1]);
122	                int r = Convert.ToInt32(values[2]);
123	                int g = Convert.ToInt32(values[3]);
124	                int b = Convert.ToInt32(values[4]);
125	 
126	                bmp_img.SetPixel(x, y, Color.FromArgb(r, g, b));
127	            }
128	 
129	            bmp_img.Save(destination_file_path);
130	        }
131	 
132	        static void Main(string[] args)
133	        {
134	            string source_path = args[1];
135	            FileInfo source_info = new FileInfo(source_path);
136	 
137	            if (source_info.Extension == ".csv")
138	                csv2img(source_path);
139	            else
140	                img2csv(source_path);
141	        }
142	    }
143	}
144
145
146
147[image1]:./media/machine-learning-sample-color-quantization-using-k-means-clustering/image1.png
148[image2a]:./media/machine-learning-sample-color-quantization-using-k-means-clustering/image2a.jpg
149[image2b]:./media/machine-learning-sample-color-quantization-using-k-means-clustering/image2b.png
150[image2c]:./media/machine-learning-sample-color-quantization-using-k-means-clustering/image2c.png
151[image2d]:./media/machine-learning-sample-color-quantization-using-k-means-clustering/image2d.png
152[image2e]:./media/machine-learning-sample-color-quantization-using-k-means-clustering/image2e.png
153[image2f]:./media/machine-learning-sample-color-quantization-using-k-means-clustering/image2f.png
154[image3]:./media/machine-learning-sample-color-quantization-using-k-means-clustering/image3.png