PageRenderTime 34ms CodeModel.GetById 20ms app.highlight 9ms RepoModel.GetById 1ms app.codeStats 0ms

/Spss/SpssConvert.cs

#
C# | 390 lines | 202 code | 43 blank | 145 comment | 29 complexity | 7e4b1e17900db3683568ac2dd0c5da98 MD5 | raw file
  1using System;
  2using System.Data;
  3using System.Data.SqlClient;
  4using System.IO;
  5using System.Threading;
  6using System.Diagnostics;
  7using System.CodeDom.Compiler;
  8using System.Linq;
  9using System.Xml;
 10using System.Collections;
 11using System.Collections.Generic;
 12using System.Globalization;
 13
 14namespace Spss
 15{
 16	/// <summary>
 17	/// Used to convert a DataTable into a persisted SPSS.SAV file. 
 18	/// </summary>
 19	public class SpssConvert
 20	{
 21		private EventHandler notifyDoneCallback;
 22		/// <summary>
 23		/// The callback method for when the process is complete.
 24		/// </summary>
 25		protected EventHandler NotifyDoneCallback { get { return notifyDoneCallback; } }
 26
 27		#region Construction
 28		/// <summary>
 29		/// Creates an instance of the <see cref="SpssConvert"/> class.
 30		/// </summary>
 31		/// <param name="notifyDoneCallback">
 32		/// The callback method for when the process is complete.
 33		/// </param>
 34		/// <remarks>
 35		/// Used internally for asynchronous operations.
 36		/// </remarks>
 37		internal SpssConvert(EventHandler notifyDoneCallback) 
 38		{
 39			this.notifyDoneCallback = notifyDoneCallback;
 40		}
 41		#endregion
 42
 43		private delegate void ToFileAsyncDelegate(DataTable dataTable, IEnumerable<DataRow> data,
 44			 string spssSavFilename, Action<SpssVariable> fillInMetaDataCallBack);
 45
 46		/// <summary>
 47		/// Call to convert data to SPSS format using a passed in SQL query to provide the data.
 48		/// </summary>
 49		/// <param name="dataTable">The DataTable to convert to SPSS format</param>
 50		/// <param name="spssSavFilename">The fully-qualified target .SAV file to save results to</param>
 51		/// <param name="fillInMetaDataCallBack">Callback function to provide per-variable metadata</param>
 52		public static void ToFile(DataTable dataTable,
 53			string spssSavFilename, Action<SpssVariable> fillInMetaDataCallBack)
 54		{
 55			ToFile(dataTable, dataTable.Rows.Cast<DataRow>(), spssSavFilename, fillInMetaDataCallBack);
 56		}
 57
 58		/// <summary>
 59		/// Call to convert data to SPSS format using a passed in SQL query to provide the data.
 60		/// </summary>
 61		/// <param name="dataTable">The DataTable to convert to SPSS format</param>
 62		/// <param name="data">An enumerable list of DataRows.</param>
 63		/// <param name="spssSavFilename">The fully-qualified target .SAV file to save results to</param>
 64		/// <param name="fillInMetaDataCallBack">Callback function to provide per-variable metadata</param>
 65		public static void ToFile(DataTable dataTable, IEnumerable<DataRow> data,
 66			string spssSavFilename, Action<SpssVariable> fillInMetaDataCallBack)
 67		{
 68			// Remove the file if it already exists.
 69			if( File.Exists( spssSavFilename ) ) File.Delete( spssSavFilename );
 70			// Open up the document with "using" so that it will definitely close afterward.
 71			using( SpssDataDocument Sav = SpssDataDocument.Create( spssSavFilename ) )
 72			{
 73				// Create the schema from the table, passing in a callback
 74				// function for filling in each variable's metadata
 75				Sav.Variables.ImportSchema( dataTable, fillInMetaDataCallBack );
 76				// Import data
 77				Sav.CommitDictionary();
 78				Sav.ImportData( dataTable, data );
 79			} 
 80		}
 81
 82		/// <summary>
 83		/// Call to asynchronously convert data to SPSS format using a passed in SQL query 
 84		/// to provide the data.
 85		/// </summary>
 86		/// <param name="dataTable">
 87		/// The DataTable to convert to SPSS format
 88		/// </param>
 89		/// <param name="data">An enumerable list of DataRows.</param>
 90		/// <param name="spssSavFilename">
 91		/// The fully-qualified target .SAV file to save results to
 92		/// </param>
 93		/// <param name="fillInMetaDataCallback">
 94		/// Callback function to provide per-variable metadata
 95		/// </param>
 96		/// <param name="notifyDoneCallback">
 97		/// The method to call when the process is complete.
 98		/// </param>
 99		///	<returns>
100		///	Returns a handle to poll the status of the conversion.
101		///	</returns>
102		public static IAsyncResult ToFileAsync(DataTable dataTable, IEnumerable<DataRow> data, string spssSavFilename,
103			Action<SpssVariable> fillInMetaDataCallback, EventHandler notifyDoneCallback)
104		{
105			// Spin off an asynchronous thread to do the work 
106			// Be sure to use a callback function, even if we don't care when this 
107			// conversion is done, since we must call EndInvoke.
108			ToFileAsyncDelegate dlgt = new ToFileAsyncDelegate( ToFile );
109
110			// Instantiate an instance of this class, to save the vbNotifyDone parameter
111			// so that we know who to tell when this operation is complete.
112			SpssConvert instance = new SpssConvert( notifyDoneCallback );
113			return dlgt.BeginInvoke( dataTable, data, spssSavFilename, fillInMetaDataCallback, 
114				new AsyncCallback( instance.ToFileAsyncCB ), dlgt );
115		}
116
117		private void ToFileAsyncCB(IAsyncResult ar)
118		{
119			ToFileAsyncDelegate dlgt = (ToFileAsyncDelegate) ar.AsyncState;
120
121			// Call EndInvoke, since the docs say we MUST
122			dlgt.EndInvoke(ar);
123
124			// Inform caller that the asynchronous operation is now complete
125			if( NotifyDoneCallback != null ) NotifyDoneCallback(this, null);
126		}
127
128		/// <summary>
129		/// Converts a <see cref="DataTable"/> to an SPSS .SAV file.
130		/// </summary>
131		/// <param name="dataTable">
132		/// The <see cref="DataTable"/> with the schema and data to fill into the SPSS .SAV file.
133		/// </param>
134		/// <param name="data">An enumerable list of DataRows.</param>
135		/// <param name="fillInMetaDataCallBack">
136		/// The callback method that will provide additional metadata on each column.
137		/// </param>
138		/// <returns>
139		/// A <see cref="MemoryStream"/> containing the contents of the SPSS .SAV data file.
140		/// </returns>
141		/// <remarks>
142		/// A temporary file is created during this process, but is guaranteed to be removed
143		/// as the method returns.
144		/// </remarks>
145		public static MemoryStream ToStream(DataTable dataTable, IEnumerable<DataRow> data, Action<SpssVariable> fillInMetaDataCallBack)
146		{
147			// Create a temporary file for the SPSS data that we will generate.
148			using( TempFileCollection tfc = new TempFileCollection() )
149			{
150				string filename = tfc.AddExtension("sav", false);
151				ToFile(dataTable, data, filename, fillInMetaDataCallBack);
152
153				// Now read the file into memory
154				using( FileStream fs = File.OpenRead(filename) )
155				{
156					MemoryStream ms = new MemoryStream((int)fs.Length);
157					int b = 0;
158					while( (b = fs.ReadByte()) >= 0 )
159						ms.WriteByte((byte)b);
160
161					// reset to start of stream.
162					ms.Position = 0;
163
164					// return the memory stream.  All temporary files will delete as we exit.
165					return ms;
166				}
167			}
168		}
169		
170		/// <summary>
171		/// Converts the metadata in an SPSS .SAV data file into a DDI codebook.
172		/// </summary>
173		/// <param name="spssSav">
174		/// The stream containing the SPSS .SAV data file.
175		/// </param>
176		/// <returns>
177		/// The <see cref="XmlDocument"/> containing all the metadata.
178		/// </returns>
179		public static XmlDocument ToDdi(Stream spssSav)
180		{
181			// To read an SPSS file, spssio32.dll requires that the file is actually on disk, 
182			// so persist this stream to a temporary file on disk.
183			using( TempFileCollection tfc = new TempFileCollection() )
184			{
185				string filename = tfc.AddExtension("sav", false);
186				using( FileStream fs = new FileStream(filename, FileMode.CreateNew) )
187				{
188					int b;
189					while( (b = spssSav.ReadByte()) >= 0 )
190						fs.WriteByte((byte)b);
191				}
192				return ToDdi(filename);
193				// leaving this block will remove the temporary file automatically
194			}
195		}
196
197		/// <summary>
198		/// Converts the metadata in an SPSS .SAV data file into a DDI codebook.
199		/// </summary>
200		/// <param name="spssSavFilename">
201		/// The filename of the SPSS .SAV data file.
202		/// </param>
203		/// <returns>
204		/// The <see cref="XmlDocument"/> containing all the metadata.
205		/// </returns>
206		public static XmlDocument ToDdi(string spssSavFilename)
207		{
208						
209			const string ddiNamespace = "http://www.icpsr.umich.edu/DDI";
210			if( spssSavFilename == null ) throw new ArgumentNullException( "spssSavFilename" );
211			XmlDocument ddi = new XmlDocument();
212			// Build initial ddi document up.
213			// Open up SPSS file and fill in the ddi var tags.
214			using (SpssDataDocument doc = SpssDataDocument.Open(spssSavFilename, SpssFileAccess.Read)) {
215				ddi.PreserveWhitespace = true;
216				//Read from the embedded xml file: blankDdi.xml into the ddi document
217				ddi.LoadXml(EmbeddedResources.LoadFileFromAssemblyWithNamespace("/blankDdi.xml", Project.DefaultNamespace));
218				//This is where the hard coding ends and methods are called to extract data from the sav file
219
220				XmlElement xmlRoot = ddi.DocumentElement;
221				XmlNamespaceManager xmlNS = new XmlNamespaceManager(ddi.NameTable);
222				xmlNS.AddNamespace("ddi", ddiNamespace);
223				XmlNode nData = xmlRoot.SelectSingleNode(@"ddi:dataDscr", xmlNS);
224
225				foreach (SpssVariable var in doc.Variables) {
226					string nameOfVar = var.Name;
227
228					//variable name and its ID and then if its a numeric : its interval
229					XmlElement variable = ddi.CreateElement("ddi:var", ddiNamespace);
230					variable.SetAttribute("ID", string.Empty, nameOfVar);
231					variable.SetAttribute("name", string.Empty, nameOfVar);
232
233					//This is the variable that holds the characteristic whether the variable has discrete or continuous interval
234					int Dec;
235					if (var is SpssNumericVariable) {
236						Dec = ((SpssNumericVariable)var).PrintDecimal;
237						string interval = string.Empty;
238						if (Dec == 0) {
239							interval = "discrete";
240						} else {
241							interval = "contin";
242						}
243						variable.SetAttribute("intrvl", string.Empty, interval);
244					}
245
246					//for the location width part
247					XmlElement location = ddi.CreateElement("ddi:location", ddiNamespace);
248					int Wid = var.ColumnWidth;
249					location.SetAttribute("width", Wid.ToString());
250					variable.AppendChild(location);
251
252					//label of the variable is set in "varlabel" and extracted using var.Label 
253					XmlElement varLabel = ddi.CreateElement("ddi:labl", ddiNamespace);
254					varLabel.InnerText = var.Label;
255					variable.AppendChild(varLabel);
256
257					foreach (var response in var.GetValueLabels()) {
258						XmlElement answer = ddi.CreateElement("ddi:catgry", ddiNamespace);
259
260						//catValue(category Value) is the element storing the text i.e. option number
261						XmlElement catValue = ddi.CreateElement("ddi:catValu", ddiNamespace);
262						catValue.InnerText = response.Key;
263						answer.AppendChild(catValue);
264
265						//catLabel(category Label) is the element storing the text i.e. name of answer
266						XmlElement catLabel = ddi.CreateElement("ddi:labl", ddiNamespace);
267						catLabel.InnerText = response.Value;
268						answer.AppendChild(catLabel);
269
270						//appending the answer option to the parent "variable" node i.e. the question node
271						variable.AppendChild(answer);
272					}
273
274					// end of extracting the response values for each variable 
275
276					XmlElement varFormat = ddi.CreateElement("ddi:varFormat", ddiNamespace);
277
278					if (var is SpssNumericVariable) {
279						varFormat.SetAttribute("type", "numeric");
280					} else if (var is SpssStringVariable) {
281						varFormat.SetAttribute("type", "character");
282					} else {
283						throw new NotSupportedException("Variable " + nameOfVar + " is not a string or a numeric variable type.");
284					}
285					variable.AppendChild(varFormat);
286
287					nData.AppendChild(variable);
288
289				}
290				
291				//end of extraction of each variable and now we have put all the variable data into ndata
292				// Return the completed ddi file.
293				return ddi;
294			}
295		}
296
297		/// <summary>
298		/// Converts the metadata in an SPSS .SAV data file into a DataTable.
299		/// </summary>
300		/// <param name="spssSav">
301		/// The stream containing the SPSS .SAV data file.
302		/// </param>
303		/// <returns>
304		/// The <see cref="DataTable"/> containing all the metadata.
305		/// </returns>
306		public static DataTable ToDataTable(Stream spssSav)
307		{
308			if (spssSav == null) throw new ArgumentNullException("spssSav");
309			// To read an SPSS file, spssio32.dll requires that the file is actually on disk, 
310			// so persist this stream to a temporary file on disk.
311			using (TempFileCollection tfc = new TempFileCollection())
312			{
313				string filename = tfc.AddExtension("sav", false);
314				using (FileStream fs = new FileStream(filename, FileMode.CreateNew))
315				{
316					int b;
317					while ((b = spssSav.ReadByte()) >= 0) {
318						fs.WriteByte((byte)b);
319					}
320				}
321				return ToDataTable(filename);
322
323				// leaving this block will remove the temporary file automatically
324			}
325		}
326
327		/// <summary>
328		/// Converts the metadata in an SPSS .SAV data file into a DataTable.
329		/// </summary>
330		/// <param name="spssSavFilename">
331		/// The filename of the SPSS .SAV data file.
332		/// </param>
333		/// <returns>
334		/// The <see cref="DataTable"/> containing all the metadata.
335		/// </returns>
336		public static DataTable ToDataTable(string spssSavFilename)
337		{
338			if (spssSavFilename == null) throw new ArgumentNullException("spssSavFilename");
339			DataTable dataTable = new DataTable();
340			using (SpssDataDocument doc = SpssDataDocument.Open(spssSavFilename, SpssFileAccess.Read)) {
341				ToDataTable(doc, dataTable);
342			}
343
344			// Return the completed DataTable.
345			return dataTable;
346		}
347
348		public static void ToDataTable(SpssDataDocument doc, DataTable dataTable) {
349			if (doc == null) {
350				throw new ArgumentNullException("doc");
351			}
352			if (dataTable == null) {
353				throw new ArgumentNullException("dataTable");
354			}
355
356			// Build initial DataTable up.
357			// Fill in the metadata.
358			//set up the columns with the metadata
359			foreach (SpssVariable var in doc.Variables) {
360				string nameOfVar = var.Name;
361
362				//add a column of the variable name to the DataTable
363				DataColumn dataColumn = dataTable.Columns.Add(nameOfVar);
364
365				//label of the variable is set in "varlabel" and extracted using var.Label 
366				dataColumn.Caption = var.Label;
367
368				//set the type of the column
369				if (var is SpssNumericVariable) {
370					dataColumn.DataType = typeof(double);
371				} else if (var is SpssStringVariable) {
372					dataColumn.DataType = typeof(string);
373				} else if (var is SpssDateVariable) {
374					dataColumn.DataType = typeof(DateTime);
375				} else {
376					throw new NotSupportedException("Variable " + nameOfVar + " is not a string or a numeric variable type.");
377				}
378			}//end of extraction of metadata
379
380			//add data into the DataTable
381			var values = new object[doc.Variables.Count];
382			foreach (SpssCase rowCase in doc.Cases) {
383				for (int i = 0; i < doc.Variables.Count; i++) {
384					values[i] = rowCase[i];
385				}
386				dataTable.Rows.Add(values);
387			}
388		}
389	}
390}