PageRenderTime 243ms CodeModel.GetById 20ms RepoModel.GetById 1ms app.codeStats 0ms

/Spss/SpssConvert.cs

#
C# | 390 lines | 202 code | 43 blank | 145 comment | 29 complexity | 7e4b1e17900db3683568ac2dd0c5da98 MD5 | raw file
Possible License(s): LGPL-2.1
  1. using System;
  2. using System.Data;
  3. using System.Data.SqlClient;
  4. using System.IO;
  5. using System.Threading;
  6. using System.Diagnostics;
  7. using System.CodeDom.Compiler;
  8. using System.Linq;
  9. using System.Xml;
  10. using System.Collections;
  11. using System.Collections.Generic;
  12. using System.Globalization;
  13. namespace Spss
  14. {
  15. /// <summary>
  16. /// Used to convert a DataTable into a persisted SPSS.SAV file.
  17. /// </summary>
  18. public class SpssConvert
  19. {
  /// <summary>
  /// Handler to notify once an asynchronous conversion has finished.
  /// It is assigned only by the constructor, hence <c>readonly</c>.
  /// </summary>
  private readonly EventHandler notifyDoneCallback;

  /// <summary>
  /// The callback method for when the process is complete.
  /// May be <c>null</c> when the caller did not supply one.
  /// </summary>
  protected EventHandler NotifyDoneCallback
  {
      get { return notifyDoneCallback; }
  }

  #region Construction

  /// <summary>
  /// Creates an instance of the <see cref="SpssConvert"/> class.
  /// </summary>
  /// <param name="notifyDoneCallback">
  /// The callback method for when the process is complete. It is stored as-is
  /// and exposed through <see cref="NotifyDoneCallback"/>.
  /// </param>
  /// <remarks>
  /// Used internally for asynchronous operations.
  /// </remarks>
  internal SpssConvert(EventHandler notifyDoneCallback)
  {
      this.notifyDoneCallback = notifyDoneCallback;
  }

  #endregion
  /// <summary>
  /// Delegate matching the four-argument <c>ToFile</c> overload, so that the
  /// conversion can be started with <c>BeginInvoke</c> by <c>ToFileAsync</c>.
  /// </summary>
  /// <param name="dataTable">The DataTable to convert to SPSS format.</param>
  /// <param name="data">An enumerable list of DataRows.</param>
  /// <param name="spssSavFilename">The target .SAV file to save results to.</param>
  /// <param name="fillInMetaDataCallBack">Callback function to provide per-variable metadata.</param>
  private delegate void ToFileAsyncDelegate(
      DataTable dataTable,
      IEnumerable<DataRow> data,
      string spssSavFilename,
      Action<SpssVariable> fillInMetaDataCallBack);
  /// <summary>
  /// Converts every row of a <see cref="DataTable"/> to SPSS format and saves
  /// the result as a .SAV file.
  /// </summary>
  /// <param name="dataTable">The DataTable to convert to SPSS format</param>
  /// <param name="spssSavFilename">The fully-qualified target .SAV file to save results to</param>
  /// <param name="fillInMetaDataCallBack">Callback function to provide per-variable metadata</param>
  /// <exception cref="ArgumentNullException">
  /// <paramref name="dataTable"/> is <c>null</c>.
  /// </exception>
  public static void ToFile(DataTable dataTable,
      string spssSavFilename, Action<SpssVariable> fillInMetaDataCallBack)
  {
      // Fail with a descriptive exception instead of a NullReferenceException
      // when dereferencing dataTable.Rows below.
      if (dataTable == null)
      {
          throw new ArgumentNullException("dataTable");
      }

      ToFile(dataTable, dataTable.Rows.Cast<DataRow>(), spssSavFilename, fillInMetaDataCallBack);
  }
  /// <summary>
  /// Converts the supplied rows to SPSS format and saves the result as a .SAV file.
  /// Any existing file at the target path is deleted first.
  /// </summary>
  /// <param name="dataTable">The DataTable whose schema defines the SPSS variables</param>
  /// <param name="data">An enumerable list of DataRows.</param>
  /// <param name="spssSavFilename">The fully-qualified target .SAV file to save results to</param>
  /// <param name="fillInMetaDataCallBack">Callback function to provide per-variable metadata</param>
  /// <exception cref="ArgumentNullException">
  /// <paramref name="dataTable"/> or <paramref name="spssSavFilename"/> is <c>null</c>.
  /// </exception>
  public static void ToFile(DataTable dataTable, IEnumerable<DataRow> data,
      string spssSavFilename, Action<SpssVariable> fillInMetaDataCallBack)
  {
      // Validate before touching the disk, so that an invalid call can never
      // delete an existing file and then fail.
      if (dataTable == null)
      {
          throw new ArgumentNullException("dataTable");
      }
      if (spssSavFilename == null)
      {
          throw new ArgumentNullException("spssSavFilename");
      }

      // Remove the file if it already exists.
      if (File.Exists(spssSavFilename))
      {
          File.Delete(spssSavFilename);
      }

      // Open up the document with "using" so that it will definitely close afterward.
      using (SpssDataDocument sav = SpssDataDocument.Create(spssSavFilename))
      {
          // Create the schema from the table, passing in a callback
          // function for filling in each variable's metadata
          sav.Variables.ImportSchema(dataTable, fillInMetaDataCallBack);

          // The dictionary is committed before any data is imported.
          sav.CommitDictionary();

          // Import data
          sav.ImportData(dataTable, data);
      }
  }
  /// <summary>
  /// Starts an asynchronous conversion of the supplied rows to an SPSS .SAV file.
  /// </summary>
  /// <param name="dataTable">
  /// The DataTable to convert to SPSS format
  /// </param>
  /// <param name="data">An enumerable list of DataRows.</param>
  /// <param name="spssSavFilename">
  /// The fully-qualified target .SAV file to save results to
  /// </param>
  /// <param name="fillInMetaDataCallback">
  /// Callback function to provide per-variable metadata
  /// </param>
  /// <param name="notifyDoneCallback">
  /// The method to call when the process is complete.
  /// </param>
  /// <returns>
  /// Returns a handle to poll the status of the conversion.
  /// </returns>
  public static IAsyncResult ToFileAsync(DataTable dataTable, IEnumerable<DataRow> data, string spssSavFilename,
      Action<SpssVariable> fillInMetaDataCallback, EventHandler notifyDoneCallback)
  {
      // An instance is created only to remember notifyDoneCallback, so the
      // completion callback knows who to tell when the operation is complete.
      SpssConvert completionContext = new SpssConvert(notifyDoneCallback);
      AsyncCallback onCompleted = completionContext.ToFileAsyncCB;

      // A completion callback is always registered, even if the caller does not
      // care when the conversion is done, because EndInvoke must be called.
      // The delegate itself travels as the async state so the callback can reach it.
      ToFileAsyncDelegate worker = ToFile;
      IAsyncResult pending = worker.BeginInvoke(
          dataTable, data, spssSavFilename, fillInMetaDataCallback,
          onCompleted, worker);
      return pending;
  }
  /// <summary>
  /// Completion callback for the asynchronous conversion started by <c>ToFileAsync</c>.
  /// </summary>
  /// <param name="ar">
  /// The async result whose <see cref="IAsyncResult.AsyncState"/> is the
  /// <c>ToFileAsyncDelegate</c> that was invoked.
  /// </param>
  private void ToFileAsyncCB(IAsyncResult ar)
  {
      ToFileAsyncDelegate dlgt = (ToFileAsyncDelegate) ar.AsyncState;

      // Call EndInvoke, since the docs say we MUST
      dlgt.EndInvoke(ar);

      // Inform caller that the asynchronous operation is now complete.
      // EventArgs.Empty is passed rather than null so that handlers following
      // the standard event pattern never receive a null argument.
      EventHandler handler = NotifyDoneCallback;
      if (handler != null)
      {
          handler(this, EventArgs.Empty);
      }
  }
  /// <summary>
  /// Converts a <see cref="DataTable"/> to an SPSS .SAV file.
  /// </summary>
  /// <param name="dataTable">
  /// The <see cref="DataTable"/> with the schema and data to fill into the SPSS .SAV file.
  /// </param>
  /// <param name="data">An enumerable list of DataRows.</param>
  /// <param name="fillInMetaDataCallBack">
  /// The callback method that will provide additional metadata on each column.
  /// </param>
  /// <returns>
  /// A <see cref="MemoryStream"/> containing the contents of the SPSS .SAV data file,
  /// positioned at its start.
  /// </returns>
  /// <remarks>
  /// The data is first written to a temporary file registered with a
  /// <see cref="TempFileCollection"/> that is disposed before the method returns.
  /// </remarks>
  public static MemoryStream ToStream(DataTable dataTable, IEnumerable<DataRow> data, Action<SpssVariable> fillInMetaDataCallBack)
  {
      // Create a temporary file for the SPSS data that we will generate.
      using (TempFileCollection tfc = new TempFileCollection())
      {
          string filename = tfc.AddExtension("sav", false);
          ToFile(dataTable, data, filename, fillInMetaDataCallBack);

          // Now read the file into memory
          using (FileStream fs = File.OpenRead(filename))
          {
              MemoryStream ms = new MemoryStream((int)fs.Length);

              // Copy in chunks rather than one byte per call.
              byte[] buffer = new byte[8192];
              int bytesRead;
              while ((bytesRead = fs.Read(buffer, 0, buffer.Length)) > 0)
              {
                  ms.Write(buffer, 0, bytesRead);
              }

              // reset to start of stream.
              ms.Position = 0;
              return ms;
          }
      }
  }
  /// <summary>
  /// Converts the metadata in an SPSS .SAV data file into a DDI codebook.
  /// </summary>
  /// <param name="spssSav">
  /// The stream containing the SPSS .SAV data file. It is read from its current
  /// position to its end and is not closed by this method.
  /// </param>
  /// <returns>
  /// The <see cref="XmlDocument"/> containing all the metadata.
  /// </returns>
  /// <exception cref="ArgumentNullException">
  /// <paramref name="spssSav"/> is <c>null</c>.
  /// </exception>
  public static XmlDocument ToDdi(Stream spssSav)
  {
      if (spssSav == null)
      {
          throw new ArgumentNullException("spssSav");
      }

      // To read an SPSS file, spssio32.dll requires that the file is actually on disk,
      // so persist this stream to a temporary file on disk.
      using (TempFileCollection tfc = new TempFileCollection())
      {
          string filename = tfc.AddExtension("sav", false);
          using (FileStream fs = new FileStream(filename, FileMode.CreateNew))
          {
              // Copy in chunks rather than one byte per call.
              byte[] buffer = new byte[8192];
              int bytesRead;
              while ((bytesRead = spssSav.Read(buffer, 0, buffer.Length)) > 0)
              {
                  fs.Write(buffer, 0, bytesRead);
              }
          }

          // The temporary file collection is disposed when this block is left.
          return ToDdi(filename);
      }
  }
  /// <summary>
  /// Converts the metadata in an SPSS .SAV data file into a DDI codebook.
  /// </summary>
  /// <param name="spssSavFilename">
  /// The filename of the SPSS .SAV data file.
  /// </param>
  /// <returns>
  /// The <see cref="XmlDocument"/> containing all the metadata: the embedded
  /// blankDdi.xml template with one <c>var</c> element per SPSS variable appended
  /// to its <c>dataDscr</c> node.
  /// </returns>
  /// <exception cref="ArgumentNullException">
  /// <paramref name="spssSavFilename"/> is <c>null</c>.
  /// </exception>
  /// <exception cref="NotSupportedException">
  /// A variable in the file is neither a numeric nor a string variable.
  /// </exception>
  public static XmlDocument ToDdi(string spssSavFilename)
  {
      const string ddiNamespace = "http://www.icpsr.umich.edu/DDI";
      if (spssSavFilename == null) {
          throw new ArgumentNullException("spssSavFilename");
      }

      XmlDocument ddi = new XmlDocument();

      // Open up SPSS file and fill in the ddi var tags.
      using (SpssDataDocument doc = SpssDataDocument.Open(spssSavFilename, SpssFileAccess.Read)) {
          ddi.PreserveWhitespace = true;

          // Read from the embedded xml file: blankDdi.xml into the ddi document
          ddi.LoadXml(EmbeddedResources.LoadFileFromAssemblyWithNamespace("/blankDdi.xml", Project.DefaultNamespace));

          // Locate the node that receives one child element per variable.
          XmlNamespaceManager xmlNS = new XmlNamespaceManager(ddi.NameTable);
          xmlNS.AddNamespace("ddi", ddiNamespace);
          XmlNode dataDescription = ddi.DocumentElement.SelectSingleNode(@"ddi:dataDscr", xmlNS);

          foreach (SpssVariable spssVariable in doc.Variables) {
              dataDescription.AppendChild(CreateDdiVariableElement(ddi, ddiNamespace, spssVariable));
          }

          // Return the completed ddi file.
          return ddi;
      }
  }

  /// <summary>
  /// Builds the DDI <c>var</c> element describing a single SPSS variable.
  /// </summary>
  /// <param name="ddi">The document used to create the elements.</param>
  /// <param name="ddiNamespace">The namespace URI of the created elements.</param>
  /// <param name="spssVariable">The variable to describe.</param>
  /// <returns>The new, not yet attached, <c>var</c> element.</returns>
  /// <exception cref="NotSupportedException">
  /// The variable is neither a numeric nor a string variable.
  /// </exception>
  private static XmlElement CreateDdiVariableElement(XmlDocument ddi, string ddiNamespace, SpssVariable spssVariable)
  {
      string nameOfVar = spssVariable.Name;
      SpssNumericVariable numericVariable = spssVariable as SpssNumericVariable;

      // Decide the format type first so that an unsupported variable is
      // rejected before any elements are built for it.
      string formatType;
      if (numericVariable != null) {
          formatType = "numeric";
      } else if (spssVariable is SpssStringVariable) {
          formatType = "character";
      } else {
          throw new NotSupportedException("Variable " + nameOfVar + " is not a string or a numeric variable type.");
      }

      // Variable name and its ID and then, if it is numeric, its interval.
      XmlElement variable = ddi.CreateElement("ddi:var", ddiNamespace);
      variable.SetAttribute("ID", string.Empty, nameOfVar);
      variable.SetAttribute("name", string.Empty, nameOfVar);
      if (numericVariable != null) {
          // Zero print decimals is reported as a discrete interval, anything else as continuous.
          int printDecimal = numericVariable.PrintDecimal;
          string interval = printDecimal == 0 ? "discrete" : "contin";
          variable.SetAttribute("intrvl", string.Empty, interval);
      }

      // Location width. Formatted with the invariant culture because the value
      // is machine-readable XML, not text shown to a user.
      XmlElement location = ddi.CreateElement("ddi:location", ddiNamespace);
      int columnWidth = spssVariable.ColumnWidth;
      location.SetAttribute("width", columnWidth.ToString(CultureInfo.InvariantCulture));
      variable.AppendChild(location);

      // Label of the variable.
      XmlElement varLabel = ddi.CreateElement("ddi:labl", ddiNamespace);
      varLabel.InnerText = spssVariable.Label;
      variable.AppendChild(varLabel);

      // One category element per value label.
      foreach (var response in spssVariable.GetValueLabels()) {
          XmlElement answer = ddi.CreateElement("ddi:catgry", ddiNamespace);

          // catValue (category value) holds the key of the value label.
          XmlElement catValue = ddi.CreateElement("ddi:catValu", ddiNamespace);
          catValue.InnerText = response.Key;
          answer.AppendChild(catValue);

          // catLabel (category label) holds the text of the value label.
          XmlElement catLabel = ddi.CreateElement("ddi:labl", ddiNamespace);
          catLabel.InnerText = response.Value;
          answer.AppendChild(catLabel);

          variable.AppendChild(answer);
      }

      XmlElement varFormat = ddi.CreateElement("ddi:varFormat", ddiNamespace);
      varFormat.SetAttribute("type", formatType);
      variable.AppendChild(varFormat);

      return variable;
  }
  /// <summary>
  /// Converts an SPSS .SAV data file into a DataTable.
  /// </summary>
  /// <param name="spssSav">
  /// The stream containing the SPSS .SAV data file. It is read from its current
  /// position to its end and is not closed by this method.
  /// </param>
  /// <returns>
  /// The <see cref="DataTable"/> produced by the filename-based overload from a
  /// temporary copy of the stream.
  /// </returns>
  /// <exception cref="ArgumentNullException">
  /// <paramref name="spssSav"/> is <c>null</c>.
  /// </exception>
  public static DataTable ToDataTable(Stream spssSav)
  {
      if (spssSav == null)
      {
          throw new ArgumentNullException("spssSav");
      }

      // To read an SPSS file, spssio32.dll requires that the file is actually on disk,
      // so persist this stream to a temporary file on disk.
      using (TempFileCollection tfc = new TempFileCollection())
      {
          string filename = tfc.AddExtension("sav", false);
          using (FileStream fs = new FileStream(filename, FileMode.CreateNew))
          {
              // Copy in chunks rather than one byte per call.
              byte[] buffer = new byte[8192];
              int bytesRead;
              while ((bytesRead = spssSav.Read(buffer, 0, buffer.Length)) > 0)
              {
                  fs.Write(buffer, 0, bytesRead);
              }
          }

          // The temporary file collection is disposed when this block is left.
          return ToDataTable(filename);
      }
  }
  /// <summary>
  /// Opens an SPSS .SAV data file for reading and loads it into a new DataTable.
  /// </summary>
  /// <param name="spssSavFilename">
  /// The filename of the SPSS .SAV data file.
  /// </param>
  /// <returns>
  /// A new <see cref="DataTable"/> filled from the opened document.
  /// </returns>
  /// <exception cref="ArgumentNullException">
  /// <paramref name="spssSavFilename"/> is <c>null</c>.
  /// </exception>
  public static DataTable ToDataTable(string spssSavFilename)
  {
      if (spssSavFilename == null)
      {
          throw new ArgumentNullException("spssSavFilename");
      }

      // The document is disposed as soon as the table has been filled.
      using (SpssDataDocument source = SpssDataDocument.Open(spssSavFilename, SpssFileAccess.Read))
      {
          DataTable result = new DataTable();
          ToDataTable(source, result);
          return result;
      }
  }
  /// <summary>
  /// Fills a <see cref="DataTable"/> from an open SPSS document: one column per
  /// variable, then one row per case.
  /// </summary>
  /// <param name="doc">The open SPSS document to read variables and cases from.</param>
  /// <param name="dataTable">The table that receives the columns and rows.</param>
  /// <exception cref="ArgumentNullException">
  /// <paramref name="doc"/> or <paramref name="dataTable"/> is <c>null</c>.
  /// </exception>
  /// <exception cref="NotSupportedException">
  /// A variable is not a numeric, string or date variable.
  /// </exception>
  public static void ToDataTable(SpssDataDocument doc, DataTable dataTable) {
      if (doc == null) {
          throw new ArgumentNullException("doc");
      }
      if (dataTable == null) {
          throw new ArgumentNullException("dataTable");
      }

      // Set up the columns with the metadata.
      foreach (SpssVariable variable in doc.Variables) {
          string nameOfVar = variable.Name;

          // Resolve the column type before adding the column, so an
          // unsupported variable does not leave a stray column behind.
          Type columnType;
          if (variable is SpssNumericVariable) {
              columnType = typeof(double);
          } else if (variable is SpssStringVariable) {
              columnType = typeof(string);
          } else if (variable is SpssDateVariable) {
              columnType = typeof(DateTime);
          } else {
              throw new NotSupportedException("Variable " + nameOfVar + " is not a string, numeric or date variable type.");
          }

          // Create the column with its final type in a single step.
          DataColumn dataColumn = dataTable.Columns.Add(nameOfVar, columnType);

          // The variable's label becomes the column caption.
          dataColumn.Caption = variable.Label;
      }

      // Add data into the DataTable, one row per case.
      int variableCount = doc.Variables.Count;
      object[] values = new object[variableCount];
      foreach (SpssCase rowCase in doc.Cases) {
          for (int i = 0; i < variableCount; i++) {
              values[i] = rowCase[i];
          }
          dataTable.Rows.Add(values);
      }
  }
  346. }
  347. }