/UPS/Program.cs

https://github.com/hap-adong/IDPicker · C# · 284 lines · 114 code · 10 blank · 160 comment · 8 complexity · b5dad52eef2f4ab5fa76a75a1d709eda MD5 · raw file

  1. using System;
  2. using System.Collections.Generic;
  3. using System.Linq;
  4. using System.Text;
  5. using System.Data;
  6. using System.IO;
  namespace UPS
  {
      /// <summary>
      /// Console driver that compares the peptide lists obtained from a "naive" and a
      /// "baso" idpXML result file and reports how many peptides occur as a substring
      /// of at least one sequence in the FASTA database.
      /// </summary>
      /// <remarks>
      /// This file previously contained an unresolved merge conflict inside
      /// <c>Main</c>. Both sides of the conflict are kept here as separate methods so
      /// that no analysis is lost; drop whichever one is obsolete once the intended
      /// resolution has been confirmed.
      /// </remarks>
      class Program
      {
          // Root folder of the input data and the shared run file name.
          private const string DataRoot = "Z:\\home\\dwang\\fragmentation\\UPS";
          private const string RunFileName = "klc_031308p_cptac_study6_6_QC1.idpXML";

          // FASTA database searched by both analyses.
          private const string FastaPath = DataRoot + "\\Sigma49-reverse.fasta";

          /// <summary>
          /// Loads the protein database once and runs both analyses in the order in
          /// which they appeared in the conflicted source (HEAD side first).
          /// </summary>
          /// <param name="args">Command-line arguments; not used.</param>
          static void Main(string[] args)
          {
              List<string> proteinList = Package.readDatabase(FastaPath);

              RunFixedLevelComparison(proteinList);
              RunLevelSweepComparison(proteinList);
          }

          /// <summary>
          /// HEAD side of the former conflict: reads both result files with the
          /// second <c>PepSecurity</c> argument fixed to 3, de-duplicates the peptide
          /// lists, prints the number of database-supported peptides for each file
          /// and then the number of peptides shared by both lists.
          /// </summary>
          /// <param name="proteinList">Sequences read from the FASTA database.</param>
          private static void RunFixedLevelComparison(List<string> proteinList)
          {
              string naiveFile = DataRoot + "\\mvh-naive\\" + RunFileName;
              string basoFile = DataRoot + "\\mvh-baso\\" + RunFileName;

              List<string> naivePeptides = Package.PepSecurity(naiveFile, 3);
              List<string> basoPeptides = Package.PepSecurity(basoFile, 3);
              naivePeptides = Package.removeDuplicate(naivePeptides);
              basoPeptides = Package.removeDuplicate(basoPeptides);

              int supportedNaive = CountSupported(naivePeptides, proteinList);
              int supportedBaso = CountSupported(basoPeptides, proteinList);
              Console.WriteLine(supportedNaive);
              Console.WriteLine(supportedBaso);

              // Overlap: every naive peptide that also appears in the baso list.
              // A hash set gives the same ordinal string equality as the former
              // nested loop without the quadratic scan.
              HashSet<string> basoLookup = new HashSet<string>(basoPeptides);
              int overlap = naivePeptides.Count(peptide => basoLookup.Contains(peptide));
              Console.WriteLine("overlap: " + overlap);
          }

          /// <summary>
          /// Incoming side of the former conflict: for z = 0..4 reads both result
          /// files via <c>PepSecurity(path, z)</c>, determines the common peptides
          /// and prints, for each of the three lists, how many peptides are supported
          /// by the database out of the list total.
          /// </summary>
          /// <remarks>
          /// The meaning of <c>z</c> is defined by <c>Package.PepSecurity</c>, which is
          /// not visible here (TODO confirm). The commented-out RNASeq evaluation
          /// code that followed this loop was dead code and has not been carried over.
          /// </remarks>
          /// <param name="proteinList">Sequences read from the FASTA database.</param>
          private static void RunLevelSweepComparison(List<string> proteinList)
          {
              string naiveFile = DataRoot + "\\naive\\" + RunFileName;
              string basoFile = DataRoot + "\\MVH\\basophilenew\\" + RunFileName;

              for (int z = 0; z <= 4; z++)
              {
                  Console.WriteLine("starting analyzing z===" + z);

                  List<string> naivePeptides = Package.PepSecurity(naiveFile, z);
                  List<string> basoPeptides = Package.PepSecurity(basoFile, z);
                  List<string> commonPeptides = Package.findCommon(naivePeptides, basoPeptides);

                  int supportedNaive = CountSupported(naivePeptides, proteinList);
                  int supportedBaso = CountSupported(basoPeptides, proteinList);
                  int supportedCommon = CountSupported(commonPeptides, proteinList);

                  Console.WriteLine("number of naive: " + supportedNaive + "/" + naivePeptides.Count);
                  Console.WriteLine("number of baso: " + supportedBaso + "/" + basoPeptides.Count);
                  Console.WriteLine("number of common: " + supportedCommon + "/" + commonPeptides.Count);
                  Console.WriteLine();
              }
          }

          /// <summary>
          /// Counts the peptides that are contained (ordinal substring match) in at
          /// least one of the given protein sequences. Each peptide is counted at
          /// most once, regardless of how many proteins contain it.
          /// </summary>
          /// <param name="peptides">Peptide sequences to look up.</param>
          /// <param name="proteinList">Protein sequences to search in.</param>
          /// <returns>The number of peptides found in any protein.</returns>
          private static int CountSupported(IEnumerable<string> peptides, List<string> proteinList)
          {
              return peptides.Count(peptide => proteinList.Any(protein => protein.Contains(peptide)));
          }
      }
  }