PageRenderTime 22ms CodeModel.GetById 31ms RepoModel.GetById 0ms app.codeStats 0ms

/NuGenBioChem/Data/Importers/ProteinDataBankFile.cs

https://github.com/AnthonyNystrom/NuGenBioChem
C# | 241 lines | 172 code | 34 blank | 35 comment | 26 complexity | ab62cefccc9add5e3873b1c2684d96a1 MD5 | raw file
  1. using System;
  2. using System.Collections.Generic;
  3. using System.IO;
  4. using System.Windows.Media.Media3D;
  5. using System.Globalization;
  6. namespace NuGenBioChem.Data.Importers
  7. {
  /// <summary>
  /// Imports protein molecules from text in the Protein Data Bank (.PDB) file format.
  /// </summary>
  /// <remarks>
  /// All parsing happens inside the constructors. Exceptions raised while reading or
  /// parsing are not propagated: the exception text is exposed through
  /// <see cref="Messages"/> and <see cref="IsSuccessful"/> stays false. Molecules that
  /// were completed before the failure remain available in <see cref="Molecules"/>.
  /// </remarks>
  public class ProteinDataBankFile : IFileImporter
  {
      #region Fields

      // Message that describes the loading process (exception text on failure)
      string message = "";
      // Indicates whether loading was successful
      bool isSuccessful;
      // Molecules read from the file
      readonly MoleculeCollection molecules = new MoleculeCollection();
      // Chains of the molecule currently being read, keyed by chain identifier
      readonly Dictionary<char, Chain> chains = new Dictionary<char, Chain>();
      // Atoms of the molecule currently being read
      readonly List<Atom> atoms = new List<Atom>();

      #endregion

      #region Properties

      /// <summary>
      /// Indicates whether loading was successful
      /// </summary>
      public bool IsSuccessful
      {
          get { return isSuccessful; }
      }

      /// <summary>
      /// Gets information about the loading process
      /// (the exception message when loading failed, otherwise an empty string)
      /// </summary>
      public string Messages
      {
          get { return message; }
      }

      /// <summary>
      /// Gets molecules from the file
      /// </summary>
      public MoleculeCollection Molecules
      {
          get { return molecules; }
      }

      #endregion

      #region Initialization

      /// <summary>
      /// Loads molecule models from the specified file
      /// </summary>
      /// <param name="path">Filename</param>
      public ProteinDataBankFile(string path)
      {
          try
          {
              // File.ReadLines is called inside the try block so that a missing
              // or unreadable file is reported through Messages as well
              Parse(File.ReadLines(path));
              isSuccessful = true;
          }
          catch (Exception exception)
          {
              message = exception.Message;
              isSuccessful = false;
          }
      }

      /// <summary>
      /// Loads molecule models from the text lines
      /// </summary>
      /// <param name="lines">Lines</param>
      public ProteinDataBankFile(string[] lines)
      {
          try
          {
              Parse(lines);
              isSuccessful = true;
          }
          catch (Exception exception)
          {
              message = exception.Message;
              isSuccessful = false;
          }
      }

      #endregion

      #region Methods

      // Parses through all pdb-file's lines, dispatching on the record name
      // at the beginning of each line. Record names are matched ordinally
      // so that the result does not depend on the current culture.
      void Parse(IEnumerable<string> pdbLines)
      {
          foreach (string pdbLine in pdbLines)
          {
              if (string.IsNullOrEmpty(pdbLine))
              {
                  continue;
              }

              if (pdbLine.StartsWith("REMARK", StringComparison.Ordinal))
              {
                  continue;
              }

              if (pdbLine.StartsWith("ATOM", StringComparison.Ordinal))
              {
                  ParseAtom(pdbLine);
              }
              else if (pdbLine.StartsWith("HETATM", StringComparison.Ordinal))
              {
                  ParseHetAtom(pdbLine);
              }
              else if (pdbLine.StartsWith("HELIX", StringComparison.Ordinal))
              {
                  ParseHelix(pdbLine);
              }
              else if (pdbLine.StartsWith("SHEET", StringComparison.Ordinal))
              {
                  ParseSheet(pdbLine);
              }
              else if (pdbLine.StartsWith("END", StringComparison.Ordinal))
              {
                  // The "END" prefix matches both the ENDMDL and the END records
                  CompleteMolecule();
              }
          }

          // The input may finish without an END record:
          // keep whatever has been collected so far
          CompleteMolecule();
      }

      // Builds a molecule from the atoms and chains collected so far, adds it
      // to the result and resets the per-molecule state. Does nothing when
      // nothing has been collected (e.g. for the END record that follows
      // the last ENDMDL record), so no empty molecule is produced.
      // NOTE: the chain table is cleared here, so secondary structures read
      // from HELIX/SHEET records go only to the next completed molecule.
      void CompleteMolecule()
      {
          if (atoms.Count == 0 && chains.Count == 0)
          {
              return;
          }

          Molecule molecule = new Molecule();
          molecule.Atoms.AddRange(atoms);
          molecule.Chains.AddRange(chains.Values);
          molecule.CalculateBonds();
          molecules.Add(molecule);

          atoms.Clear();
          chains.Clear();
      }

      // Returns the chain with the given identifier,
      // creating and registering it on first use
      Chain GetOrCreateChain(char chainId)
      {
          Chain chain;
          if (!chains.TryGetValue(chainId, out chain))
          {
              // FIXME: name of the chain must be gotten from DBREF record (?)
              chain = new Chain() { Name = chainId.ToString() };
              chains[chainId] = chain;
          }
          return chain;
      }

      // Parses residue's atom specified by the pdb-file line and appends it
      // to the last residue of its chain. A new residue is started whenever
      // the residue sequence number differs from the one of the last residue.
      void ParseAtom(string pdbLine)
      {
          Atom atom = new Atom();
          atoms.Add(atom);

          Chain chain = GetOrCreateChain(pdbLine[21]);
          int number = int.Parse(pdbLine.Substring(22, 4), CultureInfo.InvariantCulture);

          int lastIndex = chain.Residues.Count - 1;
          Residue residue = lastIndex >= 0 ? chain.Residues[lastIndex] : null;
          if (residue == null || residue.SequenceNumber != number)
          {
              residue = new Residue();
              chain.Residues.Add(residue);
              residue.SequenceNumber = number;
              residue.Name = pdbLine.Substring(17, 3);
          }
          residue.Atoms.Add(atom);

          atom.Position = GetAtomPosition(pdbLine);

          string atomName = pdbLine.Substring(12, 4).Trim();
          if (atomName == "CA" || atomName == "C1")
          {
              // Atoms with these names are recorded as the residue's
              // alpha-carbon and are always treated as carbon
              atom.Element = Element.GetBySymbol("C");
              residue.AlfaCarbon = atom;
          }
          else
          {
              atom.Element = GetAtomElement(pdbLine);
          }
      }

      // Parses heterogeneous atom specified by the pdb-file line
      // (the atom is not attached to any chain or residue)
      void ParseHetAtom(string pdbLine)
      {
          Atom atom = new Atom();
          atoms.Add(atom);
          atom.Position = GetAtomPosition(pdbLine);
          atom.Element = GetAtomElement(pdbLine);
      }

      // Parses helix secondary structure object specified by the pdb-file line
      void ParseHelix(string pdbLine)
      {
          Chain chain = GetOrCreateChain(pdbLine[19]);

          SecondaryStructure helix = new SecondaryStructure();
          helix.StructureType = SecondaryStructureType.Helix;
          helix.FirstResidueSequenceNumber = int.Parse(pdbLine.Substring(21, 4), CultureInfo.InvariantCulture);
          helix.LastResidueSequenceNumber = int.Parse(pdbLine.Substring(33, 4), CultureInfo.InvariantCulture);
          chain.SecondaryStructures.Add(helix);
      }

      // Parses sheet secondary structure object specified by the pdb-file line
      void ParseSheet(string pdbLine)
      {
          Chain chain = GetOrCreateChain(pdbLine[21]);

          SecondaryStructure sheet = new SecondaryStructure();
          sheet.StructureType = SecondaryStructureType.Sheet;
          sheet.FirstResidueSequenceNumber = int.Parse(pdbLine.Substring(22, 4), CultureInfo.InvariantCulture);
          sheet.LastResidueSequenceNumber = int.Parse(pdbLine.Substring(33, 4), CultureInfo.InvariantCulture);
          chain.SecondaryStructures.Add(sheet);
      }

      // Parses position of the atom specified by the pdb-file line
      // (three 8-character fields starting at zero-based offsets 30, 38 and 46)
      static Point3D GetAtomPosition(string pdbLine)
      {
          Point3D position = new Point3D();
          position.X = double.Parse(pdbLine.Substring(30, 8), CultureInfo.InvariantCulture);
          position.Y = double.Parse(pdbLine.Substring(38, 8), CultureInfo.InvariantCulture);
          position.Z = double.Parse(pdbLine.Substring(46, 8), CultureInfo.InvariantCulture);
          return position;
      }

      // Creates element of the atom specified by the pdb-file line.
      // The symbol is read from the two characters at zero-based offset 76;
      // when the line is too short to contain them, or the symbol is unknown,
      // the first letter found in the two characters at offset 12
      // (beginning of the atom name) is used instead.
      // Returns whatever Element.GetBySymbol returns for the chosen symbol,
      // or null when no symbol could be determined.
      static Element GetAtomElement(string pdbLine)
      {
          Element result = null;

          // Lines may be shorter than 78 characters: the symbol field is then absent
          if (pdbLine.Length > 76)
          {
              string symbol = pdbLine.Substring(76, Math.Min(2, pdbLine.Length - 76)).Trim();
              if (symbol.Length == 2)
              {
                  // Two-letter symbols are looked up with a lower-case second letter
                  symbol = symbol.Substring(0, 1) + symbol.Substring(1, 1).ToLowerInvariant();
              }
              if (symbol.Length > 0)
              {
                  result = Element.GetBySymbol(symbol);
              }
          }

          if (result == null)
          {
              // Skip anything that is not a letter (e.g. a leading digit
              // or a blank) at the beginning of the atom name
              int end = Math.Min(pdbLine.Length, 14);
              for (int i = 12; i < end; i++)
              {
                  if (char.IsLetter(pdbLine[i]))
                  {
                      result = Element.GetBySymbol(pdbLine[i].ToString());
                      break;
                  }
              }
          }

          return result;
      }

      #endregion
  }
  207. }