PageRenderTime 65ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 0ms

/BioLiteManual/_modules/biolite/workflows.html

https://bitbucket.org/aguang/biolite
HTML | 399 lines | 348 code | 51 blank | 0 comment | 0 complexity | 2454942d32297449f7d74b3e3f60bf62 MD5 | raw file
  1. <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  2. "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
  3. <html xmlns="http://www.w3.org/1999/xhtml">
  4. <head>
  5. <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  6. <title>biolite.workflows &mdash; BioLite 0.3.3 documentation</title>
  7. <link rel="stylesheet" href="../../_static/default.css" type="text/css" />
  8. <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
  9. <script type="text/javascript">
  10. var DOCUMENTATION_OPTIONS = {
  11. URL_ROOT: '../../',
  12. VERSION: '0.3.3',
  13. COLLAPSE_INDEX: false,
  14. FILE_SUFFIX: '.html',
  15. HAS_SOURCE: true
  16. };
  17. </script>
  18. <script type="text/javascript" src="../../_static/jquery.js"></script>
  19. <script type="text/javascript" src="../../_static/underscore.js"></script>
  20. <script type="text/javascript" src="../../_static/doctools.js"></script>
  21. <link rel="top" title="BioLite 0.3.3 documentation" href="../../index.html" />
  22. <link rel="up" title="Module code" href="../index.html" />
  23. </head>
  24. <body>
  25. <div class="related">
  26. <h3>Navigation</h3>
  27. <ul>
  28. <li class="right" style="margin-right: 10px">
  29. <a href="../../genindex.html" title="General Index"
  30. accesskey="I">index</a></li>
  31. <li class="right" >
  32. <a href="../../py-modindex.html" title="Python Module Index"
  33. >modules</a> |</li>
  34. <li><a href="../../index.html">BioLite 0.3.3 documentation</a> &raquo;</li>
  35. <li><a href="../index.html" accesskey="U">Module code</a> &raquo;</li>
  36. </ul>
  37. </div>
  38. <div class="document">
  39. <div class="documentwrapper">
  40. <div class="bodywrapper">
  41. <div class="body">
  42. <h1>Source code for biolite.workflows</h1><div class="highlight"><pre>
  43. <span class="c"># BioLite - Tools for processing gene sequence data and automating workflows</span>
  44. <span class="c"># Copyright (c) 2012-2013 Brown University. All rights reserved.</span>
  45. <span class="c"># </span>
  46. <span class="c"># This file is part of BioLite.</span>
  47. <span class="c"># </span>
  48. <span class="c"># BioLite is free software: you can redistribute it and/or modify</span>
  49. <span class="c"># it under the terms of the GNU General Public License as published by</span>
  50. <span class="c"># the Free Software Foundation, either version 3 of the License, or</span>
  51. <span class="c"># (at your option) any later version.</span>
  52. <span class="c"># </span>
  53. <span class="c"># BioLite is distributed in the hope that it will be useful,</span>
  54. <span class="c"># but WITHOUT ANY WARRANTY; without even the implied warranty of</span>
  55. <span class="c"># MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the</span>
  56. <span class="c"># GNU General Public License for more details.</span>
  57. <span class="c"># </span>
  58. <span class="c"># You should have received a copy of the GNU General Public License</span>
  59. <span class="c"># along with BioLite. If not, see &lt;http://www.gnu.org/licenses/&gt;.</span>
  60. <span class="sd">&quot;&quot;&quot;</span>
  61. <span class="sd">Provides a collection of helper functions that coordinate multiple wrappers</span>
  62. <span class="sd">from the :ref:`wrappers` to accomplish a unified goal or automate a common</span>
  63. <span class="sd">analysis task.</span>
  64. <span class="sd">Workflows are available for the following groups of tasks:</span>
  65. <span class="sd">* Assembly statistics and sweeps</span>
  66. <span class="sd">* Contig parsing</span>
  67. <span class="sd">* Blast result parsing</span>
  68. <span class="sd">* SamTools automation</span>
  69. <span class="sd">* Transcript cleaning</span>
  70. <span class="sd">&quot;&quot;&quot;</span>
  71. <span class="kn">import</span> <span class="nn">os</span>
  72. <span class="kn">import</span> <span class="nn">re</span>
  73. <span class="kn">import</span> <span class="nn">shutil</span>
  74. <span class="kn">import</span> <span class="nn">sys</span>
  75. <span class="kn">from</span> <span class="nn">Bio.Blast</span> <span class="kn">import</span> <span class="n">NCBIXML</span>
  76. <span class="kn">from</span> <span class="nn">Bio</span> <span class="kn">import</span> <span class="n">SeqIO</span>
  77. <span class="kn">from</span> <span class="nn">collections</span> <span class="kn">import</span> <span class="n">namedtuple</span><span class="p">,</span> <span class="n">OrderedDict</span>
  78. <span class="kn">from</span> <span class="nn">biolite</span> <span class="kn">import</span> <span class="n">config</span>
  79. <span class="kn">from</span> <span class="nn">biolite</span> <span class="kn">import</span> <span class="n">diagnostics</span>
  80. <span class="kn">from</span> <span class="nn">biolite</span> <span class="kn">import</span> <span class="n">utils</span>
  81. <span class="kn">from</span> <span class="nn">biolite</span> <span class="kn">import</span> <span class="n">wrappers</span>
  82. <span class="kn">import</span> <span class="nn">blast</span>
  83. <span class="kn">import</span> <span class="nn">phylogeny</span>
  84. <span class="n">ContigHeader</span> <span class="o">=</span> <span class="n">namedtuple</span><span class="p">(</span><span class="s">&#39;ContigHeader&#39;</span><span class="p">,</span> <span class="s">&quot;locus transcript confidence length&quot;</span><span class="p">)</span>
  85. <span class="sd">&quot;&quot;&quot;</span>
  86. <span class="sd">A namedtuple for storing the information from a transcript header.</span>
  87. <span class="sd">&quot;&quot;&quot;</span>
  88. <span class="n">standard_contig_header</span> <span class="o">=</span> \
  89. <span class="s">&quot;Locus_{0}_Transcript_{1}/0_Confidence_{2}_Length_{3}</span><span class="se">\n</span><span class="s">&quot;</span>
  90. <span class="n">oases_header_pattern</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span>\
  91. <span class="s">r&#39;Locus_([\d\.]+)_Transcript_(\d+)/\d+_Confidence_([\.\d]+)_Length_(\d+)$&#39;</span><span class="p">)</span>
  92. <span class="n">trinity_header_pattern</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span>\
  93. <span class="s">r&#39;comp(\d+)_c(\d+)_seq(\d+) len=(\d+) path=\[([-: \d]+)\]$&#39;</span><span class="p">)</span>
  94. <div class="viewcode-block" id="unpack_oases_header"><a class="viewcode-back" href="../../workflows.html#biolite.workflows.unpack_oases_header">[docs]</a><span class="k">def</span> <span class="nf">unpack_oases_header</span><span class="p">(</span><span class="n">header</span><span class="p">):</span>
  95. <span class="sd">&quot;&quot;&quot;</span>
  96. <span class="sd"> Unpacks an Oases contig header into a ContigHeader object.</span>
  97. <span class="sd"> Example header:</span>
  98. <span class="sd"> </span>
  99. <span class="sd"> &gt;Locus_9919_Transcript_1/1_Confidence_1.000_Length_160</span>
  100. <span class="sd"> &quot;&quot;&quot;</span>
  101. <span class="n">match</span> <span class="o">=</span> <span class="n">oases_header_pattern</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="n">header</span><span class="p">)</span>
  102. <span class="k">if</span> <span class="n">match</span><span class="p">:</span>
  103. <span class="k">return</span> <span class="n">ContigHeader</span><span class="p">(</span><span class="o">*</span><span class="n">match</span><span class="o">.</span><span class="n">groups</span><span class="p">())</span>
  104. <span class="k">else</span><span class="p">:</span>
  105. <span class="n">utils</span><span class="o">.</span><span class="n">die</span><span class="p">(</span><span class="s">&quot;bad oases header: </span><span class="si">%s</span><span class="s">&quot;</span> <span class="o">%</span> <span class="n">header</span><span class="p">)</span>
  106. </div>
  107. <div class="viewcode-block" id="contig_stats"><a class="viewcode-back" href="../../workflows.html#biolite.workflows.contig_stats">[docs]</a><span class="k">def</span> <span class="nf">contig_stats</span><span class="p">(</span><span class="n">fasta_path</span><span class="p">,</span> <span class="n">hist_path</span><span class="p">):</span>
  108. <span class="sd">&quot;&quot;&quot;</span>
  109. <span class="sd"> Parses the assembled contigs in `fasta_path` and writes a histogram of</span>
  110. <span class="sd"> contig length to `hist_path`.</span>
  111. <span class="sd"> </span>
  112. <span class="sd"> Writes the total contig count, mean length, and N50 length to</span>
  113. <span class="sd"> the diagnostics.</span>
  114. <span class="sd"> &quot;&quot;&quot;</span>
  115. <span class="n">count</span> <span class="o">=</span> <span class="mi">0</span>
  116. <span class="n">mean</span> <span class="o">=</span> <span class="mf">0.0</span>
  117. <span class="n">n50</span> <span class="o">=</span> <span class="mi">0</span>
  118. <span class="c"># Parse the fasta file to construct a histogram, stored as a dictionary</span>
  119. <span class="c"># {contig_length: frequency}, and to calculate total contig count and</span>
  120. <span class="c"># mean length.</span>
  121. <span class="n">hist</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">()</span>
  122. <span class="n">total_length</span> <span class="o">=</span> <span class="mi">0</span>
  123. <span class="n">lengths</span> <span class="o">=</span> <span class="nb">list</span><span class="p">()</span>
  124. <span class="k">for</span> <span class="n">record</span> <span class="ow">in</span> <span class="n">SeqIO</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">fasta_path</span><span class="p">,</span> <span class="s">&#39;fasta&#39;</span><span class="p">):</span>
  125. <span class="n">count</span> <span class="o">+=</span> <span class="mi">1</span>
  126. <span class="n">length</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">record</span><span class="o">.</span><span class="n">seq</span><span class="p">)</span>
  127. <span class="n">lengths</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">length</span><span class="p">)</span>
  128. <span class="n">hist</span><span class="p">[</span><span class="n">length</span><span class="p">]</span> <span class="o">=</span> <span class="n">hist</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">length</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span> <span class="o">+</span> <span class="mi">1</span>
  129. <span class="n">lengths</span><span class="o">.</span><span class="n">sort</span><span class="p">()</span>
  130. <span class="n">total_length</span> <span class="o">=</span> <span class="nb">sum</span><span class="p">(</span><span class="n">lengths</span><span class="p">)</span>
  131. <span class="k">if</span> <span class="n">count</span><span class="p">:</span>
  132. <span class="n">mean</span> <span class="o">=</span> <span class="n">total_length</span> <span class="o">/</span> <span class="nb">float</span><span class="p">(</span><span class="n">count</span><span class="p">)</span>
  133. <span class="c"># Find the n50 value</span>
  134. <span class="n">cum_length</span> <span class="o">=</span> <span class="mi">0</span>
  135. <span class="k">for</span> <span class="n">length</span> <span class="ow">in</span> <span class="n">lengths</span><span class="p">:</span>
  136. <span class="n">cum_length</span> <span class="o">+=</span> <span class="n">length</span>
  137. <span class="k">if</span> <span class="n">cum_length</span> <span class="o">&gt;=</span> <span class="p">(</span><span class="n">total_length</span> <span class="o">/</span> <span class="mi">2</span><span class="p">):</span>
  138. <span class="n">n50</span> <span class="o">=</span> <span class="n">length</span>
  139. <span class="k">break</span>
  140. <span class="c"># Loop through the histogram to dump it to a text file.</span>
  141. <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">hist_path</span><span class="p">,</span> <span class="s">&#39;w&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span>
  142. <span class="k">for</span> <span class="n">key</span> <span class="ow">in</span> <span class="nb">sorted</span><span class="p">(</span><span class="n">hist</span><span class="o">.</span><span class="n">keys</span><span class="p">()):</span>
  143. <span class="k">print</span> <span class="o">&gt;&gt;</span><span class="n">f</span><span class="p">,</span> <span class="n">key</span><span class="p">,</span> <span class="n">hist</span><span class="p">[</span><span class="n">key</span><span class="p">]</span>
  144. <span class="n">diagnostics</span><span class="o">.</span><span class="n">prefix</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="s">&#39;contig_stats&#39;</span><span class="p">)</span>
  145. <span class="n">diagnostics</span><span class="o">.</span><span class="n">log</span><span class="p">(</span><span class="s">&#39;count&#39;</span><span class="p">,</span> <span class="n">count</span><span class="p">)</span>
  146. <span class="n">diagnostics</span><span class="o">.</span><span class="n">log</span><span class="p">(</span><span class="s">&#39;mean&#39;</span><span class="p">,</span> <span class="n">mean</span><span class="p">)</span>
  147. <span class="n">diagnostics</span><span class="o">.</span><span class="n">log</span><span class="p">(</span><span class="s">&#39;n50&#39;</span><span class="p">,</span> <span class="n">n50</span><span class="p">)</span>
  148. <span class="n">diagnostics</span><span class="o">.</span><span class="n">log_path</span><span class="p">(</span><span class="n">hist_path</span><span class="p">)</span>
  149. <span class="n">diagnostics</span><span class="o">.</span><span class="n">prefix</span><span class="o">.</span><span class="n">pop</span><span class="p">()</span>
  150. </div>
  151. <div class="viewcode-block" id="oases_clean"><a class="viewcode-back" href="../../workflows.html#biolite.workflows.oases_clean">[docs]</a><span class="k">def</span> <span class="nf">oases_clean</span><span class="p">(</span><span class="n">workdir</span><span class="o">=</span><span class="s">&#39;./&#39;</span><span class="p">):</span>
  152. <span class="sd">&quot;&quot;&quot;</span>
  153. <span class="sd"> Cleans up a work directory that was used for an Oases assembly.</span>
  154. <span class="sd"> &quot;&quot;&quot;</span>
  155. <span class="n">files</span> <span class="o">=</span> <span class="p">(</span><span class="s">&#39;Sequences&#39;</span><span class="p">,</span> <span class="s">&#39;Roadmaps&#39;</span><span class="p">,</span> <span class="s">&#39;PreGraph&#39;</span><span class="p">,</span> <span class="s">&#39;Graph2&#39;</span><span class="p">,</span> <span class="s">&#39;LastGraph&#39;</span><span class="p">,</span> <span class="s">&#39;contigs.fa&#39;</span><span class="p">,</span> <span class="s">&#39;Log&#39;</span><span class="p">)</span>
  156. <span class="k">for</span> <span class="n">f</span> <span class="ow">in</span> <span class="n">files</span><span class="p">:</span>
  157. <span class="n">utils</span><span class="o">.</span><span class="n">safe_remove</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">workdir</span><span class="p">,</span> <span class="n">f</span><span class="p">))</span>
  158. </div>
  159. <div class="viewcode-block" id="oases_assemblies"><a class="viewcode-back" href="../../workflows.html#biolite.workflows.oases_assemblies">[docs]</a><span class="k">def</span> <span class="nf">oases_assemblies</span><span class="p">(</span><span class="n">inputs</span><span class="p">,</span> <span class="n">kmers</span><span class="o">=</span><span class="p">[</span><span class="mi">61</span><span class="p">],</span> <span class="n">workdir</span><span class="o">=</span><span class="s">&#39;./&#39;</span><span class="p">,</span> <span class="n">min_length</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> <span class="n">ins_length</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
  160. <span class="sd">&quot;&quot;&quot;</span>
  161. <span class="sd"> Automates Oases assemblies that sweep multiple `kmers`.</span>
  162. <span class="sd"> If `inputs` is a list of FASTQ files, they are automatically shuffled</span>
  163. <span class="sd"> together. Or, provide a singleton list with the path to a pre-shuffled</span>
  164. <span class="sd"> FASTQ file.</span>
  165. <span class="sd"> &quot;&quot;&quot;</span>
  166. <span class="n">diagnostics</span><span class="o">.</span><span class="n">prefix</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="s">&#39;oases_assemblies&#39;</span><span class="p">)</span>
  167. <span class="n">contig_list</span> <span class="o">=</span> <span class="nb">list</span><span class="p">()</span>
  168. <span class="k">for</span> <span class="n">k</span> <span class="ow">in</span> <span class="n">kmers</span><span class="p">:</span>
  169. <span class="n">diagnostics</span><span class="o">.</span><span class="n">prefix</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="s">&#39;k</span><span class="si">%d</span><span class="s">&#39;</span> <span class="o">%</span> <span class="n">k</span><span class="p">)</span>
  170. <span class="n">subdir</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">workdir</span><span class="p">,</span> <span class="s">&#39;k</span><span class="si">%d</span><span class="s">&#39;</span> <span class="o">%</span> <span class="n">k</span><span class="p">)</span>
  171. <span class="c"># clear out any previous runs</span>
  172. <span class="n">shutil</span><span class="o">.</span><span class="n">rmtree</span><span class="p">(</span><span class="n">subdir</span><span class="p">,</span> <span class="n">ignore_errors</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
  173. <span class="n">utils</span><span class="o">.</span><span class="n">safe_mkdir</span><span class="p">(</span><span class="n">subdir</span><span class="p">)</span>
  174. <span class="c">#oases_clean(subdir)</span>
  175. <span class="n">wrappers</span><span class="o">.</span><span class="n">VelvetH</span><span class="p">(</span><span class="n">inputs</span><span class="p">,</span> <span class="n">subdir</span><span class="p">,</span> <span class="n">kmer</span><span class="o">=</span><span class="n">k</span><span class="p">)</span>
  176. <span class="n">wrappers</span><span class="o">.</span><span class="n">VelvetG</span><span class="p">(</span><span class="n">subdir</span><span class="p">,</span> <span class="n">ins_length</span><span class="p">,</span> <span class="n">min_length</span><span class="o">=</span><span class="n">min_length</span><span class="p">)</span>
  177. <span class="c"># makes transcripts.fa</span>
  178. <span class="n">wrappers</span><span class="o">.</span><span class="n">Oases</span><span class="p">(</span><span class="n">subdir</span><span class="p">,</span> <span class="n">ins_length</span><span class="p">,</span> <span class="n">min_length</span><span class="o">=</span><span class="n">min_length</span><span class="p">)</span>
  179. <span class="n">contigs</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">subdir</span><span class="p">,</span> <span class="s">&#39;transcripts.fa&#39;</span><span class="p">)</span>
  180. <span class="n">contigs_k</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">workdir</span><span class="p">,</span> <span class="s">&#39;transcripts-k</span><span class="si">%d</span><span class="s">.fa&#39;</span> <span class="o">%</span> <span class="n">k</span><span class="p">)</span>
  181. <span class="n">shutil</span><span class="o">.</span><span class="n">move</span><span class="p">(</span><span class="n">contigs</span><span class="p">,</span> <span class="n">contigs_k</span><span class="p">)</span>
  182. <span class="n">contig_list</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">contigs_k</span><span class="p">)</span>
  183. <span class="n">diagnostics</span><span class="o">.</span><span class="n">prefix</span><span class="o">.</span><span class="n">pop</span><span class="p">()</span>
  184. <span class="n">diagnostics</span><span class="o">.</span><span class="n">prefix</span><span class="o">.</span><span class="n">pop</span><span class="p">()</span>
  185. <span class="k">return</span> <span class="n">contig_list</span>
  186. </div>
  187. <div class="viewcode-block" id="oases_concat_assembly"><a class="viewcode-back" href="../../workflows.html#biolite.workflows.oases_concat_assembly">[docs]</a><span class="k">def</span> <span class="nf">oases_concat_assembly</span><span class="p">(</span><span class="n">inputs</span><span class="p">,</span> <span class="n">concat_path</span><span class="p">,</span> <span class="n">kmers</span><span class="p">,</span> <span class="n">workdir</span><span class="o">=</span><span class="s">&#39;./&#39;</span><span class="p">,</span> <span class="n">ins_length</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
  188. <span class="sd">&quot;&quot;&quot;</span>
  189. <span class="sd"> Performs Oases assemblies sweeping over the provided `kmers` list, and</span>
  190. <span class="sd"> concatenates all contigs to `concat_path`.</span>
  191. <span class="sd"> If `inputs` is a list of FASTQ files, they are automatically shuffled</span>
  192. <span class="sd"> together. Or, provide a singleton list with the path to a pre-shuffled</span>
  193. <span class="sd"> FASTQ file.</span>
  194. <span class="sd"> &quot;&quot;&quot;</span>
  195. <span class="n">utils</span><span class="o">.</span><span class="n">truncate_file</span><span class="p">(</span><span class="n">concat_path</span><span class="p">)</span>
  196. <span class="k">for</span> <span class="n">contigs</span> <span class="ow">in</span> <span class="n">oases_assemblies</span><span class="p">(</span><span class="n">inputs</span><span class="p">,</span> <span class="n">kmers</span><span class="p">,</span> <span class="n">workdir</span><span class="p">,</span> <span class="n">ins_length</span><span class="p">):</span>
  197. <span class="n">utils</span><span class="o">.</span><span class="n">cat_to_file</span><span class="p">(</span><span class="n">contigs</span><span class="p">,</span> <span class="n">concat_path</span><span class="p">)</span>
  198. </div>
  199. <div class="viewcode-block" id="oases_merge_assembly"><a class="viewcode-back" href="../../workflows.html#biolite.workflows.oases_merge_assembly">[docs]</a><span class="k">def</span> <span class="nf">oases_merge_assembly</span><span class="p">(</span><span class="n">inputs</span><span class="p">,</span> <span class="n">merge_path</span><span class="p">,</span> <span class="n">merge_kmer</span><span class="p">,</span> <span class="n">kmers</span><span class="p">,</span> <span class="n">min_length</span><span class="o">=</span><span class="bp">None</span><span class="p">,</span> <span class="n">workdir</span><span class="o">=</span><span class="s">&#39;./&#39;</span><span class="p">,</span> <span class="n">ins_length</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
  200. <span class="sd">&quot;&quot;&quot;</span>
  201. <span class="sd"> Implements the Oases-M protocol for merging several Oases assemblies, as</span>
  202. <span class="sd"> described in:</span>
  203. <span class="sd"> Schulz, M. H., Zerbino, D. R., Vingron, M., &amp; Birney, E. (2012). Oases:</span>
  204. <span class="sd"> Robust de novo RNA-seq assembly across the dynamic range of expression</span>
  205. <span class="sd"> levels. Bioinformatics (Oxford, England), 1-7.</span>
  206. <span class="sd"> doi:10.1093/bioinformatics/bts094</span>
  207. <span class="sd"> Performs Oases assemblies sweeping over the provided `kmers` list, then</span>
  208. <span class="sd"> performs a Oases merge assembly with `merge_kmer`.</span>
  209. <span class="sd"> &quot;&quot;&quot;</span>
  210. <span class="n">diagnostics</span><span class="o">.</span><span class="n">prefix</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="s">&#39;oases_merge_assembly&#39;</span><span class="p">)</span>
  211. <span class="n">oases_clean</span><span class="p">(</span><span class="n">workdir</span><span class="p">)</span>
  212. <span class="n">contig_list</span> <span class="o">=</span> <span class="n">oases_assemblies</span><span class="p">(</span><span class="n">inputs</span><span class="p">,</span> <span class="n">kmers</span><span class="p">,</span> <span class="n">workdir</span><span class="p">,</span> <span class="n">min_length</span><span class="p">,</span> <span class="n">ins_length</span><span class="p">)</span>
  213. <span class="n">wrappers</span><span class="o">.</span><span class="n">VelvetH</span><span class="p">(</span><span class="n">contig_list</span><span class="p">,</span> <span class="n">workdir</span><span class="p">,</span> <span class="n">kmer</span><span class="o">=</span><span class="n">merge_kmer</span><span class="p">,</span> <span class="n">merge</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
  214. <span class="n">wrappers</span><span class="o">.</span><span class="n">VelvetG</span><span class="p">(</span><span class="n">workdir</span><span class="p">,</span> <span class="n">merge</span><span class="o">=</span><span class="bp">True</span><span class="p">,</span> <span class="n">min_length</span><span class="o">=</span><span class="n">min_length</span><span class="p">)</span>
  215. <span class="n">wrappers</span><span class="o">.</span><span class="n">Oases</span><span class="p">(</span><span class="n">workdir</span><span class="p">,</span> <span class="n">merge</span><span class="o">=</span><span class="bp">True</span><span class="p">,</span> <span class="n">min_length</span><span class="o">=</span><span class="n">min_length</span><span class="p">)</span>
  216. <span class="n">contigs</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">workdir</span><span class="p">,</span> <span class="s">&#39;transcripts.fa&#39;</span><span class="p">)</span>
  217. <span class="n">shutil</span><span class="o">.</span><span class="n">move</span><span class="p">(</span><span class="n">contigs</span><span class="p">,</span> <span class="n">merge_path</span><span class="p">)</span>
  218. <span class="n">diagnostics</span><span class="o">.</span><span class="n">log</span><span class="p">(</span><span class="s">&#39;kmer&#39;</span><span class="p">,</span> <span class="n">merge_kmer</span><span class="p">)</span>
  219. <span class="n">diagnostics</span><span class="o">.</span><span class="n">log_path</span><span class="p">(</span><span class="n">merge_path</span><span class="p">)</span>
  220. <span class="n">diagnostics</span><span class="o">.</span><span class="n">prefix</span><span class="o">.</span><span class="n">pop</span><span class="p">()</span>
  221. </div>
  222. <div class="viewcode-block" id="trinity_assembly"><a class="viewcode-back" href="../../workflows.html#biolite.workflows.trinity_assembly">[docs]</a><span class="k">def</span> <span class="nf">trinity_assembly</span><span class="p">(</span><span class="n">out</span><span class="p">,</span> <span class="n">inputs</span><span class="p">,</span> <span class="n">workdir</span><span class="o">=</span><span class="s">&#39;./&#39;</span><span class="p">,</span> <span class="n">min_length</span><span class="o">=</span><span class="mi">100</span><span class="p">):</span>
  223. <span class="n">wrappers</span><span class="o">.</span><span class="n">Trinity</span><span class="p">(</span><span class="n">inputs</span><span class="p">,</span> <span class="n">workdir</span><span class="p">,</span> <span class="n">min_length</span><span class="o">=</span><span class="n">min_length</span><span class="p">,</span> <span class="n">seq_type</span><span class="o">=</span><span class="s">&#39;fq&#39;</span><span class="p">)</span>
  224. <span class="n">commands</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">workdir</span><span class="p">,</span> <span class="s">&#39;chrysalis&#39;</span><span class="p">,</span> <span class="s">&#39;butterfly_commands.adj&#39;</span><span class="p">)</span>
  225. <span class="n">contigs</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">workdir</span><span class="p">,</span> <span class="s">&#39;Trinity.fasta&#39;</span><span class="p">)</span>
  226. <span class="k">if</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">commands</span><span class="p">):</span>
  227. <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">commands</span><span class="o">+</span><span class="s">&#39;.biolite&#39;</span><span class="p">,</span> <span class="s">&#39;w&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span>
  228. <span class="k">for</span> <span class="n">line</span> <span class="ow">in</span> <span class="nb">open</span><span class="p">(</span><span class="n">commands</span><span class="p">):</span>
  229. <span class="n">line</span> <span class="o">=</span> <span class="n">line</span><span class="o">.</span><span class="n">rstrip</span><span class="p">()</span>
  230. <span class="n">comp</span> <span class="o">=</span> <span class="n">line</span><span class="p">[</span><span class="n">line</span><span class="o">.</span><span class="n">index</span><span class="p">(</span><span class="s">&#39;-C&#39;</span><span class="p">)</span><span class="o">+</span><span class="mi">3</span><span class="p">:]</span><span class="o">.</span><span class="n">partition</span><span class="p">(</span><span class="s">&#39; &#39;</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
  231. <span class="k">if</span> <span class="n">utils</span><span class="o">.</span><span class="n">count_lines</span><span class="p">(</span><span class="n">comp</span><span class="o">+</span><span class="s">&#39;.out&#39;</span><span class="p">)</span> <span class="o">&gt;</span> <span class="n">min_length</span><span class="p">:</span>
  232. <span class="k">print</span> <span class="o">&gt;&gt;</span><span class="n">f</span><span class="p">,</span> <span class="n">line</span><span class="p">,</span> <span class="s">&#39;&amp;&amp; cat </span><span class="si">%s</span><span class="s">.allProbPaths.fasta &gt;&gt;</span><span class="si">%s</span><span class="s">&#39;</span> <span class="o">%</span> <span class="p">(</span><span class="n">comp</span><span class="p">,</span> <span class="n">contigs</span><span class="p">)</span>
  233. <span class="n">wrappers</span><span class="o">.</span><span class="n">ParallelButterfly</span><span class="p">(</span>
  234. <span class="n">commands</span><span class="o">+</span><span class="s">&#39;.biolite&#39;</span><span class="p">,</span> <span class="s">&#39;--joblog&#39;</span><span class="p">,</span> <span class="s">&#39;butterfly.joblog.txt&#39;</span><span class="p">,</span>
  235. <span class="n">threads</span><span class="o">=</span><span class="nb">max</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="nb">int</span><span class="p">(</span><span class="n">config</span><span class="o">.</span><span class="n">get_resource</span><span class="p">(</span><span class="s">&#39;threads&#39;</span><span class="p">))</span><span class="o">/</span><span class="mi">2</span><span class="p">),</span>
  236. <span class="n">return_ok</span><span class="o">=</span><span class="bp">None</span><span class="p">)</span>
  237. <span class="k">else</span><span class="p">:</span>
  238. <span class="n">utils</span><span class="o">.</span><span class="n">die</span><span class="p">(</span><span class="s">&quot;no butterfly command output from Trinity&quot;</span><span class="p">)</span>
  239. <span class="k">if</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">contigs</span><span class="p">):</span>
  240. <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">contigs</span><span class="p">,</span> <span class="s">&#39;r&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">f1</span><span class="p">,</span> <span class="nb">open</span><span class="p">(</span><span class="n">out</span><span class="p">,</span> <span class="s">&#39;w&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">f2</span><span class="p">:</span>
  241. <span class="k">for</span> <span class="n">record</span> <span class="ow">in</span> <span class="n">SeqIO</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">f1</span><span class="p">,</span> <span class="s">&#39;fasta&#39;</span><span class="p">):</span>
  242. <span class="n">match</span> <span class="o">=</span> <span class="n">trinity_header_pattern</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="n">record</span><span class="o">.</span><span class="n">description</span><span class="p">)</span>
  243. <span class="k">if</span> <span class="n">match</span> <span class="ow">is</span> <span class="ow">not</span> <span class="bp">None</span><span class="p">:</span>
  244. <span class="n">num</span><span class="p">,</span> <span class="n">sub</span><span class="p">,</span> <span class="n">seq</span><span class="p">,</span> <span class="n">length</span><span class="p">,</span> <span class="n">path</span> <span class="o">=</span> <span class="n">match</span><span class="o">.</span><span class="n">groups</span><span class="p">()</span>
  245. <span class="n">record</span><span class="o">.</span><span class="n">name</span> <span class="o">=</span> <span class="s">&#39;&#39;</span>
  246. <span class="n">record</span><span class="o">.</span><span class="n">description</span> <span class="o">=</span> <span class="s">&#39;&#39;</span>
  247. <span class="n">record</span><span class="o">.</span><span class="n">id</span> <span class="o">=</span> <span class="n">standard_contig_header</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
  248. <span class="s">&quot;</span><span class="si">%s</span><span class="s">.</span><span class="si">%s</span><span class="s">&quot;</span> <span class="o">%</span> <span class="p">(</span><span class="n">num</span><span class="p">,</span> <span class="n">sub</span><span class="p">),</span> <span class="n">seq</span><span class="p">,</span>
  249. <span class="nb">sum</span><span class="p">(</span><span class="n">c</span> <span class="o">==</span> <span class="s">&#39; &#39;</span> <span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="n">path</span><span class="p">),</span> <span class="n">length</span><span class="p">)</span>
  250. <span class="n">SeqIO</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">record</span><span class="p">,</span> <span class="n">f2</span><span class="p">,</span> <span class="s">&#39;fasta&#39;</span><span class="p">)</span>
  251. <span class="k">else</span><span class="p">:</span>
  252. <span class="n">utils</span><span class="o">.</span><span class="n">die</span><span class="p">(</span><span class="s">&quot;no contig output from Trinity&quot;</span><span class="p">)</span>
  253. </div>
  254. <div class="viewcode-block" id="extract_oases_exemplars"><a class="viewcode-back" href="../../workflows.html#biolite.workflows.extract_oases_exemplars">[docs]</a><span class="k">def</span> <span class="nf">extract_oases_exemplars</span><span class="p">(</span><span class="n">input_path</span><span class="p">,</span> <span class="n">output_path</span><span class="p">,</span> <span class="n">min_length</span><span class="o">=</span><span class="mi">0</span><span class="p">):</span>
  255. <span class="sd">&quot;&quot;&quot;</span>
  256. <span class="sd"> Extracts a single exemplar transcript for each locus in an Oases assembly</span>
  257. <span class="sd"> at `input_path` and writes it to `output_path`. Only transcripts of length</span>
  258. <span class="sd"> at least `min_length` are considered.</span>
  259. <span class="sd"> The exemplar is chosen as the transcript with the highest confidence score.</span>
  260. <span class="sd"> &quot;&quot;&quot;</span>
  261. <span class="c"># Dictionary with key locus and value sequence record</span>
  262. <span class="n">exemplars</span> <span class="o">=</span> <span class="n">OrderedDict</span><span class="p">()</span>
  263. <span class="c"># Parse transcripts</span>
  264. <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">input_path</span><span class="p">,</span> <span class="s">&#39;rU&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span>
  265. <span class="k">for</span> <span class="n">record</span> <span class="ow">in</span> <span class="n">SeqIO</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">f</span><span class="p">,</span> <span class="s">&#39;fasta&#39;</span><span class="p">):</span>
  266. <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">record</span><span class="o">.</span><span class="n">seq</span><span class="p">)</span> <span class="o">&lt;</span> <span class="n">min_length</span><span class="p">:</span>
  267. <span class="k">continue</span>
  268. <span class="n">header</span> <span class="o">=</span> <span class="n">unpack_oases_header</span><span class="p">(</span><span class="n">record</span><span class="o">.</span><span class="n">id</span><span class="p">)</span>
  269. <span class="k">if</span> <span class="n">header</span><span class="o">.</span><span class="n">locus</span> <span class="ow">in</span> <span class="n">exemplars</span><span class="p">:</span>
  270. <span class="n">old_header</span> <span class="o">=</span> <span class="n">unpack_oases_header</span><span class="p">(</span><span class="n">exemplars</span><span class="p">[</span><span class="n">header</span><span class="o">.</span><span class="n">locus</span><span class="p">]</span><span class="o">.</span><span class="n">id</span><span class="p">)</span>
  271. <span class="c"># If the new confidence is higher, replace the current record</span>
  272. <span class="k">if</span> <span class="nb">float</span><span class="p">(</span><span class="n">header</span><span class="o">.</span><span class="n">confidence</span><span class="p">)</span> <span class="o">&gt;</span> <span class="nb">float</span><span class="p">(</span><span class="n">old_header</span><span class="o">.</span><span class="n">confidence</span><span class="p">):</span>
  273. <span class="n">exemplars</span><span class="p">[</span><span class="n">header</span><span class="o">.</span><span class="n">locus</span><span class="p">]</span> <span class="o">=</span> <span class="n">record</span>
  274. <span class="c"># If the confidences are the same, then replace the old record</span>
  275. <span class="c"># if the new length is greater</span>
  276. <span class="k">elif</span> <span class="nb">float</span><span class="p">(</span><span class="n">header</span><span class="o">.</span><span class="n">confidence</span><span class="p">)</span> <span class="o">==</span> <span class="nb">float</span><span class="p">(</span><span class="n">old_header</span><span class="o">.</span><span class="n">confidence</span><span class="p">):</span>
  277. <span class="k">if</span> <span class="nb">int</span><span class="p">(</span><span class="n">header</span><span class="o">.</span><span class="n">length</span><span class="p">)</span> <span class="o">&gt;</span> <span class="nb">int</span><span class="p">(</span><span class="n">old_header</span><span class="o">.</span><span class="n">length</span><span class="p">):</span>
  278. <span class="n">exemplars</span><span class="p">[</span><span class="n">header</span><span class="o">.</span><span class="n">locus</span><span class="p">]</span> <span class="o">=</span> <span class="n">record</span>
  279. <span class="c"># Create an entry for the record if it didn&#39;t exist</span>
  280. <span class="k">else</span><span class="p">:</span>
  281. <span class="n">exemplars</span><span class="p">[</span><span class="n">header</span><span class="o">.</span><span class="n">locus</span><span class="p">]</span> <span class="o">=</span> <span class="n">record</span>
  282. <span class="c"># Write out the exemplars in the order of the loci</span>
  283. <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">output_path</span><span class="p">,</span> <span class="s">&#39;w&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span>
  284. <span class="n">SeqIO</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">exemplars</span><span class="o">.</span><span class="n">itervalues</span><span class="p">(),</span> <span class="n">f</span><span class="p">,</span> <span class="s">&quot;fasta&quot;</span><span class="p">)</span>
  285. </div>
  286. <span class="n">BlastHit</span> <span class="o">=</span> <span class="n">namedtuple</span><span class="p">(</span><span class="s">&#39;BlastHit&#39;</span><span class="p">,</span> <span class="s">&quot;query title definition id evalue rank orient mask score bitscore length percent&quot;</span><span class="p">)</span>
  287. <span class="sd">&quot;&quot;&quot;</span>
  288. <span class="sd">A namedtuple for storing several fields of a Blast hit.</span>
  289. <span class="sd">&quot;&quot;&quot;</span>
  290. <div class="viewcode-block" id="blast_hits"><a class="viewcode-back" href="../../workflows.html#biolite.workflows.blast_hits">[docs]</a><span class="k">def</span> <span class="nf">blast_hits</span><span class="p">(</span><span class="n">xml_path</span><span class="p">,</span> <span class="n">nlimit</span><span class="o">=</span><span class="bp">None</span><span class="p">):</span>
  291. <span class="sd">&quot;&quot;&quot;</span>
  292. <span class="sd"> Reads an XML formatted BLAST report, and yields one named tuple per</span>
  293. <span class="sd"> alignment, i.e. per hit between a query and a subject. Each named tuple</span>
  294. <span class="sd"> has the following elements:</span>
  295. <span class="sd"> </span>
  296. <span class="sd"> query title definition id evalue rank orient mask score bitscore length percent</span>
  297. <span class="sd"> where:</span>
  298. <span class="sd"> * orient is 1 if query and subject are in the same direction, 2 if they are</span>
  299. <span class="sd"> in the opposite direction, and 0 if direction is inconsistent across hsp&#39;s</span>
  300. <span class="sd"> * evalue is the minimum evalue across hsp&#39;s</span>
  301. <span class="sd"> * score, bitscore and length are maximal across hsp&#39;s</span>
  302. <span class="sd"> &quot;&quot;&quot;</span>
  303. <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">xml_path</span><span class="p">,</span> <span class="s">&#39;r&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span>
  304. <span class="c"># Loop over the blast records.</span>
  305. <span class="k">for</span> <span class="n">entry</span> <span class="ow">in</span> <span class="n">NCBIXML</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">f</span><span class="p">):</span>
  306. <span class="c"># Limit the number of alignments per query, if nlimit is set.</span>
  307. <span class="k">if</span> <span class="n">nlimit</span><span class="p">:</span>
  308. <span class="n">alignments</span> <span class="o">=</span> <span class="n">entry</span><span class="o">.</span><span class="n">alignments</span><span class="p">[:</span><span class="n">nlimit</span><span class="p">]</span>
  309. <span class="k">else</span><span class="p">:</span>
  310. <span class="n">alignments</span> <span class="o">=</span> <span class="n">entry</span><span class="o">.</span><span class="n">alignments</span>
  311. <span class="n">rank</span> <span class="o">=</span> <span class="mi">0</span>
  312. <span class="c"># Loop over the alignments.</span>
  313. <span class="k">for</span> <span class="n">alignment</span> <span class="ow">in</span> <span class="n">alignments</span><span class="p">:</span>
  314. <span class="n">rank</span> <span class="o">=</span> <span class="n">rank</span> <span class="o">+</span> <span class="mi">1</span>
  315. <span class="n">evalue</span> <span class="o">=</span> <span class="n">sys</span><span class="o">.</span><span class="n">maxint</span>
  316. <span class="n">score</span> <span class="o">=</span> <span class="o">-</span><span class="mi">1</span>
  317. <span class="n">bitscore</span> <span class="o">=</span> <span class="o">-</span><span class="mf">1.0</span>
  318. <span class="n">length</span> <span class="o">=</span> <span class="o">-</span><span class="mi">1</span>
  319. <span class="n">orient</span> <span class="o">=</span> <span class="mi">0</span>
  320. <span class="c"># Initialize a string to hold the sites that are within hsp&#39;s</span>
  321. <span class="c"># String has the same length as the query. Sites with 0 are not</span>
  322. <span class="c"># in any hsp, sites that are nonzero are in hsps</span>
  323. <span class="n">mask</span> <span class="o">=</span> <span class="p">[</span><span class="s">&#39;0&#39;</span><span class="p">]</span> <span class="o">*</span> <span class="n">entry</span><span class="o">.</span><span class="n">query_length</span>
  324. <span class="c"># Loop over the hsp&#39;s to get the lowest evalue and the</span>
  325. <span class="c"># orientation.</span>
  326. <span class="k">for</span> <span class="n">hsp</span> <span class="ow">in</span> <span class="n">alignment</span><span class="o">.</span><span class="n">hsps</span><span class="p">:</span>
  327. <span class="c"># Check if the orient is the same by comparing</span>
  328. <span class="c"># orientation of query and subject.</span>
  329. <span class="k">if</span> <span class="p">(</span><span class="n">hsp</span><span class="o">.</span><span class="n">query_start</span> <span class="o">&lt;</span> <span class="n">hsp</span><span class="o">.</span><span class="n">query_end</span><span class="p">)</span> <span class="o">==</span> \
  330. <span class="p">(</span><span class="n">hsp</span><span class="o">.</span><span class="n">sbjct_start</span> <span class="o">&lt;</span> <span class="n">hsp</span><span class="o">.</span><span class="n">sbjct_end</span><span class="p">):</span>
  331. <span class="n">orient</span> <span class="o">|=</span> <span class="mi">1</span>
  332. <span class="c"># Or different.</span>
  333. <span class="k">else</span><span class="p">:</span>
  334. <span class="n">orient</span> <span class="o">|=</span> <span class="mi">2</span>
  335. <span class="c"># Record the lowest e-value.</span>
  336. <span class="n">evalue</span> <span class="o">=</span> <span class="nb">min</span><span class="p">(</span><span class="n">evalue</span><span class="p">,</span> <span class="n">hsp</span><span class="o">.</span><span class="n">expect</span><span class="p">)</span>
  337. <span class="c"># Create a 2 element list with the starting and end point</span>
  338. <span class="c"># of the hsp, adjusted to be 0 offset rather than 1 offset</span>
  339. <span class="n">ends</span> <span class="o">=</span> <span class="p">[</span><span class="n">hsp</span><span class="o">.</span><span class="n">query_start</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="n">hsp</span><span class="o">.</span><span class="n">query_end</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span>
  340. <span class="c"># Sort the list to orient it so that start is less than</span>
  341. <span class="c"># finish, then loop over the string and set the sites in</span>
  342. <span class="c"># the hsp range to &#39;1&#39;</span>
  343. <span class="n">ends</span><span class="o">.</span><span class="n">sort</span><span class="p">()</span>
  344. <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">ends</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">ends</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">+</span> <span class="mi">1</span><span class="p">):</span>
  345. <span class="n">mask</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">=</span> <span class="s">&#39;1&#39;</span>
  346. <span class="c"># Convert list to string</span>
  347. <span class="n">mask_string</span> <span class="o">=</span> <span class="s">&#39;&#39;</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">mask</span><span class="p">)</span>
  348. <span class="c"># Record the highest score.</span>