/nextgen/bcbio/pipeline/lane.py

http://github.com/chapmanb/bcbb
Python | 92 lines | 73 code | 6 blank | 13 comment | 21 complexity | c440772878d0fb3714d8bd8a0cca1ed0 MD5 | raw file
  1. """Top level driver functionality for processing a sequencing lane.
  2. """
  3. import os
  4. import copy
  5. from bcbio.log import logger
  6. from bcbio import utils
  7. from bcbio.pipeline.fastq import get_fastq_files
  8. from bcbio.pipeline.demultiplex import split_by_barcode
  9. from bcbio.pipeline.alignment import align_to_sort_bam
  10. from bcbio.ngsalign.split import split_read_files
  11. from bcbio.bam.trim import brun_trim_fastq
  12. def _prep_fastq_files(item, bc_files, dirs, config):
  13. """Potentially prepare input FASTQ files for processing.
  14. """
  15. fastq1, fastq2 = bc_files[item["barcode_id"]]
  16. split_size = config.get("distributed", {}).get("align_split_size",
  17. config["algorithm"].get("align_split_size", None))
  18. if split_size:
  19. split_dir = utils.safe_makedir(os.path.join(dirs["work"], "align_splitprep", item["description"]))
  20. return split_read_files(fastq1, fastq2, split_size, split_dir, config)
  21. else:
  22. return [[fastq1, fastq2, None]]
  23. def process_lane(lane_items, fc_name, fc_date, dirs, config):
  24. """Prepare lanes, potentially splitting based on barcodes.
  25. """
  26. lane_name = "%s_%s_%s" % (lane_items[0]['lane'], fc_date, fc_name)
  27. logger.info("Demulitplexing %s" % lane_name)
  28. full_fastq1, full_fastq2 = get_fastq_files(dirs["fastq"], dirs["work"],
  29. lane_items[0], fc_name,
  30. config=_update_config_w_custom(config, lane_items[0]))
  31. bc_files = split_by_barcode(full_fastq1, full_fastq2, lane_items,
  32. lane_name, dirs, config)
  33. out = []
  34. for item in lane_items:
  35. config = _update_config_w_custom(config, item)
  36. # Can specify all barcodes but might not have actual sequences
  37. # Would be nice to have a good way to check this is okay here.
  38. if bc_files.has_key(item["barcode_id"]):
  39. for fastq1, fastq2, lane_ext in _prep_fastq_files(item, bc_files, dirs, config):
  40. cur_lane_name = lane_name
  41. cur_lane_desc = item["description"]
  42. if item.get("name", "") and config["algorithm"].get("include_short_name", True):
  43. cur_lane_desc = "%s : %s" % (item["name"], cur_lane_desc)
  44. if item["barcode_id"] is not None:
  45. cur_lane_name += "_%s" % (item["barcode_id"])
  46. if lane_ext is not None:
  47. cur_lane_name += "_s{0}".format(lane_ext)
  48. if config["algorithm"].get("trim_reads", False):
  49. trim_info = brun_trim_fastq([x for x in [fastq1, fastq2] if x is not None],
  50. dirs, config)
  51. fastq1 = trim_info[0]
  52. if fastq2 is not None:
  53. fastq2 = trim_info[1]
  54. out.append((fastq1, fastq2, item, cur_lane_name, cur_lane_desc,
  55. dirs, config))
  56. return out
  57. def process_alignment(fastq1, fastq2, info, lane_name, lane_desc,
  58. dirs, config):
  59. """Do an alignment of fastq files, preparing a sorted BAM output file.
  60. """
  61. aligner = config["algorithm"].get("aligner", None)
  62. out_bam = ""
  63. if os.path.exists(fastq1) and aligner:
  64. logger.info("Aligning lane %s with %s aligner" % (lane_name, aligner))
  65. out_bam = align_to_sort_bam(fastq1, fastq2, info["genome_build"], aligner,
  66. lane_name, lane_desc, dirs, config)
  67. elif os.path.exists(fastq1) and fastq1.endswith(".bam"):
  68. out_bam = fastq1
  69. return [{"fastq": [fastq1, fastq2], "out_bam": out_bam, "info": info,
  70. "config": config}]
  71. def _update_config_w_custom(config, lane_info):
  72. """Update the configuration for this lane if a custom analysis is specified.
  73. """
  74. name_remaps = {"variant": ["SNP calling", "variant"],
  75. "SNP calling": ["SNP calling", "variant"]}
  76. config = copy.deepcopy(config)
  77. base_name = lane_info.get("analysis")
  78. for analysis_type in name_remaps.get(base_name, [base_name]):
  79. custom = config["custom_algorithms"].get(analysis_type, None)
  80. if custom:
  81. for key, val in custom.iteritems():
  82. config["algorithm"][key] = val
  83. # apply any algorithm details specified with the lane
  84. for key, val in lane_info.get("algorithm", {}).iteritems():
  85. config["algorithm"][key] = val
  86. return config