/exomate/views/recessive.py
Python | 180 lines | 97 code | 44 blank | 39 comment | 21 complexity | 9069079f5d8582b95a52b5ecb7d0320e MD5 | raw file
- """
- The recessive page
- """
- from exomate.database import get_session
- from flask import render_template, request
- from exomate.application import app
- from exomate.models import Sample, Disease, KnownSource, KnownVariant, Call, Annotation, Variant
- from exomate.views.common import get_incomplete_annotation_warning, parse_mutation_query_parameters, Row
- from exomate.variants import VARIANT_OUT_NAMES, EXON_VARIANTS
- from collections import defaultdict, namedtuple
- from sqlalchemy import not_ , or_
- from sqlalchemy.orm import aliased
- session = get_session()
- #@shorten('recessive')
- @app.route('/recessive', methods=['GET', 'POST'])
- def recessive():
- if request.method != 'POST':
- samples = session.query(Sample).join(Disease).order_by(Disease.name, Sample.accession).all()
- warning = get_incomplete_annotation_warning()
- known_sources = session.query(KnownSource).all()
- return render_template('recessive-query.html',
- samples=samples,
- known_sources=known_sources,
- warning=warning,
- variant=VARIANT_OUT_NAMES,
- exon_variants=EXON_VARIANTS)
- q = parse_mutation_query_parameters(request.form)
- q.known_sources = session.query(KnownSource).filter(KnownSource.id.in_(q.ks)).all() if q.ks else []
- affected_sample = session.query(Sample).filter(Sample.accession == q.affected_sample_name).one()
- if q.ancestor1 == "father":
- anc1_samples = affected_sample.patient.father.samples
- else:
- anc1_samples = [session.query(Sample).filter(Sample.accession == q.ancestor1).one()]
- if q.ancestor2 == "mother":
- anc2_samples = affected_sample.patient.mother.samples
- else:
- anc2_samples = [session.query(Sample).filter(Sample.accession == q.ancestor2).one()]
- # check if the sample has parents if recessive mutation is selected
- # no_parents = [s.accession for s in affected_samples if s.patient.father == None or s.patient.mother == None]
- # if len(no_parents) > 0:
- # # error out if parent informations are missing
- # return render_template('error.html', message=" ".join(["The following samples have incomplete parent information:"] + no_parents))
- rows = defaultdict(Row)
- if q.query_mode == 'homozygous':
- mutations = get_homozygous(affected_sample, anc1_samples, anc2_samples, q)
- else:
- mutations = get_compound_heterozygous(affected_sample, anc1_samples, anc2_samples, q)
- # grouping (call/annotation)s
- CallAnnotation = namedtuple("CallAnnotation", "call annotation bed")
- for call, annotation in mutations:
- gene = annotation.symbol
- unit = call.sample.units[0]
- # the key to group the entries
- key = (gene, annotation.variant.id)
- rows[key].add(CallAnnotation(call, annotation, None))
- # filter records with too few common genes and sort by gene name
- sorted_rows = sorted([row for row in rows.values()], key=lambda row: row.gene.name)
- return render_template('mutations-result.html', rows=sorted_rows, query=q, common_genes=[])
- def get_homozygous(index_sample, anc1_samples, anc2_samples, q):
- """Return a list of calls which are homozygous in index and heterozygous in at least one parent (to account for new mutations on
- the other allele), and which are not homozygous in healthy samples."""
- assert len(anc1_samples) > 0
- assert len(anc2_samples) > 0
- anc1_sample = sorted(anc1_samples, key=lambda s: s.id, reverse=True)[0]
- anc2_sample = sorted(anc2_samples, key=lambda s: s.id, reverse=True)[0]
- index_homo = session.query(Call, Annotation).join(Variant).join(Annotation).join(Sample)\
- .filter(Sample.accession==index_sample.accession)\
- .filter(not_(Call.is_heterozygous))\
- .filter(Annotation.consequence.op('&')(q.consequences_bits)>0)\
- .filter(Call.qual > q.affected_qual)
- if q.known_sources:
- index_homo = index_homo.outerjoin(KnownVariant)\
- .filter(or_(\
- KnownVariant.variant_id == None,
- KnownVariant.allelefreq <= q.dbsnp_allelefreq,
- KnownVariant.allelefreq == None))
- AncestorCall = aliased(Call)
- x = index_homo.join(AncestorCall, Call.variant_id == AncestorCall.variant_id)\
- .filter((AncestorCall.sample == anc1_sample) | (AncestorCall.sample == anc2_sample))\
- .filter(AncestorCall.is_heterozygous == True)
- healthy_homo = session.query(Call).join(Sample).join(Disease)\
- .filter((Disease.name=='healthy') | (Call.sample == anc1_sample) | (Call.sample == anc2_sample))\
- .filter(not_(Call.is_heterozygous)).subquery()
- x = x.outerjoin(healthy_homo, healthy_homo.c.variant_id == Call.variant_id).filter(healthy_homo.c.variant_id == None)
- return x.all()
- def get_compound_heterozygous(index_sample, anc1_samples, anc2_samples, q):
- """Return a list of calls which are homozygous in index and heterozygous in at least one parent (to account for new mutations on
- the other allele), and which are not homozygous in healthy samples."""
- assert len(anc1_samples) > 0
- assert len(anc2_samples) > 0
- anc1_sample = sorted(anc1_samples, key=lambda s: s.id, reverse=True)[0]
- anc2_sample = sorted(anc2_samples, key=lambda s: s.id, reverse=True)[0]
- # interesting = [ ]
- ## interesting = [ func.abs(Annotation.splice_dist) <= q.splice_dist ]
- ## if q.show_coding_variants:
- ## # coding, but not synonymous
- ## interesting.append(and_(Annotation.region == VARIANT_REGIONS.CODING, Annotation.type != VARIANT_TYPES.SYNONYMOUS))
- ## if q.show_intron_variants:
- ## interesting.append(Annotation.region == VARIANT_REGIONS.INTRON)
- ## if q.show_utr_variants:
- ## interesting.append(Annotation.region == VARIANT_REGIONS.UTR)
- index_hetero = session.query(Call, Annotation)\
- .join(Variant)\
- .join(Annotation)\
- .join(Sample)\
- .filter(Sample.accession==index_sample.accession)\
- .filter(Call.is_heterozygous == True)\
- .filter(Annotation.consequence.op('&')(q.consequences_bits)>0)\
- .filter((Annotation.eur_maf == None) | (Annotation.eur_maf < q.allelecount_inhouse))\
- .filter(Call.qual > q.affected_qual)
- if q.known_sources:
- index_hetero = index_hetero.outerjoin(KnownVariant)\
- .filter(or_(KnownVariant.variant_id == None, KnownVariant.allelefreq <= q.dbsnp_allelefreq, KnownVariant.allelefreq == None))
- # fetch alle calls / annotations / gene occuring in index and Ancestor1
- Ancestor1Call = aliased(Call)
- x = index_hetero.join(Ancestor1Call, Call.variant_id == Ancestor1Call.variant_id)\
- .filter(Ancestor1Call.sample==anc1_sample)\
- .filter(Ancestor1Call.is_heterozygous == True).all()
- # fetch all calls / annotations / gene occuring in index and Ancestor2
- Ancestor2Call = aliased(Call)
- y = index_hetero.join(Ancestor2Call, Call.variant_id == Ancestor2Call.variant_id).filter(Ancestor2Call.sample==anc2_sample).filter(Ancestor2Call.is_heterozygous == True).all()
- # filter these calls where variant of father = variant of Ancestor. These mutations should not be relevant because in this situation
- # index
- # ----A------
- # -------B---
- #
- # Ancestor1
- # ----A------
- # -------B---
- # Ancestor2
- # ----A------
- # -----------
- # the Ancestor1 would also have the disease
- forbidden_variants = set(call.variant_id for call, annotation in x) & set(call.variant_id for call, annotation in y)
- valid_genes = set(annotation.symbol for call, annotation in x if call.variant_id not in forbidden_variants) & set(annotation.symbol for call, annotation in y if call.variant_id not in forbidden_variants)
- return [(call, annotation) for call, annotation in x+y if annotation.symbol in valid_genes]